From f5895943d91b41b0368830cdb6eaffb8eda0f4c8 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 14 Mar 2014 17:44:49 +0000
Subject: KEYS: Move the flags representing required permission to linux/key.h

Move the flags representing required permission to linux/key.h as the perm
parameter of security_key_permission() is in terms of them - and not the
permissions mask flags used in key->perm.

Whilst we're at it:

 (1) Rename them to be KEY_NEED_xxx rather than KEY_xxx to avoid collisions
     with symbols in uapi/linux/input.h.

 (2) Don't use key_perm_t for a mask of required permissions, but rather limit
     it to the permissions mask attached to the key and arguments related
     directly to that.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Dmitry Kasatkin <d.kasatkin@samsung.com>
---
 include/linux/key.h      | 11 +++++++++++
 include/linux/security.h |  6 +++---
 2 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 80d677483e31..cd0abb8c9c33 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -309,6 +309,17 @@ static inline key_serial_t key_serial(const struct key *key)
 
 extern void key_set_timeout(struct key *, unsigned);
 
+/*
+ * The permissions required on a key that we're looking up.
+ */
+#define	KEY_NEED_VIEW	0x01	/* Require permission to view attributes */
+#define	KEY_NEED_READ	0x02	/* Require permission to read content */
+#define	KEY_NEED_WRITE	0x04	/* Require permission to update / modify */
+#define	KEY_NEED_SEARCH	0x08	/* Require permission to search (keyring) or find (key) */
+#define	KEY_NEED_LINK	0x10	/* Require permission to link */
+#define	KEY_NEED_SETATTR 0x20	/* Require permission to change attributes */
+#define	KEY_NEED_ALL	0x3f	/* All the above permissions */
+
 /**
  * key_is_instantiated - Determine if a key has been positively instantiated
  * @key: The key to check.
diff --git a/include/linux/security.h b/include/linux/security.h
index 5623a7f965b7..3a5ed0cd2751 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1707,7 +1707,7 @@ struct security_operations {
 	void (*key_free) (struct key *key);
 	int (*key_permission) (key_ref_t key_ref,
 			       const struct cred *cred,
-			       key_perm_t perm);
+			       unsigned perm);
 	int (*key_getsecurity)(struct key *key, char **_buffer);
 #endif	/* CONFIG_KEYS */
 
@@ -3026,7 +3026,7 @@ static inline int security_path_chroot(struct path *path)
 int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags);
 void security_key_free(struct key *key);
 int security_key_permission(key_ref_t key_ref,
-			    const struct cred *cred, key_perm_t perm);
+			    const struct cred *cred, unsigned perm);
 int security_key_getsecurity(struct key *key, char **_buffer);
 
 #else
@@ -3044,7 +3044,7 @@ static inline void security_key_free(struct key *key)
 
 static inline int security_key_permission(key_ref_t key_ref,
 					  const struct cred *cred,
-					  key_perm_t perm)
+					  unsigned perm)
 {
 	return 0;
 }
-- 
cgit 


From 476d4af22cec8a9ebc90137712e5ab7070b7379d Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@debian.org>
Date: Fri, 3 Oct 2014 17:25:00 +0100
Subject: iio: inkern: add iio_read_channel_average_raw

Add iio_read_channel_average_raw to support reading
averaged raw values in consumer drivers.

Signed-off-by: Sebastian Reichel <sre@debian.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/inkern.c         | 18 ++++++++++++++++++
 include/linux/iio/consumer.h | 13 +++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index 0cf5f8e06cfc..adeba5a0ecf7 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -443,6 +443,24 @@ err_unlock:
 }
 EXPORT_SYMBOL_GPL(iio_read_channel_raw);
 
+int iio_read_channel_average_raw(struct iio_channel *chan, int *val)
+{
+	int ret;
+
+	mutex_lock(&chan->indio_dev->info_exist_lock);
+	if (chan->indio_dev->info == NULL) {
+		ret = -ENODEV;
+		goto err_unlock;
+	}
+
+	ret = iio_channel_read(chan, val, NULL, IIO_CHAN_INFO_AVERAGE_RAW);
+err_unlock:
+	mutex_unlock(&chan->indio_dev->info_exist_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iio_read_channel_average_raw);
+
 static int iio_convert_raw_to_processed_unlocked(struct iio_channel *chan,
 	int raw, int *processed, unsigned int scale)
 {
diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h
index 2752b1fd12be..651f9a0e2765 100644
--- a/include/linux/iio/consumer.h
+++ b/include/linux/iio/consumer.h
@@ -122,6 +122,19 @@ struct iio_channel
 int iio_read_channel_raw(struct iio_channel *chan,
 			 int *val);
 
+/**
+ * iio_read_channel_average_raw() - read averaged raw value from a given channel
+ * @chan:		The channel being queried.
+ * @val:		Value read back.
+ *
+ * Note raw reads from iio channels are in adc counts and hence
+ * scale will need to be applied if standard units are required.
+ *
+ * In contrast to the normal iio_read_channel_raw() this function
+ * returns the average of multiple reads.
+ */
+int iio_read_channel_average_raw(struct iio_channel *chan, int *val);
+
 /**
  * iio_read_channel_processed() - read processed value from a given channel
  * @chan:		The channel being queried.
-- 
cgit 


From 9fb6bf02e3ad04c20edb8e46536ce3eeda32c736 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Mon, 7 Apr 2014 13:39:33 -0400
Subject: HID: rmi: introduce RMI driver for Synaptics touchpads

This driver adds support for RMI4 over USB or I2C.
The current state is that it uses its own RMI4 implementation, but once
RMI4 is merged upstream, the driver will be a transport driver for the
RMI4 library.

Part of this driver should be considered as temporary. Most of the RMI4
processing and input handling will be deleted at some point.

I based my work on Andrew's port of RMI4 over HID (see
https://github.com/mightybigcar/synaptics-rmi4/tree/rmihid ).
This repo presents how the driver may look at the end:
https://github.com/mightybigcar/synaptics-rmi4/blob/rmihid/drivers/input/rmi4/rmi_hid.c

Without this temporary solution, the workaround we gave to users
is to disable i2c-hid, which leads to disabling the touchscreen on the
XPS 11 and 12 (Haswell generation).

Related bugs:
https://bugzilla.redhat.com/show_bug.cgi?id=1048314
https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1218973

Signed-off-by: Andrew Duggan <aduggan@synaptics.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/Kconfig    |   8 +
 drivers/hid/Makefile   |   1 +
 drivers/hid/hid-core.c |   2 +
 drivers/hid/hid-rmi.c  | 889 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/hid.h    |   2 +
 5 files changed, 902 insertions(+)
 create mode 100644 drivers/hid/hid-rmi.c

(limited to 'include/linux')

diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig
index 7af9d0b5dea1..762f15d6ed88 100644
--- a/drivers/hid/Kconfig
+++ b/drivers/hid/Kconfig
@@ -657,6 +657,14 @@ config HID_SUNPLUS
 	---help---
 	Support for Sunplus wireless desktop.
 
+config HID_RMI
+	tristate "Synaptics RMI4 device support"
+	depends on HID
+	---help---
+	Support for Synaptics RMI4 touchpads.
+	Say Y here if you have a Synaptics RMI4 touchpad over i2c-hid or usbhid
+	and want support for its special functionalities.
+
 config HID_GREENASIA
 	tristate "GreenAsia (Product ID 0x12) game controller support"
 	depends on HID
diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile
index fc712dde02a4..a6fa6baf368e 100644
--- a/drivers/hid/Makefile
+++ b/drivers/hid/Makefile
@@ -97,6 +97,7 @@ obj-$(CONFIG_HID_ROCCAT)	+= hid-roccat.o hid-roccat-common.o \
 	hid-roccat-arvo.o hid-roccat-isku.o hid-roccat-kone.o \
 	hid-roccat-koneplus.o hid-roccat-konepure.o hid-roccat-kovaplus.o \
 	hid-roccat-lua.o hid-roccat-pyra.o hid-roccat-ryos.o hid-roccat-savu.o
+obj-$(CONFIG_HID_RMI)		+= hid-rmi.o
 obj-$(CONFIG_HID_SAITEK)	+= hid-saitek.o
 obj-$(CONFIG_HID_SAMSUNG)	+= hid-samsung.o
 obj-$(CONFIG_HID_SMARTJOYPLUS)	+= hid-sjoy.o
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 9e8064205bc7..f05255d92de7 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1882,6 +1882,8 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGP_MOUSE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_STEELSERIES, USB_DEVICE_ID_STEELSERIES_SRWS1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, HID_ANY_ID) },
+	{ HID_I2C_DEVICE(USB_VENDOR_ID_SYNAPTICS, HID_ANY_ID) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THINGM, USB_DEVICE_ID_BLINK1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) },
diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c
new file mode 100644
index 000000000000..699d631c6920
--- /dev/null
+++ b/drivers/hid/hid-rmi.c
@@ -0,0 +1,889 @@
+/*
+ *  Copyright (c) 2013 Andrew Duggan <aduggan@synaptics.com>
+ *  Copyright (c) 2013 Synaptics Incorporated
+ *  Copyright (c) 2014 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+ *  Copyright (c) 2014 Red Hat, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/hid.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include "hid-ids.h"
+
+#define RMI_MOUSE_REPORT_ID		0x01 /* Mouse emulation Report */
+#define RMI_WRITE_REPORT_ID		0x09 /* Output Report */
+#define RMI_READ_ADDR_REPORT_ID		0x0a /* Output Report */
+#define RMI_READ_DATA_REPORT_ID		0x0b /* Input Report */
+#define RMI_ATTN_REPORT_ID		0x0c /* Input Report */
+#define RMI_SET_RMI_MODE_REPORT_ID	0x0f /* Feature Report */
+
+/* flags: bit numbers (not masks) for set_bit()/test_bit() on rmi_data.flags */
+#define RMI_READ_REQUEST_PENDING	0
+#define RMI_READ_DATA_PENDING		1
+#define RMI_STARTED			2
+
+enum rmi_mode_type {
+	RMI_MODE_OFF			= 0,
+	RMI_MODE_ATTN_REPORTS		= 1,
+	RMI_MODE_NO_PACKED_ATTN_REPORTS	= 2,
+};
+
+struct rmi_function {
+	unsigned page;			/* page of the function */
+	u16 query_base_addr;		/* base address for queries */
+	u16 command_base_addr;		/* base address for commands */
+	u16 control_base_addr;		/* base address for controls */
+	u16 data_base_addr;		/* base address for data */
+	unsigned int interrupt_base;	/* cross-function interrupt number
+					 * (unique in the device) */
+	unsigned int interrupt_count;	/* number of interrupts */
+	unsigned int report_size;	/* size of a report */
+	unsigned long irq_mask;		/* mask of the interrupts
+					 * (to be applied against ATTN IRQ) */
+};
+
+/**
+ * struct rmi_data - stores information for hid communication
+ *
+ * @page_mutex: Locks current page to avoid changing pages in unexpected ways.
+ * @page: Keeps track of the current virtual page
+ *
+ * @wait: Used for waiting for read data
+ *
+ * @writeReport: output buffer when writing RMI registers
+ * @readReport: input buffer when reading RMI registers
+ *
+ * @input_report_size: size of an input report (advertised by HID)
+ * @output_report_size: size of an output report (advertised by HID)
+ *
+ * @flags: flags for the current device (started, reading, etc...)
+ *
+ * @f11: placeholder of internal RMI function F11 description
+ * @f30: placeholder of internal RMI function F30 description
+ *
+ * @max_fingers: maximum finger count reported by the device
+ * @max_x: maximum x value reported by the device
+ * @max_y: maximum y value reported by the device
+ *
+ * @gpio_led_count: count of GPIOs + LEDs reported by F30
+ * @button_count: actual physical buttons count
+ * @button_mask: button mask used to decode GPIO ATTN reports
+ * @button_state_mask: pull state of the buttons
+ *
+ * @input: pointer to the kernel input device
+ *
+ * @reset_work: worker which will be called in case of a mouse report
+ * @hdev: pointer to the struct hid_device
+ */
+struct rmi_data {
+	struct mutex page_mutex;
+	int page;
+
+	wait_queue_head_t wait;
+
+	u8 *writeReport;
+	u8 *readReport;
+
+	int input_report_size;
+	int output_report_size;
+
+	unsigned long flags;
+
+	struct rmi_function f11;
+	struct rmi_function f30;
+
+	unsigned int max_fingers;
+	unsigned int max_x;
+	unsigned int max_y;
+	unsigned int x_size_mm;	/* physical x size in mm; stays 0 if F11 does not report it */
+	unsigned int y_size_mm;	/* physical y size in mm; stays 0 if F11 does not report it */
+
+	unsigned int gpio_led_count;
+	unsigned int button_count;
+	unsigned long button_mask;
+	unsigned long button_state_mask;
+
+	struct input_dev *input;
+
+	struct work_struct reset_work;
+	struct hid_device *hdev;
+};
+
+#define RMI_PAGE(addr) (((addr) >> 8) & 0xff)
+
+static int rmi_write_report(struct hid_device *hdev, u8 *report, int len);
+
+/**
+ * rmi_set_page - Set RMI page
+ * @hdev: The pointer to the hid_device struct
+ * @page: The new page address.
+ *
+ * RMI devices have 16-bit addressing, but some of the physical
+ * implementations (like SMBus) only have 8-bit addressing. So RMI implements
+ * a page address at 0xff of every page so we can reliably page addresses
+ * every 256 registers.
+ *
+ * The page_mutex lock must be held when this function is entered.
+ *
+ * Returns zero on success, a negative errno on failure (-EIO on short write).
+ */
+static int rmi_set_page(struct hid_device *hdev, u8 page)
+{
+	struct rmi_data *data = hid_get_drvdata(hdev);
+	int retval;
+
+	data->writeReport[0] = RMI_WRITE_REPORT_ID;
+	data->writeReport[1] = 1;
+	data->writeReport[2] = 0xFF;	/* page select register at 0xff */
+	data->writeReport[4] = page;
+
+	retval = rmi_write_report(hdev, data->writeReport,
+			data->output_report_size);
+	if (retval != data->output_report_size) {
+		dev_err(&hdev->dev,
+			"%s: set page failed: %d.\n", __func__, retval);
+		return retval < 0 ? retval : -EIO;	/* short write */
+	}
+
+	data->page = page;
+	return 0;
+}
+
+static int rmi_set_mode(struct hid_device *hdev, u8 mode)
+{
+	int ret;
+	u8 txbuf[2] = {RMI_SET_RMI_MODE_REPORT_ID, mode};
+
+	ret = hid_hw_raw_request(hdev, RMI_SET_RMI_MODE_REPORT_ID, txbuf,
+			sizeof(txbuf), HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+	if (ret < 0) {
+		dev_err(&hdev->dev, "unable to set rmi mode to %d (%d)\n", mode,
+			ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int rmi_write_report(struct hid_device *hdev, u8 *report, int len)
+{
+	int ret;
+
+	ret = hid_hw_output_report(hdev, (void *)report, len);
+	if (ret < 0) {
+		dev_err(&hdev->dev, "failed to write hid report (%d)\n", ret);
+		return ret;
+	}
+
+	return ret;
+}
+
+static int rmi_read_block(struct hid_device *hdev, u16 addr, void *buf,
+		const int len)
+{
+	struct rmi_data *data = hid_get_drvdata(hdev);
+	int ret;
+	int bytes_read;
+	int bytes_needed;
+	int retries;
+	int read_input_count;
+
+	mutex_lock(&data->page_mutex);
+
+	if (RMI_PAGE(addr) != data->page) {
+		ret = rmi_set_page(hdev, RMI_PAGE(addr));
+		if (ret < 0)
+			goto exit;
+	}
+
+	for (retries = 5; retries > 0; retries--) {
+		data->writeReport[0] = RMI_READ_ADDR_REPORT_ID;
+		data->writeReport[1] = 0; /* old 1 byte read count */
+		data->writeReport[2] = addr & 0xFF;
+		data->writeReport[3] = (addr >> 8) & 0xFF;
+		data->writeReport[4] = len  & 0xFF;
+		data->writeReport[5] = (len >> 8) & 0xFF;
+
+		set_bit(RMI_READ_REQUEST_PENDING, &data->flags);
+
+		ret = rmi_write_report(hdev, data->writeReport,
+						data->output_report_size);
+		if (ret != data->output_report_size) {
+			clear_bit(RMI_READ_REQUEST_PENDING, &data->flags);
+			dev_err(&hdev->dev,
+				"failed to write request output report (%d)\n",
+				ret);
+			goto exit;
+		}
+
+		bytes_read = 0;
+		bytes_needed = len;
+		while (bytes_read < len) {
+			if (!wait_event_timeout(data->wait,
+				test_bit(RMI_READ_DATA_PENDING, &data->flags),
+					msecs_to_jiffies(1000))) {
+				hid_warn(hdev, "%s: timeout elapsed\n",
+					 __func__);
+				ret = -EAGAIN;
+				break;
+			}
+
+			read_input_count = data->readReport[1];
+			memcpy(buf + bytes_read, &data->readReport[2],
+				read_input_count < bytes_needed ?
+					read_input_count : bytes_needed);
+
+			bytes_read += read_input_count;
+			bytes_needed -= read_input_count;
+			clear_bit(RMI_READ_DATA_PENDING, &data->flags);
+		}
+
+		if (ret >= 0) {
+			ret = 0;
+			break;
+		}
+	}
+
+exit:
+	clear_bit(RMI_READ_REQUEST_PENDING, &data->flags);
+	mutex_unlock(&data->page_mutex);
+	return ret;
+}
+
+static inline int rmi_read(struct hid_device *hdev, u16 addr, void *buf)
+{
+	return rmi_read_block(hdev, addr, buf, 1);
+}
+
+/* Decode one 5-byte F11 finger record and report it on MT slot @slot. */
+static void rmi_f11_process_touch(struct rmi_data *hdata, int slot,
+		u8 finger_state, u8 *touch_data)
+{
+	int x, y, wx, wy;
+	int wide, major, minor, z;
+
+	input_mt_slot(hdata->input, slot);
+	input_mt_report_slot_state(hdata->input, MT_TOOL_FINGER,
+			finger_state == 0x01);
+	if (finger_state == 0x01) {
+		x = (touch_data[0] << 4) | (touch_data[2] & 0x0f); /* low nibble */
+		y = (touch_data[1] << 4) | (touch_data[2] >> 4);
+		wx = touch_data[3] & 0x0f;	/* 4 bits, like wy below */
+		wy = touch_data[3] >> 4;
+		wide = (wx > wy);
+		major = max(wx, wy);
+		minor = min(wx, wy);
+		z = touch_data[4];
+
+		/* y is inverted */
+		y = hdata->max_y - y;
+
+		input_event(hdata->input, EV_ABS, ABS_MT_POSITION_X, x);
+		input_event(hdata->input, EV_ABS, ABS_MT_POSITION_Y, y);
+		input_event(hdata->input, EV_ABS, ABS_MT_ORIENTATION, wide);
+		input_event(hdata->input, EV_ABS, ABS_MT_PRESSURE, z);
+		input_event(hdata->input, EV_ABS, ABS_MT_TOUCH_MAJOR, major);
+		input_event(hdata->input, EV_ABS, ABS_MT_TOUCH_MINOR, minor);
+	}
+}
+
+static void rmi_reset_work(struct work_struct *work)
+{
+	struct rmi_data *hdata = container_of(work, struct rmi_data,
+						reset_work);
+
+	/* switch the device to RMI if we receive a generic mouse report */
+	rmi_set_mode(hdata->hdev, RMI_MODE_ATTN_REPORTS);
+}
+
+static inline int rmi_schedule_reset(struct hid_device *hdev)
+{
+	struct rmi_data *hdata = hid_get_drvdata(hdev);
+	return schedule_work(&hdata->reset_work);
+}
+
+static int rmi_f11_input_event(struct hid_device *hdev, u8 irq, u8 *data,
+		int size)
+{
+	struct rmi_data *hdata = hid_get_drvdata(hdev);
+	int offset;
+	int i;
+
+	if (size < hdata->f11.report_size)
+		return 0;
+
+	if (!(irq & hdata->f11.irq_mask))
+		return 0;
+
+	offset = (hdata->max_fingers >> 2) + 1;
+	for (i = 0; i < hdata->max_fingers; i++) {
+		int fs_byte_position = i >> 2;
+		int fs_bit_position = (i & 0x3) << 1;
+		int finger_state = (data[fs_byte_position] >> fs_bit_position) &
+					0x03;
+
+		rmi_f11_process_touch(hdata, i, finger_state,
+				&data[offset + 5 * i]);
+	}
+	input_mt_sync_frame(hdata->input);
+	input_sync(hdata->input);
+	return hdata->f11.report_size;
+}
+
+static int rmi_f30_input_event(struct hid_device *hdev, u8 irq, u8 *data,
+		int size)
+{
+	struct rmi_data *hdata = hid_get_drvdata(hdev);
+	int i;
+	int button = 0;
+	bool value;
+
+	if (!(irq & hdata->f30.irq_mask))
+		return 0;
+
+	for (i = 0; i < hdata->gpio_led_count; i++) {
+		if (test_bit(i, &hdata->button_mask)) {
+			value = (data[i / 8] >> (i & 0x07)) & BIT(0);
+			if (test_bit(i, &hdata->button_state_mask))
+				value = !value;
+			input_event(hdata->input, EV_KEY, BTN_LEFT + button++,
+					value);
+		}
+	}
+	return hdata->f30.report_size;
+}
+
+static int rmi_input_event(struct hid_device *hdev, u8 *data, int size)
+{
+	struct rmi_data *hdata = hid_get_drvdata(hdev);
+	unsigned long irq_mask = 0;
+	unsigned index = 2;
+
+	if (!(test_bit(RMI_STARTED, &hdata->flags)))
+		return 0;
+
+	irq_mask |= hdata->f11.irq_mask;
+	irq_mask |= hdata->f30.irq_mask;
+
+	if (data[1] & ~irq_mask)
+		hid_warn(hdev, "unknown intr source:%02lx %s:%d\n",
+			data[1] & ~irq_mask, __FILE__, __LINE__);
+
+	if (hdata->f11.interrupt_base < hdata->f30.interrupt_base) {
+		index += rmi_f11_input_event(hdev, data[1], &data[index],
+				size - index);
+		index += rmi_f30_input_event(hdev, data[1], &data[index],
+				size - index);
+	} else {
+		index += rmi_f30_input_event(hdev, data[1], &data[index],
+				size - index);
+		index += rmi_f11_input_event(hdev, data[1], &data[index],
+				size - index);
+	}
+
+	return 1;
+}
+
+static int rmi_read_data_event(struct hid_device *hdev, u8 *data, int size)
+{
+	struct rmi_data *hdata = hid_get_drvdata(hdev);
+
+	if (!test_bit(RMI_READ_REQUEST_PENDING, &hdata->flags)) {
+		hid_err(hdev, "no read request pending\n");
+		return 0;
+	}
+
+	memcpy(hdata->readReport, data, size < hdata->input_report_size ?
+			size : hdata->input_report_size);
+	set_bit(RMI_READ_DATA_PENDING, &hdata->flags);
+	wake_up(&hdata->wait);
+
+	return 1;
+}
+
+static int rmi_raw_event(struct hid_device *hdev,
+		struct hid_report *report, u8 *data, int size)
+{
+	switch (data[0]) {
+	case RMI_READ_DATA_REPORT_ID:
+		return rmi_read_data_event(hdev, data, size);
+	case RMI_ATTN_REPORT_ID:
+		return rmi_input_event(hdev, data, size);
+	case RMI_MOUSE_REPORT_ID:
+		rmi_schedule_reset(hdev);
+		break;
+	}
+
+	return 0;
+}
+
+static int rmi_post_reset(struct hid_device *hdev)
+{
+	return rmi_set_mode(hdev, RMI_MODE_ATTN_REPORTS);
+}
+
+static int rmi_post_resume(struct hid_device *hdev)
+{
+	return rmi_set_mode(hdev, RMI_MODE_ATTN_REPORTS);
+}
+
+#define RMI4_MAX_PAGE 0xff
+#define RMI4_PAGE_SIZE 0x0100
+
+#define PDT_START_SCAN_LOCATION 0x00e9
+#define PDT_END_SCAN_LOCATION	0x0005
+#define RMI4_END_OF_PDT(id) ((id) == 0x00 || (id) == 0xff)
+
+struct pdt_entry {
+	u8 query_base_addr:8;
+	u8 command_base_addr:8;
+	u8 control_base_addr:8;
+	u8 data_base_addr:8;
+	u8 interrupt_source_count:3;
+	u8 bits3and4:2;
+	u8 function_version:2;
+	u8 bit7:1;
+	u8 function_number:8;
+} __attribute__((__packed__));
+
+static inline unsigned long rmi_gen_mask(unsigned irq_base, unsigned irq_count)
+{
+	return GENMASK(irq_count + irq_base - 1, irq_base);
+}
+
+static void rmi_register_function(struct rmi_data *data,
+	struct pdt_entry *pdt_entry, int page, unsigned interrupt_count)
+{
+	struct rmi_function *f = NULL;
+	u16 page_base = page << 8;
+
+	switch (pdt_entry->function_number) {
+	case 0x11:
+		f = &data->f11;
+		break;
+	case 0x30:
+		f = &data->f30;
+		break;
+	}
+
+	if (f) {
+		f->page = page;
+		f->query_base_addr = page_base | pdt_entry->query_base_addr;
+		f->command_base_addr = page_base | pdt_entry->command_base_addr;
+		f->control_base_addr = page_base | pdt_entry->control_base_addr;
+		f->data_base_addr = page_base | pdt_entry->data_base_addr;
+		f->interrupt_base = interrupt_count;
+		f->interrupt_count = pdt_entry->interrupt_source_count;
+		f->irq_mask = rmi_gen_mask(f->interrupt_base,
+						f->interrupt_count);
+	}
+}
+
+static int rmi_scan_pdt(struct hid_device *hdev)
+{
+	struct rmi_data *data = hid_get_drvdata(hdev);
+	struct pdt_entry entry;
+	int page;
+	bool page_has_function;
+	int i;
+	int retval;
+	int interrupt = 0;
+	u16 page_start, pdt_start , pdt_end;
+
+	hid_info(hdev, "Scanning PDT...\n");
+
+	for (page = 0; (page <= RMI4_MAX_PAGE); page++) {
+		page_start = RMI4_PAGE_SIZE * page;
+		pdt_start = page_start + PDT_START_SCAN_LOCATION;
+		pdt_end = page_start + PDT_END_SCAN_LOCATION;
+
+		page_has_function = false;
+		for (i = pdt_start; i >= pdt_end; i -= sizeof(entry)) {
+			retval = rmi_read_block(hdev, i, &entry, sizeof(entry));
+			if (retval) {
+				hid_err(hdev,
+					"Read of PDT entry at %#06x failed.\n",
+					i);
+				goto error_exit;
+			}
+
+			if (RMI4_END_OF_PDT(entry.function_number))
+				break;
+
+			page_has_function = true;
+
+			hid_info(hdev, "Found F%02X on page %#04x\n",
+					entry.function_number, page);
+
+			rmi_register_function(data, &entry, page, interrupt);
+			interrupt += entry.interrupt_source_count;
+		}
+
+		if (!page_has_function)
+			break;
+	}
+
+	hid_info(hdev, "%s: Done with PDT scan.\n", __func__);
+	retval = 0;
+
+error_exit:
+	return retval;
+}
+
+static int rmi_populate_f11(struct hid_device *hdev)
+{
+	struct rmi_data *data = hid_get_drvdata(hdev);
+	u8 buf[20];
+	int ret;
+	bool has_query12;
+	bool has_physical_props;
+	unsigned x_size, y_size;
+
+	if (!data->f11.query_base_addr) {
+		hid_err(hdev, "No 2D sensor found, giving up.\n");
+		return -ENODEV;
+	}
+
+	/* query 0 contains some useful information */
+	ret = rmi_read(hdev, data->f11.query_base_addr, buf);
+	if (ret) {
+		hid_err(hdev, "can not get query 0: %d.\n", ret);
+		return ret;
+	}
+	has_query12 = !!(buf[0] & BIT(5));
+
+	/* query 1 to get the max number of fingers */
+	ret = rmi_read(hdev, data->f11.query_base_addr + 1, buf);
+	if (ret) {
+		hid_err(hdev, "can not get NumberOfFingers: %d.\n", ret);
+		return ret;
+	}
+	data->max_fingers = (buf[0] & 0x07) + 1;
+	if (data->max_fingers > 5)
+		data->max_fingers = 10;
+
+	data->f11.report_size = data->max_fingers * 5 +
+				DIV_ROUND_UP(data->max_fingers, 4);
+
+	if (!(buf[0] & BIT(4))) {
+		hid_err(hdev, "No absolute events, giving up.\n");
+		return -ENODEV;
+	}
+
+	/*
+	 * query 12 to know if the physical properties are reported
+	 * (query 12 is at offset 10 for HID devices)
+	 */
+	if (has_query12) {
+		ret = rmi_read(hdev, data->f11.query_base_addr + 10, buf);
+		if (ret) {
+			hid_err(hdev, "can not get query 12: %d.\n", ret);
+			return ret;
+		}
+		has_physical_props = !!(buf[0] & BIT(5));
+
+		if (has_physical_props) {
+			ret = rmi_read_block(hdev,
+					data->f11.query_base_addr + 11, buf, 4);
+			if (ret) {
+				hid_err(hdev, "can not read query 15-18: %d.\n",
+					ret);
+				return ret;
+			}
+
+			x_size = buf[0] | (buf[1] << 8);
+			y_size = buf[2] | (buf[3] << 8);
+
+			data->x_size_mm = DIV_ROUND_CLOSEST(x_size, 10);
+			data->y_size_mm = DIV_ROUND_CLOSEST(y_size, 10);
+
+			hid_info(hdev, "%s: size in mm: %d x %d\n",
+				 __func__, data->x_size_mm, data->y_size_mm);
+		}
+	}
+
+	/* retrieve the ctrl registers */
+	ret = rmi_read_block(hdev, data->f11.control_base_addr, buf, 20);
+	if (ret) {
+		hid_err(hdev, "can not read ctrl block of size 20: %d.\n", ret);
+		return ret;
+	}
+
+	data->max_x = buf[6] | (buf[7] << 8);
+	data->max_y = buf[8] | (buf[9] << 8);
+
+	return 0;
+}
+
+static int rmi_populate_f30(struct hid_device *hdev)
+{
+	struct rmi_data *data = hid_get_drvdata(hdev);
+	u8 buf[20];
+	int ret;
+	bool has_gpio, has_led;
+	unsigned bytes_per_ctrl;
+	u8 ctrl2_addr;
+	int ctrl2_3_length;
+	int i;
+
+	/* function F30 is for physical buttons */
+	if (!data->f30.query_base_addr) {
+		hid_err(hdev, "No GPIO/LEDs found, giving up.\n");
+		return -ENODEV;
+	}
+
+	ret = rmi_read_block(hdev, data->f30.query_base_addr, buf, 2);
+	if (ret) {
+		hid_err(hdev, "can not get F30 query registers: %d.\n", ret);
+		return ret;
+	}
+
+	has_gpio = !!(buf[0] & BIT(3));
+	has_led = !!(buf[0] & BIT(2));
+	data->gpio_led_count = buf[1] & 0x1f;
+
+	/* retrieve ctrl 2 & 3 registers */
+	bytes_per_ctrl = (data->gpio_led_count + 7) / 8;
+	/* Ctrl0 is present only if both has_gpio and has_led are set*/
+	ctrl2_addr = (has_gpio && has_led) ? bytes_per_ctrl : 0;
+	/* Ctrl1 is always present */
+	ctrl2_addr += bytes_per_ctrl;
+	ctrl2_3_length = 2 * bytes_per_ctrl;
+
+	data->f30.report_size = bytes_per_ctrl;
+
+	ret = rmi_read_block(hdev, data->f30.control_base_addr + ctrl2_addr,
+				buf, ctrl2_3_length);
+	if (ret) {
+		hid_err(hdev, "can not read ctrl 2&3 block of size %d: %d.\n",
+			ctrl2_3_length, ret);
+		return ret;
+	}
+
+	for (i = 0; i < data->gpio_led_count; i++) {
+		int byte_position = i >> 3;
+		int bit_position = i & 0x07;
+		u8 dir_byte = buf[byte_position];
+		u8 data_byte = buf[byte_position + bytes_per_ctrl];
+		bool dir = (dir_byte >> bit_position) & BIT(0);
+		bool dat = (data_byte >> bit_position) & BIT(0);
+
+		if (dir == 0) {
+			/* input mode */
+			if (dat) {
+				/* actual buttons have pull up resistor */
+				data->button_count++;
+				set_bit(i, &data->button_mask);
+				set_bit(i, &data->button_state_mask);
+			}
+		}
+
+	}
+
+	return 0;
+}
+
+static int rmi_populate(struct hid_device *hdev)
+{
+	int ret;
+
+	ret = rmi_scan_pdt(hdev);
+	if (ret) {
+		hid_err(hdev, "PDT scan failed with code %d.\n", ret);
+		return ret;
+	}
+
+	ret = rmi_populate_f11(hdev);
+	if (ret) {
+		hid_err(hdev, "Error while initializing F11 (%d).\n", ret);
+		return ret;
+	}
+
+	ret = rmi_populate_f30(hdev);
+	if (ret)
+		hid_warn(hdev, "Error while initializing F30 (%d).\n", ret);
+
+	return 0;
+}
+
+/* Query the RMI functions and declare MT axes/buttons on the new input dev. */
+static void rmi_input_configured(struct hid_device *hdev, struct hid_input *hi)
+{
+	struct rmi_data *data = hid_get_drvdata(hdev);
+	struct input_dev *input = hi->input;
+	int ret, res_x, res_y, i;
+
+	data->input = input;
+
+	hid_dbg(hdev, "Opening low level driver\n");
+	ret = hid_hw_open(hdev);
+	if (ret)
+		return;
+
+	/* Allow incoming hid reports */
+	hid_device_io_start(hdev);
+
+	ret = rmi_set_mode(hdev, RMI_MODE_ATTN_REPORTS);
+	if (ret < 0) {
+		dev_err(&hdev->dev, "failed to set rmi mode\n");
+		goto exit;
+	}
+
+	ret = rmi_set_page(hdev, 0);
+	if (ret < 0) {
+		dev_err(&hdev->dev, "failed to set page select to 0.\n");
+		goto exit;
+	}
+
+	ret = rmi_populate(hdev);
+	if (ret)
+		goto exit;
+
+	__set_bit(EV_ABS, input->evbit);
+	input_set_abs_params(input, ABS_MT_POSITION_X, 1, data->max_x, 0, 0);
+	input_set_abs_params(input, ABS_MT_POSITION_Y, 1, data->max_y, 0, 0);
+
+	if (data->x_size_mm && data->y_size_mm) {	/* both are divisors */
+		res_x = (data->max_x - 1) / data->x_size_mm;
+		res_y = (data->max_y - 1) / data->y_size_mm;
+
+		input_abs_set_res(input, ABS_MT_POSITION_X, res_x);
+		input_abs_set_res(input, ABS_MT_POSITION_Y, res_y);
+	}
+
+	input_set_abs_params(input, ABS_MT_ORIENTATION, 0, 1, 0, 0);
+	input_set_abs_params(input, ABS_MT_PRESSURE, 0, 0xff, 0, 0);
+	input_set_abs_params(input, ABS_MT_TOUCH_MAJOR, 0, 0x0f, 0, 0);
+	input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, 0x0f, 0, 0);
+
+	input_mt_init_slots(input, data->max_fingers, INPUT_MT_POINTER);
+
+	if (data->button_count) {
+		__set_bit(EV_KEY, input->evbit);
+		for (i = 0; i < data->button_count; i++)
+			__set_bit(BTN_LEFT + i, input->keybit);
+
+		if (data->button_count == 1)
+			__set_bit(INPUT_PROP_BUTTONPAD, input->propbit);
+	}
+
+	set_bit(RMI_STARTED, &data->flags);
+
+exit:
+	hid_device_io_stop(hdev);
+	hid_hw_close(hdev);
+}
+
+static int rmi_input_mapping(struct hid_device *hdev,
+		struct hid_input *hi, struct hid_field *field,
+		struct hid_usage *usage, unsigned long **bit, int *max)
+{
+	/* we want to make HID ignore the advertised HID collection */
+	return -1;
+}
+
+/* Bind: size the RMI report buffers from the parsed HID reports, start hw. */
+static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id)
+{
+	struct rmi_data *data = NULL;
+	int ret;
+	size_t alloc_size;
+
+	data = devm_kzalloc(&hdev->dev, sizeof(struct rmi_data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	INIT_WORK(&data->reset_work, rmi_reset_work);
+	data->hdev = hdev;
+
+	hid_set_drvdata(hdev, data);
+
+	hdev->quirks |= HID_QUIRK_NO_INIT_REPORTS;
+
+	ret = hid_parse(hdev);
+	if (ret) {
+		hid_err(hdev, "parse failed\n");
+		return ret;
+	}
+
+	data->input_report_size = (hdev->report_enum[HID_INPUT_REPORT]
+		.report_id_hash[RMI_ATTN_REPORT_ID]->size >> 3)
+		+ 1 /* report id */;
+	data->output_report_size = (hdev->report_enum[HID_OUTPUT_REPORT]
+		.report_id_hash[RMI_WRITE_REPORT_ID]->size >> 3)
+		+ 1 /* report id */;
+
+	alloc_size = data->output_report_size + data->input_report_size;
+
+	data->writeReport = devm_kzalloc(&hdev->dev, alloc_size, GFP_KERNEL);
+	if (!data->writeReport)
+		return -ENOMEM;
+
+	/* single allocation: the input buffer follows the output buffer */
+	data->readReport = data->writeReport + data->output_report_size;
+
+	init_waitqueue_head(&data->wait);
+
+	mutex_init(&data->page_mutex);
+
+	ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+	if (ret) {
+		hid_err(hdev, "hw start failed\n");
+		return ret;
+	}
+
+	if (!test_bit(RMI_STARTED, &data->flags)) {
+		hid_hw_stop(hdev);
+		return -EIO;
+	}
+
+	/* keep the hw started; rmi_remove() is what calls hid_hw_stop() */
+	return 0;
+}
+
+static void rmi_remove(struct hid_device *hdev)
+{
+	struct rmi_data *hdata = hid_get_drvdata(hdev);
+
+	clear_bit(RMI_STARTED, &hdata->flags);
+
+	hid_hw_stop(hdev);
+}
+
+static const struct hid_device_id rmi_id[] = {
+	{ HID_I2C_DEVICE(USB_VENDOR_ID_SYNAPTICS, HID_ANY_ID) },
+	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, HID_ANY_ID) },
+	{ }
+};
+MODULE_DEVICE_TABLE(hid, rmi_id);
+
+static struct hid_driver rmi_driver = {
+	.name = "hid-rmi",
+	.id_table		= rmi_id,
+	.probe			= rmi_probe,
+	.remove			= rmi_remove,
+	.raw_event		= rmi_raw_event,
+	.input_mapping		= rmi_input_mapping,
+	.input_configured	= rmi_input_configured,
+#ifdef CONFIG_PM
+	.resume			= rmi_post_resume,
+	.reset_resume		= rmi_post_reset,
+#endif
+};
+
+module_hid_driver(rmi_driver);
+
+MODULE_AUTHOR("Andrew Duggan <aduggan@synaptics.com>");
+MODULE_DESCRIPTION("RMI HID driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 720e3a10608c..54f855b2c902 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -570,6 +570,8 @@ struct hid_descriptor {
 	.bus = BUS_USB, .vendor = (ven), .product = (prod)
 #define HID_BLUETOOTH_DEVICE(ven, prod)					\
 	.bus = BUS_BLUETOOTH, .vendor = (ven), .product = (prod)
+#define HID_I2C_DEVICE(ven, prod)				\
+	.bus = BUS_I2C, .vendor = (ven), .product = (prod)
 
 #define HID_REPORT_ID(rep) \
 	.report_type = (rep)
-- 
cgit 


From 59c3d45e487315e6e05a3f2310b61109f8e503e7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 8 Apr 2014 09:15:35 -0600
Subject: block: remove 'q' parameter from kblockd_schedule_*_work()

The queue parameter is never used, just get rid of it.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c        | 6 +++---
 block/blk-flush.c       | 2 +-
 block/blk-mq.c          | 7 ++-----
 block/cfq-iosched.c     | 2 +-
 drivers/scsi/scsi_lib.c | 2 +-
 include/linux/blkdev.h  | 4 ++--
 6 files changed, 10 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 34d7c196338b..f7d2c3335dfa 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2904,14 +2904,14 @@ free_and_out:
 }
 EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
 
-int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
+int kblockd_schedule_work(struct work_struct *work)
 {
 	return queue_work(kblockd_workqueue, work);
 }
 EXPORT_SYMBOL(kblockd_schedule_work);
 
-int kblockd_schedule_delayed_work(struct request_queue *q,
-			struct delayed_work *dwork, unsigned long delay)
+int kblockd_schedule_delayed_work(struct delayed_work *dwork,
+				  unsigned long delay)
 {
 	return queue_delayed_work(kblockd_workqueue, dwork, delay);
 }
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 43e6b4755e9a..77f20458910c 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -144,7 +144,7 @@ static bool blk_flush_queue_rq(struct request *rq, bool add_front)
 {
 	if (rq->q->mq_ops) {
 		INIT_WORK(&rq->mq_flush_work, mq_flush_run);
-		kblockd_schedule_work(rq->q, &rq->mq_flush_work);
+		kblockd_schedule_work(&rq->mq_flush_work);
 		return false;
 	} else {
 		if (add_front)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1d2a9bdbee57..9c8f1f4ada7f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -608,11 +608,8 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 
 	if (!async)
 		__blk_mq_run_hw_queue(hctx);
-	else {
-		struct request_queue *q = hctx->queue;
-
-		kblockd_schedule_delayed_work(q, &hctx->delayed_work, 0);
-	}
+	else
+		kblockd_schedule_delayed_work(&hctx->delayed_work, 0);
 }
 
 void blk_mq_run_queues(struct request_queue *q, bool async)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e0985f1955e7..5063a0bd831a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -908,7 +908,7 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
 {
 	if (cfqd->busy_queues) {
 		cfq_log(cfqd, "schedule dispatch");
-		kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
+		kblockd_schedule_work(&cfqd->unplug_work);
 	}
 }
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 5681c05ac506..91f99f4ce2e8 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -139,7 +139,7 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy)
 	 */
 	spin_lock_irqsave(q->queue_lock, flags);
 	blk_requeue_request(q, cmd->request);
-	kblockd_schedule_work(q, &device->requeue_work);
+	kblockd_schedule_work(&device->requeue_work);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1e1fa3f93d5f..2425945d36ab 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1359,8 +1359,8 @@ static inline void put_dev_sector(Sector p)
 }
 
 struct work_struct;
-int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
-int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay);
+int kblockd_schedule_work(struct work_struct *work);
+int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
 
 #ifdef CONFIG_BLK_CGROUP
 /*
-- 
cgit 


From 8ab14595b6dffecea264dcca2d6d9eea7c59273a Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 8 Apr 2014 09:17:40 -0600
Subject: block: add kblockd_schedule_delayed_work_on()

Same function as kblockd_schedule_delayed_work(), but allow the
caller to pass in a CPU that the work should be executed on. This
just directly extends and maps into the workqueue API, and will
be used to make the blk-mq mappings more strict.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       | 7 +++++++
 include/linux/blkdev.h | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index f7d2c3335dfa..7af4a4898dcb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2917,6 +2917,13 @@ int kblockd_schedule_delayed_work(struct delayed_work *dwork,
 }
 EXPORT_SYMBOL(kblockd_schedule_delayed_work);
 
+int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
+				     unsigned long delay)
+{
+	return queue_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
+}
+EXPORT_SYMBOL(kblockd_schedule_delayed_work_on);
+
 #define PLUG_MAGIC	0x91827364
 
 /**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2425945d36ab..5a31307c5ded 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1361,6 +1361,7 @@ static inline void put_dev_sector(Sector p)
 struct work_struct;
 int kblockd_schedule_work(struct work_struct *work);
 int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
+int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
 
 #ifdef CONFIG_BLK_CGROUP
 /*
-- 
cgit 


From e4043dcf30811f5db15181168e2aac172514302a Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 9 Apr 2014 10:18:23 -0600
Subject: blk-mq: ensure that hardware queues are always run on the mapped CPUs

Instead of providing soft mappings with no guarantees on hardware
queues always being run on the right CPU, switch to a hard mapping
guarantee that ensures that we always run the hardware queue on
the mapped CPU (or on one of them, if there is more than one).

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 66 ++++++++++++++++++++++++++++++++++++++------------
 include/linux/blk-mq.h |  1 +
 2 files changed, 52 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9c8f1f4ada7f..5455ed19de1c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -209,11 +209,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 			break;
 		}
 
-		blk_mq_put_ctx(ctx);
-		if (!(gfp & __GFP_WAIT))
+		if (gfp & __GFP_WAIT) {
+			__blk_mq_run_hw_queue(hctx);
+			blk_mq_put_ctx(ctx);
+		} else {
+			blk_mq_put_ctx(ctx);
 			break;
+		}
 
-		__blk_mq_run_hw_queue(hctx);
 		blk_mq_wait_for_tags(hctx->tags);
 	} while (1);
 
@@ -514,6 +517,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	LIST_HEAD(rq_list);
 	int bit, queued;
 
+	WARN_ON(!preempt_count());
+
 	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
 		return;
 
@@ -606,10 +611,22 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
 		return;
 
-	if (!async)
+	if (!async && cpumask_test_cpu(smp_processor_id(), hctx->cpumask))
 		__blk_mq_run_hw_queue(hctx);
-	else
+	else if (hctx->queue->nr_hw_queues == 1)
 		kblockd_schedule_delayed_work(&hctx->delayed_work, 0);
+	else {
+		unsigned int cpu;
+
+		/*
+		 * It'd be great if the workqueue API had a way to pass
+		 * in a mask and had some smarts for more clever placement
+		 * than the first CPU. Or we could round-robin here. For now,
+		 * just queue on the first CPU.
+		 */
+		cpu = cpumask_first(hctx->cpumask);
+		kblockd_schedule_delayed_work_on(cpu, &hctx->delayed_work, 0);
+	}
 }
 
 void blk_mq_run_queues(struct request_queue *q, bool async)
@@ -623,7 +640,9 @@ void blk_mq_run_queues(struct request_queue *q, bool async)
 		    test_bit(BLK_MQ_S_STOPPED, &hctx->state))
 			continue;
 
+		preempt_disable();
 		blk_mq_run_hw_queue(hctx, async);
+		preempt_enable();
 	}
 }
 EXPORT_SYMBOL(blk_mq_run_queues);
@@ -648,7 +667,10 @@ EXPORT_SYMBOL(blk_mq_stop_hw_queues);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
 	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+
+	preempt_disable();
 	__blk_mq_run_hw_queue(hctx);
+	preempt_enable();
 }
 EXPORT_SYMBOL(blk_mq_start_hw_queue);
 
@@ -662,7 +684,9 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q)
 			continue;
 
 		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+		preempt_disable();
 		blk_mq_run_hw_queue(hctx, true);
+		preempt_enable();
 	}
 }
 EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
@@ -672,7 +696,10 @@ static void blk_mq_work_fn(struct work_struct *work)
 	struct blk_mq_hw_ctx *hctx;
 
 	hctx = container_of(work, struct blk_mq_hw_ctx, delayed_work.work);
+
+	preempt_disable();
 	__blk_mq_run_hw_queue(hctx);
+	preempt_enable();
 }
 
 static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
@@ -716,10 +743,10 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
 		spin_unlock(&ctx->lock);
 	}
 
-	blk_mq_put_ctx(current_ctx);
-
 	if (run_queue)
 		blk_mq_run_hw_queue(hctx, async);
+
+	blk_mq_put_ctx(current_ctx);
 }
 
 static void blk_mq_insert_requests(struct request_queue *q,
@@ -755,9 +782,8 @@ static void blk_mq_insert_requests(struct request_queue *q,
 	}
 	spin_unlock(&ctx->lock);
 
-	blk_mq_put_ctx(current_ctx);
-
 	blk_mq_run_hw_queue(hctx, from_schedule);
+	blk_mq_put_ctx(current_ctx);
 }
 
 static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -876,7 +902,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	if (unlikely(is_flush_fua)) {
 		blk_mq_bio_to_request(rq, bio);
-		blk_mq_put_ctx(ctx);
 		blk_insert_flush(rq);
 		goto run_queue;
 	}
@@ -914,7 +939,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	}
 
 	spin_unlock(&ctx->lock);
-	blk_mq_put_ctx(ctx);
 
 	/*
 	 * For a SYNC request, send it to the hardware immediately. For an
@@ -923,6 +947,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	 */
 run_queue:
 	blk_mq_run_hw_queue(hctx, !is_sync || is_flush_fua);
+	blk_mq_put_ctx(ctx);
 }
 
 /*
@@ -990,9 +1015,9 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
 	blk_mq_hctx_mark_pending(hctx, ctx);
 
 	spin_unlock(&ctx->lock);
-	blk_mq_put_ctx(ctx);
 
 	blk_mq_run_hw_queue(hctx, true);
+	blk_mq_put_ctx(ctx);
 }
 
 static int blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx,
@@ -1255,12 +1280,13 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		__ctx->queue = q;
 
 		/* If the cpu isn't online, the cpu is mapped to first hctx */
-		hctx = q->mq_ops->map_queue(q, i);
-		hctx->nr_ctx++;
-
 		if (!cpu_online(i))
 			continue;
 
+		hctx = q->mq_ops->map_queue(q, i);
+		cpumask_set_cpu(i, hctx->cpumask);
+		hctx->nr_ctx++;
+
 		/*
 		 * Set local node, IFF we have more than one hw queue. If
 		 * not, we remain on the home node of the device
@@ -1277,6 +1303,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	struct blk_mq_ctx *ctx;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
+		cpumask_clear(hctx->cpumask);
 		hctx->nr_ctx = 0;
 	}
 
@@ -1285,7 +1312,11 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	 */
 	queue_for_each_ctx(q, ctx, i) {
 		/* If the cpu isn't online, the cpu is mapped to first hctx */
+		if (!cpu_online(i))
+			continue;
+
 		hctx = q->mq_ops->map_queue(q, i);
+		cpumask_set_cpu(i, hctx->cpumask);
 		ctx->index_hw = hctx->nr_ctx;
 		hctx->ctxs[hctx->nr_ctx++] = ctx;
 	}
@@ -1329,6 +1360,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 		if (!hctxs[i])
 			goto err_hctxs;
 
+		if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL))
+			goto err_hctxs;
+
 		hctxs[i]->numa_node = NUMA_NO_NODE;
 		hctxs[i]->queue_num = i;
 	}
@@ -1392,6 +1426,7 @@ err_hctxs:
 	for (i = 0; i < reg->nr_hw_queues; i++) {
 		if (!hctxs[i])
 			break;
+		free_cpumask_var(hctxs[i]->cpumask);
 		reg->ops->free_hctx(hctxs[i], i);
 	}
 	kfree(hctxs);
@@ -1413,6 +1448,7 @@ void blk_mq_free_queue(struct request_queue *q)
 		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
 		if (q->mq_ops->exit_hctx)
 			q->mq_ops->exit_hctx(hctx, i);
+		free_cpumask_var(hctx->cpumask);
 		q->mq_ops->free_hctx(hctx, i);
 	}
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0120451545d8..b6ee48740458 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -19,6 +19,7 @@ struct blk_mq_hw_ctx {
 
 	unsigned long		state;		/* BLK_MQ_S_* flags */
 	struct delayed_work	delayed_work;
+	cpumask_var_t		cpumask;
 
 	unsigned long		flags;		/* BLK_MQ_F_* flags */
 
-- 
cgit 


From 848ef58695d8c013f24633352586279cfb40e9d9 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Tue, 1 Apr 2014 17:02:53 +0300
Subject: net: rfkill: gpio: remove unused and obsolete platform parameters

After upgrading to descriptor-based gpios, the gpio numbers
are not used anymore. The power_clk_name and the platform
specific setup and close hooks are not used by anybody, and
we should not encourage use of such things, so remove them.

Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/rfkill-gpio.h | 10 ----------
 net/rfkill/rfkill-gpio.c    | 15 +--------------
 2 files changed, 1 insertion(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rfkill-gpio.h b/include/linux/rfkill-gpio.h
index 4d09f6eab359..20bcb55498cd 100644
--- a/include/linux/rfkill-gpio.h
+++ b/include/linux/rfkill-gpio.h
@@ -27,21 +27,11 @@
  * struct rfkill_gpio_platform_data - platform data for rfkill gpio device.
  * for unused gpio's, the expected value is -1.
  * @name:		name for the gpio rf kill instance
- * @reset_gpio:		GPIO which is used for reseting rfkill switch
- * @shutdown_gpio:	GPIO which is used for shutdown of rfkill switch
- * @power_clk_name:	[optional] name of clk to turn off while blocked
- * @gpio_runtime_close:	clean up platform specific gpio configuration
- * @gpio_runtime_setup:	set up platform specific gpio configuration
  */
 
 struct rfkill_gpio_platform_data {
 	char			*name;
-	int			reset_gpio;
-	int			shutdown_gpio;
-	const char		*power_clk_name;
 	enum rfkill_type	type;
-	void	(*gpio_runtime_close)(struct platform_device *);
-	int	(*gpio_runtime_setup)(struct platform_device *);
 };
 
 #endif /* __RFKILL_GPIO_H */
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index bd2a5b90400c..0adda445dfe7 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -87,7 +87,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
 {
 	struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
 	struct rfkill_gpio_data *rfkill;
-	const char *clk_name = NULL;
 	struct gpio_desc *gpio;
 	int ret;
 	int len;
@@ -101,7 +100,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
 		if (ret)
 			return ret;
 	} else if (pdata) {
-		clk_name = pdata->power_clk_name;
 		rfkill->name = pdata->name;
 		rfkill->type = pdata->type;
 	} else {
@@ -120,7 +118,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
 	snprintf(rfkill->reset_name, len + 6 , "%s_reset", rfkill->name);
 	snprintf(rfkill->shutdown_name, len + 9, "%s_shutdown", rfkill->name);
 
-	rfkill->clk = devm_clk_get(&pdev->dev, clk_name);
+	rfkill->clk = devm_clk_get(&pdev->dev, NULL);
 
 	gpio = devm_gpiod_get_index(&pdev->dev, rfkill->reset_name, 0);
 	if (!IS_ERR(gpio)) {
@@ -146,14 +144,6 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	if (pdata && pdata->gpio_runtime_setup) {
-		ret = pdata->gpio_runtime_setup(pdev);
-		if (ret) {
-			dev_err(&pdev->dev, "can't set up gpio\n");
-			return ret;
-		}
-	}
-
 	rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev,
 					  rfkill->type, &rfkill_gpio_ops,
 					  rfkill);
@@ -174,10 +164,7 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
 static int rfkill_gpio_remove(struct platform_device *pdev)
 {
 	struct rfkill_gpio_data *rfkill = platform_get_drvdata(pdev);
-	struct rfkill_gpio_platform_data *pdata = pdev->dev.platform_data;
 
-	if (pdata && pdata->gpio_runtime_close)
-		pdata->gpio_runtime_close(pdev);
 	rfkill_unregister(rfkill->rfkill_dev);
 	rfkill_destroy(rfkill->rfkill_dev);
 
-- 
cgit 


From 766e3721990d2c78e0d614b57753f105adbaa8c5 Mon Sep 17 00:00:00 2001
From: Scott Jiang <scott.jiang.linux@gmail.com>
Date: Fri, 4 Apr 2014 16:27:17 +0800
Subject: spi: convert spi-bfin-v3.c to a multiplatform driver

The SPI v3 controller is not only used on Blackfin, so rename it
and use the ioread/iowrite API to make it work on other platforms.

Signed-off-by: Scott Jiang <scott.jiang.linux@gmail.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 arch/blackfin/include/asm/bfin_spi3.h   | 258 ---------
 arch/blackfin/mach-bf609/boards/ezkit.c |  22 +-
 drivers/spi/Kconfig                     |   4 +-
 drivers/spi/Makefile                    |   2 +-
 drivers/spi/spi-adi-v3.c                | 985 ++++++++++++++++++++++++++++++++
 drivers/spi/spi-bfin-v3.c               | 965 -------------------------------
 include/linux/spi/adi_spi3.h            | 254 ++++++++
 7 files changed, 1253 insertions(+), 1237 deletions(-)
 delete mode 100644 arch/blackfin/include/asm/bfin_spi3.h
 create mode 100644 drivers/spi/spi-adi-v3.c
 delete mode 100644 drivers/spi/spi-bfin-v3.c
 create mode 100644 include/linux/spi/adi_spi3.h

(limited to 'include/linux')

diff --git a/arch/blackfin/include/asm/bfin_spi3.h b/arch/blackfin/include/asm/bfin_spi3.h
deleted file mode 100644
index 0957e65a54be..000000000000
--- a/arch/blackfin/include/asm/bfin_spi3.h
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Analog Devices SPI3 controller driver
- *
- * Copyright (c) 2011 Analog Devices Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef _SPI_CHANNEL_H_
-#define _SPI_CHANNEL_H_
-
-#include <linux/types.h>
-
-/* SPI_CONTROL */
-#define SPI_CTL_EN                  0x00000001    /* Enable */
-#define SPI_CTL_MSTR                0x00000002    /* Master/Slave */
-#define SPI_CTL_PSSE                0x00000004    /* controls modf error in master mode */
-#define SPI_CTL_ODM                 0x00000008    /* Open Drain Mode */
-#define SPI_CTL_CPHA                0x00000010    /* Clock Phase */
-#define SPI_CTL_CPOL                0x00000020    /* Clock Polarity */
-#define SPI_CTL_ASSEL               0x00000040    /* Slave Select Pin Control */
-#define SPI_CTL_SELST               0x00000080    /* Slave Select Polarity in-between transfers */
-#define SPI_CTL_EMISO               0x00000100    /* Enable MISO */
-#define SPI_CTL_SIZE                0x00000600    /* Word Transfer Size */
-#define SPI_CTL_SIZE08              0x00000000    /* SIZE: 8 bits */
-#define SPI_CTL_SIZE16              0x00000200    /* SIZE: 16 bits */
-#define SPI_CTL_SIZE32              0x00000400    /* SIZE: 32 bits */
-#define SPI_CTL_LSBF                0x00001000    /* LSB First */
-#define SPI_CTL_FCEN                0x00002000    /* Flow-Control Enable */
-#define SPI_CTL_FCCH                0x00004000    /* Flow-Control Channel Selection */
-#define SPI_CTL_FCPL                0x00008000    /* Flow-Control Polarity */
-#define SPI_CTL_FCWM                0x00030000    /* Flow-Control Water-Mark */
-#define SPI_CTL_FIFO0               0x00000000    /* FCWM: TFIFO empty or RFIFO Full */
-#define SPI_CTL_FIFO1               0x00010000    /* FCWM: TFIFO 75% or more empty or RFIFO 75% or more full */
-#define SPI_CTL_FIFO2               0x00020000    /* FCWM: TFIFO 50% or more empty or RFIFO 50% or more full */
-#define SPI_CTL_FMODE               0x00040000    /* Fast-mode Enable */
-#define SPI_CTL_MIOM                0x00300000    /* Multiple I/O Mode */
-#define SPI_CTL_MIO_DIS             0x00000000    /* MIOM: Disable */
-#define SPI_CTL_MIO_DUAL            0x00100000    /* MIOM: Enable DIOM (Dual I/O Mode) */
-#define SPI_CTL_MIO_QUAD            0x00200000    /* MIOM: Enable QUAD (Quad SPI Mode) */
-#define SPI_CTL_SOSI                0x00400000    /* Start on MOSI */
-/* SPI_RX_CONTROL */
-#define SPI_RXCTL_REN               0x00000001    /* Receive Channel Enable */
-#define SPI_RXCTL_RTI               0x00000004    /* Receive Transfer Initiate */
-#define SPI_RXCTL_RWCEN             0x00000008    /* Receive Word Counter Enable */
-#define SPI_RXCTL_RDR               0x00000070    /* Receive Data Request */
-#define SPI_RXCTL_RDR_DIS           0x00000000    /* RDR: Disabled */
-#define SPI_RXCTL_RDR_NE            0x00000010    /* RDR: RFIFO not empty */
-#define SPI_RXCTL_RDR_25            0x00000020    /* RDR: RFIFO 25% full */
-#define SPI_RXCTL_RDR_50            0x00000030    /* RDR: RFIFO 50% full */
-#define SPI_RXCTL_RDR_75            0x00000040    /* RDR: RFIFO 75% full */
-#define SPI_RXCTL_RDR_FULL          0x00000050    /* RDR: RFIFO full */
-#define SPI_RXCTL_RDO               0x00000100    /* Receive Data Over-Run */
-#define SPI_RXCTL_RRWM              0x00003000    /* FIFO Regular Water-Mark */
-#define SPI_RXCTL_RWM_0             0x00000000    /* RRWM: RFIFO Empty */
-#define SPI_RXCTL_RWM_25            0x00001000    /* RRWM: RFIFO 25% full */
-#define SPI_RXCTL_RWM_50            0x00002000    /* RRWM: RFIFO 50% full */
-#define SPI_RXCTL_RWM_75            0x00003000    /* RRWM: RFIFO 75% full */
-#define SPI_RXCTL_RUWM              0x00070000    /* FIFO Urgent Water-Mark */
-#define SPI_RXCTL_UWM_DIS           0x00000000    /* RUWM: Disabled */
-#define SPI_RXCTL_UWM_25            0x00010000    /* RUWM: RFIFO 25% full */
-#define SPI_RXCTL_UWM_50            0x00020000    /* RUWM: RFIFO 50% full */
-#define SPI_RXCTL_UWM_75            0x00030000    /* RUWM: RFIFO 75% full */
-#define SPI_RXCTL_UWM_FULL          0x00040000    /* RUWM: RFIFO full */
-/* SPI_TX_CONTROL */
-#define SPI_TXCTL_TEN               0x00000001    /* Transmit Channel Enable */
-#define SPI_TXCTL_TTI               0x00000004    /* Transmit Transfer Initiate */
-#define SPI_TXCTL_TWCEN             0x00000008    /* Transmit Word Counter Enable */
-#define SPI_TXCTL_TDR               0x00000070    /* Transmit Data Request */
-#define SPI_TXCTL_TDR_DIS           0x00000000    /* TDR: Disabled */
-#define SPI_TXCTL_TDR_NF            0x00000010    /* TDR: TFIFO not full */
-#define SPI_TXCTL_TDR_25            0x00000020    /* TDR: TFIFO 25% empty */
-#define SPI_TXCTL_TDR_50            0x00000030    /* TDR: TFIFO 50% empty */
-#define SPI_TXCTL_TDR_75            0x00000040    /* TDR: TFIFO 75% empty */
-#define SPI_TXCTL_TDR_EMPTY         0x00000050    /* TDR: TFIFO empty */
-#define SPI_TXCTL_TDU               0x00000100    /* Transmit Data Under-Run */
-#define SPI_TXCTL_TRWM              0x00003000    /* FIFO Regular Water-Mark */
-#define SPI_TXCTL_RWM_FULL          0x00000000    /* TRWM: TFIFO full */
-#define SPI_TXCTL_RWM_25            0x00001000    /* TRWM: TFIFO 25% empty */
-#define SPI_TXCTL_RWM_50            0x00002000    /* TRWM: TFIFO 50% empty */
-#define SPI_TXCTL_RWM_75            0x00003000    /* TRWM: TFIFO 75% empty */
-#define SPI_TXCTL_TUWM              0x00070000    /* FIFO Urgent Water-Mark */
-#define SPI_TXCTL_UWM_DIS           0x00000000    /* TUWM: Disabled */
-#define SPI_TXCTL_UWM_25            0x00010000    /* TUWM: TFIFO 25% empty */
-#define SPI_TXCTL_UWM_50            0x00020000    /* TUWM: TFIFO 50% empty */
-#define SPI_TXCTL_UWM_75            0x00030000    /* TUWM: TFIFO 75% empty */
-#define SPI_TXCTL_UWM_EMPTY         0x00040000    /* TUWM: TFIFO empty */
-/* SPI_CLOCK */
-#define SPI_CLK_BAUD                0x0000FFFF    /* Baud Rate */
-/* SPI_DELAY */
-#define SPI_DLY_STOP                0x000000FF    /* Transfer delay time in multiples of SCK period */
-#define SPI_DLY_LEADX               0x00000100    /* Extended (1 SCK) LEAD Control */
-#define SPI_DLY_LAGX                0x00000200    /* Extended (1 SCK) LAG control */
-/* SPI_SSEL */
-#define SPI_SLVSEL_SSE1             0x00000002    /* SPISSEL1 Enable */
-#define SPI_SLVSEL_SSE2             0x00000004    /* SPISSEL2 Enable */
-#define SPI_SLVSEL_SSE3             0x00000008    /* SPISSEL3 Enable */
-#define SPI_SLVSEL_SSE4             0x00000010    /* SPISSEL4 Enable */
-#define SPI_SLVSEL_SSE5             0x00000020    /* SPISSEL5 Enable */
-#define SPI_SLVSEL_SSE6             0x00000040    /* SPISSEL6 Enable */
-#define SPI_SLVSEL_SSE7             0x00000080    /* SPISSEL7 Enable */
-#define SPI_SLVSEL_SSEL1            0x00000200    /* SPISSEL1 Value */
-#define SPI_SLVSEL_SSEL2            0x00000400    /* SPISSEL2 Value */
-#define SPI_SLVSEL_SSEL3            0x00000800    /* SPISSEL3 Value */
-#define SPI_SLVSEL_SSEL4            0x00001000    /* SPISSEL4 Value */
-#define SPI_SLVSEL_SSEL5            0x00002000    /* SPISSEL5 Value */
-#define SPI_SLVSEL_SSEL6            0x00004000    /* SPISSEL6 Value */
-#define SPI_SLVSEL_SSEL7            0x00008000    /* SPISSEL7 Value */
-/* SPI_RWC */
-#define SPI_RWC_VALUE               0x0000FFFF    /* Received Word-Count */
-/* SPI_RWCR */
-#define SPI_RWCR_VALUE              0x0000FFFF    /* Received Word-Count Reload */
-/* SPI_TWC */
-#define SPI_TWC_VALUE               0x0000FFFF    /* Transmitted Word-Count */
-/* SPI_TWCR */
-#define SPI_TWCR_VALUE              0x0000FFFF    /* Transmitted Word-Count Reload */
-/* SPI_IMASK */
-#define SPI_IMSK_RUWM               0x00000002    /* Receive Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_TUWM               0x00000004    /* Transmit Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_ROM                0x00000010    /* Receive Over-Run Error Interrupt Mask */
-#define SPI_IMSK_TUM                0x00000020    /* Transmit Under-Run Error Interrupt Mask */
-#define SPI_IMSK_TCM                0x00000040    /* Transmit Collision Error Interrupt Mask */
-#define SPI_IMSK_MFM                0x00000080    /* Mode Fault Error Interrupt Mask */
-#define SPI_IMSK_RSM                0x00000100    /* Receive Start Interrupt Mask */
-#define SPI_IMSK_TSM                0x00000200    /* Transmit Start Interrupt Mask */
-#define SPI_IMSK_RFM                0x00000400    /* Receive Finish Interrupt Mask */
-#define SPI_IMSK_TFM                0x00000800    /* Transmit Finish Interrupt Mask */
-/* SPI_IMASKCL */
-#define SPI_IMSK_CLR_RUW            0x00000002    /* Receive Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_CLR_TUWM           0x00000004    /* Transmit Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_CLR_ROM            0x00000010    /* Receive Over-Run Error Interrupt Mask */
-#define SPI_IMSK_CLR_TUM            0x00000020    /* Transmit Under-Run Error Interrupt Mask */
-#define SPI_IMSK_CLR_TCM            0x00000040    /* Transmit Collision Error Interrupt Mask */
-#define SPI_IMSK_CLR_MFM            0x00000080    /* Mode Fault Error Interrupt Mask */
-#define SPI_IMSK_CLR_RSM            0x00000100    /* Receive Start Interrupt Mask */
-#define SPI_IMSK_CLR_TSM            0x00000200    /* Transmit Start Interrupt Mask */
-#define SPI_IMSK_CLR_RFM            0x00000400    /* Receive Finish Interrupt Mask */
-#define SPI_IMSK_CLR_TFM            0x00000800    /* Transmit Finish Interrupt Mask */
-/* SPI_IMASKST */
-#define SPI_IMSK_SET_RUWM           0x00000002    /* Receive Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_SET_TUWM           0x00000004    /* Transmit Urgent Water-Mark Interrupt Mask */
-#define SPI_IMSK_SET_ROM            0x00000010    /* Receive Over-Run Error Interrupt Mask */
-#define SPI_IMSK_SET_TUM            0x00000020    /* Transmit Under-Run Error Interrupt Mask */
-#define SPI_IMSK_SET_TCM            0x00000040    /* Transmit Collision Error Interrupt Mask */
-#define SPI_IMSK_SET_MFM            0x00000080    /* Mode Fault Error Interrupt Mask */
-#define SPI_IMSK_SET_RSM            0x00000100    /* Receive Start Interrupt Mask */
-#define SPI_IMSK_SET_TSM            0x00000200    /* Transmit Start Interrupt Mask */
-#define SPI_IMSK_SET_RFM            0x00000400    /* Receive Finish Interrupt Mask */
-#define SPI_IMSK_SET_TFM            0x00000800    /* Transmit Finish Interrupt Mask */
-/* SPI_STATUS */
-#define SPI_STAT_SPIF               0x00000001    /* SPI Finished */
-#define SPI_STAT_RUWM               0x00000002    /* Receive Urgent Water-Mark Breached */
-#define SPI_STAT_TUWM               0x00000004    /* Transmit Urgent Water-Mark Breached */
-#define SPI_STAT_ROE                0x00000010    /* Receive Over-Run Error Indication */
-#define SPI_STAT_TUE                0x00000020    /* Transmit Under-Run Error Indication */
-#define SPI_STAT_TCE                0x00000040    /* Transmit Collision Error Indication */
-#define SPI_STAT_MODF               0x00000080    /* Mode Fault Error Indication */
-#define SPI_STAT_RS                 0x00000100    /* Receive Start Indication */
-#define SPI_STAT_TS                 0x00000200    /* Transmit Start Indication */
-#define SPI_STAT_RF                 0x00000400    /* Receive Finish Indication */
-#define SPI_STAT_TF                 0x00000800    /* Transmit Finish Indication */
-#define SPI_STAT_RFS                0x00007000    /* SPI_RFIFO status */
-#define SPI_STAT_RFIFO_EMPTY        0x00000000    /* RFS: RFIFO Empty */
-#define SPI_STAT_RFIFO_25           0x00001000    /* RFS: RFIFO 25% Full */
-#define SPI_STAT_RFIFO_50           0x00002000    /* RFS: RFIFO 50% Full */
-#define SPI_STAT_RFIFO_75           0x00003000    /* RFS: RFIFO 75% Full */
-#define SPI_STAT_RFIFO_FULL         0x00004000    /* RFS: RFIFO Full */
-#define SPI_STAT_TFS                0x00070000    /* SPI_TFIFO status */
-#define SPI_STAT_TFIFO_FULL         0x00000000    /* TFS: TFIFO full */
-#define SPI_STAT_TFIFO_25           0x00010000    /* TFS: TFIFO 25% empty */
-#define SPI_STAT_TFIFO_50           0x00020000    /* TFS: TFIFO 50% empty */
-#define SPI_STAT_TFIFO_75           0x00030000    /* TFS: TFIFO 75% empty */
-#define SPI_STAT_TFIFO_EMPTY        0x00040000    /* TFS: TFIFO empty */
-#define SPI_STAT_FCS                0x00100000    /* Flow-Control Stall Indication */
-#define SPI_STAT_RFE                0x00400000    /* SPI_RFIFO Empty */
-#define SPI_STAT_TFF                0x00800000    /* SPI_TFIFO Full */
-/* SPI_ILAT */
-#define SPI_ILAT_RUWMI              0x00000002    /* Receive Urgent Water Mark Interrupt */
-#define SPI_ILAT_TUWMI              0x00000004    /* Transmit Urgent Water Mark Interrupt */
-#define SPI_ILAT_ROI                0x00000010    /* Receive Over-Run Error Indication */
-#define SPI_ILAT_TUI                0x00000020    /* Transmit Under-Run Error Indication */
-#define SPI_ILAT_TCI                0x00000040    /* Transmit Collision Error Indication */
-#define SPI_ILAT_MFI                0x00000080    /* Mode Fault Error Indication */
-#define SPI_ILAT_RSI                0x00000100    /* Receive Start Indication */
-#define SPI_ILAT_TSI                0x00000200    /* Transmit Start Indication */
-#define SPI_ILAT_RFI                0x00000400    /* Receive Finish Indication */
-#define SPI_ILAT_TFI                0x00000800    /* Transmit Finish Indication */
-/* SPI_ILATCL */
-#define SPI_ILAT_CLR_RUWMI          0x00000002    /* Receive Urgent Water Mark Interrupt */
-#define SPI_ILAT_CLR_TUWMI          0x00000004    /* Transmit Urgent Water Mark Interrupt */
-#define SPI_ILAT_CLR_ROI            0x00000010    /* Receive Over-Run Error Indication */
-#define SPI_ILAT_CLR_TUI            0x00000020    /* Transmit Under-Run Error Indication */
-#define SPI_ILAT_CLR_TCI            0x00000040    /* Transmit Collision Error Indication */
-#define SPI_ILAT_CLR_MFI            0x00000080    /* Mode Fault Error Indication */
-#define SPI_ILAT_CLR_RSI            0x00000100    /* Receive Start Indication */
-#define SPI_ILAT_CLR_TSI            0x00000200    /* Transmit Start Indication */
-#define SPI_ILAT_CLR_RFI            0x00000400    /* Receive Finish Indication */
-#define SPI_ILAT_CLR_TFI            0x00000800    /* Transmit Finish Indication */
-
-/*
- * bfin spi3 registers layout
- */
-struct bfin_spi_regs {
-	u32 revid;
-	u32 control;
-	u32 rx_control;
-	u32 tx_control;
-	u32 clock;
-	u32 delay;
-	u32 ssel;
-	u32 rwc;
-	u32 rwcr;
-	u32 twc;
-	u32 twcr;
-	u32 reserved0;
-	u32 emask;
-	u32 emaskcl;
-	u32 emaskst;
-	u32 reserved1;
-	u32 status;
-	u32 elat;
-	u32 elatcl;
-	u32 reserved2;
-	u32 rfifo;
-	u32 reserved3;
-	u32 tfifo;
-};
-
-#define MAX_CTRL_CS          8  /* cs in spi controller */
-
-/* device.platform_data for SSP controller devices */
-struct bfin_spi3_master {
-	u16 num_chipselect;
-	u16 pin_req[7];
-};
-
-/* spi_board_info.controller_data for SPI slave devices,
- * copied to spi_device.platform_data ... mostly for dma tuning
- */
-struct bfin_spi3_chip {
-	u32 control;
-	u16 cs_chg_udelay; /* Some devices require 16-bit delays */
-	u32 tx_dummy_val; /* tx value for rx only transfer */
-	bool enable_dma;
-};
-
-#endif /* _SPI_CHANNEL_H_ */
diff --git a/arch/blackfin/mach-bf609/boards/ezkit.c b/arch/blackfin/mach-bf609/boards/ezkit.c
index 943f7e95ec15..1ba4600de69f 100644
--- a/arch/blackfin/mach-bf609/boards/ezkit.c
+++ b/arch/blackfin/mach-bf609/boards/ezkit.c
@@ -20,7 +20,7 @@
 #include <linux/pinctrl/machine.h>
 #include <linux/pinctrl/pinconf-generic.h>
 #include <linux/platform_data/pinctrl-adi2.h>
-#include <asm/bfin_spi3.h>
+#include <linux/spi/adi_spi3.h>
 #include <asm/dma.h>
 #include <asm/gpio.h>
 #include <asm/nand.h>
@@ -767,13 +767,13 @@ static struct flash_platform_data bfin_spi_flash_data = {
 	.type = "w25q32",
 };
 
-static struct bfin_spi3_chip spi_flash_chip_info = {
+static struct adi_spi3_chip spi_flash_chip_info = {
 	.enable_dma = true,         /* use dma transfer with this chip*/
 };
 #endif
 
 #if IS_ENABLED(CONFIG_SPI_SPIDEV)
-static struct bfin_spi3_chip spidev_chip_info = {
+static struct adi_spi3_chip spidev_chip_info = {
 	.enable_dma = true,
 };
 #endif
@@ -1736,7 +1736,7 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
 	},
 #endif
 };
-#if IS_ENABLED(CONFIG_SPI_BFIN_V3)
+#if IS_ENABLED(CONFIG_SPI_ADI_V3)
 /* SPI (0) */
 static struct resource bfin_spi0_resource[] = {
 	{
@@ -1777,13 +1777,13 @@ static struct resource bfin_spi1_resource[] = {
 };
 
 /* SPI controller data */
-static struct bfin_spi3_master bf60x_spi_master_info0 = {
+static struct adi_spi3_master bf60x_spi_master_info0 = {
 	.num_chipselect = MAX_CTRL_CS + MAX_BLACKFIN_GPIOS,
 	.pin_req = {P_SPI0_SCK, P_SPI0_MISO, P_SPI0_MOSI, 0},
 };
 
 static struct platform_device bf60x_spi_master0 = {
-	.name = "bfin-spi3",
+	.name = "adi-spi3",
 	.id = 0, /* Bus number */
 	.num_resources = ARRAY_SIZE(bfin_spi0_resource),
 	.resource = bfin_spi0_resource,
@@ -1792,13 +1792,13 @@ static struct platform_device bf60x_spi_master0 = {
 	},
 };
 
-static struct bfin_spi3_master bf60x_spi_master_info1 = {
+static struct adi_spi3_master bf60x_spi_master_info1 = {
 	.num_chipselect = MAX_CTRL_CS + MAX_BLACKFIN_GPIOS,
 	.pin_req = {P_SPI1_SCK, P_SPI1_MISO, P_SPI1_MOSI, 0},
 };
 
 static struct platform_device bf60x_spi_master1 = {
-	.name = "bfin-spi3",
+	.name = "adi-spi3",
 	.id = 1, /* Bus number */
 	.num_resources = ARRAY_SIZE(bfin_spi1_resource),
 	.resource = bfin_spi1_resource,
@@ -1990,7 +1990,7 @@ static struct platform_device *ezkit_devices[] __initdata = {
 	&bfin_sdh_device,
 #endif
 
-#if IS_ENABLED(CONFIG_SPI_BFIN_V3)
+#if IS_ENABLED(CONFIG_SPI_ADI_V3)
 	&bf60x_spi_master0,
 	&bf60x_spi_master1,
 #endif
@@ -2051,8 +2051,8 @@ static struct pinctrl_map __initdata bfin_pinmux_map[] = {
 	PIN_MAP_MUX_GROUP_DEFAULT("bfin_sir.1",  "pinctrl-adi2.0", NULL, "uart1"),
 	PIN_MAP_MUX_GROUP_DEFAULT("bfin-sdh.0",  "pinctrl-adi2.0", NULL, "rsi0"),
 	PIN_MAP_MUX_GROUP_DEFAULT("stmmaceth.0",  "pinctrl-adi2.0", NULL, "eth0"),
-	PIN_MAP_MUX_GROUP_DEFAULT("bfin-spi3.0",  "pinctrl-adi2.0", NULL, "spi0"),
-	PIN_MAP_MUX_GROUP_DEFAULT("bfin-spi3.1",  "pinctrl-adi2.0", NULL, "spi1"),
+	PIN_MAP_MUX_GROUP_DEFAULT("adi-spi3.0",  "pinctrl-adi2.0", NULL, "spi0"),
+	PIN_MAP_MUX_GROUP_DEFAULT("adi-spi3.1",  "pinctrl-adi2.0", NULL, "spi1"),
 	PIN_MAP_MUX_GROUP_DEFAULT("i2c-bfin-twi.0",  "pinctrl-adi2.0", NULL, "twi0"),
 	PIN_MAP_MUX_GROUP_DEFAULT("i2c-bfin-twi.1",  "pinctrl-adi2.0", NULL, "twi1"),
 	PIN_MAP_MUX_GROUP_DEFAULT("bfin-rotary",  "pinctrl-adi2.0", NULL, "rotary"),
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index 60f2b41c7310..a52e0edb7146 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -91,8 +91,8 @@ config SPI_BFIN5XX
 	help
 	  This is the SPI controller master driver for Blackfin 5xx processor.
 
-config SPI_BFIN_V3
-	tristate "SPI controller v3 for Blackfin"
+config SPI_ADI_V3
+	tristate "SPI controller v3 for ADI"
 	depends on BF60x
 	help
 	  This is the SPI controller v3 master driver
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index bd792669e563..71e65dfc0ea3 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_SPI_BCM2835)		+= spi-bcm2835.o
 obj-$(CONFIG_SPI_BCM63XX)		+= spi-bcm63xx.o
 obj-$(CONFIG_SPI_BCM63XX_HSSPI)		+= spi-bcm63xx-hsspi.o
 obj-$(CONFIG_SPI_BFIN5XX)		+= spi-bfin5xx.o
-obj-$(CONFIG_SPI_BFIN_V3)               += spi-bfin-v3.o
+obj-$(CONFIG_SPI_ADI_V3)                += spi-adi-v3.o
 obj-$(CONFIG_SPI_BFIN_SPORT)		+= spi-bfin-sport.o
 obj-$(CONFIG_SPI_BITBANG)		+= spi-bitbang.o
 obj-$(CONFIG_SPI_BUTTERFLY)		+= spi-butterfly.o
diff --git a/drivers/spi/spi-adi-v3.c b/drivers/spi/spi-adi-v3.c
new file mode 100644
index 000000000000..0c2914cfcdb5
--- /dev/null
+++ b/drivers/spi/spi-adi-v3.c
@@ -0,0 +1,985 @@
+/*
+ * Analog Devices SPI3 controller driver
+ *
+ * Copyright (c) 2014 Analog Devices Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/errno.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/adi_spi3.h>
+#include <linux/types.h>
+
+#include <asm/dma.h>
+#include <asm/portmux.h>
+
+enum adi_spi_state {
+	START_STATE,
+	RUNNING_STATE,
+	DONE_STATE,
+	ERROR_STATE
+};
+
+struct adi_spi_master;
+
+struct adi_spi_transfer_ops {
+	void (*write) (struct adi_spi_master *);
+	void (*read) (struct adi_spi_master *);
+	void (*duplex) (struct adi_spi_master *);
+};
+
+/* runtime info for spi master */
+struct adi_spi_master {
+	/* SPI framework hookup */
+	struct spi_master *master;
+
+	/* Regs base of SPI controller */
+	struct adi_spi_regs __iomem *regs;
+
+	/* Pin request list */
+	u16 *pin_req;
+
+	/* Message Transfer pump */
+	struct tasklet_struct pump_transfers;
+
+	/* Current message transfer state info */
+	struct spi_message *cur_msg;
+	struct spi_transfer *cur_transfer;
+	struct adi_spi_device *cur_chip;
+	unsigned transfer_len;
+
+	/* transfer buffer */
+	void *tx;
+	void *tx_end;
+	void *rx;
+	void *rx_end;
+
+	/* dma info */
+	unsigned int tx_dma;
+	unsigned int rx_dma;
+	dma_addr_t tx_dma_addr;
+	dma_addr_t rx_dma_addr;
+	unsigned long dummy_buffer; /* used in unidirectional transfer */
+	unsigned long tx_dma_size;
+	unsigned long rx_dma_size;
+	int tx_num;
+	int rx_num;
+
+	/* store register value for suspend/resume */
+	u32 control;
+	u32 ssel;
+
+	unsigned long sclk;
+	enum adi_spi_state state;
+
+	const struct adi_spi_transfer_ops *ops;
+};
+
+struct adi_spi_device {
+	u32 control;
+	u32 clock;
+	u32 ssel;
+
+	u8 cs;
+	u16 cs_chg_udelay; /* Some devices require > 255usec delay */
+	u32 cs_gpio;
+	u32 tx_dummy_val; /* tx value for rx only transfer */
+	bool enable_dma;
+	const struct adi_spi_transfer_ops *ops;
+};
+
+static void adi_spi_enable(struct adi_spi_master *drv_data)
+{
+	u32 ctl;
+
+	ctl = ioread32(&drv_data->regs->control);
+	ctl |= SPI_CTL_EN;
+	iowrite32(ctl, &drv_data->regs->control);
+}
+
+static void adi_spi_disable(struct adi_spi_master *drv_data)
+{
+	u32 ctl;
+
+	ctl = ioread32(&drv_data->regs->control);
+	ctl &= ~SPI_CTL_EN;
+	iowrite32(ctl, &drv_data->regs->control);
+}
+
+/* Calculate the SPI_CLOCK register value based on input HZ */
+static u32 hz_to_spi_clock(u32 sclk, u32 speed_hz)
+{
+	u32 spi_clock = sclk / speed_hz;
+
+	if (spi_clock)
+		spi_clock--;
+	return spi_clock;
+}
+
+static int adi_spi_flush(struct adi_spi_master *drv_data)
+{
+	unsigned long limit = loops_per_jiffy << 1;
+
+	/* wait for stop and clear stat */
+	while (!(ioread32(&drv_data->regs->status) & SPI_STAT_SPIF) && --limit)
+		cpu_relax();
+
+	iowrite32(0xFFFFFFFF, &drv_data->regs->status);
+
+	return limit;
+}
+
+/* Chip select operation functions for cs_change flag */
+static void adi_spi_cs_active(struct adi_spi_master *drv_data, struct adi_spi_device *chip)
+{
+	if (likely(chip->cs < MAX_CTRL_CS)) {
+		u32 reg;
+		reg = ioread32(&drv_data->regs->ssel);
+		reg &= ~chip->ssel;
+		iowrite32(reg, &drv_data->regs->ssel);
+	} else {
+		gpio_set_value(chip->cs_gpio, 0);
+	}
+}
+
+static void adi_spi_cs_deactive(struct adi_spi_master *drv_data,
+				struct adi_spi_device *chip)
+{
+	if (likely(chip->cs < MAX_CTRL_CS)) {
+		u32 reg;
+		reg = ioread32(&drv_data->regs->ssel);
+		reg |= chip->ssel;
+		iowrite32(reg, &drv_data->regs->ssel);
+	} else {
+		gpio_set_value(chip->cs_gpio, 1);
+	}
+
+	/* Move delay here for consistency */
+	if (chip->cs_chg_udelay)
+		udelay(chip->cs_chg_udelay);
+}
+
+/* enable or disable the pin muxed by GPIO and SPI CS to work as SPI CS */
+static inline void adi_spi_cs_enable(struct adi_spi_master *drv_data,
+					struct adi_spi_device *chip)
+{
+	if (chip->cs < MAX_CTRL_CS) {
+		u32 reg;
+		reg = ioread32(&drv_data->regs->ssel);
+		reg |= chip->ssel >> 8;
+		iowrite32(reg, &drv_data->regs->ssel);
+	}
+}
+
+static inline void adi_spi_cs_disable(struct adi_spi_master *drv_data,
+					struct adi_spi_device *chip)
+{
+	if (chip->cs < MAX_CTRL_CS) {
+		u32 reg;
+		reg = ioread32(&drv_data->regs->ssel);
+		reg &= ~(chip->ssel >> 8);
+		iowrite32(reg, &drv_data->regs->ssel);
+	}
+}
+
+/* stop controller and re-config current chip */
+static void adi_spi_restore_state(struct adi_spi_master *drv_data)
+{
+	struct adi_spi_device *chip = drv_data->cur_chip;
+
+	/* Clear status and disable clock */
+	iowrite32(0xFFFFFFFF, &drv_data->regs->status);
+	iowrite32(0x0, &drv_data->regs->rx_control);
+	iowrite32(0x0, &drv_data->regs->tx_control);
+	adi_spi_disable(drv_data);
+
+	/* Load the registers */
+	iowrite32(chip->control, &drv_data->regs->control);
+	iowrite32(chip->clock, &drv_data->regs->clock);
+
+	adi_spi_enable(drv_data);
+	drv_data->tx_num = drv_data->rx_num = 0;
+	/* transfers are always initiated from the tx side */
+	iowrite32(SPI_RXCTL_REN, &drv_data->regs->rx_control);
+	iowrite32(SPI_TXCTL_TEN | SPI_TXCTL_TTI, &drv_data->regs->tx_control);
+	adi_spi_cs_active(drv_data, chip);
+}
+
+/* discard invalid rx data and empty rfifo */
+static inline void dummy_read(struct adi_spi_master *drv_data)
+{
+	while (!(ioread32(&drv_data->regs->status) & SPI_STAT_RFE))
+		ioread32(&drv_data->regs->rfifo);
+}
+
+static void adi_spi_u8_write(struct adi_spi_master *drv_data)
+{
+	dummy_read(drv_data);
+	while (drv_data->tx < drv_data->tx_end) {
+		iowrite32(*(u8 *)(drv_data->tx++), &drv_data->regs->tfifo);
+		while (ioread32(&drv_data->regs->status) & SPI_STAT_RFE)
+			cpu_relax();
+		ioread32(&drv_data->regs->rfifo);
+	}
+}
+
+static void adi_spi_u8_read(struct adi_spi_master *drv_data)
+{
+	u32 tx_val = drv_data->cur_chip->tx_dummy_val;
+
+	dummy_read(drv_data);
+	while (drv_data->rx < drv_data->rx_end) {
+		iowrite32(tx_val, &drv_data->regs->tfifo);
+		while (ioread32(&drv_data->regs->status) & SPI_STAT_RFE)
+			cpu_relax();
+		*(u8 *)(drv_data->rx++) = ioread32(&drv_data->regs->rfifo);
+	}
+}
+
+static void adi_spi_u8_duplex(struct adi_spi_master *drv_data)
+{
+	dummy_read(drv_data);
+	while (drv_data->rx < drv_data->rx_end) {
+		iowrite32(*(u8 *)(drv_data->tx++), &drv_data->regs->tfifo);
+		while (ioread32(&drv_data->regs->status) & SPI_STAT_RFE)
+			cpu_relax();
+		*(u8 *)(drv_data->rx++) = ioread32(&drv_data->regs->rfifo);
+	}
+}
+
+static const struct adi_spi_transfer_ops adi_spi_transfer_ops_u8 = {
+	.write  = adi_spi_u8_write,
+	.read   = adi_spi_u8_read,
+	.duplex = adi_spi_u8_duplex,
+};
+
+static void adi_spi_u16_write(struct adi_spi_master *drv_data)
+{
+	dummy_read(drv_data);
+	while (drv_data->tx < drv_data->tx_end) {
+		iowrite32(*(u16 *)drv_data->tx, &drv_data->regs->tfifo);
+		drv_data->tx += 2;
+		while (ioread32(&drv_data->regs->status) & SPI_STAT_RFE)
+			cpu_relax();
+		ioread32(&drv_data->regs->rfifo);
+	}
+}
+
+static void adi_spi_u16_read(struct adi_spi_master *drv_data)
+{
+	u32 tx_val = drv_data->cur_chip->tx_dummy_val;
+
+	dummy_read(drv_data);
+	while (drv_data->rx < drv_data->rx_end) {
+		iowrite32(tx_val, &drv_data->regs->tfifo);
+		while (ioread32(&drv_data->regs->status) & SPI_STAT_RFE)
+			cpu_relax();
+		*(u16 *)drv_data->rx = ioread32(&drv_data->regs->rfifo);
+		drv_data->rx += 2;
+	}
+}
+
+static void adi_spi_u16_duplex(struct adi_spi_master *drv_data)
+{
+	dummy_read(drv_data);
+	while (drv_data->rx < drv_data->rx_end) {
+		iowrite32(*(u16 *)drv_data->tx, &drv_data->regs->tfifo);
+		drv_data->tx += 2;
+		while (ioread32(&drv_data->regs->status) & SPI_STAT_RFE)
+			cpu_relax();
+		*(u16 *)drv_data->rx = ioread32(&drv_data->regs->rfifo);
+		drv_data->rx += 2;
+	}
+}
+
+static const struct adi_spi_transfer_ops adi_spi_transfer_ops_u16 = {
+	.write  = adi_spi_u16_write,
+	.read   = adi_spi_u16_read,
+	.duplex = adi_spi_u16_duplex,
+};
+
+static void adi_spi_u32_write(struct adi_spi_master *drv_data)
+{
+	dummy_read(drv_data);
+	while (drv_data->tx < drv_data->tx_end) {
+		iowrite32(*(u32 *)drv_data->tx, &drv_data->regs->tfifo);
+		drv_data->tx += 4;
+		while (ioread32(&drv_data->regs->status) & SPI_STAT_RFE)
+			cpu_relax();
+		ioread32(&drv_data->regs->rfifo);
+	}
+}
+
+static void adi_spi_u32_read(struct adi_spi_master *drv_data)
+{
+	u32 tx_val = drv_data->cur_chip->tx_dummy_val;
+
+	dummy_read(drv_data);
+	while (drv_data->rx < drv_data->rx_end) {
+		iowrite32(tx_val, &drv_data->regs->tfifo);
+		while (ioread32(&drv_data->regs->status) & SPI_STAT_RFE)
+			cpu_relax();
+		*(u32 *)drv_data->rx = ioread32(&drv_data->regs->rfifo);
+		drv_data->rx += 4;
+	}
+}
+
+static void adi_spi_u32_duplex(struct adi_spi_master *drv_data)
+{
+	dummy_read(drv_data);
+	while (drv_data->rx < drv_data->rx_end) {
+		iowrite32(*(u32 *)drv_data->tx, &drv_data->regs->tfifo);
+		drv_data->tx += 4;
+		while (ioread32(&drv_data->regs->status) & SPI_STAT_RFE)
+			cpu_relax();
+		*(u32 *)drv_data->rx = ioread32(&drv_data->regs->rfifo);
+		drv_data->rx += 4;
+	}
+}
+
+static const struct adi_spi_transfer_ops adi_spi_transfer_ops_u32 = {
+	.write  = adi_spi_u32_write,
+	.read   = adi_spi_u32_read,
+	.duplex = adi_spi_u32_duplex,
+};
+
+
+/* test if there is more transfer to be done */
+static void adi_spi_next_transfer(struct adi_spi_master *drv)
+{
+	struct spi_message *msg = drv->cur_msg;
+	struct spi_transfer *t = drv->cur_transfer;
+
+	/* Move to next transfer */
+	if (t->transfer_list.next != &msg->transfers) {
+		drv->cur_transfer = list_entry(t->transfer_list.next,
+			       struct spi_transfer, transfer_list);
+		drv->state = RUNNING_STATE;
+	} else {
+		drv->state = DONE_STATE;
+		drv->cur_transfer = NULL;
+	}
+}
+
+static void adi_spi_giveback(struct adi_spi_master *drv_data)
+{
+	struct adi_spi_device *chip = drv_data->cur_chip;
+
+	adi_spi_cs_deactive(drv_data, chip);
+	spi_finalize_current_message(drv_data->master);
+}
+
+static int adi_spi_setup_transfer(struct adi_spi_master *drv)
+{
+	struct spi_transfer *t = drv->cur_transfer;
+	u32 cr, cr_width;
+
+	if (t->tx_buf) {
+		drv->tx = (void *)t->tx_buf;
+		drv->tx_end = drv->tx + t->len;
+	} else {
+		drv->tx = NULL;
+	}
+
+	if (t->rx_buf) {
+		drv->rx = t->rx_buf;
+		drv->rx_end = drv->rx + t->len;
+	} else {
+		drv->rx = NULL;
+	}
+
+	drv->transfer_len = t->len;
+
+	/* bits per word setup */
+	switch (t->bits_per_word) {
+	case 8:
+		cr_width = SPI_CTL_SIZE08;
+		drv->ops = &adi_spi_transfer_ops_u8;
+		break;
+	case 16:
+		cr_width = SPI_CTL_SIZE16;
+		drv->ops = &adi_spi_transfer_ops_u16;
+		break;
+	case 32:
+		cr_width = SPI_CTL_SIZE32;
+		drv->ops = &adi_spi_transfer_ops_u32;
+		break;
+	default:
+		return -EINVAL;
+	}
+	cr = ioread32(&drv->regs->control) & ~SPI_CTL_SIZE;
+	cr |= cr_width;
+	iowrite32(cr, &drv->regs->control);
+
+	/* speed setup */
+	iowrite32(hz_to_spi_clock(drv->sclk, t->speed_hz), &drv->regs->clock);
+	return 0;
+}
+
+static int adi_spi_dma_xfer(struct adi_spi_master *drv_data)
+{
+	struct spi_transfer *t = drv_data->cur_transfer;
+	struct spi_message *msg = drv_data->cur_msg;
+	struct adi_spi_device *chip = drv_data->cur_chip;
+	u32 dma_config;
+	unsigned long word_count, word_size;
+	void *tx_buf, *rx_buf;
+
+	switch (t->bits_per_word) {
+	case 8:
+		dma_config = WDSIZE_8 | PSIZE_8;
+		word_count = drv_data->transfer_len;
+		word_size = 1;
+		break;
+	case 16:
+		dma_config = WDSIZE_16 | PSIZE_16;
+		word_count = drv_data->transfer_len / 2;
+		word_size = 2;
+		break;
+	default:
+		dma_config = WDSIZE_32 | PSIZE_32;
+		word_count = drv_data->transfer_len / 4;
+		word_size = 4;
+		break;
+	}
+
+	if (!drv_data->rx) {
+		tx_buf = drv_data->tx;
+		rx_buf = &drv_data->dummy_buffer;
+		drv_data->tx_dma_size = drv_data->transfer_len;
+		drv_data->rx_dma_size = sizeof(drv_data->dummy_buffer);
+		set_dma_x_modify(drv_data->tx_dma, word_size);
+		set_dma_x_modify(drv_data->rx_dma, 0);
+	} else if (!drv_data->tx) {
+		drv_data->dummy_buffer = chip->tx_dummy_val;
+		tx_buf = &drv_data->dummy_buffer;
+		rx_buf = drv_data->rx;
+		drv_data->tx_dma_size = sizeof(drv_data->dummy_buffer);
+		drv_data->rx_dma_size = drv_data->transfer_len;
+		set_dma_x_modify(drv_data->tx_dma, 0);
+		set_dma_x_modify(drv_data->rx_dma, word_size);
+	} else {
+		tx_buf = drv_data->tx;
+		rx_buf = drv_data->rx;
+		drv_data->tx_dma_size = drv_data->rx_dma_size
+					= drv_data->transfer_len;
+		set_dma_x_modify(drv_data->tx_dma, word_size);
+		set_dma_x_modify(drv_data->rx_dma, word_size);
+	}
+
+	drv_data->tx_dma_addr = dma_map_single(&msg->spi->dev,
+				(void *)tx_buf,
+				drv_data->tx_dma_size,
+				DMA_TO_DEVICE);
+	if (dma_mapping_error(&msg->spi->dev,
+				drv_data->tx_dma_addr))
+		return -ENOMEM;
+
+	drv_data->rx_dma_addr = dma_map_single(&msg->spi->dev,
+				(void *)rx_buf,
+				drv_data->rx_dma_size,
+				DMA_FROM_DEVICE);
+	if (dma_mapping_error(&msg->spi->dev,
+				drv_data->rx_dma_addr)) {
+		dma_unmap_single(&msg->spi->dev,
+				drv_data->tx_dma_addr,
+				drv_data->tx_dma_size,
+				DMA_TO_DEVICE);
+		return -ENOMEM;
+	}
+
+	dummy_read(drv_data);
+	set_dma_x_count(drv_data->tx_dma, word_count);
+	set_dma_x_count(drv_data->rx_dma, word_count);
+	set_dma_start_addr(drv_data->tx_dma, drv_data->tx_dma_addr);
+	set_dma_start_addr(drv_data->rx_dma, drv_data->rx_dma_addr);
+	dma_config |= DMAFLOW_STOP | RESTART | DI_EN;
+	set_dma_config(drv_data->tx_dma, dma_config);
+	set_dma_config(drv_data->rx_dma, dma_config | WNR);
+	enable_dma(drv_data->tx_dma);
+	enable_dma(drv_data->rx_dma);
+
+	iowrite32(SPI_RXCTL_REN | SPI_RXCTL_RDR_NE,
+			&drv_data->regs->rx_control);
+	iowrite32(SPI_TXCTL_TEN | SPI_TXCTL_TTI | SPI_TXCTL_TDR_NF,
+			&drv_data->regs->tx_control);
+
+	return 0;
+}
+
+static int adi_spi_pio_xfer(struct adi_spi_master *drv_data)
+{
+	struct spi_message *msg = drv_data->cur_msg;
+
+	if (!drv_data->rx) {
+		/* write only half duplex */
+		drv_data->ops->write(drv_data);
+		if (drv_data->tx != drv_data->tx_end)
+			return -EIO;
+	} else if (!drv_data->tx) {
+		/* read only half duplex */
+		drv_data->ops->read(drv_data);
+		if (drv_data->rx != drv_data->rx_end)
+			return -EIO;
+	} else {
+		/* full duplex mode */
+		drv_data->ops->duplex(drv_data);
+		if (drv_data->tx != drv_data->tx_end)
+			return -EIO;
+	}
+
+	if (!adi_spi_flush(drv_data))
+		return -EIO;
+	msg->actual_length += drv_data->transfer_len;
+	tasklet_schedule(&drv_data->pump_transfers);
+	return 0;
+}
+
+static void adi_spi_pump_transfers(unsigned long data)
+{
+	struct adi_spi_master *drv_data = (struct adi_spi_master *)data;
+	struct spi_message *msg = NULL;
+	struct spi_transfer *t = NULL;
+	struct adi_spi_device *chip = NULL;
+	int ret;
+
+	/* Get current state information */
+	msg = drv_data->cur_msg;
+	t = drv_data->cur_transfer;
+	chip = drv_data->cur_chip;
+
+	/* Handle abort */
+	if (drv_data->state == ERROR_STATE) {
+		msg->status = -EIO;
+		adi_spi_giveback(drv_data);
+		return;
+	}
+
+	if (drv_data->state == RUNNING_STATE) {
+		if (t->delay_usecs)
+			udelay(t->delay_usecs);
+		if (t->cs_change)
+			adi_spi_cs_deactive(drv_data, chip);
+		adi_spi_next_transfer(drv_data);
+		t = drv_data->cur_transfer;
+	}
+	/* Handle end of message */
+	if (drv_data->state == DONE_STATE) {
+		msg->status = 0;
+		adi_spi_giveback(drv_data);
+		return;
+	}
+
+	if ((t->len == 0) || (t->tx_buf == NULL && t->rx_buf == NULL)) {
+		/* Schedule next transfer tasklet */
+		tasklet_schedule(&drv_data->pump_transfers);
+		return;
+	}
+
+	ret = adi_spi_setup_transfer(drv_data);
+	if (ret) {
+		msg->status = ret;
+		adi_spi_giveback(drv_data);
+	}
+
+	iowrite32(0xFFFFFFFF, &drv_data->regs->status);
+	adi_spi_cs_active(drv_data, chip);
+	drv_data->state = RUNNING_STATE;
+
+	if (chip->enable_dma)
+		ret = adi_spi_dma_xfer(drv_data);
+	else
+		ret = adi_spi_pio_xfer(drv_data);
+	if (ret) {
+		msg->status = ret;
+		adi_spi_giveback(drv_data);
+	}
+}
+
+static int adi_spi_transfer_one_message(struct spi_master *master,
+					struct spi_message *m)
+{
+	struct adi_spi_master *drv_data = spi_master_get_devdata(master);
+
+	drv_data->cur_msg = m;
+	drv_data->cur_chip = spi_get_ctldata(drv_data->cur_msg->spi);
+	adi_spi_restore_state(drv_data);
+
+	drv_data->state = START_STATE;
+	drv_data->cur_transfer = list_entry(drv_data->cur_msg->transfers.next,
+					    struct spi_transfer, transfer_list);
+
+	tasklet_schedule(&drv_data->pump_transfers);
+	return 0;
+}
+
+#define MAX_SPI_SSEL	7
+
+static const u16 ssel[][MAX_SPI_SSEL] = {
+	{P_SPI0_SSEL1, P_SPI0_SSEL2, P_SPI0_SSEL3,
+	P_SPI0_SSEL4, P_SPI0_SSEL5,
+	P_SPI0_SSEL6, P_SPI0_SSEL7},
+
+	{P_SPI1_SSEL1, P_SPI1_SSEL2, P_SPI1_SSEL3,
+	P_SPI1_SSEL4, P_SPI1_SSEL5,
+	P_SPI1_SSEL6, P_SPI1_SSEL7},
+
+	{P_SPI2_SSEL1, P_SPI2_SSEL2, P_SPI2_SSEL3,
+	P_SPI2_SSEL4, P_SPI2_SSEL5,
+	P_SPI2_SSEL6, P_SPI2_SSEL7},
+};
+
+static int adi_spi_setup(struct spi_device *spi)
+{
+	struct adi_spi_master *drv_data = spi_master_get_devdata(spi->master);
+	struct adi_spi_device *chip = spi_get_ctldata(spi);
+	u32 ctl_reg = SPI_CTL_ODM | SPI_CTL_PSSE;
+	int ret = -EINVAL;
+
+	if (!chip) {
+		struct adi_spi3_chip *chip_info = spi->controller_data;
+
+		chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+		if (!chip) {
+			dev_err(&spi->dev, "can not allocate chip data\n");
+			return -ENOMEM;
+		}
+		if (chip_info) {
+			if (chip_info->control & ~ctl_reg) {
+				dev_err(&spi->dev,
+					"do not set bits that the SPI framework manages\n");
+				goto error;
+			}
+			chip->control = chip_info->control;
+			chip->cs_chg_udelay = chip_info->cs_chg_udelay;
+			chip->tx_dummy_val = chip_info->tx_dummy_val;
+			chip->enable_dma = chip_info->enable_dma;
+		}
+		chip->cs = spi->chip_select;
+
+		if (chip->cs < MAX_CTRL_CS) {
+			chip->ssel = (1 << chip->cs) << 8;
+			ret = peripheral_request(ssel[spi->master->bus_num]
+					[chip->cs-1], dev_name(&spi->dev));
+			if (ret) {
+				dev_err(&spi->dev, "peripheral_request() error\n");
+				goto error;
+			}
+		} else {
+			chip->cs_gpio = chip->cs - MAX_CTRL_CS;
+			ret = gpio_request_one(chip->cs_gpio, GPIOF_OUT_INIT_HIGH,
+						dev_name(&spi->dev));
+			if (ret) {
+				dev_err(&spi->dev, "gpio_request_one() error\n");
+				goto error;
+			}
+		}
+		spi_set_ctldata(spi, chip);
+	}
+
+	/* force a default base state */
+	chip->control &= ctl_reg;
+
+	if (spi->mode & SPI_CPOL)
+		chip->control |= SPI_CTL_CPOL;
+	if (spi->mode & SPI_CPHA)
+		chip->control |= SPI_CTL_CPHA;
+	if (spi->mode & SPI_LSB_FIRST)
+		chip->control |= SPI_CTL_LSBF;
+	chip->control |= SPI_CTL_MSTR;
+	/* we choose software to control cs */
+	chip->control &= ~SPI_CTL_ASSEL;
+
+	chip->clock = hz_to_spi_clock(drv_data->sclk, spi->max_speed_hz);
+
+	adi_spi_cs_enable(drv_data, chip);
+	adi_spi_cs_deactive(drv_data, chip);
+
+	return 0;
+error:
+	if (chip) {
+		kfree(chip);
+		spi_set_ctldata(spi, NULL);
+	}
+
+	return ret;
+}
+
+static void adi_spi_cleanup(struct spi_device *spi)
+{
+	struct adi_spi_device *chip = spi_get_ctldata(spi);
+	struct adi_spi_master *drv_data = spi_master_get_devdata(spi->master);
+
+	if (!chip)
+		return;
+
+	if (chip->cs < MAX_CTRL_CS) {
+		peripheral_free(ssel[spi->master->bus_num]
+					[chip->cs-1]);
+		adi_spi_cs_disable(drv_data, chip);
+	} else {
+		gpio_free(chip->cs_gpio);
+	}
+
+	kfree(chip);
+	spi_set_ctldata(spi, NULL);
+}
+
+static irqreturn_t adi_spi_tx_dma_isr(int irq, void *dev_id)
+{
+	struct adi_spi_master *drv_data = dev_id;
+	u32 dma_stat = get_dma_curr_irqstat(drv_data->tx_dma);
+	u32 tx_ctl;
+
+	clear_dma_irqstat(drv_data->tx_dma);
+	if (dma_stat & DMA_DONE) {
+		drv_data->tx_num++;
+	} else {
+		dev_err(&drv_data->master->dev,
+				"spi tx dma error: %d\n", dma_stat);
+		if (drv_data->tx)
+			drv_data->state = ERROR_STATE;
+	}
+	tx_ctl = ioread32(&drv_data->regs->tx_control);
+	tx_ctl &= ~SPI_TXCTL_TDR_NF;
+	iowrite32(tx_ctl, &drv_data->regs->tx_control);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t adi_spi_rx_dma_isr(int irq, void *dev_id)
+{
+	struct adi_spi_master *drv_data = dev_id;
+	struct spi_message *msg = drv_data->cur_msg;
+	u32 dma_stat = get_dma_curr_irqstat(drv_data->rx_dma);
+
+	clear_dma_irqstat(drv_data->rx_dma);
+	if (dma_stat & DMA_DONE) {
+		drv_data->rx_num++;
+		/* we may fail on tx dma */
+		if (drv_data->state != ERROR_STATE)
+			msg->actual_length += drv_data->transfer_len;
+	} else {
+		drv_data->state = ERROR_STATE;
+		dev_err(&drv_data->master->dev,
+				"spi rx dma error: %d\n", dma_stat);
+	}
+	iowrite32(0, &drv_data->regs->tx_control);
+	iowrite32(0, &drv_data->regs->rx_control);
+	if (drv_data->rx_num != drv_data->tx_num)
+		dev_dbg(&drv_data->master->dev,
+				"dma interrupt missing: tx=%d,rx=%d\n",
+				drv_data->tx_num, drv_data->rx_num);
+	tasklet_schedule(&drv_data->pump_transfers);
+	return IRQ_HANDLED;
+}
+
+static int adi_spi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct adi_spi3_master *info = dev_get_platdata(dev);
+	struct spi_master *master;
+	struct adi_spi_master *drv_data;
+	struct resource *mem, *res;
+	unsigned int tx_dma, rx_dma;
+	unsigned long sclk;
+	int ret;
+
+	if (!info) {
+		dev_err(dev, "platform data missing!\n");
+		return -ENODEV;
+	}
+
+	sclk = get_sclk1();
+	if (!sclk) {
+		dev_err(dev, "can not get sclk1\n");
+		return -ENXIO;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	if (!res) {
+		dev_err(dev, "can not get tx dma resource\n");
+		return -ENXIO;
+	}
+	tx_dma = res->start;
+
+	res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+	if (!res) {
+		dev_err(dev, "can not get rx dma resource\n");
+		return -ENXIO;
+	}
+	rx_dma = res->start;
+
+	/* allocate master with space for drv_data */
+	master = spi_alloc_master(dev, sizeof(*drv_data));
+	if (!master) {
+		dev_err(dev, "can not alloc spi_master\n");
+		return -ENOMEM;
+	}
+	platform_set_drvdata(pdev, master);
+
+	/* the mode bits supported by this driver */
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
+
+	master->bus_num = pdev->id;
+	master->num_chipselect = info->num_chipselect;
+	master->cleanup = adi_spi_cleanup;
+	master->setup = adi_spi_setup;
+	master->transfer_one_message = adi_spi_transfer_one_message;
+	master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) |
+				     SPI_BPW_MASK(8);
+
+	drv_data = spi_master_get_devdata(master);
+	drv_data->master = master;
+	drv_data->tx_dma = tx_dma;
+	drv_data->rx_dma = rx_dma;
+	drv_data->pin_req = info->pin_req;
+	drv_data->sclk = sclk;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	drv_data->regs = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(drv_data->regs)) {
+		ret = PTR_ERR(drv_data->regs);
+		goto err_put_master;
+	}
+
+	/* request tx and rx dma */
+	ret = request_dma(tx_dma, "SPI_TX_DMA");
+	if (ret) {
+		dev_err(dev, "can not request SPI TX DMA channel\n");
+		goto err_put_master;
+	}
+	set_dma_callback(tx_dma, adi_spi_tx_dma_isr, drv_data);
+
+	ret = request_dma(rx_dma, "SPI_RX_DMA");
+	if (ret) {
+		dev_err(dev, "can not request SPI RX DMA channel\n");
+		goto err_free_tx_dma;
+	}
+	set_dma_callback(drv_data->rx_dma, adi_spi_rx_dma_isr, drv_data);
+
+	/* request CLK, MOSI and MISO */
+	ret = peripheral_request_list(drv_data->pin_req, "adi-spi3");
+	if (ret < 0) {
+		dev_err(dev, "can not request spi pins\n");
+		goto err_free_rx_dma;
+	}
+
+	iowrite32(SPI_CTL_MSTR | SPI_CTL_CPHA, &drv_data->regs->control);
+	iowrite32(0x0000FE00, &drv_data->regs->ssel);
+	iowrite32(0x0, &drv_data->regs->delay);
+
+	tasklet_init(&drv_data->pump_transfers,
+			adi_spi_pump_transfers, (unsigned long)drv_data);
+	/* register with the SPI framework */
+	ret = devm_spi_register_master(dev, master);
+	if (ret) {
+		dev_err(dev, "can not  register spi master\n");
+		goto err_free_peripheral;
+	}
+
+	return ret;
+
+err_free_peripheral:
+	peripheral_free_list(drv_data->pin_req);
+err_free_rx_dma:
+	free_dma(rx_dma);
+err_free_tx_dma:
+	free_dma(tx_dma);
+err_put_master:
+	spi_master_put(master);
+
+	return ret;
+}
+
+static int adi_spi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct adi_spi_master *drv_data = spi_master_get_devdata(master);
+
+	adi_spi_disable(drv_data);
+	peripheral_free_list(drv_data->pin_req);
+	free_dma(drv_data->rx_dma);
+	free_dma(drv_data->tx_dma);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int adi_spi_suspend(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct adi_spi_master *drv_data = spi_master_get_devdata(master);
+
+	spi_master_suspend(master);
+
+	drv_data->control = ioread32(&drv_data->regs->control);
+	drv_data->ssel = ioread32(&drv_data->regs->ssel);
+
+	iowrite32(SPI_CTL_MSTR | SPI_CTL_CPHA, &drv_data->regs->control);
+	iowrite32(0x0000FE00, &drv_data->regs->ssel);
+	dma_disable_irq(drv_data->rx_dma);
+	dma_disable_irq(drv_data->tx_dma);
+
+	return 0;
+}
+
+static int adi_spi_resume(struct device *dev)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct adi_spi_master *drv_data = spi_master_get_devdata(master);
+	int ret = 0;
+
+	/* bootrom may modify spi and dma status when resume in spi boot mode */
+	disable_dma(drv_data->rx_dma);
+
+	dma_enable_irq(drv_data->rx_dma);
+	dma_enable_irq(drv_data->tx_dma);
+	iowrite32(drv_data->control, &drv_data->regs->control);
+	iowrite32(drv_data->ssel, &drv_data->regs->ssel);
+
+	ret = spi_master_resume(master);
+	if (ret) {
+		free_dma(drv_data->rx_dma);
+		free_dma(drv_data->tx_dma);
+	}
+
+	return ret;
+}
+#endif
+static const struct dev_pm_ops adi_spi_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(adi_spi_suspend, adi_spi_resume)
+};
+
+MODULE_ALIAS("platform:adi-spi3");
+static struct platform_driver adi_spi_driver = {
+	.driver	= {
+		.name	= "adi-spi3",
+		.owner	= THIS_MODULE,
+		.pm     = &adi_spi_pm_ops,
+	},
+	.remove		= adi_spi_remove,
+};
+
+module_platform_driver_probe(adi_spi_driver, adi_spi_probe);
+
+MODULE_DESCRIPTION("Analog Devices SPI3 controller driver");
+MODULE_AUTHOR("Scott Jiang <Scott.Jiang.Linux@gmail.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/spi/spi-bfin-v3.c b/drivers/spi/spi-bfin-v3.c
deleted file mode 100644
index 4089d0e0d84e..000000000000
--- a/drivers/spi/spi-bfin-v3.c
+++ /dev/null
@@ -1,965 +0,0 @@
-/*
- * Analog Devices SPI3 controller driver
- *
- * Copyright (c) 2013 Analog Devices Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/delay.h>
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/errno.h>
-#include <linux/gpio.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/ioport.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spi/spi.h>
-#include <linux/types.h>
-
-#include <asm/bfin_spi3.h>
-#include <asm/cacheflush.h>
-#include <asm/dma.h>
-#include <asm/portmux.h>
-
-enum bfin_spi_state {
-	START_STATE,
-	RUNNING_STATE,
-	DONE_STATE,
-	ERROR_STATE
-};
-
-struct bfin_spi_master;
-
-struct bfin_spi_transfer_ops {
-	void (*write) (struct bfin_spi_master *);
-	void (*read) (struct bfin_spi_master *);
-	void (*duplex) (struct bfin_spi_master *);
-};
-
-/* runtime info for spi master */
-struct bfin_spi_master {
-	/* SPI framework hookup */
-	struct spi_master *master;
-
-	/* Regs base of SPI controller */
-	struct bfin_spi_regs __iomem *regs;
-
-	/* Pin request list */
-	u16 *pin_req;
-
-	/* Message Transfer pump */
-	struct tasklet_struct pump_transfers;
-
-	/* Current message transfer state info */
-	struct spi_message *cur_msg;
-	struct spi_transfer *cur_transfer;
-	struct bfin_spi_device *cur_chip;
-	unsigned transfer_len;
-
-	/* transfer buffer */
-	void *tx;
-	void *tx_end;
-	void *rx;
-	void *rx_end;
-
-	/* dma info */
-	unsigned int tx_dma;
-	unsigned int rx_dma;
-	dma_addr_t tx_dma_addr;
-	dma_addr_t rx_dma_addr;
-	unsigned long dummy_buffer; /* used in unidirectional transfer */
-	unsigned long tx_dma_size;
-	unsigned long rx_dma_size;
-	int tx_num;
-	int rx_num;
-
-	/* store register value for suspend/resume */
-	u32 control;
-	u32 ssel;
-
-	unsigned long sclk;
-	enum bfin_spi_state state;
-
-	const struct bfin_spi_transfer_ops *ops;
-};
-
-struct bfin_spi_device {
-	u32 control;
-	u32 clock;
-	u32 ssel;
-
-	u8 cs;
-	u16 cs_chg_udelay; /* Some devices require > 255usec delay */
-	u32 cs_gpio;
-	u32 tx_dummy_val; /* tx value for rx only transfer */
-	bool enable_dma;
-	const struct bfin_spi_transfer_ops *ops;
-};
-
-static void bfin_spi_enable(struct bfin_spi_master *drv_data)
-{
-	bfin_write_or(&drv_data->regs->control, SPI_CTL_EN);
-}
-
-static void bfin_spi_disable(struct bfin_spi_master *drv_data)
-{
-	bfin_write_and(&drv_data->regs->control, ~SPI_CTL_EN);
-}
-
-/* Caculate the SPI_CLOCK register value based on input HZ */
-static u32 hz_to_spi_clock(u32 sclk, u32 speed_hz)
-{
-	u32 spi_clock = sclk / speed_hz;
-
-	if (spi_clock)
-		spi_clock--;
-	return spi_clock;
-}
-
-static int bfin_spi_flush(struct bfin_spi_master *drv_data)
-{
-	unsigned long limit = loops_per_jiffy << 1;
-
-	/* wait for stop and clear stat */
-	while (!(bfin_read(&drv_data->regs->status) & SPI_STAT_SPIF) && --limit)
-		cpu_relax();
-
-	bfin_write(&drv_data->regs->status, 0xFFFFFFFF);
-
-	return limit;
-}
-
-/* Chip select operation functions for cs_change flag */
-static void bfin_spi_cs_active(struct bfin_spi_master *drv_data, struct bfin_spi_device *chip)
-{
-	if (likely(chip->cs < MAX_CTRL_CS))
-		bfin_write_and(&drv_data->regs->ssel, ~chip->ssel);
-	else
-		gpio_set_value(chip->cs_gpio, 0);
-}
-
-static void bfin_spi_cs_deactive(struct bfin_spi_master *drv_data,
-				struct bfin_spi_device *chip)
-{
-	if (likely(chip->cs < MAX_CTRL_CS))
-		bfin_write_or(&drv_data->regs->ssel, chip->ssel);
-	else
-		gpio_set_value(chip->cs_gpio, 1);
-
-	/* Move delay here for consistency */
-	if (chip->cs_chg_udelay)
-		udelay(chip->cs_chg_udelay);
-}
-
-/* enable or disable the pin muxed by GPIO and SPI CS to work as SPI CS */
-static inline void bfin_spi_cs_enable(struct bfin_spi_master *drv_data,
-					struct bfin_spi_device *chip)
-{
-	if (chip->cs < MAX_CTRL_CS)
-		bfin_write_or(&drv_data->regs->ssel, chip->ssel >> 8);
-}
-
-static inline void bfin_spi_cs_disable(struct bfin_spi_master *drv_data,
-					struct bfin_spi_device *chip)
-{
-	if (chip->cs < MAX_CTRL_CS)
-		bfin_write_and(&drv_data->regs->ssel, ~(chip->ssel >> 8));
-}
-
-/* stop controller and re-config current chip*/
-static void bfin_spi_restore_state(struct bfin_spi_master *drv_data)
-{
-	struct bfin_spi_device *chip = drv_data->cur_chip;
-
-	/* Clear status and disable clock */
-	bfin_write(&drv_data->regs->status, 0xFFFFFFFF);
-	bfin_write(&drv_data->regs->rx_control, 0x0);
-	bfin_write(&drv_data->regs->tx_control, 0x0);
-	bfin_spi_disable(drv_data);
-
-	SSYNC();
-
-	/* Load the registers */
-	bfin_write(&drv_data->regs->control, chip->control);
-	bfin_write(&drv_data->regs->clock, chip->clock);
-
-	bfin_spi_enable(drv_data);
-	drv_data->tx_num = drv_data->rx_num = 0;
-	/* we always choose tx transfer initiate */
-	bfin_write(&drv_data->regs->rx_control, SPI_RXCTL_REN);
-	bfin_write(&drv_data->regs->tx_control,
-			SPI_TXCTL_TEN | SPI_TXCTL_TTI);
-	bfin_spi_cs_active(drv_data, chip);
-}
-
-/* discard invalid rx data and empty rfifo */
-static inline void dummy_read(struct bfin_spi_master *drv_data)
-{
-	while (!(bfin_read(&drv_data->regs->status) & SPI_STAT_RFE))
-		bfin_read(&drv_data->regs->rfifo);
-}
-
-static void bfin_spi_u8_write(struct bfin_spi_master *drv_data)
-{
-	dummy_read(drv_data);
-	while (drv_data->tx < drv_data->tx_end) {
-		bfin_write(&drv_data->regs->tfifo, (*(u8 *)(drv_data->tx++)));
-		while (bfin_read(&drv_data->regs->status) & SPI_STAT_RFE)
-			cpu_relax();
-		bfin_read(&drv_data->regs->rfifo);
-	}
-}
-
-static void bfin_spi_u8_read(struct bfin_spi_master *drv_data)
-{
-	u32 tx_val = drv_data->cur_chip->tx_dummy_val;
-
-	dummy_read(drv_data);
-	while (drv_data->rx < drv_data->rx_end) {
-		bfin_write(&drv_data->regs->tfifo, tx_val);
-		while (bfin_read(&drv_data->regs->status) & SPI_STAT_RFE)
-			cpu_relax();
-		*(u8 *)(drv_data->rx++) = bfin_read(&drv_data->regs->rfifo);
-	}
-}
-
-static void bfin_spi_u8_duplex(struct bfin_spi_master *drv_data)
-{
-	dummy_read(drv_data);
-	while (drv_data->rx < drv_data->rx_end) {
-		bfin_write(&drv_data->regs->tfifo, (*(u8 *)(drv_data->tx++)));
-		while (bfin_read(&drv_data->regs->status) & SPI_STAT_RFE)
-			cpu_relax();
-		*(u8 *)(drv_data->rx++) = bfin_read(&drv_data->regs->rfifo);
-	}
-}
-
-static const struct bfin_spi_transfer_ops bfin_bfin_spi_transfer_ops_u8 = {
-	.write  = bfin_spi_u8_write,
-	.read   = bfin_spi_u8_read,
-	.duplex = bfin_spi_u8_duplex,
-};
-
-static void bfin_spi_u16_write(struct bfin_spi_master *drv_data)
-{
-	dummy_read(drv_data);
-	while (drv_data->tx < drv_data->tx_end) {
-		bfin_write(&drv_data->regs->tfifo, (*(u16 *)drv_data->tx));
-		drv_data->tx += 2;
-		while (bfin_read(&drv_data->regs->status) & SPI_STAT_RFE)
-			cpu_relax();
-		bfin_read(&drv_data->regs->rfifo);
-	}
-}
-
-static void bfin_spi_u16_read(struct bfin_spi_master *drv_data)
-{
-	u32 tx_val = drv_data->cur_chip->tx_dummy_val;
-
-	dummy_read(drv_data);
-	while (drv_data->rx < drv_data->rx_end) {
-		bfin_write(&drv_data->regs->tfifo, tx_val);
-		while (bfin_read(&drv_data->regs->status) & SPI_STAT_RFE)
-			cpu_relax();
-		*(u16 *)drv_data->rx = bfin_read(&drv_data->regs->rfifo);
-		drv_data->rx += 2;
-	}
-}
-
-static void bfin_spi_u16_duplex(struct bfin_spi_master *drv_data)
-{
-	dummy_read(drv_data);
-	while (drv_data->rx < drv_data->rx_end) {
-		bfin_write(&drv_data->regs->tfifo, (*(u16 *)drv_data->tx));
-		drv_data->tx += 2;
-		while (bfin_read(&drv_data->regs->status) & SPI_STAT_RFE)
-			cpu_relax();
-		*(u16 *)drv_data->rx = bfin_read(&drv_data->regs->rfifo);
-		drv_data->rx += 2;
-	}
-}
-
-static const struct bfin_spi_transfer_ops bfin_bfin_spi_transfer_ops_u16 = {
-	.write  = bfin_spi_u16_write,
-	.read   = bfin_spi_u16_read,
-	.duplex = bfin_spi_u16_duplex,
-};
-
-static void bfin_spi_u32_write(struct bfin_spi_master *drv_data)
-{
-	dummy_read(drv_data);
-	while (drv_data->tx < drv_data->tx_end) {
-		bfin_write(&drv_data->regs->tfifo, (*(u32 *)drv_data->tx));
-		drv_data->tx += 4;
-		while (bfin_read(&drv_data->regs->status) & SPI_STAT_RFE)
-			cpu_relax();
-		bfin_read(&drv_data->regs->rfifo);
-	}
-}
-
-static void bfin_spi_u32_read(struct bfin_spi_master *drv_data)
-{
-	u32 tx_val = drv_data->cur_chip->tx_dummy_val;
-
-	dummy_read(drv_data);
-	while (drv_data->rx < drv_data->rx_end) {
-		bfin_write(&drv_data->regs->tfifo, tx_val);
-		while (bfin_read(&drv_data->regs->status) & SPI_STAT_RFE)
-			cpu_relax();
-		*(u32 *)drv_data->rx = bfin_read(&drv_data->regs->rfifo);
-		drv_data->rx += 4;
-	}
-}
-
-static void bfin_spi_u32_duplex(struct bfin_spi_master *drv_data)
-{
-	dummy_read(drv_data);
-	while (drv_data->rx < drv_data->rx_end) {
-		bfin_write(&drv_data->regs->tfifo, (*(u32 *)drv_data->tx));
-		drv_data->tx += 4;
-		while (bfin_read(&drv_data->regs->status) & SPI_STAT_RFE)
-			cpu_relax();
-		*(u32 *)drv_data->rx = bfin_read(&drv_data->regs->rfifo);
-		drv_data->rx += 4;
-	}
-}
-
-static const struct bfin_spi_transfer_ops bfin_bfin_spi_transfer_ops_u32 = {
-	.write  = bfin_spi_u32_write,
-	.read   = bfin_spi_u32_read,
-	.duplex = bfin_spi_u32_duplex,
-};
-
-
-/* test if there is more transfer to be done */
-static void bfin_spi_next_transfer(struct bfin_spi_master *drv)
-{
-	struct spi_message *msg = drv->cur_msg;
-	struct spi_transfer *t = drv->cur_transfer;
-
-	/* Move to next transfer */
-	if (t->transfer_list.next != &msg->transfers) {
-		drv->cur_transfer = list_entry(t->transfer_list.next,
-			       struct spi_transfer, transfer_list);
-		drv->state = RUNNING_STATE;
-	} else {
-		drv->state = DONE_STATE;
-		drv->cur_transfer = NULL;
-	}
-}
-
-static void bfin_spi_giveback(struct bfin_spi_master *drv_data)
-{
-	struct bfin_spi_device *chip = drv_data->cur_chip;
-
-	bfin_spi_cs_deactive(drv_data, chip);
-	spi_finalize_current_message(drv_data->master);
-}
-
-static int bfin_spi_setup_transfer(struct bfin_spi_master *drv)
-{
-	struct spi_transfer *t = drv->cur_transfer;
-	u32 cr, cr_width;
-
-	if (t->tx_buf) {
-		drv->tx = (void *)t->tx_buf;
-		drv->tx_end = drv->tx + t->len;
-	} else {
-		drv->tx = NULL;
-	}
-
-	if (t->rx_buf) {
-		drv->rx = t->rx_buf;
-		drv->rx_end = drv->rx + t->len;
-	} else {
-		drv->rx = NULL;
-	}
-
-	drv->transfer_len = t->len;
-
-	/* bits per word setup */
-	switch (t->bits_per_word) {
-	case 8:
-		cr_width = SPI_CTL_SIZE08;
-		drv->ops = &bfin_bfin_spi_transfer_ops_u8;
-		break;
-	case 16:
-		cr_width = SPI_CTL_SIZE16;
-		drv->ops = &bfin_bfin_spi_transfer_ops_u16;
-		break;
-	case 32:
-		cr_width = SPI_CTL_SIZE32;
-		drv->ops = &bfin_bfin_spi_transfer_ops_u32;
-		break;
-	default:
-		return -EINVAL;
-	}
-	cr = bfin_read(&drv->regs->control) & ~SPI_CTL_SIZE;
-	cr |= cr_width;
-	bfin_write(&drv->regs->control, cr);
-
-	/* speed setup */
-	bfin_write(&drv->regs->clock,
-			hz_to_spi_clock(drv->sclk, t->speed_hz));
-	return 0;
-}
-
-static int bfin_spi_dma_xfer(struct bfin_spi_master *drv_data)
-{
-	struct spi_transfer *t = drv_data->cur_transfer;
-	struct spi_message *msg = drv_data->cur_msg;
-	struct bfin_spi_device *chip = drv_data->cur_chip;
-	u32 dma_config;
-	unsigned long word_count, word_size;
-	void *tx_buf, *rx_buf;
-
-	switch (t->bits_per_word) {
-	case 8:
-		dma_config = WDSIZE_8 | PSIZE_8;
-		word_count = drv_data->transfer_len;
-		word_size = 1;
-		break;
-	case 16:
-		dma_config = WDSIZE_16 | PSIZE_16;
-		word_count = drv_data->transfer_len / 2;
-		word_size = 2;
-		break;
-	default:
-		dma_config = WDSIZE_32 | PSIZE_32;
-		word_count = drv_data->transfer_len / 4;
-		word_size = 4;
-		break;
-	}
-
-	if (!drv_data->rx) {
-		tx_buf = drv_data->tx;
-		rx_buf = &drv_data->dummy_buffer;
-		drv_data->tx_dma_size = drv_data->transfer_len;
-		drv_data->rx_dma_size = sizeof(drv_data->dummy_buffer);
-		set_dma_x_modify(drv_data->tx_dma, word_size);
-		set_dma_x_modify(drv_data->rx_dma, 0);
-	} else if (!drv_data->tx) {
-		drv_data->dummy_buffer = chip->tx_dummy_val;
-		tx_buf = &drv_data->dummy_buffer;
-		rx_buf = drv_data->rx;
-		drv_data->tx_dma_size = sizeof(drv_data->dummy_buffer);
-		drv_data->rx_dma_size = drv_data->transfer_len;
-		set_dma_x_modify(drv_data->tx_dma, 0);
-		set_dma_x_modify(drv_data->rx_dma, word_size);
-	} else {
-		tx_buf = drv_data->tx;
-		rx_buf = drv_data->rx;
-		drv_data->tx_dma_size = drv_data->rx_dma_size
-					= drv_data->transfer_len;
-		set_dma_x_modify(drv_data->tx_dma, word_size);
-		set_dma_x_modify(drv_data->rx_dma, word_size);
-	}
-
-	drv_data->tx_dma_addr = dma_map_single(&msg->spi->dev,
-				(void *)tx_buf,
-				drv_data->tx_dma_size,
-				DMA_TO_DEVICE);
-	if (dma_mapping_error(&msg->spi->dev,
-				drv_data->tx_dma_addr))
-		return -ENOMEM;
-
-	drv_data->rx_dma_addr = dma_map_single(&msg->spi->dev,
-				(void *)rx_buf,
-				drv_data->rx_dma_size,
-				DMA_FROM_DEVICE);
-	if (dma_mapping_error(&msg->spi->dev,
-				drv_data->rx_dma_addr)) {
-		dma_unmap_single(&msg->spi->dev,
-				drv_data->tx_dma_addr,
-				drv_data->tx_dma_size,
-				DMA_TO_DEVICE);
-		return -ENOMEM;
-	}
-
-	dummy_read(drv_data);
-	set_dma_x_count(drv_data->tx_dma, word_count);
-	set_dma_x_count(drv_data->rx_dma, word_count);
-	set_dma_start_addr(drv_data->tx_dma, drv_data->tx_dma_addr);
-	set_dma_start_addr(drv_data->rx_dma, drv_data->rx_dma_addr);
-	dma_config |= DMAFLOW_STOP | RESTART | DI_EN;
-	set_dma_config(drv_data->tx_dma, dma_config);
-	set_dma_config(drv_data->rx_dma, dma_config | WNR);
-	enable_dma(drv_data->tx_dma);
-	enable_dma(drv_data->rx_dma);
-	SSYNC();
-
-	bfin_write(&drv_data->regs->rx_control, SPI_RXCTL_REN | SPI_RXCTL_RDR_NE);
-	SSYNC();
-	bfin_write(&drv_data->regs->tx_control,
-			SPI_TXCTL_TEN | SPI_TXCTL_TTI | SPI_TXCTL_TDR_NF);
-
-	return 0;
-}
-
-static int bfin_spi_pio_xfer(struct bfin_spi_master *drv_data)
-{
-	struct spi_message *msg = drv_data->cur_msg;
-
-	if (!drv_data->rx) {
-		/* write only half duplex */
-		drv_data->ops->write(drv_data);
-		if (drv_data->tx != drv_data->tx_end)
-			return -EIO;
-	} else if (!drv_data->tx) {
-		/* read only half duplex */
-		drv_data->ops->read(drv_data);
-		if (drv_data->rx != drv_data->rx_end)
-			return -EIO;
-	} else {
-		/* full duplex mode */
-		drv_data->ops->duplex(drv_data);
-		if (drv_data->tx != drv_data->tx_end)
-			return -EIO;
-	}
-
-	if (!bfin_spi_flush(drv_data))
-		return -EIO;
-	msg->actual_length += drv_data->transfer_len;
-	tasklet_schedule(&drv_data->pump_transfers);
-	return 0;
-}
-
-static void bfin_spi_pump_transfers(unsigned long data)
-{
-	struct bfin_spi_master *drv_data = (struct bfin_spi_master *)data;
-	struct spi_message *msg = NULL;
-	struct spi_transfer *t = NULL;
-	struct bfin_spi_device *chip = NULL;
-	int ret;
-
-	/* Get current state information */
-	msg = drv_data->cur_msg;
-	t = drv_data->cur_transfer;
-	chip = drv_data->cur_chip;
-
-	/* Handle for abort */
-	if (drv_data->state == ERROR_STATE) {
-		msg->status = -EIO;
-		bfin_spi_giveback(drv_data);
-		return;
-	}
-
-	if (drv_data->state == RUNNING_STATE) {
-		if (t->delay_usecs)
-			udelay(t->delay_usecs);
-		if (t->cs_change)
-			bfin_spi_cs_deactive(drv_data, chip);
-		bfin_spi_next_transfer(drv_data);
-		t = drv_data->cur_transfer;
-	}
-	/* Handle end of message */
-	if (drv_data->state == DONE_STATE) {
-		msg->status = 0;
-		bfin_spi_giveback(drv_data);
-		return;
-	}
-
-	if ((t->len == 0) || (t->tx_buf == NULL && t->rx_buf == NULL)) {
-		/* Schedule next transfer tasklet */
-		tasklet_schedule(&drv_data->pump_transfers);
-		return;
-	}
-
-	ret = bfin_spi_setup_transfer(drv_data);
-	if (ret) {
-		msg->status = ret;
-		bfin_spi_giveback(drv_data);
-	}
-
-	bfin_write(&drv_data->regs->status, 0xFFFFFFFF);
-	bfin_spi_cs_active(drv_data, chip);
-	drv_data->state = RUNNING_STATE;
-
-	if (chip->enable_dma)
-		ret = bfin_spi_dma_xfer(drv_data);
-	else
-		ret = bfin_spi_pio_xfer(drv_data);
-	if (ret) {
-		msg->status = ret;
-		bfin_spi_giveback(drv_data);
-	}
-}
-
-static int bfin_spi_transfer_one_message(struct spi_master *master,
-					struct spi_message *m)
-{
-	struct bfin_spi_master *drv_data = spi_master_get_devdata(master);
-
-	drv_data->cur_msg = m;
-	drv_data->cur_chip = spi_get_ctldata(drv_data->cur_msg->spi);
-	bfin_spi_restore_state(drv_data);
-
-	drv_data->state = START_STATE;
-	drv_data->cur_transfer = list_entry(drv_data->cur_msg->transfers.next,
-					    struct spi_transfer, transfer_list);
-
-	tasklet_schedule(&drv_data->pump_transfers);
-	return 0;
-}
-
-#define MAX_SPI_SSEL	7
-
-static const u16 ssel[][MAX_SPI_SSEL] = {
-	{P_SPI0_SSEL1, P_SPI0_SSEL2, P_SPI0_SSEL3,
-	P_SPI0_SSEL4, P_SPI0_SSEL5,
-	P_SPI0_SSEL6, P_SPI0_SSEL7},
-
-	{P_SPI1_SSEL1, P_SPI1_SSEL2, P_SPI1_SSEL3,
-	P_SPI1_SSEL4, P_SPI1_SSEL5,
-	P_SPI1_SSEL6, P_SPI1_SSEL7},
-
-	{P_SPI2_SSEL1, P_SPI2_SSEL2, P_SPI2_SSEL3,
-	P_SPI2_SSEL4, P_SPI2_SSEL5,
-	P_SPI2_SSEL6, P_SPI2_SSEL7},
-};
-
-static int bfin_spi_setup(struct spi_device *spi)
-{
-	struct bfin_spi_master *drv_data = spi_master_get_devdata(spi->master);
-	struct bfin_spi_device *chip = spi_get_ctldata(spi);
-	u32 bfin_ctl_reg = SPI_CTL_ODM | SPI_CTL_PSSE;
-	int ret = -EINVAL;
-
-	if (!chip) {
-		struct bfin_spi3_chip *chip_info = spi->controller_data;
-
-		chip = kzalloc(sizeof(*chip), GFP_KERNEL);
-		if (!chip) {
-			dev_err(&spi->dev, "can not allocate chip data\n");
-			return -ENOMEM;
-		}
-		if (chip_info) {
-			if (chip_info->control & ~bfin_ctl_reg) {
-				dev_err(&spi->dev,
-					"do not set bits that the SPI framework manages\n");
-				goto error;
-			}
-			chip->control = chip_info->control;
-			chip->cs_chg_udelay = chip_info->cs_chg_udelay;
-			chip->tx_dummy_val = chip_info->tx_dummy_val;
-			chip->enable_dma = chip_info->enable_dma;
-		}
-		chip->cs = spi->chip_select;
-		if (chip->cs < MAX_CTRL_CS) {
-			chip->ssel = (1 << chip->cs) << 8;
-			ret = peripheral_request(ssel[spi->master->bus_num]
-					[chip->cs-1], dev_name(&spi->dev));
-			if (ret) {
-				dev_err(&spi->dev, "peripheral_request() error\n");
-				goto error;
-			}
-		} else {
-			chip->cs_gpio = chip->cs - MAX_CTRL_CS;
-			ret = gpio_request_one(chip->cs_gpio, GPIOF_OUT_INIT_HIGH,
-						dev_name(&spi->dev));
-			if (ret) {
-				dev_err(&spi->dev, "gpio_request_one() error\n");
-				goto error;
-			}
-		}
-		spi_set_ctldata(spi, chip);
-	}
-
-	/* force a default base state */
-	chip->control &= bfin_ctl_reg;
-
-	if (spi->mode & SPI_CPOL)
-		chip->control |= SPI_CTL_CPOL;
-	if (spi->mode & SPI_CPHA)
-		chip->control |= SPI_CTL_CPHA;
-	if (spi->mode & SPI_LSB_FIRST)
-		chip->control |= SPI_CTL_LSBF;
-	chip->control |= SPI_CTL_MSTR;
-	/* we choose software to controll cs */
-	chip->control &= ~SPI_CTL_ASSEL;
-
-	chip->clock = hz_to_spi_clock(drv_data->sclk, spi->max_speed_hz);
-
-	bfin_spi_cs_enable(drv_data, chip);
-	bfin_spi_cs_deactive(drv_data, chip);
-
-	return 0;
-error:
-	if (chip) {
-		kfree(chip);
-		spi_set_ctldata(spi, NULL);
-	}
-
-	return ret;
-}
-
-static void bfin_spi_cleanup(struct spi_device *spi)
-{
-	struct bfin_spi_device *chip = spi_get_ctldata(spi);
-	struct bfin_spi_master *drv_data = spi_master_get_devdata(spi->master);
-
-	if (!chip)
-		return;
-
-	if (chip->cs < MAX_CTRL_CS) {
-		peripheral_free(ssel[spi->master->bus_num]
-					[chip->cs-1]);
-		bfin_spi_cs_disable(drv_data, chip);
-	} else {
-		gpio_free(chip->cs_gpio);
-	}
-
-	kfree(chip);
-	spi_set_ctldata(spi, NULL);
-}
-
-static irqreturn_t bfin_spi_tx_dma_isr(int irq, void *dev_id)
-{
-	struct bfin_spi_master *drv_data = dev_id;
-	u32 dma_stat = get_dma_curr_irqstat(drv_data->tx_dma);
-
-	clear_dma_irqstat(drv_data->tx_dma);
-	if (dma_stat & DMA_DONE) {
-		drv_data->tx_num++;
-	} else {
-		dev_err(&drv_data->master->dev,
-				"spi tx dma error: %d\n", dma_stat);
-		if (drv_data->tx)
-			drv_data->state = ERROR_STATE;
-	}
-	bfin_write_and(&drv_data->regs->tx_control, ~SPI_TXCTL_TDR_NF);
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t bfin_spi_rx_dma_isr(int irq, void *dev_id)
-{
-	struct bfin_spi_master *drv_data = dev_id;
-	struct spi_message *msg = drv_data->cur_msg;
-	u32 dma_stat = get_dma_curr_irqstat(drv_data->rx_dma);
-
-	clear_dma_irqstat(drv_data->rx_dma);
-	if (dma_stat & DMA_DONE) {
-		drv_data->rx_num++;
-		/* we may fail on tx dma */
-		if (drv_data->state != ERROR_STATE)
-			msg->actual_length += drv_data->transfer_len;
-	} else {
-		drv_data->state = ERROR_STATE;
-		dev_err(&drv_data->master->dev,
-				"spi rx dma error: %d\n", dma_stat);
-	}
-	bfin_write(&drv_data->regs->tx_control, 0);
-	bfin_write(&drv_data->regs->rx_control, 0);
-	if (drv_data->rx_num != drv_data->tx_num)
-		dev_dbg(&drv_data->master->dev,
-				"dma interrupt missing: tx=%d,rx=%d\n",
-				drv_data->tx_num, drv_data->rx_num);
-	tasklet_schedule(&drv_data->pump_transfers);
-	return IRQ_HANDLED;
-}
-
-static int bfin_spi_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct bfin_spi3_master *info = dev_get_platdata(dev);
-	struct spi_master *master;
-	struct bfin_spi_master *drv_data;
-	struct resource *mem, *res;
-	unsigned int tx_dma, rx_dma;
-	unsigned long sclk;
-	int ret;
-
-	if (!info) {
-		dev_err(dev, "platform data missing!\n");
-		return -ENODEV;
-	}
-
-	sclk = get_sclk1();
-	if (!sclk) {
-		dev_err(dev, "can not get sclk1\n");
-		return -ENXIO;
-	}
-
-	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
-	if (!res) {
-		dev_err(dev, "can not get tx dma resource\n");
-		return -ENXIO;
-	}
-	tx_dma = res->start;
-
-	res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
-	if (!res) {
-		dev_err(dev, "can not get rx dma resource\n");
-		return -ENXIO;
-	}
-	rx_dma = res->start;
-
-	/* allocate master with space for drv_data */
-	master = spi_alloc_master(dev, sizeof(*drv_data));
-	if (!master) {
-		dev_err(dev, "can not alloc spi_master\n");
-		return -ENOMEM;
-	}
-	platform_set_drvdata(pdev, master);
-
-	/* the mode bits supported by this driver */
-	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST;
-
-	master->bus_num = pdev->id;
-	master->num_chipselect = info->num_chipselect;
-	master->cleanup = bfin_spi_cleanup;
-	master->setup = bfin_spi_setup;
-	master->transfer_one_message = bfin_spi_transfer_one_message;
-	master->bits_per_word_mask = SPI_BPW_MASK(32) | SPI_BPW_MASK(16) |
-				     SPI_BPW_MASK(8);
-
-	drv_data = spi_master_get_devdata(master);
-	drv_data->master = master;
-	drv_data->tx_dma = tx_dma;
-	drv_data->rx_dma = rx_dma;
-	drv_data->pin_req = info->pin_req;
-	drv_data->sclk = sclk;
-
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	drv_data->regs = devm_ioremap_resource(dev, mem);
-	if (IS_ERR(drv_data->regs)) {
-		ret = PTR_ERR(drv_data->regs);
-		goto err_put_master;
-	}
-
-	/* request tx and rx dma */
-	ret = request_dma(tx_dma, "SPI_TX_DMA");
-	if (ret) {
-		dev_err(dev, "can not request SPI TX DMA channel\n");
-		goto err_put_master;
-	}
-	set_dma_callback(tx_dma, bfin_spi_tx_dma_isr, drv_data);
-
-	ret = request_dma(rx_dma, "SPI_RX_DMA");
-	if (ret) {
-		dev_err(dev, "can not request SPI RX DMA channel\n");
-		goto err_free_tx_dma;
-	}
-	set_dma_callback(drv_data->rx_dma, bfin_spi_rx_dma_isr, drv_data);
-
-	/* request CLK, MOSI and MISO */
-	ret = peripheral_request_list(drv_data->pin_req, "bfin-spi3");
-	if (ret < 0) {
-		dev_err(dev, "can not request spi pins\n");
-		goto err_free_rx_dma;
-	}
-
-	bfin_write(&drv_data->regs->control, SPI_CTL_MSTR | SPI_CTL_CPHA);
-	bfin_write(&drv_data->regs->ssel, 0x0000FE00);
-	bfin_write(&drv_data->regs->delay, 0x0);
-
-	tasklet_init(&drv_data->pump_transfers,
-			bfin_spi_pump_transfers, (unsigned long)drv_data);
-	/* register with the SPI framework */
-	ret = devm_spi_register_master(dev, master);
-	if (ret) {
-		dev_err(dev, "can not  register spi master\n");
-		goto err_free_peripheral;
-	}
-
-	return ret;
-
-err_free_peripheral:
-	peripheral_free_list(drv_data->pin_req);
-err_free_rx_dma:
-	free_dma(rx_dma);
-err_free_tx_dma:
-	free_dma(tx_dma);
-err_put_master:
-	spi_master_put(master);
-
-	return ret;
-}
-
-static int bfin_spi_remove(struct platform_device *pdev)
-{
-	struct spi_master *master = platform_get_drvdata(pdev);
-	struct bfin_spi_master *drv_data = spi_master_get_devdata(master);
-
-	bfin_spi_disable(drv_data);
-
-	peripheral_free_list(drv_data->pin_req);
-	free_dma(drv_data->rx_dma);
-	free_dma(drv_data->tx_dma);
-
-	return 0;
-}
-
-#ifdef CONFIG_PM
-static int bfin_spi_suspend(struct device *dev)
-{
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct bfin_spi_master *drv_data = spi_master_get_devdata(master);
-
-	spi_master_suspend(master);
-
-	drv_data->control = bfin_read(&drv_data->regs->control);
-	drv_data->ssel = bfin_read(&drv_data->regs->ssel);
-
-	bfin_write(&drv_data->regs->control, SPI_CTL_MSTR | SPI_CTL_CPHA);
-	bfin_write(&drv_data->regs->ssel, 0x0000FE00);
-	dma_disable_irq(drv_data->rx_dma);
-	dma_disable_irq(drv_data->tx_dma);
-
-	return 0;
-}
-
-static int bfin_spi_resume(struct device *dev)
-{
-	struct spi_master *master = dev_get_drvdata(dev);
-	struct bfin_spi_master *drv_data = spi_master_get_devdata(master);
-	int ret = 0;
-
-	/* bootrom may modify spi and dma status when resume in spi boot mode */
-	disable_dma(drv_data->rx_dma);
-
-	dma_enable_irq(drv_data->rx_dma);
-	dma_enable_irq(drv_data->tx_dma);
-	bfin_write(&drv_data->regs->control, drv_data->control);
-	bfin_write(&drv_data->regs->ssel, drv_data->ssel);
-
-	ret = spi_master_resume(master);
-	if (ret) {
-		free_dma(drv_data->rx_dma);
-		free_dma(drv_data->tx_dma);
-	}
-
-	return ret;
-}
-#endif
-static const struct dev_pm_ops bfin_spi_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(bfin_spi_suspend, bfin_spi_resume)
-};
-
-MODULE_ALIAS("platform:bfin-spi3");
-static struct platform_driver bfin_spi_driver = {
-	.driver	= {
-		.name	= "bfin-spi3",
-		.owner	= THIS_MODULE,
-		.pm     = &bfin_spi_pm_ops,
-	},
-	.remove		= bfin_spi_remove,
-};
-
-module_platform_driver_probe(bfin_spi_driver, bfin_spi_probe);
-
-MODULE_DESCRIPTION("Analog Devices SPI3 controller driver");
-MODULE_AUTHOR("Scott Jiang <Scott.Jiang.Linux@gmail.com>");
-MODULE_LICENSE("GPL v2");
diff --git a/include/linux/spi/adi_spi3.h b/include/linux/spi/adi_spi3.h
new file mode 100644
index 000000000000..c84123aa1d06
--- /dev/null
+++ b/include/linux/spi/adi_spi3.h
@@ -0,0 +1,254 @@
+/*
+ * Analog Devices SPI3 controller driver
+ *
+ * Copyright (c) 2014 Analog Devices Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _ADI_SPI3_H_
+#define _ADI_SPI3_H_
+
+#include <linux/types.h>
+
+/* SPI_CONTROL */
+#define SPI_CTL_EN                  0x00000001    /* Enable */
+#define SPI_CTL_MSTR                0x00000002    /* Master/Slave */
+#define SPI_CTL_PSSE                0x00000004    /* controls modf error in master mode */
+#define SPI_CTL_ODM                 0x00000008    /* Open Drain Mode */
+#define SPI_CTL_CPHA                0x00000010    /* Clock Phase */
+#define SPI_CTL_CPOL                0x00000020    /* Clock Polarity */
+#define SPI_CTL_ASSEL               0x00000040    /* Slave Select Pin Control */
+#define SPI_CTL_SELST               0x00000080    /* Slave Select Polarity in-between transfers */
+#define SPI_CTL_EMISO               0x00000100    /* Enable MISO */
+#define SPI_CTL_SIZE                0x00000600    /* Word Transfer Size */
+#define SPI_CTL_SIZE08              0x00000000    /* SIZE: 8 bits */
+#define SPI_CTL_SIZE16              0x00000200    /* SIZE: 16 bits */
+#define SPI_CTL_SIZE32              0x00000400    /* SIZE: 32 bits */
+#define SPI_CTL_LSBF                0x00001000    /* LSB First */
+#define SPI_CTL_FCEN                0x00002000    /* Flow-Control Enable */
+#define SPI_CTL_FCCH                0x00004000    /* Flow-Control Channel Selection */
+#define SPI_CTL_FCPL                0x00008000    /* Flow-Control Polarity */
+#define SPI_CTL_FCWM                0x00030000    /* Flow-Control Water-Mark */
+#define SPI_CTL_FIFO0               0x00000000    /* FCWM: TFIFO empty or RFIFO Full */
+#define SPI_CTL_FIFO1               0x00010000    /* FCWM: TFIFO 75% or more empty or RFIFO 75% or more full */
+#define SPI_CTL_FIFO2               0x00020000    /* FCWM: TFIFO 50% or more empty or RFIFO 50% or more full */
+#define SPI_CTL_FMODE               0x00040000    /* Fast-mode Enable */
+#define SPI_CTL_MIOM                0x00300000    /* Multiple I/O Mode */
+#define SPI_CTL_MIO_DIS             0x00000000    /* MIOM: Disable */
+#define SPI_CTL_MIO_DUAL            0x00100000    /* MIOM: Enable DIOM (Dual I/O Mode) */
+#define SPI_CTL_MIO_QUAD            0x00200000    /* MIOM: Enable QUAD (Quad SPI Mode) */
+#define SPI_CTL_SOSI                0x00400000    /* Start on MOSI */
+/* SPI_RX_CONTROL */
+#define SPI_RXCTL_REN               0x00000001    /* Receive Channel Enable */
+#define SPI_RXCTL_RTI               0x00000004    /* Receive Transfer Initiate */
+#define SPI_RXCTL_RWCEN             0x00000008    /* Receive Word Counter Enable */
+#define SPI_RXCTL_RDR               0x00000070    /* Receive Data Request */
+#define SPI_RXCTL_RDR_DIS           0x00000000    /* RDR: Disabled */
+#define SPI_RXCTL_RDR_NE            0x00000010    /* RDR: RFIFO not empty */
+#define SPI_RXCTL_RDR_25            0x00000020    /* RDR: RFIFO 25% full */
+#define SPI_RXCTL_RDR_50            0x00000030    /* RDR: RFIFO 50% full */
+#define SPI_RXCTL_RDR_75            0x00000040    /* RDR: RFIFO 75% full */
+#define SPI_RXCTL_RDR_FULL          0x00000050    /* RDR: RFIFO full */
+#define SPI_RXCTL_RDO               0x00000100    /* Receive Data Over-Run */
+#define SPI_RXCTL_RRWM              0x00003000    /* FIFO Regular Water-Mark */
+#define SPI_RXCTL_RWM_0             0x00000000    /* RRWM: RFIFO Empty */
+#define SPI_RXCTL_RWM_25            0x00001000    /* RRWM: RFIFO 25% full */
+#define SPI_RXCTL_RWM_50            0x00002000    /* RRWM: RFIFO 50% full */
+#define SPI_RXCTL_RWM_75            0x00003000    /* RRWM: RFIFO 75% full */
+#define SPI_RXCTL_RUWM              0x00070000    /* FIFO Urgent Water-Mark */
+#define SPI_RXCTL_UWM_DIS           0x00000000    /* RUWM: Disabled */
+#define SPI_RXCTL_UWM_25            0x00010000    /* RUWM: RFIFO 25% full */
+#define SPI_RXCTL_UWM_50            0x00020000    /* RUWM: RFIFO 50% full */
+#define SPI_RXCTL_UWM_75            0x00030000    /* RUWM: RFIFO 75% full */
+#define SPI_RXCTL_UWM_FULL          0x00040000    /* RUWM: RFIFO full */
+/* SPI_TX_CONTROL */
+#define SPI_TXCTL_TEN               0x00000001    /* Transmit Channel Enable */
+#define SPI_TXCTL_TTI               0x00000004    /* Transmit Transfer Initiate */
+#define SPI_TXCTL_TWCEN             0x00000008    /* Transmit Word Counter Enable */
+#define SPI_TXCTL_TDR               0x00000070    /* Transmit Data Request */
+#define SPI_TXCTL_TDR_DIS           0x00000000    /* TDR: Disabled */
+#define SPI_TXCTL_TDR_NF            0x00000010    /* TDR: TFIFO not full */
+#define SPI_TXCTL_TDR_25            0x00000020    /* TDR: TFIFO 25% empty */
+#define SPI_TXCTL_TDR_50            0x00000030    /* TDR: TFIFO 50% empty */
+#define SPI_TXCTL_TDR_75            0x00000040    /* TDR: TFIFO 75% empty */
+#define SPI_TXCTL_TDR_EMPTY         0x00000050    /* TDR: TFIFO empty */
+#define SPI_TXCTL_TDU               0x00000100    /* Transmit Data Under-Run */
+#define SPI_TXCTL_TRWM              0x00003000    /* FIFO Regular Water-Mark */
+#define SPI_TXCTL_RWM_FULL          0x00000000    /* TRWM: TFIFO full */
+#define SPI_TXCTL_RWM_25            0x00001000    /* TRWM: TFIFO 25% empty */
+#define SPI_TXCTL_RWM_50            0x00002000    /* TRWM: TFIFO 50% empty */
+#define SPI_TXCTL_RWM_75            0x00003000    /* TRWM: TFIFO 75% empty */
+#define SPI_TXCTL_TUWM              0x00070000    /* FIFO Urgent Water-Mark */
+#define SPI_TXCTL_UWM_DIS           0x00000000    /* TUWM: Disabled */
+#define SPI_TXCTL_UWM_25            0x00010000    /* TUWM: TFIFO 25% empty */
+#define SPI_TXCTL_UWM_50            0x00020000    /* TUWM: TFIFO 50% empty */
+#define SPI_TXCTL_UWM_75            0x00030000    /* TUWM: TFIFO 75% empty */
+#define SPI_TXCTL_UWM_EMPTY         0x00040000    /* TUWM: TFIFO empty */
+/* SPI_CLOCK */
+#define SPI_CLK_BAUD                0x0000FFFF    /* Baud Rate */
+/* SPI_DELAY */
+#define SPI_DLY_STOP                0x000000FF    /* Transfer delay time in multiples of SCK period */
+#define SPI_DLY_LEADX               0x00000100    /* Extended (1 SCK) LEAD Control */
+#define SPI_DLY_LAGX                0x00000200    /* Extended (1 SCK) LAG control */
+/* SPI_SSEL */
+#define SPI_SLVSEL_SSE1             0x00000002    /* SPISSEL1 Enable */
+#define SPI_SLVSEL_SSE2             0x00000004    /* SPISSEL2 Enable */
+#define SPI_SLVSEL_SSE3             0x00000008    /* SPISSEL3 Enable */
+#define SPI_SLVSEL_SSE4             0x00000010    /* SPISSEL4 Enable */
+#define SPI_SLVSEL_SSE5             0x00000020    /* SPISSEL5 Enable */
+#define SPI_SLVSEL_SSE6             0x00000040    /* SPISSEL6 Enable */
+#define SPI_SLVSEL_SSE7             0x00000080    /* SPISSEL7 Enable */
+#define SPI_SLVSEL_SSEL1            0x00000200    /* SPISSEL1 Value */
+#define SPI_SLVSEL_SSEL2            0x00000400    /* SPISSEL2 Value */
+#define SPI_SLVSEL_SSEL3            0x00000800    /* SPISSEL3 Value */
+#define SPI_SLVSEL_SSEL4            0x00001000    /* SPISSEL4 Value */
+#define SPI_SLVSEL_SSEL5            0x00002000    /* SPISSEL5 Value */
+#define SPI_SLVSEL_SSEL6            0x00004000    /* SPISSEL6 Value */
+#define SPI_SLVSEL_SSEL7            0x00008000    /* SPISSEL7 Value */
+/* SPI_RWC */
+#define SPI_RWC_VALUE               0x0000FFFF    /* Received Word-Count */
+/* SPI_RWCR */
+#define SPI_RWCR_VALUE              0x0000FFFF    /* Received Word-Count Reload */
+/* SPI_TWC */
+#define SPI_TWC_VALUE               0x0000FFFF    /* Transmitted Word-Count */
+/* SPI_TWCR */
+#define SPI_TWCR_VALUE              0x0000FFFF    /* Transmitted Word-Count Reload */
+/* SPI_IMASK */
+#define SPI_IMSK_RUWM               0x00000002    /* Receive Urgent Water-Mark Interrupt Mask */
+#define SPI_IMSK_TUWM               0x00000004    /* Transmit Urgent Water-Mark Interrupt Mask */
+#define SPI_IMSK_ROM                0x00000010    /* Receive Over-Run Error Interrupt Mask */
+#define SPI_IMSK_TUM                0x00000020    /* Transmit Under-Run Error Interrupt Mask */
+#define SPI_IMSK_TCM                0x00000040    /* Transmit Collision Error Interrupt Mask */
+#define SPI_IMSK_MFM                0x00000080    /* Mode Fault Error Interrupt Mask */
+#define SPI_IMSK_RSM                0x00000100    /* Receive Start Interrupt Mask */
+#define SPI_IMSK_TSM                0x00000200    /* Transmit Start Interrupt Mask */
+#define SPI_IMSK_RFM                0x00000400    /* Receive Finish Interrupt Mask */
+#define SPI_IMSK_TFM                0x00000800    /* Transmit Finish Interrupt Mask */
+/* SPI_IMASKCL */
+#define SPI_IMSK_CLR_RUW            0x00000002    /* Receive Urgent Water-Mark Interrupt Mask */
+#define SPI_IMSK_CLR_TUWM           0x00000004    /* Transmit Urgent Water-Mark Interrupt Mask */
+#define SPI_IMSK_CLR_ROM            0x00000010    /* Receive Over-Run Error Interrupt Mask */
+#define SPI_IMSK_CLR_TUM            0x00000020    /* Transmit Under-Run Error Interrupt Mask */
+#define SPI_IMSK_CLR_TCM            0x00000040    /* Transmit Collision Error Interrupt Mask */
+#define SPI_IMSK_CLR_MFM            0x00000080    /* Mode Fault Error Interrupt Mask */
+#define SPI_IMSK_CLR_RSM            0x00000100    /* Receive Start Interrupt Mask */
+#define SPI_IMSK_CLR_TSM            0x00000200    /* Transmit Start Interrupt Mask */
+#define SPI_IMSK_CLR_RFM            0x00000400    /* Receive Finish Interrupt Mask */
+#define SPI_IMSK_CLR_TFM            0x00000800    /* Transmit Finish Interrupt Mask */
+/* SPI_IMASKST */
+#define SPI_IMSK_SET_RUWM           0x00000002    /* Receive Urgent Water-Mark Interrupt Mask */
+#define SPI_IMSK_SET_TUWM           0x00000004    /* Transmit Urgent Water-Mark Interrupt Mask */
+#define SPI_IMSK_SET_ROM            0x00000010    /* Receive Over-Run Error Interrupt Mask */
+#define SPI_IMSK_SET_TUM            0x00000020    /* Transmit Under-Run Error Interrupt Mask */
+#define SPI_IMSK_SET_TCM            0x00000040    /* Transmit Collision Error Interrupt Mask */
+#define SPI_IMSK_SET_MFM            0x00000080    /* Mode Fault Error Interrupt Mask */
+#define SPI_IMSK_SET_RSM            0x00000100    /* Receive Start Interrupt Mask */
+#define SPI_IMSK_SET_TSM            0x00000200    /* Transmit Start Interrupt Mask */
+#define SPI_IMSK_SET_RFM            0x00000400    /* Receive Finish Interrupt Mask */
+#define SPI_IMSK_SET_TFM            0x00000800    /* Transmit Finish Interrupt Mask */
+/* SPI_STATUS */
+#define SPI_STAT_SPIF               0x00000001    /* SPI Finished */
+#define SPI_STAT_RUWM               0x00000002    /* Receive Urgent Water-Mark Breached */
+#define SPI_STAT_TUWM               0x00000004    /* Transmit Urgent Water-Mark Breached */
+#define SPI_STAT_ROE                0x00000010    /* Receive Over-Run Error Indication */
+#define SPI_STAT_TUE                0x00000020    /* Transmit Under-Run Error Indication */
+#define SPI_STAT_TCE                0x00000040    /* Transmit Collision Error Indication */
+#define SPI_STAT_MODF               0x00000080    /* Mode Fault Error Indication */
+#define SPI_STAT_RS                 0x00000100    /* Receive Start Indication */
+#define SPI_STAT_TS                 0x00000200    /* Transmit Start Indication */
+#define SPI_STAT_RF                 0x00000400    /* Receive Finish Indication */
+#define SPI_STAT_TF                 0x00000800    /* Transmit Finish Indication */
+#define SPI_STAT_RFS                0x00007000    /* SPI_RFIFO status */
+#define SPI_STAT_RFIFO_EMPTY        0x00000000    /* RFS: RFIFO Empty */
+#define SPI_STAT_RFIFO_25           0x00001000    /* RFS: RFIFO 25% Full */
+#define SPI_STAT_RFIFO_50           0x00002000    /* RFS: RFIFO 50% Full */
+#define SPI_STAT_RFIFO_75           0x00003000    /* RFS: RFIFO 75% Full */
+#define SPI_STAT_RFIFO_FULL         0x00004000    /* RFS: RFIFO Full */
+#define SPI_STAT_TFS                0x00070000    /* SPI_TFIFO status */
+#define SPI_STAT_TFIFO_FULL         0x00000000    /* TFS: TFIFO full */
+#define SPI_STAT_TFIFO_25           0x00010000    /* TFS: TFIFO 25% empty */
+#define SPI_STAT_TFIFO_50           0x00020000    /* TFS: TFIFO 50% empty */
+#define SPI_STAT_TFIFO_75           0x00030000    /* TFS: TFIFO 75% empty */
+#define SPI_STAT_TFIFO_EMPTY        0x00040000    /* TFS: TFIFO empty */
+#define SPI_STAT_FCS                0x00100000    /* Flow-Control Stall Indication */
+#define SPI_STAT_RFE                0x00400000    /* SPI_RFIFO Empty */
+#define SPI_STAT_TFF                0x00800000    /* SPI_TFIFO Full */
+/* SPI_ILAT */
+#define SPI_ILAT_RUWMI              0x00000002    /* Receive Urgent Water Mark Interrupt */
+#define SPI_ILAT_TUWMI              0x00000004    /* Transmit Urgent Water Mark Interrupt */
+#define SPI_ILAT_ROI                0x00000010    /* Receive Over-Run Error Indication */
+#define SPI_ILAT_TUI                0x00000020    /* Transmit Under-Run Error Indication */
+#define SPI_ILAT_TCI                0x00000040    /* Transmit Collision Error Indication */
+#define SPI_ILAT_MFI                0x00000080    /* Mode Fault Error Indication */
+#define SPI_ILAT_RSI                0x00000100    /* Receive Start Indication */
+#define SPI_ILAT_TSI                0x00000200    /* Transmit Start Indication */
+#define SPI_ILAT_RFI                0x00000400    /* Receive Finish Indication */
+#define SPI_ILAT_TFI                0x00000800    /* Transmit Finish Indication */
+/* SPI_ILATCL */
+#define SPI_ILAT_CLR_RUWMI          0x00000002    /* Receive Urgent Water Mark Interrupt */
+#define SPI_ILAT_CLR_TUWMI          0x00000004    /* Transmit Urgent Water Mark Interrupt */
+#define SPI_ILAT_CLR_ROI            0x00000010    /* Receive Over-Run Error Indication */
+#define SPI_ILAT_CLR_TUI            0x00000020    /* Transmit Under-Run Error Indication */
+#define SPI_ILAT_CLR_TCI            0x00000040    /* Transmit Collision Error Indication */
+#define SPI_ILAT_CLR_MFI            0x00000080    /* Mode Fault Error Indication */
+#define SPI_ILAT_CLR_RSI            0x00000100    /* Receive Start Indication */
+#define SPI_ILAT_CLR_TSI            0x00000200    /* Transmit Start Indication */
+#define SPI_ILAT_CLR_RFI            0x00000400    /* Receive Finish Indication */
+#define SPI_ILAT_CLR_TFI            0x00000800    /* Transmit Finish Indication */
+
+/*
+ * adi spi3 registers layout
+ */
+struct adi_spi_regs {
+	u32 revid;
+	u32 control;
+	u32 rx_control;
+	u32 tx_control;
+	u32 clock;
+	u32 delay;
+	u32 ssel;
+	u32 rwc;
+	u32 rwcr;
+	u32 twc;
+	u32 twcr;
+	u32 reserved0;
+	u32 emask;
+	u32 emaskcl;
+	u32 emaskst;
+	u32 reserved1;
+	u32 status;
+	u32 elat;
+	u32 elatcl;
+	u32 reserved2;
+	u32 rfifo;
+	u32 reserved3;
+	u32 tfifo;
+};
+
+#define MAX_CTRL_CS          8  /* cs in spi controller */
+
+/* device.platform_data for SSP controller devices */
+struct adi_spi3_master {
+	u16 num_chipselect;
+	u16 pin_req[7];
+};
+
+/* spi_board_info.controller_data for SPI slave devices,
+ * copied to spi_device.platform_data ... mostly for dma tuning
+ */
+struct adi_spi3_chip {
+	u32 control;
+	u16 cs_chg_udelay; /* Some devices require 16-bit delays */
+	u32 tx_dummy_val; /* tx value for rx only transfer */
+	bool enable_dma;
+};
+
+#endif /* _ADI_SPI3_H_ */
-- 
cgit 


From e885cd805fc6e65ef5150a211c7bac02f925af04 Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Fri, 10 Jan 2014 14:26:06 -0500
Subject: efi: create memory map iteration helper

There are a lot of places in the kernel which iterate through an
EFI memory map. Most of these places use essentially the same
for-loop code. This patch adds a for_each_efi_memory_desc()
helper to clean up all of the existing duplicate code and avoid
more in the future.

Signed-off-by: Mark Salter <msalter@redhat.com>
Signed-off-by: Leif Lindholm <leif.lindholm@linaro.org>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 include/linux/efi.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 6c100ff0cae4..82d0abb2b19f 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -863,6 +863,12 @@ extern int efi_set_rtc_mmss(const struct timespec *now);
 extern void efi_reserve_boot_services(void);
 extern struct efi_memory_map memmap;
 
+/* Iterate through an efi_memory_map */
+#define for_each_efi_memory_desc(m, md)					   \
+	for ((md) = (m)->map;						   \
+	     (md) <= (efi_memory_desc_t *)((m)->map_end - (m)->desc_size); \
+	     (md) = (void *)(md) + (m)->desc_size)
+
 /**
  * efi_range_is_wc - check the WC bit on an address range
  * @start: starting kvirt address
-- 
cgit 


From f39d2fa0122e6abd8505a3598f3aa535d0d5aade Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Mon, 24 Feb 2014 18:37:35 +0800
Subject: mtd: spi-nor: copy the SPI NOR commands to a new header file

This patch adds a new header, spi-nor.h,
and copies all the SPI NOR commands and related macros into this new header.

This header can be used by m25p80.c and other spi-nor controllers,
such as Freescale's Quadspi.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Acked-by: Marek Vasut <marex@denx.de>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/spi-nor.h | 55 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 include/linux/mtd/spi-nor.h

(limited to 'include/linux')

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
new file mode 100644
index 000000000000..483fc2a086c4
--- /dev/null
+++ b/include/linux/mtd/spi-nor.h
@@ -0,0 +1,55 @@
+#ifndef __LINUX_MTD_SPI_NOR_H
+#define __LINUX_MTD_SPI_NOR_H
+
+/* Flash opcodes. */
+#define	OPCODE_WREN		0x06	/* Write enable */
+#define	OPCODE_RDSR		0x05	/* Read status register */
+#define	OPCODE_WRSR		0x01	/* Write status register 1 byte */
+#define	OPCODE_NORM_READ	0x03	/* Read data bytes (low frequency) */
+#define	OPCODE_FAST_READ	0x0b	/* Read data bytes (high frequency) */
+#define	OPCODE_DUAL_READ        0x3b    /* Read data bytes (Dual SPI) */
+#define	OPCODE_QUAD_READ        0x6b    /* Read data bytes (Quad SPI) */
+#define	OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
+#define	OPCODE_BE_4K		0x20	/* Erase 4KiB block */
+#define	OPCODE_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
+#define	OPCODE_BE_32K		0x52	/* Erase 32KiB block */
+#define	OPCODE_CHIP_ERASE	0xc7	/* Erase whole flash chip */
+#define	OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
+#define	OPCODE_RDID		0x9f	/* Read JEDEC ID */
+#define	OPCODE_RDCR             0x35    /* Read configuration register */
+
+/* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
+#define	OPCODE_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
+#define	OPCODE_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
+#define	OPCODE_DUAL_READ_4B	0x3c    /* Read data bytes (Dual SPI) */
+#define	OPCODE_QUAD_READ_4B	0x6c    /* Read data bytes (Quad SPI) */
+#define	OPCODE_PP_4B		0x12	/* Page program (up to 256 bytes) */
+#define	OPCODE_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
+
+/* Used for SST flashes only. */
+#define	OPCODE_BP		0x02	/* Byte program */
+#define	OPCODE_WRDI		0x04	/* Write disable */
+#define	OPCODE_AAI_WP		0xad	/* Auto address increment word program */
+
+/* Used for Macronix and Winbond flashes. */
+#define	OPCODE_EN4B		0xb7	/* Enter 4-byte mode */
+#define	OPCODE_EX4B		0xe9	/* Exit 4-byte mode */
+
+/* Used for Spansion flashes only. */
+#define	OPCODE_BRWR		0x17	/* Bank register write */
+
+/* Status Register bits. */
+#define	SR_WIP			1	/* Write in progress */
+#define	SR_WEL			2	/* Write enable latch */
+/* meaning of other SR_* bits may differ between vendors */
+#define	SR_BP0			4	/* Block protect 0 */
+#define	SR_BP1			8	/* Block protect 1 */
+#define	SR_BP2			0x10	/* Block protect 2 */
+#define	SR_SRWD			0x80	/* SR write protect */
+
+#define SR_QUAD_EN_MX           0x40    /* Macronix Quad I/O */
+
+/* Configuration Register bits. */
+#define CR_QUAD_EN_SPAN		0x2     /* Spansion Quad I/O */
+
+#endif
-- 
cgit 


From 6e602ef73334550bbbb8be1041a3ce6eecbd42f1 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Mon, 24 Feb 2014 18:37:36 +0800
Subject: mtd: spi-nor: add the basic data structures

The spi_nor{} is cloned from the m25p{}.
The spi_nor{} can be used by both the m25p80 and spi-nor controller.

We also add the spi_nor_xfer_cfg{} which can be used by the two
fundamental primitives: read_xfer/write_xfer.

 1) the hooks for spi_nor{}:
    @prepare/unprepare: used to do some work before or after the
             read/write/erase/lock/unlock.
    @read_xfer/write_xfer: We can use these two hooks to code all
             the following hooks if the driver tries to implement them
             by itself.
    @read_reg: used to read the registers, such as read status register,
             read configure register.
    @write_reg: used to write the registers, such as write enable,
             erase sector.
    @read_id: read out the ID info.
    @wait_till_ready: wait till the NOR becomes ready.
    @read: read out the data from the NOR.
    @write: write data to the NOR.
    @erase: erase a sector of the NOR.

 2) Add a new field sst_write_second for the SST NOR write.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Acked-by: Marek Vasut <marex@denx.de>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/spi-nor.h | 110 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 483fc2a086c4..3a3c3872c8cd 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -52,4 +52,114 @@
 /* Configuration Register bits. */
 #define CR_QUAD_EN_SPAN		0x2     /* Spansion Quad I/O */
 
+enum read_mode {
+	SPI_NOR_NORMAL = 0,
+	SPI_NOR_FAST,
+	SPI_NOR_DUAL,
+	SPI_NOR_QUAD,
+};
+
+/**
+ * struct spi_nor_xfer_cfg - Structure for defining a Serial Flash transfer
+ * @wren:		command for "Write Enable", or 0x00 for not required
+ * @cmd:		command for operation
+ * @cmd_pins:		number of pins to send @cmd (1, 2, 4)
+ * @addr:		address for operation
+ * @addr_pins:		number of pins to send @addr (1, 2, 4)
+ * @addr_width:		number of address bytes
+ *			(3, 4, or 0 for address not required)
+ * @mode:		mode data
+ * @mode_pins:		number of pins to send @mode (1, 2, 4)
+ * @mode_cycles:	number of mode cycles (0 for mode not required)
+ * @dummy_cycles:	number of dummy cycles (0 for dummy not required)
+ */
+struct spi_nor_xfer_cfg {
+	u8		wren;
+	u8		cmd;
+	u8		cmd_pins;
+	u32		addr;
+	u8		addr_pins;
+	u8		addr_width;
+	u8		mode;
+	u8		mode_pins;
+	u8		mode_cycles;
+	u8		dummy_cycles;
+};
+
+#define	SPI_NOR_MAX_CMD_SIZE	8
+enum spi_nor_ops {
+	SPI_NOR_OPS_READ = 0,
+	SPI_NOR_OPS_WRITE,
+	SPI_NOR_OPS_ERASE,
+	SPI_NOR_OPS_LOCK,
+	SPI_NOR_OPS_UNLOCK,
+};
+
+/**
+ * struct spi_nor - Structure for defining the SPI NOR layer
+ * @mtd:		point to a mtd_info structure
+ * @lock:		the lock for the read/write/erase/lock/unlock operations
+ * @dev:		point to a spi device, or a spi nor controller device.
+ * @page_size:		the page size of the SPI NOR
+ * @addr_width:		number of address bytes
+ * @erase_opcode:	the opcode for erasing a sector
+ * @read_opcode:	the read opcode
+ * @read_dummy:		the dummy needed by the read operation
+ * @program_opcode:	the program opcode
+ * @flash_read:		the mode of the read
+ * @sst_write_second:	used by the SST write operation
+ * @cfg:		used by the read_xfer/write_xfer
+ * @cmd_buf:		used by the write_reg
+ * @prepare:		[OPTIONAL] do some preparations for the
+ *			read/write/erase/lock/unlock operations
+ * @unprepare:		[OPTIONAL] do some post work after the
+ *			read/write/erase/lock/unlock operations
+ * @read_xfer:		[OPTIONAL] the read fundamental primitive
+ * @write_xfer:		[OPTIONAL] the write fundamental primitive
+ * @read_reg:		[DRIVER-SPECIFIC] read out the register
+ * @write_reg:		[DRIVER-SPECIFIC] write data to the register
+ * @read_id:		[REPLACEABLE] read out the ID data, and find
+ *			the proper spi_device_id
+ * @wait_till_ready:	[REPLACEABLE] wait till the NOR becomes ready
+ * @read:		[DRIVER-SPECIFIC] read data from the SPI NOR
+ * @write:		[DRIVER-SPECIFIC] write data to the SPI NOR
+ * @erase:		[DRIVER-SPECIFIC] erase a sector of the SPI NOR
+ *			at the offset @offs
+ * @priv:		the private data
+ */
+struct spi_nor {
+	struct mtd_info		*mtd;
+	struct mutex		lock;
+	struct device		*dev;
+	u32			page_size;
+	u8			addr_width;
+	u8			erase_opcode;
+	u8			read_opcode;
+	u8			read_dummy;
+	u8			program_opcode;
+	enum read_mode		flash_read;
+	bool			sst_write_second;
+	struct spi_nor_xfer_cfg	cfg;
+	u8			cmd_buf[SPI_NOR_MAX_CMD_SIZE];
+
+	int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops);
+	void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops);
+	int (*read_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg,
+			 u8 *buf, size_t len);
+	int (*write_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg,
+			  u8 *buf, size_t len);
+	int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len);
+	int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len,
+			int write_enable);
+	const struct spi_device_id *(*read_id)(struct spi_nor *nor);
+	int (*wait_till_ready)(struct spi_nor *nor);
+
+	int (*read)(struct spi_nor *nor, loff_t from,
+			size_t len, size_t *retlen, u_char *read_buf);
+	void (*write)(struct spi_nor *nor, loff_t to,
+			size_t len, size_t *retlen, const u_char *write_buf);
+	int (*erase)(struct spi_nor *nor, loff_t offs);
+
+	void *priv;
+};
 #endif
-- 
cgit 


From b199489d37b21c5e294f95bf265acc5dde3fc3a2 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Mon, 24 Feb 2014 18:37:37 +0800
Subject: mtd: spi-nor: add the framework for SPI NOR

This patch cloned most of the m25p80.c. In theory, it adds a new spi-nor layer.

Before this patch, the layer is like:

                   MTD
         ------------------------
                  m25p80
         ------------------------
	       spi bus driver
         ------------------------
	        SPI NOR chip

After this patch, the layer is like:
                   MTD
         ------------------------
                  spi-nor
         ------------------------
                  m25p80
         ------------------------
	       spi bus driver
         ------------------------
	       SPI NOR chip

With the spi-nor controller driver(Freescale Quadspi), it looks like:
                   MTD
         ------------------------
                  spi-nor
         ------------------------
                fsl-quadspi
         ------------------------
	       SPI NOR chip

New APIs:
   spi_nor_scan: used to scan a spi-nor flash.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Acked-by: Marek Vasut <marex@denx.de>
[Brian: rebased to include additional m25p_ids[] entry]
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/Kconfig           |    2 +
 drivers/mtd/Makefile          |    1 +
 drivers/mtd/spi-nor/Kconfig   |    6 +
 drivers/mtd/spi-nor/Makefile  |    1 +
 drivers/mtd/spi-nor/spi-nor.c | 1088 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/spi-nor.h   |   20 +
 6 files changed, 1118 insertions(+)
 create mode 100644 drivers/mtd/spi-nor/Kconfig
 create mode 100644 drivers/mtd/spi-nor/Makefile
 create mode 100644 drivers/mtd/spi-nor/spi-nor.c

(limited to 'include/linux')

diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig
index 5d49a2129618..94b821042d9d 100644
--- a/drivers/mtd/Kconfig
+++ b/drivers/mtd/Kconfig
@@ -321,6 +321,8 @@ source "drivers/mtd/onenand/Kconfig"
 
 source "drivers/mtd/lpddr/Kconfig"
 
+source "drivers/mtd/spi-nor/Kconfig"
+
 source "drivers/mtd/ubi/Kconfig"
 
 endif # MTD
diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile
index 4cfb31e6c966..40fd15344387 100644
--- a/drivers/mtd/Makefile
+++ b/drivers/mtd/Makefile
@@ -32,4 +32,5 @@ inftl-objs		:= inftlcore.o inftlmount.o
 
 obj-y		+= chips/ lpddr/ maps/ devices/ nand/ onenand/ tests/
 
+obj-$(CONFIG_MTD_SPI_NOR_BASE)	+= spi-nor/
 obj-$(CONFIG_MTD_UBI)		+= ubi/
diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig
new file mode 100644
index 000000000000..41591af67aca
--- /dev/null
+++ b/drivers/mtd/spi-nor/Kconfig
@@ -0,0 +1,6 @@
+config MTD_SPI_NOR_BASE
+	bool "the framework for SPI-NOR support"
+	depends on MTD
+	help
+	  This is the framework for the SPI NOR which can be used by the SPI
+	  device drivers and the SPI-NOR device driver.
diff --git a/drivers/mtd/spi-nor/Makefile b/drivers/mtd/spi-nor/Makefile
new file mode 100644
index 000000000000..7dfe1f9b6940
--- /dev/null
+++ b/drivers/mtd/spi-nor/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MTD_SPI_NOR_BASE)	+= spi-nor.o
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
new file mode 100644
index 000000000000..50b929095bdb
--- /dev/null
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -0,0 +1,1088 @@
+/*
+ * Cloned most of the code from the m25p80.c
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/math64.h>
+
+#include <linux/mtd/cfi.h>
+#include <linux/mtd/mtd.h>
+#include <linux/of_platform.h>
+#include <linux/spi/flash.h>
+#include <linux/mtd/spi-nor.h>
+
+/* Longest time, in jiffies, to keep polling the status register before we give up. */
+#define	MAX_READY_WAIT_JIFFIES	(40 * HZ) /* M25P16 specs 40s max chip erase */
+
+#define JEDEC_MFR(_jedec_id)	((_jedec_id) >> 16) /* manufacturer byte of a 3-byte id */
+
+/*
+ * Read the status register with a one-byte OPCODE_RDSR transfer.
+ * Returns the register value (0..255) on success, or the negative
+ * error code reported by nor->read_reg().
+ */
+static int read_sr(struct spi_nor *nor)
+{
+	int ret;
+	u8 val;
+
+	ret = nor->read_reg(nor, OPCODE_RDSR, &val, 1);
+	if (ret < 0) {
+		dev_err(nor->dev, "error %d reading SR\n", ret);
+		return ret;
+	}
+
+	return val;
+}
+
+/*
+ * Fetch the configuration register with a one-byte OPCODE_RDCR
+ * transfer.
+ * On success the register contents are returned as a non-negative
+ * int; a failed transfer is logged and its negative error returned.
+ */
+static int read_cr(struct spi_nor *nor)
+{
+	u8 cr;
+	int err = nor->read_reg(nor, OPCODE_RDCR, &cr, 1);
+
+	if (err >= 0)
+		return cr;
+
+	/* The transfer failed: log it and pass the error code up. */
+	dev_err(nor->dev, "error %d reading CR\n", err);
+	return err;
+}
+
+/*
+ * Dummy cycles needed by the read mode selected in nor->flash_read.
+ *
+ * Fast, dual and quad reads need one; a normal read, or any mode
+ * not listed here, needs none.  This is the place to extend when
+ * adding commands with different dummy cycle requirements.
+ */
+static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor)
+{
+	if (nor->flash_read == SPI_NOR_FAST ||
+	    nor->flash_read == SPI_NOR_DUAL ||
+	    nor->flash_read == SPI_NOR_QUAD)
+		return 1;
+
+	/* SPI_NOR_NORMAL, and anything unrecognised */
+	return 0;
+}
+
+/*
+ * Write one byte to the status register (OPCODE_WRSR), staged in nor->cmd_buf.
+ * Returns the result of nor->write_reg(); negative if an error occurred.
+ */
+static inline int write_sr(struct spi_nor *nor, u8 val)
+{
+	nor->cmd_buf[0] = val;
+	return nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 1, 0);
+}
+
+/*
+ * Set the write enable latch by sending the Write Enable (OPCODE_WREN) command.
+ * Returns the result of nor->write_reg(); negative if an error occurred.
+ */
+static inline int write_enable(struct spi_nor *nor)
+{
+	return nor->write_reg(nor, OPCODE_WREN, NULL, 0, 0);
+}
+
+/*
+ * Send the write disable (OPCODE_WRDI) instruction to the chip.
+ */
+static inline int write_disable(struct spi_nor *nor)
+{
+	return nor->write_reg(nor, OPCODE_WRDI, NULL, 0, 0);
+}
+
+static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd)
+{
+	return mtd->priv;	/* presumably set to the owning spi_nor by the driver; not assigned in this file */
+}
+
+/*
+ * Enable/disable 4-byte addressing mode.  Micron (CFI_MFR_ST), Macronix
+ * and Winbond parts take EN4B/EX4B; any other manufacturer gets the
+ * Spansion-style OPCODE_BRWR write.  Returns that write_reg() result.
+ */
+static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable)
+{
+	u32 mfr = JEDEC_MFR(jedec_id);
+	/* Some Micron need WREN command; all will accept it */
+	bool micron = (mfr == CFI_MFR_ST);
+	bool en4b = micron || mfr == CFI_MFR_MACRONIX || mfr == 0xEF; /* winbond */
+	int status;
+	u8 cmd;
+
+	if (!en4b) {
+		/* Spansion style */
+		nor->cmd_buf[0] = enable << 7;
+		return nor->write_reg(nor, OPCODE_BRWR, nor->cmd_buf, 1, 0);
+	}
+
+	if (micron)
+		write_enable(nor);
+	cmd = enable ? OPCODE_EN4B : OPCODE_EX4B;
+	status = nor->write_reg(nor, cmd, NULL, 0, 0);
+	if (micron)
+		write_disable(nor);
+	return status;
+}
+
+/* Poll the status register until SR_WIP clears or the deadline passes. */
+static int spi_nor_wait_till_ready(struct spi_nor *nor)
+{
+	unsigned long deadline = jiffies + MAX_READY_WAIT_JIFFIES;
+	int sr;
+
+	do {
+		cond_resched();
+
+		sr = read_sr(nor);
+		if (sr < 0)
+			return sr;	/* don't disguise an I/O error as a timeout */
+		if (!(sr & SR_WIP))
+			return 0;
+	} while (!time_after_eq(jiffies, deadline));
+
+	/* Deadline passed with the chip still reporting write-in-progress. */
+	return -ETIMEDOUT;
+}
+
+/*
+ * Wait for the flash via the nor->wait_till_ready hook (spi_nor_check()
+ * defaults it to spi_nor_wait_till_ready()).  Returns non-zero if error.
+ */
+static int wait_till_ready(struct spi_nor *nor)
+{
+	return nor->wait_till_ready(nor);
+}
+
+/*
+ * Issue a whole-chip erase (OPCODE_CHIP_ERASE) once any earlier write
+ * has completed.  The erase is only started here, not waited for.
+ * Returns 0 on success; otherwise the non-zero result of the wait or
+ * of the erase command.
+ */
+static int erase_chip(struct spi_nor *nor)
+{
+	int err;
+
+	dev_dbg(nor->dev, " %lldKiB\n", (long long)(nor->mtd->size >> 10));
+
+	/* Let the previous write command finish before touching the chip. */
+	err = wait_till_ready(nor);
+	if (!err) {
+		/* Write enable must precede the erase opcode. */
+		write_enable(nor);
+		err = nor->write_reg(nor, OPCODE_CHIP_ERASE, NULL, 0, 0);
+	}
+	return err;
+}
+
+/* Lock the chip and run the optional ->prepare hook (unlocks on failure). */
+static int spi_nor_lock_and_prep(struct spi_nor *nor, enum spi_nor_ops ops)
+{
+	int err;
+
+	mutex_lock(&nor->lock);
+	if (!nor->prepare)
+		return 0;
+
+	err = nor->prepare(nor, ops);
+	if (err) {
+		dev_err(nor->dev, "failed in the preparation.\n");
+		mutex_unlock(&nor->lock);
+	}
+	return err;
+}
+
+static void spi_nor_unlock_and_unprep(struct spi_nor *nor, enum spi_nor_ops ops)
+{
+	if (nor->unprepare)	/* the hook is optional */
+		nor->unprepare(nor, ops);
+	mutex_unlock(&nor->lock);	/* pairs with mutex_lock() in spi_nor_lock_and_prep() */
+}
+
+/*
+ * Erase an address range on the nor chip.  The range may cover one or
+ * more erase sectors, and its length must be a multiple of the erase
+ * size (-EINVAL otherwise).  A request whose length equals the device
+ * size is done with a single chip erase; anything else is erased one
+ * "sector" at a time.  Any erase failure is reported as -EIO.
+ * Returns 0 on success.
+ *
+ * REVISIT in some cases we could speed up erasing large regions
+ * by using OPCODE_SE instead of OPCODE_BE_4K.  We may have set up
+ * to use "small sector erase", but that's not always optimal.
+ */
+static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	uint32_t remainder;
+	u32 pos, left;
+	int ret;
+
+	dev_dbg(nor->dev, "at 0x%llx, len %lld\n", (long long)instr->addr,
+			(long long)instr->len);
+
+	/* Only whole erase units can be erased. */
+	div_u64_rem(instr->len, mtd->erasesize, &remainder);
+	if (remainder)
+		return -EINVAL;
+
+	pos = instr->addr;
+	left = instr->len;
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_ERASE);
+	if (ret)
+		return ret;
+
+	if (left == mtd->size) {
+		/* whole-chip erase */
+		if (erase_chip(nor))
+			ret = -EIO;
+	} else {
+		/* "sector"-at-a-time erase */
+		for (; left; pos += mtd->erasesize, left -= mtd->erasesize) {
+			if (nor->erase(nor, pos)) {
+				ret = -EIO;
+				break;
+			}
+		}
+	}
+
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_ERASE);
+
+	if (ret) {
+		/* Failure: flag it; the erase callback is not invoked. */
+		instr->state = MTD_ERASE_FAILED;
+		return ret;
+	}
+
+	/* Success: mark the request done and run the erase callback. */
+	instr->state = MTD_ERASE_DONE;
+	mtd_erase_callback(instr);
+
+	return ret;
+}
+
+/* Pick BP2..BP0 from @ofs; SR is written only if that raises the BP value. */
+static int spi_nor_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	uint32_t offset = ofs;
+	uint8_t status_old, status_new;
+	int ret;
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_LOCK);
+	if (ret)
+		return ret;
+
+	ret = wait_till_ready(nor);	/* let the previous command finish */
+	if (ret)
+		goto err;
+	ret = read_sr(nor);		/* negative: the read itself failed */
+	if (ret < 0)
+		goto err;
+	status_old = ret;
+	ret = 0;
+
+	if (offset < mtd->size - (mtd->size / 2))
+		status_new = status_old | SR_BP2 | SR_BP1 | SR_BP0;
+	else if (offset < mtd->size - (mtd->size / 4))
+		status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1;
+	else if (offset < mtd->size - (mtd->size / 8))
+		status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0;
+	else if (offset < mtd->size - (mtd->size / 16))
+		status_new = (status_old & ~(SR_BP0 | SR_BP1)) | SR_BP2;
+	else if (offset < mtd->size - (mtd->size / 32))
+		status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0;
+	else if (offset < mtd->size - (mtd->size / 64))
+		status_new = (status_old & ~(SR_BP2 | SR_BP0)) | SR_BP1;
+	else
+		status_new = (status_old & ~(SR_BP2 | SR_BP1)) | SR_BP0;
+
+	/* Only modify protection if it will not unlock other areas */
+	if ((status_new & (SR_BP2 | SR_BP1 | SR_BP0)) >
+				(status_old & (SR_BP2 | SR_BP1 | SR_BP0))) {
+		write_enable(nor);
+		ret = write_sr(nor, status_new);
+	}
+err:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_LOCK);
+	return ret;
+}
+
+/* Pick BP2..BP0 from @ofs + @len; SR is written only if that lowers the BP value. */
+static int spi_nor_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	uint32_t offset = ofs;
+	uint8_t status_old, status_new;
+	int ret;
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_UNLOCK);
+	if (ret)
+		return ret;
+
+	ret = wait_till_ready(nor);	/* let the previous command finish */
+	if (ret)
+		goto err;
+	ret = read_sr(nor);		/* negative: the read itself failed */
+	if (ret < 0)
+		goto err;
+	status_old = ret;
+	ret = 0;
+
+	if (offset+len > mtd->size - (mtd->size / 64))
+		status_new = status_old & ~(SR_BP2 | SR_BP1 | SR_BP0);
+	else if (offset+len > mtd->size - (mtd->size / 32))
+		status_new = (status_old & ~(SR_BP2 | SR_BP1)) | SR_BP0;
+	else if (offset+len > mtd->size - (mtd->size / 16))
+		status_new = (status_old & ~(SR_BP2 | SR_BP0)) | SR_BP1;
+	else if (offset+len > mtd->size - (mtd->size / 8))
+		status_new = (status_old & ~SR_BP2) | SR_BP1 | SR_BP0;
+	else if (offset+len > mtd->size - (mtd->size / 4))
+		status_new = (status_old & ~(SR_BP0 | SR_BP1)) | SR_BP2;
+	else if (offset+len > mtd->size - (mtd->size / 2))
+		status_new = (status_old & ~SR_BP1) | SR_BP2 | SR_BP0;
+	else
+		status_new = (status_old & ~SR_BP0) | SR_BP2 | SR_BP1;
+
+	/* Only modify protection if it will not lock other areas */
+	if ((status_new & (SR_BP2 | SR_BP1 | SR_BP0)) <
+				(status_old & (SR_BP2 | SR_BP1 | SR_BP0))) {
+		write_enable(nor);
+		ret = write_sr(nor, status_new);
+	}
+err:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_UNLOCK);
+	return ret;
+}
+
+struct flash_info {	/* per-chip data behind spi_nor_ids[].driver_data */
+	/* JEDEC id zero means "no ID" (most older chips); otherwise it has
+	 * a high byte of zero plus three data bytes: the manufacturer id,
+	 * then a two byte device id.
+	 */
+	u32		jedec_id;
+	u16             ext_id;	/* 0 matches any extended id in spi_nor_read_id() */
+
+	/* The size listed here is what works with OPCODE_SE, which isn't
+	 * necessarily called a "sector" by the vendor.
+	 */
+	unsigned	sector_size;
+	u16		n_sectors;	/* mtd->size is sector_size * n_sectors */
+
+	u16		page_size;	/* copied to nor->page_size by spi_nor_scan() */
+	u16		addr_width;	/* 0: spi_nor_scan() picks 3 or 4 from the size */
+
+	u16		flags;
+#define	SECT_4K			0x01	/* OPCODE_BE_4K works uniformly */
+#define	SPI_NOR_NO_ERASE	0x02	/* No erase command needed */
+#define	SST_WRITE		0x04	/* use SST byte programming */
+#define	SPI_NOR_NO_FR		0x08	/* Can't do fastread */
+#define	SECT_4K_PMC		0x10	/* OPCODE_BE_4K_PMC works uniformly */
+#define	SPI_NOR_DUAL_READ	0x20    /* Flash supports Dual Read */
+#define	SPI_NOR_QUAD_READ	0x40    /* Flash supports Quad Read */
+};
+
+#define INFO(_jedec_id, _ext_id, _sector_size, _n_sectors, _flags)	\
+	((kernel_ulong_t)&(struct flash_info) {				\
+		.jedec_id = (_jedec_id),				\
+		.ext_id = (_ext_id),					\
+		.sector_size = (_sector_size),				\
+		.n_sectors = (_n_sectors),				\
+		.page_size = 256,	/* same for every INFO() entry */	\
+		.flags = (_flags),	/* addr_width is left at 0 */	\
+	})
+
+#define CAT25_INFO(_sector_size, _n_sectors, _page_size, _addr_width, _flags)	\
+	((kernel_ulong_t)&(struct flash_info) {	/* jedec_id, ext_id stay 0 */	\
+		.sector_size = (_sector_size),				\
+		.n_sectors = (_n_sectors),				\
+		.page_size = (_page_size),				\
+		.addr_width = (_addr_width),				\
+		.flags = (_flags),					\
+	})
+
+/* NOTE: double check command sets and memory organization when you add
+ * more nor chips.  This current list focuses on newer chips, which
+ * have been converging on command sets which include a JEDEC ID.
+ * Lookups stop at the first match, so list each chip only once.
+ */
+const struct spi_device_id spi_nor_ids[] = {
+	/* Atmel -- some are (confusingly) marketed as "DataFlash" */
+	{ "at25fs010",  INFO(0x1f6601, 0, 32 * 1024,   4, SECT_4K) },
+	{ "at25fs040",  INFO(0x1f6604, 0, 64 * 1024,   8, SECT_4K) },
+
+	{ "at25df041a", INFO(0x1f4401, 0, 64 * 1024,   8, SECT_4K) },
+	{ "at25df321a", INFO(0x1f4701, 0, 64 * 1024,  64, SECT_4K) },
+	{ "at25df641",  INFO(0x1f4800, 0, 64 * 1024, 128, SECT_4K) },
+
+	{ "at26f004",   INFO(0x1f0400, 0, 64 * 1024,  8, SECT_4K) },
+	{ "at26df081a", INFO(0x1f4501, 0, 64 * 1024, 16, SECT_4K) },
+	{ "at26df161a", INFO(0x1f4601, 0, 64 * 1024, 32, SECT_4K) },
+	{ "at26df321",  INFO(0x1f4700, 0, 64 * 1024, 64, SECT_4K) },
+
+	{ "at45db081d", INFO(0x1f2500, 0, 64 * 1024, 16, SECT_4K) },
+
+	/* EON -- en25xxx */
+	{ "en25f32",    INFO(0x1c3116, 0, 64 * 1024,   64, SECT_4K) },
+	{ "en25p32",    INFO(0x1c2016, 0, 64 * 1024,   64, 0) },
+	{ "en25q32b",   INFO(0x1c3016, 0, 64 * 1024,   64, 0) },
+	{ "en25p64",    INFO(0x1c2017, 0, 64 * 1024,  128, 0) },
+	{ "en25q64",    INFO(0x1c3017, 0, 64 * 1024,  128, SECT_4K) },
+	{ "en25qh256",  INFO(0x1c7019, 0, 64 * 1024,  512, 0) },
+
+	/* ESMT */
+	{ "f25l32pa", INFO(0x8c2016, 0, 64 * 1024, 64, SECT_4K) },
+
+	/* Everspin */
+	{ "mr25h256", CAT25_INFO( 32 * 1024, 1, 256, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "mr25h10",  CAT25_INFO(128 * 1024, 1, 256, 3, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+
+	/* GigaDevice */
+	{ "gd25q32", INFO(0xc84016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "gd25q64", INFO(0xc84017, 0, 64 * 1024, 128, SECT_4K) },
+
+	/* Intel/Numonyx -- xxxs33b */
+	{ "160s33b",  INFO(0x898911, 0, 64 * 1024,  32, 0) },
+	{ "320s33b",  INFO(0x898912, 0, 64 * 1024,  64, 0) },
+	{ "640s33b",  INFO(0x898913, 0, 64 * 1024, 128, 0) },
+
+	/* Macronix */
+	{ "mx25l2005a",  INFO(0xc22012, 0, 64 * 1024,   4, SECT_4K) },
+	{ "mx25l4005a",  INFO(0xc22013, 0, 64 * 1024,   8, SECT_4K) },
+	{ "mx25l8005",   INFO(0xc22014, 0, 64 * 1024,  16, 0) },
+	{ "mx25l1606e",  INFO(0xc22015, 0, 64 * 1024,  32, SECT_4K) },
+	{ "mx25l3205d",  INFO(0xc22016, 0, 64 * 1024,  64, 0) },
+	{ "mx25l3255e",  INFO(0xc29e16, 0, 64 * 1024,  64, SECT_4K) },
+	{ "mx25l6405d",  INFO(0xc22017, 0, 64 * 1024, 128, 0) },
+	{ "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) },
+	{ "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) },
+	{ "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, 0) },
+	{ "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) },
+	{ "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_QUAD_READ) },
+	{ "mx66l1g55g",  INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) },
+
+	/* Micron */
+	{ "n25q064",     INFO(0x20ba17, 0, 64 * 1024,  128, 0) },
+	{ "n25q128a11",  INFO(0x20bb18, 0, 64 * 1024,  256, 0) },
+	{ "n25q128a13",  INFO(0x20ba18, 0, 64 * 1024,  256, 0) },
+	{ "n25q256a",    INFO(0x20ba19, 0, 64 * 1024,  512, SECT_4K) },
+	{ "n25q512a",    INFO(0x20bb20, 0, 64 * 1024, 1024, SECT_4K) },
+
+	/* PMC */
+	{ "pm25lv512",   INFO(0,        0, 32 * 1024,    2, SECT_4K_PMC) },
+	{ "pm25lv010",   INFO(0,        0, 32 * 1024,    4, SECT_4K_PMC) },
+	{ "pm25lq032",   INFO(0x7f9d46, 0, 64 * 1024,   64, SECT_4K) },
+
+	/* Spansion -- single (large) sector size only, at least
+	 * for the chips listed here (without boot sectors).
+	 */
+	{ "s25sl032p",  INFO(0x010215, 0x4d00,  64 * 1024,  64, 0) },
+	{ "s25sl064p",  INFO(0x010216, 0x4d00,  64 * 1024, 128, 0) },
+	{ "s25fl256s0", INFO(0x010219, 0x4d00, 256 * 1024, 128, 0) },
+	{ "s25fl256s1", INFO(0x010219, 0x4d01,  64 * 1024, 512, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{ "s25fl512s",  INFO(0x010220, 0x4d00, 256 * 1024, 256, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) },
+	{ "s70fl01gs",  INFO(0x010221, 0x4d00, 256 * 1024, 256, 0) },
+	{ "s25sl12800", INFO(0x012018, 0x0300, 256 * 1024,  64, 0) },
+	{ "s25sl12801", INFO(0x012018, 0x0301,  64 * 1024, 256, 0) },
+	{ "s25fl129p0", INFO(0x012018, 0x4d00, 256 * 1024,  64, 0) },
+	{ "s25fl129p1", INFO(0x012018, 0x4d01,  64 * 1024, 256, 0) },
+	{ "s25sl004a",  INFO(0x010212,      0,  64 * 1024,   8, 0) },
+	{ "s25sl008a",  INFO(0x010213,      0,  64 * 1024,  16, 0) },
+	{ "s25sl016a",  INFO(0x010214,      0,  64 * 1024,  32, 0) },
+	{ "s25sl032a",  INFO(0x010215,      0,  64 * 1024,  64, 0) },
+	{ "s25sl064a",  INFO(0x010216,      0,  64 * 1024, 128, 0) },
+	{ "s25fl008k",  INFO(0xef4014,      0,  64 * 1024,  16, SECT_4K) },
+	{ "s25fl016k",  INFO(0xef4015,      0,  64 * 1024,  32, SECT_4K) },
+	{ "s25fl064k",  INFO(0xef4017,      0,  64 * 1024, 128, SECT_4K) },
+
+	/* SST -- large erase sizes are "overlays", "sectors" are 4K */
+	{ "sst25vf040b", INFO(0xbf258d, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
+	{ "sst25vf080b", INFO(0xbf258e, 0, 64 * 1024, 16, SECT_4K | SST_WRITE) },
+	{ "sst25vf016b", INFO(0xbf2541, 0, 64 * 1024, 32, SECT_4K | SST_WRITE) },
+	{ "sst25vf032b", INFO(0xbf254a, 0, 64 * 1024, 64, SECT_4K | SST_WRITE) },
+	{ "sst25vf064c", INFO(0xbf254b, 0, 64 * 1024, 128, SECT_4K) },
+	{ "sst25wf512",  INFO(0xbf2501, 0, 64 * 1024,  1, SECT_4K | SST_WRITE) },
+	{ "sst25wf010",  INFO(0xbf2502, 0, 64 * 1024,  2, SECT_4K | SST_WRITE) },
+	{ "sst25wf020",  INFO(0xbf2503, 0, 64 * 1024,  4, SECT_4K | SST_WRITE) },
+	{ "sst25wf040",  INFO(0xbf2504, 0, 64 * 1024,  8, SECT_4K | SST_WRITE) },
+
+	/* ST Microelectronics -- newer production may have feature updates */
+	{ "m25p05",  INFO(0x202010,  0,  32 * 1024,   2, 0) },
+	{ "m25p10",  INFO(0x202011,  0,  32 * 1024,   4, 0) },
+	{ "m25p20",  INFO(0x202012,  0,  64 * 1024,   4, 0) },
+	{ "m25p40",  INFO(0x202013,  0,  64 * 1024,   8, 0) },
+	{ "m25p80",  INFO(0x202014,  0,  64 * 1024,  16, 0) },
+	{ "m25p16",  INFO(0x202015,  0,  64 * 1024,  32, 0) },
+	{ "m25p32",  INFO(0x202016,  0,  64 * 1024,  64, 0) },
+	{ "m25p64",  INFO(0x202017,  0,  64 * 1024, 128, 0) },
+	{ "m25p128", INFO(0x202018,  0, 256 * 1024,  64, 0) },
+	{ "n25q032", INFO(0x20ba16,  0,  64 * 1024,  64, 0) },
+
+	{ "m25p05-nonjedec",  INFO(0, 0,  32 * 1024,   2, 0) },
+	{ "m25p10-nonjedec",  INFO(0, 0,  32 * 1024,   4, 0) },
+	{ "m25p20-nonjedec",  INFO(0, 0,  64 * 1024,   4, 0) },
+	{ "m25p40-nonjedec",  INFO(0, 0,  64 * 1024,   8, 0) },
+	{ "m25p80-nonjedec",  INFO(0, 0,  64 * 1024,  16, 0) },
+	{ "m25p16-nonjedec",  INFO(0, 0,  64 * 1024,  32, 0) },
+	{ "m25p32-nonjedec",  INFO(0, 0,  64 * 1024,  64, 0) },
+	{ "m25p64-nonjedec",  INFO(0, 0,  64 * 1024, 128, 0) },
+	{ "m25p128-nonjedec", INFO(0, 0, 256 * 1024,  64, 0) },
+
+	{ "m45pe10", INFO(0x204011,  0, 64 * 1024,    2, 0) },
+	{ "m45pe80", INFO(0x204014,  0, 64 * 1024,   16, 0) },
+	{ "m45pe16", INFO(0x204015,  0, 64 * 1024,   32, 0) },
+
+	{ "m25pe20", INFO(0x208012,  0, 64 * 1024,  4,       0) },
+	{ "m25pe80", INFO(0x208014,  0, 64 * 1024, 16,       0) },
+	{ "m25pe16", INFO(0x208015,  0, 64 * 1024, 32, SECT_4K) },
+
+	{ "m25px16",    INFO(0x207115,  0, 64 * 1024, 32, SECT_4K) },
+	{ "m25px32",    INFO(0x207116,  0, 64 * 1024, 64, SECT_4K) },
+	{ "m25px32-s0", INFO(0x207316,  0, 64 * 1024, 64, SECT_4K) },
+	{ "m25px32-s1", INFO(0x206316,  0, 64 * 1024, 64, SECT_4K) },
+	{ "m25px64",    INFO(0x207117,  0, 64 * 1024, 128, 0) },
+
+	/* Winbond -- w25x "blocks" are 64K, "sectors" are 4KiB */
+	{ "w25x10", INFO(0xef3011, 0, 64 * 1024,  2,  SECT_4K) },
+	{ "w25x20", INFO(0xef3012, 0, 64 * 1024,  4,  SECT_4K) },
+	{ "w25x40", INFO(0xef3013, 0, 64 * 1024,  8,  SECT_4K) },
+	{ "w25x80", INFO(0xef3014, 0, 64 * 1024,  16, SECT_4K) },
+	{ "w25x16", INFO(0xef3015, 0, 64 * 1024,  32, SECT_4K) },
+	{ "w25x32", INFO(0xef3016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "w25q32", INFO(0xef4016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "w25q32dw", INFO(0xef6016, 0, 64 * 1024,  64, SECT_4K) },
+	{ "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) },
+	{ "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K) },
+	{ "w25q128", INFO(0xef4018, 0, 64 * 1024, 256, SECT_4K) },
+	{ "w25q80", INFO(0xef5014, 0, 64 * 1024,  16, SECT_4K) },
+	{ "w25q80bl", INFO(0xef4014, 0, 64 * 1024,  16, SECT_4K) },
+	{ "w25q256", INFO(0xef4019, 0, 64 * 1024, 512, SECT_4K) },
+
+	/* Catalyst / On Semiconductor -- non-JEDEC */
+	{ "cat25c11", CAT25_INFO(  16, 8, 16, 1, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "cat25c03", CAT25_INFO(  32, 8, 16, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "cat25c09", CAT25_INFO( 128, 8, 32, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "cat25c17", CAT25_INFO( 256, 8, 32, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ "cat25128", CAT25_INFO(2048, 8, 64, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) },
+	{ },
+};
+
+/* Identify the chip from its five RDID bytes: returns the first matching
+ * spi_nor_ids[] entry, or an ERR_PTR() if the read fails or nothing matches.
+ */
+static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor)
+{
+	const struct spi_device_id *end = spi_nor_ids + ARRAY_SIZE(spi_nor_ids) - 1;
+	const struct spi_device_id *cur;
+	struct flash_info *info;
+	u8 id[5];
+	u32 jedec;
+	u16 ext_jedec;
+	int err;
+
+	err = nor->read_reg(nor, OPCODE_RDID, id, 5);
+	if (err < 0) {
+		dev_dbg(nor->dev, " error %d reading JEDEC ID\n", err);
+		return ERR_PTR(err);
+	}
+
+	jedec = (id[0] << 16) | (id[1] << 8) | id[2];
+	ext_jedec = id[3] << 8 | id[4];
+	for (cur = spi_nor_ids; cur != end; cur++) {
+		info = (void *)cur->driver_data;
+		if (info->jedec_id != jedec)
+			continue;
+		if (info->ext_id == 0 || info->ext_id == ext_jedec)
+			return cur;
+	}
+	dev_err(nor->dev, "unrecognized JEDEC id %06x\n", jedec);
+	return ERR_PTR(-ENODEV);
+}
+
+static const struct spi_device_id *jedec_probe(struct spi_nor *nor)
+{
+	return nor->read_id(nor);	/* hook; spi_nor_check() defaults it to spi_nor_read_id() */
+}
+
+/* mtd->_read hook: forward the request to nor->read() with the chip locked. */
+static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len,
+			size_t *retlen, u_char *buf)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	int err;
+
+	dev_dbg(nor->dev, "from 0x%08x, len %zd\n", (u32)from, len);
+
+	err = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_READ);
+	if (!err) {
+		err = nor->read(nor, from, len, retlen, buf);
+		spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_READ);
+	}
+
+	return err;
+}
+
+/*
+ * mtd->_write hook for SST parts.  A leading byte at an odd address and
+ * a leftover trailing byte are programmed with OPCODE_BP, the byte pairs
+ * in between with OPCODE_AAI_WP.  Returns 0 or a lock/wait error.
+ */
+static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
+		size_t *retlen, const u_char *buf)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	size_t actual;
+	int ret;
+
+	dev_dbg(nor->dev, "to 0x%08x, len %zd\n", (u32)to, len);
+	if (!len)	/* nothing to do, and "len - 1" below would wrap */
+		return 0;
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_WRITE);
+	if (ret)
+		return ret;
+
+	/* Wait until finished previous write command. */
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto time_out;
+
+	write_enable(nor);
+	nor->sst_write_second = false;
+
+	actual = to % 2;
+	/* Start write from odd address. */
+	if (actual) {
+		nor->program_opcode = OPCODE_BP;
+		nor->write(nor, to, 1, retlen, buf);	/* write one byte. */
+		ret = wait_till_ready(nor);
+		if (ret)
+			goto time_out;
+	}
+	to += actual;
+
+	/* Write out most of the data here. */
+	for (; actual < len - 1; actual += 2) {
+		nor->program_opcode = OPCODE_AAI_WP;
+		nor->write(nor, to, 2, retlen, buf + actual);	/* write two bytes. */
+		ret = wait_till_ready(nor);
+		if (ret)
+			goto time_out;
+		to += 2;
+		nor->sst_write_second = true;
+	}
+	nor->sst_write_second = false;
+
+	write_disable(nor);
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto time_out;
+
+	/* Write out trailing byte if it exists. */
+	if (actual != len) {
+		write_enable(nor);
+		nor->program_opcode = OPCODE_BP;
+		nor->write(nor, to, 1, retlen, buf + actual);
+		ret = wait_till_ready(nor);
+		if (ret)
+			goto time_out;
+		write_disable(nor);
+	}
+time_out:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE);
+	return ret;
+}
+
+/*
+ * Write an address range to the nor chip in chunks that never cross a
+ * nor->page_size boundary.  The range may be any size within the physical
+ * boundaries.  Returns 0, or the error from locking/waiting for the chip.
+ */
+static int spi_nor_write(struct mtd_info *mtd, loff_t to, size_t len,
+	size_t *retlen, const u_char *buf)
+{
+	struct spi_nor *nor = mtd_to_spi_nor(mtd);
+	u32 page_offset, page_size, i;
+	int ret;
+
+	dev_dbg(nor->dev, "to 0x%08x, len %zd\n", (u32)to, len);
+
+	ret = spi_nor_lock_and_prep(nor, SPI_NOR_OPS_WRITE);
+	if (ret)
+		return ret;
+
+	/* Wait until finished previous write command. */
+	ret = wait_till_ready(nor);
+	if (ret)
+		goto write_err;
+
+	write_enable(nor);
+
+	page_offset = to & (nor->page_size - 1);
+
+	/* do all the bytes fit onto one page? */
+	if (page_offset + len <= nor->page_size) {
+		nor->write(nor, to, len, retlen, buf);
+	} else {
+		/* the size of data remaining on the first page */
+		page_size = nor->page_size - page_offset;
+		nor->write(nor, to, page_size, retlen, buf);
+
+		/* write everything in nor->page_size chunks */
+		for (i = page_size; i < len; i += page_size) {
+			page_size = len - i;
+			if (page_size > nor->page_size)
+				page_size = nor->page_size;
+			ret = wait_till_ready(nor);	/* previous chunk */
+			if (ret)
+				goto write_err;
+			write_enable(nor);
+			nor->write(nor, to + i, page_size, retlen, buf + i);
+		}
+	}
+
+write_err:
+	spi_nor_unlock_and_unprep(nor, SPI_NOR_OPS_WRITE);
+	return ret;
+}
+
+/* Set SR_QUAD_EN_MX in the status register and verify it; non-zero on failure. */
+static int macronix_quad_enable(struct spi_nor *nor)
+{
+	int ret, val;
+
+	val = read_sr(nor);
+	if (val < 0)	/* never write a failed read back into SR */
+		return val;
+	write_enable(nor);
+	nor->cmd_buf[0] = val | SR_QUAD_EN_MX;
+	nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 1, 0);
+	if (wait_till_ready(nor))
+		return 1;
+
+	ret = read_sr(nor);
+	if (!(ret > 0 && (ret & SR_QUAD_EN_MX))) {
+		dev_err(nor->dev, "Macronix Quad bit not set\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Write the status register and the configuration register with one
+ * two-byte OPCODE_WRSR transfer: the low byte of @val goes to the status
+ * register, the high byte to the configuration register.
+ * Returns negative if an error occurred.
+ */
+static int write_sr_cr(struct spi_nor *nor, u16 val)
+{
+	nor->cmd_buf[0] = val & 0xff;
+	nor->cmd_buf[1] = (val >> 8);
+
+	return nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 2, 0);
+}
+
+/* Turn on the Spansion quad-enable bit in the configuration register. */
+static int spansion_quad_enable(struct spi_nor *nor)
+{
+	int cr;
+
+	write_enable(nor);
+
+	/* SR byte is written as 0, CR byte carries only the quad bit */
+	if (write_sr_cr(nor, CR_QUAD_EN_SPAN << 8) < 0) {
+		dev_err(nor->dev,
+			"error while writing configuration register\n");
+		return -EINVAL;
+	}
+
+	/* read back and check it */
+	cr = read_cr(nor);
+	if (cr <= 0 || !(cr & CR_QUAD_EN_SPAN)) {
+		dev_err(nor->dev, "Spansion Quad bit not set\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Enable quad mode: Macronix parts via macronix_quad_enable(), all others
+ * via spansion_quad_enable().  Returns 0 on success, -EINVAL otherwise.
+ */
+static int set_quad_mode(struct spi_nor *nor, u32 jedec_id)
+{
+	bool macronix = JEDEC_MFR(jedec_id) == CFI_MFR_MACRONIX;
+	int status;
+
+	if (macronix)
+		status = macronix_quad_enable(nor);
+	else
+		status = spansion_quad_enable(nor);
+	if (!status)
+		return 0;
+
+	if (macronix)
+		dev_err(nor->dev, "Macronix quad-read not enabled\n");
+	else
+		dev_err(nor->dev, "Spansion quad-read not enabled\n");
+	return -EINVAL;
+}
+
+/* Verify the mandatory fields are set and fill in defaults for optional hooks. */
+static int spi_nor_check(struct spi_nor *nor)
+{
+	if (!(nor->dev && nor->read && nor->write &&
+	      nor->read_reg && nor->write_reg && nor->erase)) {
+		pr_err("spi-nor: please fill all the necessary fields!\n");
+		return -EINVAL;
+	}
+
+	if (!nor->wait_till_ready)
+		nor->wait_till_ready = spi_nor_wait_till_ready;
+	if (!nor->read_id)
+		nor->read_id = spi_nor_read_id;
+	return 0;
+}
+
+/* Identify the flash and fill in @nor and nor->mtd; see the kernel-doc in spi-nor.h. */
+int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
+			enum read_mode mode)
+{
+	struct flash_info		*info;
+	struct flash_platform_data	*data;
+	struct mtd_info *mtd = nor->mtd;
+	struct device_node *np;
+	struct device *dev;
+	int ret, i;
+
+	ret = spi_nor_check(nor);
+	if (ret)
+		return ret;
+	dev = nor->dev;		/* only known to be non-NULL after spi_nor_check() */
+	np = dev->of_node;
+
+	/* Platform data helps sort out which chip type we have, as
+	 * well as how this board partitions it.  If we don't have
+	 * a chip ID, try the JEDEC id commands; they'll work for most
+	 * newer chips, even if we don't recognize the particular chip.
+	 */
+	data = dev_get_platdata(dev);
+	if (data && data->type) {
+		const struct spi_device_id *plat_id;
+
+		for (i = 0; i < ARRAY_SIZE(spi_nor_ids) - 1; i++) {
+			plat_id = &spi_nor_ids[i];
+			if (strcmp(data->type, plat_id->name))
+				continue;
+			break;
+		}
+
+		if (i < ARRAY_SIZE(spi_nor_ids) - 1)
+			id = plat_id;
+		else
+			dev_warn(dev, "unrecognized id %s\n", data->type);
+	}
+
+	info = (void *)id->driver_data;
+
+	if (info->jedec_id) {
+		const struct spi_device_id *jid;
+
+		jid = jedec_probe(nor);
+		if (IS_ERR(jid)) {
+			return PTR_ERR(jid);
+		} else if (jid != id) {
+			/*
+			 * JEDEC knows better, so overwrite platform ID. We
+			 * can't trust partitions any longer, but we'll let
+			 * mtd apply them anyway, since some partitions may be
+			 * marked read-only, and we don't want to lose that
+			 * information, even if it's not 100% accurate.
+			 */
+			dev_warn(dev, "found %s, expected %s\n",
+				 jid->name, id->name);
+			id = jid;
+			info = (void *)jid->driver_data;
+		}
+	}
+
+	mutex_init(&nor->lock);
+
+	/*
+	 * Atmel, SST and Intel/Numonyx serial nor tend to power
+	 * up with the software protection bits set
+	 */
+	if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ATMEL ||
+	    JEDEC_MFR(info->jedec_id) == CFI_MFR_INTEL ||
+	    JEDEC_MFR(info->jedec_id) == CFI_MFR_SST) {
+		write_enable(nor);
+		write_sr(nor, 0);
+	}
+
+	if (data && data->name)
+		mtd->name = data->name;
+	else
+		mtd->name = dev_name(dev);
+
+	mtd->type = MTD_NORFLASH;
+	mtd->writesize = 1;
+	mtd->flags = MTD_CAP_NORFLASH;
+	mtd->size = info->sector_size * info->n_sectors;
+	mtd->_erase = spi_nor_erase;
+	mtd->_read = spi_nor_read;
+
+	/* nor protection support for STmicro chips */
+	if (JEDEC_MFR(info->jedec_id) == CFI_MFR_ST) {
+		mtd->_lock = spi_nor_lock;
+		mtd->_unlock = spi_nor_unlock;
+	}
+
+	/* sst nor chips use AAI word program */
+	if (info->flags & SST_WRITE)
+		mtd->_write = sst_write;
+	else
+		mtd->_write = spi_nor_write;
+
+	/* prefer "small sector" erase if possible */
+	if (info->flags & SECT_4K) {
+		nor->erase_opcode = OPCODE_BE_4K;
+		mtd->erasesize = 4096;
+	} else if (info->flags & SECT_4K_PMC) {
+		nor->erase_opcode = OPCODE_BE_4K_PMC;
+		mtd->erasesize = 4096;
+	} else {
+		nor->erase_opcode = OPCODE_SE;
+		mtd->erasesize = info->sector_size;
+	}
+
+	if (info->flags & SPI_NOR_NO_ERASE)
+		mtd->flags |= MTD_NO_ERASE;
+
+	mtd->dev.parent = dev;
+	nor->page_size = info->page_size;
+	mtd->writebufsize = nor->page_size;
+
+	if (np) {
+		/* If we were instantiated by DT, use it */
+		if (of_property_read_bool(np, "m25p,fast-read"))
+			nor->flash_read = SPI_NOR_FAST;
+		else
+			nor->flash_read = SPI_NOR_NORMAL;
+	} else {
+		/* If we weren't instantiated by DT, default to fast-read */
+		nor->flash_read = SPI_NOR_FAST;
+	}
+
+	/* Some devices cannot do fast-read, no matter what DT tells us */
+	if (info->flags & SPI_NOR_NO_FR)
+		nor->flash_read = SPI_NOR_NORMAL;
+
+	/* Quad/Dual-read mode takes precedence over fast/normal */
+	if (mode == SPI_NOR_QUAD && info->flags & SPI_NOR_QUAD_READ) {
+		ret = set_quad_mode(nor, info->jedec_id);
+		if (ret) {
+			dev_err(dev, "quad mode not supported\n");
+			return ret;
+		}
+		nor->flash_read = SPI_NOR_QUAD;
+	} else if (mode == SPI_NOR_DUAL && info->flags & SPI_NOR_DUAL_READ) {
+		nor->flash_read = SPI_NOR_DUAL;
+	}
+
+	/* Default commands */
+	switch (nor->flash_read) {
+	case SPI_NOR_QUAD:
+		nor->read_opcode = OPCODE_QUAD_READ;
+		break;
+	case SPI_NOR_DUAL:
+		nor->read_opcode = OPCODE_DUAL_READ;
+		break;
+	case SPI_NOR_FAST:
+		nor->read_opcode = OPCODE_FAST_READ;
+		break;
+	case SPI_NOR_NORMAL:
+		nor->read_opcode = OPCODE_NORM_READ;
+		break;
+	default:
+		dev_err(dev, "No Read opcode defined\n");
+		return -EINVAL;
+	}
+
+	nor->program_opcode = OPCODE_PP;
+
+	if (info->addr_width)
+		nor->addr_width = info->addr_width;
+	else if (mtd->size > 0x1000000) {
+		/* enable 4-byte addressing if the device exceeds 16MiB */
+		nor->addr_width = 4;
+		if (JEDEC_MFR(info->jedec_id) == CFI_MFR_AMD) {
+			/* Dedicated 4-byte command set */
+			switch (nor->flash_read) {
+			case SPI_NOR_QUAD:
+				nor->read_opcode = OPCODE_QUAD_READ_4B;
+				break;
+			case SPI_NOR_DUAL:
+				nor->read_opcode = OPCODE_DUAL_READ_4B;
+				break;
+			case SPI_NOR_FAST:
+				nor->read_opcode = OPCODE_FAST_READ_4B;
+				break;
+			case SPI_NOR_NORMAL:
+				nor->read_opcode = OPCODE_NORM_READ_4B;
+				break;
+			}
+			nor->program_opcode = OPCODE_PP_4B;
+			/* No small sector erase for 4-byte command set */
+			nor->erase_opcode = OPCODE_SE_4B;
+			mtd->erasesize = info->sector_size;
+		} else
+			set_4byte(nor, info->jedec_id, 1);
+	} else {
+		nor->addr_width = 3;
+	}
+
+	nor->read_dummy = spi_nor_read_dummy_cycles(nor);
+
+	dev_info(dev, "%s (%lld Kbytes)\n", id->name,
+			(long long)mtd->size >> 10);
+
+	dev_dbg(dev,
+		"mtd .name = %s, .size = 0x%llx (%lldMiB), "
+		".erasesize = 0x%.8x (%uKiB) .numeraseregions = %d\n",
+		mtd->name, (long long)mtd->size, (long long)(mtd->size >> 20),
+		mtd->erasesize, mtd->erasesize / 1024, mtd->numeraseregions);
+
+	for (i = 0; i < mtd->numeraseregions; i++)
+		dev_dbg(dev,
+			"mtd.eraseregions[%d] = { .offset = 0x%llx, "
+			".erasesize = 0x%.8x (%uKiB), "
+			".numblocks = %d }\n",
+			i, (long long)mtd->eraseregions[i].offset,
+			mtd->eraseregions[i].erasesize,
+			mtd->eraseregions[i].erasesize / 1024,
+			mtd->eraseregions[i].numblocks);
+	return 0;
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Huang Shijie <shijie8@gmail.com>");
+MODULE_AUTHOR("Mike Lavender");
+MODULE_DESCRIPTION("framework for SPI NOR");
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 3a3c3872c8cd..16d8409abcdc 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -162,4 +162,24 @@ struct spi_nor {
 
 	void *priv;
 };
+
+/**
+ * spi_nor_scan() - scan the SPI NOR
+ * @nor:	the spi_nor structure
+ * @id:		the spi_device_id provided by the driver
+ * @mode:	the read mode supported by the driver
+ *
+ * The drivers can use this function to scan the SPI NOR.
+ * In the scanning, it will try to get all the necessary information to
+ * fill the mtd_info{} and the spi_nor{}.
+ *
+ * The board may assign a spi_device_id via @id, which is compared with
+ * the spi_device_id detected by the scanning.
+ *
+ * Return: 0 for success, others for failure.
+ */
+int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
+			enum read_mode mode);
+extern const struct spi_device_id spi_nor_ids[];
+
 #endif
-- 
cgit 


From 0d8c11c01274bde227d368daa8954911dd324a9f Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Mon, 24 Feb 2014 18:37:40 +0800
Subject: mtd: spi-nor: add a helper to find the spi_device_id

Add the spi_nor_match_id() to find the proper spi_device_id with the
NOR flash's name in the spi_nor_ids table.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Acked-by: Marek Vasut <marex@denx.de>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 12 ++++++++++++
 include/linux/mtd/spi-nor.h   | 12 ++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 50b929095bdb..f7c9e638623b 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1082,6 +1082,18 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 	return 0;
 }
 
+const struct spi_device_id *spi_nor_match_id(char *name)
+{
+	const struct spi_device_id *id = spi_nor_ids;
+
+	while (id->name[0]) {
+		if (!strcmp(name, id->name))
+			return id;
+		id++;
+	}
+	return NULL;
+}
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Huang Shijie <shijie8@gmail.com>");
 MODULE_AUTHOR("Mike Lavender");
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 16d8409abcdc..41dae78fbd1d 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -182,4 +182,16 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 			enum read_mode mode);
 extern const struct spi_device_id spi_nor_ids[];
 
+/**
+ * spi_nor_match_id() - find the spi_device_id by the name
+ * @name:	the name of the spi_device_id
+ *
+ * The drivers use this function to find the spi_device_id
+ * specified by the @name.
+ *
+ * Return: the matching spi_device_id pointer on success,
+ *         or NULL if no entry matches @name.
+ */
+const struct spi_device_id *spi_nor_match_id(char *name);
+
 #endif
-- 
cgit 


From 8eabdd1ec122cf6b77cf73e798f134fbace1b8d1 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Thu, 10 Apr 2014 16:27:28 +0800
Subject: mtd: spi-nor: add the copyright information

Add the copyright information for spi-nor.c and spi-nor.h.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 6 +++++-
 include/linux/mtd/spi-nor.h   | 9 +++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 5cd86eb2a5f0..6c64ab95dee2 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1,5 +1,9 @@
 /*
- * Cloned most of the code from the m25p80.c
+ * Based on m25p80.c, by Mike Lavender (mike@steroidmicros.com), with
+ * influence from lart.c (Abraham Van Der Merwe) and mtd_dataflash.c
+ *
+ * Copyright (C) 2005, Intec Automation Inc.
+ * Copyright (C) 2014, Freescale Semiconductor, Inc.
  *
  * This code is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 41dae78fbd1d..9428d285489b 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -1,3 +1,12 @@
+/*
+ * Copyright (C) 2014 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
 #ifndef __LINUX_MTD_SPI_NOR_H
 #define __LINUX_MTD_SPI_NOR_H
 
-- 
cgit 


From becd0cb8666de4bfaaf6eb3042f69066c8fb8677 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Tue, 8 Apr 2014 18:10:23 -0700
Subject: mtd: spi-nor: drop \t after #define

Spacing is a little non-standard here. Fix up tabs vs. spaces.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Acked-by: Huang Shijie <b32955@freescale.com>
Reviewed-by: Marek Vasut <marex@denx.de>
---
 include/linux/mtd/spi-nor.h | 72 ++++++++++++++++++++++-----------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 9428d285489b..a6e87190ead1 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -11,55 +11,55 @@
 #define __LINUX_MTD_SPI_NOR_H
 
 /* Flash opcodes. */
-#define	OPCODE_WREN		0x06	/* Write enable */
-#define	OPCODE_RDSR		0x05	/* Read status register */
-#define	OPCODE_WRSR		0x01	/* Write status register 1 byte */
-#define	OPCODE_NORM_READ	0x03	/* Read data bytes (low frequency) */
-#define	OPCODE_FAST_READ	0x0b	/* Read data bytes (high frequency) */
-#define	OPCODE_DUAL_READ        0x3b    /* Read data bytes (Dual SPI) */
-#define	OPCODE_QUAD_READ        0x6b    /* Read data bytes (Quad SPI) */
-#define	OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
-#define	OPCODE_BE_4K		0x20	/* Erase 4KiB block */
-#define	OPCODE_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
-#define	OPCODE_BE_32K		0x52	/* Erase 32KiB block */
-#define	OPCODE_CHIP_ERASE	0xc7	/* Erase whole flash chip */
-#define	OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
-#define	OPCODE_RDID		0x9f	/* Read JEDEC ID */
-#define	OPCODE_RDCR             0x35    /* Read configuration register */
+#define OPCODE_WREN		0x06	/* Write enable */
+#define OPCODE_RDSR		0x05	/* Read status register */
+#define OPCODE_WRSR		0x01	/* Write status register 1 byte */
+#define OPCODE_NORM_READ	0x03	/* Read data bytes (low frequency) */
+#define OPCODE_FAST_READ	0x0b	/* Read data bytes (high frequency) */
+#define OPCODE_DUAL_READ	0x3b	/* Read data bytes (Dual SPI) */
+#define OPCODE_QUAD_READ	0x6b	/* Read data bytes (Quad SPI) */
+#define OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
+#define OPCODE_BE_4K		0x20	/* Erase 4KiB block */
+#define OPCODE_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
+#define OPCODE_BE_32K		0x52	/* Erase 32KiB block */
+#define OPCODE_CHIP_ERASE	0xc7	/* Erase whole flash chip */
+#define OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
+#define OPCODE_RDID		0x9f	/* Read JEDEC ID */
+#define OPCODE_RDCR		0x35	/* Read configuration register */
 
 /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
-#define	OPCODE_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
-#define	OPCODE_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
-#define	OPCODE_DUAL_READ_4B	0x3c    /* Read data bytes (Dual SPI) */
-#define	OPCODE_QUAD_READ_4B	0x6c    /* Read data bytes (Quad SPI) */
-#define	OPCODE_PP_4B		0x12	/* Page program (up to 256 bytes) */
-#define	OPCODE_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
+#define OPCODE_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
+#define OPCODE_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
+#define OPCODE_DUAL_READ_4B	0x3c	/* Read data bytes (Dual SPI) */
+#define OPCODE_QUAD_READ_4B	0x6c	/* Read data bytes (Quad SPI) */
+#define OPCODE_PP_4B		0x12	/* Page program (up to 256 bytes) */
+#define OPCODE_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
 
 /* Used for SST flashes only. */
-#define	OPCODE_BP		0x02	/* Byte program */
-#define	OPCODE_WRDI		0x04	/* Write disable */
-#define	OPCODE_AAI_WP		0xad	/* Auto address increment word program */
+#define OPCODE_BP		0x02	/* Byte program */
+#define OPCODE_WRDI		0x04	/* Write disable */
+#define OPCODE_AAI_WP		0xad	/* Auto address increment word program */
 
 /* Used for Macronix and Winbond flashes. */
-#define	OPCODE_EN4B		0xb7	/* Enter 4-byte mode */
-#define	OPCODE_EX4B		0xe9	/* Exit 4-byte mode */
+#define OPCODE_EN4B		0xb7	/* Enter 4-byte mode */
+#define OPCODE_EX4B		0xe9	/* Exit 4-byte mode */
 
 /* Used for Spansion flashes only. */
-#define	OPCODE_BRWR		0x17	/* Bank register write */
+#define OPCODE_BRWR		0x17	/* Bank register write */
 
 /* Status Register bits. */
-#define	SR_WIP			1	/* Write in progress */
-#define	SR_WEL			2	/* Write enable latch */
+#define SR_WIP			1	/* Write in progress */
+#define SR_WEL			2	/* Write enable latch */
 /* meaning of other SR_* bits may differ between vendors */
-#define	SR_BP0			4	/* Block protect 0 */
-#define	SR_BP1			8	/* Block protect 1 */
-#define	SR_BP2			0x10	/* Block protect 2 */
-#define	SR_SRWD			0x80	/* SR write protect */
+#define SR_BP0			4	/* Block protect 0 */
+#define SR_BP1			8	/* Block protect 1 */
+#define SR_BP2			0x10	/* Block protect 2 */
+#define SR_SRWD			0x80	/* SR write protect */
 
-#define SR_QUAD_EN_MX           0x40    /* Macronix Quad I/O */
+#define SR_QUAD_EN_MX		0x40	/* Macronix Quad I/O */
 
 /* Configuration Register bits. */
-#define CR_QUAD_EN_SPAN		0x2     /* Spansion Quad I/O */
+#define CR_QUAD_EN_SPAN		0x2	/* Spansion Quad I/O */
 
 enum read_mode {
 	SPI_NOR_NORMAL = 0,
@@ -95,7 +95,7 @@ struct spi_nor_xfer_cfg {
 	u8		dummy_cycles;
 };
 
-#define	SPI_NOR_MAX_CMD_SIZE	8
+#define SPI_NOR_MAX_CMD_SIZE	8
 enum spi_nor_ops {
 	SPI_NOR_OPS_READ = 0,
 	SPI_NOR_OPS_WRITE,
-- 
cgit 


From b02e7f3ef0beb72da8fc64542f0ac977996ec56b Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Tue, 8 Apr 2014 18:15:31 -0700
Subject: mtd: spi-nor: re-name OPCODE_* to SPINOR_OP_*

Qualify these with a better namespace, and prepare them for use in more
drivers.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Reviewed-by: Marek Vasut <marex@denx.de>
Acked-by: Huang Shijie <b32955@freescale.com>
---
 drivers/mtd/devices/m25p80.c      |  4 +--
 drivers/mtd/spi-nor/fsl-quadspi.c | 58 +++++++++++++++++------------------
 drivers/mtd/spi-nor/spi-nor.c     | 64 +++++++++++++++++++--------------------
 include/linux/mtd/spi-nor.h       | 54 ++++++++++++++++-----------------
 4 files changed, 90 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c
index 4af6400ccd95..1557d8f672c1 100644
--- a/drivers/mtd/devices/m25p80.c
+++ b/drivers/mtd/devices/m25p80.c
@@ -86,7 +86,7 @@ static void m25p80_write(struct spi_nor *nor, loff_t to, size_t len,
 
 	spi_message_init(&m);
 
-	if (nor->program_opcode == OPCODE_AAI_WP && nor->sst_write_second)
+	if (nor->program_opcode == SPINOR_OP_AAI_WP && nor->sst_write_second)
 		cmd_sz = 1;
 
 	flash->command[0] = nor->program_opcode;
@@ -171,7 +171,7 @@ static int m25p80_erase(struct spi_nor *nor, loff_t offset)
 		return ret;
 
 	/* Send write enable, then erase commands. */
-	ret = nor->write_reg(nor, OPCODE_WREN, NULL, 0, 0);
+	ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 6dc08ed950c8..2977f026f39d 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -294,12 +294,12 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 	lut_base = SEQID_QUAD_READ * 4;
 
 	if (q->nor_size <= SZ_16M) {
-		cmd = OPCODE_QUAD_READ;
+		cmd = SPINOR_OP_QUAD_READ;
 		addrlen = ADDR24BIT;
 		dummy = 8;
 	} else {
 		/* use the 4-byte address */
-		cmd = OPCODE_QUAD_READ;
+		cmd = SPINOR_OP_QUAD_READ;
 		addrlen = ADDR32BIT;
 		dummy = 8;
 	}
@@ -311,17 +311,17 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 
 	/* Write enable */
 	lut_base = SEQID_WREN * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_WREN), base + QUADSPI_LUT(lut_base));
+	writel(LUT0(CMD, PAD1, SPINOR_OP_WREN), base + QUADSPI_LUT(lut_base));
 
 	/* Page Program */
 	lut_base = SEQID_PP * 4;
 
 	if (q->nor_size <= SZ_16M) {
-		cmd = OPCODE_PP;
+		cmd = SPINOR_OP_PP;
 		addrlen = ADDR24BIT;
 	} else {
 		/* use the 4-byte address */
-		cmd = OPCODE_PP;
+		cmd = SPINOR_OP_PP;
 		addrlen = ADDR32BIT;
 	}
 
@@ -331,18 +331,18 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 
 	/* Read Status */
 	lut_base = SEQID_RDSR * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_RDSR) | LUT1(READ, PAD1, 0x1),
+	writel(LUT0(CMD, PAD1, SPINOR_OP_RDSR) | LUT1(READ, PAD1, 0x1),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Erase a sector */
 	lut_base = SEQID_SE * 4;
 
 	if (q->nor_size <= SZ_16M) {
-		cmd = OPCODE_SE;
+		cmd = SPINOR_OP_SE;
 		addrlen = ADDR24BIT;
 	} else {
 		/* use the 4-byte address */
-		cmd = OPCODE_SE;
+		cmd = SPINOR_OP_SE;
 		addrlen = ADDR32BIT;
 	}
 
@@ -351,35 +351,35 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 
 	/* Erase the whole chip */
 	lut_base = SEQID_CHIP_ERASE * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_CHIP_ERASE),
+	writel(LUT0(CMD, PAD1, SPINOR_OP_CHIP_ERASE),
 			base + QUADSPI_LUT(lut_base));
 
 	/* READ ID */
 	lut_base = SEQID_RDID * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_RDID) | LUT1(READ, PAD1, 0x8),
+	writel(LUT0(CMD, PAD1, SPINOR_OP_RDID) | LUT1(READ, PAD1, 0x8),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Write Register */
 	lut_base = SEQID_WRSR * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_WRSR) | LUT1(WRITE, PAD1, 0x2),
+	writel(LUT0(CMD, PAD1, SPINOR_OP_WRSR) | LUT1(WRITE, PAD1, 0x2),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Read Configuration Register */
 	lut_base = SEQID_RDCR * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_RDCR) | LUT1(READ, PAD1, 0x1),
+	writel(LUT0(CMD, PAD1, SPINOR_OP_RDCR) | LUT1(READ, PAD1, 0x1),
 			base + QUADSPI_LUT(lut_base));
 
 	/* Write disable */
 	lut_base = SEQID_WRDI * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_WRDI), base + QUADSPI_LUT(lut_base));
+	writel(LUT0(CMD, PAD1, SPINOR_OP_WRDI), base + QUADSPI_LUT(lut_base));
 
 	/* Enter 4 Byte Mode (Micron) */
 	lut_base = SEQID_EN4B * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_EN4B), base + QUADSPI_LUT(lut_base));
+	writel(LUT0(CMD, PAD1, SPINOR_OP_EN4B), base + QUADSPI_LUT(lut_base));
 
 	/* Enter 4 Byte Mode (Spansion) */
 	lut_base = SEQID_BRWR * 4;
-	writel(LUT0(CMD, PAD1, OPCODE_BRWR), base + QUADSPI_LUT(lut_base));
+	writel(LUT0(CMD, PAD1, SPINOR_OP_BRWR), base + QUADSPI_LUT(lut_base));
 
 	fsl_qspi_lock_lut(q);
 }
@@ -388,29 +388,29 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
 {
 	switch (cmd) {
-	case OPCODE_QUAD_READ:
+	case SPINOR_OP_QUAD_READ:
 		return SEQID_QUAD_READ;
-	case OPCODE_WREN:
+	case SPINOR_OP_WREN:
 		return SEQID_WREN;
-	case OPCODE_WRDI:
+	case SPINOR_OP_WRDI:
 		return SEQID_WRDI;
-	case OPCODE_RDSR:
+	case SPINOR_OP_RDSR:
 		return SEQID_RDSR;
-	case OPCODE_SE:
+	case SPINOR_OP_SE:
 		return SEQID_SE;
-	case OPCODE_CHIP_ERASE:
+	case SPINOR_OP_CHIP_ERASE:
 		return SEQID_CHIP_ERASE;
-	case OPCODE_PP:
+	case SPINOR_OP_PP:
 		return SEQID_PP;
-	case OPCODE_RDID:
+	case SPINOR_OP_RDID:
 		return SEQID_RDID;
-	case OPCODE_WRSR:
+	case SPINOR_OP_WRSR:
 		return SEQID_WRSR;
-	case OPCODE_RDCR:
+	case SPINOR_OP_RDCR:
 		return SEQID_RDCR;
-	case OPCODE_EN4B:
+	case SPINOR_OP_EN4B:
 		return SEQID_EN4B;
-	case OPCODE_BRWR:
+	case SPINOR_OP_BRWR:
 		return SEQID_BRWR;
 	default:
 		dev_err(q->dev, "Unsupported cmd 0x%.2x\n", cmd);
@@ -688,7 +688,7 @@ static int fsl_qspi_write_reg(struct spi_nor *nor, u8 opcode, u8 *buf, int len,
 		if (ret)
 			return ret;
 
-		if (opcode == OPCODE_CHIP_ERASE)
+		if (opcode == SPINOR_OP_CHIP_ERASE)
 			fsl_qspi_invalid(q);
 
 	} else if (len > 0) {
@@ -750,7 +750,7 @@ static int fsl_qspi_erase(struct spi_nor *nor, loff_t offs)
 		return ret;
 
 	/* Send write enable, then erase commands. */
-	ret = nor->write_reg(nor, OPCODE_WREN, NULL, 0, 0);
+	ret = nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 6c64ab95dee2..1716f3ce9949 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -38,7 +38,7 @@ static int read_sr(struct spi_nor *nor)
 	int ret;
 	u8 val;
 
-	ret = nor->read_reg(nor, OPCODE_RDSR, &val, 1);
+	ret = nor->read_reg(nor, SPINOR_OP_RDSR, &val, 1);
 	if (ret < 0) {
 		pr_err("error %d reading SR\n", (int) ret);
 		return ret;
@@ -57,7 +57,7 @@ static int read_cr(struct spi_nor *nor)
 	int ret;
 	u8 val;
 
-	ret = nor->read_reg(nor, OPCODE_RDCR, &val, 1);
+	ret = nor->read_reg(nor, SPINOR_OP_RDCR, &val, 1);
 	if (ret < 0) {
 		dev_err(nor->dev, "error %d reading CR\n", ret);
 		return ret;
@@ -91,7 +91,7 @@ static inline int spi_nor_read_dummy_cycles(struct spi_nor *nor)
 static inline int write_sr(struct spi_nor *nor, u8 val)
 {
 	nor->cmd_buf[0] = val;
-	return nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 1, 0);
+	return nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1, 0);
 }
 
 /*
@@ -100,7 +100,7 @@ static inline int write_sr(struct spi_nor *nor, u8 val)
  */
 static inline int write_enable(struct spi_nor *nor)
 {
-	return nor->write_reg(nor, OPCODE_WREN, NULL, 0, 0);
+	return nor->write_reg(nor, SPINOR_OP_WREN, NULL, 0, 0);
 }
 
 /*
@@ -108,7 +108,7 @@ static inline int write_enable(struct spi_nor *nor)
  */
 static inline int write_disable(struct spi_nor *nor)
 {
-	return nor->write_reg(nor, OPCODE_WRDI, NULL, 0, 0);
+	return nor->write_reg(nor, SPINOR_OP_WRDI, NULL, 0, 0);
 }
 
 static inline struct spi_nor *mtd_to_spi_nor(struct mtd_info *mtd)
@@ -132,7 +132,7 @@ static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable)
 		if (need_wren)
 			write_enable(nor);
 
-		cmd = enable ? OPCODE_EN4B : OPCODE_EX4B;
+		cmd = enable ? SPINOR_OP_EN4B : SPINOR_OP_EX4B;
 		status = nor->write_reg(nor, cmd, NULL, 0, 0);
 		if (need_wren)
 			write_disable(nor);
@@ -141,7 +141,7 @@ static inline int set_4byte(struct spi_nor *nor, u32 jedec_id, int enable)
 	default:
 		/* Spansion style */
 		nor->cmd_buf[0] = enable << 7;
-		return nor->write_reg(nor, OPCODE_BRWR, nor->cmd_buf, 1, 0);
+		return nor->write_reg(nor, SPINOR_OP_BRWR, nor->cmd_buf, 1, 0);
 	}
 }
 
@@ -193,7 +193,7 @@ static int erase_chip(struct spi_nor *nor)
 	/* Send write enable, then erase commands. */
 	write_enable(nor);
 
-	return nor->write_reg(nor, OPCODE_CHIP_ERASE, NULL, 0, 0);
+	return nor->write_reg(nor, SPINOR_OP_CHIP_ERASE, NULL, 0, 0);
 }
 
 static int spi_nor_lock_and_prep(struct spi_nor *nor, enum spi_nor_ops ops)
@@ -253,7 +253,7 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
 		}
 
 	/* REVISIT in some cases we could speed up erasing large regions
-	 * by using OPCODE_SE instead of OPCODE_BE_4K.  We may have set up
+	 * by using SPINOR_OP_SE instead of SPINOR_OP_BE_4K.  We may have set up
 	 * to use "small sector erase", but that's not always optimal.
 	 */
 
@@ -385,7 +385,7 @@ struct flash_info {
 	u32		jedec_id;
 	u16             ext_id;
 
-	/* The size listed here is what works with OPCODE_SE, which isn't
+	/* The size listed here is what works with SPINOR_OP_SE, which isn't
 	 * necessarily called a "sector" by the vendor.
 	 */
 	unsigned	sector_size;
@@ -395,11 +395,11 @@ struct flash_info {
 	u16		addr_width;
 
 	u16		flags;
-#define	SECT_4K			0x01	/* OPCODE_BE_4K works uniformly */
+#define	SECT_4K			0x01	/* SPINOR_OP_BE_4K works uniformly */
 #define	SPI_NOR_NO_ERASE	0x02	/* No erase command needed */
 #define	SST_WRITE		0x04	/* use SST byte programming */
 #define	SPI_NOR_NO_FR		0x08	/* Can't do fastread */
-#define	SECT_4K_PMC		0x10	/* OPCODE_BE_4K_PMC works uniformly */
+#define	SECT_4K_PMC		0x10	/* SPINOR_OP_BE_4K_PMC works uniformly */
 #define	SPI_NOR_DUAL_READ	0x20    /* Flash supports Dual Read */
 #define	SPI_NOR_QUAD_READ	0x40    /* Flash supports Quad Read */
 };
@@ -598,7 +598,7 @@ static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor)
 	u16                     ext_jedec;
 	struct flash_info	*info;
 
-	tmp = nor->read_reg(nor, OPCODE_RDID, id, 5);
+	tmp = nor->read_reg(nor, SPINOR_OP_RDID, id, 5);
 	if (tmp < 0) {
 		dev_dbg(nor->dev, " error %d reading JEDEC ID\n", tmp);
 		return ERR_PTR(tmp);
@@ -670,7 +670,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 	actual = to % 2;
 	/* Start write from odd address. */
 	if (actual) {
-		nor->program_opcode = OPCODE_BP;
+		nor->program_opcode = SPINOR_OP_BP;
 
 		/* write one byte. */
 		nor->write(nor, to, 1, retlen, buf);
@@ -682,7 +682,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 
 	/* Write out most of the data here. */
 	for (; actual < len - 1; actual += 2) {
-		nor->program_opcode = OPCODE_AAI_WP;
+		nor->program_opcode = SPINOR_OP_AAI_WP;
 
 		/* write two bytes. */
 		nor->write(nor, to, 2, retlen, buf + actual);
@@ -703,7 +703,7 @@ static int sst_write(struct mtd_info *mtd, loff_t to, size_t len,
 	if (actual != len) {
 		write_enable(nor);
 
-		nor->program_opcode = OPCODE_BP;
+		nor->program_opcode = SPINOR_OP_BP;
 		nor->write(nor, to, 1, retlen, buf + actual);
 
 		ret = wait_till_ready(nor);
@@ -777,7 +777,7 @@ static int macronix_quad_enable(struct spi_nor *nor)
 	write_enable(nor);
 
 	nor->cmd_buf[0] = val | SR_QUAD_EN_MX;
-	nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 1, 0);
+	nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 1, 0);
 
 	if (wait_till_ready(nor))
 		return 1;
@@ -802,7 +802,7 @@ static int write_sr_cr(struct spi_nor *nor, u16 val)
 	nor->cmd_buf[0] = val & 0xff;
 	nor->cmd_buf[1] = (val >> 8);
 
-	return nor->write_reg(nor, OPCODE_WRSR, nor->cmd_buf, 2, 0);
+	return nor->write_reg(nor, SPINOR_OP_WRSR, nor->cmd_buf, 2, 0);
 }
 
 static int spansion_quad_enable(struct spi_nor *nor)
@@ -967,13 +967,13 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 
 	/* prefer "small sector" erase if possible */
 	if (info->flags & SECT_4K) {
-		nor->erase_opcode = OPCODE_BE_4K;
+		nor->erase_opcode = SPINOR_OP_BE_4K;
 		mtd->erasesize = 4096;
 	} else if (info->flags & SECT_4K_PMC) {
-		nor->erase_opcode = OPCODE_BE_4K_PMC;
+		nor->erase_opcode = SPINOR_OP_BE_4K_PMC;
 		mtd->erasesize = 4096;
 	} else {
-		nor->erase_opcode = OPCODE_SE;
+		nor->erase_opcode = SPINOR_OP_SE;
 		mtd->erasesize = info->sector_size;
 	}
 
@@ -1014,23 +1014,23 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 	/* Default commands */
 	switch (nor->flash_read) {
 	case SPI_NOR_QUAD:
-		nor->read_opcode = OPCODE_QUAD_READ;
+		nor->read_opcode = SPINOR_OP_QUAD_READ;
 		break;
 	case SPI_NOR_DUAL:
-		nor->read_opcode = OPCODE_DUAL_READ;
+		nor->read_opcode = SPINOR_OP_DUAL_READ;
 		break;
 	case SPI_NOR_FAST:
-		nor->read_opcode = OPCODE_FAST_READ;
+		nor->read_opcode = SPINOR_OP_FAST_READ;
 		break;
 	case SPI_NOR_NORMAL:
-		nor->read_opcode = OPCODE_NORM_READ;
+		nor->read_opcode = SPINOR_OP_NORM_READ;
 		break;
 	default:
 		dev_err(dev, "No Read opcode defined\n");
 		return -EINVAL;
 	}
 
-	nor->program_opcode = OPCODE_PP;
+	nor->program_opcode = SPINOR_OP_PP;
 
 	if (info->addr_width)
 		nor->addr_width = info->addr_width;
@@ -1041,21 +1041,21 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 			/* Dedicated 4-byte command set */
 			switch (nor->flash_read) {
 			case SPI_NOR_QUAD:
-				nor->read_opcode = OPCODE_QUAD_READ_4B;
+				nor->read_opcode = SPINOR_OP_QUAD_READ_4B;
 				break;
 			case SPI_NOR_DUAL:
-				nor->read_opcode = OPCODE_DUAL_READ_4B;
+				nor->read_opcode = SPINOR_OP_DUAL_READ_4B;
 				break;
 			case SPI_NOR_FAST:
-				nor->read_opcode = OPCODE_FAST_READ_4B;
+				nor->read_opcode = SPINOR_OP_FAST_READ_4B;
 				break;
 			case SPI_NOR_NORMAL:
-				nor->read_opcode = OPCODE_NORM_READ_4B;
+				nor->read_opcode = SPINOR_OP_NORM_READ_4B;
 				break;
 			}
-			nor->program_opcode = OPCODE_PP_4B;
+			nor->program_opcode = SPINOR_OP_PP_4B;
 			/* No small sector erase for 4-byte command set */
-			nor->erase_opcode = OPCODE_SE_4B;
+			nor->erase_opcode = SPINOR_OP_SE_4B;
 			mtd->erasesize = info->sector_size;
 		} else
 			set_4byte(nor, info->jedec_id, 1);
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index a6e87190ead1..f1fe1a6659a3 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -11,41 +11,41 @@
 #define __LINUX_MTD_SPI_NOR_H
 
 /* Flash opcodes. */
-#define OPCODE_WREN		0x06	/* Write enable */
-#define OPCODE_RDSR		0x05	/* Read status register */
-#define OPCODE_WRSR		0x01	/* Write status register 1 byte */
-#define OPCODE_NORM_READ	0x03	/* Read data bytes (low frequency) */
-#define OPCODE_FAST_READ	0x0b	/* Read data bytes (high frequency) */
-#define OPCODE_DUAL_READ	0x3b	/* Read data bytes (Dual SPI) */
-#define OPCODE_QUAD_READ	0x6b	/* Read data bytes (Quad SPI) */
-#define OPCODE_PP		0x02	/* Page program (up to 256 bytes) */
-#define OPCODE_BE_4K		0x20	/* Erase 4KiB block */
-#define OPCODE_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
-#define OPCODE_BE_32K		0x52	/* Erase 32KiB block */
-#define OPCODE_CHIP_ERASE	0xc7	/* Erase whole flash chip */
-#define OPCODE_SE		0xd8	/* Sector erase (usually 64KiB) */
-#define OPCODE_RDID		0x9f	/* Read JEDEC ID */
-#define OPCODE_RDCR		0x35	/* Read configuration register */
+#define SPINOR_OP_WREN		0x06	/* Write enable */
+#define SPINOR_OP_RDSR		0x05	/* Read status register */
+#define SPINOR_OP_WRSR		0x01	/* Write status register 1 byte */
+#define SPINOR_OP_NORM_READ	0x03	/* Read data bytes (low frequency) */
+#define SPINOR_OP_FAST_READ	0x0b	/* Read data bytes (high frequency) */
+#define SPINOR_OP_DUAL_READ	0x3b	/* Read data bytes (Dual SPI) */
+#define SPINOR_OP_QUAD_READ	0x6b	/* Read data bytes (Quad SPI) */
+#define SPINOR_OP_PP		0x02	/* Page program (up to 256 bytes) */
+#define SPINOR_OP_BE_4K		0x20	/* Erase 4KiB block */
+#define SPINOR_OP_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
+#define SPINOR_OP_BE_32K	0x52	/* Erase 32KiB block */
+#define SPINOR_OP_CHIP_ERASE	0xc7	/* Erase whole flash chip */
+#define SPINOR_OP_SE		0xd8	/* Sector erase (usually 64KiB) */
+#define SPINOR_OP_RDID		0x9f	/* Read JEDEC ID */
+#define SPINOR_OP_RDCR		0x35	/* Read configuration register */
 
 /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
-#define OPCODE_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
-#define OPCODE_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
-#define OPCODE_DUAL_READ_4B	0x3c	/* Read data bytes (Dual SPI) */
-#define OPCODE_QUAD_READ_4B	0x6c	/* Read data bytes (Quad SPI) */
-#define OPCODE_PP_4B		0x12	/* Page program (up to 256 bytes) */
-#define OPCODE_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
+#define SPINOR_OP_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
+#define SPINOR_OP_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
+#define SPINOR_OP_DUAL_READ_4B	0x3c	/* Read data bytes (Dual SPI) */
+#define SPINOR_OP_QUAD_READ_4B	0x6c	/* Read data bytes (Quad SPI) */
+#define SPINOR_OP_PP_4B		0x12	/* Page program (up to 256 bytes) */
+#define SPINOR_OP_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
 
 /* Used for SST flashes only. */
-#define OPCODE_BP		0x02	/* Byte program */
-#define OPCODE_WRDI		0x04	/* Write disable */
-#define OPCODE_AAI_WP		0xad	/* Auto address increment word program */
+#define SPINOR_OP_BP		0x02	/* Byte program */
+#define SPINOR_OP_WRDI		0x04	/* Write disable */
+#define SPINOR_OP_AAI_WP	0xad	/* Auto address increment word program */
 
 /* Used for Macronix and Winbond flashes. */
-#define OPCODE_EN4B		0xb7	/* Enter 4-byte mode */
-#define OPCODE_EX4B		0xe9	/* Exit 4-byte mode */
+#define SPINOR_OP_EN4B		0xb7	/* Enter 4-byte mode */
+#define SPINOR_OP_EX4B		0xe9	/* Exit 4-byte mode */
 
 /* Used for Spansion flashes only. */
-#define OPCODE_BRWR		0x17	/* Bank register write */
+#define SPINOR_OP_BRWR		0x17	/* Bank register write */
 
 /* Status Register bits. */
 #define SR_WIP			1	/* Write in progress */
-- 
cgit 


From 58b89a1f4c2a65b10b8f7b90b6ff2161b19bb0d1 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Tue, 8 Apr 2014 19:16:49 -0700
Subject: mtd: spi-nor: unify read opcode variants with ST SPI FSM

serial_flash_cmds.h defines our opcodes a little differently. Let's
borrow its naming, since it's borrowed from the SFDP standard, and it's
more extensible.

This prepares us for merging serial_flash_cmds.h and spi-nor.h opcode
listing.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Reviewed-by: Marek Vasut <marex@denx.de>
Acked-by: Huang Shijie <b32955@freescale.com>
---
 drivers/mtd/spi-nor/fsl-quadspi.c |  6 +++---
 drivers/mtd/spi-nor/spi-nor.c     | 16 ++++++++--------
 include/linux/mtd/spi-nor.h       | 24 ++++++++++++++++--------
 3 files changed, 27 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c b/drivers/mtd/spi-nor/fsl-quadspi.c
index 2977f026f39d..b41bbbc531ff 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -294,12 +294,12 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 	lut_base = SEQID_QUAD_READ * 4;
 
 	if (q->nor_size <= SZ_16M) {
-		cmd = SPINOR_OP_QUAD_READ;
+		cmd = SPINOR_OP_READ_1_1_4;
 		addrlen = ADDR24BIT;
 		dummy = 8;
 	} else {
 		/* use the 4-byte address */
-		cmd = SPINOR_OP_QUAD_READ;
+		cmd = SPINOR_OP_READ_1_1_4;
 		addrlen = ADDR32BIT;
 		dummy = 8;
 	}
@@ -388,7 +388,7 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
 static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
 {
 	switch (cmd) {
-	case SPINOR_OP_QUAD_READ:
+	case SPINOR_OP_READ_1_1_4:
 		return SEQID_QUAD_READ;
 	case SPINOR_OP_WREN:
 		return SEQID_WREN;
diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 1716f3ce9949..d6f44d527701 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -1014,16 +1014,16 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 	/* Default commands */
 	switch (nor->flash_read) {
 	case SPI_NOR_QUAD:
-		nor->read_opcode = SPINOR_OP_QUAD_READ;
+		nor->read_opcode = SPINOR_OP_READ_1_1_4;
 		break;
 	case SPI_NOR_DUAL:
-		nor->read_opcode = SPINOR_OP_DUAL_READ;
+		nor->read_opcode = SPINOR_OP_READ_1_1_2;
 		break;
 	case SPI_NOR_FAST:
-		nor->read_opcode = SPINOR_OP_FAST_READ;
+		nor->read_opcode = SPINOR_OP_READ_FAST;
 		break;
 	case SPI_NOR_NORMAL:
-		nor->read_opcode = SPINOR_OP_NORM_READ;
+		nor->read_opcode = SPINOR_OP_READ;
 		break;
 	default:
 		dev_err(dev, "No Read opcode defined\n");
@@ -1041,16 +1041,16 @@ int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id,
 			/* Dedicated 4-byte command set */
 			switch (nor->flash_read) {
 			case SPI_NOR_QUAD:
-				nor->read_opcode = SPINOR_OP_QUAD_READ_4B;
+				nor->read_opcode = SPINOR_OP_READ4_1_1_4;
 				break;
 			case SPI_NOR_DUAL:
-				nor->read_opcode = SPINOR_OP_DUAL_READ_4B;
+				nor->read_opcode = SPINOR_OP_READ4_1_1_2;
 				break;
 			case SPI_NOR_FAST:
-				nor->read_opcode = SPINOR_OP_FAST_READ_4B;
+				nor->read_opcode = SPINOR_OP_READ4_FAST;
 				break;
 			case SPI_NOR_NORMAL:
-				nor->read_opcode = SPINOR_OP_NORM_READ_4B;
+				nor->read_opcode = SPINOR_OP_READ4;
 				break;
 			}
 			nor->program_opcode = SPINOR_OP_PP_4B;
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index f1fe1a6659a3..53241842a7ab 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -10,14 +10,22 @@
 #ifndef __LINUX_MTD_SPI_NOR_H
 #define __LINUX_MTD_SPI_NOR_H
 
+/*
+ * Note on opcode nomenclature: some opcodes have a format like
+ * SPINOR_OP_FUNCTION{4,}_x_y_z. The numbers x, y, and z stand for the number
+ * of I/O lines used for the opcode, address, and data (respectively). The
+ * FUNCTION has an optional suffix of '4', to represent an opcode which
+ * requires a 4-byte (32-bit) address.
+ */
+
 /* Flash opcodes. */
 #define SPINOR_OP_WREN		0x06	/* Write enable */
 #define SPINOR_OP_RDSR		0x05	/* Read status register */
 #define SPINOR_OP_WRSR		0x01	/* Write status register 1 byte */
-#define SPINOR_OP_NORM_READ	0x03	/* Read data bytes (low frequency) */
-#define SPINOR_OP_FAST_READ	0x0b	/* Read data bytes (high frequency) */
-#define SPINOR_OP_DUAL_READ	0x3b	/* Read data bytes (Dual SPI) */
-#define SPINOR_OP_QUAD_READ	0x6b	/* Read data bytes (Quad SPI) */
+#define SPINOR_OP_READ		0x03	/* Read data bytes (low frequency) */
+#define SPINOR_OP_READ_FAST	0x0b	/* Read data bytes (high frequency) */
+#define SPINOR_OP_READ_1_1_2	0x3b	/* Read data bytes (Dual SPI) */
+#define SPINOR_OP_READ_1_1_4	0x6b	/* Read data bytes (Quad SPI) */
 #define SPINOR_OP_PP		0x02	/* Page program (up to 256 bytes) */
 #define SPINOR_OP_BE_4K		0x20	/* Erase 4KiB block */
 #define SPINOR_OP_BE_4K_PMC	0xd7	/* Erase 4KiB block on PMC chips */
@@ -28,10 +36,10 @@
 #define SPINOR_OP_RDCR		0x35	/* Read configuration register */
 
 /* 4-byte address opcodes - used on Spansion and some Macronix flashes. */
-#define SPINOR_OP_NORM_READ_4B	0x13	/* Read data bytes (low frequency) */
-#define SPINOR_OP_FAST_READ_4B	0x0c	/* Read data bytes (high frequency) */
-#define SPINOR_OP_DUAL_READ_4B	0x3c	/* Read data bytes (Dual SPI) */
-#define SPINOR_OP_QUAD_READ_4B	0x6c	/* Read data bytes (Quad SPI) */
+#define SPINOR_OP_READ4		0x13	/* Read data bytes (low frequency) */
+#define SPINOR_OP_READ4_FAST	0x0c	/* Read data bytes (high frequency) */
+#define SPINOR_OP_READ4_1_1_2	0x3c	/* Read data bytes (Dual SPI) */
+#define SPINOR_OP_READ4_1_1_4	0x6c	/* Read data bytes (Quad SPI) */
 #define SPINOR_OP_PP_4B		0x12	/* Page program (up to 256 bytes) */
 #define SPINOR_OP_SE_4B		0xdc	/* Sector erase (usually 64KiB) */
 
-- 
cgit 


From 97f53d710b9f63cbef1c86ee39d9ecfdda6e674c Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 14 Apr 2014 10:09:07 +0200
Subject: regulator: s2mps11: Add external GPIO control for S2MPS14

Add support for external control over GPIO for the LDO10, LDO11 and LDO12
regulators of S2MPS14. External control is turned on by writing 0x0 to the
control register, a value which for other regulators is used to disable
them. The LDO10-LDO12 regulators can be disabled only by I2C GPIO or the
PWREN pin, so the patch actually provides a proper way of disabling them.

Additionally the GPIO control has two benefits:
 - It is faster than toggling it over I2C bus.
 - It allows disabling the regulator during suspend to RAM; the AP will
   enable it during resume.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/s2mps11.c         | 67 +++++++++++++++++++++++++++++++++++--
 include/linux/mfd/samsung/s2mps14.h |  2 ++
 2 files changed, 67 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
index 3aba0331fb5d..6dad0aa74a47 100644
--- a/drivers/regulator/s2mps11.c
+++ b/drivers/regulator/s2mps11.c
@@ -27,6 +27,7 @@
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/of_regulator.h>
+#include <linux/of_gpio.h>
 #include <linux/mfd/samsung/core.h>
 #include <linux/mfd/samsung/s2mps11.h>
 #include <linux/mfd/samsung/s2mps14.h>
@@ -44,6 +45,8 @@ struct s2mps11_info {
 	 * was enabled.
 	 */
 	unsigned int s2mps14_suspend_state:30;
+	/* Array of size rdev_num with GPIO-s for external sleep control */
+	int *ext_control_gpio;
 };
 
 static int get_ramp_delay(int ramp_delay)
@@ -409,6 +412,8 @@ static int s2mps14_regulator_enable(struct regulator_dev *rdev)
 
 	if (s2mps11->s2mps14_suspend_state & (1 << rdev_get_id(rdev)))
 		val = S2MPS14_ENABLE_SUSPEND;
+	else if (s2mps11->ext_control_gpio[rdev_get_id(rdev)])
+		val = S2MPS14_ENABLE_EXT_CONTROL;
 	else
 		val = rdev->desc->enable_mask;
 
@@ -565,8 +570,40 @@ static const struct regulator_desc s2mps14_regulators[] = {
 	regulator_desc_s2mps14_buck1235(5),
 };
 
-static int s2mps11_pmic_dt_parse(struct platform_device *pdev,
+static int s2mps14_pmic_enable_ext_control(struct s2mps11_info *s2mps11,
+		struct regulator_dev *rdev)
+{
+	return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
+			rdev->desc->enable_mask, S2MPS14_ENABLE_EXT_CONTROL);
+}
+
+static void s2mps14_pmic_dt_parse_ext_control_gpio(struct platform_device *pdev,
 		struct of_regulator_match *rdata, struct s2mps11_info *s2mps11)
+{
+	int *gpio = s2mps11->ext_control_gpio;
+	unsigned int i;
+	unsigned int valid_regulators[3] = { S2MPS14_LDO10, S2MPS14_LDO11,
+		S2MPS14_LDO12 };
+
+	for (i = 0; i < ARRAY_SIZE(valid_regulators); i++) {
+		unsigned int reg = valid_regulators[i];
+
+		if (!rdata[reg].init_data || !rdata[reg].of_node)
+			continue;
+
+		gpio[reg] = of_get_named_gpio(rdata[reg].of_node,
+				"samsung,ext-control-gpios", 0);
+		if (!gpio_is_valid(gpio[reg]))
+			gpio[reg] = 0;
+		else
+			dev_dbg(&pdev->dev, "Using GPIO %d for ext-control over %d/%s\n",
+					gpio[reg], reg, rdata[reg].name);
+	}
+}
+
+static int s2mps11_pmic_dt_parse(struct platform_device *pdev,
+		struct of_regulator_match *rdata, struct s2mps11_info *s2mps11,
+		enum sec_device_type dev_type)
 {
 	struct device_node *reg_np;
 
@@ -577,6 +614,9 @@ static int s2mps11_pmic_dt_parse(struct platform_device *pdev,
 	}
 
 	of_regulator_match(&pdev->dev, reg_np, rdata, s2mps11->rdev_num);
+	if (dev_type == S2MPS14X)
+		s2mps14_pmic_dt_parse_ext_control_gpio(pdev, rdata, s2mps11);
+
 	of_node_put(reg_np);
 
 	return 0;
@@ -613,6 +653,12 @@ static int s2mps11_pmic_probe(struct platform_device *pdev)
 		return -EINVAL;
 	};
 
+	s2mps11->ext_control_gpio = devm_kzalloc(&pdev->dev,
+			sizeof(*s2mps11->ext_control_gpio) * s2mps11->rdev_num,
+			GFP_KERNEL);
+	if (!s2mps11->ext_control_gpio)
+		return -ENOMEM;
+
 	if (!iodev->dev->of_node) {
 		if (iodev->pdata) {
 			pdata = iodev->pdata;
@@ -631,7 +677,7 @@ static int s2mps11_pmic_probe(struct platform_device *pdev)
 	for (i = 0; i < s2mps11->rdev_num; i++)
 		rdata[i].name = regulators[i].name;
 
-	ret = s2mps11_pmic_dt_parse(pdev, rdata, s2mps11);
+	ret = s2mps11_pmic_dt_parse(pdev, rdata, s2mps11, dev_type);
 	if (ret)
 		goto out;
 
@@ -652,6 +698,12 @@ common_reg:
 			config.of_node = rdata[i].of_node;
 		}
 
+		if (s2mps11->ext_control_gpio[i]) {
+			config.ena_gpio = s2mps11->ext_control_gpio[i];
+			config.ena_gpio_flags = GPIOF_OUT_INIT_HIGH;
+		} else
+			config.ena_gpio = config.ena_gpio_flags = 0;
+
 		regulator = devm_regulator_register(&pdev->dev,
 						&regulators[i], &config);
 		if (IS_ERR(regulator)) {
@@ -660,6 +712,17 @@ common_reg:
 				i);
 			goto out;
 		}
+
+		if (s2mps11->ext_control_gpio[i]) {
+			ret = s2mps14_pmic_enable_ext_control(s2mps11,
+					regulator);
+			if (ret < 0) {
+				dev_err(&pdev->dev,
+						"failed to enable GPIO control over %s: %d\n",
+						regulator->desc->name, ret);
+				goto out;
+			}
+		}
 	}
 
 out:
diff --git a/include/linux/mfd/samsung/s2mps14.h b/include/linux/mfd/samsung/s2mps14.h
index 4b449b8ac548..900cd7a04314 100644
--- a/include/linux/mfd/samsung/s2mps14.h
+++ b/include/linux/mfd/samsung/s2mps14.h
@@ -148,6 +148,8 @@ enum s2mps14_regulators {
 #define S2MPS14_ENABLE_SHIFT		6
 /* On/Off controlled by PWREN */
 #define S2MPS14_ENABLE_SUSPEND		(0x01 << S2MPS14_ENABLE_SHIFT)
+/* On/Off controlled by LDO10EN or EMMCEN */
+#define S2MPS14_ENABLE_EXT_CONTROL	(0x00 << S2MPS14_ENABLE_SHIFT)
 #define S2MPS14_LDO_N_VOLTAGES		(S2MPS14_LDO_VSEL_MASK + 1)
 #define S2MPS14_BUCK_N_VOLTAGES		(S2MPS14_BUCK_VSEL_MASK + 1)
 
-- 
cgit 


From 73679e50820123ebdedc67ebcda4562d1d6e4aba Mon Sep 17 00:00:00 2001
From: Pranith Kumar <bobby.prani@gmail.com>
Date: Tue, 15 Apr 2014 12:05:22 -0400
Subject: compiler-intel.h: Remove duplicate definition

barrier is already defined as __memory_barrier in compiler.h.
Remove this unnecessary redefinition.

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Link: http://lkml.kernel.org/r/CAJhHMCAnYPy0%2BqD-1KBnJPLt3XgAjdR12j%2BySSnPgmZcpbE7HQ@mail.gmail.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 include/linux/compiler-intel.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 5529c5239421..ba147a1727e6 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -13,12 +13,9 @@
 /* Intel ECC compiler doesn't support gcc specific asm stmts.
  * It uses intrinsics to do the equivalent things.
  */
-#undef barrier
 #undef RELOC_HIDE
 #undef OPTIMIZER_HIDE_VAR
 
-#define barrier() __memory_barrier()
-
 #define RELOC_HIDE(ptr, off)					\
   ({ unsigned long __ptr;					\
      __ptr = (unsigned long) (ptr);				\
-- 
cgit 


From fdb9c293decf7e06795f7d9ae409df907c7ae1b6 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Tue, 15 Apr 2014 12:39:14 -0500
Subject: percpu: Replace __get_cpu_var with this_cpu_ptr

__this_cpu_ptr is being phased out.  Use raw_cpu_ptr instead which was
introduced in 3.15-rc1.  One case of using __get_cpu_var in the
get_cpu_var macro for address calculation was remaining in
include/linux/percpu.h.

tj: Updated patch description.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/percpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index e7a0b95ed527..539b3caa5748 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -29,7 +29,7 @@
  */
 #define get_cpu_var(var) (*({				\
 	preempt_disable();				\
-	&__get_cpu_var(var); }))
+	this_cpu_ptr(&var); }))
 
 /*
  * The weird & is necessary because sparse considers (void)(var) to be
-- 
cgit 


From b4f42e2831ff9b9fa19252265d7c8985d47eefb9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 10 Apr 2014 09:46:28 -0600
Subject: block: remove struct request buffer member

This was used in the olden days, back when onions were proper
yellow. Basically it mapped to the current buffer to be
transferred. With highmem being added more than a decade ago,
most drivers map pages out of a bio, and rq->buffer isn't
pointing at anything valid.

Convert old style drivers to just use bio_data().

For the discard payload use case, just reference the page
in the bio.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c             | 21 ++++++---------------
 block/blk-map.c              |  3 ---
 drivers/block/amiflop.c      |  2 +-
 drivers/block/ataflop.c      |  2 +-
 drivers/block/floppy.c       | 18 +++++++++---------
 drivers/block/hd.c           | 10 +++++-----
 drivers/block/mg_disk.c      | 12 ++++++------
 drivers/block/paride/pcd.c   |  2 +-
 drivers/block/paride/pd.c    |  4 ++--
 drivers/block/paride/pf.c    |  4 ++--
 drivers/block/skd_main.c     |  5 ++---
 drivers/block/swim.c         |  2 +-
 drivers/block/swim3.c        |  6 +++---
 drivers/block/xen-blkfront.c |  4 ++--
 drivers/block/xsysace.c      |  4 ++--
 drivers/block/z2ram.c        |  6 ++++--
 drivers/ide/ide-disk.c       |  5 ++---
 drivers/md/dm.c              |  1 -
 drivers/mtd/mtd_blkdevs.c    |  3 +--
 drivers/mtd/ubi/block.c      |  2 +-
 drivers/scsi/scsi_lib.c      |  3 ---
 drivers/scsi/sd.c            | 10 ++++------
 include/linux/blkdev.h       |  1 -
 23 files changed, 55 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 1fe9ff6e6802..ae6227fd07aa 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -146,8 +146,8 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
 	       (unsigned long long)blk_rq_pos(rq),
 	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
-	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
-	       rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
+	printk(KERN_INFO "  bio %p, biotail %p, len %u\n",
+	       rq->bio, rq->biotail, blk_rq_bytes(rq));
 
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		printk(KERN_INFO "  cdb: ");
@@ -1360,7 +1360,6 @@ void blk_add_request_payload(struct request *rq, struct page *page,
 
 	rq->__data_len = rq->resid_len = len;
 	rq->nr_phys_segments = 1;
-	rq->buffer = bio_data(bio);
 }
 EXPORT_SYMBOL_GPL(blk_add_request_payload);
 
@@ -1402,12 +1401,6 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
 	bio->bi_next = req->bio;
 	req->bio = bio;
 
-	/*
-	 * may not be valid. if the low level driver said
-	 * it didn't need a bounce buffer then it better
-	 * not touch req->buffer either...
-	 */
-	req->buffer = bio_data(bio);
 	req->__sector = bio->bi_iter.bi_sector;
 	req->__data_len += bio->bi_iter.bi_size;
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
@@ -2434,7 +2427,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 	}
 
 	req->__data_len -= total_bytes;
-	req->buffer = bio_data(req->bio);
 
 	/* update sector only for requests with clear definition of sector */
 	if (req->cmd_type == REQ_TYPE_FS)
@@ -2752,10 +2744,9 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 	/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
 	rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
 
-	if (bio_has_data(bio)) {
+	if (bio_has_data(bio))
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
-		rq->buffer = bio_data(bio);
-	}
+
 	rq->__data_len = bio->bi_iter.bi_size;
 	rq->bio = rq->biotail = bio;
 
@@ -2831,7 +2822,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
 
 /*
  * Copy attributes of the original request to the clone request.
- * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
+ * The actual data parts (e.g. ->cmd, ->sense) are not copied.
  */
 static void __blk_rq_prep_clone(struct request *dst, struct request *src)
 {
@@ -2857,7 +2848,7 @@ static void __blk_rq_prep_clone(struct request *dst, struct request *src)
  *
  * Description:
  *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
- *     The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
+ *     The actual data parts of @rq_src (e.g. ->cmd, ->sense)
  *     are not copied, and copying such parts is the caller's responsibility.
  *     Also, pages which the original bios are pointing to are not copied
  *     and the cloned bios just point same pages.
diff --git a/block/blk-map.c b/block/blk-map.c
index f7b22bc21518..f890d4345b0c 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -155,7 +155,6 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
 	if (!bio_flagged(bio, BIO_USER_MAPPED))
 		rq->cmd_flags |= REQ_COPY_USER;
 
-	rq->buffer = NULL;
 	return 0;
 unmap_rq:
 	blk_rq_unmap_user(bio);
@@ -238,7 +237,6 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 	blk_queue_bounce(q, &bio);
 	bio_get(bio);
 	blk_rq_bio_prep(q, rq, bio);
-	rq->buffer = NULL;
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_user_iov);
@@ -325,7 +323,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	}
 
 	blk_queue_bounce(q, &rq->bio);
-	rq->buffer = NULL;
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 748dea4f34dc..758da2287d9a 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1406,7 +1406,7 @@ next_segment:
 
 		track = block / (floppy->dtype->sects * floppy->type->sect_mult);
 		sector = block % (floppy->dtype->sects * floppy->type->sect_mult);
-		data = rq->buffer + 512 * cnt;
+		data = bio_data(rq->bio) + 512 * cnt;
 #ifdef DEBUG
 		printk("access to track %d, sector %d, with buffer at "
 		       "0x%08lx\n", track, sector, data);
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 96b629e1f0c9..7e8a55f8917c 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1484,7 +1484,7 @@ repeat:
 	ReqCnt = 0;
 	ReqCmd = rq_data_dir(fd_request);
 	ReqBlock = blk_rq_pos(fd_request);
-	ReqBuffer = fd_request->buffer;
+	ReqBuffer = bio_data(fd_request->bio);
 	setup_req_params( drive );
 	do_fd_action( drive );
 
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 8f5565bf34cd..5f69c910c3ac 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2351,7 +2351,7 @@ static void rw_interrupt(void)
 	}
 
 	if (CT(COMMAND) != FD_READ ||
-	    raw_cmd->kernel_data == current_req->buffer) {
+	    raw_cmd->kernel_data == bio_data(current_req->bio)) {
 		/* transfer directly from buffer */
 		cont->done(1);
 	} else if (CT(COMMAND) == FD_READ) {
@@ -2640,7 +2640,7 @@ static int make_raw_rw_request(void)
 		raw_cmd->flags &= ~FD_RAW_WRITE;
 		raw_cmd->flags |= FD_RAW_READ;
 		COMMAND = FM_MODE(_floppy, FD_READ);
-	} else if ((unsigned long)current_req->buffer < MAX_DMA_ADDRESS) {
+	} else if ((unsigned long)bio_data(current_req->bio) < MAX_DMA_ADDRESS) {
 		unsigned long dma_limit;
 		int direct, indirect;
 
@@ -2654,13 +2654,13 @@ static int make_raw_rw_request(void)
 		 */
 		max_size = buffer_chain_size();
 		dma_limit = (MAX_DMA_ADDRESS -
-			     ((unsigned long)current_req->buffer)) >> 9;
+			     ((unsigned long)bio_data(current_req->bio))) >> 9;
 		if ((unsigned long)max_size > dma_limit)
 			max_size = dma_limit;
 		/* 64 kb boundaries */
-		if (CROSS_64KB(current_req->buffer, max_size << 9))
+		if (CROSS_64KB(bio_data(current_req->bio), max_size << 9))
 			max_size = (K_64 -
-				    ((unsigned long)current_req->buffer) %
+				    ((unsigned long)bio_data(current_req->bio)) %
 				    K_64) >> 9;
 		direct = transfer_size(ssize, max_sector, max_size) - fsector_t;
 		/*
@@ -2677,7 +2677,7 @@ static int make_raw_rw_request(void)
 		       (DP->read_track & (1 << DRS->probed_format)))))) {
 			max_size = blk_rq_sectors(current_req);
 		} else {
-			raw_cmd->kernel_data = current_req->buffer;
+			raw_cmd->kernel_data = bio_data(current_req->bio);
 			raw_cmd->length = current_count_sectors << 9;
 			if (raw_cmd->length == 0) {
 				DPRINT("%s: zero dma transfer attempted\n", __func__);
@@ -2731,7 +2731,7 @@ static int make_raw_rw_request(void)
 	raw_cmd->length = ((raw_cmd->length - 1) | (ssize - 1)) + 1;
 	raw_cmd->length <<= 9;
 	if ((raw_cmd->length < current_count_sectors << 9) ||
-	    (raw_cmd->kernel_data != current_req->buffer &&
+	    (raw_cmd->kernel_data != bio_data(current_req->bio) &&
 	     CT(COMMAND) == FD_WRITE &&
 	     (aligned_sector_t + (raw_cmd->length >> 9) > buffer_max ||
 	      aligned_sector_t < buffer_min)) ||
@@ -2739,7 +2739,7 @@ static int make_raw_rw_request(void)
 	    raw_cmd->length <= 0 || current_count_sectors <= 0) {
 		DPRINT("fractionary current count b=%lx s=%lx\n",
 		       raw_cmd->length, current_count_sectors);
-		if (raw_cmd->kernel_data != current_req->buffer)
+		if (raw_cmd->kernel_data != bio_data(current_req->bio))
 			pr_info("addr=%d, length=%ld\n",
 				(int)((raw_cmd->kernel_data -
 				       floppy_track_buffer) >> 9),
@@ -2756,7 +2756,7 @@ static int make_raw_rw_request(void)
 		return 0;
 	}
 
-	if (raw_cmd->kernel_data != current_req->buffer) {
+	if (raw_cmd->kernel_data != bio_data(current_req->bio)) {
 		if (raw_cmd->kernel_data < floppy_track_buffer ||
 		    current_count_sectors < 0 ||
 		    raw_cmd->length < 0 ||
diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index bf397bf108b7..8a290c08262f 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c
@@ -464,11 +464,11 @@ static void read_intr(void)
 
 ok_to_read:
 	req = hd_req;
-	insw(HD_DATA, req->buffer, 256);
+	insw(HD_DATA, bio_data(req->bio), 256);
 #ifdef DEBUG
 	printk("%s: read: sector %ld, remaining = %u, buffer=%p\n",
 	       req->rq_disk->disk_name, blk_rq_pos(req) + 1,
-	       blk_rq_sectors(req) - 1, req->buffer+512);
+	       blk_rq_sectors(req) - 1, bio_data(req->bio)+512);
 #endif
 	if (hd_end_request(0, 512)) {
 		SET_HANDLER(&read_intr);
@@ -505,7 +505,7 @@ static void write_intr(void)
 ok_to_write:
 	if (hd_end_request(0, 512)) {
 		SET_HANDLER(&write_intr);
-		outsw(HD_DATA, req->buffer, 256);
+		outsw(HD_DATA, bio_data(req->bio), 256);
 		return;
 	}
 
@@ -624,7 +624,7 @@ repeat:
 	printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n",
 		req->rq_disk->disk_name,
 		req_data_dir(req) == READ ? "read" : "writ",
-		cyl, head, sec, nsect, req->buffer);
+		cyl, head, sec, nsect, bio_data(req->bio));
 #endif
 	if (req->cmd_type == REQ_TYPE_FS) {
 		switch (rq_data_dir(req)) {
@@ -643,7 +643,7 @@ repeat:
 				bad_rw_intr();
 				goto repeat;
 			}
-			outsw(HD_DATA, req->buffer, 256);
+			outsw(HD_DATA, bio_data(req->bio), 256);
 			break;
 		default:
 			printk("unknown hd-command\n");
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index eb59b1241366..e352cac707e8 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -479,7 +479,7 @@ static unsigned int mg_out(struct mg_host *host,
 
 static void mg_read_one(struct mg_host *host, struct request *req)
 {
-	u16 *buff = (u16 *)req->buffer;
+	u16 *buff = (u16 *)bio_data(req->bio);
 	u32 i;
 
 	for (i = 0; i < MG_SECTOR_SIZE >> 1; i++)
@@ -496,7 +496,7 @@ static void mg_read(struct request *req)
 		mg_bad_rw_intr(host);
 
 	MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-	       blk_rq_sectors(req), blk_rq_pos(req), req->buffer);
+	       blk_rq_sectors(req), blk_rq_pos(req), bio_data(req->bio));
 
 	do {
 		if (mg_wait(host, ATA_DRQ,
@@ -514,7 +514,7 @@ static void mg_read(struct request *req)
 
 static void mg_write_one(struct mg_host *host, struct request *req)
 {
-	u16 *buff = (u16 *)req->buffer;
+	u16 *buff = (u16 *)bio_data(req->bio);
 	u32 i;
 
 	for (i = 0; i < MG_SECTOR_SIZE >> 1; i++)
@@ -534,7 +534,7 @@ static void mg_write(struct request *req)
 	}
 
 	MG_DBG("requested %d sects (from %ld), buffer=0x%p\n",
-	       rem, blk_rq_pos(req), req->buffer);
+	       rem, blk_rq_pos(req), bio_data(req->bio));
 
 	if (mg_wait(host, ATA_DRQ,
 		    MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) {
@@ -585,7 +585,7 @@ ok_to_read:
 	mg_read_one(host, req);
 
 	MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
-	       blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer);
+	       blk_rq_pos(req), blk_rq_sectors(req) - 1, bio_data(req->bio));
 
 	/* send read confirm */
 	outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND);
@@ -624,7 +624,7 @@ ok_to_write:
 		/* write 1 sector and set handler if remains */
 		mg_write_one(host, req);
 		MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n",
-		       blk_rq_pos(req), blk_rq_sectors(req), req->buffer);
+		       blk_rq_pos(req), blk_rq_sectors(req), bio_data(req->bio));
 		host->mg_do_intr = mg_write_intr;
 		mod_timer(&host->timer, jiffies + 3 * HZ);
 	}
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index e76bdc074dbe..719cb1bc1640 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -747,7 +747,7 @@ static void do_pcd_request(struct request_queue * q)
 			pcd_current = cd;
 			pcd_sector = blk_rq_pos(pcd_req);
 			pcd_count = blk_rq_cur_sectors(pcd_req);
-			pcd_buf = pcd_req->buffer;
+			pcd_buf = bio_data(pcd_req->bio);
 			pcd_busy = 1;
 			ps_set_intr(do_pcd_read, NULL, 0, nice);
 			return;
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 19ad8f0c83ef..fea7e76a00de 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -454,7 +454,7 @@ static enum action do_pd_io_start(void)
 		if (pd_block + pd_count > get_capacity(pd_req->rq_disk))
 			return Fail;
 		pd_run = blk_rq_sectors(pd_req);
-		pd_buf = pd_req->buffer;
+		pd_buf = bio_data(pd_req->bio);
 		pd_retries = 0;
 		if (pd_cmd == READ)
 			return do_pd_read_start();
@@ -485,7 +485,7 @@ static int pd_next_buf(void)
 	spin_lock_irqsave(&pd_lock, saved_flags);
 	__blk_end_request_cur(pd_req, 0);
 	pd_count = blk_rq_cur_sectors(pd_req);
-	pd_buf = pd_req->buffer;
+	pd_buf = bio_data(pd_req->bio);
 	spin_unlock_irqrestore(&pd_lock, saved_flags);
 	return 0;
 }
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index f5c86d523ba0..9a15fd3c9349 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -795,7 +795,7 @@ repeat:
 	}
 
 	pf_cmd = rq_data_dir(pf_req);
-	pf_buf = pf_req->buffer;
+	pf_buf = bio_data(pf_req->bio);
 	pf_retries = 0;
 
 	pf_busy = 1;
@@ -827,7 +827,7 @@ static int pf_next_buf(void)
 		if (!pf_req)
 			return 1;
 		pf_count = blk_rq_cur_sectors(pf_req);
-		pf_buf = pf_req->buffer;
+		pf_buf = bio_data(pf_req->bio);
 	}
 	return 0;
 }
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index a69dd93d1bd5..36bcedfd930c 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -563,7 +563,6 @@ skd_prep_discard_cdb(struct skd_scsi_request *scsi_req,
 
 	req = skreq->req;
 	blk_add_request_payload(req, page, len);
-	req->buffer = buf;
 }
 
 static void skd_request_fn_not_online(struct request_queue *q);
@@ -856,10 +855,10 @@ static void skd_end_request(struct skd_device *skdev,
 
 	if ((io_flags & REQ_DISCARD) &&
 		(skreq->discard_page == 1)) {
+		struct bio *bio = req->bio;
 		pr_debug("%s:%s:%d, free the page!",
 			 skdev->name, __func__, __LINE__);
-		free_page((unsigned long)req->buffer);
-		req->buffer = NULL;
+		__free_page(bio->bi_io_vec->bv_page);
 	}
 
 	if (unlikely(error)) {
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index b02d53a399f3..6b44bbe528b7 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -549,7 +549,7 @@ static void redo_fd_request(struct request_queue *q)
 		case READ:
 			err = floppy_read_sectors(fs, blk_rq_pos(req),
 						  blk_rq_cur_sectors(req),
-						  req->buffer);
+						  bio_data(req->bio));
 			break;
 		}
 	done:
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index c74f7b56e7c4..523ee8fd4c15 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -342,7 +342,7 @@ static void start_request(struct floppy_state *fs)
 		swim3_dbg("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n",
 			  req->rq_disk->disk_name, req->cmd,
 			  (long)blk_rq_pos(req), blk_rq_sectors(req),
-			  req->buffer);
+			  bio_data(req->bio));
 		swim3_dbg("           errors=%d current_nr_sectors=%u\n",
 			  req->errors, blk_rq_cur_sectors(req));
 #endif
@@ -479,11 +479,11 @@ static inline void setup_transfer(struct floppy_state *fs)
 		/* Set up 3 dma commands: write preamble, data, postamble */
 		init_dma(cp, OUTPUT_MORE, write_preamble, sizeof(write_preamble));
 		++cp;
-		init_dma(cp, OUTPUT_MORE, req->buffer, 512);
+		init_dma(cp, OUTPUT_MORE, bio_data(req->bio), 512);
 		++cp;
 		init_dma(cp, OUTPUT_LAST, write_postamble, sizeof(write_postamble));
 	} else {
-		init_dma(cp, INPUT_LAST, req->buffer, n * 512);
+		init_dma(cp, INPUT_LAST, bio_data(req->bio), n * 512);
 	}
 	++cp;
 	out_le16(&cp->command, DBDMA_STOP);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index efe1b4761735..283a30e88287 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -612,10 +612,10 @@ static void do_blkif_request(struct request_queue *rq)
 		}
 
 		pr_debug("do_blk_req %p: cmd %p, sec %lx, "
-			 "(%u/%u) buffer:%p [%s]\n",
+			 "(%u/%u) [%s]\n",
 			 req, req->cmd, (unsigned long)blk_rq_pos(req),
 			 blk_rq_cur_sectors(req), blk_rq_sectors(req),
-			 req->buffer, rq_data_dir(req) ? "write" : "read");
+			 rq_data_dir(req) ? "write" : "read");
 
 		if (blkif_queue_request(req)) {
 			blk_requeue_request(rq, req);
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 1393b8871a28..ab3ea62e5dfc 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -661,7 +661,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 			rq_data_dir(req));
 
 		ace->req = req;
-		ace->data_ptr = req->buffer;
+		ace->data_ptr = bio_data(req->bio);
 		ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR;
 		ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF);
 
@@ -733,7 +733,7 @@ static void ace_fsm_dostate(struct ace_device *ace)
 			 *      blk_rq_sectors(ace->req),
 			 *      blk_rq_cur_sectors(ace->req));
 			 */
-			ace->data_ptr = ace->req->buffer;
+			ace->data_ptr = bio_data(ace->req->bio);
 			ace->data_count = blk_rq_cur_sectors(ace->req) * 16;
 			ace_fsm_yieldirq(ace);
 			break;
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 27de5046708a..968f9e52effa 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c
@@ -87,13 +87,15 @@ static void do_z2_request(struct request_queue *q)
 		while (len) {
 			unsigned long addr = start & Z2RAM_CHUNKMASK;
 			unsigned long size = Z2RAM_CHUNKSIZE - addr;
+			void *buffer = bio_data(req->bio);
+
 			if (len < size)
 				size = len;
 			addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ];
 			if (rq_data_dir(req) == READ)
-				memcpy(req->buffer, (char *)addr, size);
+				memcpy(buffer, (char *)addr, size);
 			else
-				memcpy((char *)addr, req->buffer, size);
+				memcpy((char *)addr, buffer, size);
 			start += size;
 			len -= size;
 		}
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 16f69be820c7..ee880382e3bc 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -188,10 +188,9 @@ static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq,
 
 	ledtrig_ide_activity();
 
-	pr_debug("%s: %sing: block=%llu, sectors=%u, buffer=0x%08lx\n",
+	pr_debug("%s: %sing: block=%llu, sectors=%u\n",
 		 drive->name, rq_data_dir(rq) == READ ? "read" : "writ",
-		 (unsigned long long)block, blk_rq_sectors(rq),
-		 (unsigned long)rq->buffer);
+		 (unsigned long long)block, blk_rq_sectors(rq));
 
 	if (hwif->rw_disk)
 		hwif->rw_disk(drive, rq);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 455e64916498..6a71bc7c9133 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1544,7 +1544,6 @@ static int setup_clone(struct request *clone, struct request *rq,
 	clone->cmd = rq->cmd;
 	clone->cmd_len = rq->cmd_len;
 	clone->sense = rq->sense;
-	clone->buffer = rq->buffer;
 	clone->end_io = end_clone_request;
 	clone->end_io_data = tio;
 
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 0b2ccb68c0d0..4dbfaee9aa95 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -82,8 +82,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 
 	block = blk_rq_pos(req) << 9 >> tr->blkshift;
 	nsect = blk_rq_cur_bytes(req) >> tr->blkshift;
-
-	buf = req->buffer;
+	buf = bio_data(req->bio);
 
 	if (req->cmd_type != REQ_TYPE_FS)
 		return -EIO;
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c
index 7ff473c871a9..ee774ba3728d 100644
--- a/drivers/mtd/ubi/block.c
+++ b/drivers/mtd/ubi/block.c
@@ -253,7 +253,7 @@ static int do_ubiblock_request(struct ubiblock *dev, struct request *req)
 	 * flash access anyway.
 	 */
 	mutex_lock(&dev->dev_mutex);
-	ret = ubiblock_read(dev, req->buffer, sec, len);
+	ret = ubiblock_read(dev, bio_data(req->bio), sec, len);
 	mutex_unlock(&dev->dev_mutex);
 
 	return ret;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0f3bddcb6b1a..3cc82d3dec78 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1018,8 +1018,6 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb,
 		return BLKPREP_DEFER;
 	}
 
-	req->buffer = NULL;
-
 	/* 
 	 * Next, walk the list, and fill in the addresses and sizes of
 	 * each segment.
@@ -1156,7 +1154,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
 		BUG_ON(blk_rq_bytes(req));
 
 		memset(&cmd->sdb, 0, sizeof(cmd->sdb));
-		req->buffer = NULL;
 	}
 
 	cmd->cmd_len = req->cmd_len;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index efcbcd182863..06d154d20faa 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -739,14 +739,11 @@ static int sd_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq)
 
 	blk_add_request_payload(rq, page, len);
 	ret = scsi_setup_blk_pc_cmnd(sdp, rq);
-	rq->buffer = page_address(page);
 	rq->__data_len = nr_bytes;
 
 out:
-	if (ret != BLKPREP_OK) {
+	if (ret != BLKPREP_OK)
 		__free_page(page);
-		rq->buffer = NULL;
-	}
 	return ret;
 }
 
@@ -843,8 +840,9 @@ static void sd_unprep_fn(struct request_queue *q, struct request *rq)
 	struct scsi_cmnd *SCpnt = rq->special;
 
 	if (rq->cmd_flags & REQ_DISCARD) {
-		free_page((unsigned long)rq->buffer);
-		rq->buffer = NULL;
+		struct bio *bio = rq->bio;
+
+		__free_page(bio->bi_io_vec->bv_page);
 	}
 	if (SCpnt->cmnd != rq->cmd) {
 		mempool_free(SCpnt->cmnd, sd_cdb_pool);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 86a8df13a5fe..eb5e94803892 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -178,7 +178,6 @@ struct request {
 	unsigned short ioprio;
 
 	void *special;		/* opaque pointer available for LLD use */
-	char *buffer;		/* kaddr of the current segment if available */
 
 	int tag;
 	int errors;
-- 
cgit 


From e9b267d91f6ddbc694cb40aa962b0b2cec03971d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Apr 2014 13:59:10 -0600
Subject: blk-mq: add ->init_request and ->exit_request methods

The current blk_mq_init_commands/blk_mq_free_commands interface has
two problems:

 1) Because only the constructor is passed to blk_mq_init_commands there
    is no easy way to clean up when a command initialization fails.  The
    current code simply leaks the allocations done in the constructor.

 2) There is no good place to call blk_mq_free_commands: before
    blk_cleanup_queue there is no guarantee that all outstanding
    commands have completed, so we can't free them yet.  After
    blk_cleanup_queue the queue has usually been freed.  This can be
    worked around by grabbing an unconditional reference before calling
    blk_cleanup_queue and dropping it after blk_mq_free_commands is
    done, although that's not exactly pretty and driver writers are
    guaranteed to get it wrong sooner or later.

Both issues are easily fixed by making the request constructor and
destructor normal blk_mq_ops methods.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c             | 105 ++++++++++++++-------------------------------
 drivers/block/virtio_blk.c |  23 +++++-----
 include/linux/blk-mq.h     |  14 +++++-
 3 files changed, 55 insertions(+), 87 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index e644feec068c..48d2d8495f5e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1031,74 +1031,20 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
 	blk_mq_put_ctx(ctx);
 }
 
-static int blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx,
-				   int (*init)(void *, struct blk_mq_hw_ctx *,
-					struct request *, unsigned int),
-				   void *data)
+static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx, void *driver_data)
 {
-	unsigned int i;
-	int ret = 0;
-
-	for (i = 0; i < hctx->queue_depth; i++) {
-		struct request *rq = hctx->rqs[i];
-
-		ret = init(data, hctx, rq, i);
-		if (ret)
-			break;
-	}
-
-	return ret;
-}
-
-int blk_mq_init_commands(struct request_queue *q,
-			 int (*init)(void *, struct blk_mq_hw_ctx *,
-					struct request *, unsigned int),
-			 void *data)
-{
-	struct blk_mq_hw_ctx *hctx;
-	unsigned int i;
-	int ret = 0;
-
-	queue_for_each_hw_ctx(q, hctx, i) {
-		ret = blk_mq_init_hw_commands(hctx, init, data);
-		if (ret)
-			break;
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL(blk_mq_init_commands);
-
-static void blk_mq_free_hw_commands(struct blk_mq_hw_ctx *hctx,
-				    void (*free)(void *, struct blk_mq_hw_ctx *,
-					struct request *, unsigned int),
-				    void *data)
-{
-	unsigned int i;
+	struct page *page;
 
-	for (i = 0; i < hctx->queue_depth; i++) {
-		struct request *rq = hctx->rqs[i];
+	if (hctx->rqs && hctx->queue->mq_ops->exit_request) {
+		int i;
 
-		free(data, hctx, rq, i);
+		for (i = 0; i < hctx->queue_depth; i++) {
+			if (!hctx->rqs[i])
+				continue;
+			hctx->queue->mq_ops->exit_request(driver_data, hctx,
+							  hctx->rqs[i], i);
+		}
 	}
-}
-
-void blk_mq_free_commands(struct request_queue *q,
-			  void (*free)(void *, struct blk_mq_hw_ctx *,
-					struct request *, unsigned int),
-			  void *data)
-{
-	struct blk_mq_hw_ctx *hctx;
-	unsigned int i;
-
-	queue_for_each_hw_ctx(q, hctx, i)
-		blk_mq_free_hw_commands(hctx, free, data);
-}
-EXPORT_SYMBOL(blk_mq_free_commands);
-
-static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx)
-{
-	struct page *page;
 
 	while (!list_empty(&hctx->page_list)) {
 		page = list_first_entry(&hctx->page_list, struct page, lru);
@@ -1123,10 +1069,12 @@ static size_t order_to_size(unsigned int order)
 }
 
 static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
-			      unsigned int reserved_tags, int node)
+		struct blk_mq_reg *reg, void *driver_data, int node)
 {
+	unsigned int reserved_tags = reg->reserved_tags;
 	unsigned int i, j, entries_per_page, max_order = 4;
 	size_t rq_size, left;
+	int error;
 
 	INIT_LIST_HEAD(&hctx->page_list);
 
@@ -1175,14 +1123,23 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
 		for (j = 0; j < to_do; j++) {
 			hctx->rqs[i] = p;
 			blk_rq_init(hctx->queue, hctx->rqs[i]);
+			if (reg->ops->init_request) {
+				error = reg->ops->init_request(driver_data,
+						hctx, hctx->rqs[i], i);
+				if (error)
+					goto err_rq_map;
+			}
+
 			p += rq_size;
 			i++;
 		}
 	}
 
-	if (i < (reserved_tags + BLK_MQ_TAG_MIN))
+	if (i < (reserved_tags + BLK_MQ_TAG_MIN)) {
+		error = -ENOMEM;
 		goto err_rq_map;
-	else if (i != hctx->queue_depth) {
+	}
+	if (i != hctx->queue_depth) {
 		hctx->queue_depth = i;
 		pr_warn("%s: queue depth set to %u because of low memory\n",
 					__func__, i);
@@ -1190,12 +1147,14 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
 
 	hctx->tags = blk_mq_init_tags(hctx->queue_depth, reserved_tags, node);
 	if (!hctx->tags) {
-err_rq_map:
-		blk_mq_free_rq_map(hctx);
-		return -ENOMEM;
+		error = -ENOMEM;
+		goto err_rq_map;
 	}
 
 	return 0;
+err_rq_map:
+	blk_mq_free_rq_map(hctx, driver_data);
+	return error;
 }
 
 static int blk_mq_init_hw_queues(struct request_queue *q,
@@ -1228,7 +1187,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 						blk_mq_hctx_notify, hctx);
 		blk_mq_register_cpu_notifier(&hctx->cpu_notifier);
 
-		if (blk_mq_init_rq_map(hctx, reg->reserved_tags, node))
+		if (blk_mq_init_rq_map(hctx, reg, driver_data, node))
 			break;
 
 		/*
@@ -1268,7 +1227,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 			reg->ops->exit_hctx(hctx, j);
 
 		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
-		blk_mq_free_rq_map(hctx);
+		blk_mq_free_rq_map(hctx, driver_data);
 		kfree(hctx->ctxs);
 	}
 
@@ -1455,7 +1414,7 @@ void blk_mq_free_queue(struct request_queue *q)
 	queue_for_each_hw_ctx(q, hctx, i) {
 		kfree(hctx->ctx_map);
 		kfree(hctx->ctxs);
-		blk_mq_free_rq_map(hctx);
+		blk_mq_free_rq_map(hctx, q->queuedata);
 		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
 		if (q->mq_ops->exit_hctx)
 			q->mq_ops->exit_hctx(hctx, i);
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index c7d02bc9d945..d06206abd340 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -480,11 +480,22 @@ static const struct device_attribute dev_attr_cache_type_rw =
 	__ATTR(cache_type, S_IRUGO|S_IWUSR,
 	       virtblk_cache_type_show, virtblk_cache_type_store);
 
+static int virtblk_init_request(void *data, struct blk_mq_hw_ctx *hctx,
+		struct request *rq, unsigned int nr)
+{
+	struct virtio_blk *vblk = data;
+	struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
+
+	sg_init_table(vbr->sg, vblk->sg_elems);
+	return 0;
+}
+
 static struct blk_mq_ops virtio_mq_ops = {
 	.queue_rq	= virtio_queue_rq,
 	.map_queue	= blk_mq_map_queue,
 	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
 	.free_hctx	= blk_mq_free_single_hw_queue,
+	.init_request	= virtblk_init_request,
 	.complete	= virtblk_request_done,
 };
 
@@ -497,16 +508,6 @@ static struct blk_mq_reg virtio_mq_reg = {
 };
 module_param_named(queue_depth, virtio_mq_reg.queue_depth, uint, 0444);
 
-static int virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
-			     struct request *rq, unsigned int nr)
-{
-	struct virtio_blk *vblk = data;
-	struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
-
-	sg_init_table(vbr->sg, vblk->sg_elems);
-	return 0;
-}
-
 static int virtblk_probe(struct virtio_device *vdev)
 {
 	struct virtio_blk *vblk;
@@ -577,8 +578,6 @@ static int virtblk_probe(struct virtio_device *vdev)
 		goto out_put_disk;
 	}
 
-	blk_mq_init_commands(q, virtblk_init_vbr, vblk);
-
 	q->queuedata = vblk;
 
 	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b6ee48740458..29c1a6e83814 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -67,6 +67,10 @@ typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int);
 typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
+typedef int (init_request_fn)(void *, struct blk_mq_hw_ctx *,
+		struct request *, unsigned int);
+typedef void (exit_request_fn)(void *, struct blk_mq_hw_ctx *,
+		struct request *, unsigned int);
 
 struct blk_mq_ops {
 	/*
@@ -99,6 +103,14 @@ struct blk_mq_ops {
 	 */
 	init_hctx_fn		*init_hctx;
 	exit_hctx_fn		*exit_hctx;
+
+	/*
+	 * Called for every command allocated by the block layer to allow
+	 * the driver to set up driver specific data.
+	 * Ditto for exit/teardown.
+	 */
+	init_request_fn		*init_request;
+	exit_request_fn		*exit_request;
 };
 
 enum {
@@ -118,8 +130,6 @@ enum {
 struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *);
 int blk_mq_register_disk(struct gendisk *);
 void blk_mq_unregister_disk(struct gendisk *);
-int blk_mq_init_commands(struct request_queue *, int (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data);
-void blk_mq_free_commands(struct request_queue *, void (*free)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data);
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
 
-- 
cgit 


From 24d2f90309b23f2cfe016b2aebc5f0d6e01c57fd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 15 Apr 2014 14:14:00 -0600
Subject: blk-mq: split out tag initialization, support shared tags

Add a new blk_mq_tag_set structure that gets set up before we initialize
the queue.  A single blk_mq_tag_set structure can be shared by multiple
queues.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Modular export of blk_mq_{alloc,free}_tag_set added by me.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-cpumap.c      |   6 +-
 block/blk-mq-tag.c         |  14 ---
 block/blk-mq-tag.h         |  19 +++-
 block/blk-mq.c             | 244 +++++++++++++++++++++++++--------------------
 block/blk-mq.h             |   5 +-
 drivers/block/null_blk.c   |  92 ++++++++++-------
 drivers/block/virtio_blk.c |  48 +++++----
 include/linux/blk-mq.h     |  34 +++----
 8 files changed, 262 insertions(+), 200 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 097921329619..5d0f93cf358c 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -80,17 +80,17 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
 	return 0;
 }
 
-unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg)
+unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
 {
 	unsigned int *map;
 
 	/* If cpus are offline, map them to first hctx */
 	map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL,
-				reg->numa_node);
+				set->numa_node);
 	if (!map)
 		return NULL;
 
-	if (!blk_mq_update_queue_map(map, reg->nr_hw_queues))
+	if (!blk_mq_update_queue_map(map, set->nr_hw_queues))
 		return map;
 
 	kfree(map);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 83ae96c51a27..7a799c46c32d 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -1,25 +1,11 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/percpu_ida.h>
 
 #include <linux/blk-mq.h>
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
-/*
- * Per tagged queue (tag address space) map
- */
-struct blk_mq_tags {
-	unsigned int nr_tags;
-	unsigned int nr_reserved_tags;
-	unsigned int nr_batch_move;
-	unsigned int nr_max_cache;
-
-	struct percpu_ida free_tags;
-	struct percpu_ida reserved_tags;
-};
-
 void blk_mq_wait_for_tags(struct blk_mq_tags *tags)
 {
 	int tag = blk_mq_get_tag(tags, __GFP_WAIT, false);
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 947ba2c6148e..b602e3fa66ea 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -1,7 +1,24 @@
 #ifndef INT_BLK_MQ_TAG_H
 #define INT_BLK_MQ_TAG_H
 
-struct blk_mq_tags;
+#include <linux/percpu_ida.h>
+
+/*
+ * Tag address space map.
+ */
+struct blk_mq_tags {
+	unsigned int nr_tags;
+	unsigned int nr_reserved_tags;
+	unsigned int nr_batch_move;
+	unsigned int nr_max_cache;
+
+	struct percpu_ida free_tags;
+	struct percpu_ida reserved_tags;
+
+	struct request **rqs;
+	struct list_head page_list;
+};
+
 
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2a5a0fed10a3..9180052d42cc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -81,7 +81,7 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
 
 	tag = blk_mq_get_tag(hctx->tags, gfp, reserved);
 	if (tag != BLK_MQ_TAG_FAIL) {
-		rq = hctx->rqs[tag];
+		rq = hctx->tags->rqs[tag];
 		blk_rq_init(hctx->queue, rq);
 		rq->tag = tag;
 
@@ -404,6 +404,12 @@ static void blk_mq_requeue_request(struct request *rq)
 		rq->nr_phys_segments--;
 }
 
+struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
+{
+	return tags->rqs[tag];
+}
+EXPORT_SYMBOL(blk_mq_tag_to_rq);
+
 struct blk_mq_timeout_data {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long *next;
@@ -425,12 +431,13 @@ static void blk_mq_timeout_check(void *__data, unsigned long *free_tags)
 	do {
 		struct request *rq;
 
-		tag = find_next_zero_bit(free_tags, hctx->queue_depth, tag);
-		if (tag >= hctx->queue_depth)
+		tag = find_next_zero_bit(free_tags, hctx->tags->nr_tags, tag);
+		if (tag >= hctx->tags->nr_tags)
 			break;
 
-		rq = hctx->rqs[tag++];
-
+		rq = blk_mq_tag_to_rq(hctx->tags, tag++);
+		if (rq->q != hctx->queue)
+			continue;
 		if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
 			continue;
 
@@ -969,11 +976,11 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu)
 }
 EXPORT_SYMBOL(blk_mq_map_queue);
 
-struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *reg,
+struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *set,
 						   unsigned int hctx_index)
 {
 	return kmalloc_node(sizeof(struct blk_mq_hw_ctx),
-				GFP_KERNEL | __GFP_ZERO, reg->numa_node);
+				GFP_KERNEL | __GFP_ZERO, set->numa_node);
 }
 EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue);
 
@@ -1030,31 +1037,31 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
 	blk_mq_put_ctx(ctx);
 }
 
-static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx, void *driver_data)
+static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
+		struct blk_mq_tags *tags, unsigned int hctx_idx)
 {
 	struct page *page;
 
-	if (hctx->rqs && hctx->queue->mq_ops->exit_request) {
+	if (tags->rqs && set->ops->exit_request) {
 		int i;
 
-		for (i = 0; i < hctx->queue_depth; i++) {
-			if (!hctx->rqs[i])
+		for (i = 0; i < tags->nr_tags; i++) {
+			if (!tags->rqs[i])
 				continue;
-			hctx->queue->mq_ops->exit_request(driver_data, hctx,
-							  hctx->rqs[i], i);
+			set->ops->exit_request(set->driver_data, tags->rqs[i],
+						hctx_idx, i);
 		}
 	}
 
-	while (!list_empty(&hctx->page_list)) {
-		page = list_first_entry(&hctx->page_list, struct page, lru);
+	while (!list_empty(&tags->page_list)) {
+		page = list_first_entry(&tags->page_list, struct page, lru);
 		list_del_init(&page->lru);
 		__free_pages(page, page->private);
 	}
 
-	kfree(hctx->rqs);
+	kfree(tags->rqs);
 
-	if (hctx->tags)
-		blk_mq_free_tags(hctx->tags);
+	blk_mq_free_tags(tags);
 }
 
 static size_t order_to_size(unsigned int order)
@@ -1067,30 +1074,36 @@ static size_t order_to_size(unsigned int order)
 	return ret;
 }
 
-static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
-		struct blk_mq_reg *reg, void *driver_data, int node)
+static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
+		unsigned int hctx_idx)
 {
-	unsigned int reserved_tags = reg->reserved_tags;
+	struct blk_mq_tags *tags;
 	unsigned int i, j, entries_per_page, max_order = 4;
 	size_t rq_size, left;
-	int error;
 
-	INIT_LIST_HEAD(&hctx->page_list);
+	tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
+				set->numa_node);
+	if (!tags)
+		return NULL;
 
-	hctx->rqs = kmalloc_node(hctx->queue_depth * sizeof(struct request *),
-					GFP_KERNEL, node);
-	if (!hctx->rqs)
-		return -ENOMEM;
+	INIT_LIST_HEAD(&tags->page_list);
+
+	tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *),
+					GFP_KERNEL, set->numa_node);
+	if (!tags->rqs) {
+		blk_mq_free_tags(tags);
+		return NULL;
+	}
 
 	/*
 	 * rq_size is the size of the request plus driver payload, rounded
 	 * to the cacheline size
 	 */
-	rq_size = round_up(sizeof(struct request) + hctx->cmd_size,
+	rq_size = round_up(sizeof(struct request) + set->cmd_size,
 				cache_line_size());
-	left = rq_size * hctx->queue_depth;
+	left = rq_size * set->queue_depth;
 
-	for (i = 0; i < hctx->queue_depth;) {
+	for (i = 0; i < set->queue_depth; ) {
 		int this_order = max_order;
 		struct page *page;
 		int to_do;
@@ -1100,7 +1113,8 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
 			this_order--;
 
 		do {
-			page = alloc_pages_node(node, GFP_KERNEL, this_order);
+			page = alloc_pages_node(set->numa_node, GFP_KERNEL,
+						this_order);
 			if (page)
 				break;
 			if (!this_order--)
@@ -1110,22 +1124,22 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
 		} while (1);
 
 		if (!page)
-			break;
+			goto fail;
 
 		page->private = this_order;
-		list_add_tail(&page->lru, &hctx->page_list);
+		list_add_tail(&page->lru, &tags->page_list);
 
 		p = page_address(page);
 		entries_per_page = order_to_size(this_order) / rq_size;
-		to_do = min(entries_per_page, hctx->queue_depth - i);
+		to_do = min(entries_per_page, set->queue_depth - i);
 		left -= to_do * rq_size;
 		for (j = 0; j < to_do; j++) {
-			hctx->rqs[i] = p;
-			if (reg->ops->init_request) {
-				error = reg->ops->init_request(driver_data,
-						hctx, hctx->rqs[i], i);
-				if (error)
-					goto err_rq_map;
+			tags->rqs[i] = p;
+			if (set->ops->init_request) {
+				if (set->ops->init_request(set->driver_data,
+						tags->rqs[i], hctx_idx, i,
+						set->numa_node))
+					goto fail;
 			}
 
 			p += rq_size;
@@ -1133,30 +1147,16 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
 		}
 	}
 
-	if (i < (reserved_tags + BLK_MQ_TAG_MIN)) {
-		error = -ENOMEM;
-		goto err_rq_map;
-	}
-	if (i != hctx->queue_depth) {
-		hctx->queue_depth = i;
-		pr_warn("%s: queue depth set to %u because of low memory\n",
-					__func__, i);
-	}
+	return tags;
 
-	hctx->tags = blk_mq_init_tags(hctx->queue_depth, reserved_tags, node);
-	if (!hctx->tags) {
-		error = -ENOMEM;
-		goto err_rq_map;
-	}
-
-	return 0;
-err_rq_map:
-	blk_mq_free_rq_map(hctx, driver_data);
-	return error;
+fail:
+	pr_warn("%s: failed to allocate requests\n", __func__);
+	blk_mq_free_rq_map(set, tags, hctx_idx);
+	return NULL;
 }
 
 static int blk_mq_init_hw_queues(struct request_queue *q,
-				 struct blk_mq_reg *reg, void *driver_data)
+		struct blk_mq_tag_set *set)
 {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned int i, j;
@@ -1170,23 +1170,21 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 
 		node = hctx->numa_node;
 		if (node == NUMA_NO_NODE)
-			node = hctx->numa_node = reg->numa_node;
+			node = hctx->numa_node = set->numa_node;
 
 		INIT_DELAYED_WORK(&hctx->delayed_work, blk_mq_work_fn);
 		spin_lock_init(&hctx->lock);
 		INIT_LIST_HEAD(&hctx->dispatch);
 		hctx->queue = q;
 		hctx->queue_num = i;
-		hctx->flags = reg->flags;
-		hctx->queue_depth = reg->queue_depth;
-		hctx->cmd_size = reg->cmd_size;
+		hctx->flags = set->flags;
+		hctx->cmd_size = set->cmd_size;
 
 		blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
 						blk_mq_hctx_notify, hctx);
 		blk_mq_register_cpu_notifier(&hctx->cpu_notifier);
 
-		if (blk_mq_init_rq_map(hctx, reg, driver_data, node))
-			break;
+		hctx->tags = set->tags[i];
 
 		/*
 		 * Allocate space for all possible cpus to avoid allocation in
@@ -1206,8 +1204,8 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 		hctx->nr_ctx_map = num_maps;
 		hctx->nr_ctx = 0;
 
-		if (reg->ops->init_hctx &&
-		    reg->ops->init_hctx(hctx, driver_data, i))
+		if (set->ops->init_hctx &&
+		    set->ops->init_hctx(hctx, set->driver_data, i))
 			break;
 	}
 
@@ -1221,11 +1219,10 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 		if (i == j)
 			break;
 
-		if (reg->ops->exit_hctx)
-			reg->ops->exit_hctx(hctx, j);
+		if (set->ops->exit_hctx)
+			set->ops->exit_hctx(hctx, j);
 
 		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
-		blk_mq_free_rq_map(hctx, driver_data);
 		kfree(hctx->ctxs);
 	}
 
@@ -1290,41 +1287,25 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	}
 }
 
-struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
-					void *driver_data)
+struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
 	struct blk_mq_hw_ctx **hctxs;
 	struct blk_mq_ctx *ctx;
 	struct request_queue *q;
 	int i;
 
-	if (!reg->nr_hw_queues ||
-	    !reg->ops->queue_rq || !reg->ops->map_queue ||
-	    !reg->ops->alloc_hctx || !reg->ops->free_hctx)
-		return ERR_PTR(-EINVAL);
-
-	if (!reg->queue_depth)
-		reg->queue_depth = BLK_MQ_MAX_DEPTH;
-	else if (reg->queue_depth > BLK_MQ_MAX_DEPTH) {
-		pr_err("blk-mq: queuedepth too large (%u)\n", reg->queue_depth);
-		reg->queue_depth = BLK_MQ_MAX_DEPTH;
-	}
-
-	if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
-		return ERR_PTR(-EINVAL);
-
 	ctx = alloc_percpu(struct blk_mq_ctx);
 	if (!ctx)
 		return ERR_PTR(-ENOMEM);
 
-	hctxs = kmalloc_node(reg->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
-			reg->numa_node);
+	hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
+			set->numa_node);
 
 	if (!hctxs)
 		goto err_percpu;
 
-	for (i = 0; i < reg->nr_hw_queues; i++) {
-		hctxs[i] = reg->ops->alloc_hctx(reg, i);
+	for (i = 0; i < set->nr_hw_queues; i++) {
+		hctxs[i] = set->ops->alloc_hctx(set, i);
 		if (!hctxs[i])
 			goto err_hctxs;
 
@@ -1335,11 +1316,11 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 		hctxs[i]->queue_num = i;
 	}
 
-	q = blk_alloc_queue_node(GFP_KERNEL, reg->numa_node);
+	q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
 	if (!q)
 		goto err_hctxs;
 
-	q->mq_map = blk_mq_make_queue_map(reg);
+	q->mq_map = blk_mq_make_queue_map(set);
 	if (!q->mq_map)
 		goto err_map;
 
@@ -1347,33 +1328,34 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 	blk_queue_rq_timeout(q, 30000);
 
 	q->nr_queues = nr_cpu_ids;
-	q->nr_hw_queues = reg->nr_hw_queues;
+	q->nr_hw_queues = set->nr_hw_queues;
 
 	q->queue_ctx = ctx;
 	q->queue_hw_ctx = hctxs;
 
-	q->mq_ops = reg->ops;
+	q->mq_ops = set->ops;
 	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
 
 	q->sg_reserved_size = INT_MAX;
 
 	blk_queue_make_request(q, blk_mq_make_request);
-	blk_queue_rq_timed_out(q, reg->ops->timeout);
-	if (reg->timeout)
-		blk_queue_rq_timeout(q, reg->timeout);
+	blk_queue_rq_timed_out(q, set->ops->timeout);
+	if (set->timeout)
+		blk_queue_rq_timeout(q, set->timeout);
 
-	if (reg->ops->complete)
-		blk_queue_softirq_done(q, reg->ops->complete);
+	if (set->ops->complete)
+		blk_queue_softirq_done(q, set->ops->complete);
 
 	blk_mq_init_flush(q);
-	blk_mq_init_cpu_queues(q, reg->nr_hw_queues);
+	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
-	q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size,
-				cache_line_size()), GFP_KERNEL);
+	q->flush_rq = kzalloc(round_up(sizeof(struct request) +
+				set->cmd_size, cache_line_size()),
+				GFP_KERNEL);
 	if (!q->flush_rq)
 		goto err_hw;
 
-	if (blk_mq_init_hw_queues(q, reg, driver_data))
+	if (blk_mq_init_hw_queues(q, set))
 		goto err_flush_rq;
 
 	blk_mq_map_swqueue(q);
@@ -1391,11 +1373,11 @@ err_hw:
 err_map:
 	blk_cleanup_queue(q);
 err_hctxs:
-	for (i = 0; i < reg->nr_hw_queues; i++) {
+	for (i = 0; i < set->nr_hw_queues; i++) {
 		if (!hctxs[i])
 			break;
 		free_cpumask_var(hctxs[i]->cpumask);
-		reg->ops->free_hctx(hctxs[i], i);
+		set->ops->free_hctx(hctxs[i], i);
 	}
 	kfree(hctxs);
 err_percpu:
@@ -1412,7 +1394,6 @@ void blk_mq_free_queue(struct request_queue *q)
 	queue_for_each_hw_ctx(q, hctx, i) {
 		kfree(hctx->ctx_map);
 		kfree(hctx->ctxs);
-		blk_mq_free_rq_map(hctx, q->queuedata);
 		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
 		if (q->mq_ops->exit_hctx)
 			q->mq_ops->exit_hctx(hctx, i);
@@ -1473,6 +1454,53 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+/* Validate @set, then allocate one tag map per hw queue; 0 or -errno. */
+int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
+{
+	int i;
+
+	if (!set->nr_hw_queues)
+		return -EINVAL;
+	if (!set->queue_depth || set->queue_depth > BLK_MQ_MAX_DEPTH)
+		return -EINVAL;
+	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
+		return -EINVAL;
+	if (!set->ops->queue_rq || !set->ops->map_queue ||
+	    !set->ops->alloc_hctx || !set->ops->free_hctx)
+		return -EINVAL;
+
+	set->tags = kmalloc_node(set->nr_hw_queues * sizeof(*set->tags),
+				 GFP_KERNEL, set->numa_node);
+	if (!set->tags)
+		return -ENOMEM;
+
+	for (i = 0; i < set->nr_hw_queues; i++) {
+		set->tags[i] = blk_mq_init_rq_map(set, i);
+		if (!set->tags[i])
+			goto out_unwind;
+	}
+	return 0;
+
+out_unwind:
+	while (--i >= 0)
+		blk_mq_free_rq_map(set, set->tags[i], i);
+	kfree(set->tags);	/* don't leak the pointer array on failure */
+	set->tags = NULL;
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(blk_mq_alloc_tag_set);
+
+/* Free every tag map in @set, then the pointer array that held them. */
+void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
+{
+	int i;
+
+	for (i = 0; i < set->nr_hw_queues; i++)
+		blk_mq_free_rq_map(set, set->tags[i], i);
+	kfree(set->tags);
+}
+EXPORT_SYMBOL(blk_mq_free_tag_set);
+
 void blk_mq_disable_hotplug(void)
 {
 	mutex_lock(&all_q_mutex);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 7964dadb7d64..5fa14f19f752 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -1,6 +1,8 @@
 #ifndef INT_BLK_MQ_H
 #define INT_BLK_MQ_H
 
+struct blk_mq_tag_set;
+
 struct blk_mq_ctx {
 	struct {
 		spinlock_t		lock;
@@ -46,8 +48,7 @@ void blk_mq_disable_hotplug(void);
 /*
  * CPU -> queue mappings
  */
-struct blk_mq_reg;
-extern unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg);
+extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
 extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
 
 void blk_mq_add_timer(struct request *rq);
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 71df69d90900..8e7e3a0b0d24 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -32,6 +32,7 @@ struct nullb {
 	unsigned int index;
 	struct request_queue *q;
 	struct gendisk *disk;
+	struct blk_mq_tag_set tag_set;
 	struct hrtimer timer;
 	unsigned int queue_depth;
 	spinlock_t lock;
@@ -320,10 +321,11 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	return BLK_MQ_RQ_QUEUE_OK;
 }
 
-static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index)
+static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_tag_set *set,
+		unsigned int hctx_index)
 {
-	int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes);
-	int tip = (reg->nr_hw_queues % nr_online_nodes);
+	int b_size = DIV_ROUND_UP(set->nr_hw_queues, nr_online_nodes);
+	int tip = (set->nr_hw_queues % nr_online_nodes);
 	int node = 0, i, n;
 
 	/*
@@ -338,7 +340,7 @@ static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned in
 
 			tip--;
 			if (!tip)
-				b_size = reg->nr_hw_queues / nr_online_nodes;
+				b_size = set->nr_hw_queues / nr_online_nodes;
 		}
 	}
 
@@ -387,13 +389,17 @@ static struct blk_mq_ops null_mq_ops = {
 	.map_queue      = blk_mq_map_queue,
 	.init_hctx	= null_init_hctx,
 	.complete	= null_softirq_done_fn,
+	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
+	.free_hctx	= blk_mq_free_single_hw_queue,
 };
 
-static struct blk_mq_reg null_mq_reg = {
-	.ops		= &null_mq_ops,
-	.queue_depth	= 64,
-	.cmd_size	= sizeof(struct nullb_cmd),
-	.flags		= BLK_MQ_F_SHOULD_MERGE,
+static struct blk_mq_ops null_mq_ops_pernode = {
+	.queue_rq       = null_queue_rq,
+	.map_queue      = blk_mq_map_queue,
+	.init_hctx	= null_init_hctx,
+	.complete	= null_softirq_done_fn,
+	.alloc_hctx	= null_alloc_hctx,
+	.free_hctx	= null_free_hctx,
 };
 
 static void null_del_dev(struct nullb *nullb)
@@ -402,6 +408,8 @@ static void null_del_dev(struct nullb *nullb)
 
 	del_gendisk(nullb->disk);
 	blk_cleanup_queue(nullb->q);
+	if (queue_mode == NULL_Q_MQ)
+		blk_mq_free_tag_set(&nullb->tag_set);
 	put_disk(nullb->disk);
 	kfree(nullb);
 }
@@ -506,7 +514,7 @@ static int null_add_dev(void)
 
 	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
 	if (!nullb)
-		return -ENOMEM;
+		goto out;
 
 	spin_lock_init(&nullb->lock);
 
@@ -514,49 +522,47 @@ static int null_add_dev(void)
 		submit_queues = nr_online_nodes;
 
 	if (setup_queues(nullb))
-		goto err;
+		goto out_free_nullb;
 
 	if (queue_mode == NULL_Q_MQ) {
-		null_mq_reg.numa_node = home_node;
-		null_mq_reg.queue_depth = hw_queue_depth;
-		null_mq_reg.nr_hw_queues = submit_queues;
-
-		if (use_per_node_hctx) {
-			null_mq_reg.ops->alloc_hctx = null_alloc_hctx;
-			null_mq_reg.ops->free_hctx = null_free_hctx;
-		} else {
-			null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue;
-			null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue;
-		}
-
-		nullb->q = blk_mq_init_queue(&null_mq_reg, nullb);
+		if (use_per_node_hctx)
+			nullb->tag_set.ops = &null_mq_ops_pernode;
+		else
+			nullb->tag_set.ops = &null_mq_ops;
+		nullb->tag_set.nr_hw_queues = submit_queues;
+		nullb->tag_set.queue_depth = hw_queue_depth;
+		nullb->tag_set.numa_node = home_node;
+		nullb->tag_set.cmd_size	= sizeof(struct nullb_cmd);
+		nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+		nullb->tag_set.driver_data = nullb;
+
+		if (blk_mq_alloc_tag_set(&nullb->tag_set))
+			goto out_cleanup_queues;
+
+		nullb->q = blk_mq_init_queue(&nullb->tag_set);
+		if (!nullb->q)
+			goto out_cleanup_tags;
 	} else if (queue_mode == NULL_Q_BIO) {
 		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
+		if (!nullb->q)
+			goto out_cleanup_queues;
 		blk_queue_make_request(nullb->q, null_queue_bio);
 		init_driver_queues(nullb);
 	} else {
 		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
+		if (!nullb->q)
+			goto out_cleanup_queues;
 		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
-		if (nullb->q)
-			blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
+		blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
 		init_driver_queues(nullb);
 	}
 
-	if (!nullb->q)
-		goto queue_fail;
-
 	nullb->q->queuedata = nullb;
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
 
 	disk = nullb->disk = alloc_disk_node(1, home_node);
-	if (!disk) {
-queue_fail:
-		blk_cleanup_queue(nullb->q);
-		cleanup_queues(nullb);
-err:
-		kfree(nullb);
-		return -ENOMEM;
-	}
+	if (!disk)
+		goto out_cleanup_blk_queue;
 
 	mutex_lock(&lock);
 	list_add_tail(&nullb->list, &nullb_list);
@@ -579,6 +585,18 @@ err:
 	sprintf(disk->disk_name, "nullb%d", nullb->index);
 	add_disk(disk);
 	return 0;
+
+out_cleanup_blk_queue:
+	blk_cleanup_queue(nullb->q);
+out_cleanup_tags:
+	if (queue_mode == NULL_Q_MQ)
+		blk_mq_free_tag_set(&nullb->tag_set);
+out_cleanup_queues:
+	cleanup_queues(nullb);
+out_free_nullb:
+	kfree(nullb);
+out:
+	return -ENOMEM;
 }
 
 static int __init null_init(void)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index d06206abd340..f909a8821e65 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -30,6 +30,9 @@ struct virtio_blk
 	/* The disk structure for the kernel. */
 	struct gendisk *disk;
 
+	/* Block layer tags. */
+	struct blk_mq_tag_set tag_set;
+
 	/* Process context for config space updates */
 	struct work_struct config_work;
 
@@ -480,8 +483,9 @@ static const struct device_attribute dev_attr_cache_type_rw =
 	__ATTR(cache_type, S_IRUGO|S_IWUSR,
 	       virtblk_cache_type_show, virtblk_cache_type_store);
 
-static int virtblk_init_request(void *data, struct blk_mq_hw_ctx *hctx,
-		struct request *rq, unsigned int nr)
+static int virtblk_init_request(void *data, struct request *rq,
+		unsigned int hctx_idx, unsigned int request_idx,
+		unsigned int numa_node)
 {
 	struct virtio_blk *vblk = data;
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);
@@ -495,18 +499,12 @@ static struct blk_mq_ops virtio_mq_ops = {
 	.map_queue	= blk_mq_map_queue,
 	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
 	.free_hctx	= blk_mq_free_single_hw_queue,
-	.init_request	= virtblk_init_request,
 	.complete	= virtblk_request_done,
+	.init_request	= virtblk_init_request,
 };
 
-static struct blk_mq_reg virtio_mq_reg = {
-	.ops		= &virtio_mq_ops,
-	.nr_hw_queues	= 1,
-	.queue_depth	= 0, /* Set in virtblk_probe */
-	.numa_node	= NUMA_NO_NODE,
-	.flags		= BLK_MQ_F_SHOULD_MERGE,
-};
-module_param_named(queue_depth, virtio_mq_reg.queue_depth, uint, 0444);
+static unsigned int virtblk_queue_depth;
+module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);
 
 static int virtblk_probe(struct virtio_device *vdev)
 {
@@ -562,20 +560,32 @@ static int virtblk_probe(struct virtio_device *vdev)
 	}
 
 	/* Default queue sizing is to fill the ring. */
-	if (!virtio_mq_reg.queue_depth) {
-		virtio_mq_reg.queue_depth = vblk->vq->num_free;
+	if (!virtblk_queue_depth) {
+		virtblk_queue_depth = vblk->vq->num_free;
 		/* ... but without indirect descs, we use 2 descs per req */
 		if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
-			virtio_mq_reg.queue_depth /= 2;
+			virtblk_queue_depth /= 2;
 	}
-	virtio_mq_reg.cmd_size =
+
+	memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
+	vblk->tag_set.ops = &virtio_mq_ops;
+	vblk->tag_set.nr_hw_queues = 1;
+	vblk->tag_set.queue_depth = virtblk_queue_depth;
+	vblk->tag_set.numa_node = NUMA_NO_NODE;
+	vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+	vblk->tag_set.cmd_size =
 		sizeof(struct virtblk_req) +
 		sizeof(struct scatterlist) * sg_elems;
+	vblk->tag_set.driver_data = vblk;
 
-	q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
+	err = blk_mq_alloc_tag_set(&vblk->tag_set);
+	if (err)
+		goto out_put_disk;
+
+	q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set);
 	if (!q) {
 		err = -ENOMEM;
-		goto out_put_disk;
+		goto out_free_tags;
 	}
 
 	q->queuedata = vblk;
@@ -678,6 +688,8 @@ static int virtblk_probe(struct virtio_device *vdev)
 out_del_disk:
 	del_gendisk(vblk->disk);
 	blk_cleanup_queue(vblk->disk->queue);
+out_free_tags:
+	blk_mq_free_tag_set(&vblk->tag_set);
 out_put_disk:
 	put_disk(vblk->disk);
 out_free_vq:
@@ -704,6 +716,8 @@ static void virtblk_remove(struct virtio_device *vdev)
 	del_gendisk(vblk->disk);
 	blk_cleanup_queue(vblk->disk->queue);
 
+	blk_mq_free_tag_set(&vblk->tag_set);
+
 	/* Stop all the virtqueues. */
 	vdev->config->reset(vdev);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 29c1a6e83814..a4ea0ce83b07 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -33,8 +33,6 @@ struct blk_mq_hw_ctx {
 	unsigned int 		nr_ctx_map;
 	unsigned long		*ctx_map;
 
-	struct request		**rqs;
-	struct list_head	page_list;
 	struct blk_mq_tags	*tags;
 
 	unsigned long		queued;
@@ -42,7 +40,6 @@ struct blk_mq_hw_ctx {
 #define BLK_MQ_MAX_DISPATCH_ORDER	10
 	unsigned long		dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
 
-	unsigned int		queue_depth;
 	unsigned int		numa_node;
 	unsigned int		cmd_size;	/* per-request extra data */
 
@@ -50,7 +47,7 @@ struct blk_mq_hw_ctx {
 	struct kobject		kobj;
 };
 
-struct blk_mq_reg {
+struct blk_mq_tag_set {
 	struct blk_mq_ops	*ops;
 	unsigned int		nr_hw_queues;
 	unsigned int		queue_depth;
@@ -59,18 +56,22 @@ struct blk_mq_reg {
 	int			numa_node;
 	unsigned int		timeout;
 	unsigned int		flags;		/* BLK_MQ_F_* */
+	void			*driver_data;
+
+	struct blk_mq_tags	**tags;
 };
 
 typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *);
 typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
-typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int);
+typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_tag_set *,
+		unsigned int);
 typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
-typedef int (init_request_fn)(void *, struct blk_mq_hw_ctx *,
-		struct request *, unsigned int);
-typedef void (exit_request_fn)(void *, struct blk_mq_hw_ctx *,
-		struct request *, unsigned int);
+typedef int (init_request_fn)(void *, struct request *, unsigned int,
+		unsigned int, unsigned int);
+typedef void (exit_request_fn)(void *, struct request *, unsigned int,
+		unsigned int);
 
 struct blk_mq_ops {
 	/*
@@ -127,10 +128,13 @@ enum {
 	BLK_MQ_MAX_DEPTH	= 2048,
 };
 
-struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *);
+struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 int blk_mq_register_disk(struct gendisk *);
 void blk_mq_unregister_disk(struct gendisk *);
 
+int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
+void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
+
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
 
 void blk_mq_insert_request(struct request *, bool, bool, bool);
@@ -139,10 +143,10 @@ void blk_mq_free_request(struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp);
 struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp);
-struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag);
+struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
-struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int);
+struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int);
 void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int);
 
 bool blk_mq_end_io_partial(struct request *rq, int error,
@@ -173,12 +177,6 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
 	return (void *) rq + sizeof(*rq);
 }
 
-static inline struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx,
-					       unsigned int tag)
-{
-	return hctx->rqs[tag];
-}
-
 #define queue_for_each_hw_ctx(q, hctx, i)				\
 	for ((i) = 0; (i) < (q)->nr_hw_queues &&			\
 	     ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)
-- 
cgit 


From fb3ccb5da71273e7f0d50b50bc879e50cedd60e7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 14 Apr 2014 10:30:12 +0200
Subject: block: all blk-mq requests are tagged

Instead of setting the REQ_QUEUED flag on each blk-mq request, just take
it into account in blk_rq_tagged(), the only macro checking it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index eb5e94803892..95bb551273ab 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1101,7 +1101,8 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 /*
  * tag stuff
  */
-#define blk_rq_tagged(rq)		((rq)->cmd_flags & REQ_QUEUED)
+#define blk_rq_tagged(rq) \
+	((rq)->mq_ctx || ((rq)->cmd_flags & REQ_QUEUED))
 extern int blk_queue_start_tag(struct request_queue *, struct request *);
 extern struct request *blk_queue_find_tag(struct request_queue *, int);
 extern void blk_queue_end_tag(struct request_queue *, struct request *);
-- 
cgit 


From 81b3b2711072b6047d5f332cd8751a1c5c9a3fb2 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Tue, 28 Jan 2014 12:36:48 +0100
Subject: clocksource: sh_cmt: Add support for multiple channels per device

CMT hardware devices can support multiple channels, with global
registers and per-channel registers. The sh_cmt driver currently models
the hardware with one Linux device per channel. This model makes it
difficult to handle global registers in a clean way.

Add support for a new model that uses one Linux device per timer with
multiple channels per device. This requires changes to platform data, so
add new channel configuration fields.

Support for the legacy model is kept and will be removed after all
platforms switch to the new model.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
---
 drivers/clocksource/sh_cmt.c | 304 +++++++++++++++++++++++++++++++++----------
 include/linux/sh_timer.h     |   1 +
 2 files changed, 237 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index c753efcfe9f5..1efe7d64efca 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -53,7 +53,16 @@ struct sh_cmt_device;
  * channel registers block. All other versions have a shared start/stop register
  * located in the global space.
  *
- * Note that CMT0 on r8a73a4, r8a7790 and r8a7791, while implementing 32-bit
+ * Channels are indexed from 0 to N-1 in the documentation. The channel index
+ * infers the start/stop bit position in the control register and the channel
+ * registers block address. Some CMT instances have a subset of channels
+ * available, in which case the index in the documentation doesn't match the
+ * "real" index as implemented in hardware. This is for instance the case with
+ * CMT0 on r8a7740, which is a 32-bit variant with a single channel numbered 0
+ * in the documentation but using start/stop bit 5 and having its registers
+ * block at 0x60.
+ *
+ * Similarly CMT0 on r8a73a4, r8a7790 and r8a7791, while implementing 32-bit
  * channels only, is a 48-bit gen2 CMT with the 48-bit channels unavailable.
  */
 
@@ -85,10 +94,14 @@ struct sh_cmt_info {
 
 struct sh_cmt_channel {
 	struct sh_cmt_device *cmt;
-	unsigned int index;
 
-	void __iomem *base;
+	unsigned int index;	/* Index in the documentation */
+	unsigned int hwidx;	/* Real hardware index */
+
+	void __iomem *iostart;
+	void __iomem *ioctrl;
 
+	unsigned int timer_bit;
 	unsigned long flags;
 	unsigned long match_value;
 	unsigned long next_match_value;
@@ -105,6 +118,7 @@ struct sh_cmt_device {
 	struct platform_device *pdev;
 
 	const struct sh_cmt_info *info;
+	bool legacy;
 
 	void __iomem *mapbase_ch;
 	void __iomem *mapbase;
@@ -112,6 +126,9 @@ struct sh_cmt_device {
 
 	struct sh_cmt_channel *channels;
 	unsigned int num_channels;
+
+	bool has_clockevent;
+	bool has_clocksource;
 };
 
 #define SH_CMT16_CMCSR_CMF		(1 << 7)
@@ -223,41 +240,47 @@ static const struct sh_cmt_info sh_cmt_info[] = {
 
 static inline unsigned long sh_cmt_read_cmstr(struct sh_cmt_channel *ch)
 {
-	return ch->cmt->info->read_control(ch->cmt->mapbase, 0);
+	if (ch->iostart)
+		return ch->cmt->info->read_control(ch->iostart, 0);
+	else
+		return ch->cmt->info->read_control(ch->cmt->mapbase, 0);
 }
 
-static inline unsigned long sh_cmt_read_cmcsr(struct sh_cmt_channel *ch)
+static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch,
+				      unsigned long value)
 {
-	return ch->cmt->info->read_control(ch->base, CMCSR);
+	if (ch->iostart)
+		ch->cmt->info->write_control(ch->iostart, 0, value);
+	else
+		ch->cmt->info->write_control(ch->cmt->mapbase, 0, value);
 }
 
-static inline unsigned long sh_cmt_read_cmcnt(struct sh_cmt_channel *ch)
+static inline unsigned long sh_cmt_read_cmcsr(struct sh_cmt_channel *ch)
 {
-	return ch->cmt->info->read_count(ch->base, CMCNT);
+	return ch->cmt->info->read_control(ch->ioctrl, CMCSR);
 }
 
-static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch,
+static inline void sh_cmt_write_cmcsr(struct sh_cmt_channel *ch,
 				      unsigned long value)
 {
-	ch->cmt->info->write_control(ch->cmt->mapbase, 0, value);
+	ch->cmt->info->write_control(ch->ioctrl, CMCSR, value);
 }
 
-static inline void sh_cmt_write_cmcsr(struct sh_cmt_channel *ch,
-				      unsigned long value)
+static inline unsigned long sh_cmt_read_cmcnt(struct sh_cmt_channel *ch)
 {
-	ch->cmt->info->write_control(ch->base, CMCSR, value);
+	return ch->cmt->info->read_count(ch->ioctrl, CMCNT);
 }
 
 static inline void sh_cmt_write_cmcnt(struct sh_cmt_channel *ch,
 				      unsigned long value)
 {
-	ch->cmt->info->write_count(ch->base, CMCNT, value);
+	ch->cmt->info->write_count(ch->ioctrl, CMCNT, value);
 }
 
 static inline void sh_cmt_write_cmcor(struct sh_cmt_channel *ch,
 				      unsigned long value)
 {
-	ch->cmt->info->write_count(ch->base, CMCOR, value);
+	ch->cmt->info->write_count(ch->ioctrl, CMCOR, value);
 }
 
 static unsigned long sh_cmt_get_counter(struct sh_cmt_channel *ch,
@@ -286,7 +309,6 @@ static DEFINE_RAW_SPINLOCK(sh_cmt_lock);
 
 static void sh_cmt_start_stop_ch(struct sh_cmt_channel *ch, int start)
 {
-	struct sh_timer_config *cfg = ch->cmt->pdev->dev.platform_data;
 	unsigned long flags, value;
 
 	/* start stop register shared by multiple timer channels */
@@ -294,9 +316,9 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_channel *ch, int start)
 	value = sh_cmt_read_cmstr(ch);
 
 	if (start)
-		value |= 1 << cfg->timer_bit;
+		value |= 1 << ch->timer_bit;
 	else
-		value &= ~(1 << cfg->timer_bit);
+		value &= ~(1 << ch->timer_bit);
 
 	sh_cmt_write_cmstr(ch, value);
 	raw_spin_unlock_irqrestore(&sh_cmt_lock, flags);
@@ -790,27 +812,72 @@ static void sh_cmt_register_clockevent(struct sh_cmt_channel *ch,
 static int sh_cmt_register(struct sh_cmt_channel *ch, const char *name,
 			   bool clockevent, bool clocksource)
 {
-	if (clockevent)
+	if (clockevent) {
+		ch->cmt->has_clockevent = true;
 		sh_cmt_register_clockevent(ch, name);
+	}
 
-	if (clocksource)
+	if (clocksource) {
+		ch->cmt->has_clocksource = true;
 		sh_cmt_register_clocksource(ch, name);
+	}
 
 	return 0;
 }
 
 static int sh_cmt_setup_channel(struct sh_cmt_channel *ch, unsigned int index,
-				struct sh_cmt_device *cmt)
+				unsigned int hwidx, bool clockevent,
+				bool clocksource, struct sh_cmt_device *cmt)
 {
-	struct sh_timer_config *cfg = cmt->pdev->dev.platform_data;
 	int irq;
 	int ret;
 
+	/* Skip unused channels. */
+	if (!clockevent && !clocksource)
+		return 0;
+
 	ch->cmt = cmt;
-	ch->base = cmt->mapbase_ch;
 	ch->index = index;
+	ch->hwidx = hwidx;
+
+	/*
+	 * Compute the address of the channel control register block. For the
+	 * timers with a per-channel start/stop register, compute its address
+	 * as well.
+	 *
+	 * For legacy configuration the address has been mapped explicitly.
+	 */
+	if (cmt->legacy) {
+		ch->ioctrl = cmt->mapbase_ch;
+	} else {
+		switch (cmt->info->model) {
+		case SH_CMT_16BIT:
+			ch->ioctrl = cmt->mapbase + 2 + ch->hwidx * 6;
+			break;
+		case SH_CMT_32BIT:
+		case SH_CMT_48BIT:
+			ch->ioctrl = cmt->mapbase + 0x10 + ch->hwidx * 0x10;
+			break;
+		case SH_CMT_32BIT_FAST:
+			/*
+			 * The 32-bit "fast" timer has a single channel at hwidx
+			 * 5 but is located at offset 0x40 instead of 0x60 for
+			 * some reason.
+			 */
+			ch->ioctrl = cmt->mapbase + 0x40;
+			break;
+		case SH_CMT_48BIT_GEN2:
+			ch->iostart = cmt->mapbase + ch->hwidx * 0x100;
+			ch->ioctrl = ch->iostart + 0x10;
+			break;
+		}
+	}
+
+	if (cmt->legacy)
+		irq = platform_get_irq(cmt->pdev, 0);
+	else
+		irq = platform_get_irq(cmt->pdev, ch->index);
 
-	irq = platform_get_irq(cmt->pdev, 0);
 	if (irq < 0) {
 		dev_err(&cmt->pdev->dev, "ch%u: failed to get irq\n",
 			ch->index);
@@ -825,9 +892,15 @@ static int sh_cmt_setup_channel(struct sh_cmt_channel *ch, unsigned int index,
 	ch->match_value = ch->max_match_value;
 	raw_spin_lock_init(&ch->lock);
 
+	if (cmt->legacy) {
+		ch->timer_bit = ch->hwidx;
+	} else {
+		ch->timer_bit = cmt->info->model == SH_CMT_48BIT_GEN2
+			      ? 0 : ch->hwidx;
+	}
+
 	ret = sh_cmt_register(ch, dev_name(&cmt->pdev->dev),
-			      cfg->clockevent_rating != 0,
-			      cfg->clocksource_rating != 0);
+			      clockevent, clocksource);
 	if (ret) {
 		dev_err(&cmt->pdev->dev, "ch%u: registration failed\n",
 			ch->index);
@@ -847,97 +920,180 @@ static int sh_cmt_setup_channel(struct sh_cmt_channel *ch, unsigned int index,
 	return 0;
 }
 
-static int sh_cmt_setup(struct sh_cmt_device *cmt, struct platform_device *pdev)
+static int sh_cmt_map_memory(struct sh_cmt_device *cmt)
 {
-	struct sh_timer_config *cfg = pdev->dev.platform_data;
-	struct resource *res, *res2;
-	int ret;
-	ret = -ENXIO;
+	struct resource *mem;
 
-	cmt->pdev = pdev;
+	mem = platform_get_resource(cmt->pdev, IORESOURCE_MEM, 0);
+	if (!mem) {
+		dev_err(&cmt->pdev->dev, "failed to get I/O memory\n");
+		return -ENXIO;
+	}
 
-	if (!cfg) {
-		dev_err(&cmt->pdev->dev, "missing platform data\n");
-		goto err0;
+	cmt->mapbase = ioremap_nocache(mem->start, resource_size(mem));
+	if (cmt->mapbase == NULL) {
+		dev_err(&cmt->pdev->dev, "failed to remap I/O memory\n");
+		return -ENXIO;
 	}
 
+	return 0;
+}
+
+static int sh_cmt_map_memory_legacy(struct sh_cmt_device *cmt)
+{
+	struct sh_timer_config *cfg = cmt->pdev->dev.platform_data;
+	struct resource *res, *res2;
+
+	/* map memory, let mapbase_ch point to our channel */
 	res = platform_get_resource(cmt->pdev, IORESOURCE_MEM, 0);
 	if (!res) {
 		dev_err(&cmt->pdev->dev, "failed to get I/O memory\n");
-		goto err0;
+		return -ENXIO;
 	}
 
-	/* optional resource for the shared timer start/stop register */
-	res2 = platform_get_resource(cmt->pdev, IORESOURCE_MEM, 1);
-
-	/* map memory, let mapbase_ch point to our channel */
 	cmt->mapbase_ch = ioremap_nocache(res->start, resource_size(res));
 	if (cmt->mapbase_ch == NULL) {
 		dev_err(&cmt->pdev->dev, "failed to remap I/O memory\n");
-		goto err0;
+		return -ENXIO;
 	}
 
+	/* optional resource for the shared timer start/stop register */
+	res2 = platform_get_resource(cmt->pdev, IORESOURCE_MEM, 1);
+
 	/* map second resource for CMSTR */
 	cmt->mapbase = ioremap_nocache(res2 ? res2->start :
 				       res->start - cfg->channel_offset,
 				       res2 ? resource_size(res2) : 2);
 	if (cmt->mapbase == NULL) {
 		dev_err(&cmt->pdev->dev, "failed to remap I/O second memory\n");
-		goto err1;
+		iounmap(cmt->mapbase_ch);
+		return -ENXIO;
 	}
 
-	/* get hold of clock */
+	/* identify the model based on the resources */
+	if (resource_size(res) == 6)
+		cmt->info = &sh_cmt_info[SH_CMT_16BIT];
+	else if (res2 && (resource_size(res2) == 4))
+		cmt->info = &sh_cmt_info[SH_CMT_48BIT_GEN2];
+	else
+		cmt->info = &sh_cmt_info[SH_CMT_32BIT];
+
+	return 0;
+}
+
+static void sh_cmt_unmap_memory(struct sh_cmt_device *cmt)
+{
+	iounmap(cmt->mapbase);
+	if (cmt->mapbase_ch)
+		iounmap(cmt->mapbase_ch);
+}
+
+static int sh_cmt_setup(struct sh_cmt_device *cmt, struct platform_device *pdev)
+{
+	struct sh_timer_config *cfg = pdev->dev.platform_data;
+	const struct platform_device_id *id = pdev->id_entry;
+	unsigned int hw_channels;
+	int ret;
+
+	memset(cmt, 0, sizeof(*cmt));
+	cmt->pdev = pdev;
+
+	if (!cfg) {
+		dev_err(&cmt->pdev->dev, "missing platform data\n");
+		return -ENXIO;
+	}
+
+	cmt->info = (const struct sh_cmt_info *)id->driver_data;
+	cmt->legacy = cmt->info ? false : true;
+
+	/* Get hold of clock. */
 	cmt->clk = clk_get(&cmt->pdev->dev, "cmt_fck");
 	if (IS_ERR(cmt->clk)) {
 		dev_err(&cmt->pdev->dev, "cannot get clock\n");
-		ret = PTR_ERR(cmt->clk);
-		goto err2;
+		return PTR_ERR(cmt->clk);
 	}
 
 	ret = clk_prepare(cmt->clk);
 	if (ret < 0)
-		goto err3;
+		goto err_clk_put;
 
-	/* identify the model based on the resources */
-	if (resource_size(res) == 6)
-		cmt->info = &sh_cmt_info[SH_CMT_16BIT];
-	else if (res2 && (resource_size(res2) == 4))
-		cmt->info = &sh_cmt_info[SH_CMT_48BIT_GEN2];
+	/*
+	 * Map the memory resource(s). We need to support both the legacy
+	 * platform device configuration (with one device per channel) and the
+	 * new version (with multiple channels per device).
+	 */
+	if (cmt->legacy)
+		ret = sh_cmt_map_memory_legacy(cmt);
 	else
-		cmt->info = &sh_cmt_info[SH_CMT_32BIT];
+		ret = sh_cmt_map_memory(cmt);
 
-	cmt->channels = kzalloc(sizeof(*cmt->channels), GFP_KERNEL);
+	if (ret < 0)
+		goto err_clk_unprepare;
+
+	/* Allocate and setup the channels. */
+	if (cmt->legacy) {
+		cmt->num_channels = 1;
+		hw_channels = 0;
+	} else {
+		cmt->num_channels = hweight8(cfg->channels_mask);
+		hw_channels = cfg->channels_mask;
+	}
+
+	cmt->channels = kzalloc(cmt->num_channels * sizeof(*cmt->channels),
+				GFP_KERNEL);
 	if (cmt->channels == NULL) {
 		ret = -ENOMEM;
-		goto err4;
+		goto err_unmap;
 	}
 
-	cmt->num_channels = 1;
+	if (cmt->legacy) {
+		ret = sh_cmt_setup_channel(&cmt->channels[0],
+					   cfg->timer_bit, cfg->timer_bit,
+					   cfg->clockevent_rating != 0,
+					   cfg->clocksource_rating != 0, cmt);
+		if (ret < 0)
+			goto err_unmap;
+	} else {
+		unsigned int mask = hw_channels;
+		unsigned int i;
 
-	ret = sh_cmt_setup_channel(&cmt->channels[0], cfg->timer_bit, cmt);
-	if (ret < 0)
-		goto err4;
+		/*
+		 * Use the first channel as a clock event device and the second
+		 * channel as a clock source. If only one channel is available
+		 * use it for both.
+		 */
+		for (i = 0; i < cmt->num_channels; ++i) {
+			unsigned int hwidx = ffs(mask) - 1;
+			bool clocksource = i == 1 || cmt->num_channels == 1;
+			bool clockevent = i == 0;
+
+			ret = sh_cmt_setup_channel(&cmt->channels[i], i, hwidx,
+						   clockevent, clocksource,
+						   cmt);
+			if (ret < 0)
+				goto err_unmap;
+
+			mask &= ~(1 << hwidx);
+		}
+	}
 
 	platform_set_drvdata(pdev, cmt);
 
 	return 0;
-err4:
+
+err_unmap:
 	kfree(cmt->channels);
+	sh_cmt_unmap_memory(cmt);
+err_clk_unprepare:
 	clk_unprepare(cmt->clk);
-err3:
+err_clk_put:
 	clk_put(cmt->clk);
-err2:
-	iounmap(cmt->mapbase);
-err1:
-	iounmap(cmt->mapbase_ch);
-err0:
 	return ret;
 }
 
 static int sh_cmt_probe(struct platform_device *pdev)
 {
 	struct sh_cmt_device *cmt = platform_get_drvdata(pdev);
-	struct sh_timer_config *cfg = pdev->dev.platform_data;
 	int ret;
 
 	if (!is_early_platform_device(pdev)) {
@@ -966,7 +1122,7 @@ static int sh_cmt_probe(struct platform_device *pdev)
 		return 0;
 
  out:
-	if (cfg->clockevent_rating || cfg->clocksource_rating)
+	if (cmt->has_clockevent || cmt->has_clocksource)
 		pm_runtime_irq_safe(&pdev->dev);
 	else
 		pm_runtime_idle(&pdev->dev);
@@ -979,12 +1135,24 @@ static int sh_cmt_remove(struct platform_device *pdev)
 	return -EBUSY; /* cannot unregister clockevent and clocksource */
 }
 
+static const struct platform_device_id sh_cmt_id_table[] = {
+	{ "sh_cmt", 0 },
+	{ "sh-cmt-16", (kernel_ulong_t)&sh_cmt_info[SH_CMT_16BIT] },
+	{ "sh-cmt-32", (kernel_ulong_t)&sh_cmt_info[SH_CMT_32BIT] },
+	{ "sh-cmt-32-fast", (kernel_ulong_t)&sh_cmt_info[SH_CMT_32BIT_FAST] },
+	{ "sh-cmt-48", (kernel_ulong_t)&sh_cmt_info[SH_CMT_48BIT] },
+	{ "sh-cmt-48-gen2", (kernel_ulong_t)&sh_cmt_info[SH_CMT_48BIT_GEN2] },
+	{ }
+};
+MODULE_DEVICE_TABLE(platform, sh_cmt_id_table);
+
 static struct platform_driver sh_cmt_device_driver = {
 	.probe		= sh_cmt_probe,
 	.remove		= sh_cmt_remove,
 	.driver		= {
 		.name	= "sh_cmt",
-	}
+	},
+	.id_table	= sh_cmt_id_table,
 };
 
 static int __init sh_cmt_init(void)
diff --git a/include/linux/sh_timer.h b/include/linux/sh_timer.h
index 4d9dcd138315..8e1e036d6d45 100644
--- a/include/linux/sh_timer.h
+++ b/include/linux/sh_timer.h
@@ -7,6 +7,7 @@ struct sh_timer_config {
 	int timer_bit;
 	unsigned long clockevent_rating;
 	unsigned long clocksource_rating;
+	unsigned int channels_mask;
 };
 
 #endif /* __SH_TIMER_H__ */
-- 
cgit 


From 63151a449ebaef062ffac5b302206565ff5ef62e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Apr 2014 09:44:52 +0200
Subject: blk-mq: allow drivers to hook into I/O completion

Split out the bottom half of blk_mq_end_io so that drivers can perform
work when they know a request has been completed, but before it has been
freed.  This also obsoletes blk_mq_end_io_partial as drivers can now
pass any value to blk_update_request directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 16 ++++++++++------
 include/linux/blk-mq.h |  9 ++-------
 2 files changed, 12 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b59a8d027dff..86d66e0e900c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -294,20 +294,24 @@ void blk_mq_clone_flush_request(struct request *flush_rq,
 		hctx->cmd_size);
 }
 
-bool blk_mq_end_io_partial(struct request *rq, int error, unsigned int nr_bytes)
+inline void __blk_mq_end_io(struct request *rq, int error)
 {
-	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
-		return true;
-
 	blk_account_io_done(rq);
 
 	if (rq->end_io)
 		rq->end_io(rq, error);
 	else
 		blk_mq_free_request(rq);
-	return false;
 }
-EXPORT_SYMBOL(blk_mq_end_io_partial);
+EXPORT_SYMBOL(__blk_mq_end_io);
+
+void blk_mq_end_io(struct request *rq, int error)
+{
+	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
+		BUG();
+	__blk_mq_end_io(rq, error);
+}
+EXPORT_SYMBOL(blk_mq_end_io);
 
 static void __blk_mq_complete_request_remote(void *data)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a4ea0ce83b07..a81b474b794f 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -149,13 +149,8 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_ind
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int);
 void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int);
 
-bool blk_mq_end_io_partial(struct request *rq, int error,
-		unsigned int nr_bytes);
-static inline void blk_mq_end_io(struct request *rq, int error)
-{
-	bool done = !blk_mq_end_io_partial(rq, error, blk_rq_bytes(rq));
-	BUG_ON(!done);
-}
+void blk_mq_end_io(struct request *rq, int error);
+void __blk_mq_end_io(struct request *rq, int error);
 
 void blk_mq_complete_request(struct request *rq);
 
-- 
cgit 


From 1b4a325858f695a9b5041313602d34b36f463724 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Apr 2014 09:44:54 +0200
Subject: blk-mq: add async parameter to blk_mq_start_stopped_hw_queues

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c             | 4 ++--
 drivers/block/virtio_blk.c | 4 ++--
 include/linux/blk-mq.h     | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 963a82109386..da3808823e44 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -700,7 +700,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_start_hw_queue);
 
-void blk_mq_start_stopped_hw_queues(struct request_queue *q)
+void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
@@ -711,7 +711,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q)
 
 		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
 		preempt_disable();
-		blk_mq_run_hw_queue(hctx, true);
+		blk_mq_run_hw_queue(hctx, async);
 		preempt_enable();
 	}
 }
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index f909a8821e65..7a51f065edcd 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -151,7 +151,7 @@ static void virtblk_done(struct virtqueue *vq)
 
 	/* In case queue is stopped waiting for more buffers. */
 	if (req_done)
-		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
+		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
 }
 
 static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
@@ -762,7 +762,7 @@ static int virtblk_restore(struct virtio_device *vdev)
 	vblk->config_enable = true;
 	ret = init_vq(vdev->priv);
 	if (!ret)
-		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
+		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
 
 	return ret;
 }
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a81b474b794f..9ecfab96d8c9 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -157,7 +157,7 @@ void blk_mq_complete_request(struct request *rq);
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_stop_hw_queues(struct request_queue *q);
-void blk_mq_start_stopped_hw_queues(struct request_queue *q);
+void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 
 /*
  * Driver command data is immediately after the request. So subtract request
-- 
cgit 


From 70f4db639c5b2479e08657392cbf3ba3cceea11c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Apr 2014 10:48:08 -0600
Subject: blk-mq: add blk_mq_delay_queue

Add a blk-mq equivalent to blk_delay_queue so that the scsi layer can ask
to be kicked again after a delay.

Signed-off-by: Christoph Hellwig <hch@lst.de>

Modified by me to kill the unnecessary preempt disable/enable
in the delayed workqueue handler.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       |  6 ++++--
 block/blk-mq.c         | 45 +++++++++++++++++++++++++++++++++++++++------
 include/linux/blk-mq.h |  4 +++-
 3 files changed, 46 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index ae6227fd07aa..90b6e63b8769 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -251,8 +251,10 @@ void blk_sync_queue(struct request_queue *q)
 		struct blk_mq_hw_ctx *hctx;
 		int i;
 
-		queue_for_each_hw_ctx(q, hctx, i)
-			cancel_delayed_work_sync(&hctx->delayed_work);
+		queue_for_each_hw_ctx(q, hctx, i) {
+			cancel_delayed_work_sync(&hctx->run_work);
+			cancel_delayed_work_sync(&hctx->delay_work);
+		}
 	} else {
 		cancel_delayed_work_sync(&q->delay_work);
 	}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index da3808823e44..0cf52dddfa6b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -640,7 +640,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 	if (!async && cpumask_test_cpu(smp_processor_id(), hctx->cpumask))
 		__blk_mq_run_hw_queue(hctx);
 	else if (hctx->queue->nr_hw_queues == 1)
-		kblockd_schedule_delayed_work(&hctx->delayed_work, 0);
+		kblockd_schedule_delayed_work(&hctx->run_work, 0);
 	else {
 		unsigned int cpu;
 
@@ -651,7 +651,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 		 * just queue on the first CPU.
 		 */
 		cpu = cpumask_first(hctx->cpumask);
-		kblockd_schedule_delayed_work_on(cpu, &hctx->delayed_work, 0);
+		kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0);
 	}
 }
 
@@ -675,7 +675,8 @@ EXPORT_SYMBOL(blk_mq_run_queues);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
-	cancel_delayed_work(&hctx->delayed_work);
+	cancel_delayed_work(&hctx->run_work);
+	cancel_delayed_work(&hctx->delay_work);
 	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
 }
 EXPORT_SYMBOL(blk_mq_stop_hw_queue);
@@ -717,15 +718,46 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
 }
 EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
 
-static void blk_mq_work_fn(struct work_struct *work)
+static void blk_mq_run_work_fn(struct work_struct *work)
 {
 	struct blk_mq_hw_ctx *hctx;
 
-	hctx = container_of(work, struct blk_mq_hw_ctx, delayed_work.work);
+	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
 
 	__blk_mq_run_hw_queue(hctx);
 }
 
+static void blk_mq_delay_work_fn(struct work_struct *work)
+{
+	struct blk_mq_hw_ctx *hctx;
+
+	hctx = container_of(work, struct blk_mq_hw_ctx, delay_work.work);
+
+	if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state))
+		__blk_mq_run_hw_queue(hctx);
+}
+
+void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
+{
+	unsigned long tmo = msecs_to_jiffies(msecs);
+
+	if (hctx->queue->nr_hw_queues == 1)
+		kblockd_schedule_delayed_work(&hctx->delay_work, tmo);
+	else {
+		unsigned int cpu;
+
+		/*
+		 * It'd be great if the workqueue API had a way to pass
+		 * in a mask and had some smarts for more clever placement
+		 * than the first CPU. Or we could round-robin here. For now,
+		 * just queue on the first CPU.
+		 */
+		cpu = cpumask_first(hctx->cpumask);
+		kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo);
+	}
+}
+EXPORT_SYMBOL(blk_mq_delay_queue);
+
 static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
 				    struct request *rq, bool at_head)
 {
@@ -1179,7 +1211,8 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 		if (node == NUMA_NO_NODE)
 			node = hctx->numa_node = set->numa_node;
 
-		INIT_DELAYED_WORK(&hctx->delayed_work, blk_mq_work_fn);
+		INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
+		INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
 		spin_lock_init(&hctx->lock);
 		INIT_LIST_HEAD(&hctx->dispatch);
 		hctx->queue = q;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 9ecfab96d8c9..ae868e77bc2f 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -18,7 +18,8 @@ struct blk_mq_hw_ctx {
 	} ____cacheline_aligned_in_smp;
 
 	unsigned long		state;		/* BLK_MQ_S_* flags */
-	struct delayed_work	delayed_work;
+	struct delayed_work	run_work;
+	struct delayed_work	delay_work;
 	cpumask_var_t		cpumask;
 
 	unsigned long		flags;		/* BLK_MQ_F_* flags */
@@ -158,6 +159,7 @@ void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
+void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 
 /*
  * Driver command data is immediately after the request. So subtract request
-- 
cgit 


From 2f268556567ebeb3538f99b9bdad177581439dcb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Apr 2014 09:44:56 +0200
Subject: blk-mq: add blk_mq_start_hw_queues

Add a helper to unconditionally kick contexts of a queue.  This will
be needed by the SCSI layer to provide fair queueing between multiple
devices on a single host.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 11 +++++++++++
 include/linux/blk-mq.h |  1 +
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0cf52dddfa6b..543bbc08a261 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -701,6 +701,17 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
 }
 EXPORT_SYMBOL(blk_mq_start_hw_queue);
 
+void blk_mq_start_hw_queues(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		blk_mq_start_hw_queue(hctx);
+}
+EXPORT_SYMBOL(blk_mq_start_hw_queues);
+
+
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
 {
 	struct blk_mq_hw_ctx *hctx;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ae868e77bc2f..391377e53367 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -158,6 +158,7 @@ void blk_mq_complete_request(struct request *rq);
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
 void blk_mq_stop_hw_queues(struct request_queue *q);
+void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 
-- 
cgit 


From ed0791b2f83cec4e77d88c4e9baabcebf9254a78 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Apr 2014 09:44:57 +0200
Subject: blk-mq: add blk_mq_requeue_request

This allows requeueing a request that has been accepted by ->queue_rq
earlier.  This is needed by the SCSI layer in various error conditions.

The existing internal blk_mq_requeue_request is renamed to
__blk_mq_requeue_request as it is a lower level building block for this
functionality.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 18 ++++++++++++++++--
 include/linux/blk-mq.h |  2 ++
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 543bbc08a261..ee225cc312b8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -400,7 +400,7 @@ static void blk_mq_start_request(struct request *rq, bool last)
 		rq->cmd_flags |= REQ_END;
 }
 
-static void blk_mq_requeue_request(struct request *rq)
+static void __blk_mq_requeue_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
@@ -413,6 +413,20 @@ static void blk_mq_requeue_request(struct request *rq)
 		rq->nr_phys_segments--;
 }
 
+void blk_mq_requeue_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+
+	__blk_mq_requeue_request(rq);
+	blk_clear_rq_complete(rq);
+
+	trace_block_rq_requeue(q, rq);
+
+	BUG_ON(blk_queued_rq(rq));
+	blk_mq_insert_request(rq, true, true, false);
+}
+EXPORT_SYMBOL(blk_mq_requeue_request);
+
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 {
 	return tags->rqs[tag];
@@ -602,7 +616,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 			 * time
 			 */
 			list_add(&rq->queuelist, &rq_list);
-			blk_mq_requeue_request(rq);
+			__blk_mq_requeue_request(rq);
 			break;
 		default:
 			pr_err("blk-mq: bad return on queue: %d\n", ret);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 391377e53367..ab469d525894 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -153,6 +153,8 @@ void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int);
 void blk_mq_end_io(struct request *rq, int error);
 void __blk_mq_end_io(struct request *rq, int error);
 
+void blk_mq_requeue_request(struct request *rq);
+
 void blk_mq_complete_request(struct request *rq);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
-- 
cgit 


From f88a164b72bd51fe4c89e06ac9939f2afe39c7ed Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Apr 2014 09:44:58 +0200
Subject: blk-mq: rename mq_flush_work struct request member

We will use this work_struct to requeue scsi commands from the
completion handler as well, so give it a more generic name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-flush.c      | 6 +++---
 include/linux/blkdev.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index c41fc19f75d1..ec7a224d6733 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -134,7 +134,7 @@ static void mq_flush_run(struct work_struct *work)
 {
 	struct request *rq;
 
-	rq = container_of(work, struct request, mq_flush_work);
+	rq = container_of(work, struct request, requeue_work);
 
 	memset(&rq->csd, 0, sizeof(rq->csd));
 	blk_mq_insert_request(rq, false, true, false);
@@ -143,8 +143,8 @@ static void mq_flush_run(struct work_struct *work)
 static bool blk_flush_queue_rq(struct request *rq, bool add_front)
 {
 	if (rq->q->mq_ops) {
-		INIT_WORK(&rq->mq_flush_work, mq_flush_run);
-		kblockd_schedule_work(&rq->mq_flush_work);
+		INIT_WORK(&rq->requeue_work, mq_flush_run);
+		kblockd_schedule_work(&rq->requeue_work);
 		return false;
 	} else {
 		if (add_front)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 95bb551273ab..71288083a46f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -98,7 +98,7 @@ struct request {
 	struct list_head queuelist;
 	union {
 		struct call_single_data csd;
-		struct work_struct mq_flush_work;
+		struct work_struct requeue_work;
 		unsigned long fifo_time;
 	};
 
-- 
cgit 


From 12120077b2612a243d158605640cd39266906667 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Apr 2014 09:44:59 +0200
Subject: block: export blk_finish_request

This allows mirroring the blk-mq code flow for a more readable I/O
completion handler in SCSI.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       | 3 ++-
 include/linux/blkdev.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 90b6e63b8769..c4269701cb4f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2497,7 +2497,7 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
 /*
  * queue lock must be held
  */
-static void blk_finish_request(struct request *req, int error)
+void blk_finish_request(struct request *req, int error)
 {
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
@@ -2523,6 +2523,7 @@ static void blk_finish_request(struct request *req, int error)
 		__blk_put_request(req->q, req);
 	}
 }
+EXPORT_SYMBOL(blk_finish_request);
 
 /**
  * blk_end_bidi_request - Complete a bidi request
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 71288083a46f..20b26d4e53a2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -936,6 +936,7 @@ extern struct request *blk_fetch_request(struct request_queue *q);
  */
 extern bool blk_update_request(struct request *rq, int error,
 			       unsigned int nr_bytes);
+extern void blk_finish_request(struct request *rq, int error);
 extern bool blk_end_request(struct request *rq, int error,
 			    unsigned int nr_bytes);
 extern void blk_end_request_all(struct request *rq, int error);
-- 
cgit 


From 49fd524f95cb4cc699d435e0ebb08b1c6220da6d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 16 Apr 2014 10:57:18 -0600
Subject: bsg: update check for rq based driver for blk-mq

bsg currently checks ->request_fn to check whether a queue can
handle struct request. But with blk-mq, we don't have a request_fn
yet are request based. Add a queue_is_rq_based() helper and use
that in bsg, I'm guessing this is not the last place we need to
update for this. Besides, it better explains what is being
checked.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bsg.c            | 2 +-
 include/linux/blkdev.h | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/bsg.c b/block/bsg.c
index 420a5a9f1b23..e5214c148096 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -1008,7 +1008,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent,
 	/*
 	 * we need a proper transport to send commands, not a stacked device
 	 */
-	if (!q->request_fn)
+	if (!queue_is_rq_based(q))
 		return 0;
 
 	bcd = &q->bsg_dev;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 20b26d4e53a2..74ee55fefcf0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -612,6 +612,15 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 
 #define rq_data_dir(rq)		(((rq)->cmd_flags & 1) != 0)
 
+/*
+ * Driver can handle struct request, if it either has an old style
+ * request_fn defined, or is blk-mq based.
+ */
+static inline bool queue_is_rq_based(struct request_queue *q)
+{
+	return q->request_fn || q->mq_ops;
+}
+
 static inline unsigned int blk_queue_cluster(struct request_queue *q)
 {
 	return q->limits.cluster;
-- 
cgit 


From a5d92ad32dad94fd8f3f61778561d532bb3a2f77 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Mon, 17 Mar 2014 10:57:00 +0000
Subject: efivars: Stop passing a struct argument to efivar_validate()

In preparation for compat support, we can't assume that user variable
object is represented by a 'struct efi_variable'. Convert the validation
functions to take the variable name as an argument, which is the only
piece of the struct that was ever used anyway.

Cc: Mike Waychison <mikew@google.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efi/efivars.c |  6 ++++--
 drivers/firmware/efi/vars.c    | 30 +++++++++++++++---------------
 include/linux/efi.h            |  6 ++++--
 3 files changed, 23 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 2c21cccc2572..5ee2cfb96698 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -231,7 +231,7 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count)
 	}
 
 	if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
-	    efivar_validate(new_var, data, size) == false) {
+	    efivar_validate(name, data, size) == false) {
 		printk(KERN_ERR "efivars: Malformed variable content\n");
 		return -EINVAL;
 	}
@@ -339,6 +339,7 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
 {
 	struct efi_variable *new_var = (struct efi_variable *)buf;
 	struct efivar_entry *new_entry;
+	efi_char16_t *name;
 	unsigned long size;
 	u32 attributes;
 	u8 *data;
@@ -351,11 +352,12 @@ static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
 		return -EINVAL;
 
 	attributes = new_var->Attributes;
+	name = new_var->VariableName;
 	size = new_var->DataSize;
 	data = new_var->Data;
 
 	if ((attributes & ~EFI_VARIABLE_MASK) != 0 ||
-	    efivar_validate(new_var, data, size) == false) {
+	    efivar_validate(name, data, size) == false) {
 		printk(KERN_ERR "efivars: Malformed variable content\n");
 		return -EINVAL;
 	}
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index b22659cccca4..f0a43646a2f3 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -42,7 +42,7 @@ DECLARE_WORK(efivar_work, NULL);
 EXPORT_SYMBOL_GPL(efivar_work);
 
 static bool
-validate_device_path(struct efi_variable *var, int match, u8 *buffer,
+validate_device_path(efi_char16_t *var_name, int match, u8 *buffer,
 		     unsigned long len)
 {
 	struct efi_generic_dev_path *node;
@@ -75,7 +75,7 @@ validate_device_path(struct efi_variable *var, int match, u8 *buffer,
 }
 
 static bool
-validate_boot_order(struct efi_variable *var, int match, u8 *buffer,
+validate_boot_order(efi_char16_t *var_name, int match, u8 *buffer,
 		    unsigned long len)
 {
 	/* An array of 16-bit integers */
@@ -86,18 +86,18 @@ validate_boot_order(struct efi_variable *var, int match, u8 *buffer,
 }
 
 static bool
-validate_load_option(struct efi_variable *var, int match, u8 *buffer,
+validate_load_option(efi_char16_t *var_name, int match, u8 *buffer,
 		     unsigned long len)
 {
 	u16 filepathlength;
 	int i, desclength = 0, namelen;
 
-	namelen = ucs2_strnlen(var->VariableName, sizeof(var->VariableName));
+	namelen = ucs2_strnlen(var_name, EFI_VAR_NAME_LEN);
 
 	/* Either "Boot" or "Driver" followed by four digits of hex */
 	for (i = match; i < match+4; i++) {
-		if (var->VariableName[i] > 127 ||
-		    hex_to_bin(var->VariableName[i] & 0xff) < 0)
+		if (var_name[i] > 127 ||
+		    hex_to_bin(var_name[i] & 0xff) < 0)
 			return true;
 	}
 
@@ -132,12 +132,12 @@ validate_load_option(struct efi_variable *var, int match, u8 *buffer,
 	/*
 	 * And, finally, check the filepath
 	 */
-	return validate_device_path(var, match, buffer + desclength + 6,
+	return validate_device_path(var_name, match, buffer + desclength + 6,
 				    filepathlength);
 }
 
 static bool
-validate_uint16(struct efi_variable *var, int match, u8 *buffer,
+validate_uint16(efi_char16_t *var_name, int match, u8 *buffer,
 		unsigned long len)
 {
 	/* A single 16-bit integer */
@@ -148,7 +148,7 @@ validate_uint16(struct efi_variable *var, int match, u8 *buffer,
 }
 
 static bool
-validate_ascii_string(struct efi_variable *var, int match, u8 *buffer,
+validate_ascii_string(efi_char16_t *var_name, int match, u8 *buffer,
 		      unsigned long len)
 {
 	int i;
@@ -166,7 +166,7 @@ validate_ascii_string(struct efi_variable *var, int match, u8 *buffer,
 
 struct variable_validate {
 	char *name;
-	bool (*validate)(struct efi_variable *var, int match, u8 *data,
+	bool (*validate)(efi_char16_t *var_name, int match, u8 *data,
 			 unsigned long len);
 };
 
@@ -189,10 +189,10 @@ static const struct variable_validate variable_validate[] = {
 };
 
 bool
-efivar_validate(struct efi_variable *var, u8 *data, unsigned long len)
+efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len)
 {
 	int i;
-	u16 *unicode_name = var->VariableName;
+	u16 *unicode_name = var_name;
 
 	for (i = 0; variable_validate[i].validate != NULL; i++) {
 		const char *name = variable_validate[i].name;
@@ -208,7 +208,7 @@ efivar_validate(struct efi_variable *var, u8 *data, unsigned long len)
 
 			/* Wildcard in the matching name means we've matched */
 			if (c == '*')
-				return variable_validate[i].validate(var,
+				return variable_validate[i].validate(var_name,
 							     match, data, len);
 
 			/* Case sensitive match */
@@ -217,7 +217,7 @@ efivar_validate(struct efi_variable *var, u8 *data, unsigned long len)
 
 			/* Reached the end of the string while matching */
 			if (!c)
-				return variable_validate[i].validate(var,
+				return variable_validate[i].validate(var_name,
 							     match, data, len);
 		}
 	}
@@ -805,7 +805,7 @@ int efivar_entry_set_get_size(struct efivar_entry *entry, u32 attributes,
 
 	*set = false;
 
-	if (efivar_validate(&entry->var, data, *size) == false)
+	if (efivar_validate(name, data, *size) == false)
 		return -EINVAL;
 
 	/*
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 82d0abb2b19f..6a4d8e27d1d7 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1039,8 +1039,10 @@ struct efivars {
  * and we use a page for reading/writing.
  */
 
+#define EFI_VAR_NAME_LEN	1024
+
 struct efi_variable {
-	efi_char16_t  VariableName[1024/sizeof(efi_char16_t)];
+	efi_char16_t  VariableName[EFI_VAR_NAME_LEN/sizeof(efi_char16_t)];
 	efi_guid_t    VendorGuid;
 	unsigned long DataSize;
 	__u8          Data[1024];
@@ -1122,7 +1124,7 @@ int efivar_entry_iter(int (*func)(struct efivar_entry *, void *),
 struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
 				       struct list_head *head, bool remove);
 
-bool efivar_validate(struct efi_variable *var, u8 *data, unsigned long len);
+bool efivar_validate(efi_char16_t *var_name, u8 *data, unsigned long len);
 
 extern struct work_struct efivar_work;
 void efivar_run_worker(void);
-- 
cgit 


From 68c3b4d1676d870f0453c31d5a52e7e65c7448ae Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 31 Mar 2014 21:50:44 +0300
Subject: KVM: VMX: speed up wildcard MMIO EVENTFD

With KVM, MMIO is much slower than PIO, due to the need to
do page walk and emulation. But with EPT, it does not have to be: we
know the address from the VMCS so if the address is unique, we can look
up the eventfd directly, bypassing emulation.

Unfortunately, this only works if userspace does not need to match on
access length and data.  The implementation adds a separate FAST_MMIO
bus internally. This serves two purposes:
    - minimize overhead for old userspace that does not use eventfd with length = 0
    - minimize disruption in other code (since we don't know the length,
      devices on the MMIO bus only get a valid address in write, this
      way we don't need to touch all devices to teach them to handle
      an invalid length)

At the moment, this optimization only has effect for EPT on x86.

It will be possible to speed up MMIO for NPT and MMU using the same
idea in the future.

With this patch applied, on VMX MMIO EVENTFD is essentially as fast as PIO.
I was unable to detect any measurable slowdown to non-eventfd MMIO.

Making MMIO faster is important for the upcoming virtio 1.0 which
includes an MMIO signalling capability.

The idea was suggested by Peter Anvin.  Lots of thanks to Gleb for
pre-review and suggestions.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c       |  4 ++++
 include/linux/kvm_host.h |  1 +
 include/uapi/linux/kvm.h |  1 +
 virt/kvm/eventfd.c       | 16 ++++++++++++++++
 virt/kvm/kvm_main.c      |  1 +
 5 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1f68c5831924..eb3f2b1b764c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5528,6 +5528,10 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 	gpa_t gpa;
 
 	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+	if (!kvm_io_bus_write(vcpu->kvm, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+		skip_emulated_instruction(vcpu);
+		return 1;
+	}
 
 	ret = handle_mmio_page_fault_common(vcpu, gpa, true);
 	if (likely(ret == RET_MMIO_PF_EMULATE))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7d21cf9f4380..6c3c2eb96d06 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -163,6 +163,7 @@ enum kvm_bus {
 	KVM_MMIO_BUS,
 	KVM_PIO_BUS,
 	KVM_VIRTIO_CCW_NOTIFY_BUS,
+	KVM_FAST_MMIO_BUS,
 	KVM_NR_BUSES
 };
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 39098a61f41c..d8a6ce4c2a83 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -515,6 +515,7 @@ enum {
 	kvm_ioeventfd_flag_nr_pio,
 	kvm_ioeventfd_flag_nr_deassign,
 	kvm_ioeventfd_flag_nr_virtio_ccw_notify,
+	kvm_ioeventfd_flag_nr_fast_mmio,
 	kvm_ioeventfd_flag_nr_max,
 };
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 2721996bb9c2..912ec5a95e2c 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -770,6 +770,16 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	if (ret < 0)
 		goto unlock_fail;
 
+	/* When length is ignored, MMIO is also put on a separate bus, for
+	 * faster lookups.
+	 */
+	if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {
+		ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
+					      p->addr, 0, &p->dev);
+		if (ret < 0)
+			goto register_fail;
+	}
+
 	kvm->buses[bus_idx]->ioeventfd_count++;
 	list_add_tail(&p->list, &kvm->ioeventfds);
 
@@ -777,6 +787,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	return 0;
 
+register_fail:
+	kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
 unlock_fail:
 	mutex_unlock(&kvm->slots_lock);
 
@@ -816,6 +828,10 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 			continue;
 
 		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
+		if (!p->length) {
+			kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
+						  &p->dev);
+		}
 		kvm->buses[bus_idx]->ioeventfd_count--;
 		ioeventfd_release(p);
 		ret = 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 56baae8c2f56..96456ac888ba 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2922,6 +2922,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
 
 	return -EOPNOTSUPP;
 }
+EXPORT_SYMBOL_GPL(kvm_io_bus_write);
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
-- 
cgit 


From febdbfe8a91ce0d11939d4940b592eb0dba8d663 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 6 Feb 2014 18:16:07 +0100
Subject: arch: Prepare for smp_mb__{before,after}_atomic()

Since the smp_mb__{before,after}*() ops are fundamentally dependent on
how an arch can implement atomics it doesn't make sense to have 3
variants of them. They must all be the same.

Furthermore, the 3 variants suggest they're only valid for those 3
atomic ops, while we have many more where they could be applied.

So move away from
smp_mb__{before,after}_{atomic,clear}_{dec,inc,bit}() and reduce the
interface to just the two: smp_mb__{before,after}_atomic().

This patch prepares the way by introducing default implementations in
asm-generic/barrier.h that default to a full barrier and providing
__deprecated inlines for the previous 6 barriers if they're not
provided by the arch.

This should allow for a mostly painless transition (lots of deprecated
warns in the interim).

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/n/tip-wr59327qdyi9mbzn6x937s4e@git.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Chen, Gong" <gong.chen@linux.intel.com>
Cc: John Sullivan <jsrhbz@kanargh.force9.co.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mauro Carvalho Chehab <m.chehab@samsung.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: linux-arch@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/asm-generic/atomic.h  |  7 +------
 include/asm-generic/barrier.h |  8 ++++++++
 include/asm-generic/bitops.h  |  9 +--------
 include/linux/atomic.h        | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/bitops.h        | 20 ++++++++++++++++++++
 kernel/sched/core.c           | 16 ++++++++++++++++
 6 files changed, 82 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
index 33bd2de3bc1e..9c79e7603459 100644
--- a/include/asm-generic/atomic.h
+++ b/include/asm-generic/atomic.h
@@ -16,6 +16,7 @@
 #define __ASM_GENERIC_ATOMIC_H
 
 #include <asm/cmpxchg.h>
+#include <asm/barrier.h>
 
 #ifdef CONFIG_SMP
 /* Force people to define core atomics */
@@ -182,11 +183,5 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
 }
 #endif
 
-/* Assume that atomic operations are already serializing */
-#define smp_mb__before_atomic_dec()	barrier()
-#define smp_mb__after_atomic_dec()	barrier()
-#define smp_mb__before_atomic_inc()	barrier()
-#define smp_mb__after_atomic_inc()	barrier()
-
 #endif /* __KERNEL__ */
 #endif /* __ASM_GENERIC_ATOMIC_H */
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index 6f692f8ac664..1402fa855388 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -62,6 +62,14 @@
 #define set_mb(var, value)  do { (var) = (value); mb(); } while (0)
 #endif
 
+#ifndef smp_mb__before_atomic
+#define smp_mb__before_atomic()	smp_mb()
+#endif
+
+#ifndef smp_mb__after_atomic
+#define smp_mb__after_atomic()	smp_mb()
+#endif
+
 #define smp_store_release(p, v)						\
 do {									\
 	compiletime_assert_atomic_type(*p);				\
diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index 280ca7a96f75..dcdcacf2fd2b 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -11,14 +11,7 @@
 
 #include <linux/irqflags.h>
 #include <linux/compiler.h>
-
-/*
- * clear_bit may not imply a memory barrier
- */
-#ifndef smp_mb__before_clear_bit
-#define smp_mb__before_clear_bit()	smp_mb()
-#define smp_mb__after_clear_bit()	smp_mb()
-#endif
+#include <asm/barrier.h>
 
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/ffz.h>
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 5b08a8540ecf..fef3a809e7cf 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -3,6 +3,42 @@
 #define _LINUX_ATOMIC_H
 #include <asm/atomic.h>
 
+/*
+ * Provide __deprecated wrappers for the new interface, avoid flag day changes.
+ * We need the ugly external functions to break header recursion hell.
+ */
+#ifndef smp_mb__before_atomic_inc
+static inline void __deprecated smp_mb__before_atomic_inc(void)
+{
+	extern void __smp_mb__before_atomic(void);
+	__smp_mb__before_atomic();
+}
+#endif
+
+#ifndef smp_mb__after_atomic_inc
+static inline void __deprecated smp_mb__after_atomic_inc(void)
+{
+	extern void __smp_mb__after_atomic(void);
+	__smp_mb__after_atomic();
+}
+#endif
+
+#ifndef smp_mb__before_atomic_dec
+static inline void __deprecated smp_mb__before_atomic_dec(void)
+{
+	extern void __smp_mb__before_atomic(void);
+	__smp_mb__before_atomic();
+}
+#endif
+
+#ifndef smp_mb__after_atomic_dec
+static inline void __deprecated smp_mb__after_atomic_dec(void)
+{
+	extern void __smp_mb__after_atomic(void);
+	__smp_mb__after_atomic();
+}
+#endif
+
 /**
  * atomic_add_unless - add unless the number is already a given value
  * @v: pointer of type atomic_t
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index be5fd38bd5a0..cbc5833fb221 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -32,6 +32,26 @@ extern unsigned long __sw_hweight64(__u64 w);
  */
 #include <asm/bitops.h>
 
+/*
+ * Provide __deprecated wrappers for the new interface, avoid flag day changes.
+ * We need the ugly external functions to break header recursion hell.
+ */
+#ifndef smp_mb__before_clear_bit
+static inline void __deprecated smp_mb__before_clear_bit(void)
+{
+	extern void __smp_mb__before_atomic(void);
+	__smp_mb__before_atomic();
+}
+#endif
+
+#ifndef smp_mb__after_clear_bit
+static inline void __deprecated smp_mb__after_clear_bit(void)
+{
+	extern void __smp_mb__after_atomic(void);
+	__smp_mb__after_atomic();
+}
+#endif
+
 #define for_each_set_bit(bit, addr, size) \
 	for ((bit) = find_first_bit((addr), (size));		\
 	     (bit) < (size);					\
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 268a45ea238c..8a70ec091760 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -90,6 +90,22 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
 
+#ifdef smp_mb__before_atomic
+void __smp_mb__before_atomic(void)
+{
+	smp_mb__before_atomic();
+}
+EXPORT_SYMBOL(__smp_mb__before_atomic);
+#endif
+
+#ifdef smp_mb__after_atomic
+void __smp_mb__after_atomic(void)
+{
+	smp_mb__after_atomic();
+}
+EXPORT_SYMBOL(__smp_mb__after_atomic);
+#endif
+
 void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
 {
 	unsigned long delta;
-- 
cgit 


From 27e4f9d0012a9bb7011aade862f08679d2921ab0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 9 Apr 2014 12:50:34 +0200
Subject: sched/wait: Explain the shadowing and type inconsistencies

Stick in a comment before someone else tries to fix the sparse warning
this generates.

Requested-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-o2ro6f3vkxklni0bc8f7m68s@git.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 559044c79232..2b563a15a77d 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -191,11 +191,23 @@ wait_queue_head_t *bit_waitqueue(void *, int);
 	(!__builtin_constant_p(state) ||				\
 		state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE)	\
 
+/*
+ * The below macro ___wait_event() has an explicit shadow of the __ret
+ * variable when used from the wait_event_*() macros.
+ *
+ * This is so that both can use the ___wait_cond_timeout() construct
+ * to wrap the condition.
+ *
+ * The type inconsistency of the wait_event_*() __ret variable is also
+ * on purpose; we use long where we can return timeout values and int
+ * otherwise.
+ */
+
 #define ___wait_event(wq, condition, state, exclusive, ret, cmd)	\
 ({									\
 	__label__ __out;						\
 	wait_queue_t __wait;						\
-	long __ret = ret;						\
+	long __ret = ret;	/* explicit shadow */			\
 									\
 	INIT_LIST_HEAD(&__wait.task_list);				\
 	if (exclusive)							\
-- 
cgit 


From 08f8aeb55d7727d644dbbbbfb798fe937d47751d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 8 Apr 2014 14:27:25 +0200
Subject: sched: Remove set_need_resched()

The last user is gone now, so we can safely remove this function.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <bitbucket@online.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/thread_info.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index fddbe2023a5d..cb0cec94fda3 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -104,20 +104,6 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
 #define test_thread_flag(flag) \
 	test_ti_thread_flag(current_thread_info(), flag)
 
-static inline __deprecated void set_need_resched(void)
-{
-	/*
-	 * Use of this function in deprecated.
-	 *
-	 * As of this writing there are only a few users in the DRM tree left
-	 * all of which are wrong and can be removed without causing too much
-	 * grief.
-	 *
-	 * The DRM people are aware and are working on removing the last few
-	 * instances.
-	 */
-}
-
 #define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
 
 #if defined TIF_RESTORE_SIGMASK && !defined HAVE_SET_RESTORE_SIGMASK
-- 
cgit 


From c464c76eec4be587604ca082e8cded7e6b89f3bf Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Tue, 18 Mar 2014 16:56:41 +0800
Subject: perf: Allow building PMU drivers as modules

This patch adds support for building a PMU driver as a module. It
exports the functions perf_pmu_{register,unregister}() and adds reference
tracking for the PMU driver module.

When the PMU driver is built as a module, each active event of the PMU
holds a reference to the driver module.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1395133004-23205-1-git-send-email-zheng.z.yan@intel.com
Cc: eranian@google.com
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h |  1 +
 kernel/events/core.c       | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3356abcfff18..af6dcf1d9e47 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -172,6 +172,7 @@ struct perf_event;
 struct pmu {
 	struct list_head		entry;
 
+	struct module			*module;
 	struct device			*dev;
 	const struct attribute_group	**attr_groups;
 	const char			*name;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f83a71a3e46d..5129b1201050 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -39,6 +39,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/mm_types.h>
 #include <linux/cgroup.h>
+#include <linux/module.h>
 
 #include "internal.h"
 
@@ -3229,6 +3230,9 @@ static void __free_event(struct perf_event *event)
 	if (event->ctx)
 		put_ctx(event->ctx);
 
+	if (event->pmu)
+		module_put(event->pmu->module);
+
 	call_rcu(&event->rcu_head, free_event_rcu);
 }
 static void free_event(struct perf_event *event)
@@ -6551,6 +6555,7 @@ free_pdc:
 	free_percpu(pmu->pmu_disable_count);
 	goto unlock;
 }
+EXPORT_SYMBOL_GPL(perf_pmu_register);
 
 void perf_pmu_unregister(struct pmu *pmu)
 {
@@ -6572,6 +6577,7 @@ void perf_pmu_unregister(struct pmu *pmu)
 	put_device(pmu->dev);
 	free_pmu_context(pmu);
 }
+EXPORT_SYMBOL_GPL(perf_pmu_unregister);
 
 struct pmu *perf_init_event(struct perf_event *event)
 {
@@ -6585,6 +6591,10 @@ struct pmu *perf_init_event(struct perf_event *event)
 	pmu = idr_find(&pmu_idr, event->attr.type);
 	rcu_read_unlock();
 	if (pmu) {
+		if (!try_module_get(pmu->module)) {
+			pmu = ERR_PTR(-ENODEV);
+			goto unlock;
+		}
 		event->pmu = pmu;
 		ret = pmu->event_init(event);
 		if (ret)
@@ -6593,6 +6603,10 @@ struct pmu *perf_init_event(struct perf_event *event)
 	}
 
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
+		if (!try_module_get(pmu->module)) {
+			pmu = ERR_PTR(-ENODEV);
+			goto unlock;
+		}
 		event->pmu = pmu;
 		ret = pmu->event_init(event);
 		if (!ret)
@@ -6771,6 +6785,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 err_pmu:
 	if (event->destroy)
 		event->destroy(event);
+	module_put(pmu->module);
 err_ns:
 	if (event->ns)
 		put_pid_ns(event->ns);
-- 
cgit 


From 4e857c58efeb99393cba5a5d0d8ec7117183137c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 17 Mar 2014 18:06:10 +0100
Subject: arch: Mass conversion of smp_mb__*()

Mostly scripted conversion of the smp_mb__* barriers.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/n/tip-55dhyhocezdw1dg7u19hmh1u@git.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-arch@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 block/blk-iopoll.c                                |  4 +-
 crypto/chainiv.c                                  |  2 +-
 drivers/base/power/domain.c                       |  2 +-
 drivers/block/mtip32xx/mtip32xx.c                 |  4 +-
 drivers/cpuidle/coupled.c                         |  2 +-
 drivers/firewire/ohci.c                           |  2 +-
 drivers/gpu/drm/drm_irq.c                         | 10 ++--
 drivers/gpu/drm/i915/i915_irq.c                   |  2 +-
 drivers/md/bcache/bcache.h                        |  2 +-
 drivers/md/bcache/closure.h                       |  2 +-
 drivers/md/dm-bufio.c                             |  8 ++--
 drivers/md/dm-snap.c                              |  4 +-
 drivers/md/dm.c                                   |  2 +-
 drivers/md/raid5.c                                |  2 +-
 drivers/media/usb/dvb-usb-v2/dvb_usb_core.c       |  6 +--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c   |  6 +--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  | 18 ++++----
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c    | 26 +++++------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  8 ++--
 drivers/net/ethernet/broadcom/cnic.c              |  8 ++--
 drivers/net/ethernet/brocade/bna/bnad.c           |  6 +--
 drivers/net/ethernet/chelsio/cxgb/cxgb2.c         |  2 +-
 drivers/net/ethernet/chelsio/cxgb3/sge.c          |  6 +--
 drivers/net/ethernet/chelsio/cxgb4/sge.c          |  2 +-
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c        |  2 +-
 drivers/net/ethernet/freescale/gianfar.c          |  8 ++--
 drivers/net/ethernet/intel/i40e/i40e_main.c       |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     |  8 ++--
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |  6 +--
 drivers/net/wireless/ti/wlcore/main.c             |  2 +-
 drivers/pci/xen-pcifront.c                        |  4 +-
 drivers/scsi/isci/remote_device.c                 |  2 +-
 drivers/target/loopback/tcm_loop.c                |  4 +-
 drivers/target/target_core_alua.c                 | 26 +++++------
 drivers/target/target_core_device.c               |  6 +--
 drivers/target/target_core_iblock.c               |  2 +-
 drivers/target/target_core_pr.c                   | 56 +++++++++++------------
 drivers/target/target_core_transport.c            | 16 +++----
 drivers/target/target_core_ua.c                   | 10 ++--
 drivers/tty/n_tty.c                               |  2 +-
 drivers/tty/serial/mxs-auart.c                    |  4 +-
 drivers/usb/gadget/tcm_usb_gadget.c               |  4 +-
 drivers/usb/serial/usb_wwan.c                     |  2 +-
 drivers/vhost/scsi.c                              |  2 +-
 drivers/w1/w1_family.c                            |  4 +-
 drivers/xen/xen-pciback/pciback_ops.c             |  4 +-
 fs/btrfs/btrfs_inode.h                            |  2 +-
 fs/btrfs/extent_io.c                              |  2 +-
 fs/btrfs/inode.c                                  |  6 +--
 fs/btrfs/ioctl.c                                  |  2 +-
 fs/buffer.c                                       |  2 +-
 fs/ext4/resize.c                                  |  2 +-
 fs/gfs2/glock.c                                   |  8 ++--
 fs/gfs2/glops.c                                   |  2 +-
 fs/gfs2/lock_dlm.c                                |  4 +-
 fs/gfs2/recovery.c                                |  2 +-
 fs/gfs2/sys.c                                     |  4 +-
 fs/jbd2/commit.c                                  |  6 +--
 fs/nfs/dir.c                                      | 12 ++---
 fs/nfs/inode.c                                    |  2 +-
 fs/nfs/nfs4filelayoutdev.c                        |  4 +-
 fs/nfs/nfs4state.c                                |  4 +-
 fs/nfs/pagelist.c                                 |  6 +--
 fs/nfs/pnfs.c                                     |  2 +-
 fs/nfs/pnfs.h                                     |  2 +-
 fs/nfs/write.c                                    |  4 +-
 fs/ubifs/lpt_commit.c                             |  4 +-
 fs/ubifs/tnc_commit.c                             |  4 +-
 include/asm-generic/bitops/atomic.h               |  2 +-
 include/asm-generic/bitops/lock.h                 |  2 +-
 include/linux/buffer_head.h                       |  2 +-
 include/linux/genhd.h                             |  2 +-
 include/linux/interrupt.h                         |  8 ++--
 include/linux/netdevice.h                         |  2 +-
 include/linux/sched.h                             |  6 +--
 include/linux/sunrpc/sched.h                      |  8 ++--
 include/linux/sunrpc/xprt.h                       |  8 ++--
 include/linux/tracehook.h                         |  2 +-
 include/net/ip_vs.h                               |  4 +-
 kernel/debug/debug_core.c                         |  4 +-
 kernel/futex.c                                    |  4 +-
 kernel/kmod.c                                     |  2 +-
 kernel/rcu/tree.c                                 | 22 ++++-----
 kernel/rcu/tree_plugin.h                          |  8 ++--
 kernel/sched/cpupri.c                             |  6 +--
 kernel/sched/wait.c                               |  2 +-
 mm/backing-dev.c                                  |  2 +-
 mm/filemap.c                                      |  4 +-
 net/atm/pppoatm.c                                 |  2 +-
 net/bluetooth/hci_event.c                         |  4 +-
 net/core/dev.c                                    |  8 ++--
 net/core/link_watch.c                             |  2 +-
 net/ipv4/inetpeer.c                               |  2 +-
 net/ipv4/tcp_output.c                             |  4 +-
 net/netfilter/nf_conntrack_core.c                 |  2 +-
 net/rds/ib_recv.c                                 |  4 +-
 net/rds/iw_recv.c                                 |  4 +-
 net/rds/send.c                                    |  6 +--
 net/rds/tcp_send.c                                |  2 +-
 net/sunrpc/auth.c                                 |  2 +-
 net/sunrpc/auth_gss/auth_gss.c                    |  2 +-
 net/sunrpc/backchannel_rqst.c                     |  4 +-
 net/sunrpc/xprt.c                                 |  4 +-
 net/sunrpc/xprtsock.c                             | 16 +++----
 net/unix/af_unix.c                                |  2 +-
 sound/pci/bt87x.c                                 |  4 +-
 106 files changed, 284 insertions(+), 288 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
index c11d24e379e2..f8c6a11b13f0 100644
--- a/block/blk-iopoll.c
+++ b/block/blk-iopoll.c
@@ -49,7 +49,7 @@ EXPORT_SYMBOL(blk_iopoll_sched);
 void __blk_iopoll_complete(struct blk_iopoll *iop)
 {
 	list_del(&iop->list);
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
 }
 EXPORT_SYMBOL(__blk_iopoll_complete);
@@ -161,7 +161,7 @@ EXPORT_SYMBOL(blk_iopoll_disable);
 void blk_iopoll_enable(struct blk_iopoll *iop)
 {
 	BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
 }
 EXPORT_SYMBOL(blk_iopoll_enable);
diff --git a/crypto/chainiv.c b/crypto/chainiv.c
index 834d8dd3d4fc..9c294c8f9a07 100644
--- a/crypto/chainiv.c
+++ b/crypto/chainiv.c
@@ -126,7 +126,7 @@ static int async_chainiv_schedule_work(struct async_chainiv_ctx *ctx)
 	int err = ctx->err;
 
 	if (!ctx->queue.qlen) {
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		clear_bit(CHAINIV_STATE_INUSE, &ctx->state);
 
 		if (!ctx->queue.qlen ||
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index ae098a261fcd..eee55c1e5fde 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -105,7 +105,7 @@ static bool genpd_sd_counter_dec(struct generic_pm_domain *genpd)
 static void genpd_sd_counter_inc(struct generic_pm_domain *genpd)
 {
 	atomic_inc(&genpd->sd_count);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 }
 
 static void genpd_acquire_lock(struct generic_pm_domain *genpd)
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 59c5abe32f06..4fd8d6c1c3d2 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -224,9 +224,9 @@ static int get_slot(struct mtip_port *port)
  */
 static inline void release_slot(struct mtip_port *port, int tag)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(tag, port->allocated);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 /*
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index cb6654bfad77..73fe2f8d7f96 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -159,7 +159,7 @@ void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a)
 {
 	int n = dev->coupled->online_count;
 
-	smp_mb__before_atomic_inc();
+	smp_mb__before_atomic();
 	atomic_inc(a);
 
 	while (atomic_read(a) < n)
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 8db663219560..995dd42a2627 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -3498,7 +3498,7 @@ static int ohci_flush_iso_completions(struct fw_iso_context *base)
 		}
 
 		clear_bit_unlock(0, &ctx->flushing_completions);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 	}
 
 	tasklet_enable(&ctx->context.tasklet);
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index c2676b5908d9..ec5c3f4cdd01 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -156,7 +156,7 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc)
 	 */
 	if ((vblrc > 0) && (abs64(diff_ns) > 1000000)) {
 		atomic_inc(&dev->vblank[crtc].count);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 	}
 
 	/* Invalidate all timestamps while vblank irq's are off. */
@@ -864,9 +864,9 @@ static void drm_update_vblank_count(struct drm_device *dev, int crtc)
 		vblanktimestamp(dev, crtc, tslot) = t_vblank;
 	}
 
-	smp_mb__before_atomic_inc();
+	smp_mb__before_atomic();
 	atomic_add(diff, &dev->vblank[crtc].count);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 }
 
 /**
@@ -1330,9 +1330,9 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc)
 		/* Increment cooked vblank count. This also atomically commits
 		 * the timestamp computed above.
 		 */
-		smp_mb__before_atomic_inc();
+		smp_mb__before_atomic();
 		atomic_inc(&dev->vblank[crtc].count);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 	} else {
 		DRM_DEBUG("crtc %d: Redundant vblirq ignored. diff_ns = %d\n",
 			  crtc, (int) diff_ns);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 7753249b3a95..5409bfafff63 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2147,7 +2147,7 @@ static void i915_error_work_func(struct work_struct *work)
 			 * updates before
 			 * the counter increment.
 			 */
-			smp_mb__before_atomic_inc();
+			smp_mb__before_atomic();
 			atomic_inc(&dev_priv->gpu_error.reset_counter);
 
 			kobject_uevent_env(&dev->primary->kdev->kobj,
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 82c9c5d35251..d2ebcf323094 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -828,7 +828,7 @@ static inline bool cached_dev_get(struct cached_dev *dc)
 		return false;
 
 	/* Paired with the mb in cached_dev_attach */
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	return true;
 }
 
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 7ef7461912be..a08e3eeac3c5 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -243,7 +243,7 @@ static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
 	cl->fn = fn;
 	cl->wq = wq;
 	/* between atomic_dec() in closure_put() */
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 }
 
 static inline void closure_queue(struct closure *cl)
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 66c5d130c8c2..4e84095833db 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -607,9 +607,9 @@ static void write_endio(struct bio *bio, int error)
 
 	BUG_ON(!test_bit(B_WRITING, &b->state));
 
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(B_WRITING, &b->state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	wake_up_bit(&b->state, B_WRITING);
 }
@@ -997,9 +997,9 @@ static void read_endio(struct bio *bio, int error)
 
 	BUG_ON(!test_bit(B_READING, &b->state));
 
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(B_READING, &b->state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	wake_up_bit(&b->state, B_READING);
 }
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ebddef5237e4..8e0caed0bf74 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -642,7 +642,7 @@ static void free_pending_exception(struct dm_snap_pending_exception *pe)
 	struct dm_snapshot *s = pe->snap;
 
 	mempool_free(pe, s->pending_pool);
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 	atomic_dec(&s->pending_exceptions_count);
 }
 
@@ -783,7 +783,7 @@ static int init_hash_tables(struct dm_snapshot *s)
 static void merge_shutdown(struct dm_snapshot *s)
 {
 	clear_bit_unlock(RUNNING_MERGE, &s->state_bits);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&s->state_bits, RUNNING_MERGE);
 }
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 455e64916498..2db768e4553f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2447,7 +2447,7 @@ static void dm_wq_work(struct work_struct *work)
 static void dm_queue_flush(struct mapped_device *md)
 {
 	clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	queue_work(md->wq, &md->work);
 }
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ad1b9bea446e..2afef4ec9312 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4400,7 +4400,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
 			 * STRIPE_ON_UNPLUG_LIST clear but the stripe
 			 * is still in our list
 			 */
-			smp_mb__before_clear_bit();
+			smp_mb__before_atomic();
 			clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
 			/*
 			 * STRIPE_ON_RELEASE_LIST could be set here. In that
diff --git a/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c b/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c
index de02db802ace..e35580618936 100644
--- a/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c
+++ b/drivers/media/usb/dvb-usb-v2/dvb_usb_core.c
@@ -399,7 +399,7 @@ static int dvb_usb_stop_feed(struct dvb_demux_feed *dvbdmxfeed)
 
 	/* clear 'streaming' status bit */
 	clear_bit(ADAP_STREAMING, &adap->state_bits);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&adap->state_bits, ADAP_STREAMING);
 skip_feed_stop:
 
@@ -550,7 +550,7 @@ static int dvb_usb_fe_init(struct dvb_frontend *fe)
 err:
 	if (!adap->suspend_resume_active) {
 		clear_bit(ADAP_INIT, &adap->state_bits);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 		wake_up_bit(&adap->state_bits, ADAP_INIT);
 	}
 
@@ -591,7 +591,7 @@ err:
 	if (!adap->suspend_resume_active) {
 		adap->active_fe = -1;
 		clear_bit(ADAP_SLEEP, &adap->state_bits);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 		wake_up_bit(&adap->state_bits, ADAP_SLEEP);
 	}
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 9261d5313b5b..dd57c7c5a3da 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2781,7 +2781,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
 
 	case LOAD_OPEN:
 		netif_tx_start_all_queues(bp->dev);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 		break;
 
 	case LOAD_DIAG:
@@ -4939,9 +4939,9 @@ void bnx2x_update_coalesce_sb_index(struct bnx2x *bp, u8 fw_sb_id,
 void bnx2x_schedule_sp_rtnl(struct bnx2x *bp, enum sp_rtnl_flag flag,
 			    u32 verbose)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	set_bit(flag, &bp->sp_rtnl_state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	DP((BNX2X_MSG_SP | verbose), "Scheduling sp_rtnl task [Flag: %d]\n",
 	   flag);
 	schedule_delayed_work(&bp->sp_rtnl_task, 0);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index a78edaccceee..16391db2e8c9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -1858,10 +1858,10 @@ void bnx2x_sp_event(struct bnx2x_fastpath *fp, union eth_rx_cqe *rr_cqe)
 		return;
 #endif
 
-	smp_mb__before_atomic_inc();
+	smp_mb__before_atomic();
 	atomic_inc(&bp->cq_spq_left);
 	/* push the change in bp->spq_left and towards the memory */
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 
 	DP(BNX2X_MSG_SP, "bp->cq_spq_left %x\n", atomic_read(&bp->cq_spq_left));
 
@@ -1876,11 +1876,11 @@ void bnx2x_sp_event(struct bnx2x_fastpath *fp, union eth_rx_cqe *rr_cqe)
 		 * sp_state is cleared, and this order prevents
 		 * races
 		 */
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		set_bit(BNX2X_AFEX_PENDING_VIFSET_MCP_ACK, &bp->sp_state);
 		wmb();
 		clear_bit(BNX2X_AFEX_FCOE_Q_UPDATE_PENDING, &bp->sp_state);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 
 		/* schedule the sp task as mcp ack is required */
 		bnx2x_schedule_sp_task(bp);
@@ -5272,9 +5272,9 @@ static void bnx2x_after_function_update(struct bnx2x *bp)
 		__clear_bit(RAMROD_COMP_WAIT, &queue_params.ramrod_flags);
 
 		/* mark latest Q bit */
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		set_bit(BNX2X_AFEX_FCOE_Q_UPDATE_PENDING, &bp->sp_state);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 
 		/* send Q update ramrod for FCoE Q */
 		rc = bnx2x_queue_state_change(bp, &queue_params);
@@ -5500,7 +5500,7 @@ next_spqe:
 		spqe_cnt++;
 	} /* for */
 
-	smp_mb__before_atomic_inc();
+	smp_mb__before_atomic();
 	atomic_add(spqe_cnt, &bp->eq_spq_left);
 
 	bp->eq_cons = sw_cons;
@@ -13869,9 +13869,9 @@ static int bnx2x_drv_ctl(struct net_device *dev, struct drv_ctl_info *ctl)
 	case DRV_CTL_RET_L2_SPQ_CREDIT_CMD: {
 		int count = ctl->data.credit.credit_count;
 
-		smp_mb__before_atomic_inc();
+		smp_mb__before_atomic();
 		atomic_add(count, &bp->cq_spq_left);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 		break;
 	}
 	case DRV_CTL_ULP_REGISTER_CMD: {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 31297266b743..d725317c4277 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -258,16 +258,16 @@ static bool bnx2x_raw_check_pending(struct bnx2x_raw_obj *o)
 
 static void bnx2x_raw_clear_pending(struct bnx2x_raw_obj *o)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(o->state, o->pstate);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 static void bnx2x_raw_set_pending(struct bnx2x_raw_obj *o)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	set_bit(o->state, o->pstate);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 /**
@@ -2131,7 +2131,7 @@ static int bnx2x_set_rx_mode_e1x(struct bnx2x *bp,
 
 	/* The operation is completed */
 	clear_bit(p->state, p->pstate);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	return 0;
 }
@@ -3576,16 +3576,16 @@ error_exit1:
 
 static void bnx2x_mcast_clear_sched(struct bnx2x_mcast_obj *o)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(o->sched_state, o->raw.pstate);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 static void bnx2x_mcast_set_sched(struct bnx2x_mcast_obj *o)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	set_bit(o->sched_state, o->raw.pstate);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 static bool bnx2x_mcast_check_sched(struct bnx2x_mcast_obj *o)
@@ -4200,7 +4200,7 @@ int bnx2x_queue_state_change(struct bnx2x *bp,
 		if (rc) {
 			o->next_state = BNX2X_Q_STATE_MAX;
 			clear_bit(pending_bit, pending);
-			smp_mb__after_clear_bit();
+			smp_mb__after_atomic();
 			return rc;
 		}
 
@@ -4288,7 +4288,7 @@ static int bnx2x_queue_comp_cmd(struct bnx2x *bp,
 	wmb();
 
 	clear_bit(cmd, &o->pending);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	return 0;
 }
@@ -5279,7 +5279,7 @@ static inline int bnx2x_func_state_change_comp(struct bnx2x *bp,
 	wmb();
 
 	clear_bit(cmd, &o->pending);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	return 0;
 }
@@ -5926,7 +5926,7 @@ int bnx2x_func_state_change(struct bnx2x *bp,
 		if (rc) {
 			o->next_state = BNX2X_F_STATE_MAX;
 			clear_bit(cmd, pending);
-			smp_mb__after_clear_bit();
+			smp_mb__after_atomic();
 			return rc;
 		}
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 5c523b32db70..f82ac5ac2336 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1626,9 +1626,9 @@ static
 void bnx2x_vf_handle_filters_eqe(struct bnx2x *bp,
 				 struct bnx2x_virtf *vf)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(BNX2X_FILTER_RX_MODE_PENDING, &vf->filter_state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 static void bnx2x_vf_handle_rss_update_eqe(struct bnx2x *bp,
@@ -2960,9 +2960,9 @@ void bnx2x_iov_task(struct work_struct *work)
 
 void bnx2x_schedule_iov_task(struct bnx2x *bp, enum bnx2x_iov_flag flag)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	set_bit(flag, &bp->iov_task_state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	DP(BNX2X_MSG_IOV, "Scheduling iov task [Flag: %d]\n", flag);
 	queue_delayed_work(bnx2x_iov_wq, &bp->iov_task, 0);
 }
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 09f3fefcbf9c..4dd48d2fa804 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -436,7 +436,7 @@ static int cnic_offld_prep(struct cnic_sock *csk)
 static int cnic_close_prep(struct cnic_sock *csk)
 {
 	clear_bit(SK_F_CONNECT_START, &csk->flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	if (test_and_clear_bit(SK_F_OFFLD_COMPLETE, &csk->flags)) {
 		while (test_and_set_bit(SK_F_OFFLD_SCHED, &csk->flags))
@@ -450,7 +450,7 @@ static int cnic_close_prep(struct cnic_sock *csk)
 static int cnic_abort_prep(struct cnic_sock *csk)
 {
 	clear_bit(SK_F_CONNECT_START, &csk->flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	while (test_and_set_bit(SK_F_OFFLD_SCHED, &csk->flags))
 		msleep(1);
@@ -3646,7 +3646,7 @@ static int cnic_cm_destroy(struct cnic_sock *csk)
 
 	csk_hold(csk);
 	clear_bit(SK_F_INUSE, &csk->flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	while (atomic_read(&csk->ref_count) != 1)
 		msleep(1);
 	cnic_cm_cleanup(csk);
@@ -4026,7 +4026,7 @@ static void cnic_cm_process_kcqe(struct cnic_dev *dev, struct kcqe *kcqe)
 			 L4_KCQE_COMPLETION_STATUS_PARITY_ERROR)
 			set_bit(SK_F_HW_ERR, &csk->flags);
 
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		clear_bit(SK_F_OFFLD_SCHED, &csk->flags);
 		cnic_cm_upcall(cp, csk, opcode);
 		break;
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index 675550fe8ee9..3a77f9ead004 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -249,7 +249,7 @@ bnad_tx_complete(struct bnad *bnad, struct bna_tcb *tcb)
 	if (likely(test_bit(BNAD_TXQ_TX_STARTED, &tcb->flags)))
 		bna_ib_ack(tcb->i_dbell, sent);
 
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags);
 
 	return sent;
@@ -1126,7 +1126,7 @@ bnad_tx_cleanup(struct delayed_work *work)
 
 		bnad_txq_cleanup(bnad, tcb);
 
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags);
 	}
 
@@ -2992,7 +2992,7 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 			sent = bnad_txcmpl_process(bnad, tcb);
 			if (likely(test_bit(BNAD_TXQ_TX_STARTED, &tcb->flags)))
 				bna_ib_ack(tcb->i_dbell, sent);
-			smp_mb__before_clear_bit();
+			smp_mb__before_atomic();
 			clear_bit(BNAD_TXQ_FREE_SENT, &tcb->flags);
 		} else {
 			netif_stop_queue(netdev);
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 0fe7ff750d77..05613a85ce61 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -281,7 +281,7 @@ static int cxgb_close(struct net_device *dev)
 	if (adapter->params.stats_update_period &&
 	    !(adapter->open_device_map & PORT_MASK)) {
 		/* Stop statistics accumulation. */
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 		spin_lock(&adapter->work_lock);   /* sync with update task */
 		spin_unlock(&adapter->work_lock);
 		cancel_mac_stats_update(adapter);
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index 8b069f96e920..3dfcf600fcc6 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -1379,7 +1379,7 @@ static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
 		struct sge_qset *qs = txq_to_qset(q, qid);
 
 		set_bit(qid, &qs->txq_stopped);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 
 		if (should_restart_tx(q) &&
 		    test_and_clear_bit(qid, &qs->txq_stopped))
@@ -1492,7 +1492,7 @@ static void restart_ctrlq(unsigned long data)
 
 	if (!skb_queue_empty(&q->sendq)) {
 		set_bit(TXQ_CTRL, &qs->txq_stopped);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 
 		if (should_restart_tx(q) &&
 		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
@@ -1697,7 +1697,7 @@ again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
 
 		if (unlikely(q->size - q->in_use < ndesc)) {
 			set_bit(TXQ_OFLD, &qs->txq_stopped);
-			smp_mb__after_clear_bit();
+			smp_mb__after_atomic();
 
 			if (should_restart_tx(q) &&
 			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index ca95cf2954eb..e249528c8e60 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2031,7 +2031,7 @@ static void sge_rx_timer_cb(unsigned long data)
 			struct sge_fl *fl = s->egr_map[id];
 
 			clear_bit(id, s->starving_fl);
-			smp_mb__after_clear_bit();
+			smp_mb__after_atomic();
 
 			if (fl_starving(fl)) {
 				rxq = container_of(fl, struct sge_eth_rxq, fl);
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 9cfa4b4bb089..9d88c1d50b49 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -1951,7 +1951,7 @@ static void sge_rx_timer_cb(unsigned long data)
 			struct sge_fl *fl = s->egr_map[id];
 
 			clear_bit(id, s->starving_fl);
-			smp_mb__after_clear_bit();
+			smp_mb__after_atomic();
 
 			/*
 			 * Since we are accessing fl without a lock there's a
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 9125d9abf099..d82f092cae90 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1797,9 +1797,9 @@ void stop_gfar(struct net_device *dev)
 
 	netif_tx_stop_all_queues(dev);
 
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	set_bit(GFAR_DOWN, &priv->state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	disable_napi(priv);
 
@@ -2042,9 +2042,9 @@ int startup_gfar(struct net_device *ndev)
 
 	gfar_init_tx_rx_base(priv);
 
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(GFAR_DOWN, &priv->state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	/* Start Rx/Tx DMA and enable the interrupts */
 	gfar_start(priv);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 861b722c2672..1e526c072a44 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4671,7 +4671,7 @@ static void i40e_service_event_complete(struct i40e_pf *pf)
 	BUG_ON(!test_bit(__I40E_SERVICE_SCHED, &pf->state));
 
 	/* flush memory to make sure state is correct before next watchog */
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(__I40E_SERVICE_SCHED, &pf->state);
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index c4c526b7f99f..2fecc2626de5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -376,7 +376,7 @@ static void ixgbe_service_event_complete(struct ixgbe_adapter *adapter)
 	BUG_ON(!test_bit(__IXGBE_SERVICE_SCHED, &adapter->state));
 
 	/* flush memory to make sure state is correct before next watchdog */
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);
 }
 
@@ -4671,7 +4671,7 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter)
 	if (hw->mac.ops.enable_tx_laser)
 		hw->mac.ops.enable_tx_laser(hw);
 
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(__IXGBE_DOWN, &adapter->state);
 	ixgbe_napi_enable_all(adapter);
 
@@ -5567,7 +5567,7 @@ static int ixgbe_resume(struct pci_dev *pdev)
 		e_dev_err("Cannot enable PCI device from suspend\n");
 		return err;
 	}
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(__IXGBE_DISABLED, &adapter->state);
 	pci_set_master(pdev);
 
@@ -8541,7 +8541,7 @@ static pci_ers_result_t ixgbe_io_slot_reset(struct pci_dev *pdev)
 		e_err(probe, "Cannot re-enable PCI device after reset.\n");
 		result = PCI_ERS_RESULT_DISCONNECT;
 	} else {
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		clear_bit(__IXGBE_DISABLED, &adapter->state);
 		adapter->hw.hw_addr = adapter->io_addr;
 		pci_set_master(pdev);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index d0799e8e31e4..de2793b06305 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1668,7 +1668,7 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter)
 
 	spin_unlock_bh(&adapter->mbx_lock);
 
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(__IXGBEVF_DOWN, &adapter->state);
 	ixgbevf_napi_enable_all(adapter);
 
@@ -3354,7 +3354,7 @@ static int ixgbevf_resume(struct pci_dev *pdev)
 		dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
 		return err;
 	}
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(__IXGBEVF_DISABLED, &adapter->state);
 	pci_set_master(pdev);
 
@@ -3712,7 +3712,7 @@ static pci_ers_result_t ixgbevf_io_slot_reset(struct pci_dev *pdev)
 		return PCI_ERS_RESULT_DISCONNECT;
 	}
 
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(__IXGBEVF_DISABLED, &adapter->state);
 	pci_set_master(pdev);
 
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index ed88d3913483..e71eae353368 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -543,7 +543,7 @@ static int wlcore_irq_locked(struct wl1271 *wl)
 		 * wl1271_ps_elp_wakeup cannot be called concurrently.
 		 */
 		clear_bit(WL1271_FLAG_IRQ_RUNNING, &wl->flags);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 
 		ret = wlcore_fw_status(wl, wl->fw_status);
 		if (ret < 0)
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 179b8edc2262..53df39a22c8a 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -662,9 +662,9 @@ static void pcifront_do_aer(struct work_struct *data)
 	notify_remote_via_evtchn(pdev->evtchn);
 
 	/*in case of we lost an aer request in four lines time_window*/
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(_PDEVB_op_active, &pdev->flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	schedule_pcifront_aer_op(pdev);
 
diff --git a/drivers/scsi/isci/remote_device.c b/drivers/scsi/isci/remote_device.c
index 96a26f454673..cc51f38b116d 100644
--- a/drivers/scsi/isci/remote_device.c
+++ b/drivers/scsi/isci/remote_device.c
@@ -1541,7 +1541,7 @@ void isci_remote_device_release(struct kref *kref)
 	clear_bit(IDEV_STOP_PENDING, &idev->flags);
 	clear_bit(IDEV_IO_READY, &idev->flags);
 	clear_bit(IDEV_GONE, &idev->flags);
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(IDEV_ALLOCATED, &idev->flags);
 	wake_up(&ihost->eventq);
 }
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index c886ad1c39fb..73ab75ddaf42 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -951,7 +951,7 @@ static int tcm_loop_port_link(
 	struct tcm_loop_hba *tl_hba = tl_tpg->tl_hba;
 
 	atomic_inc(&tl_tpg->tl_tpg_port_count);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	/*
 	 * Add Linux/SCSI struct scsi_device by HCTL
 	 */
@@ -986,7 +986,7 @@ static void tcm_loop_port_unlink(
 	scsi_device_put(sd);
 
 	atomic_dec(&tl_tpg->tl_tpg_port_count);
-	smp_mb__after_atomic_dec();
+	smp_mb__after_atomic();
 
 	pr_debug("TCM_Loop_ConfigFS: Port Unlink Successful\n");
 }
diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index fcbe6125b73e..0b79b852f4b2 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -393,7 +393,7 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd)
 					continue;
 
 				atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
-				smp_mb__after_atomic_inc();
+				smp_mb__after_atomic();
 
 				spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
 
@@ -404,7 +404,7 @@ target_emulate_set_target_port_groups(struct se_cmd *cmd)
 
 				spin_lock(&dev->t10_alua.tg_pt_gps_lock);
 				atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
-				smp_mb__after_atomic_dec();
+				smp_mb__after_atomic();
 				break;
 			}
 			spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
@@ -990,7 +990,7 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
 		 * TARGET PORT GROUPS command
 		 */
 		atomic_inc(&mem->tg_pt_gp_mem_ref_cnt);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 		spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
 
 		spin_lock_bh(&port->sep_alua_lock);
@@ -1020,7 +1020,7 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
 
 		spin_lock(&tg_pt_gp->tg_pt_gp_lock);
 		atomic_dec(&mem->tg_pt_gp_mem_ref_cnt);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 	}
 	spin_unlock(&tg_pt_gp->tg_pt_gp_lock);
 	/*
@@ -1054,7 +1054,7 @@ static void core_alua_do_transition_tg_pt_work(struct work_struct *work)
 		core_alua_dump_state(tg_pt_gp->tg_pt_gp_alua_pending_state));
 	spin_lock(&dev->t10_alua.tg_pt_gps_lock);
 	atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
-	smp_mb__after_atomic_dec();
+	smp_mb__after_atomic();
 	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
 
 	if (tg_pt_gp->tg_pt_gp_transition_complete)
@@ -1116,7 +1116,7 @@ static int core_alua_do_transition_tg_pt(
 	 */
 	spin_lock(&dev->t10_alua.tg_pt_gps_lock);
 	atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
 
 	if (!explicit && tg_pt_gp->tg_pt_gp_implicit_trans_secs) {
@@ -1159,7 +1159,7 @@ int core_alua_do_port_transition(
 	spin_lock(&local_lu_gp_mem->lu_gp_mem_lock);
 	lu_gp = local_lu_gp_mem->lu_gp;
 	atomic_inc(&lu_gp->lu_gp_ref_cnt);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	spin_unlock(&local_lu_gp_mem->lu_gp_mem_lock);
 	/*
 	 * For storage objects that are members of the 'default_lu_gp',
@@ -1176,7 +1176,7 @@ int core_alua_do_port_transition(
 		rc = core_alua_do_transition_tg_pt(l_tg_pt_gp,
 						   new_state, explicit);
 		atomic_dec(&lu_gp->lu_gp_ref_cnt);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 		return rc;
 	}
 	/*
@@ -1190,7 +1190,7 @@ int core_alua_do_port_transition(
 
 		dev = lu_gp_mem->lu_gp_mem_dev;
 		atomic_inc(&lu_gp_mem->lu_gp_mem_ref_cnt);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 		spin_unlock(&lu_gp->lu_gp_lock);
 
 		spin_lock(&dev->t10_alua.tg_pt_gps_lock);
@@ -1219,7 +1219,7 @@ int core_alua_do_port_transition(
 				tg_pt_gp->tg_pt_gp_alua_nacl = NULL;
 			}
 			atomic_inc(&tg_pt_gp->tg_pt_gp_ref_cnt);
-			smp_mb__after_atomic_inc();
+			smp_mb__after_atomic();
 			spin_unlock(&dev->t10_alua.tg_pt_gps_lock);
 			/*
 			 * core_alua_do_transition_tg_pt() will always return
@@ -1230,7 +1230,7 @@ int core_alua_do_port_transition(
 
 			spin_lock(&dev->t10_alua.tg_pt_gps_lock);
 			atomic_dec(&tg_pt_gp->tg_pt_gp_ref_cnt);
-			smp_mb__after_atomic_dec();
+			smp_mb__after_atomic();
 			if (rc)
 				break;
 		}
@@ -1238,7 +1238,7 @@ int core_alua_do_port_transition(
 
 		spin_lock(&lu_gp->lu_gp_lock);
 		atomic_dec(&lu_gp_mem->lu_gp_mem_ref_cnt);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 	}
 	spin_unlock(&lu_gp->lu_gp_lock);
 
@@ -1252,7 +1252,7 @@ int core_alua_do_port_transition(
 	}
 
 	atomic_dec(&lu_gp->lu_gp_ref_cnt);
-	smp_mb__after_atomic_dec();
+	smp_mb__after_atomic();
 	return rc;
 }
 
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index 65001e133670..72618776ede4 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -225,7 +225,7 @@ struct se_dev_entry *core_get_se_deve_from_rtpi(
 			continue;
 
 		atomic_inc(&deve->pr_ref_count);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 		spin_unlock_irq(&nacl->device_list_lock);
 
 		return deve;
@@ -1392,7 +1392,7 @@ int core_dev_add_initiator_node_lun_acl(
 	spin_lock(&lun->lun_acl_lock);
 	list_add_tail(&lacl->lacl_list, &lun->lun_acl_list);
 	atomic_inc(&lun->lun_acl_count);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	spin_unlock(&lun->lun_acl_lock);
 
 	pr_debug("%s_TPG[%hu]_LUN[%u->%u] - Added %s ACL for "
@@ -1426,7 +1426,7 @@ int core_dev_del_initiator_node_lun_acl(
 	spin_lock(&lun->lun_acl_lock);
 	list_del(&lacl->lacl_list);
 	atomic_dec(&lun->lun_acl_count);
-	smp_mb__after_atomic_dec();
+	smp_mb__after_atomic();
 	spin_unlock(&lun->lun_acl_lock);
 
 	core_disable_device_list_for_node(lun, NULL, lacl->mapped_lun,
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 9e0232cca92e..7e6b857c6b3f 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -323,7 +323,7 @@ static void iblock_bio_done(struct bio *bio, int err)
 		 * Bump the ib_bio_err_cnt and release bio.
 		 */
 		atomic_inc(&ibr->ib_bio_err_cnt);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 	}
 
 	bio_put(bio);
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 3013287a2aaa..df357862286e 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -675,7 +675,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
 	spin_lock(&dev->se_port_lock);
 	list_for_each_entry_safe(port, port_tmp, &dev->dev_sep_list, sep_list) {
 		atomic_inc(&port->sep_tg_pt_ref_cnt);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 		spin_unlock(&dev->se_port_lock);
 
 		spin_lock_bh(&port->sep_alua_lock);
@@ -710,7 +710,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
 				continue;
 
 			atomic_inc(&deve_tmp->pr_ref_count);
-			smp_mb__after_atomic_inc();
+			smp_mb__after_atomic();
 			spin_unlock_bh(&port->sep_alua_lock);
 			/*
 			 * Grab a configfs group dependency that is released
@@ -723,9 +723,9 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
 				pr_err("core_scsi3_lunacl_depend"
 						"_item() failed\n");
 				atomic_dec(&port->sep_tg_pt_ref_cnt);
-				smp_mb__after_atomic_dec();
+				smp_mb__after_atomic();
 				atomic_dec(&deve_tmp->pr_ref_count);
-				smp_mb__after_atomic_dec();
+				smp_mb__after_atomic();
 				goto out;
 			}
 			/*
@@ -740,9 +740,9 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
 						sa_res_key, all_tg_pt, aptpl);
 			if (!pr_reg_atp) {
 				atomic_dec(&port->sep_tg_pt_ref_cnt);
-				smp_mb__after_atomic_dec();
+				smp_mb__after_atomic();
 				atomic_dec(&deve_tmp->pr_ref_count);
-				smp_mb__after_atomic_dec();
+				smp_mb__after_atomic();
 				core_scsi3_lunacl_undepend_item(deve_tmp);
 				goto out;
 			}
@@ -755,7 +755,7 @@ static struct t10_pr_registration *__core_scsi3_alloc_registration(
 
 		spin_lock(&dev->se_port_lock);
 		atomic_dec(&port->sep_tg_pt_ref_cnt);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 	}
 	spin_unlock(&dev->se_port_lock);
 
@@ -1110,7 +1110,7 @@ static struct t10_pr_registration *__core_scsi3_locate_pr_reg(
 					continue;
 			}
 			atomic_inc(&pr_reg->pr_res_holders);
-			smp_mb__after_atomic_inc();
+			smp_mb__after_atomic();
 			spin_unlock(&pr_tmpl->registration_lock);
 			return pr_reg;
 		}
@@ -1125,7 +1125,7 @@ static struct t10_pr_registration *__core_scsi3_locate_pr_reg(
 			continue;
 
 		atomic_inc(&pr_reg->pr_res_holders);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 		spin_unlock(&pr_tmpl->registration_lock);
 		return pr_reg;
 	}
@@ -1155,7 +1155,7 @@ static struct t10_pr_registration *core_scsi3_locate_pr_reg(
 static void core_scsi3_put_pr_reg(struct t10_pr_registration *pr_reg)
 {
 	atomic_dec(&pr_reg->pr_res_holders);
-	smp_mb__after_atomic_dec();
+	smp_mb__after_atomic();
 }
 
 static int core_scsi3_check_implicit_release(
@@ -1349,7 +1349,7 @@ static void core_scsi3_tpg_undepend_item(struct se_portal_group *tpg)
 			&tpg->tpg_group.cg_item);
 
 	atomic_dec(&tpg->tpg_pr_ref_count);
-	smp_mb__after_atomic_dec();
+	smp_mb__after_atomic();
 }
 
 static int core_scsi3_nodeacl_depend_item(struct se_node_acl *nacl)
@@ -1369,7 +1369,7 @@ static void core_scsi3_nodeacl_undepend_item(struct se_node_acl *nacl)
 
 	if (nacl->dynamic_node_acl) {
 		atomic_dec(&nacl->acl_pr_ref_count);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 		return;
 	}
 
@@ -1377,7 +1377,7 @@ static void core_scsi3_nodeacl_undepend_item(struct se_node_acl *nacl)
 			&nacl->acl_group.cg_item);
 
 	atomic_dec(&nacl->acl_pr_ref_count);
-	smp_mb__after_atomic_dec();
+	smp_mb__after_atomic();
 }
 
 static int core_scsi3_lunacl_depend_item(struct se_dev_entry *se_deve)
@@ -1408,7 +1408,7 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve)
 	 */
 	if (!lun_acl) {
 		atomic_dec(&se_deve->pr_ref_count);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 		return;
 	}
 	nacl = lun_acl->se_lun_nacl;
@@ -1418,7 +1418,7 @@ static void core_scsi3_lunacl_undepend_item(struct se_dev_entry *se_deve)
 			&lun_acl->se_lun_group.cg_item);
 
 	atomic_dec(&se_deve->pr_ref_count);
-	smp_mb__after_atomic_dec();
+	smp_mb__after_atomic();
 }
 
 static sense_reason_t
@@ -1552,14 +1552,14 @@ core_scsi3_decode_spec_i_port(
 				continue;
 
 			atomic_inc(&tmp_tpg->tpg_pr_ref_count);
-			smp_mb__after_atomic_inc();
+			smp_mb__after_atomic();
 			spin_unlock(&dev->se_port_lock);
 
 			if (core_scsi3_tpg_depend_item(tmp_tpg)) {
 				pr_err(" core_scsi3_tpg_depend_item()"
 					" for tmp_tpg\n");
 				atomic_dec(&tmp_tpg->tpg_pr_ref_count);
-				smp_mb__after_atomic_dec();
+				smp_mb__after_atomic();
 				ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 				goto out_unmap;
 			}
@@ -1573,7 +1573,7 @@ core_scsi3_decode_spec_i_port(
 						tmp_tpg, i_str);
 			if (dest_node_acl) {
 				atomic_inc(&dest_node_acl->acl_pr_ref_count);
-				smp_mb__after_atomic_inc();
+				smp_mb__after_atomic();
 			}
 			spin_unlock_irq(&tmp_tpg->acl_node_lock);
 
@@ -1587,7 +1587,7 @@ core_scsi3_decode_spec_i_port(
 				pr_err("configfs_depend_item() failed"
 					" for dest_node_acl->acl_group\n");
 				atomic_dec(&dest_node_acl->acl_pr_ref_count);
-				smp_mb__after_atomic_dec();
+				smp_mb__after_atomic();
 				core_scsi3_tpg_undepend_item(tmp_tpg);
 				ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 				goto out_unmap;
@@ -1647,7 +1647,7 @@ core_scsi3_decode_spec_i_port(
 			pr_err("core_scsi3_lunacl_depend_item()"
 					" failed\n");
 			atomic_dec(&dest_se_deve->pr_ref_count);
-			smp_mb__after_atomic_dec();
+			smp_mb__after_atomic();
 			core_scsi3_nodeacl_undepend_item(dest_node_acl);
 			core_scsi3_tpg_undepend_item(dest_tpg);
 			ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
@@ -3168,14 +3168,14 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key,
 			continue;
 
 		atomic_inc(&dest_se_tpg->tpg_pr_ref_count);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 		spin_unlock(&dev->se_port_lock);
 
 		if (core_scsi3_tpg_depend_item(dest_se_tpg)) {
 			pr_err("core_scsi3_tpg_depend_item() failed"
 				" for dest_se_tpg\n");
 			atomic_dec(&dest_se_tpg->tpg_pr_ref_count);
-			smp_mb__after_atomic_dec();
+			smp_mb__after_atomic();
 			ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 			goto out_put_pr_reg;
 		}
@@ -3273,7 +3273,7 @@ after_iport_check:
 				initiator_str);
 	if (dest_node_acl) {
 		atomic_inc(&dest_node_acl->acl_pr_ref_count);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 	}
 	spin_unlock_irq(&dest_se_tpg->acl_node_lock);
 
@@ -3289,7 +3289,7 @@ after_iport_check:
 		pr_err("core_scsi3_nodeacl_depend_item() for"
 			" dest_node_acl\n");
 		atomic_dec(&dest_node_acl->acl_pr_ref_count);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 		dest_node_acl = NULL;
 		ret = TCM_INVALID_PARAMETER_LIST;
 		goto out;
@@ -3314,7 +3314,7 @@ after_iport_check:
 	if (core_scsi3_lunacl_depend_item(dest_se_deve)) {
 		pr_err("core_scsi3_lunacl_depend_item() failed\n");
 		atomic_dec(&dest_se_deve->pr_ref_count);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 		dest_se_deve = NULL;
 		ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 		goto out;
@@ -3880,7 +3880,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 		add_desc_len = 0;
 
 		atomic_inc(&pr_reg->pr_res_holders);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 		spin_unlock(&pr_tmpl->registration_lock);
 		/*
 		 * Determine expected length of $FABRIC_MOD specific
@@ -3894,7 +3894,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 				" out of buffer: %d\n", cmd->data_length);
 			spin_lock(&pr_tmpl->registration_lock);
 			atomic_dec(&pr_reg->pr_res_holders);
-			smp_mb__after_atomic_dec();
+			smp_mb__after_atomic();
 			break;
 		}
 		/*
@@ -3956,7 +3956,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
 
 		spin_lock(&pr_tmpl->registration_lock);
 		atomic_dec(&pr_reg->pr_res_holders);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 		/*
 		 * Set the ADDITIONAL DESCRIPTOR LENGTH
 		 */
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index d4b98690a736..4badca1cd625 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -736,7 +736,7 @@ void target_qf_do_work(struct work_struct *work)
 	list_for_each_entry_safe(cmd, cmd_tmp, &qf_cmd_list, se_qf_node) {
 		list_del(&cmd->se_qf_node);
 		atomic_dec(&dev->dev_qf_count);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 
 		pr_debug("Processing %s cmd: %p QUEUE_FULL in work queue"
 			" context: %s\n", cmd->se_tfo->get_fabric_name(), cmd,
@@ -1148,7 +1148,7 @@ transport_check_alloc_task_attr(struct se_cmd *cmd)
 	 * Dormant to Active status.
 	 */
 	cmd->se_ordered_id = atomic_inc_return(&dev->dev_ordered_id);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	pr_debug("Allocated se_ordered_id: %u for Task Attr: 0x%02x on %s\n",
 			cmd->se_ordered_id, cmd->sam_task_attr,
 			dev->transport->name);
@@ -1705,7 +1705,7 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
 		return false;
 	case MSG_ORDERED_TAG:
 		atomic_inc(&dev->dev_ordered_sync);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 
 		pr_debug("Added ORDERED for CDB: 0x%02x to ordered list, "
 			 " se_ordered_id: %u\n",
@@ -1723,7 +1723,7 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
 		 * For SIMPLE and UNTAGGED Task Attribute commands
 		 */
 		atomic_inc(&dev->simple_cmds);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 		break;
 	}
 
@@ -1828,7 +1828,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
 
 	if (cmd->sam_task_attr == MSG_SIMPLE_TAG) {
 		atomic_dec(&dev->simple_cmds);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 		dev->dev_cur_ordered_id++;
 		pr_debug("Incremented dev->dev_cur_ordered_id: %u for"
 			" SIMPLE: %u\n", dev->dev_cur_ordered_id,
@@ -1840,7 +1840,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
 			cmd->se_ordered_id);
 	} else if (cmd->sam_task_attr == MSG_ORDERED_TAG) {
 		atomic_dec(&dev->dev_ordered_sync);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 
 		dev->dev_cur_ordered_id++;
 		pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED:"
@@ -1899,7 +1899,7 @@ static void transport_handle_queue_full(
 	spin_lock_irq(&dev->qf_cmd_lock);
 	list_add_tail(&cmd->se_qf_node, &cmd->se_dev->qf_cmd_list);
 	atomic_inc(&dev->dev_qf_count);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	spin_unlock_irq(&cmd->se_dev->qf_cmd_lock);
 
 	schedule_work(&cmd->se_dev->qf_work_queue);
@@ -2875,7 +2875,7 @@ void transport_send_task_abort(struct se_cmd *cmd)
 		if (cmd->se_tfo->write_pending_status(cmd) != 0) {
 			cmd->transport_state |= CMD_T_ABORTED;
 			cmd->se_cmd_flags |= SCF_SEND_DELAYED_TAS;
-			smp_mb__after_atomic_inc();
+			smp_mb__after_atomic();
 			return;
 		}
 	}
diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c
index 505519b10cb7..101858e245b3 100644
--- a/drivers/target/target_core_ua.c
+++ b/drivers/target/target_core_ua.c
@@ -162,7 +162,7 @@ int core_scsi3_ua_allocate(
 		spin_unlock_irq(&nacl->device_list_lock);
 
 		atomic_inc(&deve->ua_count);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 		return 0;
 	}
 	list_add_tail(&ua->ua_nacl_list, &deve->ua_list);
@@ -175,7 +175,7 @@ int core_scsi3_ua_allocate(
 		asc, ascq);
 
 	atomic_inc(&deve->ua_count);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	return 0;
 }
 
@@ -190,7 +190,7 @@ void core_scsi3_ua_release_all(
 		kmem_cache_free(se_ua_cache, ua);
 
 		atomic_dec(&deve->ua_count);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 	}
 	spin_unlock(&deve->ua_lock);
 }
@@ -251,7 +251,7 @@ void core_scsi3_ua_for_check_condition(
 		kmem_cache_free(se_ua_cache, ua);
 
 		atomic_dec(&deve->ua_count);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 	}
 	spin_unlock(&deve->ua_lock);
 	spin_unlock_irq(&nacl->device_list_lock);
@@ -310,7 +310,7 @@ int core_scsi3_ua_clear_for_request_sense(
 		kmem_cache_free(se_ua_cache, ua);
 
 		atomic_dec(&deve->ua_count);
-		smp_mb__after_atomic_dec();
+		smp_mb__after_atomic();
 	}
 	spin_unlock(&deve->ua_lock);
 	spin_unlock_irq(&nacl->device_list_lock);
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 41fe8a047d37..746ae80b972f 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -2041,7 +2041,7 @@ static int canon_copy_from_read_buf(struct tty_struct *tty,
 
 	if (found)
 		clear_bit(eol, ldata->read_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	ldata->read_tail += c;
 
 	if (found) {
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index aa97fd845b4d..4b5b3c2fe328 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -200,7 +200,7 @@ static void dma_tx_callback(void *param)
 
 	/* clear the bit used to serialize the DMA tx. */
 	clear_bit(MXS_AUART_DMA_TX_SYNC, &s->flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	/* wake up the possible processes. */
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
@@ -275,7 +275,7 @@ static void mxs_auart_tx_chars(struct mxs_auart_port *s)
 			mxs_auart_dma_tx(s, i);
 		} else {
 			clear_bit(MXS_AUART_DMA_TX_SYNC, &s->flags);
-			smp_mb__after_clear_bit();
+			smp_mb__after_atomic();
 		}
 		return;
 	}
diff --git a/drivers/usb/gadget/tcm_usb_gadget.c b/drivers/usb/gadget/tcm_usb_gadget.c
index f058c0368d61..819875c7e394 100644
--- a/drivers/usb/gadget/tcm_usb_gadget.c
+++ b/drivers/usb/gadget/tcm_usb_gadget.c
@@ -1851,7 +1851,7 @@ static int usbg_port_link(struct se_portal_group *se_tpg, struct se_lun *lun)
 	struct usbg_tpg *tpg = container_of(se_tpg, struct usbg_tpg, se_tpg);
 
 	atomic_inc(&tpg->tpg_port_count);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	return 0;
 }
 
@@ -1861,7 +1861,7 @@ static void usbg_port_unlink(struct se_portal_group *se_tpg,
 	struct usbg_tpg *tpg = container_of(se_tpg, struct usbg_tpg, se_tpg);
 
 	atomic_dec(&tpg->tpg_port_count);
-	smp_mb__after_atomic_dec();
+	smp_mb__after_atomic();
 }
 
 static int usbg_check_stop_free(struct se_cmd *se_cmd)
diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c
index 640fe0173236..f1ec1680e822 100644
--- a/drivers/usb/serial/usb_wwan.c
+++ b/drivers/usb/serial/usb_wwan.c
@@ -325,7 +325,7 @@ static void usb_wwan_outdat_callback(struct urb *urb)
 
 	for (i = 0; i < N_OUT_URB; ++i) {
 		if (portdata->out_urbs[i] == urb) {
-			smp_mb__before_clear_bit();
+			smp_mb__before_atomic();
 			clear_bit(i, &portdata->out_busy);
 			break;
 		}
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index cf50ce93975b..aeb513108448 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -1255,7 +1255,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
 			tpg->tv_tpg_vhost_count++;
 			tpg->vhost_scsi = vs;
 			vs_tpg[tpg->tport_tpgt] = tpg;
-			smp_mb__after_atomic_inc();
+			smp_mb__after_atomic();
 			match = true;
 		}
 		mutex_unlock(&tpg->tv_tpg_mutex);
diff --git a/drivers/w1/w1_family.c b/drivers/w1/w1_family.c
index 3bff6b37b472..3651ec801f45 100644
--- a/drivers/w1/w1_family.c
+++ b/drivers/w1/w1_family.c
@@ -139,9 +139,9 @@ void w1_family_get(struct w1_family *f)
 
 void __w1_family_get(struct w1_family *f)
 {
-	smp_mb__before_atomic_inc();
+	smp_mb__before_atomic();
 	atomic_inc(&f->refcnt);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 }
 
 EXPORT_SYMBOL(w1_unregister_family);
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 607e41460c0d..c4a0666de6f5 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -348,9 +348,9 @@ void xen_pcibk_do_op(struct work_struct *data)
 	notify_remote_via_irq(pdev->evtchn_irq);
 
 	/* Mark that we're done. */
-	smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
+	smp_mb__before_atomic(); /* /after/ clearing PCIF_active */
 	clear_bit(_PDEVF_op_active, &pdev->flags);
-	smp_mb__after_clear_bit(); /* /before/ final check for work */
+	smp_mb__after_atomic(); /* /before/ final check for work */
 
 	/* Check to see if the driver domain tried to start another request in
 	 * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index c9a24444ec9a..2256e9cceec5 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -279,7 +279,7 @@ static inline void btrfs_inode_block_unlocked_dio(struct inode *inode)
 
 static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(BTRFS_INODE_READDIO_NEED_LOCK,
 		  &BTRFS_I(inode)->runtime_flags);
 }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3955e475ceec..f29a54e454d4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3458,7 +3458,7 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
 static void end_extent_buffer_writeback(struct extent_buffer *eb)
 {
 	clear_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&eb->bflags, EXTENT_BUFFER_WRITEBACK);
 }
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5f805bc944fa..5a3b8371772e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7126,7 +7126,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
 		 * before atomic variable goto zero, we must make sure
 		 * dip->errors is perceived to be set.
 		 */
-		smp_mb__before_atomic_dec();
+		smp_mb__before_atomic();
 	}
 
 	/* if there are more bios still pending for this dio, just exit */
@@ -7306,7 +7306,7 @@ out_err:
 	 * before atomic variable goto zero, we must
 	 * make sure dip->errors is perceived to be set.
 	 */
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 	if (atomic_dec_and_test(&dip->pending_bios))
 		bio_io_error(dip->orig_bio);
 
@@ -7449,7 +7449,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 		return 0;
 
 	atomic_inc(&inode->i_dio_count);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 
 	/*
 	 * The generic stuff only does filemap_write_and_wait_range, which
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e79ff6b90cb7..f45040a4bb76 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -642,7 +642,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 		return -EINVAL;
 
 	atomic_inc(&root->will_be_snapshoted);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	btrfs_wait_nocow_write(root);
 
 	ret = btrfs_start_delalloc_inodes(root, 0);
diff --git a/fs/buffer.c b/fs/buffer.c
index 9ddb9fc7d923..6a8110c03a47 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -77,7 +77,7 @@ EXPORT_SYMBOL(__lock_buffer);
 void unlock_buffer(struct buffer_head *bh)
 {
 	clear_bit_unlock(BH_Lock, &bh->b_state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&bh->b_state, BH_Lock);
 }
 EXPORT_SYMBOL(unlock_buffer);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f3b84cd9de56..08b3c116915b 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -42,7 +42,7 @@ int ext4_resize_begin(struct super_block *sb)
 void ext4_resize_end(struct super_block *sb)
 {
 	clear_bit_unlock(EXT4_RESIZING, &EXT4_SB(sb)->s_resize_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index aec7f73832f0..c355f7320e44 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -277,7 +277,7 @@ static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holde
 static void gfs2_holder_wake(struct gfs2_holder *gh)
 {
 	clear_bit(HIF_WAIT, &gh->gh_iflags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&gh->gh_iflags, HIF_WAIT);
 }
 
@@ -411,7 +411,7 @@ static void gfs2_demote_wake(struct gfs2_glock *gl)
 {
 	gl->gl_demote_state = LM_ST_EXCLUSIVE;
 	clear_bit(GLF_DEMOTE, &gl->gl_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
 }
 
@@ -620,7 +620,7 @@ out:
 
 out_sched:
 	clear_bit(GLF_LOCK, &gl->gl_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	gl->gl_lockref.count++;
 	if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0)
 		gl->gl_lockref.count--;
@@ -628,7 +628,7 @@ out_sched:
 
 out_unlock:
 	clear_bit(GLF_LOCK, &gl->gl_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	return;
 }
 
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 54b66809e818..74d9a3dbf16f 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -221,7 +221,7 @@ static void inode_go_sync(struct gfs2_glock *gl)
 	 * Writeback of the data mapping may cause the dirty flag to be set
 	 * so we have to clear it again here.
 	 */
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(GLF_DIRTY, &gl->gl_flags);
 }
 
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index c1eb555dc588..91f274de1246 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1134,7 +1134,7 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
 		queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
 
 	clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY);
 	spin_unlock(&ls->ls_recover_spin);
 }
@@ -1271,7 +1271,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
 
 	ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
 	clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
 	return 0;
 
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 7ad4094d68c0..fe7a56fb6084 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -587,7 +587,7 @@ fail:
 	gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP);
 done:
 	clear_bit(JDF_RECOVERY, &jd->jd_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&jd->jd_flags, JDF_RECOVERY);
 }
 
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index de25d5577e5d..529d9a9eb897 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -333,7 +333,7 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 		set_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
 	else if (val == 0) {
 		clear_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 		gfs2_glock_thaw(sdp);
 	} else {
 		ret = -EINVAL;
@@ -482,7 +482,7 @@ static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 		rv = jid = -EINVAL;
 	sdp->sd_lockstruct.ls_jid = jid;
 	clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
 out:
 	spin_unlock(&sdp->sd_jindex_spin);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 5f26139a165a..6fac74349856 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -43,7 +43,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
 		clear_buffer_uptodate(bh);
 	if (orig_bh) {
 		clear_bit_unlock(BH_Shadow, &orig_bh->b_state);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 		wake_up_bit(&orig_bh->b_state, BH_Shadow);
 	}
 	unlock_buffer(bh);
@@ -239,7 +239,7 @@ static int journal_submit_data_buffers(journal_t *journal,
 		spin_lock(&journal->j_list_lock);
 		J_ASSERT(jinode->i_transaction == commit_transaction);
 		clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
 	}
 	spin_unlock(&journal->j_list_lock);
@@ -277,7 +277,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 		}
 		spin_lock(&journal->j_list_lock);
 		clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
 	}
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d9f3d067cd15..4a3d4ef76127 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2032,9 +2032,9 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry)
 {
 	put_rpccred(entry->cred);
 	kfree(entry);
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 	atomic_long_dec(&nfs_access_nr_entries);
-	smp_mb__after_atomic_dec();
+	smp_mb__after_atomic();
 }
 
 static void nfs_access_free_list(struct list_head *head)
@@ -2082,9 +2082,9 @@ nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
 		else {
 remove_lru_entry:
 			list_del_init(&nfsi->access_cache_inode_lru);
-			smp_mb__before_clear_bit();
+			smp_mb__before_atomic();
 			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
-			smp_mb__after_clear_bit();
+			smp_mb__after_atomic();
 		}
 		spin_unlock(&inode->i_lock);
 	}
@@ -2232,9 +2232,9 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
 	nfs_access_add_rbtree(inode, cache);
 
 	/* Update accounting */
-	smp_mb__before_atomic_inc();
+	smp_mb__before_atomic();
 	atomic_long_inc(&nfs_access_nr_entries);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 
 	/* Add inode to global LRU list */
 	if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 0c438973f3c8..e6f7398d2b3c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1085,7 +1085,7 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 	trace_nfs_invalidate_mapping_exit(inode, ret);
 
 	clear_bit_unlock(NFS_INO_INVALIDATING, bitlock);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(bitlock, NFS_INO_INVALIDATING);
 out:
 	return ret;
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index efac602edb37..b9c61efe9660 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -789,9 +789,9 @@ static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
 
 static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
 }
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2349518eef2c..c0583b9bef71 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1140,9 +1140,9 @@ static int nfs4_run_state_manager(void *);
 
 static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING);
 	rpc_wake_up(&clp->cl_rpcwaitq);
 }
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 2ffebf2081ce..03ed984ab4d8 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -95,7 +95,7 @@ nfs_iocounter_dec(struct nfs_io_counter *c)
 {
 	if (atomic_dec_and_test(&c->io_count)) {
 		clear_bit(NFS_IO_INPROGRESS, &c->flags);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 		wake_up_bit(&c->flags, NFS_IO_INPROGRESS);
 	}
 }
@@ -193,9 +193,9 @@ void nfs_unlock_request(struct nfs_page *req)
 		printk(KERN_ERR "NFS: Invalid unlock attempted\n");
 		BUG();
 	}
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(PG_BUSY, &req->wb_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&req->wb_flags, PG_BUSY);
 }
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cb53d450ae32..fd9536e494bc 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1810,7 +1810,7 @@ static void pnfs_clear_layoutcommitting(struct inode *inode)
 	unsigned long *bitlock = &NFS_I(inode)->flags;
 
 	clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
 }
 
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 023793909778..c3058a076596 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -275,7 +275,7 @@ pnfs_get_lseg(struct pnfs_layout_segment *lseg)
 {
 	if (lseg) {
 		atomic_inc(&lseg->pls_refcount);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 	}
 	return lseg;
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9a3b6a4cd6b9..ffb9459f180b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -405,7 +405,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	nfs_pageio_complete(&pgio);
 
 	clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(bitlock, NFS_INO_FLUSHING);
 
 	if (err < 0)
@@ -1458,7 +1458,7 @@ static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
 static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
 {
 	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
 }
 
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 4b826abb1528..45d4e96a6bac 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -460,9 +460,9 @@ static int write_cnodes(struct ubifs_info *c)
 		 * important.
 		 */
 		clear_bit(DIRTY_CNODE, &cnode->flags);
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		clear_bit(COW_CNODE, &cnode->flags);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 		offs += len;
 		dbg_chk_lpt_sz(c, 1, len);
 		cnode = cnode->cnext;
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 52a6559275c4..3600994f8411 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -895,9 +895,9 @@ static int write_index(struct ubifs_info *c)
 		 * the reason for the second barrier.
 		 */
 		clear_bit(DIRTY_ZNODE, &znode->flags);
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		clear_bit(COW_ZNODE, &znode->flags);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 
 		/*
 		 * We have marked the znode as clean but have not updated the
diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index 9ae6c34dc191..49673510b484 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -80,7 +80,7 @@ static inline void set_bit(int nr, volatile unsigned long *addr)
  *
  * clear_bit() is atomic and may not be reordered.  However, it does
  * not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
  * in order to ensure changes are visible on other processors.
  */
 static inline void clear_bit(int nr, volatile unsigned long *addr)
diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h
index 308a9e22c802..c30266e94806 100644
--- a/include/asm-generic/bitops/lock.h
+++ b/include/asm-generic/bitops/lock.h
@@ -20,7 +20,7 @@
  */
 #define clear_bit_unlock(nr, addr)	\
 do {					\
-	smp_mb__before_clear_bit();	\
+	smp_mb__before_atomic();	\
 	clear_bit(nr, addr);		\
 } while (0)
 
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index c40302f909ce..7cbf837a279c 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -278,7 +278,7 @@ static inline void get_bh(struct buffer_head *bh)
 
 static inline void put_bh(struct buffer_head *bh)
 {
-        smp_mb__before_atomic_dec();
+        smp_mb__before_atomic();
         atomic_dec(&bh->b_count);
 }
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 9f3c275e053e..ec274e0f4ed2 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -649,7 +649,7 @@ static inline void hd_ref_init(struct hd_struct *part)
 static inline void hd_struct_get(struct hd_struct *part)
 {
 	atomic_inc(&part->ref);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 }
 
 static inline int hd_struct_try_get(struct hd_struct *part)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c7bfac1c4a7b..157111043281 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -453,7 +453,7 @@ static inline int tasklet_trylock(struct tasklet_struct *t)
 
 static inline void tasklet_unlock(struct tasklet_struct *t)
 {
-	smp_mb__before_clear_bit(); 
+	smp_mb__before_atomic();
 	clear_bit(TASKLET_STATE_RUN, &(t)->state);
 }
 
@@ -501,7 +501,7 @@ static inline void tasklet_hi_schedule_first(struct tasklet_struct *t)
 static inline void tasklet_disable_nosync(struct tasklet_struct *t)
 {
 	atomic_inc(&t->count);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 }
 
 static inline void tasklet_disable(struct tasklet_struct *t)
@@ -513,13 +513,13 @@ static inline void tasklet_disable(struct tasklet_struct *t)
 
 static inline void tasklet_enable(struct tasklet_struct *t)
 {
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 	atomic_dec(&t->count);
 }
 
 static inline void tasklet_hi_enable(struct tasklet_struct *t)
 {
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 	atomic_dec(&t->count);
 }
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7ed3a3aa6604..616415a4fee4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -493,7 +493,7 @@ static inline void napi_disable(struct napi_struct *n)
 static inline void napi_enable(struct napi_struct *n)
 {
 	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(NAPI_STATE_SCHED, &n->state);
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 25f54c79f757..010cde3b44cb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2782,10 +2782,8 @@ static inline bool __must_check current_set_polling_and_test(void)
 	/*
 	 * Polling state must be visible before we test NEED_RESCHED,
 	 * paired by resched_task()
-	 *
-	 * XXX: assumes set/clear bit are identical barrier wise.
 	 */
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	return unlikely(tif_need_resched());
 }
@@ -2803,7 +2801,7 @@ static inline bool __must_check current_clr_polling_and_test(void)
 	 * Polling state must be visible before we test NEED_RESCHED,
 	 * paired by resched_task()
 	 */
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	return unlikely(tif_need_resched());
 }
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 3a847de83fab..ad7dbe2cfecd 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -142,18 +142,18 @@ struct rpc_task_setup {
 				test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
 #define rpc_clear_running(t)	\
 	do { \
-		smp_mb__before_clear_bit(); \
+		smp_mb__before_atomic(); \
 		clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \
-		smp_mb__after_clear_bit(); \
+		smp_mb__after_atomic(); \
 	} while (0)
 
 #define RPC_IS_QUEUED(t)	test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)
 #define rpc_set_queued(t)	set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)
 #define rpc_clear_queued(t)	\
 	do { \
-		smp_mb__before_clear_bit(); \
+		smp_mb__before_atomic(); \
 		clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \
-		smp_mb__after_clear_bit(); \
+		smp_mb__after_atomic(); \
 	} while (0)
 
 #define RPC_IS_ACTIVATED(t)	test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3e5efb2b236e..3876f0f1dfd3 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -379,9 +379,9 @@ static inline int xprt_test_and_clear_connected(struct rpc_xprt *xprt)
 
 static inline void xprt_clear_connecting(struct rpc_xprt *xprt)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(XPRT_CONNECTING, &xprt->state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 static inline int xprt_connecting(struct rpc_xprt *xprt)
@@ -411,9 +411,9 @@ static inline void xprt_clear_bound(struct rpc_xprt *xprt)
 
 static inline void xprt_clear_binding(struct rpc_xprt *xprt)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(XPRT_BINDING, &xprt->state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt)
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 1e98b5530425..6f8ab7da27c4 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -191,7 +191,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
 	 * pairs with task_work_add()->set_notify_resume() after
 	 * hlist_add_head(task->task_works);
 	 */
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	if (unlikely(current->task_works))
 		task_work_run();
 }
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 5679d927562b..624a8a54806d 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -1204,7 +1204,7 @@ static inline bool __ip_vs_conn_get(struct ip_vs_conn *cp)
 /* put back the conn without restarting its timer */
 static inline void __ip_vs_conn_put(struct ip_vs_conn *cp)
 {
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 	atomic_dec(&cp->refcnt);
 }
 void ip_vs_conn_put(struct ip_vs_conn *cp);
@@ -1408,7 +1408,7 @@ static inline void ip_vs_dest_hold(struct ip_vs_dest *dest)
 
 static inline void ip_vs_dest_put(struct ip_vs_dest *dest)
 {
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 	atomic_dec(&dest->refcnt);
 }
 
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 2956c8da1605..1adf62b39b96 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -534,7 +534,7 @@ return_normal:
 			kgdb_info[cpu].exception_state &=
 				~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);
 			kgdb_info[cpu].enter_kgdb--;
-			smp_mb__before_atomic_dec();
+			smp_mb__before_atomic();
 			atomic_dec(&slaves_in_kgdb);
 			dbg_touch_watchdogs();
 			local_irq_restore(flags);
@@ -662,7 +662,7 @@ kgdb_restore:
 	kgdb_info[cpu].exception_state &=
 		~(DCPU_WANT_MASTER | DCPU_IS_SLAVE);
 	kgdb_info[cpu].enter_kgdb--;
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 	atomic_dec(&masters_in_kgdb);
 	/* Free kgdb_active */
 	atomic_set(&kgdb_active, -1);
diff --git a/kernel/futex.c b/kernel/futex.c
index 5f589279e462..b991ec05b8f9 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -267,7 +267,7 @@ static inline void futex_get_mm(union futex_key *key)
 	 * get_futex_key() implies a full barrier. This is relied upon
 	 * as full barrier (B), see the ordering comment above.
 	 */
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 }
 
 /*
@@ -280,7 +280,7 @@ static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
 	/*
 	 * Full barrier (A), see the ordering comment above.
 	 */
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 #endif
 }
 
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 6b375af4958d..0ac67a5861c5 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -498,7 +498,7 @@ int __usermodehelper_disable(enum umh_disable_depth depth)
 static void helper_lock(void)
 {
 	atomic_inc(&running_helpers);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 }
 
 static void helper_unlock(void)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0c47e300210a..88b4a1dcb58c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -387,9 +387,9 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 	}
 	rcu_prepare_for_idle(smp_processor_id());
 	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
-	smp_mb__before_atomic_inc();  /* See above. */
+	smp_mb__before_atomic();  /* See above. */
 	atomic_inc(&rdtp->dynticks);
-	smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
+	smp_mb__after_atomic();  /* Force ordering with next sojourn. */
 	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 
 	/*
@@ -507,10 +507,10 @@ void rcu_irq_exit(void)
 static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
 			       int user)
 {
-	smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
+	smp_mb__before_atomic();  /* Force ordering w/previous sojourn. */
 	atomic_inc(&rdtp->dynticks);
 	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
-	smp_mb__after_atomic_inc();  /* See above. */
+	smp_mb__after_atomic();  /* See above. */
 	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
 	rcu_cleanup_after_idle(smp_processor_id());
 	trace_rcu_dyntick(TPS("End"), oldval, rdtp->dynticks_nesting);
@@ -635,10 +635,10 @@ void rcu_nmi_enter(void)
 	    (atomic_read(&rdtp->dynticks) & 0x1))
 		return;
 	rdtp->dynticks_nmi_nesting++;
-	smp_mb__before_atomic_inc();  /* Force delay from prior write. */
+	smp_mb__before_atomic();  /* Force delay from prior write. */
 	atomic_inc(&rdtp->dynticks);
 	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
-	smp_mb__after_atomic_inc();  /* See above. */
+	smp_mb__after_atomic();  /* See above. */
 	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
 }
 
@@ -657,9 +657,9 @@ void rcu_nmi_exit(void)
 	    --rdtp->dynticks_nmi_nesting != 0)
 		return;
 	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
-	smp_mb__before_atomic_inc();  /* See above. */
+	smp_mb__before_atomic();  /* See above. */
 	atomic_inc(&rdtp->dynticks);
-	smp_mb__after_atomic_inc();  /* Force delay to next write. */
+	smp_mb__after_atomic();  /* Force delay to next write. */
 	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 }
 
@@ -2790,7 +2790,7 @@ void synchronize_sched_expedited(void)
 		s = atomic_long_read(&rsp->expedited_done);
 		if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
 			/* ensure test happens before caller kfree */
-			smp_mb__before_atomic_inc(); /* ^^^ */
+			smp_mb__before_atomic(); /* ^^^ */
 			atomic_long_inc(&rsp->expedited_workdone1);
 			return;
 		}
@@ -2808,7 +2808,7 @@ void synchronize_sched_expedited(void)
 		s = atomic_long_read(&rsp->expedited_done);
 		if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
 			/* ensure test happens before caller kfree */
-			smp_mb__before_atomic_inc(); /* ^^^ */
+			smp_mb__before_atomic(); /* ^^^ */
 			atomic_long_inc(&rsp->expedited_workdone2);
 			return;
 		}
@@ -2837,7 +2837,7 @@ void synchronize_sched_expedited(void)
 		s = atomic_long_read(&rsp->expedited_done);
 		if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
 			/* ensure test happens before caller kfree */
-			smp_mb__before_atomic_inc(); /* ^^^ */
+			smp_mb__before_atomic(); /* ^^^ */
 			atomic_long_inc(&rsp->expedited_done_lost);
 			break;
 		}
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 962d1d589929..56db2f853e43 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2523,9 +2523,9 @@ static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
 	/* Record start of fully idle period. */
 	j = jiffies;
 	ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j;
-	smp_mb__before_atomic_inc();
+	smp_mb__before_atomic();
 	atomic_inc(&rdtp->dynticks_idle);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1);
 }
 
@@ -2590,9 +2590,9 @@ static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
 	}
 
 	/* Record end of idle period. */
-	smp_mb__before_atomic_inc();
+	smp_mb__before_atomic();
 	atomic_inc(&rdtp->dynticks_idle);
-	smp_mb__after_atomic_inc();
+	smp_mb__after_atomic();
 	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
 
 	/*
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 8b836b376d91..746bc9344969 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -165,7 +165,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 		 * do a write memory barrier, and then update the count, to
 		 * make sure the vector is visible when count is set.
 		 */
-		smp_mb__before_atomic_inc();
+		smp_mb__before_atomic();
 		atomic_inc(&(vec)->count);
 		do_mb = 1;
 	}
@@ -185,14 +185,14 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 		 * the new priority vec.
 		 */
 		if (do_mb)
-			smp_mb__after_atomic_inc();
+			smp_mb__after_atomic();
 
 		/*
 		 * When removing from the vector, we decrement the counter first
 		 * do a memory barrier and then clear the mask.
 		 */
 		atomic_dec(&(vec)->count);
-		smp_mb__after_atomic_inc();
+		smp_mb__after_atomic();
 		cpumask_clear_cpu(cpu, vec->mask);
 	}
 
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 7d50f794e248..0ffa20ae657b 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -394,7 +394,7 @@ EXPORT_SYMBOL(__wake_up_bit);
  *
  * In order for this to function properly, as it uses waitqueue_active()
  * internally, some kind of memory barrier must be done prior to calling
- * this. Typically, this will be smp_mb__after_clear_bit(), but in some
+ * this. Typically, this will be smp_mb__after_atomic(), but in some
  * cases where bitflags are manipulated non-atomically under a lock, one
  * may need to use a less regular barrier, such fs/inode.c's smp_mb(),
  * because spin_unlock() does not guarantee a memory barrier.
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 09d9591b7708..1706cbbdf5f0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -557,7 +557,7 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
 	bit = sync ? BDI_sync_congested : BDI_async_congested;
 	if (test_and_clear_bit(bit, &bdi->state))
 		atomic_dec(&nr_bdi_congested[sync]);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	if (waitqueue_active(wqh))
 		wake_up(wqh);
 }
diff --git a/mm/filemap.c b/mm/filemap.c
index a82fbe4c9e8e..c73535c914cc 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -740,7 +740,7 @@ void unlock_page(struct page *page)
 {
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	clear_bit_unlock(PG_locked, &page->flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_page(page, PG_locked);
 }
 EXPORT_SYMBOL(unlock_page);
@@ -757,7 +757,7 @@ void end_page_writeback(struct page *page)
 	if (!test_clear_page_writeback(page))
 		BUG();
 
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	wake_up_page(page, PG_writeback);
 }
 EXPORT_SYMBOL(end_page_writeback);
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c
index 8c93267ce969..c4e09846d1de 100644
--- a/net/atm/pppoatm.c
+++ b/net/atm/pppoatm.c
@@ -252,7 +252,7 @@ static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
 	 * we need to ensure there's a memory barrier after it. The bit
 	 * *must* be set before we do the atomic_inc() on pvcc->inflight.
 	 * There's no smp_mb__after_set_bit(), so it's this or abuse
-	 * smp_mb__after_clear_bit().
+	 * smp_mb__after_atomic().
 	 */
 	test_and_set_bit(BLOCKED, &pvcc->blocked);
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 49774912cb01..74014420b3c7 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -45,7 +45,7 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
 		return;
 
 	clear_bit(HCI_INQUIRY, &hdev->flags);
-	smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */
+	smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */
 	wake_up_bit(&hdev->flags, HCI_INQUIRY);
 
 	hci_conn_check_pending(hdev);
@@ -1768,7 +1768,7 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags))
 		return;
 
-	smp_mb__after_clear_bit(); /* wake_up_bit advises about this barrier */
+	smp_mb__after_atomic(); /* wake_up_bit advises about this barrier */
 	wake_up_bit(&hdev->flags, HCI_INQUIRY);
 
 	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
diff --git a/net/core/dev.c b/net/core/dev.c
index 5b3042e69f85..e14f1cba591a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1326,7 +1326,7 @@ static int __dev_close_many(struct list_head *head)
 		 * dev->stop() will invoke napi_disable() on all of it's
 		 * napi_struct instances on this device.
 		 */
-		smp_mb__after_clear_bit(); /* Commit netif_running(). */
+		smp_mb__after_atomic(); /* Commit netif_running(). */
 	}
 
 	dev_deactivate_many(head);
@@ -3343,7 +3343,7 @@ static void net_tx_action(struct softirq_action *h)
 
 			root_lock = qdisc_lock(q);
 			if (spin_trylock(root_lock)) {
-				smp_mb__before_clear_bit();
+				smp_mb__before_atomic();
 				clear_bit(__QDISC_STATE_SCHED,
 					  &q->state);
 				qdisc_run(q);
@@ -3353,7 +3353,7 @@ static void net_tx_action(struct softirq_action *h)
 					      &q->state)) {
 					__netif_reschedule(q);
 				} else {
-					smp_mb__before_clear_bit();
+					smp_mb__before_atomic();
 					clear_bit(__QDISC_STATE_SCHED,
 						  &q->state);
 				}
@@ -4244,7 +4244,7 @@ void __napi_complete(struct napi_struct *n)
 	BUG_ON(n->gro_list);
 
 	list_del(&n->poll_list);
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(NAPI_STATE_SCHED, &n->state);
 }
 EXPORT_SYMBOL(__napi_complete);
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 9c3a839322ba..bd0767e6b2b3 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -147,7 +147,7 @@ static void linkwatch_do_dev(struct net_device *dev)
 	 * Make sure the above read is complete since it can be
 	 * rewritten as soon as we clear the bit below.
 	 */
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 
 	/* We are about to handle this device,
 	 * so new events can be accepted
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 48f424465112..56cd458a1b8c 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -522,7 +522,7 @@ EXPORT_SYMBOL_GPL(inet_getpeer);
 void inet_putpeer(struct inet_peer *p)
 {
 	p->dtime = (__u32)jiffies;
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 	atomic_dec(&p->refcnt);
 }
 EXPORT_SYMBOL_GPL(inet_putpeer);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 025e25093984..366cf06587b8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1930,10 +1930,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			/* It is possible TX completion already happened
 			 * before we set TSQ_THROTTLED, so we must
 			 * test again the condition.
-			 * We abuse smp_mb__after_clear_bit() because
-			 * there is no smp_mb__after_set_bit() yet
 			 */
-			smp_mb__after_clear_bit();
+			smp_mb__after_atomic();
 			if (atomic_read(&sk->sk_wmem_alloc) > limit)
 				break;
 		}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 75421f2ba8be..1f4f954c4b47 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -914,7 +914,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 	nf_ct_ext_destroy(ct);
 	nf_ct_ext_free(ct);
 	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
-	smp_mb__before_atomic_dec();
+	smp_mb__before_atomic();
 	atomic_dec(&net->ct.count);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_free);
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index b7ebe23cdedf..d67de453c35a 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -598,7 +598,7 @@ static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
 {
 	atomic64_set(&ic->i_ack_next, seq);
 	if (ack_required) {
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
 	}
 }
@@ -606,7 +606,7 @@ static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
 static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
 {
 	clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	return atomic64_read(&ic->i_ack_next);
 }
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index 45033358358e..aa8bf6786008 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -429,7 +429,7 @@ static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
 {
 	atomic64_set(&ic->i_ack_next, seq);
 	if (ack_required) {
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
 	}
 }
@@ -437,7 +437,7 @@ static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
 static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
 {
 	clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	return atomic64_read(&ic->i_ack_next);
 }
diff --git a/net/rds/send.c b/net/rds/send.c
index a82fb660ec00..23718160d71e 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -107,7 +107,7 @@ static int acquire_in_xmit(struct rds_connection *conn)
 static void release_in_xmit(struct rds_connection *conn)
 {
 	clear_bit(RDS_IN_XMIT, &conn->c_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	/*
 	 * We don't use wait_on_bit()/wake_up_bit() because our waking is in a
 	 * hot path and finding waiters is very rare.  We don't want to walk
@@ -661,7 +661,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
 
 	/* order flag updates with spin locks */
 	if (!list_empty(&list))
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 
 	spin_unlock_irqrestore(&conn->c_lock, flags);
 
@@ -691,7 +691,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 	}
 
 	/* order flag updates with the rs lock */
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	spin_unlock_irqrestore(&rs->rs_lock, flags);
 
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 81cf5a4c5e40..53b17ca0dff5 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -93,7 +93,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
 		rm->m_ack_seq = tc->t_last_sent_nxt +
 				sizeof(struct rds_header) +
 				be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1;
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags);
 		tc->t_last_expected_una = rm->m_ack_seq + 1;
 
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 5285ead196c0..247e973544bf 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -296,7 +296,7 @@ static void
 rpcauth_unhash_cred_locked(struct rpc_cred *cred)
 {
 	hlist_del_rcu(&cred->cr_hash);
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
 }
 
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 36e431ee1c90..b6e440baccc3 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -143,7 +143,7 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
 	gss_get_ctx(ctx);
 	rcu_assign_pointer(gss_cred->gc_ctx, ctx);
 	set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
 }
 
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 3513d559bc45..9761a0da964d 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -244,10 +244,10 @@ void xprt_free_bc_request(struct rpc_rqst *req)
 	dprintk("RPC:       free backchannel req=%p\n", req);
 
 	req->rq_connect_cookie = xprt->connect_cookie - 1;
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
 	clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 
 	if (!xprt_need_to_requeue(xprt)) {
 		/*
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d173f79947c6..89d051de6b3e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -230,9 +230,9 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
 {
 	xprt->snd_task = NULL;
 	if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		clear_bit(XPRT_LOCKED, &xprt->state);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 	} else
 		queue_work(rpciod_workqueue, &xprt->task_cleanup);
 }
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 25a3dcf15cae..402a7e9a16b7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -893,11 +893,11 @@ static void xs_close(struct rpc_xprt *xprt)
 	xs_reset_transport(transport);
 	xprt->reestablish_timeout = 0;
 
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
 	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
 	clear_bit(XPRT_CLOSING, &xprt->state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	xprt_disconnect_done(xprt);
 }
 
@@ -1497,12 +1497,12 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
 
 static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
 {
-	smp_mb__before_clear_bit();
+	smp_mb__before_atomic();
 	clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
 	clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
 	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
 	clear_bit(XPRT_CLOSING, &xprt->state);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 }
 
 static void xs_sock_mark_closed(struct rpc_xprt *xprt)
@@ -1556,10 +1556,10 @@ static void xs_tcp_state_change(struct sock *sk)
 		xprt->connect_cookie++;
 		xprt->reestablish_timeout = 0;
 		set_bit(XPRT_CLOSING, &xprt->state);
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		clear_bit(XPRT_CONNECTED, &xprt->state);
 		clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 		xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
 		break;
 	case TCP_CLOSE_WAIT:
@@ -1578,9 +1578,9 @@ static void xs_tcp_state_change(struct sock *sk)
 	case TCP_LAST_ACK:
 		set_bit(XPRT_CLOSING, &xprt->state);
 		xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
-		smp_mb__before_clear_bit();
+		smp_mb__before_atomic();
 		clear_bit(XPRT_CONNECTED, &xprt->state);
-		smp_mb__after_clear_bit();
+		smp_mb__after_atomic();
 		break;
 	case TCP_CLOSE:
 		xs_tcp_cancel_linger_timeout(xprt);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index bb7e8ba821f4..749f80c21e22 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1207,7 +1207,7 @@ restart:
 	sk->sk_state	= TCP_ESTABLISHED;
 	sock_hold(newsk);
 
-	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
+	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
 	unix_peer(sk)	= newsk;
 
 	unix_state_unlock(sk);
diff --git a/sound/pci/bt87x.c b/sound/pci/bt87x.c
index 8546711d12f9..70951fd9b354 100644
--- a/sound/pci/bt87x.c
+++ b/sound/pci/bt87x.c
@@ -443,7 +443,7 @@ static int snd_bt87x_pcm_open(struct snd_pcm_substream *substream)
 
 _error:
 	clear_bit(0, &chip->opened);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	return err;
 }
 
@@ -458,7 +458,7 @@ static int snd_bt87x_close(struct snd_pcm_substream *substream)
 
 	chip->substream = NULL;
 	clear_bit(0, &chip->opened);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
 	return 0;
 }
 
-- 
cgit 


From 073a77d03ee88ae3a5504b3f73632841a55d60a1 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 15 Apr 2014 19:47:38 +0800
Subject: regulator: tps65217: Remove *rdev[] from struct tps65217

This driver now uses devm_regulator_register(), so we no longer need to save
the rdev pointer in tps->rdev[i] for cleanup.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/tps65217-regulator.c | 3 ---
 include/linux/mfd/tps65217.h           | 1 -
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/tps65217-regulator.c b/drivers/regulator/tps65217-regulator.c
index 10b78d2b766a..8482f6ba08a1 100644
--- a/drivers/regulator/tps65217-regulator.c
+++ b/drivers/regulator/tps65217-regulator.c
@@ -257,9 +257,6 @@ static int tps65217_regulator_probe(struct platform_device *pdev)
 				pdev->name);
 			return PTR_ERR(rdev);
 		}
-
-		/* Save regulator for cleanup */
-		tps->rdev[i] = rdev;
 	}
 	return 0;
 }
diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h
index 54b5458ec084..95d6938737fd 100644
--- a/include/linux/mfd/tps65217.h
+++ b/include/linux/mfd/tps65217.h
@@ -254,7 +254,6 @@ struct tps65217 {
 	struct tps65217_board *pdata;
 	unsigned long id;
 	struct regulator_desc desc[TPS65217_NUM_REGULATOR];
-	struct regulator_dev *rdev[TPS65217_NUM_REGULATOR];
 	struct regmap *regmap;
 };
 
-- 
cgit 


From 290414499cf94284a97cc3c33214d13ccfcd896a Mon Sep 17 00:00:00 2001
From: Doug Anderson <dianders@chromium.org>
Date: Wed, 16 Apr 2014 16:12:28 -0700
Subject: regulator: tps65090: Allow setting the overcurrent wait time

The tps65090 regulator allows you to specify how long you want it to
wait before detecting an overcurrent condition.  Allow specifying that
through the device tree (or through platform data).

Signed-off-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Simon Glass <sjg@chromium.org>
Signed-off-by: Michael Spang <spang@chromium.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 .../devicetree/bindings/regulator/tps65090.txt     |  4 ++
 drivers/regulator/tps65090-regulator.c             | 56 ++++++++++++++++++++++
 include/linux/mfd/tps65090.h                       |  5 ++
 3 files changed, 65 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/regulator/tps65090.txt b/Documentation/devicetree/bindings/regulator/tps65090.txt
index 313a60ba61d8..340980239ea9 100644
--- a/Documentation/devicetree/bindings/regulator/tps65090.txt
+++ b/Documentation/devicetree/bindings/regulator/tps65090.txt
@@ -21,6 +21,10 @@ Optional properties:
   number should be provided. If it is externally controlled and no GPIO
   entry then driver will just configure this rails as external control
   and will not provide any enable/disable APIs.
+- ti,overcurrent-wait: This is applicable to FET registers, which have a
+  poorly defined "overcurrent wait" field.  If this property is present it
+  should be between 0 - 3.  If this property isn't present we won't touch the
+  "overcurrent wait" field and we'll leave it to the BIOS/EC to deal with.
 
 Each regulator is defined using the standard binding for regulators.
 
diff --git a/drivers/regulator/tps65090-regulator.c b/drivers/regulator/tps65090-regulator.c
index 2e92ef68574d..ca04e9f010e1 100644
--- a/drivers/regulator/tps65090-regulator.c
+++ b/drivers/regulator/tps65090-regulator.c
@@ -28,15 +28,58 @@
 #include <linux/regulator/of_regulator.h>
 #include <linux/mfd/tps65090.h>
 
+#define CTRL_WT_BIT		2 /* Regulator wait time 0 bit */
+
+#define MAX_OVERCURRENT_WAIT	3 /* Overcurrent wait must be <= this */
+
+/**
+ * struct tps65090_regulator - Per-regulator data for a tps65090 regulator
+ *
+ * @dev: Pointer to our device.
+ * @desc: The struct regulator_desc for the regulator.
+ * @rdev: The struct regulator_dev for the regulator.
+ * @overcurrent_wait_valid: True if overcurrent_wait is valid.
+ * @overcurrent_wait: For FETs, the value to put in the WTFET bitfield.
+ */
+
 struct tps65090_regulator {
 	struct device		*dev;
 	struct regulator_desc	*desc;
 	struct regulator_dev	*rdev;
+	bool			overcurrent_wait_valid;
+	int			overcurrent_wait;
 };
 
 static struct regulator_ops tps65090_ext_control_ops = {
 };
 
+/**
+ * tps65090_reg_set_overcurrent_wait - Setup overcurrent wait
+ *
+ * This will set the overcurrent wait time based on what's in the regulator
+ * info.
+ *
+ * @ri:		Overall regulator data
+ * @rdev:	Regulator device
+ *
+ * Return: 0 if no error, non-zero if there was an error writing the register.
+ */
+static int tps65090_reg_set_overcurrent_wait(struct tps65090_regulator *ri,
+					     struct regulator_dev *rdev)
+{
+	int ret;
+
+	ret = regmap_update_bits(rdev->regmap, rdev->desc->enable_reg,
+				 MAX_OVERCURRENT_WAIT << CTRL_WT_BIT,
+				 ri->overcurrent_wait << CTRL_WT_BIT);
+	if (ret) {
+		dev_err(&rdev->dev, "Error updating overcurrent wait %#x\n",
+			rdev->desc->enable_reg);
+	}
+
+	return ret;
+}
+
 static struct regulator_ops tps65090_reg_contol_ops = {
 	.enable		= regulator_enable_regmap,
 	.disable	= regulator_disable_regmap,
@@ -209,6 +252,11 @@ static struct tps65090_platform_data *tps65090_parse_dt_reg_data(
 			rpdata->gpio = of_get_named_gpio(np,
 					"dcdc-ext-control-gpios", 0);
 
+		if (of_property_read_u32(tps65090_matches[idx].of_node,
+					 "ti,overcurrent-wait",
+					 &rpdata->overcurrent_wait) == 0)
+			rpdata->overcurrent_wait_valid = true;
+
 		tps65090_pdata->reg_pdata[idx] = rpdata;
 	}
 	return tps65090_pdata;
@@ -258,6 +306,8 @@ static int tps65090_regulator_probe(struct platform_device *pdev)
 		ri = &pmic[num];
 		ri->dev = &pdev->dev;
 		ri->desc = &tps65090_regulator_desc[num];
+		ri->overcurrent_wait_valid = tps_pdata->overcurrent_wait_valid;
+		ri->overcurrent_wait = tps_pdata->overcurrent_wait;
 
 		/*
 		 * TPS5090 DCDC support the control from external digital input.
@@ -299,6 +349,12 @@ static int tps65090_regulator_probe(struct platform_device *pdev)
 		}
 		ri->rdev = rdev;
 
+		if (ri->overcurrent_wait_valid) {
+			ret = tps65090_reg_set_overcurrent_wait(ri, rdev);
+			if (ret < 0)
+				return ret;
+		}
+
 		/* Enable external control if it is require */
 		if (tps_pdata && is_dcdc(num) && tps_pdata->reg_init_data &&
 				tps_pdata->enable_ext_control) {
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 3f43069413e7..f25adfa97c73 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -78,11 +78,16 @@ struct tps65090 {
  *     DCDC1, DCDC2 and DCDC3.
  * @gpio: Gpio number if external control is enabled and controlled through
  *     gpio.
+ * @overcurrent_wait_valid: True if the overcurrent_wait should be applied.
+ * @overcurrent_wait: Value to set as the overcurrent wait time.  This is the
+ *     actual bitfield value, not a time in ms (valid values are 0 - 3).
  */
 struct tps65090_regulator_plat_data {
 	struct regulator_init_data *reg_init_data;
 	bool enable_ext_control;
 	int gpio;
+	bool overcurrent_wait_valid;
+	int overcurrent_wait;
 };
 
 struct tps65090_platform_data {
-- 
cgit 


From 3ac170376f2c5123414e0267aa0f9cf218965e24 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Thu, 17 Apr 2014 11:40:11 +0200
Subject: regmap: add reg_read/reg_write callbacks to regmap_bus struct

Some buses do not support sending/receiving multiple registers in one go.
Buses of this kind just unpack the registers that have previously been
packed by the regmap core, or pack registers that will later be unpacked by
the core code.

Add reg_write and reg_read callbacks in order to optimize access through
buses of this kind.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/base/regmap/regmap.c | 26 ++++++++++++++++++++++++++
 include/linux/regmap.h       |  6 ++++++
 2 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 63e30ef096e2..2209de0ceabc 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -35,10 +35,14 @@ static int _regmap_update_bits(struct regmap *map, unsigned int reg,
 			       unsigned int mask, unsigned int val,
 			       bool *change);
 
+static int _regmap_bus_reg_read(void *context, unsigned int reg,
+				unsigned int *val);
 static int _regmap_bus_read(void *context, unsigned int reg,
 			    unsigned int *val);
 static int _regmap_bus_formatted_write(void *context, unsigned int reg,
 				       unsigned int val);
+static int _regmap_bus_reg_write(void *context, unsigned int reg,
+				 unsigned int val);
 static int _regmap_bus_raw_write(void *context, unsigned int reg,
 				 unsigned int val);
 
@@ -493,6 +497,12 @@ struct regmap *regmap_init(struct device *dev,
 		map->reg_read  = config->reg_read;
 		map->reg_write = config->reg_write;
 
+		map->defer_caching = false;
+		goto skip_format_initialization;
+	} else if (!bus->read || !bus->write) {
+		map->reg_read = _regmap_bus_reg_read;
+		map->reg_write = _regmap_bus_reg_write;
+
 		map->defer_caching = false;
 		goto skip_format_initialization;
 	} else {
@@ -1284,6 +1294,14 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
 	return ret;
 }
 
+static int _regmap_bus_reg_write(void *context, unsigned int reg,
+				 unsigned int val)
+{
+	struct regmap *map = context;
+
+	return map->bus->reg_write(map->bus_context, reg, val);
+}
+
 static int _regmap_bus_raw_write(void *context, unsigned int reg,
 				 unsigned int val)
 {
@@ -1925,6 +1943,14 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 	return ret;
 }
 
+static int _regmap_bus_reg_read(void *context, unsigned int reg,
+				unsigned int *val)
+{
+	struct regmap *map = context;
+
+	return map->bus->reg_read(map->bus_context, reg, val);
+}
+
 static int _regmap_bus_read(void *context, unsigned int reg,
 			    unsigned int *val)
 {
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 85691b9b4fa7..7b0e4b425cdf 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -276,6 +276,10 @@ typedef int (*regmap_hw_async_write)(void *context,
 typedef int (*regmap_hw_read)(void *context,
 			      const void *reg_buf, size_t reg_size,
 			      void *val_buf, size_t val_size);
+typedef int (*regmap_hw_reg_read)(void *context, unsigned int reg,
+				  unsigned int *val);
+typedef int (*regmap_hw_reg_write)(void *context, unsigned int reg,
+				   unsigned int val);
 typedef struct regmap_async *(*regmap_hw_async_alloc)(void);
 typedef void (*regmap_hw_free_context)(void *context);
 
@@ -309,7 +313,9 @@ struct regmap_bus {
 	regmap_hw_write write;
 	regmap_hw_gather_write gather_write;
 	regmap_hw_async_write async_write;
+	regmap_hw_reg_write reg_write;
 	regmap_hw_read read;
+	regmap_hw_reg_read reg_read;
 	regmap_hw_free_context free_context;
 	regmap_hw_async_alloc async_alloc;
 	u8 read_flag_mask;
-- 
cgit 


From e4fcb1d6148284a10c314fce2a488cf19ce886f6 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Date: Wed, 16 Apr 2014 10:01:37 +0100
Subject: mfd: arizona: Factor out read of device tree GPIOs

This patch factors out the reading of GPIOs for the Arizona devices
into a helper function.

Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/mfd/arizona-core.c       | 31 ++++++++++++++++++++++---------
 include/linux/mfd/arizona/core.h |  3 +++
 2 files changed, 25 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c
index 1c3ae57082ed..37b5e1447d02 100644
--- a/drivers/mfd/arizona-core.c
+++ b/drivers/mfd/arizona-core.c
@@ -508,19 +508,32 @@ int arizona_of_get_type(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(arizona_of_get_type);
 
+int arizona_of_get_named_gpio(struct arizona *arizona, const char *prop,
+			      bool mandatory)
+{
+	int gpio;
+
+	gpio = of_get_named_gpio(arizona->dev->of_node, prop, 0);
+	if (gpio < 0) {
+		if (mandatory)
+			dev_err(arizona->dev,
+				"Mandatory DT gpio %s missing/malformed: %d\n",
+				prop, gpio);
+
+		gpio = 0;
+	}
+
+	return gpio;
+}
+EXPORT_SYMBOL_GPL(arizona_of_get_named_gpio);
+
 static int arizona_of_get_core_pdata(struct arizona *arizona)
 {
+	struct arizona_pdata *pdata = &arizona->pdata;
 	int ret, i;
 
-	arizona->pdata.reset = of_get_named_gpio(arizona->dev->of_node,
-						 "wlf,reset", 0);
-	if (arizona->pdata.reset < 0)
-		arizona->pdata.reset = 0;
-
-	arizona->pdata.ldoena = of_get_named_gpio(arizona->dev->of_node,
-						  "wlf,ldoena", 0);
-	if (arizona->pdata.ldoena < 0)
-		arizona->pdata.ldoena = 0;
+	pdata->reset = arizona_of_get_named_gpio(arizona, "wlf,reset", true);
+	pdata->ldoena = arizona_of_get_named_gpio(arizona, "wlf,ldoena", true);
 
 	ret = of_property_read_u32_array(arizona->dev->of_node,
 					 "wlf,gpio-defaults",
diff --git a/include/linux/mfd/arizona/core.h b/include/linux/mfd/arizona/core.h
index 5cf8b91ce996..6d9371f88875 100644
--- a/include/linux/mfd/arizona/core.h
+++ b/include/linux/mfd/arizona/core.h
@@ -124,4 +124,7 @@ int wm5102_patch(struct arizona *arizona);
 int wm5110_patch(struct arizona *arizona);
 int wm8997_patch(struct arizona *arizona);
 
+extern int arizona_of_get_named_gpio(struct arizona *arizona, const char *prop,
+				     bool mandatory);
+
 #endif
-- 
cgit 


From 79f7ae7c45a6ccf04e2908337461dee615f6afb0 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Fri, 14 Mar 2014 21:11:56 +0900
Subject: mmc: clarify DDR timing mode between SD-UHS and eMMC

Until now, eMMC DDR mode reused the SD UHS DDR50 timing value.  Add a
separate MMC_TIMING_MMC_DDR52 timing mode for eMMC so that the timing
mode makes the device type clear.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/debugfs.c | 3 +++
 drivers/mmc/core/mmc.c     | 2 +-
 include/linux/mmc/host.h   | 3 ++-
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 54829c0ed000..509229b48b55 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -135,6 +135,9 @@ static int mmc_ios_show(struct seq_file *s, void *data)
 	case MMC_TIMING_UHS_DDR50:
 		str = "sd uhs DDR50";
 		break;
+	case MMC_TIMING_MMC_DDR52:
+		str = "mmc DDR52";
+		break;
 	case MMC_TIMING_MMC_HS200:
 		str = "mmc high-speed SDR200";
 		break;
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 1ab5f3a0af5b..e22d8515ff97 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1264,7 +1264,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 					goto err;
 			}
 			mmc_card_set_ddr_mode(card);
-			mmc_set_timing(card->host, MMC_TIMING_UHS_DDR50);
+			mmc_set_timing(card->host, MMC_TIMING_MMC_DDR52);
 			mmc_set_bus_width(card->host, bus_width);
 		}
 	}
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index cb61ea4d6945..35354207e71f 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -58,7 +58,8 @@ struct mmc_ios {
 #define MMC_TIMING_UHS_SDR50	5
 #define MMC_TIMING_UHS_SDR104	6
 #define MMC_TIMING_UHS_DDR50	7
-#define MMC_TIMING_MMC_HS200	8
+#define MMC_TIMING_MMC_DDR52	8
+#define MMC_TIMING_MMC_HS200	9
 
 #define MMC_SDR_MODE		0
 #define MMC_1_2V_DDR_MODE	1
-- 
cgit 


From af6b6967d6e17fe070c0fd1be364c34cbd31a523 Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Wed, 16 Apr 2014 17:19:12 +0200
Subject: net: phy: export genphy_config_init()

This enables other drivers to call this generic implementation, and then
only do specific details on top of it.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 3 ++-
 include/linux/phy.h          | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 0ce606624296..466ae3e06322 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1067,7 +1067,7 @@ int genphy_soft_reset(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(genphy_soft_reset);
 
-static int genphy_config_init(struct phy_device *phydev)
+int genphy_config_init(struct phy_device *phydev)
 {
 	int val;
 	u32 features;
@@ -1118,6 +1118,7 @@ static int gen10g_soft_reset(struct phy_device *phydev)
 	/* Do nothing for now */
 	return 0;
 }
+EXPORT_SYMBOL(genphy_config_init);
 
 static int gen10g_config_init(struct phy_device *phydev)
 {
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 4d0221fd0688..51d15f684e7e 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -666,6 +666,7 @@ static inline int phy_read_status(struct phy_device *phydev)
 	return phydev->drv->read_status(phydev);
 }
 
+int genphy_config_init(struct phy_device *phydev);
 int genphy_setup_forced(struct phy_device *phydev);
 int genphy_restart_aneg(struct phy_device *phydev);
 int genphy_config_aneg(struct phy_device *phydev);
-- 
cgit 


From a0265d28b3a5877b5b8edd14eb12a2ccb60ab1f3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 17 Apr 2014 13:45:03 +0800
Subject: net: Add __dev_forward_skb

This patch adds the helper __dev_forward_skb which is identical to
dev_forward_skb except that it doesn't actually inject the skb into
the stack.  This is useful where we wish to have finer control over
how the packet is injected, e.g., via netif_rx_ni or netif_receive_skb.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 42 ++++++++++++++++++++++++------------------
 2 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7ed3a3aa6604..a803d792df1e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2633,6 +2633,7 @@ int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_port_id *ppid);
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq);
+int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index d2c8a06b3a98..11d70e3afefa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1661,6 +1661,29 @@ bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
+int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
+{
+	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
+		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
+			atomic_long_inc(&dev->rx_dropped);
+			kfree_skb(skb);
+			return NET_RX_DROP;
+		}
+	}
+
+	if (unlikely(!is_skb_forwardable(dev, skb))) {
+		atomic_long_inc(&dev->rx_dropped);
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
+	skb_scrub_packet(skb, true);
+	skb->protocol = eth_type_trans(skb, dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__dev_forward_skb);
+
 /**
  * dev_forward_skb - loopback an skb to another netif
  *
@@ -1681,24 +1704,7 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
  */
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
-	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
-			atomic_long_inc(&dev->rx_dropped);
-			kfree_skb(skb);
-			return NET_RX_DROP;
-		}
-	}
-
-	if (unlikely(!is_skb_forwardable(dev, skb))) {
-		atomic_long_inc(&dev->rx_dropped);
-		kfree_skb(skb);
-		return NET_RX_DROP;
-	}
-
-	skb_scrub_packet(skb, true);
-	skb->protocol = eth_type_trans(skb, dev);
-
-	return netif_rx_internal(skb);
+	return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
-- 
cgit 


From 4104d326b670c2b66f575d2004daa28b2d1b4c8d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Fri, 10 Jan 2014 17:01:58 -0500
Subject: ftrace: Remove global function list and call function directly

Only one global function is allowed to be enabled at a time, so there is
no reason to keep a list of global functions to call.

Instead, simply have all the users of the global ops use the global ops
directly, rather than registering their own ftrace_ops. Just switch what
function is used before enabling the function tracer.

This removes a lot of code as well as the complexity involved with it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h            |  20 ++---
 kernel/trace/ftrace.c             | 152 +++++++++++---------------------------
 kernel/trace/trace.c              |   2 +
 kernel/trace/trace.h              |  19 +++--
 kernel/trace/trace_functions.c    |  55 +++++---------
 kernel/trace/trace_irqsoff.c      |  33 ++++-----
 kernel/trace/trace_sched_wakeup.c |  40 +++++-----
 kernel/trace/trace_selftest.c     |  33 +++++----
 8 files changed, 133 insertions(+), 221 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9212b017bc72..f0ff2c2453e7 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -62,9 +62,6 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  * set in the flags member.
  *
  * ENABLED - set/unset when ftrace_ops is registered/unregistered
- * GLOBAL  - set manualy by ftrace_ops user to denote the ftrace_ops
- *           is part of the global tracers sharing the same filter
- *           via set_ftrace_* debugfs files.
  * DYNAMIC - set when ftrace_ops is registered to denote dynamically
  *           allocated ftrace_ops which need special care
  * CONTROL - set manualy by ftrace_ops user to denote the ftrace_ops
@@ -96,15 +93,14 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  */
 enum {
 	FTRACE_OPS_FL_ENABLED			= 1 << 0,
-	FTRACE_OPS_FL_GLOBAL			= 1 << 1,
-	FTRACE_OPS_FL_DYNAMIC			= 1 << 2,
-	FTRACE_OPS_FL_CONTROL			= 1 << 3,
-	FTRACE_OPS_FL_SAVE_REGS			= 1 << 4,
-	FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED	= 1 << 5,
-	FTRACE_OPS_FL_RECURSION_SAFE		= 1 << 6,
-	FTRACE_OPS_FL_STUB			= 1 << 7,
-	FTRACE_OPS_FL_INITIALIZED		= 1 << 8,
-	FTRACE_OPS_FL_DELETED			= 1 << 9,
+	FTRACE_OPS_FL_DYNAMIC			= 1 << 1,
+	FTRACE_OPS_FL_CONTROL			= 1 << 2,
+	FTRACE_OPS_FL_SAVE_REGS			= 1 << 3,
+	FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED	= 1 << 4,
+	FTRACE_OPS_FL_RECURSION_SAFE		= 1 << 5,
+	FTRACE_OPS_FL_STUB			= 1 << 6,
+	FTRACE_OPS_FL_INITIALIZED		= 1 << 7,
+	FTRACE_OPS_FL_DELETED			= 1 << 8,
 };
 
 /*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 1fd4b9479210..8f61ef70a297 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -62,7 +62,7 @@
 #define FTRACE_HASH_DEFAULT_BITS 10
 #define FTRACE_HASH_MAX_BITS 12
 
-#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
+#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL)
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define INIT_REGEX_LOCK(opsname)	\
@@ -103,7 +103,6 @@ static int ftrace_disabled __read_mostly;
 
 static DEFINE_MUTEX(ftrace_lock);
 
-static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
@@ -171,23 +170,6 @@ int ftrace_nr_registered_ops(void)
 	return cnt;
 }
 
-static void
-ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
-			struct ftrace_ops *op, struct pt_regs *regs)
-{
-	int bit;
-
-	bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
-	if (bit < 0)
-		return;
-
-	do_for_each_ftrace_op(op, ftrace_global_list) {
-		op->func(ip, parent_ip, op, regs);
-	} while_for_each_ftrace_op(op);
-
-	trace_clear_recursion(bit);
-}
-
 static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
 			    struct ftrace_ops *op, struct pt_regs *regs)
 {
@@ -237,43 +219,6 @@ static int control_ops_alloc(struct ftrace_ops *ops)
 	return 0;
 }
 
-static void update_global_ops(void)
-{
-	ftrace_func_t func = ftrace_global_list_func;
-	void *private = NULL;
-
-	/* The list has its own recursion protection. */
-	global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
-
-	/*
-	 * If there's only one function registered, then call that
-	 * function directly. Otherwise, we need to iterate over the
-	 * registered callers.
-	 */
-	if (ftrace_global_list == &ftrace_list_end ||
-	    ftrace_global_list->next == &ftrace_list_end) {
-		func = ftrace_global_list->func;
-		private = ftrace_global_list->private;
-		/*
-		 * As we are calling the function directly.
-		 * If it does not have recursion protection,
-		 * the function_trace_op needs to be updated
-		 * accordingly.
-		 */
-		if (!(ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE))
-			global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
-	}
-
-	/* If we filter on pids, update to use the pid function */
-	if (!list_empty(&ftrace_pids)) {
-		set_ftrace_pid_function(func);
-		func = ftrace_pid_func;
-	}
-
-	global_ops.func = func;
-	global_ops.private = private;
-}
-
 static void ftrace_sync(struct work_struct *work)
 {
 	/*
@@ -301,8 +246,6 @@ static void update_ftrace_function(void)
 {
 	ftrace_func_t func;
 
-	update_global_ops();
-
 	/*
 	 * If we are at the end of the list and this ops is
 	 * recursion safe and not dynamic and the arch supports passing ops,
@@ -314,10 +257,7 @@ static void update_ftrace_function(void)
 	     (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) &&
 	     !FTRACE_FORCE_LIST_FUNC)) {
 		/* Set the ftrace_ops that the arch callback uses */
-		if (ftrace_ops_list == &global_ops)
-			set_function_trace_op = ftrace_global_list;
-		else
-			set_function_trace_op = ftrace_ops_list;
+		set_function_trace_op = ftrace_ops_list;
 		func = ftrace_ops_list->func;
 	} else {
 		/* Just use the default ftrace_ops */
@@ -434,16 +374,9 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if (ops->flags & FTRACE_OPS_FL_DELETED)
 		return -EINVAL;
 
-	if (FTRACE_WARN_ON(ops == &global_ops))
-		return -EINVAL;
-
 	if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
 		return -EBUSY;
 
-	/* We don't support both control and global flags set. */
-	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
-		return -EINVAL;
-
 #ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
 	/*
 	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
@@ -461,10 +394,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if (!core_kernel_data((unsigned long)ops))
 		ops->flags |= FTRACE_OPS_FL_DYNAMIC;
 
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
-		ops->flags |= FTRACE_OPS_FL_ENABLED;
-	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+	if (ops->flags & FTRACE_OPS_FL_CONTROL) {
 		if (control_ops_alloc(ops))
 			return -ENOMEM;
 		add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
@@ -484,15 +414,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 	if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
 		return -EBUSY;
 
-	if (FTRACE_WARN_ON(ops == &global_ops))
-		return -EINVAL;
-
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		ret = remove_ftrace_list_ops(&ftrace_global_list,
-					     &global_ops, ops);
-		if (!ret)
-			ops->flags &= ~FTRACE_OPS_FL_ENABLED;
-	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+	if (ops->flags & FTRACE_OPS_FL_CONTROL) {
 		ret = remove_ftrace_list_ops(&ftrace_control_list,
 					     &control_ops, ops);
 	} else
@@ -2128,15 +2050,6 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
 	ftrace_start_up++;
 	command |= FTRACE_UPDATE_CALLS;
 
-	/* ops marked global share the filter hashes */
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		ops = &global_ops;
-		/* Don't update hash if global is already set */
-		if (global_start_up)
-			hash_enable = false;
-		global_start_up++;
-	}
-
 	ops->flags |= FTRACE_OPS_FL_ENABLED;
 	if (hash_enable)
 		ftrace_hash_rec_enable(ops, 1);
@@ -2166,21 +2079,10 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
 	 */
 	WARN_ON_ONCE(ftrace_start_up < 0);
 
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		ops = &global_ops;
-		global_start_up--;
-		WARN_ON_ONCE(global_start_up < 0);
-		/* Don't update hash if global still has users */
-		if (global_start_up) {
-			WARN_ON_ONCE(!ftrace_start_up);
-			hash_disable = false;
-		}
-	}
-
 	if (hash_disable)
 		ftrace_hash_rec_disable(ops, 1);
 
-	if (ops != &global_ops || !global_start_up)
+	if (!global_start_up)
 		ops->flags &= ~FTRACE_OPS_FL_ENABLED;
 
 	command |= FTRACE_UPDATE_CALLS;
@@ -3524,10 +3426,6 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
 	struct ftrace_hash *hash;
 	int ret;
 
-	/* All global ops uses the global ops filters */
-	if (ops->flags & FTRACE_OPS_FL_GLOBAL)
-		ops = &global_ops;
-
 	if (unlikely(ftrace_disabled))
 		return -ENODEV;
 
@@ -4462,6 +4360,34 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
+__init void ftrace_init_global_array_ops(struct trace_array *tr)
+{
+	tr->ops = &global_ops;
+	tr->ops->private = tr;
+}
+
+void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
+{
+	/* Sanity check and pid filtering apply only to the top level instance */
+	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
+		if (WARN_ON(tr->ops->func != ftrace_stub))
+			printk("ftrace ops had %pS for function\n",
+			       tr->ops->func);
+		/* Only the top level instance does pid tracing */
+		if (!list_empty(&ftrace_pids)) {
+			set_ftrace_pid_function(func);
+			func = ftrace_pid_func;
+		}
+	}
+	tr->ops->func = func;
+	tr->ops->private = tr;
+}
+
+void ftrace_reset_array_ops(struct trace_array *tr)
+{
+	tr->ops->func = ftrace_stub;
+}
+
 static void
 ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
 			struct ftrace_ops *op, struct pt_regs *regs)
@@ -4520,9 +4446,16 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 	 */
 	preempt_disable_notrace();
 	do_for_each_ftrace_op(op, ftrace_ops_list) {
-		if (ftrace_ops_test(op, ip, regs))
+		if (ftrace_ops_test(op, ip, regs)) {
+			if (WARN_ON(!op->func)) {
+				function_trace_stop = 1;
+				printk("op=%p %pS\n", op, op);
+				goto out;
+			}
 			op->func(ip, parent_ip, op, regs);
+		}
 	} while_for_each_ftrace_op(op);
+out:
 	preempt_enable_notrace();
 	trace_clear_recursion(bit);
 }
@@ -5076,8 +5009,7 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
 /* Just a place holder for function graph */
 static struct ftrace_ops fgraph_ops __read_mostly = {
 	.func		= ftrace_stub,
-	.flags		= FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_GLOBAL |
-				FTRACE_OPS_FL_RECURSION_SAFE,
+	.flags		= FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_RECURSION_SAFE,
 };
 
 static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 737b0efa1a62..fdd33aacdf05 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6629,6 +6629,8 @@ __init static int tracer_alloc_buffers(void)
 	 */
 	global_trace.current_trace = &nop_trace;
 
+	ftrace_init_global_array_ops(&global_trace);
+
 	register_tracer(&nop_trace);
 
 	/* All seems OK, enable tracing */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2e29d7ba5a52..df5256be64cd 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -416,13 +416,7 @@ enum {
 	TRACE_FTRACE_IRQ_BIT,
 	TRACE_FTRACE_SIRQ_BIT,
 
-	/* GLOBAL_BITs must be greater than FTRACE_BITs */
-	TRACE_GLOBAL_BIT,
-	TRACE_GLOBAL_NMI_BIT,
-	TRACE_GLOBAL_IRQ_BIT,
-	TRACE_GLOBAL_SIRQ_BIT,
-
-	/* INTERNAL_BITs must be greater than GLOBAL_BITs */
+	/* INTERNAL_BITs must be greater than FTRACE_BITs */
 	TRACE_INTERNAL_BIT,
 	TRACE_INTERNAL_NMI_BIT,
 	TRACE_INTERNAL_IRQ_BIT,
@@ -449,9 +443,6 @@ enum {
 #define TRACE_FTRACE_START	TRACE_FTRACE_BIT
 #define TRACE_FTRACE_MAX	((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
 
-#define TRACE_GLOBAL_START	TRACE_GLOBAL_BIT
-#define TRACE_GLOBAL_MAX	((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
-
 #define TRACE_LIST_START	TRACE_INTERNAL_BIT
 #define TRACE_LIST_MAX		((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
 
@@ -823,6 +814,9 @@ extern int ftrace_is_dead(void);
 int ftrace_create_function_files(struct trace_array *tr,
 				 struct dentry *parent);
 void ftrace_destroy_function_files(struct trace_array *tr);
+void ftrace_init_global_array_ops(struct trace_array *tr);
+void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
+void ftrace_reset_array_ops(struct trace_array *tr);
 #else
 static inline int ftrace_trace_task(struct task_struct *task)
 {
@@ -836,6 +830,11 @@ ftrace_create_function_files(struct trace_array *tr,
 	return 0;
 }
 static inline void ftrace_destroy_function_files(struct trace_array *tr) { }
+static inline __init void
+ftrace_init_global_array_ops(struct trace_array *tr) { }
+static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
+/* ftrace_func_t type is not defined, use macro instead of static inline */
+#define ftrace_init_array_ops(tr, func) do { } while (0)
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index ffd56351b521..2d9482b8f26a 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -26,8 +26,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
 static void
 function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
 			  struct ftrace_ops *op, struct pt_regs *pt_regs);
-static struct ftrace_ops trace_ops;
-static struct ftrace_ops trace_stack_ops;
 static struct tracer_flags func_flags;
 
 /* Our option */
@@ -83,28 +81,24 @@ void ftrace_destroy_function_files(struct trace_array *tr)
 
 static int function_trace_init(struct trace_array *tr)
 {
-	struct ftrace_ops *ops;
-
-	if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
-		/* There's only one global tr */
-		if (!trace_ops.private) {
-			trace_ops.private = tr;
-			trace_stack_ops.private = tr;
-		}
+	ftrace_func_t func;
 
-		if (func_flags.val & TRACE_FUNC_OPT_STACK)
-			ops = &trace_stack_ops;
-		else
-			ops = &trace_ops;
-		tr->ops = ops;
-	} else if (!tr->ops) {
-		/*
-		 * Instance trace_arrays get their ops allocated
-		 * at instance creation. Unless it failed
-		 * the allocation.
-		 */
+	/*
+	 * Instance trace_arrays get their ops allocated
+	 * at instance creation. Unless it failed
+	 * the allocation.
+	 */
+	if (!tr->ops)
 		return -ENOMEM;
-	}
+
+	/* Currently only the global instance can do stack tracing */
+	if (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
+	    func_flags.val & TRACE_FUNC_OPT_STACK)
+		func = function_stack_trace_call;
+	else
+		func = function_trace_call;
+
+	ftrace_init_array_ops(tr, func);
 
 	tr->trace_buffer.cpu = get_cpu();
 	put_cpu();
@@ -118,6 +112,7 @@ static void function_trace_reset(struct trace_array *tr)
 {
 	tracing_stop_function_trace(tr);
 	tracing_stop_cmdline_record();
+	ftrace_reset_array_ops(tr);
 }
 
 static void function_trace_start(struct trace_array *tr)
@@ -199,18 +194,6 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
 	local_irq_restore(flags);
 }
 
-static struct ftrace_ops trace_ops __read_mostly =
-{
-	.func = function_trace_call,
-	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
-static struct ftrace_ops trace_stack_ops __read_mostly =
-{
-	.func = function_stack_trace_call,
-	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
 static struct tracer_opt func_opts[] = {
 #ifdef CONFIG_STACKTRACE
 	{ TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
@@ -248,10 +231,10 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 		unregister_ftrace_function(tr->ops);
 
 		if (set) {
-			tr->ops = &trace_stack_ops;
+			tr->ops->func = function_stack_trace_call;
 			register_ftrace_function(tr->ops);
 		} else {
-			tr->ops = &trace_ops;
+			tr->ops->func = function_trace_call;
 			register_ftrace_function(tr->ops);
 		}
 
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 8ff02cbb892f..b5cb047df3e9 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -151,12 +151,6 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,
 
 	atomic_dec(&data->disabled);
 }
-
-static struct ftrace_ops trace_ops __read_mostly =
-{
-	.func = irqsoff_tracer_call,
-	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -531,7 +525,7 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
 }
 #endif /* CONFIG_PREEMPT_TRACER */
 
-static int register_irqsoff_function(int graph, int set)
+static int register_irqsoff_function(struct trace_array *tr, int graph, int set)
 {
 	int ret;
 
@@ -543,7 +537,7 @@ static int register_irqsoff_function(int graph, int set)
 		ret = register_ftrace_graph(&irqsoff_graph_return,
 					    &irqsoff_graph_entry);
 	else
-		ret = register_ftrace_function(&trace_ops);
+		ret = register_ftrace_function(tr->ops);
 
 	if (!ret)
 		function_enabled = true;
@@ -551,7 +545,7 @@ static int register_irqsoff_function(int graph, int set)
 	return ret;
 }
 
-static void unregister_irqsoff_function(int graph)
+static void unregister_irqsoff_function(struct trace_array *tr, int graph)
 {
 	if (!function_enabled)
 		return;
@@ -559,17 +553,17 @@ static void unregister_irqsoff_function(int graph)
 	if (graph)
 		unregister_ftrace_graph();
 	else
-		unregister_ftrace_function(&trace_ops);
+		unregister_ftrace_function(tr->ops);
 
 	function_enabled = false;
 }
 
-static void irqsoff_function_set(int set)
+static void irqsoff_function_set(struct trace_array *tr, int set)
 {
 	if (set)
-		register_irqsoff_function(is_graph(), 1);
+		register_irqsoff_function(tr, is_graph(), 1);
 	else
-		unregister_irqsoff_function(is_graph());
+		unregister_irqsoff_function(tr, is_graph());
 }
 
 static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)
@@ -577,7 +571,7 @@ static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)
 	struct tracer *tracer = tr->current_trace;
 
 	if (mask & TRACE_ITER_FUNCTION)
-		irqsoff_function_set(set);
+		irqsoff_function_set(tr, set);
 
 	return trace_keep_overwrite(tracer, mask, set);
 }
@@ -586,7 +580,7 @@ static int start_irqsoff_tracer(struct trace_array *tr, int graph)
 {
 	int ret;
 
-	ret = register_irqsoff_function(graph, 0);
+	ret = register_irqsoff_function(tr, graph, 0);
 
 	if (!ret && tracing_is_enabled())
 		tracer_enabled = 1;
@@ -600,7 +594,7 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
 {
 	tracer_enabled = 0;
 
-	unregister_irqsoff_function(graph);
+	unregister_irqsoff_function(tr, graph);
 }
 
 static void __irqsoff_tracer_init(struct trace_array *tr)
@@ -617,7 +611,11 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
 	smp_wmb();
 	tracing_reset_online_cpus(&tr->trace_buffer);
 
-	if (start_irqsoff_tracer(tr, is_graph()))
+	ftrace_init_array_ops(tr, irqsoff_tracer_call);
+
+	/* Only toplevel instance supports graph tracing */
+	if (start_irqsoff_tracer(tr, (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
+				      is_graph())))
 		printk(KERN_ERR "failed to start irqsoff tracer\n");
 }
 
@@ -630,6 +628,7 @@ static void irqsoff_tracer_reset(struct trace_array *tr)
 
 	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
 	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
+	ftrace_reset_array_ops(tr);
 }
 
 static void irqsoff_tracer_start(struct trace_array *tr)
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index e14da5e97a69..4dd986defa60 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -130,15 +130,9 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
 	atomic_dec(&data->disabled);
 	preempt_enable_notrace();
 }
-
-static struct ftrace_ops trace_ops __read_mostly =
-{
-	.func = wakeup_tracer_call,
-	.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
 #endif /* CONFIG_FUNCTION_TRACER */
 
-static int register_wakeup_function(int graph, int set)
+static int register_wakeup_function(struct trace_array *tr, int graph, int set)
 {
 	int ret;
 
@@ -150,7 +144,7 @@ static int register_wakeup_function(int graph, int set)
 		ret = register_ftrace_graph(&wakeup_graph_return,
 					    &wakeup_graph_entry);
 	else
-		ret = register_ftrace_function(&trace_ops);
+		ret = register_ftrace_function(tr->ops);
 
 	if (!ret)
 		function_enabled = true;
@@ -158,7 +152,7 @@ static int register_wakeup_function(int graph, int set)
 	return ret;
 }
 
-static void unregister_wakeup_function(int graph)
+static void unregister_wakeup_function(struct trace_array *tr, int graph)
 {
 	if (!function_enabled)
 		return;
@@ -166,17 +160,17 @@ static void unregister_wakeup_function(int graph)
 	if (graph)
 		unregister_ftrace_graph();
 	else
-		unregister_ftrace_function(&trace_ops);
+		unregister_ftrace_function(tr->ops);
 
 	function_enabled = false;
 }
 
-static void wakeup_function_set(int set)
+static void wakeup_function_set(struct trace_array *tr, int set)
 {
 	if (set)
-		register_wakeup_function(is_graph(), 1);
+		register_wakeup_function(tr, is_graph(), 1);
 	else
-		unregister_wakeup_function(is_graph());
+		unregister_wakeup_function(tr, is_graph());
 }
 
 static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
@@ -184,16 +178,16 @@ static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
 	struct tracer *tracer = tr->current_trace;
 
 	if (mask & TRACE_ITER_FUNCTION)
-		wakeup_function_set(set);
+		wakeup_function_set(tr, set);
 
 	return trace_keep_overwrite(tracer, mask, set);
 }
 
-static int start_func_tracer(int graph)
+static int start_func_tracer(struct trace_array *tr, int graph)
 {
 	int ret;
 
-	ret = register_wakeup_function(graph, 0);
+	ret = register_wakeup_function(tr, graph, 0);
 
 	if (!ret && tracing_is_enabled())
 		tracer_enabled = 1;
@@ -203,11 +197,11 @@ static int start_func_tracer(int graph)
 	return ret;
 }
 
-static void stop_func_tracer(int graph)
+static void stop_func_tracer(struct trace_array *tr, int graph)
 {
 	tracer_enabled = 0;
 
-	unregister_wakeup_function(graph);
+	unregister_wakeup_function(tr, graph);
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -221,12 +215,12 @@ wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
 	if (!(is_graph() ^ set))
 		return 0;
 
-	stop_func_tracer(!set);
+	stop_func_tracer(tr, !set);
 
 	wakeup_reset(wakeup_trace);
 	tracing_max_latency = 0;
 
-	return start_func_tracer(set);
+	return start_func_tracer(tr, set);
 }
 
 static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
@@ -587,7 +581,7 @@ static void start_wakeup_tracer(struct trace_array *tr)
 	 */
 	smp_wmb();
 
-	if (start_func_tracer(is_graph()))
+	if (start_func_tracer(tr, is_graph()))
 		printk(KERN_ERR "failed to start wakeup tracer\n");
 
 	return;
@@ -600,7 +594,7 @@ fail_deprobe:
 static void stop_wakeup_tracer(struct trace_array *tr)
 {
 	tracer_enabled = 0;
-	stop_func_tracer(is_graph());
+	stop_func_tracer(tr, is_graph());
 	unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
 	unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
 	unregister_trace_sched_wakeup(probe_wakeup, NULL);
@@ -617,6 +611,7 @@ static int __wakeup_tracer_init(struct trace_array *tr)
 
 	tracing_max_latency = 0;
 	wakeup_trace = tr;
+	ftrace_init_array_ops(tr, wakeup_tracer_call);
 	start_wakeup_tracer(tr);
 	return 0;
 }
@@ -653,6 +648,7 @@ static void wakeup_tracer_reset(struct trace_array *tr)
 
 	set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
 	set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
+	ftrace_reset_array_ops(tr);
 }
 
 static void wakeup_tracer_start(struct trace_array *tr)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index e98fca60974f..519d04affe38 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -161,11 +161,6 @@ static struct ftrace_ops test_probe3 = {
 	.flags			= FTRACE_OPS_FL_RECURSION_SAFE,
 };
 
-static struct ftrace_ops test_global = {
-	.func		= trace_selftest_test_global_func,
-	.flags		= FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
-};
-
 static void print_counts(void)
 {
 	printk("(%d %d %d %d %d) ",
@@ -185,7 +180,7 @@ static void reset_counts(void)
 	trace_selftest_test_dyn_cnt = 0;
 }
 
-static int trace_selftest_ops(int cnt)
+static int trace_selftest_ops(struct trace_array *tr, int cnt)
 {
 	int save_ftrace_enabled = ftrace_enabled;
 	struct ftrace_ops *dyn_ops;
@@ -220,7 +215,11 @@ static int trace_selftest_ops(int cnt)
 	register_ftrace_function(&test_probe1);
 	register_ftrace_function(&test_probe2);
 	register_ftrace_function(&test_probe3);
-	register_ftrace_function(&test_global);
+	/* First time we are running with main function */
+	if (cnt > 1) {
+		ftrace_init_array_ops(tr, trace_selftest_test_global_func);
+		register_ftrace_function(tr->ops);
+	}
 
 	DYN_FTRACE_TEST_NAME();
 
@@ -232,8 +231,10 @@ static int trace_selftest_ops(int cnt)
 		goto out;
 	if (trace_selftest_test_probe3_cnt != 1)
 		goto out;
-	if (trace_selftest_test_global_cnt == 0)
-		goto out;
+	if (cnt > 1) {
+		if (trace_selftest_test_global_cnt == 0)
+			goto out;
+	}
 
 	DYN_FTRACE_TEST_NAME2();
 
@@ -269,8 +270,10 @@ static int trace_selftest_ops(int cnt)
 		goto out_free;
 	if (trace_selftest_test_probe3_cnt != 3)
 		goto out_free;
-	if (trace_selftest_test_global_cnt == 0)
-		goto out;
+	if (cnt > 1) {
+		if (trace_selftest_test_global_cnt == 0)
+			goto out;
+	}
 	if (trace_selftest_test_dyn_cnt == 0)
 		goto out_free;
 
@@ -295,7 +298,9 @@ static int trace_selftest_ops(int cnt)
 	unregister_ftrace_function(&test_probe1);
 	unregister_ftrace_function(&test_probe2);
 	unregister_ftrace_function(&test_probe3);
-	unregister_ftrace_function(&test_global);
+	if (cnt > 1)
+		unregister_ftrace_function(tr->ops);
+	ftrace_reset_array_ops(tr);
 
 	/* Make sure everything is off */
 	reset_counts();
@@ -388,7 +393,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 	}
 
 	/* Test the ops with global tracing running */
-	ret = trace_selftest_ops(1);
+	ret = trace_selftest_ops(tr, 1);
 	trace->reset(tr);
 
  out:
@@ -399,7 +404,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 
 	/* Test the ops with global tracing off */
 	if (!ret)
-		ret = trace_selftest_ops(2);
+		ret = trace_selftest_ops(tr, 2);
 
 	return ret;
 }
-- 
cgit 


From 4525beeb9aadbb9e1cb3e9e135f4371553f26a70 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Wed, 16 Apr 2014 15:20:44 -0500
Subject: usb: phy: rename usb_nop_xceiv to usb_phy_generic

no functional changes, just renaming the function
in order to make it slightly clearer what it should
be used for, also matching the driver name.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 arch/arm/mach-omap2/usb-host.c        |  8 +++---
 drivers/usb/dwc3/dwc3-exynos.c        |  6 ++---
 drivers/usb/dwc3/dwc3-pci.c           |  6 ++---
 drivers/usb/musb/am35x.c              |  4 +--
 drivers/usb/musb/blackfin.c           |  4 +--
 drivers/usb/musb/da8xx.c              |  4 +--
 drivers/usb/musb/davinci.c            |  6 ++---
 drivers/usb/musb/tusb6010.c           |  6 ++---
 drivers/usb/phy/phy-am335x.c          |  2 +-
 drivers/usb/phy/phy-generic.c         | 50 +++++++++++++++++------------------
 drivers/usb/phy/phy-generic.h         |  6 ++---
 drivers/usb/phy/phy-keystone.c        |  2 +-
 include/linux/usb/usb_phy_gen_xceiv.h | 10 +++----
 13 files changed, 57 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/usb-host.c b/arch/arm/mach-omap2/usb-host.c
index 10855eb4ccc1..ab983cdd3edf 100644
--- a/arch/arm/mach-omap2/usb-host.c
+++ b/arch/arm/mach-omap2/usb-host.c
@@ -349,7 +349,7 @@ static struct fixed_voltage_config hsusb_reg_config = {
 	/* .init_data filled later */
 };
 
-static const char *nop_name = "usb_phy_gen_xceiv"; /* NOP PHY driver */
+static const char *nop_name = "usb_phy_generic"; /* NOP PHY driver */
 static const char *reg_name = "reg-fixed-voltage"; /* Regulator driver */
 
 /**
@@ -435,7 +435,7 @@ int usbhs_init_phys(struct usbhs_phy_data *phy, int num_phys)
 	struct platform_device *pdev;
 	char *phy_id;
 	struct platform_device_info pdevinfo;
-	struct usb_phy_gen_xceiv_platform_data nop_pdata;
+	struct usb_phy_generic_platform_data nop_pdata;
 
 	for (i = 0; i < num_phys; i++) {
 
@@ -469,8 +469,8 @@ int usbhs_init_phys(struct usbhs_phy_data *phy, int num_phys)
 		pdevinfo.id = phy->port;
 		pdevinfo.data = &nop_pdata;
 		pdevinfo.size_data =
-			sizeof(struct usb_phy_gen_xceiv_platform_data);
-		scnprintf(phy_id, MAX_STR, "usb_phy_gen_xceiv.%d",
+			sizeof(struct usb_phy_generic_platform_data);
+		scnprintf(phy_id, MAX_STR, "usb_phy_generic.%d",
 					phy->port);
 		pdev = platform_device_register_full(&pdevinfo);
 		if (IS_ERR(pdev)) {
diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c
index 28c8ad79f5e6..821cc59e6e1d 100644
--- a/drivers/usb/dwc3/dwc3-exynos.c
+++ b/drivers/usb/dwc3/dwc3-exynos.c
@@ -38,13 +38,13 @@ struct dwc3_exynos {
 
 static int dwc3_exynos_register_phys(struct dwc3_exynos *exynos)
 {
-	struct usb_phy_gen_xceiv_platform_data pdata;
+	struct usb_phy_generic_platform_data pdata;
 	struct platform_device	*pdev;
 	int			ret;
 
 	memset(&pdata, 0x00, sizeof(pdata));
 
-	pdev = platform_device_alloc("usb_phy_gen_xceiv", PLATFORM_DEVID_AUTO);
+	pdev = platform_device_alloc("usb_phy_generic", PLATFORM_DEVID_AUTO);
 	if (!pdev)
 		return -ENOMEM;
 
@@ -56,7 +56,7 @@ static int dwc3_exynos_register_phys(struct dwc3_exynos *exynos)
 	if (ret)
 		goto err1;
 
-	pdev = platform_device_alloc("usb_phy_gen_xceiv", PLATFORM_DEVID_AUTO);
+	pdev = platform_device_alloc("usb_phy_generic", PLATFORM_DEVID_AUTO);
 	if (!pdev) {
 		ret = -ENOMEM;
 		goto err1;
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index f393c183cc69..8b162f0e293c 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -40,13 +40,13 @@ struct dwc3_pci {
 
 static int dwc3_pci_register_phys(struct dwc3_pci *glue)
 {
-	struct usb_phy_gen_xceiv_platform_data pdata;
+	struct usb_phy_generic_platform_data pdata;
 	struct platform_device	*pdev;
 	int			ret;
 
 	memset(&pdata, 0x00, sizeof(pdata));
 
-	pdev = platform_device_alloc("usb_phy_gen_xceiv", 0);
+	pdev = platform_device_alloc("usb_phy_generic", 0);
 	if (!pdev)
 		return -ENOMEM;
 
@@ -58,7 +58,7 @@ static int dwc3_pci_register_phys(struct dwc3_pci *glue)
 	if (ret)
 		goto err1;
 
-	pdev = platform_device_alloc("usb_phy_gen_xceiv", 1);
+	pdev = platform_device_alloc("usb_phy_generic", 1);
 	if (!pdev) {
 		ret = -ENOMEM;
 		goto err1;
diff --git a/drivers/usb/musb/am35x.c b/drivers/usb/musb/am35x.c
index b3aa0184af9a..77ed66427969 100644
--- a/drivers/usb/musb/am35x.c
+++ b/drivers/usb/musb/am35x.c
@@ -360,7 +360,7 @@ static int am35x_musb_init(struct musb *musb)
 	if (!rev)
 		return -ENODEV;
 
-	usb_nop_xceiv_register();
+	usb_phy_generic_register();
 	musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2);
 	if (IS_ERR_OR_NULL(musb->xceiv))
 		return -EPROBE_DEFER;
@@ -402,7 +402,7 @@ static int am35x_musb_exit(struct musb *musb)
 		data->set_phy_power(0);
 
 	usb_put_phy(musb->xceiv);
-	usb_nop_xceiv_unregister();
+	usb_phy_generic_unregister();
 
 	return 0;
 }
diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c
index 796677fa9a15..607f3ae04591 100644
--- a/drivers/usb/musb/blackfin.c
+++ b/drivers/usb/musb/blackfin.c
@@ -401,7 +401,7 @@ static int bfin_musb_init(struct musb *musb)
 	}
 	gpio_direction_output(musb->config->gpio_vrsel, 0);
 
-	usb_nop_xceiv_register();
+	usb_phy_generic_register();
 	musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2);
 	if (IS_ERR_OR_NULL(musb->xceiv)) {
 		gpio_free(musb->config->gpio_vrsel);
@@ -426,7 +426,7 @@ static int bfin_musb_exit(struct musb *musb)
 	gpio_free(musb->config->gpio_vrsel);
 
 	usb_put_phy(musb->xceiv);
-	usb_nop_xceiv_unregister();
+	usb_phy_generic_unregister();
 	return 0;
 }
 
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index e3486de71995..bcdce8e64670 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -418,7 +418,7 @@ static int da8xx_musb_init(struct musb *musb)
 	if (!rev)
 		goto fail;
 
-	usb_nop_xceiv_register();
+	usb_phy_generic_register();
 	musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2);
 	if (IS_ERR_OR_NULL(musb->xceiv)) {
 		ret = -EPROBE_DEFER;
@@ -453,7 +453,7 @@ static int da8xx_musb_exit(struct musb *musb)
 	phy_off();
 
 	usb_put_phy(musb->xceiv);
-	usb_nop_xceiv_unregister();
+	usb_phy_generic_unregister();
 
 	return 0;
 }
diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c
index c259dac9d056..c0e07eddb079 100644
--- a/drivers/usb/musb/davinci.c
+++ b/drivers/usb/musb/davinci.c
@@ -381,7 +381,7 @@ static int davinci_musb_init(struct musb *musb)
 	u32		revision;
 	int 		ret = -ENODEV;
 
-	usb_nop_xceiv_register();
+	usb_phy_generic_register();
 	musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2);
 	if (IS_ERR_OR_NULL(musb->xceiv)) {
 		ret = -EPROBE_DEFER;
@@ -439,7 +439,7 @@ static int davinci_musb_init(struct musb *musb)
 fail:
 	usb_put_phy(musb->xceiv);
 unregister:
-	usb_nop_xceiv_unregister();
+	usb_phy_generic_unregister();
 	return ret;
 }
 
@@ -487,7 +487,7 @@ static int davinci_musb_exit(struct musb *musb)
 	phy_off();
 
 	usb_put_phy(musb->xceiv);
-	usb_nop_xceiv_unregister();
+	usb_phy_generic_unregister();
 
 	return 0;
 }
diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c
index 4e9fb1d08698..0c0f5ee1e3f1 100644
--- a/drivers/usb/musb/tusb6010.c
+++ b/drivers/usb/musb/tusb6010.c
@@ -1065,7 +1065,7 @@ static int tusb_musb_init(struct musb *musb)
 	void __iomem		*sync = NULL;
 	int			ret;
 
-	usb_nop_xceiv_register();
+	usb_phy_generic_register();
 	musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2);
 	if (IS_ERR_OR_NULL(musb->xceiv))
 		return -EPROBE_DEFER;
@@ -1117,7 +1117,7 @@ done:
 			iounmap(sync);
 
 		usb_put_phy(musb->xceiv);
-		usb_nop_xceiv_unregister();
+		usb_phy_generic_unregister();
 	}
 	return ret;
 }
@@ -1133,7 +1133,7 @@ static int tusb_musb_exit(struct musb *musb)
 	iounmap(musb->sync_va);
 
 	usb_put_phy(musb->xceiv);
-	usb_nop_xceiv_unregister();
+	usb_phy_generic_unregister();
 	return 0;
 }
 
diff --git a/drivers/usb/phy/phy-am335x.c b/drivers/usb/phy/phy-am335x.c
index 12fc3468a01e..bb866e466051 100644
--- a/drivers/usb/phy/phy-am335x.c
+++ b/drivers/usb/phy/phy-am335x.c
@@ -13,7 +13,7 @@
 #include "phy-generic.h"
 
 struct am335x_phy {
-	struct usb_phy_gen_xceiv usb_phy_gen;
+	struct usb_phy_generic usb_phy_gen;
 	struct phy_control *phy_ctrl;
 	int id;
 };
diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c
index 95e70bc384c2..e76ca4ca3a8a 100644
--- a/drivers/usb/phy/phy-generic.c
+++ b/drivers/usb/phy/phy-generic.c
@@ -43,32 +43,32 @@
 
 static struct platform_device *pd;
 
-void usb_nop_xceiv_register(void)
+void usb_phy_generic_register(void)
 {
 	if (pd)
 		return;
-	pd = platform_device_register_simple("usb_phy_gen_xceiv", -1, NULL, 0);
+	pd = platform_device_register_simple("usb_phy_generic", -1, NULL, 0);
 	if (IS_ERR(pd)) {
 		pr_err("Unable to register generic usb transceiver\n");
 		pd = NULL;
 		return;
 	}
 }
-EXPORT_SYMBOL_GPL(usb_nop_xceiv_register);
+EXPORT_SYMBOL_GPL(usb_phy_generic_register);
 
-void usb_nop_xceiv_unregister(void)
+void usb_phy_generic_unregister(void)
 {
 	platform_device_unregister(pd);
 	pd = NULL;
 }
-EXPORT_SYMBOL_GPL(usb_nop_xceiv_unregister);
+EXPORT_SYMBOL_GPL(usb_phy_generic_unregister);
 
 static int nop_set_suspend(struct usb_phy *x, int suspend)
 {
 	return 0;
 }
 
-static void nop_reset_set(struct usb_phy_gen_xceiv *nop, int asserted)
+static void nop_reset_set(struct usb_phy_generic *nop, int asserted)
 {
 	int value;
 
@@ -87,7 +87,7 @@ static void nop_reset_set(struct usb_phy_gen_xceiv *nop, int asserted)
 
 int usb_gen_phy_init(struct usb_phy *phy)
 {
-	struct usb_phy_gen_xceiv *nop = dev_get_drvdata(phy->dev);
+	struct usb_phy_generic *nop = dev_get_drvdata(phy->dev);
 
 	if (!IS_ERR(nop->vcc)) {
 		if (regulator_enable(nop->vcc))
@@ -106,7 +106,7 @@ EXPORT_SYMBOL_GPL(usb_gen_phy_init);
 
 void usb_gen_phy_shutdown(struct usb_phy *phy)
 {
-	struct usb_phy_gen_xceiv *nop = dev_get_drvdata(phy->dev);
+	struct usb_phy_generic *nop = dev_get_drvdata(phy->dev);
 
 	/* Assert RESET */
 	nop_reset_set(nop, 1);
@@ -150,8 +150,8 @@ static int nop_set_host(struct usb_otg *otg, struct usb_bus *host)
 	return 0;
 }
 
-int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_gen_xceiv *nop,
-		struct usb_phy_gen_xceiv_platform_data *pdata)
+int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop,
+		struct usb_phy_generic_platform_data *pdata)
 {
 	enum usb_phy_type type = USB_PHY_TYPE_USB2;
 	int err;
@@ -245,10 +245,10 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_gen_xceiv *nop,
 }
 EXPORT_SYMBOL_GPL(usb_phy_gen_create_phy);
 
-static int usb_phy_gen_xceiv_probe(struct platform_device *pdev)
+static int usb_phy_generic_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct usb_phy_gen_xceiv	*nop;
+	struct usb_phy_generic	*nop;
 	int err;
 
 	nop = devm_kzalloc(dev, sizeof(*nop), GFP_KERNEL);
@@ -274,9 +274,9 @@ static int usb_phy_gen_xceiv_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int usb_phy_gen_xceiv_remove(struct platform_device *pdev)
+static int usb_phy_generic_remove(struct platform_device *pdev)
 {
-	struct usb_phy_gen_xceiv *nop = platform_get_drvdata(pdev);
+	struct usb_phy_generic *nop = platform_get_drvdata(pdev);
 
 	usb_remove_phy(&nop->phy);
 
@@ -290,29 +290,29 @@ static const struct of_device_id nop_xceiv_dt_ids[] = {
 
 MODULE_DEVICE_TABLE(of, nop_xceiv_dt_ids);
 
-static struct platform_driver usb_phy_gen_xceiv_driver = {
-	.probe		= usb_phy_gen_xceiv_probe,
-	.remove		= usb_phy_gen_xceiv_remove,
+static struct platform_driver usb_phy_generic_driver = {
+	.probe		= usb_phy_generic_probe,
+	.remove		= usb_phy_generic_remove,
 	.driver		= {
-		.name	= "usb_phy_gen_xceiv",
+		.name	= "usb_phy_generic",
 		.owner	= THIS_MODULE,
 		.of_match_table = nop_xceiv_dt_ids,
 	},
 };
 
-static int __init usb_phy_gen_xceiv_init(void)
+static int __init usb_phy_generic_init(void)
 {
-	return platform_driver_register(&usb_phy_gen_xceiv_driver);
+	return platform_driver_register(&usb_phy_generic_driver);
 }
-subsys_initcall(usb_phy_gen_xceiv_init);
+subsys_initcall(usb_phy_generic_init);
 
-static void __exit usb_phy_gen_xceiv_exit(void)
+static void __exit usb_phy_generic_exit(void)
 {
-	platform_driver_unregister(&usb_phy_gen_xceiv_driver);
+	platform_driver_unregister(&usb_phy_generic_driver);
 }
-module_exit(usb_phy_gen_xceiv_exit);
+module_exit(usb_phy_generic_exit);
 
-MODULE_ALIAS("platform:usb_phy_gen_xceiv");
+MODULE_ALIAS("platform:usb_phy_generic");
 MODULE_AUTHOR("Texas Instruments Inc");
 MODULE_DESCRIPTION("NOP USB Transceiver driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/usb/phy/phy-generic.h b/drivers/usb/phy/phy-generic.h
index 38a81f307b82..f32450ada12d 100644
--- a/drivers/usb/phy/phy-generic.h
+++ b/drivers/usb/phy/phy-generic.h
@@ -3,7 +3,7 @@
 
 #include <linux/usb/usb_phy_gen_xceiv.h>
 
-struct usb_phy_gen_xceiv {
+struct usb_phy_generic {
 	struct usb_phy phy;
 	struct device *dev;
 	struct clk *clk;
@@ -15,7 +15,7 @@ struct usb_phy_gen_xceiv {
 int usb_gen_phy_init(struct usb_phy *phy);
 void usb_gen_phy_shutdown(struct usb_phy *phy);
 
-int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_gen_xceiv *nop,
-		struct usb_phy_gen_xceiv_platform_data *pdata);
+int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop,
+		struct usb_phy_generic_platform_data *pdata);
 
 #endif
diff --git a/drivers/usb/phy/phy-keystone.c b/drivers/usb/phy/phy-keystone.c
index d762003896c0..2404c442c302 100644
--- a/drivers/usb/phy/phy-keystone.c
+++ b/drivers/usb/phy/phy-keystone.c
@@ -35,7 +35,7 @@
 #define PHY_REF_SSP_EN			BIT(29)
 
 struct keystone_usbphy {
-	struct usb_phy_gen_xceiv	usb_phy_gen;
+	struct usb_phy_generic	usb_phy_gen;
 	void __iomem			*phy_ctrl;
 };
 
diff --git a/include/linux/usb/usb_phy_gen_xceiv.h b/include/linux/usb/usb_phy_gen_xceiv.h
index cc8d818a83be..c00176d48625 100644
--- a/include/linux/usb/usb_phy_gen_xceiv.h
+++ b/include/linux/usb/usb_phy_gen_xceiv.h
@@ -3,7 +3,7 @@
 
 #include <linux/usb/otg.h>
 
-struct usb_phy_gen_xceiv_platform_data {
+struct usb_phy_generic_platform_data {
 	enum usb_phy_type type;
 	unsigned long clk_rate;
 
@@ -15,14 +15,14 @@ struct usb_phy_gen_xceiv_platform_data {
 
 #if defined(CONFIG_NOP_USB_XCEIV) || (defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE))
 /* sometimes transceivers are accessed only through e.g. ULPI */
-extern void usb_nop_xceiv_register(void);
-extern void usb_nop_xceiv_unregister(void);
+extern void usb_phy_generic_register(void);
+extern void usb_phy_generic_unregister(void);
 #else
-static inline void usb_nop_xceiv_register(void)
+static inline void usb_phy_generic_register(void)
 {
 }
 
-static inline void usb_nop_xceiv_unregister(void)
+static inline void usb_phy_generic_unregister(void)
 {
 }
 #endif
-- 
cgit 


From d7078df6be6e9e5e3ac354859f5b8d60114391b4 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Wed, 16 Apr 2014 15:28:32 -0500
Subject: usb: phy: rename <linux/usb/usb_phy_gen_xceiv.h> to
 <linux/usb/usb_phy_generic.h>

now that all functions match the driver name,
the only missing piece is to rename the header
file itself.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 arch/arm/mach-omap2/board-omap3beagle.c |  1 -
 arch/arm/mach-omap2/usb-host.c          |  2 +-
 drivers/usb/dwc3/dwc3-exynos.c          |  2 +-
 drivers/usb/dwc3/dwc3-pci.c             |  2 +-
 drivers/usb/musb/am35x.c                |  2 +-
 drivers/usb/musb/blackfin.c             |  2 +-
 drivers/usb/musb/da8xx.c                |  2 +-
 drivers/usb/musb/davinci.c              |  2 +-
 drivers/usb/musb/musb_dsps.c            |  2 +-
 drivers/usb/musb/tusb6010.c             |  2 +-
 drivers/usb/phy/phy-am335x.c            |  2 +-
 drivers/usb/phy/phy-generic.c           |  2 +-
 drivers/usb/phy/phy-generic.h           |  2 +-
 drivers/usb/phy/phy-keystone.c          |  2 +-
 include/linux/usb/usb_phy_gen_xceiv.h   | 30 ------------------------------
 include/linux/usb/usb_phy_generic.h     | 30 ++++++++++++++++++++++++++++++
 16 files changed, 43 insertions(+), 44 deletions(-)
 delete mode 100644 include/linux/usb/usb_phy_gen_xceiv.h
 create mode 100644 include/linux/usb/usb_phy_generic.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/board-omap3beagle.c b/arch/arm/mach-omap2/board-omap3beagle.c
index d6ed819ff15c..660bfc5a70d7 100644
--- a/arch/arm/mach-omap2/board-omap3beagle.c
+++ b/arch/arm/mach-omap2/board-omap3beagle.c
@@ -33,7 +33,6 @@
 #include <linux/mtd/nand.h>
 #include <linux/mmc/host.h>
 #include <linux/usb/phy.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
 
 #include <linux/regulator/machine.h>
 #include <linux/i2c/twl.h>
diff --git a/arch/arm/mach-omap2/usb-host.c b/arch/arm/mach-omap2/usb-host.c
index ab983cdd3edf..745367c0c2bb 100644
--- a/arch/arm/mach-omap2/usb-host.c
+++ b/arch/arm/mach-omap2/usb-host.c
@@ -28,7 +28,7 @@
 #include <linux/io.h>
 #include <linux/gpio.h>
 #include <linux/usb/phy.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 
 #include "soc.h"
 #include "omap_device.h"
diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c
index 821cc59e6e1d..ed22d722884e 100644
--- a/drivers/usb/dwc3/dwc3-exynos.c
+++ b/drivers/usb/dwc3/dwc3-exynos.c
@@ -24,7 +24,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/clk.h>
 #include <linux/usb/otg.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
 
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 8b162f0e293c..1ed95e0386eb 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -23,7 +23,7 @@
 #include <linux/platform_device.h>
 
 #include <linux/usb/otg.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 
 /* FIXME define these in <linux/pci_ids.h> */
 #define PCI_VENDOR_ID_SYNOPSYS		0x16c3
diff --git a/drivers/usb/musb/am35x.c b/drivers/usb/musb/am35x.c
index 77ed66427969..044cd824c70d 100644
--- a/drivers/usb/musb/am35x.c
+++ b/drivers/usb/musb/am35x.c
@@ -32,7 +32,7 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 #include <linux/platform_data/usb-omap.h>
 
 #include "musb_core.h"
diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c
index 607f3ae04591..c9992a2eaaa8 100644
--- a/drivers/usb/musb/blackfin.c
+++ b/drivers/usb/musb/blackfin.c
@@ -18,7 +18,7 @@
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/prefetch.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 
 #include <asm/cacheflush.h>
 
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index bcdce8e64670..a0dabb05de76 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -32,7 +32,7 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 
 #include <mach/da8xx.h>
 #include <linux/platform_data/usb-davinci.h>
diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c
index c0e07eddb079..737035457858 100644
--- a/drivers/usb/musb/davinci.c
+++ b/drivers/usb/musb/davinci.c
@@ -32,7 +32,7 @@
 #include <linux/gpio.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 
 #include <mach/cputype.h>
 #include <mach/hardware.h>
diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c
index 3372ded5def7..18882924d9d5 100644
--- a/drivers/usb/musb/musb_dsps.c
+++ b/drivers/usb/musb/musb_dsps.c
@@ -35,7 +35,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
 #include <linux/module.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 #include <linux/platform_data/usb-omap.h>
 #include <linux/sizes.h>
 
diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c
index 0c0f5ee1e3f1..8d4a8194c8f2 100644
--- a/drivers/usb/musb/tusb6010.c
+++ b/drivers/usb/musb/tusb6010.c
@@ -24,7 +24,7 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 
 #include "musb_core.h"
 
diff --git a/drivers/usb/phy/phy-am335x.c b/drivers/usb/phy/phy-am335x.c
index bb866e466051..585e50cb1980 100644
--- a/drivers/usb/phy/phy-am335x.c
+++ b/drivers/usb/phy/phy-am335x.c
@@ -2,7 +2,7 @@
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/usb/otg.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 #include <linux/slab.h>
 #include <linux/clk.h>
 #include <linux/regulator/consumer.h>
diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c
index e76ca4ca3a8a..2c49cd8f6d25 100644
--- a/drivers/usb/phy/phy-generic.c
+++ b/drivers/usb/phy/phy-generic.c
@@ -30,7 +30,7 @@
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/usb/otg.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 #include <linux/slab.h>
 #include <linux/clk.h>
 #include <linux/regulator/consumer.h>
diff --git a/drivers/usb/phy/phy-generic.h b/drivers/usb/phy/phy-generic.h
index f32450ada12d..d8feacc0b7fb 100644
--- a/drivers/usb/phy/phy-generic.h
+++ b/drivers/usb/phy/phy-generic.h
@@ -1,7 +1,7 @@
 #ifndef _PHY_GENERIC_H_
 #define _PHY_GENERIC_H_
 
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 
 struct usb_phy_generic {
 	struct usb_phy phy;
diff --git a/drivers/usb/phy/phy-keystone.c b/drivers/usb/phy/phy-keystone.c
index 2404c442c302..f4d722de912b 100644
--- a/drivers/usb/phy/phy-keystone.c
+++ b/drivers/usb/phy/phy-keystone.c
@@ -18,7 +18,7 @@
 
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/usb/usb_phy_gen_xceiv.h>
+#include <linux/usb/usb_phy_generic.h>
 #include <linux/io.h>
 #include <linux/of.h>
 
diff --git a/include/linux/usb/usb_phy_gen_xceiv.h b/include/linux/usb/usb_phy_gen_xceiv.h
deleted file mode 100644
index c00176d48625..000000000000
--- a/include/linux/usb/usb_phy_gen_xceiv.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef __LINUX_USB_NOP_XCEIV_H
-#define __LINUX_USB_NOP_XCEIV_H
-
-#include <linux/usb/otg.h>
-
-struct usb_phy_generic_platform_data {
-	enum usb_phy_type type;
-	unsigned long clk_rate;
-
-	/* if set fails with -EPROBE_DEFER if can't get regulator */
-	unsigned int needs_vcc:1;
-	unsigned int needs_reset:1;	/* deprecated */
-	int gpio_reset;
-};
-
-#if defined(CONFIG_NOP_USB_XCEIV) || (defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE))
-/* sometimes transceivers are accessed only through e.g. ULPI */
-extern void usb_phy_generic_register(void);
-extern void usb_phy_generic_unregister(void);
-#else
-static inline void usb_phy_generic_register(void)
-{
-}
-
-static inline void usb_phy_generic_unregister(void)
-{
-}
-#endif
-
-#endif /* __LINUX_USB_NOP_XCEIV_H */
diff --git a/include/linux/usb/usb_phy_generic.h b/include/linux/usb/usb_phy_generic.h
new file mode 100644
index 000000000000..c00176d48625
--- /dev/null
+++ b/include/linux/usb/usb_phy_generic.h
@@ -0,0 +1,30 @@
+#ifndef __LINUX_USB_NOP_XCEIV_H
+#define __LINUX_USB_NOP_XCEIV_H
+
+#include <linux/usb/otg.h>
+
+struct usb_phy_generic_platform_data {
+	enum usb_phy_type type;
+	unsigned long clk_rate;
+
+	/* if set fails with -EPROBE_DEFER if can't get regulator */
+	unsigned int needs_vcc:1;
+	unsigned int needs_reset:1;	/* deprecated */
+	int gpio_reset;
+};
+
+#if defined(CONFIG_NOP_USB_XCEIV) || (defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE))
+/* sometimes transceivers are accessed only through e.g. ULPI */
+extern void usb_phy_generic_register(void);
+extern void usb_phy_generic_unregister(void);
+#else
+static inline void usb_phy_generic_register(void)
+{
+}
+
+static inline void usb_phy_generic_unregister(void)
+{
+}
+#endif
+
+#endif /* __LINUX_USB_NOP_XCEIV_H */
-- 
cgit 


From 2f36ff6915c6c00df8b9962d9c6c7992befcf8ce Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Wed, 16 Apr 2014 16:16:33 -0500
Subject: usb: phy: generic: allow multiples calls to
 usb_phy_generic_register()

it's now very easy to return a platform_device pointer
and have the caller pass it as argument when calling
usb_phy_generic_unregister().

Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/musb/am35x.c            | 12 +++++++++---
 drivers/usb/musb/blackfin.c         | 10 ++++++++--
 drivers/usb/musb/da8xx.c            | 14 +++++++++++---
 drivers/usb/musb/tusb6010.c         |  3 ++-
 drivers/usb/phy/phy-generic.c       | 19 +++++--------------
 include/linux/usb/usb_phy_generic.h |  9 +++++----
 6 files changed, 40 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/musb/am35x.c b/drivers/usb/musb/am35x.c
index 05459b56b2a8..0a34dd859555 100644
--- a/drivers/usb/musb/am35x.c
+++ b/drivers/usb/musb/am35x.c
@@ -85,6 +85,7 @@
 struct am35x_glue {
 	struct device		*dev;
 	struct platform_device	*musb;
+	struct platform_device	*phy;
 	struct clk		*phy_clk;
 	struct clk		*clk;
 };
@@ -503,7 +504,9 @@ static int am35x_probe(struct platform_device *pdev)
 
 	pdata->platform_ops		= &am35x_ops;
 
-	usb_phy_generic_register();
+	glue->phy = usb_phy_generic_register();
+	if (IS_ERR(glue->phy))
+		goto err7;
 	platform_set_drvdata(pdev, glue);
 
 	pinfo = am35x_dev_info;
@@ -517,11 +520,14 @@ static int am35x_probe(struct platform_device *pdev)
 	if (IS_ERR(musb)) {
 		ret = PTR_ERR(musb);
 		dev_err(&pdev->dev, "failed to register musb device: %d\n", ret);
-		goto err7;
+		goto err8;
 	}
 
 	return 0;
 
+err8:
+	usb_phy_generic_unregister(glue->phy);
+
 err7:
 	clk_disable(clk);
 
@@ -546,7 +552,7 @@ static int am35x_remove(struct platform_device *pdev)
 	struct am35x_glue	*glue = platform_get_drvdata(pdev);
 
 	platform_device_unregister(glue->musb);
-	usb_phy_generic_unregister();
+	usb_phy_generic_unregister(glue->phy);
 	clk_disable(glue->clk);
 	clk_disable(glue->phy_clk);
 	clk_put(glue->clk);
diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c
index 53acffe9a858..d40d5f0b5528 100644
--- a/drivers/usb/musb/blackfin.c
+++ b/drivers/usb/musb/blackfin.c
@@ -29,6 +29,7 @@
 struct bfin_glue {
 	struct device		*dev;
 	struct platform_device	*musb;
+	struct platform_device	*phy;
 };
 #define glue_to_musb(g)		platform_get_drvdata(g->musb)
 
@@ -475,7 +476,9 @@ static int bfin_probe(struct platform_device *pdev)
 
 	pdata->platform_ops		= &bfin_ops;
 
-	usb_phy_generic_register();
+	glue->phy = usb_phy_generic_register();
+	if (IS_ERR(glue->phy))
+		goto err2;
 	platform_set_drvdata(pdev, glue);
 
 	memset(musb_resources, 0x00, sizeof(*musb_resources) *
@@ -513,6 +516,9 @@ static int bfin_probe(struct platform_device *pdev)
 	return 0;
 
 err3:
+	usb_phy_generic_unregister(glue->phy);
+
+err2:
 	platform_device_put(musb);
 
 err1:
@@ -527,7 +533,7 @@ static int bfin_remove(struct platform_device *pdev)
 	struct bfin_glue		*glue = platform_get_drvdata(pdev);
 
 	platform_device_unregister(glue->musb);
-	usb_phy_generic_unregister();
+	usb_phy_generic_unregister(glue->phy);
 	kfree(glue);
 
 	return 0;
diff --git a/drivers/usb/musb/da8xx.c b/drivers/usb/musb/da8xx.c
index 024751f9b31d..058775e647ad 100644
--- a/drivers/usb/musb/da8xx.c
+++ b/drivers/usb/musb/da8xx.c
@@ -85,6 +85,7 @@
 struct da8xx_glue {
 	struct device		*dev;
 	struct platform_device	*musb;
+	struct platform_device	*phy;
 	struct clk		*clk;
 };
 
@@ -510,7 +511,11 @@ static int da8xx_probe(struct platform_device *pdev)
 
 	pdata->platform_ops		= &da8xx_ops;
 
-	usb_phy_generic_register();
+	glue->phy = usb_phy_generic_register();
+	if (IS_ERR(glue->phy)) {
+		ret = PTR_ERR(glue->phy);
+		goto err5;
+	}
 	platform_set_drvdata(pdev, glue);
 
 	memset(musb_resources, 0x00, sizeof(*musb_resources) *
@@ -537,11 +542,14 @@ static int da8xx_probe(struct platform_device *pdev)
 	if (IS_ERR(musb)) {
 		ret = PTR_ERR(musb);
 		dev_err(&pdev->dev, "failed to register musb device: %d\n", ret);
-		goto err5;
+		goto err6;
 	}
 
 	return 0;
 
+err6:
+	usb_phy_generic_unregister(glue->phy);
+
 err5:
 	clk_disable(clk);
 
@@ -560,7 +568,7 @@ static int da8xx_remove(struct platform_device *pdev)
 	struct da8xx_glue		*glue = platform_get_drvdata(pdev);
 
 	platform_device_unregister(glue->musb);
-	usb_phy_generic_unregister();
+	usb_phy_generic_unregister(glue->phy);
 	clk_disable(glue->clk);
 	clk_put(glue->clk);
 	kfree(glue);
diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c
index e1da199c6f21..f38a8dbd6075 100644
--- a/drivers/usb/musb/tusb6010.c
+++ b/drivers/usb/musb/tusb6010.c
@@ -31,6 +31,7 @@
 struct tusb6010_glue {
 	struct device		*dev;
 	struct platform_device	*musb;
+	struct platform_device	*phy;
 };
 
 static void tusb_musb_set_vbus(struct musb *musb, int is_on);
@@ -1222,7 +1223,7 @@ static int tusb_remove(struct platform_device *pdev)
 	struct tusb6010_glue		*glue = platform_get_drvdata(pdev);
 
 	platform_device_unregister(glue->musb);
-	usb_phy_generic_unregister();
+	usb_phy_generic_unregister(glue->phy);
 	kfree(glue);
 
 	return 0;
diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c
index 2c49cd8f6d25..7594e5069ae5 100644
--- a/drivers/usb/phy/phy-generic.c
+++ b/drivers/usb/phy/phy-generic.c
@@ -41,25 +41,16 @@
 
 #include "phy-generic.h"
 
-static struct platform_device *pd;
-
-void usb_phy_generic_register(void)
+struct platform_device *usb_phy_generic_register(void)
 {
-	if (pd)
-		return;
-	pd = platform_device_register_simple("usb_phy_generic", -1, NULL, 0);
-	if (IS_ERR(pd)) {
-		pr_err("Unable to register generic usb transceiver\n");
-		pd = NULL;
-		return;
-	}
+	return platform_device_register_simple("usb_phy_generic",
+			PLATFORM_DEVID_AUTO, NULL, 0);
 }
 EXPORT_SYMBOL_GPL(usb_phy_generic_register);
 
-void usb_phy_generic_unregister(void)
+void usb_phy_generic_unregister(struct platform_device *pdev)
 {
-	platform_device_unregister(pd);
-	pd = NULL;
+	platform_device_unregister(pdev);
 }
 EXPORT_SYMBOL_GPL(usb_phy_generic_unregister);
 
diff --git a/include/linux/usb/usb_phy_generic.h b/include/linux/usb/usb_phy_generic.h
index c00176d48625..8346bcc50c2f 100644
--- a/include/linux/usb/usb_phy_generic.h
+++ b/include/linux/usb/usb_phy_generic.h
@@ -15,14 +15,15 @@ struct usb_phy_generic_platform_data {
 
 #if defined(CONFIG_NOP_USB_XCEIV) || (defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE))
 /* sometimes transceivers are accessed only through e.g. ULPI */
-extern void usb_phy_generic_register(void);
-extern void usb_phy_generic_unregister(void);
+extern struct platform_device *usb_phy_generic_register(void);
+extern void usb_phy_generic_unregister(struct platform_device *);
 #else
-static inline void usb_phy_generic_register(void)
+static inline struct platform_device *usb_phy_generic_register(void)
 {
+	return NULL;
 }
 
-static inline void usb_phy_generic_unregister(void)
+static inline void usb_phy_generic_unregister(struct platform_device *pdev)
 {
 }
 #endif
-- 
cgit 


From dca769bd5a76e9e634cc36987760306846153cac Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Mon, 21 Apr 2014 10:50:35 -0500
Subject: usb: phy: generic: switch over to IS_ENABLED()

when checking if our generic PHY is enabled,
it's a lot easier to use IS_ENABLED() instead
of manually checking for it. While at that, also
remove the bogus defined(MODULE) at the end of
the line.

Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 include/linux/usb/usb_phy_generic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/usb_phy_generic.h b/include/linux/usb/usb_phy_generic.h
index 8346bcc50c2f..68adae83affc 100644
--- a/include/linux/usb/usb_phy_generic.h
+++ b/include/linux/usb/usb_phy_generic.h
@@ -13,7 +13,7 @@ struct usb_phy_generic_platform_data {
 	int gpio_reset;
 };
 
-#if defined(CONFIG_NOP_USB_XCEIV) || (defined(CONFIG_NOP_USB_XCEIV_MODULE) && defined(MODULE))
+#if IS_ENABLED(CONFIG_NOP_USB_XCEIV)
 /* sometimes transceivers are accessed only through e.g. ULPI */
 extern struct platform_device *usb_phy_generic_register(void);
 extern void usb_phy_generic_unregister(struct platform_device *);
-- 
cgit 


From 68957303f44a501af5cf37913208a2acaa6bcdf1 Mon Sep 17 00:00:00 2001
From: Christophe Ricard <christophe.ricard@gmail.com>
Date: Tue, 25 Mar 2014 06:51:47 +0100
Subject: NFC: ST21NFCA: Add driver for STMicroelectronics ST21NFCA NFC Chip

Add driver for STMicroelectronics ST21NFCA NFC controller.
ST21NFCA is using HCI protocol, shdlc as LLC layer & I2C as
communication protocol.

Adding support for Reader/Writer mode with Tag type 1/2/3/4 A & B.
It is using proprietary gate 15 for ISO14443-3 such as type 1 &
type 2 tags. It is using proprietary gate 14 for type F tags.
ST21NFCA_DEVICE_MGNT_GATE gives access to proprietary CLF configuration.
Standard gate for ISO14443-4 A (13) & B (11) are also used.

ST21NFCA specific mechanism:

One particular point to notice for the data handling is that frame
does not contain any length value. Therefore the i2c part of this driver
is managing the reception with a read length sequence until the end of
frame (0x7e) is reached.

In order to avoid conflicts between SOF & EOF, a mechanism
called byte stuffing consists of inserting an escape byte (0x7d) before
each special byte (0x7e, 0x7d). The special byte is then xored with 0x20.

In this driver, when data are available in the CLF, the interrupt
gpio is driven to active state and triggers an interrupt.
Once the i2c_master_recv starts, the interrupt gpio is driven to idle
state until it completes. If the frame is incomplete or data are still
available, interrupts will be triggered again.

Signed-off-by: Christophe Ricard <christophe-h.ricard@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/nfc/Kconfig                    |   1 +
 drivers/nfc/Makefile                   |   1 +
 drivers/nfc/st21nfca/Kconfig           |  23 ++
 drivers/nfc/st21nfca/Makefile          |   8 +
 drivers/nfc/st21nfca/i2c.c             | 595 +++++++++++++++++++++++++++++++++
 drivers/nfc/st21nfca/st21nfca.c        | 506 ++++++++++++++++++++++++++++
 drivers/nfc/st21nfca/st21nfca.h        |  87 +++++
 include/linux/platform_data/st21nfca.h |  32 ++
 8 files changed, 1253 insertions(+)
 create mode 100644 drivers/nfc/st21nfca/Kconfig
 create mode 100644 drivers/nfc/st21nfca/Makefile
 create mode 100644 drivers/nfc/st21nfca/i2c.c
 create mode 100644 drivers/nfc/st21nfca/st21nfca.c
 create mode 100644 drivers/nfc/st21nfca/st21nfca.h
 create mode 100644 include/linux/platform_data/st21nfca.h

(limited to 'include/linux')

diff --git a/drivers/nfc/Kconfig b/drivers/nfc/Kconfig
index 65d4ca19d132..26c66a126551 100644
--- a/drivers/nfc/Kconfig
+++ b/drivers/nfc/Kconfig
@@ -71,5 +71,6 @@ config NFC_PORT100
 source "drivers/nfc/pn544/Kconfig"
 source "drivers/nfc/microread/Kconfig"
 source "drivers/nfc/nfcmrvl/Kconfig"
+source "drivers/nfc/st21nfca/Kconfig"
 
 endmenu
diff --git a/drivers/nfc/Makefile b/drivers/nfc/Makefile
index ae42a3fa60c9..23225b0287fd 100644
--- a/drivers/nfc/Makefile
+++ b/drivers/nfc/Makefile
@@ -11,5 +11,6 @@ obj-$(CONFIG_NFC_SIM)		+= nfcsim.o
 obj-$(CONFIG_NFC_PORT100)	+= port100.o
 obj-$(CONFIG_NFC_MRVL)		+= nfcmrvl/
 obj-$(CONFIG_NFC_TRF7970A)	+= trf7970a.o
+obj-$(CONFIG_NFC_ST21NFCA)  += st21nfca/
 
 ccflags-$(CONFIG_NFC_DEBUG) := -DDEBUG
diff --git a/drivers/nfc/st21nfca/Kconfig b/drivers/nfc/st21nfca/Kconfig
new file mode 100644
index 000000000000..ee459f066ade
--- /dev/null
+++ b/drivers/nfc/st21nfca/Kconfig
@@ -0,0 +1,23 @@
+config NFC_ST21NFCA
+	tristate "STMicroelectronics ST21NFCA NFC driver"
+	depends on NFC_HCI
+	select CRC_CCITT
+	default n
+	---help---
+	  STMicroelectronics ST21NFCA core driver. It implements the chipset
+	  HCI logic and hooks into the NFC kernel APIs. Physical layers will
+	  register against it.
+
+	  To compile this driver as a module, choose m here. The module will
+	  be called st21nfca.
+	  Say N if unsure.
+
+config NFC_ST21NFCA_I2C
+	tristate "NFC ST21NFCA i2c support"
+	depends on NFC_ST21NFCA && I2C && NFC_SHDLC
+	---help---
+	  This module adds support for the STMicroelectronics st21nfca i2c interface.
+	  Select this if your platform is using the i2c bus.
+
+	  If you choose to build a module, it'll be called st21nfca_i2c.
+	  Say N if unsure.
diff --git a/drivers/nfc/st21nfca/Makefile b/drivers/nfc/st21nfca/Makefile
new file mode 100644
index 000000000000..038ed093a119
--- /dev/null
+++ b/drivers/nfc/st21nfca/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for ST21NFCA HCI based NFC driver
+#
+
+st21nfca_i2c-objs  = i2c.o
+
+obj-$(CONFIG_NFC_ST21NFCA)     += st21nfca.o
+obj-$(CONFIG_NFC_ST21NFCA_I2C) += st21nfca_i2c.o
diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c
new file mode 100644
index 000000000000..3b0fd0f76d1c
--- /dev/null
+++ b/drivers/nfc/st21nfca/i2c.c
@@ -0,0 +1,595 @@
+/*
+ * I2C Link Layer for ST21NFCA HCI based Driver
+ * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/crc-ccitt.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/gpio.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/nfc.h>
+#include <linux/firmware.h>
+#include <linux/unaligned/access_ok.h>
+#include <linux/platform_data/st21nfca.h>
+
+#include <net/nfc/hci.h>
+#include <net/nfc/llc.h>
+#include <net/nfc/nfc.h>
+
+#include "st21nfca.h"
+
+/*
+ * Every frame starts with ST21NFCA_SOF_EOF and ends with ST21NFCA_SOF_EOF.
+ * Because ST21NFCA_SOF_EOF is also a possible data value, a mechanism
+ * called byte stuffing is used.
+ *
+ * if byte == ST21NFCA_SOF_EOF or ST21NFCA_ESCAPE_BYTE_STUFFING
+ * - insert ST21NFCA_ESCAPE_BYTE_STUFFING (escape byte)
+ * - xor byte with ST21NFCA_BYTE_STUFFING_MASK
+ */
+#define ST21NFCA_SOF_EOF		0x7e
+#define ST21NFCA_BYTE_STUFFING_MASK	0x20
+#define ST21NFCA_ESCAPE_BYTE_STUFFING	0x7d
+
+/* SOF + 00 fill size */
+#define ST21NFCA_FRAME_HEADROOM			2
+
+/* 4 bytes crc (worst case byte stuffing) + EOF */
+#define ST21NFCA_FRAME_TAILROOM 5
+
+#define ST21NFCA_HCI_I2C_DRIVER_NAME "st21nfca_hci_i2c"
+
+/* I2C device IDs handled by this driver; the empty entry ends the table. */
+static struct i2c_device_id st21nfca_hci_i2c_id_table[] = {
+	{ST21NFCA_HCI_DRIVER_NAME, 0},
+	{}
+};
+
+MODULE_DEVICE_TABLE(i2c, st21nfca_hci_i2c_id_table);
+
+/* Per-device state of the ST21NFCA I2C physical layer. */
+struct st21nfca_i2c_phy {
+	struct i2c_client *i2c_dev;
+	struct nfc_hci_dev *hdev;
+
+	/* GPIO numbers and IRQ trigger flags copied from the platform data */
+	unsigned int gpio_ena;
+	unsigned int gpio_irq;
+	unsigned int irq_polarity;
+
+	/* skb the IRQ thread is currently filling with received bytes */
+	struct sk_buff *pending_skb;
+	/* index into len_seq[] of the next read length to use */
+	int current_read_len;
+	/*
+	 * Number of consecutive CRC failures, used as an index into wait_tab[].
+	 * The CRC may fail because the chip's i2c macrocell was disabled
+	 * by activity on another interface.
+	 */
+	int crc_trials;
+
+	/* set to 1 by the enable callback and to 0 by the disable callback */
+	int powered;
+	int run_mode;
+
+	/*
+	 * < 0 if a hardware error occurred (e.g. i2c error)
+	 * that prevents normal operation.
+	 */
+	int hard_fault;
+};
+/* Successive i2c read lengths, indexed by phy->current_read_len. */
+static u8 len_seq[] = { 13, 24, 15, 29 };
+/* Delays handed to msleep() between retries after an i2c error or bad CRC. */
+static u16 wait_tab[] = { 2, 3, 5, 15, 20, 40};
+
+/*
+ * Debug helper: print the label @info, then hex-dump the data area of @skb
+ * at KERN_DEBUG level.
+ */
+#define I2C_DUMP_SKB(info, skb)					\
+do {								\
+	pr_debug("%s:\n", info);				\
+	print_hex_dump(KERN_DEBUG, "i2c: ", DUMP_PREFIX_OFFSET,	\
+		       16, 1, (skb)->data, (skb)->len, 0);	\
+} while (0)
+
+/*
+ * Bring the chip to a known state at probe time: read (and discard) up to
+ * ST21NFCA_HCI_LLC_MAX_SIZE bytes, then send the command stored in
+ * reboot_cmd. Each step is attempted up to ARRAY_SIZE(wait_tab) times and
+ * stops at the first transfer that does not fail. Failures are not reported.
+ */
+static void st21nfca_hci_platform_init(struct st21nfca_i2c_phy *phy)
+{
+	/* only the number of entries is used, as the retry count */
+	u16 wait_tab[] = { 50, 300, 1000 };
+	char reboot_cmd[] = { 0x7E, 0x66, 0x48, 0xF6, 0x7E };
+	u8 tmp[ST21NFCA_HCI_LLC_MAX_SIZE];
+	int attempt;
+
+	for (attempt = 0; attempt < ARRAY_SIZE(wait_tab); attempt++) {
+		if (i2c_master_recv(phy->i2c_dev, tmp,
+				    ST21NFCA_HCI_LLC_MAX_SIZE) >= 0)
+			break;
+	}
+
+	for (attempt = 0; attempt < ARRAY_SIZE(wait_tab); attempt++) {
+		if (i2c_master_send(phy->i2c_dev, reboot_cmd,
+				    sizeof(reboot_cmd)) >= 0)
+			break;
+	}
+
+	usleep_range(1000, 1500);
+}
+
+/*
+ * phy_ops enable callback: raise the enable GPIO, record that the chip is
+ * powered and in HCI mode, then wait 10-15 ms. Always returns 0.
+ */
+static int st21nfca_hci_i2c_enable(void *phy_id)
+{
+	struct st21nfca_i2c_phy *i2c_phy = phy_id;
+
+	gpio_set_value(i2c_phy->gpio_ena, 1);
+
+	i2c_phy->run_mode = ST21NFCA_HCI_MODE;
+	i2c_phy->powered = 1;
+
+	usleep_range(10000, 15000);
+
+	return 0;
+}
+
+/*
+ * phy_ops disable callback: drop the enable GPIO and record that the chip
+ * is no longer powered.
+ */
+static void st21nfca_hci_i2c_disable(void *phy_id)
+{
+	struct st21nfca_i2c_phy *i2c_phy = phy_id;
+
+	pr_info("\n");
+
+	gpio_set_value(i2c_phy->gpio_ena, 0);
+	i2c_phy->powered = 0;
+}
+
+/*
+ * Prepend a zero byte to @skb and append the inverted CRC-CCITT of the
+ * resulting data, low byte first.
+ *
+ * Returns the number of CRC bytes appended (always 2).
+ */
+static int st21nfca_hci_add_len_crc(struct sk_buff *skb)
+{
+	u16 fcs;
+
+	*skb_push(skb, 1) = 0;
+
+	/* the CRC covers the zero byte that was just pushed */
+	fcs = ~crc_ccitt(0xffff, skb->data, skb->len);
+
+	*skb_put(skb, 1) = fcs & 0x00ff;
+	*skb_put(skb, 1) = (fcs >> 8) & 0x00ff;
+
+	return 2;
+}
+
+/*
+ * Undo the framing added by st21nfca_hci_i2c_write(): strip the SOF and the
+ * zero byte from the head, and the @crc_len CRC bytes plus the EOF byte from
+ * the tail, so the caller gets its skb back unchanged.
+ */
+static void st21nfca_hci_remove_len_crc(struct sk_buff *skb, int crc_len)
+{
+	skb_pull(skb, ST21NFCA_FRAME_HEADROOM);
+	/* skb_trim() takes the new total length, not a byte count to remove */
+	skb_trim(skb, skb->len - crc_len - 1);
+}
+
+/*
+ * phy_ops write callback: frame @skb (zero byte, CRC, SOF/EOF), byte-stuff
+ * it into a stack buffer and send it over i2c.
+ *
+ * Writing a frame must not return the number of written bytes.
+ * It must return either zero for success, or <0 for error.
+ * In addition, it must not alter the skb: the framing added here is
+ * removed again before returning.
+ *
+ * NOTE(review): nothing here checks that the stuffed frame fits in tmp[];
+ * presumably the payload limit given to the HCI core guarantees it - confirm.
+ */
+static int st21nfca_hci_i2c_write(void *phy_id, struct sk_buff *skb)
+{
+	int r = -1, i, j, len;
+	struct st21nfca_i2c_phy *phy = phy_id;
+	struct i2c_client *client = phy->i2c_dev;
+	/* shadows the file-scope wait_tab (same values) */
+	u16 wait_tab[] = { 2, 3, 5, 15, 20, 40};
+	u8 tmp[ST21NFCA_HCI_LLC_MAX_SIZE * 2];
+
+	I2C_DUMP_SKB("st21nfca_hci_i2c_write", skb);
+
+
+	if (phy->hard_fault != 0)
+		return phy->hard_fault;
+
+	/*
+	 * Compute the CRC before byte stuffing the frame.
+	 * The CRC bytes themselves are stuffed by the loop below,
+	 * like any other byte between SOF and EOF.
+	 */
+	len = st21nfca_hci_add_len_crc(skb);
+
+	/* add ST21NFCA_SOF_EOF on tail */
+	*skb_put(skb, 1) = ST21NFCA_SOF_EOF;
+	/* add ST21NFCA_SOF_EOF on head */
+	*skb_push(skb, 1) = ST21NFCA_SOF_EOF;
+
+	/*
+	 * Compute byte stuffing
+	 * if byte == ST21NFCA_SOF_EOF or ST21NFCA_ESCAPE_BYTE_STUFFING
+	 * insert ST21NFCA_ESCAPE_BYTE_STUFFING (escape byte)
+	 * xor byte with ST21NFCA_BYTE_STUFFING_MASK
+	 */
+	/* the SOF (index 0) and EOF (last index) are copied unstuffed */
+	tmp[0] = skb->data[0];
+	for (i = 1, j = 1; i < skb->len - 1; i++, j++) {
+		if (skb->data[i] == ST21NFCA_SOF_EOF
+		    || skb->data[i] == ST21NFCA_ESCAPE_BYTE_STUFFING) {
+			tmp[j] = ST21NFCA_ESCAPE_BYTE_STUFFING;
+			j++;
+			tmp[j] = skb->data[i] ^ ST21NFCA_BYTE_STUFFING_MASK;
+		} else {
+			tmp[j] = skb->data[i];
+		}
+	}
+	tmp[j] = skb->data[i];
+	j++;
+
+	/*
+	 * Manage sleep mode
+	 * Try up to ARRAY_SIZE(wait_tab) times to send the data, sleeping
+	 * wait_tab[i] after each failed attempt
+	 */
+	for (i = 0; i < ARRAY_SIZE(wait_tab) && r < 0; i++) {
+		r = i2c_master_send(client, tmp, j);
+		if (r < 0)
+			msleep(wait_tab[i]);
+	}
+
+	/* a short write is reported as an error, a complete one as 0 */
+	if (r >= 0) {
+		if (r != j)
+			r = -EREMOTEIO;
+		else
+			r = 0;
+	}
+
+	st21nfca_hci_remove_len_crc(skb, len);
+
+	return r;
+}
+
+/*
+ * Return the offset of the first ST21NFCA_SOF_EOF found at or after index 1
+ * of @buf, or @buflen if there is none. Returns 0 when the byte at index 1
+ * is already ST21NFCA_SOF_EOF (empty frame).
+ */
+static int get_frame_size(u8 *buf, int buflen)
+{
+	int pos;
+
+	if (buf[1] == ST21NFCA_SOF_EOF)
+		return 0;
+
+	pos = 1;
+	while (pos < buflen && buf[pos] != ST21NFCA_SOF_EOF)
+		pos++;
+
+	return pos;
+}
+
+/*
+ * Verify the CRC of a frame. The last two bytes of @buf hold the inverted
+ * CRC-CCITT of the preceding @buflen - 2 bytes, low byte first.
+ *
+ * Returns 0 when the CRC matches, -EPERM (after logging and dumping the
+ * frame) when it does not.
+ */
+static int check_crc(u8 *buf, int buflen)
+{
+	u16 crc = crc_ccitt(0xffff, buf, buflen - 2);
+
+	crc = ~crc;
+
+	if (buf[buflen - 2] == (crc & 0xff) && buf[buflen - 1] == (crc >> 8))
+		return 0;
+
+	pr_err(ST21NFCA_HCI_DRIVER_NAME
+	       ": CRC error 0x%x != 0x%x 0x%x\n", crc, buf[buflen - 1],
+	       buf[buflen - 2]);
+
+	pr_info(DRIVER_DESC ": %s : BAD CRC\n", __func__);
+	print_hex_dump(KERN_DEBUG, "crc: ", DUMP_PREFIX_NONE,
+		       16, 2, buf, buflen, false);
+
+	return -EPERM;
+}
+
+/*
+ * Prepare received data for upper layer.
+ * Received data include byte stuffing, crc and sof/eof
+ * which is not usable by hci part.
+ *
+ * The frame is unstuffed in place with separate read and write indexes, so
+ * any number of escaped bytes is handled and nothing beyond the frame is
+ * read. An escaped byte is restored by xor-ing it with
+ * ST21NFCA_BYTE_STUFFING_MASK, mirroring what the write path does.
+ *
+ * returns:
+ * frame size without sof/eof, header and byte stuffing
+ * 0 : empty frame
+ * -EBADMSG : frame was incorrect and discarded
+ */
+static int st21nfca_hci_i2c_repack(struct sk_buff *skb)
+{
+	int src, dst, size;
+
+	if (skb->len < 1 || (skb->len > 1 && skb->data[1] != 0))
+		return -EBADMSG;
+
+	size = get_frame_size(skb->data, skb->len);
+	if (size <= 0)
+		return 0;
+
+	/* drop the EOF and everything after it */
+	skb_trim(skb, size);
+
+	/* remove ST21NFCA byte stuffing for upper layer */
+	for (src = 1, dst = 1; src < size; src++, dst++) {
+		if (skb->data[src] == ST21NFCA_ESCAPE_BYTE_STUFFING &&
+		    src + 1 < size) {
+			src++;
+			skb->data[dst] = skb->data[src] ^
+					 ST21NFCA_BYTE_STUFFING_MASK;
+		} else {
+			skb->data[dst] = skb->data[src];
+		}
+	}
+	/* remove the bytes freed by unstuffing */
+	skb_trim(skb, dst);
+	/* remove ST21NFCA_SOF_EOF from head */
+	skb_pull(skb, 1);
+
+	/* need at least the head byte and the two CRC bytes */
+	if (skb->len < 3)
+		return -EBADMSG;
+
+	if (check_crc(skb->data, skb->len) != 0)
+		return -EBADMSG;
+
+	/* remove headbyte */
+	skb_pull(skb, 1);
+	/* remove crc. Byte Stuffing is already removed here */
+	skb_trim(skb, skb->len - 2);
+
+	return skb->len;
+}
+
+/*
+ * Reads the next chunk of an shdlc frame and appends it to @skb. The chunk
+ * length comes from len_seq[], indexed by phy->current_read_len. Once the
+ * accumulated data contains a ST21NFCA_SOF_EOF past the first two bytes, the
+ * frame is repacked so that the skb contains only LLC header and payload.
+ *
+ * Room for the chunk is reserved in the skb once, before the retry loop, so
+ * a failed i2c transfer does not grow the skb again on every retry.
+ *
+ * returns:
+ * frame size : if received frame is complete (find ST21NFCA_SOF_EOF at
+ * end of read)
+ * -EAGAIN : if received frame is incomplete (not find ST21NFCA_SOF_EOF
+ * at end of read)
+ * -EREMOTEIO : i2c read error (fatal)
+ * -EBADMSG : frame was incorrect and discarded
+ * (value returned from st21nfca_hci_i2c_repack)
+ * -EIO : if no ST21NFCA_SOF_EOF is found after reaching
+ * the read length end sequence
+ */
+static int st21nfca_hci_i2c_read(struct st21nfca_i2c_phy *phy,
+				 struct sk_buff *skb)
+{
+	int r, i;
+	u8 len;
+	u8 *buf;
+	struct i2c_client *client = phy->i2c_dev;
+
+	if (phy->current_read_len >= ARRAY_SIZE(len_seq))
+		return -EIO;
+
+	len = len_seq[phy->current_read_len];
+	buf = skb_put(skb, len);
+
+	/*
+	 * Retry mechanism:
+	 * an operation on the I2C interface may fail while the chip is busy
+	 * on the RF or SWP interface
+	 */
+	r = 0;
+	for (i = 0; i < ARRAY_SIZE(wait_tab) && r <= 0; i++) {
+		r = i2c_master_recv(client, buf, len);
+		if (r < 0)
+			msleep(wait_tab[i]);
+	}
+
+	if (r != len) {
+		phy->current_read_len = 0;
+		return -EREMOTEIO;
+	}
+
+	/* an EOF past the SOF and length bytes means the frame is complete */
+	if (memchr(skb->data + 2, ST21NFCA_SOF_EOF,
+		   skb->len - 2) != NULL) {
+		phy->current_read_len = 0;
+		return st21nfca_hci_i2c_repack(skb);
+	}
+
+	phy->current_read_len++;
+	return -EAGAIN;
+}
+
+/*
+ * Reads an shdlc frame from the chip. This is not as straightforward as it
+ * seems. The frame format is data-crc, and corruption can occur anywhere
+ * while transiting on i2c bus, such that we could read an invalid data.
+ * The tricky case is when we read a corrupted data or crc. We must detect
+ * this here in order to determine that data can be transmitted to the hci
+ * core. This is the reason why we check the crc here.
+ * The CLF will repeat a frame until we send a RR on that frame.
+ *
+ * On ST21NFCA, IRQ goes in idle when read starts. As no size information are
+ * available in the incoming data, other IRQ might come. Every IRQ will trigger
+ * a read sequence with different length and will fill the current frame.
+ * The reception is complete once we reach a ST21NFCA_SOF_EOF.
+ *
+ * A completed frame is handed to the HCI core; a frame that is dropped
+ * (bad CRC, empty frame) is freed here before a fresh pending skb is
+ * allocated for the next frame.
+ */
+static irqreturn_t st21nfca_hci_irq_thread_fn(int irq, void *phy_id)
+{
+	struct st21nfca_i2c_phy *phy = phy_id;
+	struct i2c_client *client;
+
+	int r;
+
+	if (!phy || irq != phy->i2c_dev->irq) {
+		WARN_ON_ONCE(1);
+		return IRQ_NONE;
+	}
+
+	client = phy->i2c_dev;
+	dev_dbg(&client->dev, "IRQ\n");
+
+	if (phy->hard_fault != 0)
+		return IRQ_HANDLED;
+
+	r = st21nfca_hci_i2c_read(phy, phy->pending_skb);
+	if (r == -EREMOTEIO) {
+		phy->hard_fault = r;
+
+		/* a NULL frame reports the failure to the HCI core */
+		nfc_hci_recv_frame(phy->hdev, NULL);
+
+		return IRQ_HANDLED;
+	} else if (r == -EAGAIN || r == -EIO) {
+		/* frame still incomplete: keep filling the same skb */
+		return IRQ_HANDLED;
+	} else if (r == -EBADMSG && phy->crc_trials < ARRAY_SIZE(wait_tab)) {
+		/*
+		 * With ST21NFCA, only one interface (I2C, RF or SWP)
+		 * may be active at a time.
+		 * Having incorrect crc is usually due to i2c macrocell
+		 * deactivation in the middle of a transmission.
+		 * It may generate corrupted data on i2c.
+		 * We give sometime to get i2c back.
+		 * The complete frame will be repeated.
+		 */
+		msleep(wait_tab[phy->crc_trials]);
+		phy->crc_trials++;
+		phy->current_read_len = 0;
+		kfree_skb(phy->pending_skb);
+	} else if (r > 0) {
+		/*
+		 * We succeeded to read data from the CLF and
+		 * data is valid.
+		 * Reset counter.
+		 */
+		nfc_hci_recv_frame(phy->hdev, phy->pending_skb);
+		phy->crc_trials = 0;
+	} else {
+		/* empty or unrecoverable frame: nobody else owns the skb */
+		kfree_skb(phy->pending_skb);
+	}
+
+	phy->pending_skb = alloc_skb(ST21NFCA_HCI_LLC_MAX_SIZE * 2, GFP_KERNEL);
+	if (phy->pending_skb == NULL) {
+		phy->hard_fault = -ENOMEM;
+		nfc_hci_recv_frame(phy->hdev, NULL);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* Physical-layer callbacks passed to st21nfca_hci_probe(). */
+static struct nfc_phy_ops i2c_phy_ops = {
+	.write = st21nfca_hci_i2c_write,
+	.enable = st21nfca_hci_i2c_enable,
+	.disable = st21nfca_hci_i2c_disable,
+};
+
+/*
+ * Copy the platform data into @phy, request and configure the IRQ and
+ * enable GPIOs, and allocate the first receive skb.
+ *
+ * Returns 0 on success, -EINVAL without platform data, -ENODEV if a GPIO
+ * cannot be requested or configured, -ENOMEM if the skb allocation fails.
+ */
+static int st21nfca_request_resources(struct st21nfca_i2c_phy *phy,
+				      struct i2c_client *client)
+{
+	struct st21nfca_nfc_platform_data *pdata;
+	int r;
+
+	pdata = client->dev.platform_data;
+	if (pdata == NULL) {
+		nfc_err(&client->dev, "No platform data\n");
+		return -EINVAL;
+	}
+
+	/* store for later use */
+	phy->gpio_irq = pdata->gpio_irq;
+	phy->gpio_ena = pdata->gpio_ena;
+	phy->irq_polarity = pdata->irq_polarity;
+	phy->i2c_dev = client;
+
+	/* device-managed request: no explicit gpio_free() is needed */
+	r = devm_gpio_request(&client->dev, phy->gpio_irq, "wake_up");
+	if (r) {
+		pr_err("%s : gpio_request failed\n", __FILE__);
+		return -ENODEV;
+	}
+
+	r = gpio_direction_input(phy->gpio_irq);
+	if (r) {
+		pr_err("%s : gpio_direction_input failed\n", __FILE__);
+		return -ENODEV;
+	}
+
+	/* a gpio_ena of 0 is treated as "no enable line" */
+	if (phy->gpio_ena != 0) {
+		r = devm_gpio_request(&client->dev,
+					phy->gpio_ena, "clf_enable");
+		if (r) {
+			pr_err("%s : ena gpio_request failed\n", __FILE__);
+			return -ENODEV;
+		}
+		r = gpio_direction_output(phy->gpio_ena, 1);
+
+		if (r) {
+			pr_err("%s : ena gpio_direction_output failed\n",
+			       __FILE__);
+			return -ENODEV;
+		}
+	}
+
+	/* skb the IRQ thread fills with the first incoming frame */
+	phy->pending_skb = alloc_skb(ST21NFCA_HCI_LLC_MAX_SIZE * 2, GFP_KERNEL);
+	if (phy->pending_skb == NULL)
+		return -ENOMEM;
+
+	phy->current_read_len = 0;
+	phy->crc_trials = 0;
+	/* r is 0 here: every failing call above returned early */
+	return r;
+}
+
+/*
+ * I2C probe: allocate the phy state, acquire the platform resources, send
+ * the platform init sequence to the chip, install the threaded IRQ handler
+ * and register the device with the ST21NFCA HCI core.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int st21nfca_hci_i2c_probe(struct i2c_client *client,
+				  const struct i2c_device_id *id)
+{
+	struct device *dev = &client->dev;
+	struct st21nfca_i2c_phy *phy;
+	int ret;
+
+	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "IRQ: %d\n", client->irq);
+
+	if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+		nfc_err(dev, "Need I2C_FUNC_I2C\n");
+		return -ENODEV;
+	}
+
+	phy = devm_kzalloc(dev, sizeof(*phy), GFP_KERNEL);
+	if (phy == NULL) {
+		nfc_err(dev,
+			"Cannot allocate memory for st21nfca i2c phy.\n");
+		return -ENOMEM;
+	}
+
+	phy->i2c_dev = client;
+	i2c_set_clientdata(client, phy);
+
+	if (client->dev.platform_data == NULL) {
+		nfc_err(dev, "No platform data\n");
+		return -EINVAL;
+	}
+
+	ret = st21nfca_request_resources(phy, client);
+	if (ret) {
+		nfc_err(dev, "Cannot get platform resources\n");
+		return ret;
+	}
+
+	st21nfca_hci_platform_init(phy);
+
+	/* no hard-irq handler: all the work is done in the IRQ thread */
+	ret = devm_request_threaded_irq(dev, client->irq, NULL,
+					st21nfca_hci_irq_thread_fn,
+					phy->irq_polarity | IRQF_ONESHOT,
+					ST21NFCA_HCI_DRIVER_NAME, phy);
+	if (ret < 0) {
+		nfc_err(dev, "Unable to register IRQ handler\n");
+		return ret;
+	}
+
+	ret = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME,
+				 ST21NFCA_FRAME_HEADROOM,
+				 ST21NFCA_FRAME_TAILROOM,
+				 ST21NFCA_HCI_LLC_MAX_PAYLOAD, &phy->hdev);
+
+	return ret < 0 ? ret : 0;
+}
+
+/*
+ * I2C remove: unregister from the HCI core, then power the chip down if it
+ * is still marked as powered. Always returns 0.
+ */
+static int st21nfca_hci_i2c_remove(struct i2c_client *client)
+{
+	struct st21nfca_i2c_phy *i2c_phy = i2c_get_clientdata(client);
+
+	dev_dbg(&client->dev, "%s\n", __func__);
+
+	st21nfca_hci_remove(i2c_phy->hdev);
+
+	if (!i2c_phy->powered)
+		return 0;
+
+	st21nfca_hci_i2c_disable(i2c_phy);
+	return 0;
+}
+
+/* I2C driver description registered by module_i2c_driver(). */
+static struct i2c_driver st21nfca_hci_i2c_driver = {
+	.driver = {
+		   .name = ST21NFCA_HCI_I2C_DRIVER_NAME,
+		   },
+	.probe = st21nfca_hci_i2c_probe,
+	.id_table = st21nfca_hci_i2c_id_table,
+	.remove = st21nfca_hci_i2c_remove,
+};
+
+module_i2c_driver(st21nfca_hci_i2c_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/nfc/st21nfca/st21nfca.c b/drivers/nfc/st21nfca/st21nfca.c
new file mode 100644
index 000000000000..69213f37b7ba
--- /dev/null
+++ b/drivers/nfc/st21nfca/st21nfca.c
@@ -0,0 +1,506 @@
+/*
+ * HCI based Driver for STMicroelectronics NFC Chip
+ *
+ * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/crc-ccitt.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/miscdevice.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+
+#include <linux/nfc.h>
+#include <net/nfc/hci.h>
+#include <net/nfc/llc.h>
+
+#include <uapi/linux/nfc.h>
+
+#include "st21nfca.h"
+#include <linux/platform_data/st21nfca.h>
+
+#define DRIVER_DESC "HCI NFC driver for ST21NFCA"
+
+#define FULL_VERSION_LEN 3
+
+/* Proprietary gates, events, commands and registers */
+
+/* Commands that apply to all RF readers */
+#define ST21NFCA_RF_READER_CMD_PRESENCE_CHECK	0x30
+
+#define ST21NFCA_RF_READER_ISO15693_GATE	0x12
+
+/*
+ * Reader gate for communication with contact-less cards using Type A
+ * protocol ISO14443-3 but not compliant with ISO14443-4
+ */
+#define ST21NFCA_RF_READER_14443_3_A_GATE	0x15
+#define ST21NFCA_RF_READER_14443_3_A_UID	0x02
+#define ST21NFCA_RF_READER_14443_3_A_ATQA	0x03
+#define ST21NFCA_RF_READER_14443_3_A_SAK	0x04
+
+#define ST21NFCA_DEVICE_MGNT_GATE		0x01
+#define ST21NFCA_DEVICE_MGNT_PIPE		0x02
+#define ST21NFCA_NFC_MODE	0x03	/* NFC_MODE parameter*/
+
+
+static DECLARE_BITMAP(dev_mask, ST21NFCA_NUM_DEVICES);
+
+/*
+ * Gate/pipe pairs copied into the HCI init data by st21nfca_hci_probe().
+ * Only the device management gate is given a fixed pipe; every other
+ * entry uses NFC_HCI_INVALID_PIPE.
+ */
+static struct nfc_hci_gate st21nfca_gates[] = {
+	{NFC_HCI_ADMIN_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_LOOPBACK_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_ID_MGMT_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_LINK_MGMT_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_RF_READER_B_GATE, NFC_HCI_INVALID_PIPE},
+	{NFC_HCI_RF_READER_A_GATE, NFC_HCI_INVALID_PIPE},
+	{ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE},
+	{ST21NFCA_RF_READER_F_GATE, NFC_HCI_INVALID_PIPE},
+	{ST21NFCA_RF_READER_14443_3_A_GATE, NFC_HCI_INVALID_PIPE},
+};
+/* Largest headroom needed for outgoing custom commands */
+#define ST21NFCA_CMDS_HEADROOM  7
+
+/*
+ * HCI open callback: enable the physical layer and move from COLD to READY.
+ *
+ * Returns -EBUSY if the device is not in the COLD state, otherwise the
+ * result of the phy enable callback.
+ */
+static int st21nfca_hci_open(struct nfc_hci_dev *hdev)
+{
+	struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
+	int ret = -EBUSY;
+
+	mutex_lock(&info->info_lock);
+
+	if (info->state == ST21NFCA_ST_COLD) {
+		ret = info->phy_ops->enable(info->phy_id);
+		if (ret == 0)
+			info->state = ST21NFCA_ST_READY;
+	}
+
+	mutex_unlock(&info->info_lock);
+	return ret;
+}
+
+/*
+ * HCI close callback: disable the physical layer and return to the COLD
+ * state. Does nothing if the device is already COLD.
+ */
+static void st21nfca_hci_close(struct nfc_hci_dev *hdev)
+{
+	struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev);
+
+	mutex_lock(&info->info_lock);
+
+	if (info->state != ST21NFCA_ST_COLD) {
+		info->phy_ops->disable(info->phy_id);
+		info->state = ST21NFCA_ST_COLD;
+	}
+
+	mutex_unlock(&info->info_lock);
+}
+
+/*
+ * HCI ready callback: whitelist the UICC host, make sure NFC_MODE is
+ * enabled in the device management gate, send END_OPERATION on reader
+ * gate A and dump the software version.
+ *
+ * The NFC_MODE reply is copied out and its skb freed on every path, and the
+ * reply length is checked before its first byte is read.
+ *
+ * Returns 0 on success, -EINVAL on a reply of unexpected length, or the
+ * negative error of the failing HCI request.
+ */
+static int st21nfca_hci_ready(struct nfc_hci_dev *hdev)
+{
+	struct sk_buff *skb;
+
+	u8 param;
+	u8 nfc_mode;
+	int r;
+
+	param = NFC_HCI_UICC_HOST_ID;
+	r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE,
+			      NFC_HCI_ADMIN_WHITELIST, &param, 1);
+	if (r < 0)
+		return r;
+
+	/* Set NFC_MODE in device management gate to enable */
+	r = nfc_hci_get_param(hdev, ST21NFCA_DEVICE_MGNT_GATE,
+			      ST21NFCA_NFC_MODE, &skb);
+	if (r < 0)
+		return r;
+
+	if (skb->len < 1) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	nfc_mode = skb->data[0];
+	kfree_skb(skb);
+
+	if (nfc_mode == 0) {
+		param = 1;
+
+		r = nfc_hci_set_param(hdev, ST21NFCA_DEVICE_MGNT_GATE,
+					ST21NFCA_NFC_MODE, &param, 1);
+		if (r < 0)
+			return r;
+	}
+
+	r = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+			       NFC_HCI_EVT_END_OPERATION, NULL, 0);
+	if (r < 0)
+		return r;
+
+	r = nfc_hci_get_param(hdev, NFC_HCI_ID_MGMT_GATE,
+			      NFC_HCI_ID_MGMT_VERSION_SW, &skb);
+	if (r < 0)
+		return r;
+
+	if (skb->len != FULL_VERSION_LEN) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	print_hex_dump(KERN_DEBUG, "FULL VERSION SOFTWARE INFO: ",
+		       DUMP_PREFIX_NONE, 16, 1,
+		       skb->data, FULL_VERSION_LEN, false);
+
+	kfree_skb(skb);
+
+	return 0;
+}
+
+/* HCI xmit callback: pass @skb to the physical layer's write callback. */
+static int st21nfca_hci_xmit(struct nfc_hci_dev *hdev, struct sk_buff *skb)
+{
+	struct st21nfca_hci_info *hci_info = nfc_hci_get_clientdata(hdev);
+	struct nfc_phy_ops *ops = hci_info->phy_ops;
+
+	return ops->write(hci_info->phy_id, skb);
+}
+
+/*
+ * HCI start_poll callback. Sends END_OPERATION on reader gate A, then, if
+ * any initiator protocol is requested, disconnects every reader gate whose
+ * gate id has no bit in common with @im_protocols and sends
+ * READER_REQUESTED. If that last event fails, END_OPERATION is sent again.
+ *
+ * NOTE(review): the gate id itself is and-ed with the protocol mask -
+ * confirm this is the intended selection test.
+ *
+ * Returns the result of the last checked HCI request.
+ */
+static int st21nfca_hci_start_poll(struct nfc_hci_dev *hdev,
+				   u32 im_protocols, u32 tm_protocols)
+{
+	static const u8 reader_gates[] = {
+		NFC_HCI_RF_READER_B_GATE,
+		NFC_HCI_RF_READER_A_GATE,
+		ST21NFCA_RF_READER_F_GATE,
+		ST21NFCA_RF_READER_14443_3_A_GATE,
+		ST21NFCA_RF_READER_ISO15693_GATE,
+	};
+	int ret;
+	int idx;
+
+	pr_info(DRIVER_DESC ": %s protocols 0x%x 0x%x\n",
+		__func__, im_protocols, tm_protocols);
+
+	ret = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+				 NFC_HCI_EVT_END_OPERATION, NULL, 0);
+	if (ret < 0)
+		return ret;
+
+	if (!im_protocols)
+		return ret;
+
+	/* the gates are visited in the order listed in reader_gates[] */
+	for (idx = 0; idx < ARRAY_SIZE(reader_gates); idx++) {
+		if ((reader_gates[idx] & im_protocols) != 0)
+			continue;
+
+		ret = nfc_hci_disconnect_gate(hdev, reader_gates[idx]);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+				 NFC_HCI_EVT_READER_REQUESTED, NULL, 0);
+	if (ret < 0)
+		nfc_hci_send_event(hdev, NFC_HCI_RF_READER_A_GATE,
+				   NFC_HCI_EVT_END_OPERATION, NULL, 0);
+
+	return ret;
+}
+
+/*
+ * Read the ATQA parameter of the ISO14443-3 A reader gate into @atqa.
+ *
+ * Returns the result of the parameter read, or -EPROTO if the reply is not
+ * exactly two bytes long. @atqa is only written on success.
+ */
+static int st21nfca_get_iso14443_3_atqa(struct nfc_hci_dev *hdev, u16 *atqa)
+{
+	struct sk_buff *atqa_skb = NULL;
+	int ret;
+
+	ret = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE,
+				ST21NFCA_RF_READER_14443_3_A_ATQA, &atqa_skb);
+	if (ret >= 0) {
+		if (atqa_skb->len == 2)
+			*atqa = be16_to_cpu(*(u16 *) atqa_skb->data);
+		else
+			ret = -EPROTO;
+	}
+
+	kfree_skb(atqa_skb);
+	return ret;
+}
+
+/*
+ * Read the SAK parameter of the ISO14443-3 A reader gate into @sak.
+ *
+ * Returns the result of the parameter read, or -EPROTO if the reply is not
+ * exactly one byte long. @sak is only written on success.
+ */
+static int st21nfca_get_iso14443_3_sak(struct nfc_hci_dev *hdev, u8 *sak)
+{
+	struct sk_buff *sak_skb = NULL;
+	int ret;
+
+	ret = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE,
+				ST21NFCA_RF_READER_14443_3_A_SAK, &sak_skb);
+	if (ret >= 0) {
+		if (sak_skb->len == 1)
+			*sak = sak_skb->data[0];
+		else
+			ret = -EPROTO;
+	}
+
+	kfree_skb(sak_skb);
+	return ret;
+}
+
+/*
+ * Read the UID parameter of the ISO14443-3 A reader gate.
+ *
+ * @gate: despite its name, the output buffer receiving the UID bytes; it
+ *        must hold at least NFC_NFCID1_MAXSIZE bytes
+ * @len:  receives the number of UID bytes copied
+ *
+ * The bytes are copied out because the reply skb is freed before returning.
+ *
+ * Returns the result of the parameter read, or -EPROTO if the reply is
+ * empty or longer than NFC_NFCID1_MAXSIZE. The outputs are only written on
+ * success.
+ */
+static int st21nfca_get_iso14443_3_uid(struct nfc_hci_dev *hdev, u8 *gate,
+				       int *len)
+{
+	int r;
+	struct sk_buff *uid_skb = NULL;
+
+	r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_14443_3_A_GATE,
+			      ST21NFCA_RF_READER_14443_3_A_UID, &uid_skb);
+	if (r < 0)
+		goto exit;
+
+	if (uid_skb->len == 0 || uid_skb->len > NFC_NFCID1_MAXSIZE) {
+		r = -EPROTO;
+		goto exit;
+	}
+
+	memcpy(gate, uid_skb->data, uid_skb->len);
+	*len = uid_skb->len;
+exit:
+	kfree_skb(uid_skb);
+	return r;
+}
+
+/*
+ * HCI target_from_gate callback: fill @target according to the reader gate
+ * the target was found on.
+ *
+ * The F gate yields a FeliCa target. On the ISO14443-3 A gate, an ATQA of
+ * 0x000c yields a Jewel target; otherwise SAK and UID are read and the
+ * protocol is derived from the SAK.
+ *
+ * Returns 0 on success, -EPROTO for an unsupported gate or SAK, or the
+ * error of the failing parameter read.
+ */
+static int st21nfca_hci_target_from_gate(struct nfc_hci_dev *hdev, u8 gate,
+					 struct nfc_target *target)
+{
+	u8 uid[NFC_NFCID1_MAXSIZE];
+	int uid_len;
+	u16 atqa;
+	u8 sak;
+	int ret;
+
+	switch (gate) {
+	case ST21NFCA_RF_READER_F_GATE:
+		target->supported_protocols = NFC_PROTO_FELICA_MASK;
+		return 0;
+	case ST21NFCA_RF_READER_14443_3_A_GATE:
+		break;
+	default:
+		return -EPROTO;
+	}
+
+	/* ISO14443-3 type 1 or 2 tags */
+	ret = st21nfca_get_iso14443_3_atqa(hdev, &atqa);
+	if (ret < 0)
+		return ret;
+
+	if (atqa == 0x000c) {
+		target->supported_protocols = NFC_PROTO_JEWEL_MASK;
+		target->sens_res = 0x0c00;
+		return 0;
+	}
+
+	ret = st21nfca_get_iso14443_3_sak(hdev, &sak);
+	if (ret < 0)
+		return ret;
+
+	ret = st21nfca_get_iso14443_3_uid(hdev, uid, &uid_len);
+	if (ret < 0)
+		return ret;
+
+	target->supported_protocols = nfc_hci_sak_to_protocol(sak);
+	if (target->supported_protocols == 0xffffffff)
+		return -EPROTO;
+
+	target->sens_res = atqa;
+	target->sel_res = sak;
+	memcpy(target->nfcid1, uid, uid_len);
+	target->nfcid1_len = uid_len;
+
+	return 0;
+}
+
+/*
+ * HCI im_transceive callback. For the F and ISO14443-3 A reader gates the
+ * byte 0x1a is prepended to @skb and the data is sent asynchronously with
+ * ST21NFCA_WR_XCHG_DATA.
+ *
+ * Returns:
+ * <= 0: driver handled the data exchange
+ *    1: driver doesn't especially handle, please do standard processing
+ */
+static int st21nfca_hci_im_transceive(struct nfc_hci_dev *hdev,
+				      struct nfc_target *target,
+				      struct sk_buff *skb,
+				      data_exchange_cb_t cb, void *cb_context)
+{
+	pr_info(DRIVER_DESC ": %s for gate=%d len=%d\n", __func__,
+		target->hci_reader_gate, skb->len);
+
+	if (target->hci_reader_gate != ST21NFCA_RF_READER_F_GATE &&
+	    target->hci_reader_gate != ST21NFCA_RF_READER_14443_3_A_GATE)
+		return 1;
+
+	*skb_push(skb, 1) = 0x1a;	/* CTR, see spec:10.2.2.1 */
+
+	return nfc_hci_send_cmd_async(hdev, target->hci_reader_gate,
+				      ST21NFCA_WR_XCHG_DATA, skb->data,
+				      skb->len, cb, cb_context);
+}
+
+/*
+ * HCI check_presence callback.
+ *
+ * Returns the result of the command sent to the target's reader gate, or
+ * -EOPNOTSUPP for gates that are not handled here.
+ */
+static int st21nfca_hci_check_presence(struct nfc_hci_dev *hdev,
+				       struct nfc_target *target)
+{
+	u8 fwi = 0x11;
+
+	if (target->hci_reader_gate == NFC_HCI_RF_READER_A_GATE ||
+	    target->hci_reader_gate == NFC_HCI_RF_READER_B_GATE) {
+		/*
+		 * PRESENCE_CHECK is available on those gates.
+		 * However, the answer to this command takes 3 * fwi
+		 * if the card is not present.
+		 * Instead, we send an empty I-Frame with a very short
+		 * configurable fwi ~604µs.
+		 */
+		return nfc_hci_send_cmd(hdev, target->hci_reader_gate,
+					ST21NFCA_WR_XCHG_DATA, &fwi, 1, NULL);
+	}
+
+	if (target->hci_reader_gate == ST21NFCA_RF_READER_14443_3_A_GATE)
+		return nfc_hci_send_cmd(hdev, target->hci_reader_gate,
+					ST21NFCA_RF_READER_CMD_PRESENCE_CHECK,
+					NULL, 0, NULL);
+
+	return -EOPNOTSUPP;
+}
+
+/* HCI callbacks handed to nfc_hci_allocate_device() in st21nfca_hci_probe(). */
+static struct nfc_hci_ops st21nfca_hci_ops = {
+	.open = st21nfca_hci_open,
+	.close = st21nfca_hci_close,
+	.hci_ready = st21nfca_hci_ready,
+	.xmit = st21nfca_hci_xmit,
+	.start_poll = st21nfca_hci_start_poll,
+	.target_from_gate = st21nfca_hci_target_from_gate,
+	.im_transceive = st21nfca_hci_im_transceive,
+	.check_presence = st21nfca_hci_check_presence,
+};
+
+/*
+ * Allocate and register an NFC HCI device for one ST21NFCA chip.
+ *
+ * @phy_id:       opaque handle passed back to every @phy_ops callback
+ * @phy_ops:      physical-layer callbacks (write/enable/disable)
+ * @llc_name:     name of the LLC to use
+ * @phy_headroom: headroom the physical layer needs in outgoing skbs
+ * @phy_tailroom: tailroom the physical layer needs in outgoing skbs
+ * @phy_payload:  maximum payload size of one link-layer frame
+ * @hdev:         receives the registered HCI device on success
+ *
+ * Returns 0 on success or a negative errno; on failure nothing stays
+ * allocated and @hdev is not written. Running out of device numbers is
+ * reported as -ENODEV.
+ *
+ * NOTE(review): nothing in this file sets a bit in dev_mask, so dev_num
+ * looks like it is always 0 - confirm whether the bit should be claimed here
+ * and released in st21nfca_hci_remove().
+ */
+int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops,
+		       char *llc_name, int phy_headroom, int phy_tailroom,
+		       int phy_payload, struct nfc_hci_dev **hdev)
+{
+	struct st21nfca_hci_info *info;
+	int r = 0;
+	int dev_num;
+	u32 protocols;
+	struct nfc_hci_init_data init_data;
+	unsigned long quirks = 0;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		r = -ENOMEM;
+		goto err_alloc_hdev;
+	}
+
+	info->phy_ops = phy_ops;
+	info->phy_id = phy_id;
+	info->state = ST21NFCA_ST_COLD;
+	mutex_init(&info->info_lock);
+
+	init_data.gate_count = ARRAY_SIZE(st21nfca_gates);
+
+	memcpy(init_data.gates, st21nfca_gates, sizeof(st21nfca_gates));
+
+	/*
+	 * Session id must include the driver name + i2c bus addr
+	 * persistent info to discriminate 2 identical chips
+	 */
+	dev_num = find_first_zero_bit(dev_mask, ST21NFCA_NUM_DEVICES);
+	if (dev_num >= ST21NFCA_NUM_DEVICES) {
+		/* r was still 0 here, which would have reported success */
+		r = -ENODEV;
+		goto err_alloc_hdev;
+	}
+
+	scnprintf(init_data.session_id, sizeof(init_data.session_id), "%s%2x",
+		  "ST21AH", dev_num);
+
+	protocols = NFC_PROTO_JEWEL_MASK |
+	    NFC_PROTO_MIFARE_MASK |
+	    NFC_PROTO_FELICA_MASK |
+	    NFC_PROTO_ISO14443_MASK |
+	    NFC_PROTO_ISO14443_B_MASK;
+
+	set_bit(NFC_HCI_QUIRK_SHORT_CLEAR, &quirks);
+
+	info->hdev =
+	    nfc_hci_allocate_device(&st21nfca_hci_ops, &init_data, quirks,
+				    protocols, llc_name,
+				    phy_headroom + ST21NFCA_CMDS_HEADROOM,
+				    phy_tailroom, phy_payload);
+
+	if (!info->hdev) {
+		pr_err("Cannot allocate nfc hdev.\n");
+		r = -ENOMEM;
+		goto err_alloc_hdev;
+	}
+
+	nfc_hci_set_clientdata(info->hdev, info);
+
+	r = nfc_hci_register_device(info->hdev);
+	if (r)
+		goto err_regdev;
+
+	*hdev = info->hdev;
+
+	return 0;
+
+err_regdev:
+	nfc_hci_free_device(info->hdev);
+
+err_alloc_hdev:
+	/* kfree(NULL) is a no-op, so this also covers the failed kzalloc */
+	kfree(info);
+
+	return r;
+}
+EXPORT_SYMBOL(st21nfca_hci_probe);
+
+/*
+ * Unregister and free the HCI device created by st21nfca_hci_probe(),
+ * together with its driver-private info structure.
+ */
+void st21nfca_hci_remove(struct nfc_hci_dev *hdev)
+{
+	/* fetch the private data before the device that carries it is freed */
+	struct st21nfca_hci_info *hci_info = nfc_hci_get_clientdata(hdev);
+
+	nfc_hci_unregister_device(hdev);
+	nfc_hci_free_device(hdev);
+
+	kfree(hci_info);
+}
+EXPORT_SYMBOL(st21nfca_hci_remove);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h
new file mode 100644
index 000000000000..334cd90bcc8c
--- /dev/null
+++ b/drivers/nfc/st21nfca/st21nfca.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __LOCAL_ST21NFCA_H_
+#define __LOCAL_ST21NFCA_H_
+
+#include <net/nfc/hci.h>
+
+#define HCI_MODE 0
+
+/* framing in HCI mode */
+#define ST21NFCA_SOF_EOF_LEN    2
+
+/* Almost every time value is 0 */
+#define ST21NFCA_HCI_LLC_LEN    1
+
+/* Size in worst case :
+ * In normal case CRC len = 2 but byte stuffing
+ * may appear in case one CRC byte = ST21NFCA_SOF_EOF
+ */
+#define ST21NFCA_HCI_LLC_CRC    4
+
+#define ST21NFCA_HCI_LLC_LEN_CRC        (ST21NFCA_SOF_EOF_LEN + \
+						ST21NFCA_HCI_LLC_LEN + \
+						ST21NFCA_HCI_LLC_CRC)
+#define ST21NFCA_HCI_LLC_MIN_SIZE       (1 + ST21NFCA_HCI_LLC_LEN_CRC)
+
+/* Worst case when adding byte stuffing between each byte */
+#define ST21NFCA_HCI_LLC_MAX_PAYLOAD    29
+#define ST21NFCA_HCI_LLC_MAX_SIZE       (ST21NFCA_HCI_LLC_LEN_CRC + 1 + \
+					ST21NFCA_HCI_LLC_MAX_PAYLOAD)
+
+#define DRIVER_DESC "HCI NFC driver for ST21NFCA"
+
+#define ST21NFCA_HCI_MODE 0
+
+#define ST21NFCA_NUM_DEVICES 256
+
+int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops,
+		       char *llc_name, int phy_headroom, int phy_tailroom,
+		       int phy_payload, struct nfc_hci_dev **hdev);
+void st21nfca_hci_remove(struct nfc_hci_dev *hdev);
+
+/* Driver state: COLD until the open callback succeeds, READY until close. */
+enum st21nfca_state {
+	ST21NFCA_ST_COLD,
+	ST21NFCA_ST_READY,
+};
+
+/*
+ * Driver-private data attached to the HCI device.
+ *
+ * NOTE(review): the structure is __packed although it embeds a struct mutex
+ * and pointers - confirm that packing is really intended.
+ */
+struct st21nfca_hci_info {
+	/* physical-layer callbacks and the handle passed back to them */
+	struct nfc_phy_ops *phy_ops;
+	void *phy_id;
+
+	struct nfc_hci_dev *hdev;
+
+	enum st21nfca_state state;
+
+	/* held by the open and close callbacks while they change state */
+	struct mutex info_lock;
+
+	int async_cb_type;
+	data_exchange_cb_t async_cb;
+	void *async_cb_context;
+
+} __packed;
+
+/* Reader RF commands */
+#define ST21NFCA_WR_XCHG_DATA            0x10
+
+#define ST21NFCA_RF_READER_F_GATE               0x14
+#define ST21NFCA_RF_READER_F_DATARATE 0x01
+#define ST21NFCA_RF_READER_F_DATARATE_106 0x01
+#define ST21NFCA_RF_READER_F_DATARATE_212 0x02
+#define ST21NFCA_RF_READER_F_DATARATE_424 0x04
+
+#endif /* __LOCAL_ST21NFCA_H_ */
diff --git a/include/linux/platform_data/st21nfca.h b/include/linux/platform_data/st21nfca.h
new file mode 100644
index 000000000000..1730312398ff
--- /dev/null
+++ b/include/linux/platform_data/st21nfca.h
@@ -0,0 +1,32 @@
+/*
+ * Driver include for the ST21NFCA NFC chip.
+ *
+ * Copyright (C) 2014  STMicroelectronics SAS. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ST21NFCA_HCI_H_
+#define _ST21NFCA_HCI_H_
+
+#include <linux/i2c.h>
+
+#define ST21NFCA_HCI_DRIVER_NAME "st21nfca_hci"
+
+struct st21nfca_nfc_platform_data {
+	unsigned int gpio_irq;
+	unsigned int gpio_ena;
+	unsigned int irq_polarity;
+};
+
+#endif /* _ST21NFCA_HCI_H_ */
-- 
cgit 


From e6853aafd4339dbf2992957ff2616ef7164bc9d4 Mon Sep 17 00:00:00 2001
From: David Ung <davidu@nvidia.com>
Date: Wed, 26 Mar 2014 15:35:37 -0700
Subject: video: Check EDID for HDMI connection

Check EDID Vendor Specific Data Block bytes to see if the connection
is HDMI and set FB_MISC_HDMI.

Signed-off-by: David Ung <davidu@nvidia.com>
Signed-off-by: Christopher Freeman <cfreeman@nvidia.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 drivers/video/fbdev/core/fbmon.c | 9 ++++++++-
 include/linux/fb.h               | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c
index c204ebe6187e..5b0e313849bd 100644
--- a/drivers/video/fbdev/core/fbmon.c
+++ b/drivers/video/fbdev/core/fbmon.c
@@ -1012,13 +1012,20 @@ void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs)
 	while (pos < edid[2]) {
 		u8 len = edid[pos] & 0x1f, type = (edid[pos] >> 5) & 7;
 		pr_debug("Data block %u of %u bytes\n", type, len);
-		if (type == 2)
+		if (type == 2) {
 			for (i = pos; i < pos + len; i++) {
 				u8 idx = edid[pos + i] & 0x7f;
 				svd[svd_n++] = idx;
 				pr_debug("N%sative mode #%d\n",
 					 edid[pos + i] & 0x80 ? "" : "on-n", idx);
 			}
+		} else if (type == 3 && len >= 3) {
+			/* Check Vendor Specific Data Block.  For HDMI,
+			   it is always 00-0C-03 for HDMI Licensing, LLC. */
+			if (edid[pos + 1] == 3 && edid[pos + 2] == 0xc &&
+			    edid[pos + 3] == 0)
+				specs->misc |= FB_MISC_HDMI;
+		}
 		pos += len + 1;
 	}
 
diff --git a/include/linux/fb.h b/include/linux/fb.h
index fe6ac956550e..506242979eea 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -47,6 +47,7 @@ struct device_node;
 
 #define FB_MISC_PRIM_COLOR	1
 #define FB_MISC_1ST_DETAIL	2	/* First Detailed Timing is preferred */
+#define FB_MISC_HDMI		4
 struct fb_chroma {
 	__u32 redx;	/* in fraction of 1024 */
 	__u32 greenx;
-- 
cgit 


From fa372a51cb5f93800f711473e5a36e0e0c9a8f00 Mon Sep 17 00:00:00 2001
From: Markus Mayer <markus.mayer@linaro.org>
Date: Tue, 8 Apr 2014 15:19:43 -0700
Subject: mmc: Delay the card_event callback into the mmc_rescan worker

This change removes the callback from atomic context which it doesn't
need to be in, and puts it in line with the debounced rescan.

This code is based on these e-mail threads with Christian Daudt:

  https://lkml.org/lkml/2013/8/19/539
  https://lkml.org/lkml/2014/3/19/79

Signed-off-by: Markus Mayer <markus.mayer@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/core.c      | 5 +++++
 drivers/mmc/core/slot-gpio.c | 4 +---
 include/linux/mmc/host.h     | 2 ++
 3 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index acbc3f2aaaf9..f396d1bb4ac4 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2403,6 +2403,11 @@ void mmc_rescan(struct work_struct *work)
 		container_of(work, struct mmc_host, detect.work);
 	int i;
 
+	if (host->trigger_card_event && host->ops->card_event) {
+		host->ops->card_event(host);
+		host->trigger_card_event = false;
+	}
+
 	if (host->rescan_disable)
 		return;
 
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index f7650b899e3d..5f89cb83d5f0 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -32,9 +32,7 @@ static irqreturn_t mmc_gpio_cd_irqt(int irq, void *dev_id)
 	/* Schedule a card detection after a debounce timeout */
 	struct mmc_host *host = dev_id;
 
-	if (host->ops->card_event)
-		host->ops->card_event(host);
-
+	host->trigger_card_event = true;
 	mmc_detect_change(host, msecs_to_jiffies(200));
 
 	return IRQ_HANDLED;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 35354207e71f..0cf705c83998 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -319,6 +319,8 @@ struct mmc_host {
 	int			rescan_disable;	/* disable card detection */
 	int			rescan_entered;	/* used with nonremovable devices */
 
+	bool			trigger_card_event; /* card_event necessary */
+
 	struct mmc_card		*card;		/* device attached to this host */
 
 	wait_queue_head_t	wq;
-- 
cgit 


From 297d40560bc8f474adbb43178e3118321fa702ea Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@freescale.com>
Date: Fri, 4 Apr 2014 22:42:48 -0300
Subject: mmc: card.h: Use NULL instead of 0 for END_FIXUP

Fix the following sparse warnings:

drivers/mmc/card/block.c:2421:9: warning: Using plain integer as NULL pointer

drivers/mmc/core/quirks.c:69:9: warning: Using plain integer as NULL pointer

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 include/linux/mmc/card.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index b73027298b3a..aa7e57f60fb2 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -353,7 +353,7 @@ struct mmc_fixup {
 #define CID_OEMID_ANY ((unsigned short) -1)
 #define CID_NAME_ANY (NULL)
 
-#define END_FIXUP { 0 }
+#define END_FIXUP { NULL }
 
 #define _FIXUP_EXT(_name, _manfid, _oemid, _rev_start, _rev_end,	\
 		   _cis_vendor, _cis_device,				\
-- 
cgit 


From dfeec843fb237d73947e818f961e8d6f0df22b01 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Wed, 1 Jan 2014 16:09:21 +0100
Subject: KVM: add kvm_is_error_gpa() helper

It's quite common (in the s390 guest access code) to test if a guest
physical address points to a valid guest memory area or not.
So add a simple helper function in common code, since this might be
of interest for other architectures as well.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 include/linux/kvm_host.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7d21cf9f4380..471d1400c4ac 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -879,6 +879,13 @@ static inline hpa_t pfn_to_hpa(pfn_t pfn)
 	return (hpa_t)pfn << PAGE_SHIFT;
 }
 
+static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa)
+{
+	unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
+
+	return kvm_is_error_hva(hva);
+}
+
 static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
 	set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
-- 
cgit 


From 8df4053f0532df8fe47d0434af51676b0fa65491 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Mon, 14 Apr 2014 14:41:56 +0300
Subject: platform_data: edma: Be precise with the paRAM struct

The edmacc_param struct should follow the layout of the paRAM area in the
HW. Be explicit on the size of the fields (u32) and also mark the struct
as packed to avoid any padding on non 32bit architectures.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Joel Fernandes <joelf@ti.com>
Reviewed-and-Tested-by: Joel Fernandes <joelf@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/platform_data/edma.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index f50821cb64be..923f8a3e4ce0 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -43,15 +43,15 @@
 
 /* PaRAM slots are laid out like this */
 struct edmacc_param {
-	unsigned int opt;
-	unsigned int src;
-	unsigned int a_b_cnt;
-	unsigned int dst;
-	unsigned int src_dst_bidx;
-	unsigned int link_bcntrld;
-	unsigned int src_dst_cidx;
-	unsigned int ccnt;
-};
+	u32 opt;
+	u32 src;
+	u32 a_b_cnt;
+	u32 dst;
+	u32 src_dst_bidx;
+	u32 link_bcntrld;
+	u32 src_dst_cidx;
+	u32 ccnt;
+} __packed;
 
 /* fields in edmacc_param.opt */
 #define SAM		BIT(0)
-- 
cgit 


From c04ae71c9c264312a6f57d2665a79f7bbccf8758 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Tue, 8 Apr 2014 17:33:19 -0700
Subject: sched_clock: Remove deprecated setup_sched_clock() API

Remove the 32-bit only setup_sched_clock() API now that all users
have been converted to the 64-bit friendly sched_clock_register().

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/sched_clock.h |  1 -
 kernel/time/sched_clock.c   | 13 -------------
 2 files changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h
index cddf0c2940b6..efa931c5cef1 100644
--- a/include/linux/sched_clock.h
+++ b/include/linux/sched_clock.h
@@ -14,7 +14,6 @@ extern void sched_clock_postinit(void);
 static inline void sched_clock_postinit(void) { }
 #endif
 
-extern void setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate);
 extern void sched_clock_register(u64 (*read)(void), int bits,
 				 unsigned long rate);
 
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 4d23dc4d8139..445106d2c729 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -49,13 +49,6 @@ static u64 notrace jiffy_sched_clock_read(void)
 	return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static u32 __read_mostly (*read_sched_clock_32)(void);
-
-static u64 notrace read_sched_clock_32_wrapper(void)
-{
-	return read_sched_clock_32();
-}
-
 static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
@@ -176,12 +169,6 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 	pr_debug("Registered %pF as sched_clock source\n", read);
 }
 
-void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
-{
-	read_sched_clock_32 = read;
-	sched_clock_register(read_sched_clock_32_wrapper, bits, rate);
-}
-
 void __init sched_clock_postinit(void)
 {
 	/*
-- 
cgit 


From a6c39cb4f71e61aff19d07e2d0b26bb6e3548fae Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Tue, 22 Apr 2014 15:09:05 -0600
Subject: fs/bio: remove bs parameter in biovec_create_pool

bs is no longer used in biovec_create_pool since 9f060e2231ca96 ("block:
Convert integrity to bvec_alloc_bs()")

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 fs/bio-integrity.c  | 2 +-
 fs/bio.c            | 4 ++--
 include/linux/bio.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 1c2ce0c87711..9e241063a616 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -617,7 +617,7 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size)
 	if (!bs->bio_integrity_pool)
 		return -1;
 
-	bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size);
+	bs->bvec_integrity_pool = biovec_create_pool(pool_size);
 	if (!bs->bvec_integrity_pool) {
 		mempool_destroy(bs->bio_integrity_pool);
 		return -1;
diff --git a/fs/bio.c b/fs/bio.c
index 4c9c5095bacb..ca55d37436d6 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1861,7 +1861,7 @@ EXPORT_SYMBOL_GPL(bio_trim);
  * create memory pools for biovec's in a bio_set.
  * use the global biovec slabs created for general use.
  */
-mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries)
+mempool_t *biovec_create_pool(int pool_entries)
 {
 	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
 
@@ -1924,7 +1924,7 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 	if (!bs->bio_pool)
 		goto bad;
 
-	bs->bvec_pool = biovec_create_pool(bs, pool_size);
+	bs->bvec_pool = biovec_create_pool(pool_size);
 	if (!bs->bvec_pool)
 		goto bad;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index bba550826921..5a645769f020 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -333,7 +333,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors,
 
 extern struct bio_set *bioset_create(unsigned int, unsigned int);
 extern void bioset_free(struct bio_set *);
-extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries);
+extern mempool_t *biovec_create_pool(int pool_entries);
 
 extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
 extern void bio_put(struct bio *);
-- 
cgit 


From 603fb42a66499ab353466c7afa3d38beea20a8a9 Mon Sep 17 00:00:00 2001
From: Sebastian Capella <sebastian.capella@linaro.org>
Date: Tue, 25 Mar 2014 01:20:29 +0100
Subject: ARM: 8011/1: ARM hibernation / suspend-to-disk

Enable hibernation for ARM architectures and provide ARM
architecture specific calls used during hibernation.

The swsusp hibernation framework depends on the
platform first having functional suspend/resume.

Then, in order to enable hibernation on a given platform, a
platform_hibernation_ops structure may need to be registered with
the system in order to save/restore any SoC-specific / cpu specific
state needing (re)init over a suspend-to-disk/resume-from-disk cycle.

For example:

     - "secure" SoCs that have different sets of control registers
       and/or different CR reg access patterns.

     - SoCs with L2 caches as the activation sequence there is
       SoC-dependent; a full off-on cycle for L2 is not done
       by the hibernation support code.

     - SoCs requiring steps on wakeup _before_ the "generic" parts
       done by cpu_suspend / cpu_resume can work correctly.

     - SoCs having persistent state which is maintained during suspend
       and resume, but will be lost during the power off cycle after
       suspend-to-disk.

This is a rebase/rework of Frank Hofmann's v5 hibernation patchset.

Acked-by: Russ Dill <Russ.Dill@ti.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Signed-off-by: Sebastian Capella <sebastian.capella@linaro.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
[fixed duplicate virt_to_pfn() definition --rmk]
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/Kconfig            |   5 +++
 arch/arm/kernel/Makefile    |   1 +
 arch/arm/kernel/hibernate.c | 107 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/suspend.h     |   2 +
 4 files changed, 115 insertions(+)
 create mode 100644 arch/arm/kernel/hibernate.c

(limited to 'include/linux')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ab438cb5af55..58506175a3ea 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -2294,6 +2294,11 @@ config ARCH_SUSPEND_POSSIBLE
 config ARM_CPU_SUSPEND
 	def_bool PM_SLEEP
 
+config ARCH_HIBERNATION_POSSIBLE
+	bool
+	depends on MMU
+	default y if ARCH_SUSPEND_POSSIBLE
+
 endmenu
 
 source "net/Kconfig"
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index a766bcbaf8ad..10f0464206a2 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_ARTHUR)		+= arthur.o
 obj-$(CONFIG_ISA_DMA)		+= dma-isa.o
 obj-$(CONFIG_PCI)		+= bios32.o isa.o
 obj-$(CONFIG_ARM_CPU_SUSPEND)	+= sleep.o suspend.o
+obj-$(CONFIG_HIBERNATION)	+= hibernate.o
 obj-$(CONFIG_SMP)		+= smp.o
 ifdef CONFIG_MMU
 obj-$(CONFIG_SMP)		+= smp_tlb.o
diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c
new file mode 100644
index 000000000000..bb8b79648643
--- /dev/null
+++ b/arch/arm/kernel/hibernate.c
@@ -0,0 +1,107 @@
+/*
+ * Hibernation support specific for ARM
+ *
+ * Derived from work on ARM hibernation support by:
+ *
+ * Ubuntu project, hibernation support for mach-dove
+ * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu)
+ * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.)
+ *  https://lkml.org/lkml/2010/6/18/4
+ *  https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html
+ *  https://patchwork.kernel.org/patch/96442/
+ *
+ * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+ *
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/mm.h>
+#include <linux/suspend.h>
+#include <asm/system_misc.h>
+#include <asm/idmap.h>
+#include <asm/suspend.h>
+#include <asm/memory.h>
+
+extern const void __nosave_begin, __nosave_end;
+
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin);
+	unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1);
+
+	return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn);
+}
+
+void notrace save_processor_state(void)
+{
+	WARN_ON(num_online_cpus() != 1);
+	local_fiq_disable();
+}
+
+void notrace restore_processor_state(void)
+{
+	local_fiq_enable();
+}
+
+/*
+ * Snapshot kernel memory and reset the system.
+ *
+ * swsusp_save() is executed in the suspend finisher so that the CPU
+ * context pointer and memory are part of the saved image, which is
+ * required by the resume kernel image to restart execution from
+ * swsusp_arch_suspend().
+ *
+ * soft_restart is not technically needed, but is used to get success
+ * returned from cpu_suspend.
+ *
+ * When soft reboot completes, the hibernation snapshot is written out.
+ */
+static int notrace arch_save_image(unsigned long unused)
+{
+	int ret;
+
+	ret = swsusp_save();
+	if (ret == 0)
+		soft_restart(virt_to_phys(cpu_resume));
+	return ret;
+}
+
+/*
+ * Save the current CPU state before suspend / poweroff.
+ */
+int notrace swsusp_arch_suspend(void)
+{
+	return cpu_suspend(0, arch_save_image);
+}
+
+/*
+ * Restore page contents for physical pages that were in use during loading
+ * hibernation image.  Switch to idmap_pgd so the physical page tables
+ * are overwritten with the same contents.
+ */
+static void notrace arch_restore_image(void *unused)
+{
+	struct pbe *pbe;
+
+	cpu_switch_mm(idmap_pgd, &init_mm);
+	for (pbe = restore_pblist; pbe; pbe = pbe->next)
+		copy_page(pbe->orig_address, pbe->address);
+
+	soft_restart(virt_to_phys(cpu_resume));
+}
+
+static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata;
+
+/*
+ * Resume from the hibernation image.
+ * Due to the kernel heap / data restore, stack contents change underneath
+ * and that would make function calls impossible; switch to a temporary
+ * stack within the nosave region to avoid that problem.
+ */
+int swsusp_arch_resume(void)
+{
+	extern void call_with_stack(void (*fn)(void *), void *arg, void *sp);
+	call_with_stack(arch_restore_image, 0,
+		resume_stack + ARRAY_SIZE(resume_stack));
+	return 0;
+}
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index f73cabf59012..38bbf95109da 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -320,6 +320,8 @@ extern unsigned long get_safe_page(gfp_t gfp_mask);
 extern void hibernation_set_ops(const struct platform_hibernation_ops *ops);
 extern int hibernate(void);
 extern bool system_entering_hibernation(void);
+asmlinkage int swsusp_save(void);
+extern struct pbe *restore_pblist;
 #else /* CONFIG_HIBERNATION */
 static inline void register_nosave_region(unsigned long b, unsigned long e) {}
 static inline void register_nosave_region_late(unsigned long b, unsigned long e) {}
-- 
cgit 


From 3de0b592394d17b2c41a261a6a493a521213f299 Mon Sep 17 00:00:00 2001
From: Venkata Duvvuru <VenkatKumar.Duvvuru@Emulex.Com>
Date: Mon, 21 Apr 2014 15:37:59 +0530
Subject: ethtool: Support for configurable RSS hash key

This ethtool patch primarily copies the ioctl command data structures
from/to the User space and invokes the driver hook.

Signed-off-by: Venkat Duvvuru <VenkatKumar.Duvvuru@Emulex.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h      |  13 +++
 include/uapi/linux/ethtool.h |  32 +++++++
 net/core/ethtool.c           | 221 ++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 252 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 0a114d05f68d..212f537fc686 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -154,13 +154,23 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
  * @reset: Reset (part of) the device, as specified by a bitmask of
  *	flags from &enum ethtool_reset_flags.  Returns a negative
  *	error code or zero.
+ * @get_rxfh_key_size: Get the size of the RX flow hash key.
+ *	Returns zero if not supported for this specific device.
  * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table.
  *	Returns zero if not supported for this specific device.
  * @get_rxfh_indir: Get the contents of the RX flow hash indirection table.
  *	Will not be called if @get_rxfh_indir_size returns zero.
+ * @get_rxfh: Get the contents of the RX flow hash indirection table and hash
+ *	key.
+ *	Will not be called if both @get_rxfh_indir_size and @get_rxfh_key_size
+ *	return zero.
  *	Returns a negative error code or zero.
  * @set_rxfh_indir: Set the contents of the RX flow hash indirection table.
  *	Will not be called if @get_rxfh_indir_size returns zero.
+ * @set_rxfh: Set the contents of the RX flow hash indirection table and
+ *	hash key.
+ *	Will not be called if both @get_rxfh_indir_size and @get_rxfh_key_size
+ *	return zero.
  *	Returns a negative error code or zero.
  * @get_channels: Get number of channels.
  * @set_channels: Set number of channels.  Returns a negative error code or
@@ -232,7 +242,10 @@ struct ethtool_ops {
 	int	(*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
 	int	(*flash_device)(struct net_device *, struct ethtool_flash *);
 	int	(*reset)(struct net_device *, u32 *);
+	u32	(*get_rxfh_key_size)(struct net_device *);
 	u32	(*get_rxfh_indir_size)(struct net_device *);
+	int	(*get_rxfh)(struct net_device *, u32 *, u8 *);
+	int	(*set_rxfh)(struct net_device *, u32 *, u8 *);
 	int	(*get_rxfh_indir)(struct net_device *, u32 *);
 	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index fd161e91b6d7..d47d31d6fa0e 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -846,6 +846,35 @@ struct ethtool_rxfh_indir {
 	__u32	ring_index[0];
 };
 
+/**
+ * struct ethtool_rxfh - command to get/set RX flow hash indir and/or hash key.
+ * @cmd: Specific command number - %ETHTOOL_GRSSH or %ETHTOOL_SRSSH
+ * @rss_context: RSS context identifier.
+ * @indir_size: On entry, the array size of the user buffer, which may be zero.
+ *		On return from %ETHTOOL_GRSSH, the array size of the hardware
+ *		indirection table.
+ * @key_size:	On entry, the array size of the user buffer in bytes,
+ *		which may be zero.
+ *		On return from %ETHTOOL_GRSSH, the size of the RSS hash key.
+ * @rsvd:	Reserved for future extensions.
+ * @rss_config: RX ring/queue index for each hash value i.e., indirection table
+ *		of size @indir_size followed by hash key of size @key_size.
+ *
+ * For %ETHTOOL_GRSSH, a @indir_size and @key_size of zero means that only the
+ * sizes should be returned.  For %ETHTOOL_SRSSH, a @indir_size of 0xDEADBEEF
+ * means that indir table setting is not requested and a @indir_size of zero
+ * means the indir table should be reset to default values.  This last feature
+ * is not supported by the original implementations.
+ */
+struct ethtool_rxfh {
+	__u32   cmd;
+	__u32	rss_context;
+	__u32   indir_size;
+	__u32   key_size;
+	__u32	rsvd[2];
+	__u32   rss_config[0];
+};
+
 /**
  * struct ethtool_rx_ntuple_flow_spec - specification for RX flow filter
  * @flow_type: Type of match to perform, e.g. %TCP_V4_FLOW
@@ -1118,6 +1147,9 @@ enum ethtool_sfeatures_retval_bits {
 #define ETHTOOL_GEEE		0x00000044 /* Get EEE settings */
 #define ETHTOOL_SEEE		0x00000045 /* Set EEE settings */
 
+#define ETHTOOL_GRSSH		0x00000046 /* Get RX flow hash configuration */
+#define ETHTOOL_SRSSH		0x00000047 /* Set RX flow hash configuration */
+
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
 #define SPARC_ETH_SSET		ETHTOOL_SSET
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 640ba0e5831c..1d72786ef866 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -557,6 +557,23 @@ err_out:
 	return ret;
 }
 
+static int ethtool_copy_validate_indir(u32 *indir, void __user *useraddr,
+					struct ethtool_rxnfc *rx_rings,
+					u32 size)
+{
+	int ret = 0, i;
+
+	if (copy_from_user(indir, useraddr, size * sizeof(indir[0])))
+		ret = -EFAULT;
+
+	/* Validate ring indices */
+	for (i = 0; i < size; i++) {
+		if (indir[i] >= rx_rings->data)
+			ret = -EINVAL;
+	}
+	return ret;
+}
+
 static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
 						     void __user *useraddr)
 {
@@ -613,6 +630,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 	u32 *indir;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 	int ret;
+	u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
 
 	if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir ||
 	    !ops->get_rxnfc)
@@ -643,28 +661,196 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 		for (i = 0; i < dev_size; i++)
 			indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
 	} else {
-		if (copy_from_user(indir,
-				  useraddr +
-				  offsetof(struct ethtool_rxfh_indir,
-					   ring_index[0]),
-				  dev_size * sizeof(indir[0]))) {
+		ret = ethtool_copy_validate_indir(indir,
+						  useraddr + ringidx_offset,
+						  &rx_rings,
+						  dev_size);
+		if (ret)
+			goto out;
+	}
+
+	ret = ops->set_rxfh_indir(dev, indir);
+
+out:
+	kfree(indir);
+	return ret;
+}
+
+static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
+					       void __user *useraddr)
+{
+	int ret;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	u32 user_indir_size = 0, user_key_size = 0;
+	u32 dev_indir_size = 0, dev_key_size = 0;
+	u32 total_size;
+	u32 indir_offset, indir_bytes;
+	u32 key_offset;
+	u32 *indir = NULL;
+	u8 *hkey = NULL;
+	u8 *rss_config;
+
+	if (!(dev->ethtool_ops->get_rxfh_indir_size ||
+	      dev->ethtool_ops->get_rxfh_key_size) ||
+	      !dev->ethtool_ops->get_rxfh)
+		return -EOPNOTSUPP;
+
+	if (ops->get_rxfh_indir_size)
+		dev_indir_size = ops->get_rxfh_indir_size(dev);
+
+	indir_offset = offsetof(struct ethtool_rxfh, indir_size);
+
+	if (copy_from_user(&user_indir_size,
+			   useraddr + indir_offset,
+			   sizeof(user_indir_size)))
+		return -EFAULT;
+
+	if (copy_to_user(useraddr + indir_offset,
+			 &dev_indir_size, sizeof(dev_indir_size)))
+		return -EFAULT;
+
+	if (ops->get_rxfh_key_size)
+		dev_key_size = ops->get_rxfh_key_size(dev);
+
+	if ((dev_key_size + dev_indir_size) == 0)
+		return -EOPNOTSUPP;
+
+	key_offset = offsetof(struct ethtool_rxfh, key_size);
+
+	if (copy_from_user(&user_key_size,
+			   useraddr + key_offset,
+			   sizeof(user_key_size)))
+		return -EFAULT;
+
+	if (copy_to_user(useraddr + key_offset,
+			 &dev_key_size, sizeof(dev_key_size)))
+		return -EFAULT;
+
+	/* If the user buffer size is 0, this is just a query for the
+	 * device table size and key size.  Otherwise, if the User size is
+	 * not equal to device table size or key size it's an error.
+	 */
+	if (!user_indir_size && !user_key_size)
+		return 0;
+
+	if ((user_indir_size && (user_indir_size != dev_indir_size)) ||
+	    (user_key_size && (user_key_size != dev_key_size)))
+		return -EINVAL;
+
+	indir_bytes = user_indir_size * sizeof(indir[0]);
+	total_size = indir_bytes + user_key_size;
+	rss_config = kzalloc(total_size, GFP_USER);
+	if (!rss_config)
+		return -ENOMEM;
+
+	if (user_indir_size)
+		indir = (u32 *)rss_config;
+
+	if (user_key_size)
+		hkey = rss_config + indir_bytes;
+
+	ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey);
+	if (!ret) {
+		if (copy_to_user(useraddr +
+				 offsetof(struct ethtool_rxfh, rss_config[0]),
+				 rss_config, total_size))
 			ret = -EFAULT;
+	}
+
+	kfree(rss_config);
+
+	return ret;
+}
+
+static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
+					       void __user *useraddr)
+{
+	int ret;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_rxnfc rx_rings;
+	u32 user_indir_size = 0, dev_indir_size = 0, i;
+	u32 user_key_size = 0, dev_key_size = 0;
+	u32 *indir = NULL, indir_bytes = 0;
+	u8 *hkey = NULL;
+	u8 *rss_config;
+	u32 indir_offset, key_offset;
+	u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
+
+	if (!(ops->get_rxfh_indir_size || ops->get_rxfh_key_size) ||
+	    !ops->get_rxnfc || !ops->set_rxfh)
+		return -EOPNOTSUPP;
+
+	if (ops->get_rxfh_indir_size)
+		dev_indir_size = ops->get_rxfh_indir_size(dev);
+
+	indir_offset = offsetof(struct ethtool_rxfh, indir_size);
+	if (copy_from_user(&user_indir_size,
+			   useraddr + indir_offset,
+			   sizeof(user_indir_size)))
+		return -EFAULT;
+
+	if (ops->get_rxfh_key_size)
+		dev_key_size = dev->ethtool_ops->get_rxfh_key_size(dev);
+
+	if ((dev_key_size + dev_indir_size) == 0)
+		return -EOPNOTSUPP;
+
+	key_offset = offsetof(struct ethtool_rxfh, key_size);
+	if (copy_from_user(&user_key_size,
+			   useraddr + key_offset,
+			   sizeof(user_key_size)))
+		return -EFAULT;
+
+	/* If either indir or hash key is valid, proceed further.
+	 */
+	if ((user_indir_size && ((user_indir_size != 0xDEADBEEF) &&
+				 user_indir_size != dev_indir_size)) ||
+	    (user_key_size && (user_key_size != dev_key_size)))
+		return -EINVAL;
+
+	if (user_indir_size != 0xDEADBEEF)
+		indir_bytes = dev_indir_size * sizeof(indir[0]);
+
+	rss_config = kzalloc(indir_bytes + user_key_size, GFP_USER);
+	if (!rss_config)
+		return -ENOMEM;
+
+	rx_rings.cmd = ETHTOOL_GRXRINGS;
+	ret = ops->get_rxnfc(dev, &rx_rings, NULL);
+	if (ret)
+		goto out;
+
+	/* user_indir_size == 0 means reset the indir table to default.
+	 * user_indir_size == 0xDEADBEEF means indir setting is not requested.
+	 */
+	if (user_indir_size && user_indir_size != 0xDEADBEEF) {
+		indir = (u32 *)rss_config;
+		ret = ethtool_copy_validate_indir(indir,
+						  useraddr + rss_cfg_offset,
+						  &rx_rings,
+						  user_indir_size);
+		if (ret)
 			goto out;
-		}
+	} else if (user_indir_size == 0) {
+		indir = (u32 *)rss_config;
+		for (i = 0; i < dev_indir_size; i++)
+			indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+	}
 
-		/* Validate ring indices */
-		for (i = 0; i < dev_size; i++) {
-			if (indir[i] >= rx_rings.data) {
-				ret = -EINVAL;
-				goto out;
-			}
+	if (user_key_size) {
+		hkey = rss_config + indir_bytes;
+		if (copy_from_user(hkey,
+				   useraddr + rss_cfg_offset + indir_bytes,
+				   user_key_size)) {
+			ret = -EFAULT;
+			goto out;
 		}
 	}
 
-	ret = ops->set_rxfh_indir(dev, indir);
+	ret = ops->set_rxfh(dev, indir, hkey);
 
 out:
-	kfree(indir);
+	kfree(rss_config);
 	return ret;
 }
 
@@ -1491,6 +1677,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_GRXCLSRULE:
 	case ETHTOOL_GRXCLSRLALL:
 	case ETHTOOL_GRXFHINDIR:
+	case ETHTOOL_GRSSH:
 	case ETHTOOL_GFEATURES:
 	case ETHTOOL_GCHANNELS:
 	case ETHTOOL_GET_TS_INFO:
@@ -1628,6 +1815,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_SRXFHINDIR:
 		rc = ethtool_set_rxfh_indir(dev, useraddr);
 		break;
+	case ETHTOOL_GRSSH:
+		rc = ethtool_get_rxfh(dev, useraddr);
+		break;
+	case ETHTOOL_SRSSH:
+		rc = ethtool_set_rxfh(dev, useraddr);
+		break;
 	case ETHTOOL_GFEATURES:
 		rc = ethtool_get_features(dev, useraddr);
 		break;
-- 
cgit 


From 4cd3675ebf74d7f559038ded6aa8088e4099a83d Mon Sep 17 00:00:00 2001
From: Chema Gonzalez <chema@google.com>
Date: Mon, 21 Apr 2014 09:21:24 -0700
Subject: filter: added BPF random opcode

Added a new ancillary load (bpf call in eBPF parlance) that produces
a 32-bit random number. We are implementing it as an ancillary load
(instead of an ISA opcode) because (a) it is simpler, (b) allows easy
JITing, and (c) seems more in line with generic ISAs that do not have
"get a random number" as an instruction, but as an OS call.

The main use for this ancillary load is to perform random packet sampling.

Signed-off-by: Chema Gonzalez <chema@google.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/filter.txt | 13 +++++++++++++
 include/linux/filter.h              |  1 +
 include/uapi/linux/filter.h         |  3 ++-
 net/core/filter.c                   | 12 ++++++++++++
 tools/net/bpf_exp.l                 |  1 +
 tools/net/bpf_exp.y                 | 11 ++++++++++-
 6 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 81f940f4e884..82e1cb0b3da8 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -281,6 +281,7 @@ Possible BPF extensions are shown in the following table:
   cpu                                   raw_smp_processor_id()
   vlan_tci                              vlan_tx_tag_get(skb)
   vlan_pr                               vlan_tx_tag_present(skb)
+  rand                                  prandom_u32()
 
 These extensions can also be prefixed with '#'.
 Examples for low-level BPF:
@@ -308,6 +309,18 @@ Examples for low-level BPF:
   ret #-1
   drop: ret #0
 
+** icmp random packet sampling, 1 in 4
+  ldh [12]
+  jne #0x800, drop
+  ldb [23]
+  jneq #1, drop
+  # get a random uint32 number
+  ld rand
+  mod #4
+  jneq #1, drop
+  ret #-1
+  drop: ret #0
+
 ** SECCOMP filter example:
 
   ld [4]                  /* offsetof(struct seccomp_data, arch) */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 024fd03e5d18..759abf78dd61 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -223,6 +223,7 @@ enum {
 	BPF_S_ANC_VLAN_TAG,
 	BPF_S_ANC_VLAN_TAG_PRESENT,
 	BPF_S_ANC_PAY_OFFSET,
+	BPF_S_ANC_RANDOM,
 };
 
 #endif /* __LINUX_FILTER_H__ */
diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h
index 8eb9ccaa5b48..253b4d42cf2b 100644
--- a/include/uapi/linux/filter.h
+++ b/include/uapi/linux/filter.h
@@ -130,7 +130,8 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_VLAN_TAG	44
 #define SKF_AD_VLAN_TAG_PRESENT 48
 #define SKF_AD_PAY_OFFSET	52
-#define SKF_AD_MAX	56
+#define SKF_AD_RANDOM	56
+#define SKF_AD_MAX	60
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index cd58614660cf..78a636e60a0b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -643,6 +643,12 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
 	return raw_smp_processor_id();
 }
 
+/* note that this only generates 32-bit random numbers */
+static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+{
+	return (u64)prandom_u32();
+}
+
 /* Register mappings for user programs. */
 #define A_REG		0
 #define X_REG		7
@@ -779,6 +785,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_NLATTR:
 	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
 	case SKF_AD_OFF + SKF_AD_CPU:
+	case SKF_AD_OFF + SKF_AD_RANDOM:
 		/* arg1 = ctx */
 		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
 		insn->a_reg = ARG1_REG;
@@ -812,6 +819,9 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		case SKF_AD_OFF + SKF_AD_CPU:
 			insn->imm = __get_raw_cpu_id - __bpf_call_base;
 			break;
+		case SKF_AD_OFF + SKF_AD_RANDOM:
+			insn->imm = __get_random_u32 - __bpf_call_base;
+			break;
 		}
 		break;
 
@@ -1362,6 +1372,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 			ANCILLARY(VLAN_TAG);
 			ANCILLARY(VLAN_TAG_PRESENT);
 			ANCILLARY(PAY_OFFSET);
+			ANCILLARY(RANDOM);
 			}
 
 			/* ancillary operation unknown or unsupported */
@@ -1746,6 +1757,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
 		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
 		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
 		[BPF_S_ANC_PAY_OFFSET]	= BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_RANDOM]	= BPF_LD|BPF_B|BPF_ABS,
 		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
 		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
 		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l
index bf7be77ddd62..833a96611da6 100644
--- a/tools/net/bpf_exp.l
+++ b/tools/net/bpf_exp.l
@@ -92,6 +92,7 @@ extern void yyerror(const char *str);
 "#"?("cpu")	{ return K_CPU; }
 "#"?("vlan_tci") { return K_VLANT; }
 "#"?("vlan_pr")	{ return K_VLANP; }
+"#"?("rand")	{ return K_RAND; }
 
 ":"		{ return ':'; }
 ","		{ return ','; }
diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y
index d15efc989ef5..e6306c51c26f 100644
--- a/tools/net/bpf_exp.y
+++ b/tools/net/bpf_exp.y
@@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type);
 %token OP_LDXI
 
 %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE
-%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF
+%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND
 
 %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%'
 
@@ -164,6 +164,9 @@ ldb
 	| OP_LDB K_POFF {
 		bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+	| OP_LDB K_RAND {
+		bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_RANDOM); }
 	;
 
 ldh
@@ -212,6 +215,9 @@ ldh
 	| OP_LDH K_POFF {
 		bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+	| OP_LDH K_RAND {
+		bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_RANDOM); }
 	;
 
 ldi
@@ -265,6 +271,9 @@ ld
 	| OP_LD K_POFF {
 		bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+	| OP_LD K_RAND {
+		bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_RANDOM); }
 	| OP_LD 'M' '[' number ']' {
 		bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); }
 	| OP_LD '[' 'x' '+' number ']' {
-- 
cgit 


From 4f520900522fd596e336c07e9aafd5b7a9564235 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Tue, 22 Apr 2014 21:31:54 -0400
Subject: netlink: have netlink per-protocol bind function return an error
 code.

Have the netlink per-protocol optional bind function return an int error code
rather than void to signal a failure.

This will enable netlink protocols to perform extra checks including
capabilities and permissions verifications when updating memberships in
multicast groups.

In netlink_bind() and netlink_setsockopt() the call to the per-protocol bind
function was moved above the multicast group update to prevent any access to
the multicast socket groups before checking with the per-protocol bind
function.  This will enable the per-protocol bind function to be used to check
permissions which could be denied before making them available, and to avoid
the messy job of undoing the addition should the per-protocol bind function
fail.

The netfilter subsystem seems to be the only one currently using the
per-protocol bind function.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h   |  3 ++-
 net/netfilter/nfnetlink.c |  3 ++-
 net/netlink/af_netlink.c  | 68 +++++++++++++++++++++++++++++++++--------------
 net/netlink/af_netlink.h  |  6 +++--
 4 files changed, 56 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index aad8eeaf416d..5146ce066498 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -45,7 +45,8 @@ struct netlink_kernel_cfg {
 	unsigned int	flags;
 	void		(*input)(struct sk_buff *skb);
 	struct mutex	*cb_mutex;
-	void		(*bind)(int group);
+	int		(*bind)(int group);
+	void		(*unbind)(int group);
 	bool		(*compare)(struct net *net, struct sock *sk);
 };
 
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 0df800a454ec..6e42dcfad40a 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -400,7 +400,7 @@ static void nfnetlink_rcv(struct sk_buff *skb)
 }
 
 #ifdef CONFIG_MODULES
-static void nfnetlink_bind(int group)
+static int nfnetlink_bind(int group)
 {
 	const struct nfnetlink_subsystem *ss;
 	int type = nfnl_group2type[group];
@@ -410,6 +410,7 @@ static void nfnetlink_bind(int group)
 	rcu_read_unlock();
 	if (!ss)
 		request_module("nfnetlink-subsys-%d", type);
+	return 0;
 }
 #endif
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 894cda0206bb..7e8d229bc010 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1206,7 +1206,8 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
 	struct module *module = NULL;
 	struct mutex *cb_mutex;
 	struct netlink_sock *nlk;
-	void (*bind)(int group);
+	int (*bind)(int group);
+	void (*unbind)(int group);
 	int err = 0;
 
 	sock->state = SS_UNCONNECTED;
@@ -1232,6 +1233,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
 		err = -EPROTONOSUPPORT;
 	cb_mutex = nl_table[protocol].cb_mutex;
 	bind = nl_table[protocol].bind;
+	unbind = nl_table[protocol].unbind;
 	netlink_unlock_table();
 
 	if (err < 0)
@@ -1248,6 +1250,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
 	nlk = nlk_sk(sock->sk);
 	nlk->module = module;
 	nlk->netlink_bind = bind;
+	nlk->netlink_unbind = unbind;
 out:
 	return err;
 
@@ -1301,6 +1304,7 @@ static int netlink_release(struct socket *sock)
 			kfree_rcu(old, rcu);
 			nl_table[sk->sk_protocol].module = NULL;
 			nl_table[sk->sk_protocol].bind = NULL;
+			nl_table[sk->sk_protocol].unbind = NULL;
 			nl_table[sk->sk_protocol].flags = 0;
 			nl_table[sk->sk_protocol].registered = 0;
 		}
@@ -1411,6 +1415,19 @@ static int netlink_realloc_groups(struct sock *sk)
 	return err;
 }
 
+static void netlink_unbind(int group, long unsigned int groups,
+			   struct netlink_sock *nlk)
+{
+	int undo;
+
+	if (!nlk->netlink_unbind)	/* no unbind callback set: nothing to do */
+		return;
+	/* Undo only the groups below @group whose bit is set in @groups. */
+	for (undo = 0; undo < group; undo++)
+		if (test_bit(undo, &groups))
+			nlk->netlink_unbind(undo);
+}
+
 static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			int addr_len)
 {
@@ -1419,6 +1436,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
 	int err;
+	long unsigned int groups = nladdr->nl_groups;
 
 	if (addr_len < sizeof(struct sockaddr_nl))
 		return -EINVAL;
@@ -1427,7 +1445,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 		return -EINVAL;
 
 	/* Only superuser is allowed to listen multicasts */
-	if (nladdr->nl_groups) {
+	if (groups) {
 		if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
 			return -EPERM;
 		err = netlink_realloc_groups(sk);
@@ -1435,37 +1453,45 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
 			return err;
 	}
 
-	if (nlk->portid) {
+	if (nlk->portid)
 		if (nladdr->nl_pid != nlk->portid)
 			return -EINVAL;
-	} else {
+
+	if (nlk->netlink_bind && groups) {
+		int group;
+
+		for (group = 0; group < nlk->ngroups; group++) {
+			if (!test_bit(group, &groups))
+				continue;
+			err = nlk->netlink_bind(group);
+			if (!err)
+				continue;
+			netlink_unbind(group, groups, nlk);
+			return err;
+		}
+	}
+
+	if (!nlk->portid) {
 		err = nladdr->nl_pid ?
 			netlink_insert(sk, net, nladdr->nl_pid) :
 			netlink_autobind(sock);
-		if (err)
+		if (err) {
+			netlink_unbind(nlk->ngroups - 1, groups, nlk);
 			return err;
+		}
 	}
 
-	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
+	if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
 		return 0;
 
 	netlink_table_grab();
 	netlink_update_subscriptions(sk, nlk->subscriptions +
-					 hweight32(nladdr->nl_groups) -
+					 hweight32(groups) -
 					 hweight32(nlk->groups[0]));
-	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
+	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | groups;
 	netlink_update_listeners(sk);
 	netlink_table_ungrab();
 
-	if (nlk->netlink_bind && nlk->groups[0]) {
-		int i;
-
-		for (i = 0; i < nlk->ngroups; i++) {
-			if (test_bit(i, nlk->groups))
-				nlk->netlink_bind(i);
-		}
-	}
-
 	return 0;
 }
 
@@ -2103,14 +2129,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 			return err;
 		if (!val || val - 1 >= nlk->ngroups)
 			return -EINVAL;
+		if (nlk->netlink_bind) {
+			err = nlk->netlink_bind(val);
+			if (err)
+				return err;
+		}
 		netlink_table_grab();
 		netlink_update_socket_mc(nlk, val,
 					 optname == NETLINK_ADD_MEMBERSHIP);
 		netlink_table_ungrab();
 
-		if (nlk->netlink_bind)
-			nlk->netlink_bind(val);
-
 		err = 0;
 		break;
 	}
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index ed13a790b00e..0b59d441f5b6 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -38,7 +38,8 @@ struct netlink_sock {
 	struct mutex		*cb_mutex;
 	struct mutex		cb_def_mutex;
 	void			(*netlink_rcv)(struct sk_buff *skb);
-	void			(*netlink_bind)(int group);
+	int			(*netlink_bind)(int group);
+	void			(*netlink_unbind)(int group);
 	struct module		*module;
 #ifdef CONFIG_NETLINK_MMAP
 	struct mutex		pg_vec_lock;
@@ -74,7 +75,8 @@ struct netlink_table {
 	unsigned int		groups;
 	struct mutex		*cb_mutex;
 	struct module		*module;
-	void			(*bind)(int group);
+	int			(*bind)(int group);
+	void			(*unbind)(int group);
 	bool			(*compare)(struct net *net, struct sock *sock);
 	int			registered;
 };
-- 
cgit 


From c42ba72ec3a7a1b6aa30122931f1f4b91b601c31 Mon Sep 17 00:00:00 2001
From: Doug Anderson <dianders@chromium.org>
Date: Wed, 16 Apr 2014 16:12:27 -0700
Subject: mfd: tps65090: Stop caching most registers

Nearly all of the registers in tps65090 combine control bits and
status bits.  Turn off caching of all registers except the select few
that can be cached.

In order to avoid adding more duplicate #defines, we also move some
register offset definitions to the mfd driver (and resolve
inconsistent names).

Signed-off-by: Doug Anderson <dianders@chromium.org>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/tps65090.c           | 27 ++++++++++++++-------------
 drivers/power/tps65090-charger.c | 11 -----------
 include/linux/mfd/tps65090.h     | 14 ++++++++++++++
 3 files changed, 28 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65090.c b/drivers/mfd/tps65090.c
index c3cddb4c3a1a..1c3e6e2efe41 100644
--- a/drivers/mfd/tps65090.c
+++ b/drivers/mfd/tps65090.c
@@ -32,14 +32,6 @@
 #define NUM_INT_REG 2
 #define TOTAL_NUM_REG 0x18
 
-/* interrupt status registers */
-#define TPS65090_INT_STS	0x0
-#define TPS65090_INT_STS2	0x1
-
-/* interrupt mask registers */
-#define TPS65090_INT_MSK	0x2
-#define TPS65090_INT_MSK2	0x3
-
 #define TPS65090_INT1_MASK_VAC_STATUS_CHANGE		1
 #define TPS65090_INT1_MASK_VSYS_STATUS_CHANGE		2
 #define TPS65090_INT1_MASK_BAT_STATUS_CHANGE		3
@@ -144,17 +136,26 @@ static struct regmap_irq_chip tps65090_irq_chip = {
 	.irqs = tps65090_irqs,
 	.num_irqs = ARRAY_SIZE(tps65090_irqs),
 	.num_regs = NUM_INT_REG,
-	.status_base = TPS65090_INT_STS,
-	.mask_base = TPS65090_INT_MSK,
+	.status_base = TPS65090_REG_INTR_STS,
+	.mask_base = TPS65090_REG_INTR_MASK,
 	.mask_invert = true,
 };
 
 static bool is_volatile_reg(struct device *dev, unsigned int reg)
 {
-	if ((reg == TPS65090_INT_STS) || (reg == TPS65090_INT_STS2))
-		return true;
-	else
+	/* Nearly all registers have status bits mixed in, except a few */
+	switch (reg) {
+	case TPS65090_REG_INTR_MASK:
+	case TPS65090_REG_INTR_MASK2:
+	case TPS65090_REG_CG_CTRL0:
+	case TPS65090_REG_CG_CTRL1:
+	case TPS65090_REG_CG_CTRL2:
+	case TPS65090_REG_CG_CTRL3:
+	case TPS65090_REG_CG_CTRL4:
+	case TPS65090_REG_CG_CTRL5:
 		return false;
+	}
+	return true;
 }
 
 static const struct regmap_config tps65090_regmap_config = {
diff --git a/drivers/power/tps65090-charger.c b/drivers/power/tps65090-charger.c
index 8fc9d6df87f6..1685f63b9e5d 100644
--- a/drivers/power/tps65090-charger.c
+++ b/drivers/power/tps65090-charger.c
@@ -28,17 +28,6 @@
 
 #include <linux/mfd/tps65090.h>
 
-#define TPS65090_REG_INTR_STS	0x00
-#define TPS65090_REG_INTR_MASK	0x02
-#define TPS65090_REG_CG_CTRL0	0x04
-#define TPS65090_REG_CG_CTRL1	0x05
-#define TPS65090_REG_CG_CTRL2	0x06
-#define TPS65090_REG_CG_CTRL3	0x07
-#define TPS65090_REG_CG_CTRL4	0x08
-#define TPS65090_REG_CG_CTRL5	0x09
-#define TPS65090_REG_CG_STATUS1	0x0a
-#define TPS65090_REG_CG_STATUS2	0x0b
-
 #define TPS65090_CHARGER_ENABLE	BIT(0)
 #define TPS65090_VACG		BIT(1)
 #define TPS65090_NOITERM	BIT(5)
diff --git a/include/linux/mfd/tps65090.h b/include/linux/mfd/tps65090.h
index 3f43069413e7..45f0f9d2ed25 100644
--- a/include/linux/mfd/tps65090.h
+++ b/include/linux/mfd/tps65090.h
@@ -64,6 +64,20 @@ enum {
 	TPS65090_REGULATOR_MAX,
 };
 
+/* Register addresses */
+#define TPS65090_REG_INTR_STS	0x00
+#define TPS65090_REG_INTR_STS2	0x01
+#define TPS65090_REG_INTR_MASK	0x02
+#define TPS65090_REG_INTR_MASK2	0x03
+#define TPS65090_REG_CG_CTRL0	0x04
+#define TPS65090_REG_CG_CTRL1	0x05
+#define TPS65090_REG_CG_CTRL2	0x06
+#define TPS65090_REG_CG_CTRL3	0x07
+#define TPS65090_REG_CG_CTRL4	0x08
+#define TPS65090_REG_CG_CTRL5	0x09
+#define TPS65090_REG_CG_STATUS1	0x0a
+#define TPS65090_REG_CG_STATUS2	0x0b
+
 struct tps65090 {
 	struct device		*dev;
 	struct regmap		*rmap;
-- 
cgit 


From 575343d161d75dc1516f53436b9eb47d04eda938 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 14 Apr 2014 11:17:13 +0200
Subject: mfd: max14577: Add muic prefix to regmap config

Add the muic prefix to the regmap config to differentiate it from another
regmap config for the MAX77836 PMIC node. Additionally, remove the unused
symbols MAX14577_REG_INVALID and max14577_irq_source.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/max14577.c               | 9 +++++----
 include/linux/mfd/max14577-private.h | 4 +---
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max14577.c b/drivers/mfd/max14577.c
index 5f13cefe8def..d180fae8e317 100644
--- a/drivers/mfd/max14577.c
+++ b/drivers/mfd/max14577.c
@@ -37,7 +37,7 @@ static struct mfd_cell max14577_devs[] = {
 	{ .name = "max14577-charger", },
 };
 
-static bool max14577_volatile_reg(struct device *dev, unsigned int reg)
+static bool max14577_muic_volatile_reg(struct device *dev, unsigned int reg)
 {
 	switch (reg) {
 	case MAX14577_REG_INT1 ... MAX14577_REG_STATUS3:
@@ -48,10 +48,10 @@ static bool max14577_volatile_reg(struct device *dev, unsigned int reg)
 	return false;
 }
 
-static const struct regmap_config max14577_regmap_config = {
+static const struct regmap_config max14577_muic_regmap_config = {
 	.reg_bits	= 8,
 	.val_bits	= 8,
-	.volatile_reg	= max14577_volatile_reg,
+	.volatile_reg	= max14577_muic_volatile_reg,
 	.max_register	= MAX14577_REG_END,
 };
 
@@ -113,7 +113,8 @@ static int max14577_i2c_probe(struct i2c_client *i2c,
 	max14577->i2c = i2c;
 	max14577->irq = i2c->irq;
 
-	max14577->regmap = devm_regmap_init_i2c(i2c, &max14577_regmap_config);
+	max14577->regmap = devm_regmap_init_i2c(i2c,
+			&max14577_muic_regmap_config);
 	if (IS_ERR(max14577->regmap)) {
 		ret = PTR_ERR(max14577->regmap);
 		dev_err(max14577->dev, "Failed to allocate register map: %d\n",
diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h
index c9b332fb0d5d..97b78d94f92f 100644
--- a/include/linux/mfd/max14577-private.h
+++ b/include/linux/mfd/max14577-private.h
@@ -22,9 +22,7 @@
 #include <linux/i2c.h>
 #include <linux/regmap.h>
 
-#define MAX14577_REG_INVALID		(0xff)
-
-/* Slave addr = 0x4A: Interrupt */
+/* Slave addr = 0x4A: MUIC and Charger */
 enum max14577_reg {
 	MAX14577_REG_DEVICEID		= 0x00,
 	MAX14577_REG_INT1		= 0x01,
-- 
cgit 


From eccb80cc22354a12255c2579247a92a30a4c881b Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 14 Apr 2014 11:17:14 +0200
Subject: mfd: max14577: Add detection of device type

This patch continues the preparation for adding support for MAX77836
device to existing max14577 driver.

Add enum for types of devices supported by this driver. The device type
will be detected by matching of_device_id, or i2c_device_id as a
fallback.

The patch also moves the code that displays the DeviceID register values
into a separate function.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/max14577.c               | 64 +++++++++++++++++++++++++-----------
 include/linux/mfd/max14577-private.h | 12 ++++---
 2 files changed, 53 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max14577.c b/drivers/mfd/max14577.c
index d180fae8e317..0e07ed74ab41 100644
--- a/drivers/mfd/max14577.c
+++ b/drivers/mfd/max14577.c
@@ -21,6 +21,7 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <linux/of_device.h>
 #include <linux/mfd/core.h>
 #include <linux/mfd/max14577.h>
 #include <linux/mfd/max14577-private.h>
@@ -37,6 +38,14 @@ static struct mfd_cell max14577_devs[] = {
 	{ .name = "max14577-charger", },
 };
 
+static struct of_device_id max14577_dt_match[] = {
+	{
+		.compatible = "maxim,max14577",
+		.data = (void *)MAXIM_DEVICE_TYPE_MAX14577,
+	},
+	{},
+};
+
 static bool max14577_muic_volatile_reg(struct device *dev, unsigned int reg)
 {
 	switch (reg) {
@@ -83,13 +92,34 @@ static const struct regmap_irq_chip max14577_irq_chip = {
 	.num_irqs		= ARRAY_SIZE(max14577_irqs),
 };
 
+static void max14577_print_dev_type(struct max14577 *max14577)
+{
+	u8 reg_data, vendor_id, device_id;
+	int ret;
+
+	ret = max14577_read_reg(max14577->regmap, MAX14577_REG_DEVICEID,
+			&reg_data);
+	if (ret) {
+		dev_err(max14577->dev,
+			"Failed to read DEVICEID register: %d\n", ret);
+		return;
+	}
+
+	vendor_id = ((reg_data & DEVID_VENDORID_MASK) >>
+				DEVID_VENDORID_SHIFT);
+	device_id = ((reg_data & DEVID_DEVICEID_MASK) >>
+				DEVID_DEVICEID_SHIFT);
+
+	dev_info(max14577->dev, "Device type: %u (ID: 0x%x, vendor: 0x%x)\n",
+			max14577->dev_type, device_id, vendor_id);
+}
+
 static int max14577_i2c_probe(struct i2c_client *i2c,
 			      const struct i2c_device_id *id)
 {
 	struct max14577 *max14577;
 	struct max14577_platform_data *pdata = dev_get_platdata(&i2c->dev);
 	struct device_node *np = i2c->dev.of_node;
-	u8 reg_data;
 	int ret = 0;
 
 	if (np) {
@@ -122,19 +152,17 @@ static int max14577_i2c_probe(struct i2c_client *i2c,
 		return ret;
 	}
 
-	ret = max14577_read_reg(max14577->regmap, MAX14577_REG_DEVICEID,
-			&reg_data);
-	if (ret) {
-		dev_err(max14577->dev, "Device not found on this channel: %d\n",
-				ret);
-		return ret;
+	if (np) {
+		const struct of_device_id *of_id;
+
+		of_id = of_match_device(max14577_dt_match, &i2c->dev);
+		if (of_id)
+			max14577->dev_type = (unsigned int)of_id->data;
+	} else {
+		max14577->dev_type = id->driver_data;
 	}
-	max14577->vendor_id = ((reg_data & DEVID_VENDORID_MASK) >>
-				DEVID_VENDORID_SHIFT);
-	max14577->device_id = ((reg_data & DEVID_DEVICEID_MASK) >>
-				DEVID_DEVICEID_SHIFT);
-	dev_info(max14577->dev, "Device ID: 0x%x, vendor: 0x%x\n",
-			max14577->device_id, max14577->vendor_id);
+
+	max14577_print_dev_type(max14577);
 
 	ret = regmap_add_irq_chip(max14577->regmap, max14577->irq,
 				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT, 0,
@@ -173,7 +201,7 @@ static int max14577_i2c_remove(struct i2c_client *i2c)
 }
 
 static const struct i2c_device_id max14577_i2c_id[] = {
-	{ "max14577", 0 },
+	{ "max14577", MAXIM_DEVICE_TYPE_MAX14577, },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, max14577_i2c_id);
@@ -216,11 +244,6 @@ static int max14577_resume(struct device *dev)
 }
 #endif /* CONFIG_PM_SLEEP */
 
-static struct of_device_id max14577_dt_match[] = {
-	{ .compatible = "maxim,max14577", },
-	{},
-};
-
 static SIMPLE_DEV_PM_OPS(max14577_pm, max14577_suspend, max14577_resume);
 
 static struct i2c_driver max14577_i2c_driver = {
@@ -237,6 +260,9 @@ static struct i2c_driver max14577_i2c_driver = {
 
 static int __init max14577_i2c_init(void)
 {
+	BUILD_BUG_ON(ARRAY_SIZE(max14577_i2c_id) != MAXIM_DEVICE_TYPE_NUM);
+	BUILD_BUG_ON(ARRAY_SIZE(max14577_dt_match) != MAXIM_DEVICE_TYPE_NUM);
+
 	return i2c_add_driver(&max14577_i2c_driver);
 }
 subsys_initcall(max14577_i2c_init);
diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h
index 97b78d94f92f..1ce6f2952cc9 100644
--- a/include/linux/mfd/max14577-private.h
+++ b/include/linux/mfd/max14577-private.h
@@ -22,6 +22,13 @@
 #include <linux/i2c.h>
 #include <linux/regmap.h>
 
+enum maxim_device_type {
+	MAXIM_DEVICE_TYPE_UNKNOWN	= 0,
+	MAXIM_DEVICE_TYPE_MAX14577,
+
+	MAXIM_DEVICE_TYPE_NUM,
+};
+
 /* Slave addr = 0x4A: MUIC and Charger */
 enum max14577_reg {
 	MAX14577_REG_DEVICEID		= 0x00,
@@ -271,15 +278,12 @@ enum max14577_irq {
 struct max14577 {
 	struct device *dev;
 	struct i2c_client *i2c; /* Slave addr = 0x4A */
+	enum maxim_device_type dev_type;
 
 	struct regmap *regmap;
 
 	struct regmap_irq_chip_data *irq_data;
 	int irq;
-
-	/* Device ID */
-	u8 vendor_id;	/* Vendor Identification */
-	u8 device_id;	/* Chip Version */
 };
 
 /* MAX14577 shared regmap API function */
-- 
cgit 


From c7846852ec8f304c629963202fa565452e8fe34c Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 14 Apr 2014 11:17:17 +0200
Subject: mfd: max14577: Add MAX14577 prefix to IRQ defines

This patch prepares for adding support for MAX77836 device to existing
max14577 driver by adding MAX14577 prefix to defines of interrupts.

This is only a rename-like patch, new code is not added.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/max14577.c               | 24 ++++++++++++------------
 include/linux/mfd/max14577-private.h | 28 ++++++++++++++--------------
 2 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max14577.c b/drivers/mfd/max14577.c
index 0e07ed74ab41..6f39dec9dfdf 100644
--- a/drivers/mfd/max14577.c
+++ b/drivers/mfd/max14577.c
@@ -66,20 +66,20 @@ static const struct regmap_config max14577_muic_regmap_config = {
 
 static const struct regmap_irq max14577_irqs[] = {
 	/* INT1 interrupts */
-	{ .reg_offset = 0, .mask = INT1_ADC_MASK, },
-	{ .reg_offset = 0, .mask = INT1_ADCLOW_MASK, },
-	{ .reg_offset = 0, .mask = INT1_ADCERR_MASK, },
+	{ .reg_offset = 0, .mask = MAX14577_INT1_ADC_MASK, },
+	{ .reg_offset = 0, .mask = MAX14577_INT1_ADCLOW_MASK, },
+	{ .reg_offset = 0, .mask = MAX14577_INT1_ADCERR_MASK, },
 	/* INT2 interrupts */
-	{ .reg_offset = 1, .mask = INT2_CHGTYP_MASK, },
-	{ .reg_offset = 1, .mask = INT2_CHGDETRUN_MASK, },
-	{ .reg_offset = 1, .mask = INT2_DCDTMR_MASK, },
-	{ .reg_offset = 1, .mask = INT2_DBCHG_MASK, },
-	{ .reg_offset = 1, .mask = INT2_VBVOLT_MASK, },
+	{ .reg_offset = 1, .mask = MAX14577_INT2_CHGTYP_MASK, },
+	{ .reg_offset = 1, .mask = MAX14577_INT2_CHGDETRUN_MASK, },
+	{ .reg_offset = 1, .mask = MAX14577_INT2_DCDTMR_MASK, },
+	{ .reg_offset = 1, .mask = MAX14577_INT2_DBCHG_MASK, },
+	{ .reg_offset = 1, .mask = MAX14577_INT2_VBVOLT_MASK, },
 	/* INT3 interrupts */
-	{ .reg_offset = 2, .mask = INT3_EOC_MASK, },
-	{ .reg_offset = 2, .mask = INT3_CGMBC_MASK, },
-	{ .reg_offset = 2, .mask = INT3_OVP_MASK, },
-	{ .reg_offset = 2, .mask = INT3_MBCCHGERR_MASK, },
+	{ .reg_offset = 2, .mask = MAX14577_INT3_EOC_MASK, },
+	{ .reg_offset = 2, .mask = MAX14577_INT3_CGMBC_MASK, },
+	{ .reg_offset = 2, .mask = MAX14577_INT3_OVP_MASK, },
+	{ .reg_offset = 2, .mask = MAX14577_INT3_MBCCHGERR_MASK, },
 };
 
 static const struct regmap_irq_chip max14577_irq_chip = {
diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h
index 1ce6f2952cc9..989183d232cd 100644
--- a/include/linux/mfd/max14577-private.h
+++ b/include/linux/mfd/max14577-private.h
@@ -79,20 +79,20 @@ enum max14577_muic_charger_type {
 };
 
 /* MAX14577 interrupts */
-#define INT1_ADC_MASK			(0x1 << 0)
-#define INT1_ADCLOW_MASK		(0x1 << 1)
-#define INT1_ADCERR_MASK		(0x1 << 2)
-
-#define INT2_CHGTYP_MASK		(0x1 << 0)
-#define INT2_CHGDETRUN_MASK		(0x1 << 1)
-#define INT2_DCDTMR_MASK		(0x1 << 2)
-#define INT2_DBCHG_MASK			(0x1 << 3)
-#define INT2_VBVOLT_MASK		(0x1 << 4)
-
-#define INT3_EOC_MASK			(0x1 << 0)
-#define INT3_CGMBC_MASK			(0x1 << 1)
-#define INT3_OVP_MASK			(0x1 << 2)
-#define INT3_MBCCHGERR_MASK		(0x1 << 3)
+#define MAX14577_INT1_ADC_MASK		BIT(0)
+#define MAX14577_INT1_ADCLOW_MASK	BIT(1)
+#define MAX14577_INT1_ADCERR_MASK	BIT(2)
+
+#define MAX14577_INT2_CHGTYP_MASK	BIT(0)
+#define MAX14577_INT2_CHGDETRUN_MASK	BIT(1)
+#define MAX14577_INT2_DCDTMR_MASK	BIT(2)
+#define MAX14577_INT2_DBCHG_MASK	BIT(3)
+#define MAX14577_INT2_VBVOLT_MASK	BIT(4)
+
+#define MAX14577_INT3_EOC_MASK		BIT(0)
+#define MAX14577_INT3_CGMBC_MASK	BIT(1)
+#define MAX14577_INT3_OVP_MASK		BIT(2)
+#define MAX14577_INT3_MBCCHGERR_MASK	BIT(3)
 
 /* MAX14577 DEVICE ID register */
 #define DEVID_VENDORID_SHIFT		0
-- 
cgit 


From aee2a57c7482c712052b877218aa2c5bc0fe8626 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 14 Apr 2014 11:17:18 +0200
Subject: mfd: max77836: Add MAX77836 support to max14577 driver

Add Maxim 77836 support to the max14577 driver. The chipsets have the same
MUIC component, so the extcon, charger and regulators are almost the same.
The MAX77836, however, also has a PMIC and a Fuel Gauge.

The MAX77836 uses three I2C slave addresses and has additional interrupts
(related to the PMIC and Fuel Gauge). It also has an Interrupt Source
register, just like the MAX77686 and MAX77693.

The MAX77836 PMIC's TOPSYS and INTSRC interrupts are reported in the
PMIC block. The PMIC block has a different I2C slave address and uses its
own regmap, so another regmap_irq_chip is needed.

Since we have two regmap_irq_chip, use shared interrupts on MAX77836.

This patch adds additional defines and functions to the max14577 MFD core
driver so the driver will handle both chipsets. Also this patch replaces
"0x1 << N" with BIT(N) in defines for register masks.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig                  |   6 +-
 drivers/mfd/max14577.c               | 217 +++++++++++++++++++++++++++++++++--
 include/linux/mfd/max14577-private.h | 145 +++++++++++++++++------
 include/linux/mfd/max14577.h         |   7 +-
 4 files changed, 330 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 33834120d057..5bdefe72625e 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -331,15 +331,15 @@ config MFD_88PM860X
 	  battery-charger under the corresponding menus.
 
 config MFD_MAX14577
-	bool "Maxim Semiconductor MAX14577 MUIC + Charger Support"
+	bool "Maxim Semiconductor MAX14577/77836 MUIC + Charger Support"
 	depends on I2C=y
 	select MFD_CORE
 	select REGMAP_I2C
 	select REGMAP_IRQ
 	select IRQ_DOMAIN
 	help
-	  Say yes here to add support for Maxim Semiconductor MAX14577.
-	  This is a Micro-USB IC with Charger controls on chip.
+	  Say yes here to add support for Maxim Semiconductor MAX14577 and
+	  MAX77836 Micro-USB ICs with battery charger.
 	  This driver provides common support for accessing the device;
 	  additional drivers must be enabled in order to use the functionality
 	  of the device.
diff --git a/drivers/mfd/max14577.c b/drivers/mfd/max14577.c
index 6f39dec9dfdf..20e3b2d81bf0 100644
--- a/drivers/mfd/max14577.c
+++ b/drivers/mfd/max14577.c
@@ -1,7 +1,7 @@
 /*
- * max14577.c - mfd core driver for the Maxim 14577
+ * max14577.c - mfd core driver for the Maxim 14577/77836
  *
- * Copyright (C) 2013 Samsung Electrnoics
+ * Copyright (C) 2014 Samsung Electrnoics
  * Chanwoo Choi <cw00.choi@samsung.com>
  * Krzysztof Kozlowski <k.kozlowski@samsung.com>
  *
@@ -38,11 +38,34 @@ static struct mfd_cell max14577_devs[] = {
 	{ .name = "max14577-charger", },
 };
 
+static struct mfd_cell max77836_devs[] = {
+	{
+		.name = "max77836-muic",
+		.of_compatible = "maxim,max77836-muic",
+	},
+	{
+		.name = "max77836-regulator",
+		.of_compatible = "maxim,max77836-regulator",
+	},
+	{
+		.name = "max77836-charger",
+		.of_compatible = "maxim,max77836-charger",
+	},
+	{
+		.name = "max77836-battery",
+		.of_compatible = "maxim,max77836-battery",
+	},
+};
+
 static struct of_device_id max14577_dt_match[] = {
 	{
 		.compatible = "maxim,max14577",
 		.data = (void *)MAXIM_DEVICE_TYPE_MAX14577,
 	},
+	{
+		.compatible = "maxim,max77836",
+		.data = (void *)MAXIM_DEVICE_TYPE_MAX77836,
+	},
 	{},
 };
 
@@ -57,6 +80,26 @@ static bool max14577_muic_volatile_reg(struct device *dev, unsigned int reg)
 	return false;
 }
 
+static bool max77836_muic_volatile_reg(struct device *dev, unsigned int reg)
+{
+	/* Any max14577 volatile registers are also max77836 volatile. */
+	if (max14577_muic_volatile_reg(dev, reg))
+		return true;
+
+	switch (reg) {
+	case MAX77836_FG_REG_VCELL_MSB ... MAX77836_FG_REG_SOC_LSB:
+	case MAX77836_FG_REG_CRATE_MSB ... MAX77836_FG_REG_CRATE_LSB:
+	case MAX77836_FG_REG_STATUS_H ... MAX77836_FG_REG_STATUS_L:
+	case MAX77836_PMIC_REG_INTSRC:
+	case MAX77836_PMIC_REG_TOPSYS_INT:
+	case MAX77836_PMIC_REG_TOPSYS_STAT:
+		return true;
+	default:
+		break;
+	}
+	return false;
+}
+
 static const struct regmap_config max14577_muic_regmap_config = {
 	.reg_bits	= 8,
 	.val_bits	= 8,
@@ -64,6 +107,13 @@ static const struct regmap_config max14577_muic_regmap_config = {
 	.max_register	= MAX14577_REG_END,
 };
 
+static const struct regmap_config max77836_pmic_regmap_config = {
+	.reg_bits	= 8,
+	.val_bits	= 8,
+	.volatile_reg	= max77836_muic_volatile_reg,
+	.max_register	= MAX77836_PMIC_REG_END,
+};
+
 static const struct regmap_irq max14577_irqs[] = {
 	/* INT1 interrupts */
 	{ .reg_offset = 0, .mask = MAX14577_INT1_ADC_MASK, },
@@ -86,12 +136,56 @@ static const struct regmap_irq_chip max14577_irq_chip = {
 	.name			= "max14577",
 	.status_base		= MAX14577_REG_INT1,
 	.mask_base		= MAX14577_REG_INTMASK1,
-	.mask_invert		= 1,
+	.mask_invert		= true,
 	.num_regs		= 3,
 	.irqs			= max14577_irqs,
 	.num_irqs		= ARRAY_SIZE(max14577_irqs),
 };
 
+static const struct regmap_irq max77836_muic_irqs[] = {
+	/* INT1 interrupts */
+	{ .reg_offset = 0, .mask = MAX14577_INT1_ADC_MASK, },
+	{ .reg_offset = 0, .mask = MAX14577_INT1_ADCLOW_MASK, },
+	{ .reg_offset = 0, .mask = MAX14577_INT1_ADCERR_MASK, },
+	/* INT2 interrupts */
+	{ .reg_offset = 1, .mask = MAX14577_INT2_CHGTYP_MASK, },
+	{ .reg_offset = 1, .mask = MAX14577_INT2_CHGDETRUN_MASK, },
+	{ .reg_offset = 1, .mask = MAX14577_INT2_DCDTMR_MASK, },
+	{ .reg_offset = 1, .mask = MAX14577_INT2_DBCHG_MASK, },
+	{ .reg_offset = 1, .mask = MAX14577_INT2_VBVOLT_MASK, },
+	{ .reg_offset = 1, .mask = MAX77836_INT2_VIDRM_MASK, },
+	/* INT3 interrupts */
+	{ .reg_offset = 2, .mask = MAX14577_INT3_EOC_MASK, },
+	{ .reg_offset = 2, .mask = MAX14577_INT3_CGMBC_MASK, },
+	{ .reg_offset = 2, .mask = MAX14577_INT3_OVP_MASK, },
+	{ .reg_offset = 2, .mask = MAX14577_INT3_MBCCHGERR_MASK, },
+};
+
+static const struct regmap_irq_chip max77836_muic_irq_chip = {
+	.name			= "max77836-muic",
+	.status_base		= MAX14577_REG_INT1,
+	.mask_base		= MAX14577_REG_INTMASK1,
+	.mask_invert		= true,
+	.num_regs		= 3,
+	.irqs			= max77836_muic_irqs,
+	.num_irqs		= ARRAY_SIZE(max77836_muic_irqs),
+};
+
+static const struct regmap_irq max77836_pmic_irqs[] = {
+	{ .reg_offset = 0, .mask = MAX77836_TOPSYS_INT_T120C_MASK, },
+	{ .reg_offset = 0, .mask = MAX77836_TOPSYS_INT_T140C_MASK, },
+};
+
+static const struct regmap_irq_chip max77836_pmic_irq_chip = {
+	.name			= "max77836-pmic",
+	.status_base		= MAX77836_PMIC_REG_TOPSYS_INT,
+	.mask_base		= MAX77836_PMIC_REG_TOPSYS_INT_MASK,
+	.mask_invert		= false,
+	.num_regs		= 1,
+	.irqs			= max77836_pmic_irqs,
+	.num_irqs		= ARRAY_SIZE(max77836_pmic_irqs),
+};
+
 static void max14577_print_dev_type(struct max14577 *max14577)
 {
 	u8 reg_data, vendor_id, device_id;
@@ -114,6 +208,81 @@ static void max14577_print_dev_type(struct max14577 *max14577)
 			max14577->dev_type, device_id, vendor_id);
 }
 
+/*
+ * Max77836 specific initialization code for driver probe.
+ * Adds new I2C dummy device, regmap and regmap IRQ chip.
+ * Unmasks Interrupt Source register.
+ *
+ * On success returns 0.
+ * On failure returns errno and reverts any changes done so far (e.g. remove
+ * I2C dummy device), except masking the INT SRC register.
+ */
+static int max77836_init(struct max14577 *max14577)
+{
+	int ret;
+	u8 intsrc_mask;
+
+	max14577->i2c_pmic = i2c_new_dummy(max14577->i2c->adapter,
+			I2C_ADDR_PMIC);
+	if (!max14577->i2c_pmic) {
+		dev_err(max14577->dev, "Failed to register PMIC I2C device\n");
+		return -ENODEV;
+	}
+	i2c_set_clientdata(max14577->i2c_pmic, max14577);
+
+	max14577->regmap_pmic = devm_regmap_init_i2c(max14577->i2c_pmic,
+			&max77836_pmic_regmap_config);
+	if (IS_ERR(max14577->regmap_pmic)) {
+		ret = PTR_ERR(max14577->regmap_pmic);
+		dev_err(max14577->dev, "Failed to allocate PMIC register map: %d\n",
+				ret);
+		goto err;
+	}
+
+	/* Un-mask MAX77836 Interrupt Source register */
+	ret = max14577_read_reg(max14577->regmap_pmic,
+			MAX77836_PMIC_REG_INTSRC_MASK, &intsrc_mask);
+	if (ret < 0) {
+		dev_err(max14577->dev, "Failed to read PMIC register\n");
+		goto err;
+	}
+
+	intsrc_mask &= ~(MAX77836_INTSRC_MASK_TOP_INT_MASK);
+	intsrc_mask &= ~(MAX77836_INTSRC_MASK_MUIC_CHG_INT_MASK);
+	ret = max14577_write_reg(max14577->regmap_pmic,
+			MAX77836_PMIC_REG_INTSRC_MASK, intsrc_mask);
+	if (ret < 0) {
+		dev_err(max14577->dev, "Failed to write PMIC register\n");
+		goto err;
+	}
+
+	ret = regmap_add_irq_chip(max14577->regmap_pmic, max14577->irq,
+			IRQF_TRIGGER_FALLING | IRQF_ONESHOT | IRQF_SHARED,
+			0, &max77836_pmic_irq_chip,
+			&max14577->irq_data_pmic);
+	if (ret != 0) {
+		dev_err(max14577->dev, "Failed to request PMIC IRQ %d: %d\n",
+				max14577->irq, ret);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	i2c_unregister_device(max14577->i2c_pmic);
+
+	return ret;
+}
+
+/*
+ * Max77836 specific de-initialization code for driver remove.
+ */
+static void max77836_remove(struct max14577 *max14577)
+{
+	regmap_del_irq_chip(max14577->irq, max14577->irq_data_pmic);
+	i2c_unregister_device(max14577->i2c_pmic);
+}
+
 static int max14577_i2c_probe(struct i2c_client *i2c,
 			      const struct i2c_device_id *id)
 {
@@ -121,6 +290,10 @@ static int max14577_i2c_probe(struct i2c_client *i2c,
 	struct max14577_platform_data *pdata = dev_get_platdata(&i2c->dev);
 	struct device_node *np = i2c->dev.of_node;
 	int ret = 0;
+	const struct regmap_irq_chip *irq_chip;
+	struct mfd_cell *mfd_devs;
+	unsigned int mfd_devs_size;
+	int irq_flags;
 
 	if (np) {
 		pdata = devm_kzalloc(&i2c->dev, sizeof(*pdata), GFP_KERNEL);
@@ -164,9 +337,24 @@ static int max14577_i2c_probe(struct i2c_client *i2c,
 
 	max14577_print_dev_type(max14577);
 
+	switch (max14577->dev_type) {
+	case MAXIM_DEVICE_TYPE_MAX77836:
+		irq_chip = &max77836_muic_irq_chip;
+		mfd_devs = max77836_devs;
+		mfd_devs_size = ARRAY_SIZE(max77836_devs);
+		irq_flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT | IRQF_SHARED;
+		break;
+	case MAXIM_DEVICE_TYPE_MAX14577:
+	default:
+		irq_chip = &max14577_irq_chip;
+		mfd_devs = max14577_devs;
+		mfd_devs_size = ARRAY_SIZE(max14577_devs);
+		irq_flags = IRQF_TRIGGER_FALLING | IRQF_ONESHOT;
+		break;
+	}
+
 	ret = regmap_add_irq_chip(max14577->regmap, max14577->irq,
-				  IRQF_TRIGGER_FALLING | IRQF_ONESHOT, 0,
-				  &max14577_irq_chip,
+				  irq_flags, 0, irq_chip,
 				  &max14577->irq_data);
 	if (ret != 0) {
 		dev_err(&i2c->dev, "Failed to request IRQ %d: %d\n",
@@ -174,8 +362,15 @@ static int max14577_i2c_probe(struct i2c_client *i2c,
 		return ret;
 	}
 
-	ret = mfd_add_devices(max14577->dev, -1, max14577_devs,
-			ARRAY_SIZE(max14577_devs), NULL, 0,
+	/* Max77836 specific initialization code (additional regmap) */
+	if (max14577->dev_type == MAXIM_DEVICE_TYPE_MAX77836) {
+		ret = max77836_init(max14577);
+		if (ret < 0)
+			goto err_max77836;
+	}
+
+	ret = mfd_add_devices(max14577->dev, -1, mfd_devs,
+			mfd_devs_size, NULL, 0,
 			regmap_irq_get_domain(max14577->irq_data));
 	if (ret < 0)
 		goto err_mfd;
@@ -185,6 +380,9 @@ static int max14577_i2c_probe(struct i2c_client *i2c,
 	return 0;
 
 err_mfd:
+	if (max14577->dev_type == MAXIM_DEVICE_TYPE_MAX77836)
+		max77836_remove(max14577);
+err_max77836:
 	regmap_del_irq_chip(max14577->irq, max14577->irq_data);
 
 	return ret;
@@ -196,12 +394,15 @@ static int max14577_i2c_remove(struct i2c_client *i2c)
 
 	mfd_remove_devices(max14577->dev);
 	regmap_del_irq_chip(max14577->irq, max14577->irq_data);
+	if (max14577->dev_type == MAXIM_DEVICE_TYPE_MAX77836)
+		max77836_remove(max14577);
 
 	return 0;
 }
 
 static const struct i2c_device_id max14577_i2c_id[] = {
 	{ "max14577", MAXIM_DEVICE_TYPE_MAX14577, },
+	{ "max77836", MAXIM_DEVICE_TYPE_MAX77836, },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, max14577_i2c_id);
@@ -274,5 +475,5 @@ static void __exit max14577_i2c_exit(void)
 module_exit(max14577_i2c_exit);
 
 MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>, Krzysztof Kozlowski <k.kozlowski@samsung.com>");
-MODULE_DESCRIPTION("MAXIM 14577 multi-function core driver");
+MODULE_DESCRIPTION("Maxim 14577/77836 multi-function core driver");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h
index 989183d232cd..e301bd19b067 100644
--- a/include/linux/mfd/max14577-private.h
+++ b/include/linux/mfd/max14577-private.h
@@ -1,7 +1,7 @@
 /*
- * max14577-private.h - Common API for the Maxim 14577 internal sub chip
+ * max14577-private.h - Common API for the Maxim 14577/77836 internal sub chip
  *
- * Copyright (C) 2013 Samsung Electrnoics
+ * Copyright (C) 2014 Samsung Electrnoics
  * Chanwoo Choi <cw00.choi@samsung.com>
  * Krzysztof Kozlowski <k.kozlowski@samsung.com>
  *
@@ -22,9 +22,14 @@
 #include <linux/i2c.h>
 #include <linux/regmap.h>
 
+#define I2C_ADDR_PMIC	(0x46 >> 1)
+#define I2C_ADDR_MUIC	(0x4A >> 1)
+#define I2C_ADDR_FG	(0x6C >> 1)
+
 enum maxim_device_type {
 	MAXIM_DEVICE_TYPE_UNKNOWN	= 0,
 	MAXIM_DEVICE_TYPE_MAX14577,
+	MAXIM_DEVICE_TYPE_MAX77836,
 
 	MAXIM_DEVICE_TYPE_NUM,
 };
@@ -88,6 +93,7 @@ enum max14577_muic_charger_type {
 #define MAX14577_INT2_DCDTMR_MASK	BIT(2)
 #define MAX14577_INT2_DBCHG_MASK	BIT(3)
 #define MAX14577_INT2_VBVOLT_MASK	BIT(4)
+#define MAX77836_INT2_VIDRM_MASK	BIT(5)
 
 #define MAX14577_INT3_EOC_MASK		BIT(0)
 #define MAX14577_INT3_CGMBC_MASK	BIT(1)
@@ -104,9 +110,11 @@ enum max14577_muic_charger_type {
 #define STATUS1_ADC_SHIFT		0
 #define STATUS1_ADCLOW_SHIFT		5
 #define STATUS1_ADCERR_SHIFT		6
+#define MAX77836_STATUS1_ADC1K_SHIFT	7
 #define STATUS1_ADC_MASK		(0x1f << STATUS1_ADC_SHIFT)
-#define STATUS1_ADCLOW_MASK		(0x1 << STATUS1_ADCLOW_SHIFT)
-#define STATUS1_ADCERR_MASK		(0x1 << STATUS1_ADCERR_SHIFT)
+#define STATUS1_ADCLOW_MASK		BIT(STATUS1_ADCLOW_SHIFT)
+#define STATUS1_ADCERR_MASK		BIT(STATUS1_ADCERR_SHIFT)
+#define MAX77836_STATUS1_ADC1K_MASK	BIT(MAX77836_STATUS1_ADC1K_SHIFT)
 
 /* MAX14577 STATUS2 register */
 #define STATUS2_CHGTYP_SHIFT		0
@@ -114,11 +122,13 @@ enum max14577_muic_charger_type {
 #define STATUS2_DCDTMR_SHIFT		4
 #define STATUS2_DBCHG_SHIFT		5
 #define STATUS2_VBVOLT_SHIFT		6
+#define MAX77836_STATUS2_VIDRM_SHIFT	7
 #define STATUS2_CHGTYP_MASK		(0x7 << STATUS2_CHGTYP_SHIFT)
-#define STATUS2_CHGDETRUN_MASK		(0x1 << STATUS2_CHGDETRUN_SHIFT)
-#define STATUS2_DCDTMR_MASK		(0x1 << STATUS2_DCDTMR_SHIFT)
-#define STATUS2_DBCHG_MASK		(0x1 << STATUS2_DBCHG_SHIFT)
-#define STATUS2_VBVOLT_MASK		(0x1 << STATUS2_VBVOLT_SHIFT)
+#define STATUS2_CHGDETRUN_MASK		BIT(STATUS2_CHGDETRUN_SHIFT)
+#define STATUS2_DCDTMR_MASK		BIT(STATUS2_DCDTMR_SHIFT)
+#define STATUS2_DBCHG_MASK		BIT(STATUS2_DBCHG_SHIFT)
+#define STATUS2_VBVOLT_MASK		BIT(STATUS2_VBVOLT_SHIFT)
+#define MAX77836_STATUS2_VIDRM_MASK	BIT(MAX77836_STATUS2_VIDRM_SHIFT)
 
 /* MAX14577 CONTROL1 register */
 #define COMN1SW_SHIFT			0
@@ -127,8 +137,8 @@ enum max14577_muic_charger_type {
 #define IDBEN_SHIFT			7
 #define COMN1SW_MASK			(0x7 << COMN1SW_SHIFT)
 #define COMP2SW_MASK			(0x7 << COMP2SW_SHIFT)
-#define MICEN_MASK			(0x1 << MICEN_SHIFT)
-#define IDBEN_MASK			(0x1 << IDBEN_SHIFT)
+#define MICEN_MASK			BIT(MICEN_SHIFT)
+#define IDBEN_MASK			BIT(IDBEN_SHIFT)
 #define CLEAR_IDBEN_MICEN_MASK		(COMN1SW_MASK | COMP2SW_MASK)
 #define CTRL1_SW_USB			((1 << COMP2SW_SHIFT) \
 						| (1 << COMN1SW_SHIFT))
@@ -148,14 +158,14 @@ enum max14577_muic_charger_type {
 #define CTRL2_ACCDET_SHIFT		(5)
 #define CTRL2_USBCPINT_SHIFT		(6)
 #define CTRL2_RCPS_SHIFT		(7)
-#define CTRL2_LOWPWR_MASK		(0x1 << CTRL2_LOWPWR_SHIFT)
-#define CTRL2_ADCEN_MASK		(0x1 << CTRL2_ADCEN_SHIFT)
-#define CTRL2_CPEN_MASK			(0x1 << CTRL2_CPEN_SHIFT)
-#define CTRL2_SFOUTASRT_MASK		(0x1 << CTRL2_SFOUTASRT_SHIFT)
-#define CTRL2_SFOUTORD_MASK		(0x1 << CTRL2_SFOUTORD_SHIFT)
-#define CTRL2_ACCDET_MASK		(0x1 << CTRL2_ACCDET_SHIFT)
-#define CTRL2_USBCPINT_MASK		(0x1 << CTRL2_USBCPINT_SHIFT)
-#define CTRL2_RCPS_MASK			(0x1 << CTR2_RCPS_SHIFT)
+#define CTRL2_LOWPWR_MASK		BIT(CTRL2_LOWPWR_SHIFT)
+#define CTRL2_ADCEN_MASK		BIT(CTRL2_ADCEN_SHIFT)
+#define CTRL2_CPEN_MASK			BIT(CTRL2_CPEN_SHIFT)
+#define CTRL2_SFOUTASRT_MASK		BIT(CTRL2_SFOUTASRT_SHIFT)
+#define CTRL2_SFOUTORD_MASK		BIT(CTRL2_SFOUTORD_SHIFT)
+#define CTRL2_ACCDET_MASK		BIT(CTRL2_ACCDET_SHIFT)
+#define CTRL2_USBCPINT_MASK		BIT(CTRL2_USBCPINT_SHIFT)
+#define CTRL2_RCPS_MASK			BIT(CTRL2_RCPS_SHIFT)
 
 #define CTRL2_CPEN1_LOWPWR0 ((1 << CTRL2_CPEN_SHIFT) | \
 				(0 << CTRL2_LOWPWR_SHIFT))
@@ -203,14 +213,14 @@ enum max14577_charger_reg {
 #define CDETCTRL1_DBEXIT_SHIFT		5
 #define CDETCTRL1_DBIDLE_SHIFT		6
 #define CDETCTRL1_CDPDET_SHIFT		7
-#define CDETCTRL1_CHGDETEN_MASK		(0x1 << CDETCTRL1_CHGDETEN_SHIFT)
-#define CDETCTRL1_CHGTYPMAN_MASK	(0x1 << CDETCTRL1_CHGTYPMAN_SHIFT)
-#define CDETCTRL1_DCDEN_MASK		(0x1 << CDETCTRL1_DCDEN_SHIFT)
-#define CDETCTRL1_DCD2SCT_MASK		(0x1 << CDETCTRL1_DCD2SCT_SHIFT)
-#define CDETCTRL1_DCHKTM_MASK		(0x1 << CDETCTRL1_DCHKTM_SHIFT)
-#define CDETCTRL1_DBEXIT_MASK		(0x1 << CDETCTRL1_DBEXIT_SHIFT)
-#define CDETCTRL1_DBIDLE_MASK		(0x1 << CDETCTRL1_DBIDLE_SHIFT)
-#define CDETCTRL1_CDPDET_MASK		(0x1 << CDETCTRL1_CDPDET_SHIFT)
+#define CDETCTRL1_CHGDETEN_MASK		BIT(CDETCTRL1_CHGDETEN_SHIFT)
+#define CDETCTRL1_CHGTYPMAN_MASK	BIT(CDETCTRL1_CHGTYPMAN_SHIFT)
+#define CDETCTRL1_DCDEN_MASK		BIT(CDETCTRL1_DCDEN_SHIFT)
+#define CDETCTRL1_DCD2SCT_MASK		BIT(CDETCTRL1_DCD2SCT_SHIFT)
+#define CDETCTRL1_DCHKTM_MASK		BIT(CDETCTRL1_DCHKTM_SHIFT)
+#define CDETCTRL1_DBEXIT_MASK		BIT(CDETCTRL1_DBEXIT_SHIFT)
+#define CDETCTRL1_DBIDLE_MASK		BIT(CDETCTRL1_DBIDLE_SHIFT)
+#define CDETCTRL1_CDPDET_MASK		BIT(CDETCTRL1_CDPDET_SHIFT)
 
 /* MAX14577 CHGCTRL1 register */
 #define CHGCTRL1_TCHW_SHIFT		4
@@ -218,9 +228,9 @@ enum max14577_charger_reg {
 
 /* MAX14577 CHGCTRL2 register */
 #define CHGCTRL2_MBCHOSTEN_SHIFT	6
-#define CHGCTRL2_MBCHOSTEN_MASK		(0x1 << CHGCTRL2_MBCHOSTEN_SHIFT)
+#define CHGCTRL2_MBCHOSTEN_MASK		BIT(CHGCTRL2_MBCHOSTEN_SHIFT)
 #define CHGCTRL2_VCHGR_RC_SHIFT		7
-#define CHGCTRL2_VCHGR_RC_MASK		(0x1 << CHGCTRL2_VCHGR_RC_SHIFT)
+#define CHGCTRL2_VCHGR_RC_MASK		BIT(CHGCTRL2_VCHGR_RC_SHIFT)
 
 /* MAX14577 CHGCTRL3 register */
 #define CHGCTRL3_MBCCVWRC_SHIFT		0
@@ -230,7 +240,7 @@ enum max14577_charger_reg {
 #define CHGCTRL4_MBCICHWRCH_SHIFT	0
 #define CHGCTRL4_MBCICHWRCH_MASK	(0xf << CHGCTRL4_MBCICHWRCH_SHIFT)
 #define CHGCTRL4_MBCICHWRCL_SHIFT	4
-#define CHGCTRL4_MBCICHWRCL_MASK	(0x1 << CHGCTRL4_MBCICHWRCL_SHIFT)
+#define CHGCTRL4_MBCICHWRCL_MASK	BIT(CHGCTRL4_MBCICHWRCL_SHIFT)
 
 /* MAX14577 CHGCTRL5 register */
 #define CHGCTRL5_EOCS_SHIFT		0
@@ -238,7 +248,7 @@ enum max14577_charger_reg {
 
 /* MAX14577 CHGCTRL6 register */
 #define CHGCTRL6_AUTOSTOP_SHIFT		5
-#define CHGCTRL6_AUTOSTOP_MASK		(0x1 << CHGCTRL6_AUTOSTOP_SHIFT)
+#define CHGCTRL6_AUTOSTOP_MASK		BIT(CHGCTRL6_AUTOSTOP_SHIFT)
 
 /* MAX14577 CHGCTRL7 register */
 #define CHGCTRL7_OTPCGHCVS_SHIFT	0
@@ -253,6 +263,70 @@ enum max14577_charger_reg {
 /* MAX14577 regulator SFOUT LDO voltage, fixed, uV */
 #define MAX14577_REGULATOR_SAFEOUT_VOLTAGE		4900000
 
+/* Slave addr = 0x46: PMIC */
+enum max77836_pmic_reg {
+	MAX77836_PMIC_REG_PMIC_ID		= 0x20,
+	MAX77836_PMIC_REG_PMIC_REV		= 0x21,
+	MAX77836_PMIC_REG_INTSRC		= 0x22,
+	MAX77836_PMIC_REG_INTSRC_MASK		= 0x23,
+	MAX77836_PMIC_REG_TOPSYS_INT		= 0x24,
+	MAX77836_PMIC_REG_TOPSYS_INT_MASK	= 0x26,
+	MAX77836_PMIC_REG_TOPSYS_STAT		= 0x28,
+	MAX77836_PMIC_REG_MRSTB_CNTL		= 0x2A,
+	MAX77836_PMIC_REG_LSCNFG		= 0x2B,
+
+	MAX77836_LDO_REG_CNFG1_LDO1		= 0x51,
+	MAX77836_LDO_REG_CNFG2_LDO1		= 0x52,
+	MAX77836_LDO_REG_CNFG1_LDO2		= 0x53,
+	MAX77836_LDO_REG_CNFG2_LDO2		= 0x54,
+	MAX77836_LDO_REG_CNFG_LDO_BIAS		= 0x55,
+
+	MAX77836_COMP_REG_COMP1			= 0x60,
+
+	MAX77836_PMIC_REG_END,
+};
+
+#define MAX77836_INTSRC_MASK_TOP_INT_SHIFT	1
+#define MAX77836_INTSRC_MASK_MUIC_CHG_INT_SHIFT	3
+#define MAX77836_INTSRC_MASK_TOP_INT_MASK	BIT(MAX77836_INTSRC_MASK_TOP_INT_SHIFT)
+#define MAX77836_INTSRC_MASK_MUIC_CHG_INT_MASK	BIT(MAX77836_INTSRC_MASK_MUIC_CHG_INT_SHIFT)
+
+/* MAX77836 PMIC interrupts */
+#define MAX77836_TOPSYS_INT_T120C_SHIFT		0
+#define MAX77836_TOPSYS_INT_T140C_SHIFT		1
+#define MAX77836_TOPSYS_INT_T120C_MASK		BIT(MAX77836_TOPSYS_INT_T120C_SHIFT)
+#define MAX77836_TOPSYS_INT_T140C_MASK		BIT(MAX77836_TOPSYS_INT_T140C_SHIFT)
+
+/* Slave addr = 0x6C: Fuel-Gauge/Battery */
+enum max77836_fg_reg {
+	MAX77836_FG_REG_VCELL_MSB	= 0x02,
+	MAX77836_FG_REG_VCELL_LSB	= 0x03,
+	MAX77836_FG_REG_SOC_MSB		= 0x04,
+	MAX77836_FG_REG_SOC_LSB		= 0x05,
+	MAX77836_FG_REG_MODE_H		= 0x06,
+	MAX77836_FG_REG_MODE_L		= 0x07,
+	MAX77836_FG_REG_VERSION_MSB	= 0x08,
+	MAX77836_FG_REG_VERSION_LSB	= 0x09,
+	MAX77836_FG_REG_HIBRT_H		= 0x0A,
+	MAX77836_FG_REG_HIBRT_L		= 0x0B,
+	MAX77836_FG_REG_CONFIG_H	= 0x0C,
+	MAX77836_FG_REG_CONFIG_L	= 0x0D,
+	MAX77836_FG_REG_VALRT_MIN	= 0x14,
+	MAX77836_FG_REG_VALRT_MAX	= 0x15,
+	MAX77836_FG_REG_CRATE_MSB	= 0x16,
+	MAX77836_FG_REG_CRATE_LSB	= 0x17,
+	MAX77836_FG_REG_VRESET		= 0x18,
+	MAX77836_FG_REG_FGID		= 0x19,
+	MAX77836_FG_REG_STATUS_H	= 0x1A,
+	MAX77836_FG_REG_STATUS_L	= 0x1B,
+	/*
+	 * TODO: TABLE registers
+	 * TODO: CMD register
+	 */
+
+	MAX77836_FG_REG_END,
+};
+
 enum max14577_irq {
 	/* INT1 */
 	MAX14577_IRQ_INT1_ADC,
@@ -272,17 +346,24 @@ enum max14577_irq {
 	MAX14577_IRQ_INT3_OVP,
 	MAX14577_IRQ_INT3_MBCCHGERR,
 
+	/* TOPSYS_INT, only MAX77836 */
+	MAX77836_IRQ_TOPSYS_T140C,
+	MAX77836_IRQ_TOPSYS_T120C,
+
 	MAX14577_IRQ_NUM,
 };
 
 struct max14577 {
 	struct device *dev;
 	struct i2c_client *i2c; /* Slave addr = 0x4A */
+	struct i2c_client *i2c_pmic; /* Slave addr = 0x46 */
 	enum maxim_device_type dev_type;
 
-	struct regmap *regmap;
+	struct regmap *regmap; /* For MUIC and Charger */
+	struct regmap *regmap_pmic;
 
-	struct regmap_irq_chip_data *irq_data;
+	struct regmap_irq_chip_data *irq_data; /* For MUIC and Charger */
+	struct regmap_irq_chip_data *irq_data_pmic;
 	int irq;
 };
 
diff --git a/include/linux/mfd/max14577.h b/include/linux/mfd/max14577.h
index 736d39c3ec0d..08b449159fd1 100644
--- a/include/linux/mfd/max14577.h
+++ b/include/linux/mfd/max14577.h
@@ -1,7 +1,7 @@
 /*
- * max14577.h - Driver for the Maxim 14577
+ * max14577.h - Driver for the Maxim 14577/77836
  *
- * Copyright (C) 2013 Samsung Electrnoics
+ * Copyright (C) 2014 Samsung Electrnoics
  * Chanwoo Choi <cw00.choi@samsung.com>
  * Krzysztof Kozlowski <k.kozlowski@samsung.com>
  *
@@ -20,6 +20,9 @@
  * MAX14577 has MUIC, Charger devices.
  * The devices share the same I2C bus and interrupt line
  * included in this mfd driver.
+ *
+ * MAX77836 has additional PMIC and Fuel-Gauge on different I2C slave
+ * addresses.
  */
 
 #ifndef __MAX14577_H__
-- 
cgit 


From 4706a5253bcc502a5889feb98392ea7b15dd936e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 14 Apr 2014 11:17:19 +0200
Subject: extcon: max14577: Add support for MAX77836

Add support for MAX77836 chipset to the max14577 extcon driver. The
MAX77836 MUIC has additional interrupts (VIDRM, ADC1K) so IRQ handling
is split up into two functions: max14577_parse_irq() and
max77836_parse_irq().

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Tested-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/extcon/Kconfig               |   4 +-
 drivers/extcon/extcon-max14577.c     | 109 +++++++++++++++++++++++++++++------
 drivers/mfd/max14577.c               |   1 +
 include/linux/mfd/max14577-private.h |   3 +
 4 files changed, 96 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/Kconfig b/drivers/extcon/Kconfig
index be56e8ac95e6..aebde489c291 100644
--- a/drivers/extcon/Kconfig
+++ b/drivers/extcon/Kconfig
@@ -28,13 +28,13 @@ config EXTCON_ADC_JACK
 	  Say Y here to enable extcon device driver based on ADC values.
 
 config EXTCON_MAX14577
-	tristate "MAX14577 EXTCON Support"
+	tristate "MAX14577/77836 EXTCON Support"
 	depends on MFD_MAX14577
 	select IRQ_DOMAIN
 	select REGMAP_I2C
 	help
 	  If you say yes here you get support for the MUIC device of
-	  Maxim MAX14577 PMIC. The MAX14577 MUIC is a USB port accessory
+	  Maxim MAX14577/77836. The MAX14577/77836 MUIC is a USB port accessory
 	  detector and switch.
 
 config EXTCON_MAX77693
diff --git a/drivers/extcon/extcon-max14577.c b/drivers/extcon/extcon-max14577.c
index 1513013a92f1..c76734a70171 100644
--- a/drivers/extcon/extcon-max14577.c
+++ b/drivers/extcon/extcon-max14577.c
@@ -1,8 +1,9 @@
 /*
- * extcon-max14577.c - MAX14577 extcon driver to support MAX14577 MUIC
+ * extcon-max14577.c - MAX14577/77836 extcon driver to support MUIC
  *
- * Copyright (C) 2013 Samsung Electrnoics
+ * Copyright (C) 2013,2014 Samsung Electrnoics
  * Chanwoo Choi <cw00.choi@samsung.com>
+ * Krzysztof Kozlowski <k.kozlowski@samsung.com>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -62,6 +63,19 @@ static struct max14577_muic_irq max14577_muic_irqs[] = {
 	{ MAX14577_IRQ_INT2_VBVOLT,	"muic-VBVOLT" },
 };
 
+static struct max14577_muic_irq max77836_muic_irqs[] = {
+	{ MAX14577_IRQ_INT1_ADC,	"muic-ADC" },
+	{ MAX14577_IRQ_INT1_ADCLOW,	"muic-ADCLOW" },
+	{ MAX14577_IRQ_INT1_ADCERR,	"muic-ADCError" },
+	{ MAX77836_IRQ_INT1_ADC1K,	"muic-ADC1K" },
+	{ MAX14577_IRQ_INT2_CHGTYP,	"muic-CHGTYP" },
+	{ MAX14577_IRQ_INT2_CHGDETRUN,	"muic-CHGDETRUN" },
+	{ MAX14577_IRQ_INT2_DCDTMR,	"muic-DCDTMR" },
+	{ MAX14577_IRQ_INT2_DBCHG,	"muic-DBCHG" },
+	{ MAX14577_IRQ_INT2_VBVOLT,	"muic-VBVOLT" },
+	{ MAX77836_IRQ_INT2_VIDRM,	"muic-VIDRM" },
+};
+
 struct max14577_muic_info {
 	struct device *dev;
 	struct max14577 *max14577;
@@ -529,21 +543,12 @@ static void max14577_muic_irq_work(struct work_struct *work)
 	return;
 }
 
-static irqreturn_t max14577_muic_irq_handler(int irq, void *data)
+/*
+ * Sets irq_adc or irq_chg in max14577_muic_info and returns 1.
+ * Returns 0 if irq_type does not match registered IRQ for this device type.
+ */
+static int max14577_parse_irq(struct max14577_muic_info *info, int irq_type)
 {
-	struct max14577_muic_info *info = data;
-	int i, irq_type = -1;
-
-	/*
-	 * We may be called multiple times for different nested IRQ-s.
-	 * Including changes in INT1_ADC and INT2_CGHTYP at once.
-	 * However we only need to know whether it was ADC, charger
-	 * or both interrupts so decode IRQ and turn on proper flags.
-	 */
-	for (i = 0; i < info->muic_irqs_num; i++)
-		if (irq == info->muic_irqs[i].virq)
-			irq_type = info->muic_irqs[i].irq;
-
 	switch (irq_type) {
 	case MAX14577_IRQ_INT1_ADC:
 	case MAX14577_IRQ_INT1_ADCLOW:
@@ -551,7 +556,7 @@ static irqreturn_t max14577_muic_irq_handler(int irq, void *data)
 		/* Handle all of accessory except for
 		   type of charger accessory */
 		info->irq_adc = true;
-		break;
+		return 1;
 	case MAX14577_IRQ_INT2_CHGTYP:
 	case MAX14577_IRQ_INT2_CHGDETRUN:
 	case MAX14577_IRQ_INT2_DCDTMR:
@@ -559,8 +564,62 @@ static irqreturn_t max14577_muic_irq_handler(int irq, void *data)
 	case MAX14577_IRQ_INT2_VBVOLT:
 		/* Handle charger accessory */
 		info->irq_chg = true;
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Sets irq_adc or irq_chg in max14577_muic_info and returns 1.
+ * Returns 0 if irq_type does not match registered IRQ for this device type.
+ */
+static int max77836_parse_irq(struct max14577_muic_info *info, int irq_type)
+{
+	/* First check common max14577 interrupts */
+	if (max14577_parse_irq(info, irq_type))
+		return 1;
+
+	switch (irq_type) {
+	case MAX77836_IRQ_INT1_ADC1K:
+		info->irq_adc = true;
+		return 1;
+	case MAX77836_IRQ_INT2_VIDRM:
+		/* Handle charger accessory */
+		info->irq_chg = true;
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static irqreturn_t max14577_muic_irq_handler(int irq, void *data)
+{
+	struct max14577_muic_info *info = data;
+	int i, irq_type = -1;
+	bool irq_parsed;
+
+	/*
+	 * We may be called multiple times for different nested IRQ-s.
+	 * Including changes in INT1_ADC and INT2_CGHTYP at once.
+	 * However we only need to know whether it was ADC, charger
+	 * or both interrupts so decode IRQ and turn on proper flags.
+	 */
+	for (i = 0; i < info->muic_irqs_num; i++)
+		if (irq == info->muic_irqs[i].virq)
+			irq_type = info->muic_irqs[i].irq;
+
+	switch (info->max14577->dev_type) {
+	case MAXIM_DEVICE_TYPE_MAX77836:
+		irq_parsed = max77836_parse_irq(info, irq_type);
 		break;
+	case MAXIM_DEVICE_TYPE_MAX14577:
 	default:
+		irq_parsed = max14577_parse_irq(info, irq_type);
+		break;
+	}
+
+	if (!irq_parsed) {
 		dev_err(info->dev, "muic interrupt: irq %d occurred, skipped\n",
 				irq_type);
 		return IRQ_HANDLED;
@@ -646,6 +705,10 @@ static int max14577_muic_probe(struct platform_device *pdev)
 	INIT_WORK(&info->irq_work, max14577_muic_irq_work);
 
 	switch (max14577->dev_type) {
+	case MAXIM_DEVICE_TYPE_MAX77836:
+		info->muic_irqs = max77836_muic_irqs;
+		info->muic_irqs_num = ARRAY_SIZE(max77836_muic_irqs);
+		break;
 	case MAXIM_DEVICE_TYPE_MAX14577:
 	default:
 		info->muic_irqs = max14577_muic_irqs;
@@ -744,6 +807,13 @@ static int max14577_muic_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct platform_device_id max14577_muic_id[] = {
+	{ "max14577-muic", MAXIM_DEVICE_TYPE_MAX14577, },
+	{ "max77836-muic", MAXIM_DEVICE_TYPE_MAX77836, },
+	{ }
+};
+MODULE_DEVICE_TABLE(platform, max14577_muic_id);
+
 static struct platform_driver max14577_muic_driver = {
 	.driver		= {
 		.name	= "max14577-muic",
@@ -751,11 +821,12 @@ static struct platform_driver max14577_muic_driver = {
 	},
 	.probe		= max14577_muic_probe,
 	.remove		= max14577_muic_remove,
+	.id_table	= max14577_muic_id,
 };
 
 module_platform_driver(max14577_muic_driver);
 
-MODULE_DESCRIPTION("MAXIM 14577 Extcon driver");
-MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
+MODULE_DESCRIPTION("Maxim 14577/77836 Extcon driver");
+MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>, Krzysztof Kozlowski <k.kozlowski@samsung.com>");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:extcon-max14577");
diff --git a/drivers/mfd/max14577.c b/drivers/mfd/max14577.c
index 20e3b2d81bf0..484d372a4892 100644
--- a/drivers/mfd/max14577.c
+++ b/drivers/mfd/max14577.c
@@ -147,6 +147,7 @@ static const struct regmap_irq max77836_muic_irqs[] = {
 	{ .reg_offset = 0, .mask = MAX14577_INT1_ADC_MASK, },
 	{ .reg_offset = 0, .mask = MAX14577_INT1_ADCLOW_MASK, },
 	{ .reg_offset = 0, .mask = MAX14577_INT1_ADCERR_MASK, },
+	{ .reg_offset = 0, .mask = MAX77836_INT1_ADC1K_MASK, },
 	/* INT2 interrupts */
 	{ .reg_offset = 1, .mask = MAX14577_INT2_CHGTYP_MASK, },
 	{ .reg_offset = 1, .mask = MAX14577_INT2_CHGDETRUN_MASK, },
diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h
index e301bd19b067..a557ae27d8a8 100644
--- a/include/linux/mfd/max14577-private.h
+++ b/include/linux/mfd/max14577-private.h
@@ -87,6 +87,7 @@ enum max14577_muic_charger_type {
 #define MAX14577_INT1_ADC_MASK		BIT(0)
 #define MAX14577_INT1_ADCLOW_MASK	BIT(1)
 #define MAX14577_INT1_ADCERR_MASK	BIT(2)
+#define MAX77836_INT1_ADC1K_MASK	BIT(3)
 
 #define MAX14577_INT2_CHGTYP_MASK	BIT(0)
 #define MAX14577_INT2_CHGDETRUN_MASK	BIT(1)
@@ -332,6 +333,7 @@ enum max14577_irq {
 	MAX14577_IRQ_INT1_ADC,
 	MAX14577_IRQ_INT1_ADCLOW,
 	MAX14577_IRQ_INT1_ADCERR,
+	MAX77836_IRQ_INT1_ADC1K,
 
 	/* INT2 */
 	MAX14577_IRQ_INT2_CHGTYP,
@@ -339,6 +341,7 @@ enum max14577_irq {
 	MAX14577_IRQ_INT2_DCDTMR,
 	MAX14577_IRQ_INT2_DBCHG,
 	MAX14577_IRQ_INT2_VBVOLT,
+	MAX77836_IRQ_INT2_VIDRM,
 
 	/* INT3 */
 	MAX14577_IRQ_INT3_EOC,
-- 
cgit 


From 8a82b408acad29161c43072727151d373e68116a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 14 Apr 2014 11:17:20 +0200
Subject: regulator: max14577: Add support for MAX77836 regulators

Add support for MAX77836 chipset and its additional two LDO regulators.
These LDO regulators are controlled by the PMIC block with additional
regmap (different I2C slave address).

The MAX77836 charger and safeout regulators are almost identical to
MAX14577. The register layout is the same, except for the values of the
charger's current. The patch adds a simple mapping between device type and
the current supported by the charger regulator.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Reviewed-by: Mark Brown <broonie@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/regulator/Kconfig            |   7 +-
 drivers/regulator/max14577.c         | 277 ++++++++++++++++++++++++++++++-----
 include/linux/mfd/max14577-private.h |  32 ++++
 include/linux/mfd/max14577.h         |  12 +-
 4 files changed, 289 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 903eb37f047a..f0cc9e6dac3a 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -266,11 +266,12 @@ config REGULATOR_LP8788
 	  This driver supports LP8788 voltage regulator chip.
 
 config REGULATOR_MAX14577
-	tristate "Maxim 14577 regulator"
+	tristate "Maxim 14577/77836 regulator"
 	depends on MFD_MAX14577
 	help
-	  This driver controls a Maxim 14577 regulator via I2C bus.
-	  The regulators include safeout LDO and current regulator 'CHARGER'.
+	  This driver controls a Maxim MAX14577/77836 regulator via I2C bus.
+	  The MAX14577 regulators include safeout LDO and charger current
+	  regulator. The MAX77836 has two additional LDOs.
 
 config REGULATOR_MAX1586
 	tristate "Maxim 1586/1587 voltage regulator"
diff --git a/drivers/regulator/max14577.c b/drivers/regulator/max14577.c
index ed60baaeceec..5d9c605cf534 100644
--- a/drivers/regulator/max14577.c
+++ b/drivers/regulator/max14577.c
@@ -1,5 +1,5 @@
 /*
- * max14577.c - Regulator driver for the Maxim 14577
+ * max14577.c - Regulator driver for the Maxim 14577/77836
  *
  * Copyright (C) 2013,2014 Samsung Electronics
  * Krzysztof Kozlowski <k.kozlowski@samsung.com>
@@ -22,6 +22,42 @@
 #include <linux/mfd/max14577-private.h>
 #include <linux/regulator/of_regulator.h>
 
+/*
+ * Valid limits of current for max14577 and max77836 chargers.
+ * They must correspond to MBCICHWRCL and MBCICHWRCH fields in CHGCTRL4
+ * register for given chipset.
+ */
+struct maxim_charger_current {
+	/* Minimal current, set in CHGCTRL4/MBCICHWRCL, uA */
+	unsigned int min;
+	/*
+	 * Minimal current when high setting is active,
+	 * set in CHGCTRL4/MBCICHWRCH, uA
+	 */
+	unsigned int high_start;
+	/* Value of one step in high setting, uA */
+	unsigned int high_step;
+	/* Maximum current of high setting, uA */
+	unsigned int max;
+};
+
+/* Table of valid charger currents for different Maxim chipsets */
+static const struct maxim_charger_current maxim_charger_currents[] = {
+	[MAXIM_DEVICE_TYPE_UNKNOWN] = { 0, 0, 0, 0 },
+	[MAXIM_DEVICE_TYPE_MAX14577] = {
+		.min		= MAX14577_REGULATOR_CURRENT_LIMIT_MIN,
+		.high_start	= MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_START,
+		.high_step	= MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_STEP,
+		.max		= MAX14577_REGULATOR_CURRENT_LIMIT_MAX,
+	},
+	[MAXIM_DEVICE_TYPE_MAX77836] = {
+		.min		= MAX77836_REGULATOR_CURRENT_LIMIT_MIN,
+		.high_start	= MAX77836_REGULATOR_CURRENT_LIMIT_HIGH_START,
+		.high_step	= MAX77836_REGULATOR_CURRENT_LIMIT_HIGH_STEP,
+		.max		= MAX77836_REGULATOR_CURRENT_LIMIT_MAX,
+	},
+};
+
 static int max14577_reg_is_enabled(struct regulator_dev *rdev)
 {
 	int rid = rdev_get_id(rdev);
@@ -47,6 +83,9 @@ static int max14577_reg_get_current_limit(struct regulator_dev *rdev)
 {
 	u8 reg_data;
 	struct regmap *rmap = rdev->regmap;
+	struct max14577 *max14577 = rdev_get_drvdata(rdev);
+	const struct maxim_charger_current *limits =
+		&maxim_charger_currents[max14577->dev_type];
 
 	if (rdev_get_id(rdev) != MAX14577_CHARGER)
 		return -EINVAL;
@@ -54,12 +93,11 @@ static int max14577_reg_get_current_limit(struct regulator_dev *rdev)
 	max14577_read_reg(rmap, MAX14577_CHG_REG_CHG_CTRL4, &reg_data);
 
 	if ((reg_data & CHGCTRL4_MBCICHWRCL_MASK) == 0)
-		return MAX14577_REGULATOR_CURRENT_LIMIT_MIN;
+		return limits->min;
 
 	reg_data = ((reg_data & CHGCTRL4_MBCICHWRCH_MASK) >>
 			CHGCTRL4_MBCICHWRCH_SHIFT);
-	return MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_START +
-		reg_data * MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_STEP;
+	return limits->high_start + reg_data * limits->high_step;
 }
 
 static int max14577_reg_set_current_limit(struct regulator_dev *rdev,
@@ -67,33 +105,39 @@ static int max14577_reg_set_current_limit(struct regulator_dev *rdev,
 {
 	int i, current_bits = 0xf;
 	u8 reg_data;
+	struct max14577 *max14577 = rdev_get_drvdata(rdev);
+	const struct maxim_charger_current *limits =
+		&maxim_charger_currents[max14577->dev_type];
 
 	if (rdev_get_id(rdev) != MAX14577_CHARGER)
 		return -EINVAL;
 
-	if (min_uA > MAX14577_REGULATOR_CURRENT_LIMIT_MAX ||
-			max_uA < MAX14577_REGULATOR_CURRENT_LIMIT_MIN)
+	if (min_uA > limits->max || max_uA < limits->min)
 		return -EINVAL;
 
-	if (max_uA < MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_START) {
-		/* Less than 200 mA, so set 90mA (turn only Low Bit off) */
+	if (max_uA < limits->high_start) {
+		/*
+		 * Less than high_start,
+		 * so set the minimal current (turn only Low Bit off)
+		 */
 		u8 reg_data = 0x0 << CHGCTRL4_MBCICHWRCL_SHIFT;
 		return max14577_update_reg(rdev->regmap,
 				MAX14577_CHG_REG_CHG_CTRL4,
 				CHGCTRL4_MBCICHWRCL_MASK, reg_data);
 	}
 
-	/* max_uA is in range: <LIMIT_HIGH_START, inifinite>, so search for
-	 * valid current starting from LIMIT_MAX. */
-	for (i = MAX14577_REGULATOR_CURRENT_LIMIT_MAX;
-			i >= MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_START;
-			i -= MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_STEP) {
+	/*
+	 * max_uA is in range: <high_start, inifinite>, so search for
+	 * valid current starting from maximum current.
+	 */
+	for (i = limits->max; i >= limits->high_start; i -= limits->high_step) {
 		if (i <= max_uA)
 			break;
 		current_bits--;
 	}
 	BUG_ON(current_bits < 0); /* Cannot happen */
-	/* Turn Low Bit on (use range 200mA-950 mA) */
+
+	/* Turn Low Bit on (use range high_start-max)... */
 	reg_data = 0x1 << CHGCTRL4_MBCICHWRCL_SHIFT;
 	/* and set proper High Bits */
 	reg_data |= current_bits << CHGCTRL4_MBCICHWRCH_SHIFT;
@@ -118,7 +162,7 @@ static struct regulator_ops max14577_charger_ops = {
 	.set_current_limit	= max14577_reg_set_current_limit,
 };
 
-static const struct regulator_desc supported_regulators[] = {
+static const struct regulator_desc max14577_supported_regulators[] = {
 	[MAX14577_SAFEOUT] = {
 		.name		= "SAFEOUT",
 		.id		= MAX14577_SAFEOUT,
@@ -141,16 +185,88 @@ static const struct regulator_desc supported_regulators[] = {
 	},
 };
 
+static struct regulator_ops max77836_ldo_ops = {
+	.is_enabled		= regulator_is_enabled_regmap,
+	.enable			= regulator_enable_regmap,
+	.disable		= regulator_disable_regmap,
+	.list_voltage		= regulator_list_voltage_linear,
+	.map_voltage		= regulator_map_voltage_linear,
+	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
+	.set_voltage_sel	= regulator_set_voltage_sel_regmap,
+	/* TODO: add .set_suspend_mode */
+};
+
+static const struct regulator_desc max77836_supported_regulators[] = {
+	[MAX14577_SAFEOUT] = {
+		.name		= "SAFEOUT",
+		.id		= MAX14577_SAFEOUT,
+		.ops		= &max14577_safeout_ops,
+		.type		= REGULATOR_VOLTAGE,
+		.owner		= THIS_MODULE,
+		.n_voltages	= 1,
+		.min_uV		= MAX14577_REGULATOR_SAFEOUT_VOLTAGE,
+		.enable_reg	= MAX14577_REG_CONTROL2,
+		.enable_mask	= CTRL2_SFOUTORD_MASK,
+	},
+	[MAX14577_CHARGER] = {
+		.name		= "CHARGER",
+		.id		= MAX14577_CHARGER,
+		.ops		= &max14577_charger_ops,
+		.type		= REGULATOR_CURRENT,
+		.owner		= THIS_MODULE,
+		.enable_reg	= MAX14577_CHG_REG_CHG_CTRL2,
+		.enable_mask	= CHGCTRL2_MBCHOSTEN_MASK,
+	},
+	[MAX77836_LDO1] = {
+		.name		= "LDO1",
+		.id		= MAX77836_LDO1,
+		.ops		= &max77836_ldo_ops,
+		.type		= REGULATOR_VOLTAGE,
+		.owner		= THIS_MODULE,
+		.n_voltages	= MAX77836_REGULATOR_LDO_VOLTAGE_STEPS_NUM,
+		.min_uV		= MAX77836_REGULATOR_LDO_VOLTAGE_MIN,
+		.uV_step	= MAX77836_REGULATOR_LDO_VOLTAGE_STEP,
+		.enable_reg	= MAX77836_LDO_REG_CNFG1_LDO1,
+		.enable_mask	= MAX77836_CNFG1_LDO_PWRMD_MASK,
+		.vsel_reg	= MAX77836_LDO_REG_CNFG1_LDO1,
+		.vsel_mask	= MAX77836_CNFG1_LDO_TV_MASK,
+	},
+	[MAX77836_LDO2] = {
+		.name		= "LDO2",
+		.id		= MAX77836_LDO2,
+		.ops		= &max77836_ldo_ops,
+		.type		= REGULATOR_VOLTAGE,
+		.owner		= THIS_MODULE,
+		.n_voltages	= MAX77836_REGULATOR_LDO_VOLTAGE_STEPS_NUM,
+		.min_uV		= MAX77836_REGULATOR_LDO_VOLTAGE_MIN,
+		.uV_step	= MAX77836_REGULATOR_LDO_VOLTAGE_STEP,
+		.enable_reg	= MAX77836_LDO_REG_CNFG1_LDO2,
+		.enable_mask	= MAX77836_CNFG1_LDO_PWRMD_MASK,
+		.vsel_reg	= MAX77836_LDO_REG_CNFG1_LDO2,
+		.vsel_mask	= MAX77836_CNFG1_LDO_TV_MASK,
+	},
+};
+
 #ifdef CONFIG_OF
 static struct of_regulator_match max14577_regulator_matches[] = {
 	{ .name	= "SAFEOUT", },
 	{ .name = "CHARGER", },
 };
 
-static int max14577_regulator_dt_parse_pdata(struct platform_device *pdev)
+static struct of_regulator_match max77836_regulator_matches[] = {
+	{ .name	= "SAFEOUT", },
+	{ .name = "CHARGER", },
+	{ .name = "LDO1", },
+	{ .name = "LDO2", },
+};
+
+static int max14577_regulator_dt_parse_pdata(struct platform_device *pdev,
+		enum maxim_device_type dev_type)
 {
 	int ret;
 	struct device_node *np;
+	struct of_regulator_match *regulator_matches;
+	unsigned int regulator_matches_size;
 
 	np = of_get_child_by_name(pdev->dev.parent->of_node, "regulators");
 	if (!np) {
@@ -158,8 +274,19 @@ static int max14577_regulator_dt_parse_pdata(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	ret = of_regulator_match(&pdev->dev, np, max14577_regulator_matches,
-			MAX14577_REG_MAX);
+	switch (dev_type) {
+	case MAXIM_DEVICE_TYPE_MAX77836:
+		regulator_matches = max77836_regulator_matches;
+		regulator_matches_size = ARRAY_SIZE(max77836_regulator_matches);
+		break;
+	case MAXIM_DEVICE_TYPE_MAX14577:
+	default:
+		regulator_matches = max14577_regulator_matches;
+		regulator_matches_size = ARRAY_SIZE(max14577_regulator_matches);
+	}
+
+	ret = of_regulator_match(&pdev->dev, np, regulator_matches,
+			regulator_matches_size);
 	if (ret < 0)
 		dev_err(&pdev->dev, "Error parsing regulator init data: %d\n", ret);
 	else
@@ -170,31 +297,74 @@ static int max14577_regulator_dt_parse_pdata(struct platform_device *pdev)
 	return ret;
 }
 
-static inline struct regulator_init_data *match_init_data(int index)
+static inline struct regulator_init_data *match_init_data(int index,
+		enum maxim_device_type dev_type)
 {
-	return max14577_regulator_matches[index].init_data;
+	switch (dev_type) {
+	case MAXIM_DEVICE_TYPE_MAX77836:
+		return max77836_regulator_matches[index].init_data;
+
+	case MAXIM_DEVICE_TYPE_MAX14577:
+	default:
+		return max14577_regulator_matches[index].init_data;
+	}
 }
 
-static inline struct device_node *match_of_node(int index)
+static inline struct device_node *match_of_node(int index,
+		enum maxim_device_type dev_type)
 {
-	return max14577_regulator_matches[index].of_node;
+	switch (dev_type) {
+	case MAXIM_DEVICE_TYPE_MAX77836:
+		return max77836_regulator_matches[index].of_node;
+
+	case MAXIM_DEVICE_TYPE_MAX14577:
+	default:
+		return max14577_regulator_matches[index].of_node;
+	}
 }
 #else /* CONFIG_OF */
-static int max14577_regulator_dt_parse_pdata(struct platform_device *pdev)
+static int max14577_regulator_dt_parse_pdata(struct platform_device *pdev,
+		enum maxim_device_type dev_type)
 {
 	return 0;
 }
-static inline struct regulator_init_data *match_init_data(int index)
+static inline struct regulator_init_data *match_init_data(int index,
+		enum maxim_device_type dev_type)
 {
 	return NULL;
 }
 
-static inline struct device_node *match_of_node(int index)
+static inline struct device_node *match_of_node(int index,
+		enum maxim_device_type dev_type)
 {
 	return NULL;
 }
 #endif /* CONFIG_OF */
 
+/**
+ * Registers for regulators of max77836 use different I2C slave addresses so
+ * different regmaps must be used for them.
+ *
+ * Returns proper regmap for accessing regulator passed by id.
+ */
+static struct regmap *max14577_get_regmap(struct max14577 *max14577,
+		int reg_id)
+{
+	switch (max14577->dev_type) {
+	case MAXIM_DEVICE_TYPE_MAX77836:
+		switch (reg_id) {
+		case MAX77836_SAFEOUT ... MAX77836_CHARGER:
+			return max14577->regmap;
+		default:
+			/* MAX77836_LDO1 ... MAX77836_LDO2 */
+			return max14577->regmap_pmic;
+		}
+
+	case MAXIM_DEVICE_TYPE_MAX14577:
+	default:
+		return max14577->regmap;
+	}
+}
 
 static int max14577_regulator_probe(struct platform_device *pdev)
 {
@@ -202,15 +372,29 @@ static int max14577_regulator_probe(struct platform_device *pdev)
 	struct max14577_platform_data *pdata = dev_get_platdata(max14577->dev);
 	int i, ret;
 	struct regulator_config config = {};
+	const struct regulator_desc *supported_regulators;
+	unsigned int supported_regulators_size;
+	enum maxim_device_type dev_type = max14577->dev_type;
 
-	ret = max14577_regulator_dt_parse_pdata(pdev);
+	ret = max14577_regulator_dt_parse_pdata(pdev, dev_type);
 	if (ret)
 		return ret;
 
+	switch (dev_type) {
+	case MAXIM_DEVICE_TYPE_MAX77836:
+		supported_regulators = max77836_supported_regulators;
+		supported_regulators_size = ARRAY_SIZE(max77836_supported_regulators);
+		break;
+	case MAXIM_DEVICE_TYPE_MAX14577:
+	default:
+		supported_regulators = max14577_supported_regulators;
+		supported_regulators_size = ARRAY_SIZE(max14577_supported_regulators);
+	}
+
 	config.dev = &pdev->dev;
-	config.regmap = max14577->regmap;
+	config.driver_data = max14577;
 
-	for (i = 0; i < ARRAY_SIZE(supported_regulators); i++) {
+	for (i = 0; i < supported_regulators_size; i++) {
 		struct regulator_dev *regulator;
 		/*
 		 * Index of supported_regulators[] is also the id and must
@@ -220,17 +404,19 @@ static int max14577_regulator_probe(struct platform_device *pdev)
 			config.init_data = pdata->regulators[i].initdata;
 			config.of_node = pdata->regulators[i].of_node;
 		} else {
-			config.init_data = match_init_data(i);
-			config.of_node = match_of_node(i);
+			config.init_data = match_init_data(i, dev_type);
+			config.of_node = match_of_node(i, dev_type);
 		}
+		config.regmap = max14577_get_regmap(max14577,
+				supported_regulators[i].id);
 
 		regulator = devm_regulator_register(&pdev->dev,
 				&supported_regulators[i], &config);
 		if (IS_ERR(regulator)) {
 			ret = PTR_ERR(regulator);
 			dev_err(&pdev->dev,
-					"Regulator init failed for ID %d with error: %d\n",
-					i, ret);
+					"Regulator init failed for %d/%s with error: %d\n",
+					i, supported_regulators[i].name, ret);
 			return ret;
 		}
 	}
@@ -238,20 +424,41 @@ static int max14577_regulator_probe(struct platform_device *pdev)
 	return ret;
 }
 
+static const struct platform_device_id max14577_regulator_id[] = {
+	{ "max14577-regulator", MAXIM_DEVICE_TYPE_MAX14577, },
+	{ "max77836-regulator", MAXIM_DEVICE_TYPE_MAX77836, },
+	{ }
+};
+MODULE_DEVICE_TABLE(platform, max14577_regulator_id);
+
 static struct platform_driver max14577_regulator_driver = {
 	.driver = {
 		   .owner = THIS_MODULE,
 		   .name = "max14577-regulator",
 		   },
-	.probe	= max14577_regulator_probe,
+	.probe		= max14577_regulator_probe,
+	.id_table	= max14577_regulator_id,
 };
 
 static int __init max14577_regulator_init(void)
 {
+	/* Check for valid values for charger */
 	BUILD_BUG_ON(MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_START +
 			MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_STEP * 0xf !=
 			MAX14577_REGULATOR_CURRENT_LIMIT_MAX);
-	BUILD_BUG_ON(ARRAY_SIZE(supported_regulators) != MAX14577_REG_MAX);
+	BUILD_BUG_ON(MAX77836_REGULATOR_CURRENT_LIMIT_HIGH_START +
+			MAX77836_REGULATOR_CURRENT_LIMIT_HIGH_STEP * 0xf !=
+			MAX77836_REGULATOR_CURRENT_LIMIT_MAX);
+	/* Valid charger current values must be provided for each chipset */
+	BUILD_BUG_ON(ARRAY_SIZE(maxim_charger_currents) != MAXIM_DEVICE_TYPE_NUM);
+
+	BUILD_BUG_ON(ARRAY_SIZE(max14577_supported_regulators) != MAX14577_REGULATOR_NUM);
+	BUILD_BUG_ON(ARRAY_SIZE(max77836_supported_regulators) != MAX77836_REGULATOR_NUM);
+
+	BUILD_BUG_ON(MAX77836_REGULATOR_LDO_VOLTAGE_MIN +
+			(MAX77836_REGULATOR_LDO_VOLTAGE_STEP *
+			  (MAX77836_REGULATOR_LDO_VOLTAGE_STEPS_NUM - 1)) !=
+			MAX77836_REGULATOR_LDO_VOLTAGE_MAX);
 
 	return platform_driver_register(&max14577_regulator_driver);
 }
@@ -264,6 +471,6 @@ static void __exit max14577_regulator_exit(void)
 module_exit(max14577_regulator_exit);
 
 MODULE_AUTHOR("Krzysztof Kozlowski <k.kozlowski@samsung.com>");
-MODULE_DESCRIPTION("MAXIM 14577 regulator driver");
+MODULE_DESCRIPTION("Maxim 14577/77836 regulator driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:max14577-regulator");
diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h
index a557ae27d8a8..499253604026 100644
--- a/include/linux/mfd/max14577-private.h
+++ b/include/linux/mfd/max14577-private.h
@@ -261,9 +261,21 @@ enum max14577_charger_reg {
 #define MAX14577_REGULATOR_CURRENT_LIMIT_HIGH_STEP	 50000
 #define MAX14577_REGULATOR_CURRENT_LIMIT_MAX		950000
 
+/* MAX77836 regulator current limits (as in CHGCTRL4 register), uA */
+#define MAX77836_REGULATOR_CURRENT_LIMIT_MIN		 45000
+#define MAX77836_REGULATOR_CURRENT_LIMIT_HIGH_START	100000
+#define MAX77836_REGULATOR_CURRENT_LIMIT_HIGH_STEP	 25000
+#define MAX77836_REGULATOR_CURRENT_LIMIT_MAX		475000
+
 /* MAX14577 regulator SFOUT LDO voltage, fixed, uV */
 #define MAX14577_REGULATOR_SAFEOUT_VOLTAGE		4900000
 
+/* MAX77836 regulator LDOx voltage, uV */
+#define MAX77836_REGULATOR_LDO_VOLTAGE_MIN		800000
+#define MAX77836_REGULATOR_LDO_VOLTAGE_MAX		3950000
+#define MAX77836_REGULATOR_LDO_VOLTAGE_STEP		50000
+#define MAX77836_REGULATOR_LDO_VOLTAGE_STEPS_NUM	64
+
 /* Slave addr = 0x46: PMIC */
 enum max77836_pmic_reg {
 	MAX77836_PMIC_REG_PMIC_ID		= 0x20,
@@ -298,6 +310,26 @@ enum max77836_pmic_reg {
 #define MAX77836_TOPSYS_INT_T120C_MASK		BIT(MAX77836_TOPSYS_INT_T120C_SHIFT)
 #define MAX77836_TOPSYS_INT_T140C_MASK		BIT(MAX77836_TOPSYS_INT_T140C_SHIFT)
 
+/* LDO1/LDO2 CONFIG1 register */
+#define MAX77836_CNFG1_LDO_PWRMD_SHIFT		6
+#define MAX77836_CNFG1_LDO_TV_SHIFT		0
+#define MAX77836_CNFG1_LDO_PWRMD_MASK		(0x3 << MAX77836_CNFG1_LDO_PWRMD_SHIFT)
+#define MAX77836_CNFG1_LDO_TV_MASK		(0x3f << MAX77836_CNFG1_LDO_TV_SHIFT)
+
+/* LDO1/LDO2 CONFIG2 register */
+#define MAX77836_CNFG2_LDO_OVCLMPEN_SHIFT	7
+#define MAX77836_CNFG2_LDO_ALPMEN_SHIFT		6
+#define MAX77836_CNFG2_LDO_COMP_SHIFT		4
+#define MAX77836_CNFG2_LDO_POK_SHIFT		3
+#define MAX77836_CNFG2_LDO_ADE_SHIFT		1
+#define MAX77836_CNFG2_LDO_SS_SHIFT		0
+#define MAX77836_CNFG2_LDO_OVCLMPEN_MASK	BIT(MAX77836_CNFG2_LDO_OVCLMPEN_SHIFT)
+#define MAX77836_CNFG2_LDO_ALPMEN_MASK		BIT(MAX77836_CNFG2_LDO_ALPMEN_SHIFT)
+#define MAX77836_CNFG2_LDO_COMP_MASK		(0x3 << MAX77836_CNFG2_LDO_COMP_SHIFT)
+#define MAX77836_CNFG2_LDO_POK_MASK		BIT(MAX77836_CNFG2_LDO_POK_SHIFT)
+#define MAX77836_CNFG2_LDO_ADE_MASK		BIT(MAX77836_CNFG2_LDO_ADE_SHIFT)
+#define MAX77836_CNFG2_LDO_SS_MASK		BIT(MAX77836_CNFG2_LDO_SS_SHIFT)
+
 /* Slave addr = 0x6C: Fuel-Gauge/Battery */
 enum max77836_fg_reg {
 	MAX77836_FG_REG_VCELL_MSB	= 0x02,
diff --git a/include/linux/mfd/max14577.h b/include/linux/mfd/max14577.h
index 08b449159fd1..c83fbed1c7b6 100644
--- a/include/linux/mfd/max14577.h
+++ b/include/linux/mfd/max14577.h
@@ -35,7 +35,17 @@ enum max14577_regulators {
 	MAX14577_SAFEOUT = 0,
 	MAX14577_CHARGER,
 
-	MAX14577_REG_MAX,
+	MAX14577_REGULATOR_NUM,
+};
+
+/* MAX77836 regulator IDs */
+enum max77836_regulators {
+	MAX77836_SAFEOUT = 0,
+	MAX77836_CHARGER,
+	MAX77836_LDO1,
+	MAX77836_LDO2,
+
+	MAX77836_REGULATOR_NUM,
 };
 
 struct max14577_regulator_platform_data {
-- 
cgit 


From f392e51cd6ae6f6ee5b9b6d611cdc282b4c1711e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Apr 2014 11:13:14 -0400
Subject: cgroup: update cgroup->subsys_mask to ->child_subsys_mask and restore
 cgroup_root->subsys_mask

944196278d3d ("cgroup: move ->subsys_mask from cgroupfs_root to
cgroup") moved ->subsys_mask from cgroup_root to cgroup to prepare for
the unified hierarchy; however, it turns out that carrying the
subsys_mask of the children in the parent, instead of itself, is a lot
more natural.  This patch restores cgroup_root->subsys_mask and morphs
cgroup->subsys_mask into cgroup->child_subsys_mask.

* Uses of root->cgrp.subsys_mask are restored to root->subsys_mask.

* Remove automatic setting and clearing of cgrp->subsys_mask and
  instead just inherit ->child_subsys_mask from the parent during
  cgroup creation.  Note that this doesn't affect any current
  behaviors.

* Undo __kill_css() separation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  7 ++++--
 kernel/cgroup.c        | 64 +++++++++++++++++++++-----------------------------
 2 files changed, 32 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c2515851c1aa..1b5b2fe1b228 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -173,8 +173,8 @@ struct cgroup {
 	 */
 	u64 serial_nr;
 
-	/* The bitmask of subsystems attached to this cgroup */
-	unsigned long subsys_mask;
+	/* the bitmask of subsystems enabled on the child cgroups */
+	unsigned long child_subsys_mask;
 
 	/* Private pointers for each registered subsystem */
 	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
@@ -282,6 +282,9 @@ enum {
 struct cgroup_root {
 	struct kernfs_root *kf_root;
 
+	/* The bitmask of subsystems attached to this hierarchy */
+	unsigned long subsys_mask;
+
 	/* Unique id for this hierarchy. */
 	int hierarchy_id;
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a6894272353b..f944619077f4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -529,7 +529,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
 	 * won't change, so no need for locking.
 	 */
 	for_each_subsys(ss, i) {
-		if (root->cgrp.subsys_mask & (1UL << i)) {
+		if (root->subsys_mask & (1UL << i)) {
 			/* Subsystem is in this hierarchy. So we want
 			 * the subsystem state from the new
 			 * cgroup */
@@ -742,7 +742,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 	BUG_ON(!list_empty(&cgrp->children));
 
 	/* Rebind all subsystems back to the default hierarchy */
-	rebind_subsystems(&cgrp_dfl_root, cgrp->subsys_mask);
+	rebind_subsystems(&cgrp_dfl_root, root->subsys_mask);
 
 	/*
 	 * Release all the links from cset_links to this hierarchy's
@@ -1050,8 +1050,11 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 		ss->root = dst_root;
 		css->cgroup = &dst_root->cgrp;
 
-		src_root->cgrp.subsys_mask &= ~(1 << ssid);
-		dst_root->cgrp.subsys_mask |= 1 << ssid;
+		src_root->subsys_mask &= ~(1 << ssid);
+		src_root->cgrp.child_subsys_mask &= ~(1 << ssid);
+
+		dst_root->subsys_mask |= 1 << ssid;
+		dst_root->cgrp.child_subsys_mask |= 1 << ssid;
 
 		if (ss->bind)
 			ss->bind(css);
@@ -1069,7 +1072,7 @@ static int cgroup_show_options(struct seq_file *seq,
 	int ssid;
 
 	for_each_subsys(ss, ssid)
-		if (root->cgrp.subsys_mask & (1 << ssid))
+		if (root->subsys_mask & (1 << ssid))
 			seq_printf(seq, ",%s", ss->name);
 	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR)
 		seq_puts(seq, ",sane_behavior");
@@ -1273,12 +1276,12 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 	if (ret)
 		goto out_unlock;
 
-	if (opts.subsys_mask != root->cgrp.subsys_mask || opts.release_agent)
+	if (opts.subsys_mask != root->subsys_mask || opts.release_agent)
 		pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
 			   task_tgid_nr(current), current->comm);
 
-	added_mask = opts.subsys_mask & ~root->cgrp.subsys_mask;
-	removed_mask = root->cgrp.subsys_mask & ~opts.subsys_mask;
+	added_mask = opts.subsys_mask & ~root->subsys_mask;
+	removed_mask = root->subsys_mask & ~opts.subsys_mask;
 
 	/* Don't allow flags or name to change at remount */
 	if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
@@ -1535,7 +1538,7 @@ retry:
 		 * subsystems) then they must match.
 		 */
 		if ((opts.subsys_mask || opts.none) &&
-		    (opts.subsys_mask != root->cgrp.subsys_mask)) {
+		    (opts.subsys_mask != root->subsys_mask)) {
 			if (!name_match)
 				continue;
 			ret = -EBUSY;
@@ -3658,8 +3661,6 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 	cgroup_get(cgrp);
 	css_get(css->parent);
 
-	cgrp->subsys_mask |= 1 << ss->id;
-
 	if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
 	    parent->parent) {
 		pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
@@ -3780,13 +3781,15 @@ static long cgroup_create(struct cgroup *parent, const char *name,
 
 	/* let's create and online css's */
 	for_each_subsys(ss, ssid) {
-		if (root->cgrp.subsys_mask & (1 << ssid)) {
+		if (parent->child_subsys_mask & (1 << ssid)) {
 			err = create_css(cgrp, ss);
 			if (err)
 				goto err_destroy;
 		}
 	}
 
+	cgrp->child_subsys_mask = parent->child_subsys_mask;
+
 	kernfs_activate(kn);
 
 	mutex_unlock(&cgroup_mutex);
@@ -3882,7 +3885,16 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
 	queue_work(cgroup_destroy_wq, &css->destroy_work);
 }
 
-static void __kill_css(struct cgroup_subsys_state *css)
+/**
+ * kill_css - destroy a css
+ * @css: css to destroy
+ *
+ * This function initiates destruction of @css by removing cgroup interface
+ * files and putting its base reference.  ->css_offline() will be invoked
+ * asynchronously once css_tryget() is guaranteed to fail and when the
+ * reference count reaches zero, @css will be released.
+ */
+static void kill_css(struct cgroup_subsys_state *css)
 {
 	lockdep_assert_held(&cgroup_tree_mutex);
 
@@ -3911,28 +3923,6 @@ static void __kill_css(struct cgroup_subsys_state *css)
 	percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
 }
 
-/**
- * kill_css - destroy a css
- * @css: css to destroy
- *
- * This function initiates destruction of @css by removing cgroup interface
- * files and putting its base reference.  ->css_offline() will be invoked
- * asynchronously once css_tryget() is guaranteed to fail and when the
- * reference count reaches zero, @css will be released.
- */
-static void kill_css(struct cgroup_subsys_state *css)
-{
-	struct cgroup *cgrp = css->cgroup;
-
-	lockdep_assert_held(&cgroup_tree_mutex);
-
-	/* if already killed, noop */
-	if (cgrp->subsys_mask & (1 << css->ss->id)) {
-		cgrp->subsys_mask &= ~(1 << css->ss->id);
-		__kill_css(css);
-	}
-}
-
 /**
  * cgroup_destroy_locked - the first stage of cgroup destruction
  * @cgrp: cgroup to be destroyed
@@ -4145,7 +4135,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 
 	BUG_ON(online_css(css));
 
-	cgrp_dfl_root.cgrp.subsys_mask |= 1 << ss->id;
+	cgrp_dfl_root.subsys_mask |= 1 << ss->id;
 
 	mutex_unlock(&cgroup_mutex);
 	mutex_unlock(&cgroup_tree_mutex);
@@ -4302,7 +4292,7 @@ int proc_cgroup_show(struct seq_file *m, void *v)
 
 		seq_printf(m, "%d:", root->hierarchy_id);
 		for_each_subsys(ss, ssid)
-			if (root->cgrp.subsys_mask & (1 << ssid))
+			if (root->subsys_mask & (1 << ssid))
 				seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
 		if (strlen(root->name))
 			seq_printf(m, "%sname=%s", count ? "," : "",
-- 
cgit 


From 2d8f243a5e6efa57fb7c46fe83fafa45b33d0ec2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Apr 2014 11:13:15 -0400
Subject: cgroup: implement cgroup->e_csets[]

On the default unified hierarchy, a cgroup may be associated with
csses of its ancestors, which means that a css of a given cgroup may
be associated with css_sets of descendant cgroups.  This means that we
can't walk all tasks associated with a css by iterating the css_sets
associated with the cgroup as there are css_sets which are pointing to
the css but linked on the descendants.

This patch adds per-subsystem list heads cgroup->e_csets[].  Any
css_set which is pointing to a css is linked to
css->cgroup->e_csets[$SUBSYS_ID] through
css_set->e_cset_node[$SUBSYS_ID].  The lists are protected by
css_set_rwsem and will allow us to walk all css_sets associated with a
given css so that we can find out all associated tasks.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 18 ++++++++++++++++++
 kernel/cgroup.c        | 30 ++++++++++++++++++++++++++++--
 2 files changed, 46 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1b5b2fe1b228..33a0043ef454 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -187,6 +187,15 @@ struct cgroup {
 	 */
 	struct list_head cset_links;
 
+	/*
+	 * On the default hierarchy, a css_set for a cgroup with some
+	 * susbsys disabled will point to css's which are associated with
+	 * the closest ancestor which has the subsys enabled.  The
+	 * following lists all css_sets which point to this cgroup's css
+	 * for the given subsystem.
+	 */
+	struct list_head e_csets[CGROUP_SUBSYS_COUNT];
+
 	/*
 	 * Linked list running through all cgroups that can
 	 * potentially be reaped by the release agent. Protected by
@@ -369,6 +378,15 @@ struct css_set {
 	struct cgroup *mg_src_cgrp;
 	struct css_set *mg_dst_cset;
 
+	/*
+	 * On the default hierarhcy, ->subsys[ssid] may point to a css
+	 * attached to an ancestor instead of the cgroup this css_set is
+	 * associated with.  The following node is anchored at
+	 * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to
+	 * iterate through all css's attached to a given cgroup.
+	 */
+	struct list_head e_cset_node[CGROUP_SUBSYS_COUNT];
+
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
 };
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4eb2dd1bb5b1..37d966289978 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -425,6 +425,8 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
 static void put_css_set_locked(struct css_set *cset, bool taskexit)
 {
 	struct cgrp_cset_link *link, *tmp_link;
+	struct cgroup_subsys *ss;
+	int ssid;
 
 	lockdep_assert_held(&css_set_rwsem);
 
@@ -432,6 +434,8 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit)
 		return;
 
 	/* This css_set is dead. unlink it and release cgroup refcounts */
+	for_each_subsys(ss, ssid)
+		list_del(&cset->e_cset_node[ssid]);
 	hash_del(&cset->hlist);
 	css_set_count--;
 
@@ -673,7 +677,9 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 	struct css_set *cset;
 	struct list_head tmp_links;
 	struct cgrp_cset_link *link;
+	struct cgroup_subsys *ss;
 	unsigned long key;
+	int ssid;
 
 	lockdep_assert_held(&cgroup_mutex);
 
@@ -724,10 +730,14 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 
 	css_set_count++;
 
-	/* Add this cgroup group to the hash table */
+	/* Add @cset to the hash table */
 	key = css_set_hash(cset->subsys);
 	hash_add(css_set_table, &cset->hlist, key);
 
+	for_each_subsys(ss, ssid)
+		list_add_tail(&cset->e_cset_node[ssid],
+			      &cset->subsys[ssid]->cgroup->e_csets[ssid]);
+
 	up_write(&css_set_rwsem);
 
 	return cset;
@@ -1028,7 +1038,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 			     unsigned long ss_mask)
 {
 	struct cgroup_subsys *ss;
-	int ssid, ret;
+	int ssid, i, ret;
 
 	lockdep_assert_held(&cgroup_tree_mutex);
 	lockdep_assert_held(&cgroup_mutex);
@@ -1081,6 +1091,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 	for_each_subsys(ss, ssid) {
 		struct cgroup_root *src_root;
 		struct cgroup_subsys_state *css;
+		struct css_set *cset;
 
 		if (!(ss_mask & (1 << ssid)))
 			continue;
@@ -1095,6 +1106,12 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 		ss->root = dst_root;
 		css->cgroup = &dst_root->cgrp;
 
+		down_write(&css_set_rwsem);
+		hash_for_each(css_set_table, i, cset, hlist)
+			list_move_tail(&cset->e_cset_node[ss->id],
+				       &dst_root->cgrp.e_csets[ss->id]);
+		up_write(&css_set_rwsem);
+
 		src_root->subsys_mask &= ~(1 << ssid);
 		src_root->cgrp.child_subsys_mask &= ~(1 << ssid);
 
@@ -1417,6 +1434,9 @@ out_unlock:
 
 static void init_cgroup_housekeeping(struct cgroup *cgrp)
 {
+	struct cgroup_subsys *ss;
+	int ssid;
+
 	atomic_set(&cgrp->refcnt, 1);
 	INIT_LIST_HEAD(&cgrp->sibling);
 	INIT_LIST_HEAD(&cgrp->children);
@@ -1425,6 +1445,9 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
 	cgrp->dummy_css.cgroup = cgrp;
+
+	for_each_subsys(ss, ssid)
+		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
 }
 
 static void init_cgroup_root(struct cgroup_root *root,
@@ -4249,6 +4272,9 @@ int __init cgroup_init(void)
 		if (!ss->early_init)
 			cgroup_init_subsys(ss);
 
+		list_add_tail(&init_css_set.e_cset_node[ssid],
+			      &cgrp_dfl_root.cgrp.e_csets[ssid]);
+
 		/*
 		 * cftype registration needs kmalloc and can't be done
 		 * during early_init.  Register base cftypes separately.
-- 
cgit 


From 0f0a2b4fa6210147131082999f1f16d7fb79abf8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Apr 2014 11:13:15 -0400
Subject: cgroup: reorganize css_task_iter

This patch reorganizes css_task_iter so that adding effective css
support is easier.

* s/->cset_link/->cset_pos/ and s/->task/->task_pos/ for consistency

* ->origin_css is used to determine whether the iteration reached the
  last css_set.  Replace it with explicit ->cset_head so that
  css_advance_task_iter() doesn't have to know the termination
  condition directly.

* css_task_iter_next() currently assumes that it's walking a list of
  cgrp_cset_link and reaches into the current cset through the current
  link to determine the termination conditions for task walking.  As
  this won't always be true for effective css walking, add
  ->tasks_head and ->mg_tasks_head and use them to control task
  walking so that css_task_iter_next() doesn't have to know how
  css_sets are being walked.

This patch doesn't make any behavior changes.  The iteration logic
stays unchanged after the patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  9 ++++++---
 kernel/cgroup.c        | 33 +++++++++++++++++----------------
 2 files changed, 23 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 33a0043ef454..bee390586120 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -842,9 +842,12 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
 
 /* A css_task_iter should be treated as an opaque object */
 struct css_task_iter {
-	struct cgroup_subsys_state	*origin_css;
-	struct list_head		*cset_link;
-	struct list_head		*task;
+	struct list_head		*cset_pos;
+	struct list_head		*cset_head;
+
+	struct list_head		*task_pos;
+	struct list_head		*tasks_head;
+	struct list_head		*mg_tasks_head;
 };
 
 void css_task_iter_start(struct cgroup_subsys_state *css,
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0edc186cd545..d48163b26196 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2857,27 +2857,30 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
  */
 static void css_advance_task_iter(struct css_task_iter *it)
 {
-	struct list_head *l = it->cset_link;
+	struct list_head *l = it->cset_pos;
 	struct cgrp_cset_link *link;
 	struct css_set *cset;
 
 	/* Advance to the next non-empty css_set */
 	do {
 		l = l->next;
-		if (l == &it->origin_css->cgroup->cset_links) {
-			it->cset_link = NULL;
+		if (l == it->cset_head) {
+			it->cset_pos = NULL;
 			return;
 		}
 		link = list_entry(l, struct cgrp_cset_link, cset_link);
 		cset = link->cset;
 	} while (list_empty(&cset->tasks) && list_empty(&cset->mg_tasks));
 
-	it->cset_link = l;
+	it->cset_pos = l;
 
 	if (!list_empty(&cset->tasks))
-		it->task = cset->tasks.next;
+		it->task_pos = cset->tasks.next;
 	else
-		it->task = cset->mg_tasks.next;
+		it->task_pos = cset->mg_tasks.next;
+
+	it->tasks_head = &cset->tasks;
+	it->mg_tasks_head = &cset->mg_tasks;
 }
 
 /**
@@ -2903,8 +2906,8 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 
 	down_read(&css_set_rwsem);
 
-	it->origin_css = css;
-	it->cset_link = &css->cgroup->cset_links;
+	it->cset_pos = &css->cgroup->cset_links;
+	it->cset_head = it->cset_pos;
 
 	css_advance_task_iter(it);
 }
@@ -2920,12 +2923,10 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 struct task_struct *css_task_iter_next(struct css_task_iter *it)
 {
 	struct task_struct *res;
-	struct list_head *l = it->task;
-	struct cgrp_cset_link *link = list_entry(it->cset_link,
-					struct cgrp_cset_link, cset_link);
+	struct list_head *l = it->task_pos;
 
 	/* If the iterator cg is NULL, we have no tasks */
-	if (!it->cset_link)
+	if (!it->cset_pos)
 		return NULL;
 	res = list_entry(l, struct task_struct, cg_list);
 
@@ -2936,13 +2937,13 @@ struct task_struct *css_task_iter_next(struct css_task_iter *it)
 	 */
 	l = l->next;
 
-	if (l == &link->cset->tasks)
-		l = link->cset->mg_tasks.next;
+	if (l == it->tasks_head)
+		l = it->mg_tasks_head->next;
 
-	if (l == &link->cset->mg_tasks)
+	if (l == it->mg_tasks_head)
 		css_advance_task_iter(it);
 	else
-		it->task = l;
+		it->task_pos = l;
 
 	return res;
 }
-- 
cgit 


From 3ebb2b6ef38875b866ec0118bfae7bc52afd0166 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Apr 2014 11:13:15 -0400
Subject: cgroup: teach css_task_iter about effective csses

Currently, css_task_iter iterates tasks associated with a css by
visiting each css_set associated with the owning cgroup and walking
tasks of each of them.  This works fine for !unified hierarchies as
each cgroup has its own css for each associated subsystem on the
hierarchy; however, on the planned unified hierarchy, a cgroup may not
have csses associated and its tasks would be considered associated
with the matching css of the nearest ancestor which has the subsystem
enabled.

This means that on the default unified hierarchy, just walking all
tasks associated with a cgroup isn't enough to walk all tasks which
are associated with the specified css.  If any of its children doesn't
have the matching css enabled, task iteration should also include all
tasks from the subtree.  We already added cgroup->e_csets[] to list
all css_sets effectively associated with a given css and walk css_sets
on that list instead to achieve such iteration.

This patch updates css_task_iter iteration such that it walks css_sets
on cgroup->e_csets[] instead of cgroup->cset_links if iteration is
requested on a non-dummy css.  Thanks to the previous iteration
update, this change can be achieved with the addition of
css_task_iter->ss and minimal updates to css_advance_task_iter() and
css_task_iter_start().

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  2 ++
 kernel/cgroup.c        | 18 +++++++++++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index bee390586120..18fcae39e63e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -842,6 +842,8 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
 
 /* A css_task_iter should be treated as an opaque object */
 struct css_task_iter {
+	struct cgroup_subsys		*ss;
+
 	struct list_head		*cset_pos;
 	struct list_head		*cset_head;
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d48163b26196..ad28866ed44c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2868,8 +2868,14 @@ static void css_advance_task_iter(struct css_task_iter *it)
 			it->cset_pos = NULL;
 			return;
 		}
-		link = list_entry(l, struct cgrp_cset_link, cset_link);
-		cset = link->cset;
+
+		if (it->ss) {
+			cset = container_of(l, struct css_set,
+					    e_cset_node[it->ss->id]);
+		} else {
+			link = list_entry(l, struct cgrp_cset_link, cset_link);
+			cset = link->cset;
+		}
 	} while (list_empty(&cset->tasks) && list_empty(&cset->mg_tasks));
 
 	it->cset_pos = l;
@@ -2906,7 +2912,13 @@ void css_task_iter_start(struct cgroup_subsys_state *css,
 
 	down_read(&css_set_rwsem);
 
-	it->cset_pos = &css->cgroup->cset_links;
+	it->ss = css->ss;
+
+	if (it->ss)
+		it->cset_pos = &css->cgroup->e_csets[css->ss->id];
+	else
+		it->cset_pos = &css->cgroup->cset_links;
+
 	it->cset_head = it->cset_pos;
 
 	css_advance_task_iter(it);
-- 
cgit 


From 6803c006282768ec850760766a6e4eb1a6ff87df Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Apr 2014 11:13:16 -0400
Subject: cgroup: add css_set->dfl_cgrp

To implement the unified hierarchy behavior, we'll need to be able to
determine the associated cgroup on the default hierarchy from css_set.
Let's add css_set->dfl_cgrp so that it can be accessed conveniently
and efficiently.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 3 +++
 kernel/cgroup.c        | 4 ++++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 18fcae39e63e..c49d161a71cd 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -354,6 +354,9 @@ struct css_set {
 	 */
 	struct list_head cgrp_links;
 
+	/* the default cgroup associated with this css_set */
+	struct cgroup *dfl_cgrp;
+
 	/*
 	 * Set of subsystem states, one for each subsystem. This array is
 	 * immutable after creation apart from the init_css_set during
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2a4f88db3205..c66bfc8ee8a7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -651,6 +651,10 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
 	struct cgrp_cset_link *link;
 
 	BUG_ON(list_empty(tmp_links));
+
+	if (cgroup_on_dfl(cgrp))
+		cset->dfl_cgrp = cgrp;
+
 	link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
 	link->cset = cset;
 	link->cgrp = cgrp;
-- 
cgit 


From f8f22e53a262ebee37fc98004f16b066cf5bc125 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 23 Apr 2014 11:13:16 -0400
Subject: cgroup: implement dynamic subtree controller enable/disable on the
 default hierarchy

cgroup is switching away from multiple hierarchies and will use one
unified default hierarchy where controllers can be dynamically enabled
and disabled per subtree.  The default hierarchy will serve as the
unified hierarchy to which all controllers are attached and a css on
the default hierarchy would need to also serve the tasks of descendant
cgroups which don't have the controller enabled - ie. the tree may be
collapsed from leaf towards root when viewed from specific
controllers.  This has been implemented through effective css in the
previous patches.

This patch finally implements dynamic subtree controller
enable/disable on the default hierarchy via a new knob -
"cgroup.subtree_control" which controls which controllers are enabled
on the child cgroups.  Let's assume a hierarchy like the following.

  root - A - B - C
               \ D

root's "cgroup.subtree_control" determines which controllers are
enabled on A.  A's on B.  B's on C and D.  This coincides with the
fact that controllers on the immediate sub-level are used to
distribute the resources of the parent.  In fact, it's natural to
assume that resource control knobs of a child belong to its parent.
Enabling a controller in "cgroup.subtree_control" declares that
distribution of the respective resources of the cgroup will be
controlled.  Note that this means that controller enable states are
shared among siblings.

The default hierarchy has an extra restriction - only cgroups which
don't contain any task may have controllers enabled in
"cgroup.subtree_control".  Combined with the other properties of the
default hierarchy, this guarantees that, from the view point of
controllers, tasks are only on the leaf cgroups.  In other words, only
leaf csses may contain tasks.  This rules out situations where child
cgroups compete against internal tasks of the parent, which is a
competition between two different types of entities without any clear
way to determine resource distribution between the two.  Different
controllers handle it differently and all the implemented behaviors
are ambiguous, ad-hoc, cumbersome and/or just wrong.  Having this
structural constraint imposed from cgroup core removes the burden
from controller implementations and enables showing one consistent
behavior across all controllers.

When a controller is enabled or disabled, css associations for the
controller in the subtrees of each child should be updated.  After
enabling, the whole subtree of a child should point to the new css of
the child.  After disabling, the whole subtree of a child should point
to the cgroup's css.  This is implemented by first updating cgroup
states such that cgroup_e_css() result points to the appropriate css
and then invoking cgroup_update_dfl_csses() which migrates all tasks
in the affected subtrees to the self cgroup on the default hierarchy.

* When read, "cgroup.subtree_control" lists all the currently enabled
  controllers on the children of the cgroup.

* A white-space separated list of controller names, each prefixed with
  either '+' or '-', can be written to "cgroup.subtree_control".  The
  ones prefixed with '+' are enabled on the child cgroups and the ones
  prefixed with '-' are disabled.

* A controller can be enabled iff the parent's
  "cgroup.subtree_control" enables it and disabled iff no child's
  "cgroup.subtree_control" has it enabled.

* If a cgroup has tasks, no controller can be enabled via
  "cgroup.subtree_control".  Likewise, if "cgroup.subtree_control" has
  some controllers enabled, tasks can't be migrated into the cgroup.

* All controllers which aren't bound on other hierarchies are
  automatically associated with the root cgroup of the default
  hierarchy.  All the controllers which are bound to the default
  hierarchy are listed in the read-only file "cgroup.controllers" in
  the root directory.

* "cgroup.controllers" in all non-root cgroups is a read-only file whose
  content is equal to that of "cgroup.subtree_control" of the parent.
  This indicates which controllers can be used in the cgroup's
  "cgroup.subtree_control".

This is still experimental and there are some holes, one of which is
that ->can_attach() failure during cgroup_update_dfl_csses() may leave
the cgroups in an undefined state.  The issues will be addressed by
future patches.

v2: Non-root cgroups now also have "cgroup.controllers".

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |   5 +
 kernel/cgroup.c        | 367 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 370 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c49d161a71cd..ada239253ec7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -21,6 +21,7 @@
 #include <linux/percpu-refcount.h>
 #include <linux/seq_file.h>
 #include <linux/kernfs.h>
+#include <linux/wait.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -164,6 +165,7 @@ struct cgroup {
 
 	struct cgroup *parent;		/* my parent */
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
+	struct kernfs_node *control_kn;	/* kn for "cgroup.subtree_control" */
 
 	/*
 	 * Monotonically increasing unique serial number which defines a
@@ -216,6 +218,9 @@ struct cgroup {
 	/* For css percpu_ref killing and RCU-protected deletion */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
+
+	/* used to wait for offlining of csses */
+	wait_queue_head_t offline_waitq;
 };
 
 #define MAX_CGROUP_ROOT_NAMELEN 64
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c2835a9e192..809dd903ceb8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -182,6 +182,8 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 			     unsigned long ss_mask);
 static void cgroup_destroy_css_killed(struct cgroup *cgrp);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
+static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
+static void kill_css(struct cgroup_subsys_state *css);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 			      bool is_add);
 static void cgroup_pidlist_destroy_all(struct cgroup *cgrp);
@@ -338,6 +340,14 @@ static int notify_on_release(const struct cgroup *cgrp)
 #define for_each_root(root)						\
 	list_for_each_entry((root), &cgroup_roots, root_list)
 
+/* iterate over child cgrps, lock should be held throughout iteration */
+#define cgroup_for_each_live_child(child, cgrp)				\
+	list_for_each_entry((child), &(cgrp)->children, sibling)	\
+		if (({ lockdep_assert_held(&cgroup_tree_mutex);		\
+		       cgroup_is_dead(child); }))			\
+			;						\
+		else
+
 /**
  * cgroup_lock_live_group - take cgroup_mutex and check that cgrp is alive.
  * @cgrp: the cgroup to be checked for liveness
@@ -1450,6 +1460,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 
 	for_each_subsys(ss, ssid)
 		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
+
+	init_waitqueue_head(&cgrp->offline_waitq);
 }
 
 static void init_cgroup_root(struct cgroup_root *root,
@@ -1938,6 +1950,14 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
 
 	lockdep_assert_held(&cgroup_mutex);
 
+	/*
+	 * Except for the root, child_subsys_mask must be zero for a cgroup
+	 * with tasks so that child cgroups don't compete against tasks.
+	 */
+	if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && dst_cgrp->parent &&
+	    dst_cgrp->child_subsys_mask)
+		return -EBUSY;
+
 	/* look up the dst cset for each src cset and link it to src */
 	list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) {
 		struct css_set *dst_cset;
@@ -2303,6 +2323,326 @@ static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+static void cgroup_print_ss_mask(struct seq_file *seq, unsigned int ss_mask)
+{
+	struct cgroup_subsys *ss;
+	bool printed = false;
+	int ssid;
+
+	for_each_subsys(ss, ssid) {
+		if (ss_mask & (1 << ssid)) {
+			if (printed)
+				seq_putc(seq, ' ');
+			seq_printf(seq, "%s", ss->name);
+			printed = true;
+		}
+	}
+	if (printed)
+		seq_putc(seq, '\n');
+}
+
+/* show controllers which are currently attached to the default hierarchy */
+static int cgroup_root_controllers_show(struct seq_file *seq, void *v)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	cgroup_print_ss_mask(seq, cgrp->root->subsys_mask);
+	return 0;
+}
+
+/* show controllers which are enabled from the parent */
+static int cgroup_controllers_show(struct seq_file *seq, void *v)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	cgroup_print_ss_mask(seq, cgrp->parent->child_subsys_mask);
+	return 0;
+}
+
+/* show controllers which are enabled for a given cgroup's children */
+static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
+{
+	struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+	cgroup_print_ss_mask(seq, cgrp->child_subsys_mask);
+	return 0;
+}
+
+/**
+ * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
+ * @cgrp: root of the subtree to update csses for
+ *
+ * @cgrp's child_subsys_mask has changed and its subtree's (self excluded)
+ * css associations need to be updated accordingly.  This function looks up
+ * all css_sets which are attached to the subtree, creates the matching
+ * updated css_sets and migrates the tasks to the new ones.
+ */
+static int cgroup_update_dfl_csses(struct cgroup *cgrp)
+{
+	LIST_HEAD(preloaded_csets);
+	struct cgroup_subsys_state *css;
+	struct css_set *src_cset;
+	int ret;
+
+	lockdep_assert_held(&cgroup_tree_mutex);
+	lockdep_assert_held(&cgroup_mutex);
+
+	/* look up all csses currently attached to @cgrp's subtree */
+	down_read(&css_set_rwsem);
+	css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
+		struct cgrp_cset_link *link;
+
+		/* self is not affected by child_subsys_mask change */
+		if (css->cgroup == cgrp)
+			continue;
+
+		list_for_each_entry(link, &css->cgroup->cset_links, cset_link)
+			cgroup_migrate_add_src(link->cset, cgrp,
+					       &preloaded_csets);
+	}
+	up_read(&css_set_rwsem);
+
+	/* NULL dst indicates self on default hierarchy */
+	ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets);
+	if (ret)
+		goto out_finish;
+
+	list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
+		struct task_struct *last_task = NULL, *task;
+
+		/* src_csets precede dst_csets, break on the first dst_cset */
+		if (!src_cset->mg_src_cgrp)
+			break;
+
+		/*
+		 * All tasks in src_cset need to be migrated to the
+		 * matching dst_cset.  Empty it process by process.  We
+		 * walk tasks but migrate processes.  The leader might even
+		 * belong to a different cset but such src_cset would also
+		 * be among the target src_csets because the default
+		 * hierarchy enforces per-process membership.
+		 */
+		while (true) {
+			down_read(&css_set_rwsem);
+			task = list_first_entry_or_null(&src_cset->tasks,
+						struct task_struct, cg_list);
+			if (task) {
+				task = task->group_leader;
+				WARN_ON_ONCE(!task_css_set(task)->mg_src_cgrp);
+				get_task_struct(task);
+			}
+			up_read(&css_set_rwsem);
+
+			if (!task)
+				break;
+
+			/* guard against possible infinite loop */
+			if (WARN(last_task == task,
+				 "cgroup: update_dfl_csses failed to make progress, aborting in inconsistent state\n"))
+				goto out_finish;
+			last_task = task;
+
+			threadgroup_lock(task);
+			/* raced against de_thread() from another thread? */
+			if (!thread_group_leader(task)) {
+				threadgroup_unlock(task);
+				put_task_struct(task);
+				continue;
+			}
+
+			ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
+
+			threadgroup_unlock(task);
+			put_task_struct(task);
+
+			if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
+				goto out_finish;
+		}
+	}
+
+out_finish:
+	cgroup_migrate_finish(&preloaded_csets);
+	return ret;
+}
+
+/* change the enabled child controllers for a cgroup in the default hierarchy */
+static int cgroup_subtree_control_write(struct cgroup_subsys_state *dummy_css,
+					struct cftype *cft, char *buffer)
+{
+	unsigned long enable_req = 0, disable_req = 0, enable, disable;
+	struct cgroup *cgrp = dummy_css->cgroup, *child;
+	struct cgroup_subsys *ss;
+	char *tok, *p;
+	int ssid, ret;
+
+	/*
+	 * Parse input - white space separated list of subsystem names
+	 * prefixed with either + or -.
+	 */
+	p = buffer;
+	while ((tok = strsep(&p, " \t\n"))) {
+		for_each_subsys(ss, ssid) {
+			if (ss->disabled || strcmp(tok + 1, ss->name))
+				continue;
+
+			if (*tok == '+') {
+				enable_req |= 1 << ssid;
+				disable_req &= ~(1 << ssid);
+			} else if (*tok == '-') {
+				disable_req |= 1 << ssid;
+				enable_req &= ~(1 << ssid);
+			} else {
+				return -EINVAL;
+			}
+			break;
+		}
+		if (ssid == CGROUP_SUBSYS_COUNT)
+			return -EINVAL;
+	}
+
+	/*
+	 * We're gonna grab cgroup_tree_mutex which nests outside kernfs
+	 * active_ref.  cgroup_lock_live_group() already provides enough
+	 * protection.  Ensure @cgrp stays accessible and break the
+	 * active_ref protection.
+	 */
+	cgroup_get(cgrp);
+	kernfs_break_active_protection(cgrp->control_kn);
+retry:
+	enable = enable_req;
+	disable = disable_req;
+
+	mutex_lock(&cgroup_tree_mutex);
+
+	for_each_subsys(ss, ssid) {
+		if (enable & (1 << ssid)) {
+			if (cgrp->child_subsys_mask & (1 << ssid)) {
+				enable &= ~(1 << ssid);
+				continue;
+			}
+
+			/*
+			 * Because css offlining is asynchronous, userland
+			 * might try to re-enable the same controller while
+			 * the previous instance is still around.  In such
+			 * cases, wait till it's gone using offline_waitq.
+			 */
+			cgroup_for_each_live_child(child, cgrp) {
+				wait_queue_t wait;
+
+				if (!cgroup_css(child, ss))
+					continue;
+
+				prepare_to_wait(&child->offline_waitq, &wait,
+						TASK_UNINTERRUPTIBLE);
+				mutex_unlock(&cgroup_tree_mutex);
+				schedule();
+				finish_wait(&child->offline_waitq, &wait);
+				goto retry;
+			}
+
+			/* unavailable or not enabled on the parent? */
+			if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
+			    (cgrp->parent &&
+			     !(cgrp->parent->child_subsys_mask & (1 << ssid)))) {
+				ret = -ENOENT;
+				goto out_unlock_tree;
+			}
+		} else if (disable & (1 << ssid)) {
+			if (!(cgrp->child_subsys_mask & (1 << ssid))) {
+				disable &= ~(1 << ssid);
+				continue;
+			}
+
+			/* a child has it enabled? */
+			cgroup_for_each_live_child(child, cgrp) {
+				if (child->child_subsys_mask & (1 << ssid)) {
+					ret = -EBUSY;
+					goto out_unlock_tree;
+				}
+			}
+		}
+	}
+
+	if (!enable && !disable) {
+		ret = 0;
+		goto out_unlock_tree;
+	}
+
+	if (!cgroup_lock_live_group(cgrp)) {
+		ret = -ENODEV;
+		goto out_unlock_tree;
+	}
+
+	/*
+	 * Except for the root, child_subsys_mask must be zero for a cgroup
+	 * with tasks so that child cgroups don't compete against tasks.
+	 */
+	if (enable && cgrp->parent && !list_empty(&cgrp->cset_links)) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	/*
+	 * Create csses for enables and update child_subsys_mask.  This
+	 * changes cgroup_e_css() results which in turn makes the
+	 * subsequent cgroup_update_dfl_csses() associate all tasks in the
+	 * subtree to the updated csses.
+	 */
+	for_each_subsys(ss, ssid) {
+		if (!(enable & (1 << ssid)))
+			continue;
+
+		cgroup_for_each_live_child(child, cgrp) {
+			ret = create_css(child, ss);
+			if (ret)
+				goto err_undo_css;
+		}
+	}
+
+	cgrp->child_subsys_mask |= enable;
+	cgrp->child_subsys_mask &= ~disable;
+
+	ret = cgroup_update_dfl_csses(cgrp);
+	if (ret)
+		goto err_undo_css;
+
+	/* all tasks are now migrated away from the old csses, kill them */
+	for_each_subsys(ss, ssid) {
+		if (!(disable & (1 << ssid)))
+			continue;
+
+		cgroup_for_each_live_child(child, cgrp)
+			kill_css(cgroup_css(child, ss));
+	}
+
+	kernfs_activate(cgrp->kn);
+	ret = 0;
+out_unlock:
+	mutex_unlock(&cgroup_mutex);
+out_unlock_tree:
+	mutex_unlock(&cgroup_tree_mutex);
+	kernfs_unbreak_active_protection(cgrp->control_kn);
+	cgroup_put(cgrp);
+	return ret;
+
+err_undo_css:
+	cgrp->child_subsys_mask &= ~enable;
+	cgrp->child_subsys_mask |= disable;
+
+	for_each_subsys(ss, ssid) {
+		if (!(enable & (1 << ssid)))
+			continue;
+
+		cgroup_for_each_live_child(child, cgrp) {
+			struct cgroup_subsys_state *css = cgroup_css(child, ss);
+			if (css)
+				kill_css(css);
+		}
+	}
+	goto out_unlock;
+}
+
 static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 				 size_t nbytes, loff_t off)
 {
@@ -2462,9 +2802,14 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
 		return PTR_ERR(kn);
 
 	ret = cgroup_kn_set_ugid(kn);
-	if (ret)
+	if (ret) {
 		kernfs_remove(kn);
-	return ret;
+		return ret;
+	}
+
+	if (cft->seq_show == cgroup_subtree_control_show)
+		cgrp->control_kn = kn;
+	return 0;
 }
 
 /**
@@ -3557,6 +3902,22 @@ static struct cftype cgroup_base_files[] = {
 		.flags = CFTYPE_ONLY_ON_ROOT,
 		.seq_show = cgroup_sane_behavior_show,
 	},
+	{
+		.name = "cgroup.controllers",
+		.flags = CFTYPE_ONLY_ON_DFL | CFTYPE_ONLY_ON_ROOT,
+		.seq_show = cgroup_root_controllers_show,
+	},
+	{
+		.name = "cgroup.controllers",
+		.flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_controllers_show,
+	},
+	{
+		.name = "cgroup.subtree_control",
+		.flags = CFTYPE_ONLY_ON_DFL,
+		.seq_show = cgroup_subtree_control_show,
+		.write_string = cgroup_subtree_control_write,
+	},
 
 	/*
 	 * Historical crazy stuff.  These don't have "cgroup."  prefix and
@@ -3725,6 +4086,8 @@ static void offline_css(struct cgroup_subsys_state *css)
 	css->flags &= ~CSS_ONLINE;
 	css->cgroup->nr_css--;
 	RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
+
+	wake_up_all(&css->cgroup->offline_waitq);
 }
 
 /**
-- 
cgit 


From a086f6a1ebc9d8d2d028b99e779ce0dbd9691dea Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Thu, 17 Apr 2014 17:06:12 +0800
Subject: Revert "KVM: Simplify kvm->tlbs_dirty handling"

This reverts commit 5befdc385ddb2d5ae8995ad89004529a3acf58fc.

This is needed because a later patch will allow flushing the TLB
outside of mmu-lock.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 7 +++----
 include/linux/kvm_host.h   | 4 +---
 virt/kvm/kvm_main.c        | 5 ++++-
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 123efd3ec29f..410776528265 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -913,8 +913,7 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr,
  *   and kvm_mmu_notifier_invalidate_range_start detect the mapping page isn't
  *   used by guest then tlbs are not flushed, so guest is allowed to access the
  *   freed pages.
- *   We set tlbs_dirty to let the notifier know this change and delay the flush
- *   until such a case actually happens.
+ *   And we increase kvm->tlbs_dirty to delay tlbs flush in this case.
  */
 static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
@@ -943,7 +942,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 			return -EINVAL;
 
 		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
-			vcpu->kvm->tlbs_dirty = true;
+			vcpu->kvm->tlbs_dirty++;
 			continue;
 		}
 
@@ -958,7 +957,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		if (gfn != sp->gfns[i]) {
 			drop_spte(vcpu->kvm, &sp->spt[i]);
-			vcpu->kvm->tlbs_dirty = true;
+			vcpu->kvm->tlbs_dirty++;
 			continue;
 		}
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 32d263f683dc..820fc2e1d9df 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -411,9 +411,7 @@ struct kvm {
 	unsigned long mmu_notifier_seq;
 	long mmu_notifier_count;
 #endif
-	/* Protected by mmu_lock */
-	bool tlbs_dirty;
-
+	long tlbs_dirty;
 	struct list_head devices;
 };
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ea46d64c8e75..fa70c6e642b4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -186,9 +186,12 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
+	long dirty_count = kvm->tlbs_dirty;
+
+	smp_mb();
 	if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.remote_tlb_flush;
-	kvm->tlbs_dirty = false;
+	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 
-- 
cgit 


From 5686a1e5aa436c49187a60052d5885fb1f541ce6 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Mon, 14 Apr 2014 15:47:01 +0200
Subject: bus: mvebu: pass the coherency availability information at init time

Until now, the mvebu-mbus was guessing by itself whether hardware I/O
coherency was available or not by poking into the Device Tree to see
if the coherency fabric Device Tree node was present or not.

However, on some upcoming SoCs, the presence or absence of the
coherency fabric DT node isn't sufficient: in CONFIG_SMP, the
coherency can be enabled, but not in !CONFIG_SMP.

In order to clean this up, the mvebu_mbus_dt_init() function is
extended to get a boolean argument telling whether coherency is
enabled or not. Therefore, the logic to decide whether coherency is
available or not now belongs to the core SoC code instead of the
mvebu-mbus driver itself, which is much better.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Link: https://lkml.kernel.org/r/1397483228-25625-4-git-send-email-thomas.petazzoni@free-electrons.com
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 arch/arm/mach-kirkwood/board-dt.c |  2 +-
 arch/arm/mach-mvebu/board-v7.c    |  2 +-
 arch/arm/mach-mvebu/dove.c        |  2 +-
 arch/arm/mach-mvebu/kirkwood.c    |  2 +-
 drivers/bus/mvebu-mbus.c          | 11 +++--------
 include/linux/mbus.h              |  2 +-
 6 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-kirkwood/board-dt.c b/arch/arm/mach-kirkwood/board-dt.c
index 2801da49e2a3..ff18ff20f71f 100644
--- a/arch/arm/mach-kirkwood/board-dt.c
+++ b/arch/arm/mach-kirkwood/board-dt.c
@@ -195,7 +195,7 @@ static void __init kirkwood_dt_init(void)
 {
 	kirkwood_disable_mbus_error_propagation();
 
-	BUG_ON(mvebu_mbus_dt_init());
+	BUG_ON(mvebu_mbus_dt_init(false));
 
 #ifdef CONFIG_CACHE_FEROCEON_L2
 	feroceon_of_init();
diff --git a/arch/arm/mach-mvebu/board-v7.c b/arch/arm/mach-mvebu/board-v7.c
index 333fca8fdc41..1730e0cdb6f6 100644
--- a/arch/arm/mach-mvebu/board-v7.c
+++ b/arch/arm/mach-mvebu/board-v7.c
@@ -58,7 +58,7 @@ static void __init mvebu_timer_and_clk_init(void)
 	of_clk_init(NULL);
 	clocksource_of_init();
 	coherency_init();
-	BUG_ON(mvebu_mbus_dt_init());
+	BUG_ON(mvebu_mbus_dt_init(coherency_available()));
 #ifdef CONFIG_CACHE_L2X0
 	l2x0_of_init(0, ~0UL);
 #endif
diff --git a/arch/arm/mach-mvebu/dove.c b/arch/arm/mach-mvebu/dove.c
index 5e5a43624237..b50464ec1130 100644
--- a/arch/arm/mach-mvebu/dove.c
+++ b/arch/arm/mach-mvebu/dove.c
@@ -23,7 +23,7 @@ static void __init dove_init(void)
 #ifdef CONFIG_CACHE_TAUROS2
 	tauros2_init(0);
 #endif
-	BUG_ON(mvebu_mbus_dt_init());
+	BUG_ON(mvebu_mbus_dt_init(false));
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
diff --git a/arch/arm/mach-mvebu/kirkwood.c b/arch/arm/mach-mvebu/kirkwood.c
index 120207fc36f1..a77e0bae9c55 100644
--- a/arch/arm/mach-mvebu/kirkwood.c
+++ b/arch/arm/mach-mvebu/kirkwood.c
@@ -169,7 +169,7 @@ static void __init kirkwood_dt_init(void)
 {
 	kirkwood_disable_mbus_error_propagation();
 
-	BUG_ON(mvebu_mbus_dt_init());
+	BUG_ON(mvebu_mbus_dt_init(false));
 
 #ifdef CONFIG_CACHE_FEROCEON_L2
 	feroceon_of_init();
diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c
index 293e2e0a0a87..ff02fc90fc21 100644
--- a/drivers/bus/mvebu-mbus.c
+++ b/drivers/bus/mvebu-mbus.c
@@ -694,7 +694,6 @@ static int __init mvebu_mbus_common_init(struct mvebu_mbus_state *mbus,
 					 phys_addr_t sdramwins_phys_base,
 					 size_t sdramwins_size)
 {
-	struct device_node *np;
 	int win;
 
 	mbus->mbuswins_base = ioremap(mbuswins_phys_base, mbuswins_size);
@@ -707,12 +706,6 @@ static int __init mvebu_mbus_common_init(struct mvebu_mbus_state *mbus,
 		return -ENOMEM;
 	}
 
-	np = of_find_compatible_node(NULL, NULL, "marvell,coherency-fabric");
-	if (np) {
-		mbus->hw_io_coherency = 1;
-		of_node_put(np);
-	}
-
 	for (win = 0; win < mbus->soc->num_wins; win++)
 		mvebu_mbus_disable_window(mbus, win);
 
@@ -882,7 +875,7 @@ static void __init mvebu_mbus_get_pcie_resources(struct device_node *np,
 	}
 }
 
-int __init mvebu_mbus_dt_init(void)
+int __init mvebu_mbus_dt_init(bool is_coherent)
 {
 	struct resource mbuswins_res, sdramwins_res;
 	struct device_node *np, *controller;
@@ -920,6 +913,8 @@ int __init mvebu_mbus_dt_init(void)
 		return -EINVAL;
 	}
 
+	mbus_state.hw_io_coherency = is_coherent;
+
 	/* Get optional pcie-{mem,io}-aperture properties */
 	mvebu_mbus_get_pcie_resources(np, &mbus_state.pcie_mem_aperture,
 					  &mbus_state.pcie_io_aperture);
diff --git a/include/linux/mbus.h b/include/linux/mbus.h
index 345b8c53b897..550c88fb0267 100644
--- a/include/linux/mbus.h
+++ b/include/linux/mbus.h
@@ -73,6 +73,6 @@ int mvebu_mbus_del_window(phys_addr_t base, size_t size);
 int mvebu_mbus_init(const char *soc, phys_addr_t mbus_phys_base,
 		    size_t mbus_size, phys_addr_t sdram_phys_base,
 		    size_t sdram_size);
-int mvebu_mbus_dt_init(void);
+int mvebu_mbus_dt_init(bool is_coherent);
 
 #endif /* __LINUX_MBUS_H */
-- 
cgit 


From be8f274323c26ddc7e6fd6c44254b7abcdbe6389 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Thu, 17 Apr 2014 17:16:58 +0900
Subject: kprobes: Prohibit probing on .entry.text code

.entry.text is a code area which is used for interrupt/syscall
entries, and it contains a lot of sensitive code.
Thus, it is better to prohibit probing on all of that code
instead of on only a part of it.
Since some symbols are already registered on kprobe blacklist,
this also removes them from the blacklist.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Jonathan Lebon <jlebon@redhat.com>
Cc: Seiji Aguchi <seiji.aguchi@hds.com>
Link: http://lkml.kernel.org/r/20140417081658.26341.57354.stgit@ltc230.yrl.intra.hitachi.co.jp
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/entry_32.S     | 33 ---------------------------------
 arch/x86/kernel/entry_64.S     | 20 --------------------
 arch/x86/kernel/kprobes/core.c |  8 ++++++++
 include/linux/kprobes.h        |  1 +
 kernel/kprobes.c               | 13 ++++++++-----
 5 files changed, 17 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index a2a4f4697889..0ca5bf1697bb 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -314,10 +314,6 @@ ENTRY(ret_from_kernel_thread)
 	CFI_ENDPROC
 ENDPROC(ret_from_kernel_thread)
 
-/*
- * Interrupt exit functions should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 /*
  * Return to user mode is not as complex as all this looks,
  * but we want the default path for a system call return to
@@ -372,10 +368,6 @@ need_resched:
 END(resume_kernel)
 #endif
 	CFI_ENDPROC
-/*
- * End of kprobes section
- */
-	.popsection
 
 /* SYSENTER_RETURN points to after the "sysenter" instruction in
    the vsyscall page.  See vsyscall-sysentry.S, which defines the symbol.  */
@@ -495,10 +487,6 @@ sysexit_audit:
 	PTGS_TO_GS_EX
 ENDPROC(ia32_sysenter_target)
 
-/*
- * syscall stub including irq exit should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 	# system call handler stub
 ENTRY(system_call)
 	RING0_INT_FRAME			# can't unwind into user space anyway
@@ -691,10 +679,6 @@ syscall_badsys:
 	jmp resume_userspace
 END(syscall_badsys)
 	CFI_ENDPROC
-/*
- * End of kprobes section
- */
-	.popsection
 
 .macro FIXUP_ESPFIX_STACK
 /*
@@ -781,10 +765,6 @@ common_interrupt:
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
-/*
- *  Irq entries should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 #define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
@@ -961,10 +941,6 @@ ENTRY(spurious_interrupt_bug)
 	jmp error_code
 	CFI_ENDPROC
 END(spurious_interrupt_bug)
-/*
- * End of kprobes section
- */
-	.popsection
 
 #ifdef CONFIG_XEN
 /* Xen doesn't set %esp to be precisely what the normal sysenter
@@ -1239,11 +1215,6 @@ return_to_handler:
 	jmp *%ecx
 #endif
 
-/*
- * Some functions should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
-
 #ifdef CONFIG_TRACING
 ENTRY(trace_page_fault)
 	RING0_EC_FRAME
@@ -1453,7 +1424,3 @@ ENTRY(async_page_fault)
 END(async_page_fault)
 #endif
 
-/*
- * End of kprobes section
- */
-	.popsection
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1e96c3628bf2..43bb38951660 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -487,8 +487,6 @@ ENDPROC(native_usergs_sysret64)
 	TRACE_IRQS_OFF
 	.endm
 
-/* save complete stack frame */
-	.pushsection .kprobes.text, "ax"
 ENTRY(save_paranoid)
 	XCPT_FRAME 1 RDI+8
 	cld
@@ -517,7 +515,6 @@ ENTRY(save_paranoid)
 1:	ret
 	CFI_ENDPROC
 END(save_paranoid)
-	.popsection
 
 /*
  * A newly forked process directly context switches into this address.
@@ -975,10 +972,6 @@ END(interrupt)
 	call \func
 	.endm
 
-/*
- * Interrupt entry/exit should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
 	/*
 	 * The interrupt stubs push (~vector+0x80) onto the stack and
 	 * then jump to common_interrupt.
@@ -1113,10 +1106,6 @@ ENTRY(retint_kernel)
 
 	CFI_ENDPROC
 END(common_interrupt)
-/*
- * End of kprobes section
- */
-       .popsection
 
 /*
  * APIC interrupts.
@@ -1477,11 +1466,6 @@ apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \
 	hyperv_callback_vector hyperv_vector_handler
 #endif /* CONFIG_HYPERV */
 
-/*
- * Some functions should be protected against kprobes
- */
-	.pushsection .kprobes.text, "ax"
-
 paranoidzeroentry_ist debug do_debug DEBUG_STACK
 paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
 paranoiderrorentry stack_segment do_stack_segment
@@ -1898,7 +1882,3 @@ ENTRY(ignore_sysret)
 	CFI_ENDPROC
 END(ignore_sysret)
 
-/*
- * End of kprobes section
- */
-	.popsection
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index da7bdaa3ce15..7751b3dee53a 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1065,6 +1065,14 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+	return  (addr >= (unsigned long)__kprobes_text_start &&
+		 addr < (unsigned long)__kprobes_text_end) ||
+		(addr >= (unsigned long)__entry_text_start &&
+		 addr < (unsigned long)__entry_text_end);
+}
+
 int __init arch_init_kprobes(void)
 {
 	return 0;
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 925eaf28fca9..cdf9251f8249 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -265,6 +265,7 @@ extern void arch_disarm_kprobe(struct kprobe *p);
 extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
+extern bool arch_within_kprobe_blacklist(unsigned long addr);
 
 struct kprobe_insn_cache {
 	struct mutex mutex;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ceeadfcabb76..5b5ac76671e7 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -96,9 +96,6 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
 static struct kprobe_blackpoint kprobe_blacklist[] = {
 	{"preempt_schedule",},
 	{"native_get_debugreg",},
-	{"irq_entries_start",},
-	{"common_interrupt",},
-	{"mcount",},	/* mcount can be called from everywhere */
 	{NULL}    /* Terminator */
 };
 
@@ -1324,12 +1321,18 @@ out:
 	return ret;
 }
 
+bool __weak arch_within_kprobe_blacklist(unsigned long addr)
+{
+	/* The __kprobes marked functions and entry code must not be probed */
+	return addr >= (unsigned long)__kprobes_text_start &&
+	       addr < (unsigned long)__kprobes_text_end;
+}
+
 static int __kprobes in_kprobes_functions(unsigned long addr)
 {
 	struct kprobe_blackpoint *kb;
 
-	if (addr >= (unsigned long)__kprobes_text_start &&
-	    addr < (unsigned long)__kprobes_text_end)
+	if (arch_within_kprobe_blacklist(addr))
 		return -EINVAL;
 	/*
 	 * If there exists a kprobe_blacklist, verify and
-- 
cgit 


From 376e242429bf8539ef39a080ac113c8799840b13 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Thu, 17 Apr 2014 17:17:05 +0900
Subject: kprobes: Introduce NOKPROBE_SYMBOL() macro to maintain kprobes
 blacklist
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce NOKPROBE_SYMBOL() macro which builds a kprobes
blacklist at kernel build time.

The usage of this macro is similar to EXPORT_SYMBOL(),
placed after the function definition:

  NOKPROBE_SYMBOL(function);

Since this macro will inhibit inlining of static/inline
functions, this patch also introduces a nokprobe_inline macro
for static/inline functions. In this case, we must use
NOKPROBE_SYMBOL() for the inline function caller.

When CONFIG_KPROBES=y, the macro stores the given function
address in the "_kprobe_blacklist" section.

Since the data structures are not fully initialized by the
macro (because there is no "size" information),  those
are re-initialized at boot time by using kallsyms.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Link: http://lkml.kernel.org/r/20140417081705.26341.96719.stgit@ltc230.yrl.intra.hitachi.co.jp
Cc: Alok Kataria <akataria@vmware.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christopher Li <sparse@chrisli.org>
Cc: Chris Wright <chrisw@sous-sol.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jan-Simon Möller <dl9pf@gmx.de>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-sparse@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/kprobes.txt         |  16 +++++-
 arch/x86/include/asm/asm.h        |   7 +++
 arch/x86/kernel/paravirt.c        |   4 ++
 include/asm-generic/vmlinux.lds.h |   9 ++++
 include/linux/compiler.h          |   2 +
 include/linux/kprobes.h           |  20 ++++++--
 kernel/kprobes.c                  | 100 ++++++++++++++++++++------------------
 kernel/sched/core.c               |   1 +
 8 files changed, 107 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
index 0cfb00fd86ff..4bbeca8483ed 100644
--- a/Documentation/kprobes.txt
+++ b/Documentation/kprobes.txt
@@ -22,8 +22,9 @@ Appendix B: The kprobes sysctl interface
 
 Kprobes enables you to dynamically break into any kernel routine and
 collect debugging and performance information non-disruptively. You
-can trap at almost any kernel code address, specifying a handler
+can trap at almost any kernel code address(*), specifying a handler
 routine to be invoked when the breakpoint is hit.
+(*: some parts of the kernel code can not be trapped, see 1.5 Blacklist)
 
 There are currently three types of probes: kprobes, jprobes, and
 kretprobes (also called return probes).  A kprobe can be inserted
@@ -273,6 +274,19 @@ using one of the following techniques:
  or
 - Execute 'sysctl -w debug.kprobes_optimization=n'
 
+1.5 Blacklist
+
+Kprobes can probe most of the kernel except itself. This means
+that there are some functions where kprobes cannot probe. Probing
+(trapping) such functions can cause a recursive trap (e.g. double
+fault) or the nested probe handler may never be called.
+Kprobes manages such functions as a blacklist.
+If you want to add a function into the blacklist, you just need
+to (1) include linux/kprobes.h and (2) use NOKPROBE_SYMBOL() macro
+to specify a blacklisted function.
+Kprobes checks the given probe address against the blacklist and
+rejects registering it, if the given address is in the blacklist.
+
 2. Architectures Supported
 
 Kprobes, jprobes, and return probes are implemented on the following
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 4582e8e1cd1a..7730c1c5c83a 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -57,6 +57,12 @@
 	.long (from) - . ;					\
 	.long (to) - . + 0x7ffffff0 ;				\
 	.popsection
+
+# define _ASM_NOKPROBE(entry)					\
+	.pushsection "_kprobe_blacklist","aw" ;			\
+	_ASM_ALIGN ;						\
+	_ASM_PTR (entry);					\
+	.popsection
 #else
 # define _ASM_EXTABLE(from,to)					\
 	" .pushsection \"__ex_table\",\"a\"\n"			\
@@ -71,6 +77,7 @@
 	" .long (" #from ") - .\n"				\
 	" .long (" #to ") - . + 0x7ffffff0\n"			\
 	" .popsection\n"
+/* For C file, we already have NOKPROBE_SYMBOL macro */
 #endif
 
 #endif /* _ASM_X86_ASM_H */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 1b10af835c31..e136869ae42e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -23,6 +23,7 @@
 #include <linux/efi.h>
 #include <linux/bcd.h>
 #include <linux/highmem.h>
+#include <linux/kprobes.h>
 
 #include <asm/bug.h>
 #include <asm/paravirt.h>
@@ -389,6 +390,9 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
 	.end_context_switch = paravirt_nop,
 };
 
+/* At this point, native_get_debugreg has a real function entry */
+NOKPROBE_SYMBOL(native_get_debugreg);
+
 struct pv_apic_ops pv_apic_ops = {
 #ifdef CONFIG_X86_LOCAL_APIC
 	.startup_ipi_hook = paravirt_nop,
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 146e4fffd710..40ceb3ceba79 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -109,6 +109,14 @@
 #define BRANCH_PROFILE()
 #endif
 
+#ifdef CONFIG_KPROBES
+#define KPROBE_BLACKLIST()	VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \
+				*(_kprobe_blacklist)			      \
+				VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .;
+#else
+#define KPROBE_BLACKLIST()
+#endif
+
 #ifdef CONFIG_EVENT_TRACING
 #define FTRACE_EVENTS()	. = ALIGN(8);					\
 			VMLINUX_SYMBOL(__start_ftrace_events) = .;	\
@@ -507,6 +515,7 @@
 	*(.init.rodata)							\
 	FTRACE_EVENTS()							\
 	TRACE_SYSCALLS()						\
+	KPROBE_BLACKLIST()						\
 	MEM_DISCARD(init.rodata)					\
 	CLK_OF_TABLES()							\
 	RESERVEDMEM_OF_TABLES()						\
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index ee7239ea1583..0300c0f5c88b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -374,7 +374,9 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */
 #ifdef CONFIG_KPROBES
 # define __kprobes	__attribute__((__section__(".kprobes.text")))
+# define nokprobe_inline	__always_inline
 #else
 # define __kprobes
+# define nokprobe_inline	inline
 #endif
 #endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index cdf9251f8249..e059507c465d 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -205,10 +205,10 @@ struct kretprobe_blackpoint {
 	void *addr;
 };
 
-struct kprobe_blackpoint {
-	const char *name;
+struct kprobe_blacklist_entry {
+	struct list_head list;
 	unsigned long start_addr;
-	unsigned long range;
+	unsigned long end_addr;
 };
 
 #ifdef CONFIG_KPROBES
@@ -477,4 +477,18 @@ static inline int enable_jprobe(struct jprobe *jp)
 	return enable_kprobe(&jp->kp);
 }
 
+#ifdef CONFIG_KPROBES
+/*
+ * Blacklist generating macro. Specify functions which must not be
+ * probed by using this macro.
+ */
+#define __NOKPROBE_SYMBOL(fname)			\
+static unsigned long __used				\
+	__attribute__((section("_kprobe_blacklist")))	\
+	_kbl_addr_##fname = (unsigned long)fname;
+#define NOKPROBE_SYMBOL(fname)	__NOKPROBE_SYMBOL(fname)
+#else
+#define NOKPROBE_SYMBOL(fname)
+#endif
+
 #endif /* _LINUX_KPROBES_H */
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 5b5ac76671e7..5ffc6875d2a7 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -86,18 +86,8 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
 	return &(kretprobe_table_locks[hash].lock);
 }
 
-/*
- * Normally, functions that we'd want to prohibit kprobes in, are marked
- * __kprobes. But, there are cases where such functions already belong to
- * a different section (__sched for preempt_schedule)
- *
- * For such cases, we now have a blacklist
- */
-static struct kprobe_blackpoint kprobe_blacklist[] = {
-	{"preempt_schedule",},
-	{"native_get_debugreg",},
-	{NULL}    /* Terminator */
-};
+/* Blacklist -- list of struct kprobe_blacklist_entry */
+static LIST_HEAD(kprobe_blacklist);
 
 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
 /*
@@ -1328,24 +1318,22 @@ bool __weak arch_within_kprobe_blacklist(unsigned long addr)
 	       addr < (unsigned long)__kprobes_text_end;
 }
 
-static int __kprobes in_kprobes_functions(unsigned long addr)
+static bool __kprobes within_kprobe_blacklist(unsigned long addr)
 {
-	struct kprobe_blackpoint *kb;
+	struct kprobe_blacklist_entry *ent;
 
 	if (arch_within_kprobe_blacklist(addr))
-		return -EINVAL;
+		return true;
 	/*
 	 * If there exists a kprobe_blacklist, verify and
 	 * fail any probe registration in the prohibited area
 	 */
-	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
-		if (kb->start_addr) {
-			if (addr >= kb->start_addr &&
-			    addr < (kb->start_addr + kb->range))
-				return -EINVAL;
-		}
+	list_for_each_entry(ent, &kprobe_blacklist, list) {
+		if (addr >= ent->start_addr && addr < ent->end_addr)
+			return true;
 	}
-	return 0;
+
+	return false;
 }
 
 /*
@@ -1436,7 +1424,7 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
 
 	/* Ensure it is not in reserved area nor out of text */
 	if (!kernel_text_address((unsigned long) p->addr) ||
-	    in_kprobes_functions((unsigned long) p->addr) ||
+	    within_kprobe_blacklist((unsigned long) p->addr) ||
 	    jump_label_text_reserved(p->addr, p->addr)) {
 		ret = -EINVAL;
 		goto out;
@@ -2022,6 +2010,38 @@ void __kprobes dump_kprobe(struct kprobe *kp)
 	       kp->symbol_name, kp->addr, kp->offset);
 }
 
+/*
+ * Lookup and populate the kprobe_blacklist.
+ *
+ * Unlike the kretprobe blacklist, we'll need to determine
+ * the range of addresses that belong to the said functions,
+ * since a kprobe need not necessarily be at the beginning
+ * of a function.
+ */
+static int __init populate_kprobe_blacklist(unsigned long *start,
+					     unsigned long *end)
+{
+	unsigned long *iter;
+	struct kprobe_blacklist_entry *ent;
+	unsigned long offset = 0, size = 0;
+
+	for (iter = start; iter < end; iter++) {
+		if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) {
+			pr_err("Failed to find blacklist %p\n", (void *)*iter);
+			continue;
+		}
+
+		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
+		if (!ent)
+			return -ENOMEM;
+		ent->start_addr = *iter;
+		ent->end_addr = *iter + size;
+		INIT_LIST_HEAD(&ent->list);
+		list_add_tail(&ent->list, &kprobe_blacklist);
+	}
+	return 0;
+}
+
 /* Module notifier call back, checking kprobes on the module */
 static int __kprobes kprobes_module_callback(struct notifier_block *nb,
 					     unsigned long val, void *data)
@@ -2065,14 +2085,13 @@ static struct notifier_block kprobe_module_nb = {
 	.priority = 0
 };
 
+/* Markers of _kprobe_blacklist section */
+extern unsigned long __start_kprobe_blacklist[];
+extern unsigned long __stop_kprobe_blacklist[];
+
 static int __init init_kprobes(void)
 {
 	int i, err = 0;
-	unsigned long offset = 0, size = 0;
-	char *modname, namebuf[KSYM_NAME_LEN];
-	const char *symbol_name;
-	void *addr;
-	struct kprobe_blackpoint *kb;
 
 	/* FIXME allocate the probe table, currently defined statically */
 	/* initialize all list heads */
@@ -2082,26 +2101,11 @@ static int __init init_kprobes(void)
 		raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
 	}
 
-	/*
-	 * Lookup and populate the kprobe_blacklist.
-	 *
-	 * Unlike the kretprobe blacklist, we'll need to determine
-	 * the range of addresses that belong to the said functions,
-	 * since a kprobe need not necessarily be at the beginning
-	 * of a function.
-	 */
-	for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
-		kprobe_lookup_name(kb->name, addr);
-		if (!addr)
-			continue;
-
-		kb->start_addr = (unsigned long)addr;
-		symbol_name = kallsyms_lookup(kb->start_addr,
-				&size, &offset, &modname, namebuf);
-		if (!symbol_name)
-			kb->range = 0;
-		else
-			kb->range = size;
+	err = populate_kprobe_blacklist(__start_kprobe_blacklist,
+					__stop_kprobe_blacklist);
+	if (err) {
+		pr_err("kprobes: failed to populate blacklist: %d\n", err);
+		pr_err("Please take care of using kprobes.\n");
 	}
 
 	if (kretprobe_blacklist_size) {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 268a45ea238c..6863631e8cd0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2804,6 +2804,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
 		barrier();
 	} while (need_resched());
 }
+NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
 #endif /* CONFIG_PREEMPT */
 
-- 
cgit 


From 1111244ff4493448c0ee66e814e20c6e81d3b93d Mon Sep 17 00:00:00 2001
From: Sangjung Woo <sangjung.woo@samsung.com>
Date: Mon, 21 Apr 2014 19:10:08 +0900
Subject: extcon: Add resource-managed extcon register function

Add resource-managed extcon device register function for convenience.
For example, if an extcon device is attached with the new
devm_extcon_dev_register(), that extcon device is automatically
unregistered on driver detach.

Signed-off-by: Sangjung Woo <sangjung.woo@samsung.com>
[Fix bug about devm_extcon_dev_match/release() and code clean by Chanwoo Choi]
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-class.c | 69 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/extcon.h        | 13 ++++++++
 2 files changed, 82 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-class.c b/drivers/extcon/extcon-class.c
index 7ab21aa6eaa1..f6df68989651 100644
--- a/drivers/extcon/extcon-class.c
+++ b/drivers/extcon/extcon-class.c
@@ -819,6 +819,75 @@ void extcon_dev_unregister(struct extcon_dev *edev)
 }
 EXPORT_SYMBOL_GPL(extcon_dev_unregister);
 
+static void devm_extcon_dev_unreg(struct device *dev, void *res)
+{
+	extcon_dev_unregister(*(struct extcon_dev **)res);
+}
+
+static int devm_extcon_dev_match(struct device *dev, void *res, void *data)
+{
+	struct extcon_dev **r = res;
+
+	if (!r || !*r) {
+		WARN_ON(!r || !*r);
+		return 0;
+	}
+
+	return *r == data;
+}
+
+/**
+ * devm_extcon_dev_register() - Resource-managed extcon_dev_register()
+ * @dev:	device to allocate extcon device
+ * @edev:	the new extcon device to register
+ *
+ * Managed extcon_dev_register() function. If extcon device is attached with
+ * this function, that extcon device is automatically unregistered on driver
+ * detach. Internally this function calls extcon_dev_register() function.
+ * To get more information, refer that function.
+ *
+ * If extcon device is registered with this function and the device needs to be
+ * unregistered separately, devm_extcon_dev_unregister() should be used.
+ *
+ * Returns 0 on success or a negative error number on failure.
+ */
+int devm_extcon_dev_register(struct device *dev, struct extcon_dev *edev)
+{
+	struct extcon_dev **ptr;
+	int ret;
+
+	ptr = devres_alloc(devm_extcon_dev_unreg, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	ret = extcon_dev_register(edev);
+	if (ret) {
+		devres_free(ptr);
+		return ret;
+	}
+
+	*ptr = edev;
+	devres_add(dev, ptr);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devm_extcon_dev_register);
+
+/**
+ * devm_extcon_dev_unregister() - Resource-managed extcon_dev_unregister()
+ * @dev:	device the extcon belongs to
+ * @edev:	the extcon device to unregister
+ *
+ * Unregister extcon device that is registered with devm_extcon_dev_register()
+ * function.
+ */
+void devm_extcon_dev_unregister(struct device *dev, struct extcon_dev *edev)
+{
+	WARN_ON(devres_release(dev, devm_extcon_dev_unreg,
+			       devm_extcon_dev_match, edev));
+}
+EXPORT_SYMBOL_GPL(devm_extcon_dev_unregister);
+
 #ifdef CONFIG_OF
 /*
  * extcon_get_edev_by_phandle - Get the extcon device from devicetree
diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index f488145bb2d4..548447be2d8f 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -185,6 +185,10 @@ struct extcon_specific_cable_nb {
  */
 extern int extcon_dev_register(struct extcon_dev *edev);
 extern void extcon_dev_unregister(struct extcon_dev *edev);
+extern int devm_extcon_dev_register(struct device *dev,
+				    struct extcon_dev *edev);
+extern void devm_extcon_dev_unregister(struct device *dev,
+				       struct extcon_dev *edev);
 extern struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name);
 
 /*
@@ -254,6 +258,15 @@ static inline int extcon_dev_register(struct extcon_dev *edev)
 
 static inline void extcon_dev_unregister(struct extcon_dev *edev) { }
 
+static inline int devm_extcon_dev_register(struct device *dev,
+					   struct extcon_dev *edev)
+{
+	return -EINVAL;
+}
+
+static inline void devm_extcon_dev_unregister(struct device *dev,
+					      struct extcon_dev *edev) { }
+
 static inline u32 extcon_get_state(struct extcon_dev *edev)
 {
 	return 0;
-- 
cgit 


From 7ee4910ab31c4b1fafb7e4f273cbe9340ac953aa Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Mon, 31 Mar 2014 15:19:29 +0200
Subject: PCI: Remove old serial device IDs

These IDs are no longer referenced since kernel 3.1 so I suppose we can
remove them from pci_ids.h.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/pci_ids.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index d4de24b4d4c6..7fa31731c854 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1631,8 +1631,6 @@
 #define PCI_DEVICE_ID_ATT_VENUS_MODEM	0x480
 
 #define PCI_VENDOR_ID_SPECIALIX		0x11cb
-#define PCI_DEVICE_ID_SPECIALIX_IO8	0x2000
-#define PCI_DEVICE_ID_SPECIALIX_RIO	0x8000
 #define PCI_SUBDEVICE_ID_SPECIALIX_SPEED4 0xa004
 
 #define PCI_VENDOR_ID_ANALOG_DEVICES	0x11d4
@@ -2874,7 +2872,6 @@
 #define PCI_DEVICE_ID_SCALEMP_VSMP_CTL	0x1010
 
 #define PCI_VENDOR_ID_COMPUTONE		0x8e0e
-#define PCI_DEVICE_ID_COMPUTONE_IP2EX	0x0291
 #define PCI_DEVICE_ID_COMPUTONE_PG	0x0302
 #define PCI_SUBVENDOR_ID_COMPUTONE	0x8e0e
 #define PCI_SUBDEVICE_ID_COMPUTONE_PG4	0x0001
-- 
cgit 


From 879eb9c3f9b854394c5a2014b9243c00eaa329f0 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Wed, 23 Apr 2014 09:58:25 -0500
Subject: tty_ldisc: add more limits to the @write_wakeup

In the uart_handle_cts_change(), uart_write_wakeup() is called after
we call @uart_port->ops->start_tx().

The Documentation/serial/driver tells us:
-----------------------------------------------
  start_tx(port)
	Start transmitting characters.

	Locking: port->lock taken.
	Interrupts: locally disabled.
-----------------------------------------------

So when uart_write_wakeup() is called, the port->lock is already held
by the caller. See the following callstack:

	|_ uart_write_wakeup
	   |_ tty_wakeup
	      |_ ld->ops->write_wakeup

With the port->lock held, we call the @write_wakeup. Some implementations of
the @write_wakeup do not notice that the port->lock is held, and still
try to send data with uart_write(), which will try to grab the port->lock.
A deadlock occurs; see the following log caught in the Bluetooth by uart:

--------------------------------------------------------------------
BUG: spinlock lockup suspected on CPU#0, swapper/0/0
 lock: 0xdc3f4410, .magic: dead4ead, .owner: swapper/0/0, .owner_cpu: 0
CPU: 0 PID: 0 Comm: swapper/0 Tainted: G        W    3.10.17-16839-ge4a1bef #1320
[<80014cbc>] (unwind_backtrace+0x0/0x138) from [<8001251c>] (show_stack+0x10/0x14)
[<8001251c>] (show_stack+0x10/0x14) from [<802816ac>] (do_raw_spin_lock+0x108/0x184)
[<802816ac>] (do_raw_spin_lock+0x108/0x184) from [<806a22b0>] (_raw_spin_lock_irqsave+0x54/0x60)
[<806a22b0>] (_raw_spin_lock_irqsave+0x54/0x60) from [<802f5754>] (uart_write+0x38/0xe0)
[<802f5754>] (uart_write+0x38/0xe0) from [<80455270>] (hci_uart_tx_wakeup+0xa4/0x168)
[<80455270>] (hci_uart_tx_wakeup+0xa4/0x168) from [<802dab18>] (tty_wakeup+0x50/0x5c)
[<802dab18>] (tty_wakeup+0x50/0x5c) from [<802f81a4>] (imx_rtsint+0x50/0x80)
[<802f81a4>] (imx_rtsint+0x50/0x80) from [<802f88f4>] (imx_int+0x158/0x17c)
[<802f88f4>] (imx_int+0x158/0x17c) from [<8007abe0>] (handle_irq_event_percpu+0x50/0x194)
[<8007abe0>] (handle_irq_event_percpu+0x50/0x194) from [<8007ad60>] (handle_irq_event+0x3c/0x5c)
--------------------------------------------------------------------

This patch adds more limits to the @write_wakeup. Anyone who wants to
implement the @write_wakeup should follow these limits to avoid the deadlock.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_ldisc.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index add26da2faeb..00c9d688d7b7 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -92,7 +92,10 @@
  *	This function is called by the low-level tty driver to signal
  *	that line discpline should try to send more characters to the
  *	low-level driver for transmission.  If the line discpline does
- *	not have any more data to send, it can just return.
+ *	not have any more data to send, it can just return. If the line
+ *	discipline does have some data to send, please defer the real
+ *	data transfer to a tasklet or workqueue. Do not send data in
+ *	this hook, as doing so may lead to a deadlock.
  *
  * int (*hangup)(struct tty_struct *)
  *
-- 
cgit 


From c1309040967e200d3ea6415ae54cf6a69d7ad996 Mon Sep 17 00:00:00 2001
From: Mark Rustad <mark.d.rustad@intel.com>
Date: Mon, 31 Mar 2014 14:58:39 -0700
Subject: PCI: Use designated initialization in PCI_VDEVICE

By using designated initialization in PCI_VDEVICE, like other similar
macros, many "missing initializer" warnings that appear when compiling with
W=2 can be silenced.

Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Tested-by: Kavindya Deegala <kavindya.s.deegala@intel.com>
Signed-off-by: Mark Rustad <mark.d.rustad@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index aab57b4abe7f..a95aac7ad37f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -680,8 +680,8 @@ struct pci_driver {
 
 /**
  * PCI_VDEVICE - macro used to describe a specific pci device in short form
- * @vendor: the vendor name
- * @device: the 16 bit PCI Device ID
+ * @vend: the vendor name
+ * @dev: the 16 bit PCI Device ID
  *
  * This macro is used to create a struct pci_device_id that matches a
  * specific PCI device.  The subvendor, and subdevice fields will be set
@@ -689,9 +689,9 @@ struct pci_driver {
  * private data.
  */
 
-#define PCI_VDEVICE(vendor, device)		\
-	PCI_VENDOR_ID_##vendor, (device),	\
-	PCI_ANY_ID, PCI_ANY_ID, 0, 0
+#define PCI_VDEVICE(vend, dev) \
+	.vendor = PCI_VENDOR_ID_##vend, .device = (dev), \
+	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0
 
 /* these external functions are only available when PCI support is enabled */
 #ifdef CONFIG_PCI
-- 
cgit 


From 9aac5887595b765b6f64b2af08b785e82e095b57 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 18 Apr 2014 17:19:55 -0500
Subject: tty/serial: add generic serial earlycon

This introduces generic earlycon infrastructure for serial devices
based on the 8250 earlycon. This allows for supporting earlycon option
with other serial devices. The earlycon output is enabled at the time
early_params are processed.

Only architectures that have fixmap support or have functional ioremap
when early_params are processed are supported. This is the same
restriction that the 8250 driver had.

Signed-off-by: Rob Herring <robh@kernel.org>
Cc: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig    |   7 ++
 drivers/tty/serial/Makefile   |   2 +
 drivers/tty/serial/earlycon.c | 152 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/serial_core.h   |  16 +++++
 4 files changed, 177 insertions(+)
 create mode 100644 drivers/tty/serial/earlycon.c

(limited to 'include/linux')

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 30530e47cdf0..9fb6028ad900 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -7,6 +7,13 @@ if TTY
 menu "Serial drivers"
 	depends on HAS_IOMEM
 
+config SERIAL_EARLYCON
+	bool
+	help
+	  Support for early consoles with the earlycon parameter. This enables
+	  the console before standard serial driver is probed. The console is
+	  enabled when early_param is processed.
+
 source "drivers/tty/serial/8250/Kconfig"
 
 comment "Non-8250 serial port support"
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index 5f2a3f493ab9..28048178f308 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -5,6 +5,8 @@
 obj-$(CONFIG_SERIAL_CORE) += serial_core.o
 obj-$(CONFIG_SERIAL_21285) += 21285.o
 
+obj-$(CONFIG_SERIAL_EARLYCON) += earlycon.o
+
 # These Sparc drivers have to appear before others such as 8250
 # which share ttySx minor node space.  Otherwise console device
 # names change and other unplesantries.
diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c
new file mode 100644
index 000000000000..73bf1e21aae0
--- /dev/null
+++ b/drivers/tty/serial/earlycon.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd.
+ * Author: Rob Herring <robh@kernel.org>
+ *
+ * Based on 8250 earlycon:
+ * (c) Copyright 2004 Hewlett-Packard Development Company, L.P.
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/console.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/serial_core.h>
+
+#ifdef CONFIG_FIX_EARLYCON_MEM
+#include <asm/fixmap.h>
+#endif
+
+#include <asm/serial.h>
+
+static struct console early_con = {
+	.name =		"earlycon",
+	.flags =	CON_PRINTBUFFER | CON_BOOT,
+	.index =	-1,
+};
+
+static struct earlycon_device early_console_dev = {
+	.con = &early_con,
+};
+
+static void __iomem * __init earlycon_map(unsigned long paddr, size_t size)
+{
+	void __iomem *base;
+#ifdef CONFIG_FIX_EARLYCON_MEM
+	set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr & PAGE_MASK);
+	base = (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
+	base += paddr & ~PAGE_MASK;
+#else
+	base = ioremap(paddr, size);
+#endif
+	if (!base)
+		pr_err("%s: Couldn't map 0x%llx\n", __func__,
+		       (unsigned long long)paddr);
+
+	return base;
+}
+
+static int __init parse_options(struct earlycon_device *device,
+				char *options)
+{
+	struct uart_port *port = &device->port;
+	int mmio, mmio32, length, ret;
+	unsigned long addr;
+
+	if (!options)
+		return -ENODEV;
+
+	mmio = !strncmp(options, "mmio,", 5);
+	mmio32 = !strncmp(options, "mmio32,", 7);
+	if (mmio || mmio32) {
+		port->iotype = (mmio ? UPIO_MEM : UPIO_MEM32);
+		options += mmio ? 5 : 7;
+		ret = kstrtoul(options, 0, &addr);
+		if (ret)
+			return ret;
+		port->mapbase = addr;
+		if (mmio32)
+			port->regshift = 2;
+	} else if (!strncmp(options, "io,", 3)) {
+		port->iotype = UPIO_PORT;
+		options += 3;
+		ret = kstrtoul(options, 0, &addr);
+		if (ret)
+			return ret;
+		port->iobase = addr;
+		mmio = 0;
+	} else if (!strncmp(options, "0x", 2)) {
+		port->iotype = UPIO_MEM;
+		ret = kstrtoul(options, 0, &addr);
+		if (ret)
+			return ret;
+		port->mapbase = addr;
+	} else {
+		return -EINVAL;
+	}
+
+	port->uartclk = BASE_BAUD * 16;
+
+	options = strchr(options, ',');
+	if (options) {
+		options++;
+		ret = kstrtouint(options, 0, &device->baud);
+		if (ret)
+			return ret;
+		length = min(strcspn(options, " ") + 1,
+			     (size_t)(sizeof(device->options)));
+		strlcpy(device->options, options, length);
+	}
+
+	if (mmio || mmio32)
+		pr_info("Early serial console at MMIO%s 0x%llx (options '%s')\n",
+			mmio32 ? "32" : "",
+			(unsigned long long)port->mapbase,
+			device->options);
+	else
+		pr_info("Early serial console at I/O port 0x%lx (options '%s')\n",
+			port->iobase,
+			device->options);
+
+	return 0;
+}
+
+int __init setup_earlycon(char *buf, const char *match,
+			  int (*setup)(struct earlycon_device *, const char *))
+{
+	int err;
+	size_t len;
+	struct uart_port *port = &early_console_dev.port;
+
+	if (!buf || !match || !setup)
+		return 0;
+
+	len = strlen(match);
+	if (strncmp(buf, match, len))
+		return 0;
+	if (buf[len] && (buf[len] != ','))
+		return 0;
+
+	buf += len + 1;
+
+	err = parse_options(&early_console_dev, buf);
+	/* On parsing error, pass the options buf to the setup function */
+	if (!err)
+		buf = NULL;
+
+	if (port->mapbase)
+		port->membase = earlycon_map(port->mapbase, 64);
+
+	early_console_dev.con->data = &early_console_dev;
+	err = setup(&early_console_dev, buf);
+	if (err < 0)
+		return err;
+	if (!early_console_dev.con->write)
+		return -ENODEV;
+
+	register_console(early_console_dev.con);
+	return 0;
+}
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index f729be981da0..7a15b5b24c0b 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -285,6 +285,22 @@ static inline int uart_poll_timeout(struct uart_port *port)
 /*
  * Console helpers.
  */
+struct earlycon_device {
+	struct console *con;
+	struct uart_port port;
+	char options[16];		/* e.g., 115200n8 */
+	unsigned int baud;
+};
+int setup_earlycon(char *buf, const char *match,
+		   int (*setup)(struct earlycon_device *, const char *));
+
+#define EARLYCON_DECLARE(name, func) \
+static int __init name ## _setup_earlycon(char *buf) \
+{ \
+	return setup_earlycon(buf, __stringify(name), func); \
+} \
+early_param("earlycon", name ## _setup_earlycon);
+
 struct uart_port *uart_get_console(struct uart_port *ports, int nr,
 				   struct console *c);
 void uart_parse_options(char *options, int *baud, int *parity, int *bits,
-- 
cgit 


From 38535201633077cbaf8b32886b5e3005b36c9024 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 25 Apr 2014 02:32:53 -0700
Subject: blk-mq: respect rq_affinity

The blk-mq code is using its own version of the I/O completion affinity
tunables, which causes a few issues:

 - the rq_affinity sysfs file doesn't work for blk-mq devices, even if it
   still is present, thus breaking existing tuning setups.
 - the rq_affinity = 1 mode, which is the default for legacy request based
   drivers, isn't implemented at all.
 - blk-mq drivers don't implement any completion affinity with the default
   flag settings.

This patch removes the blk-mq ipi_redirect flag and sysfs file, as well
as the internal BLK_MQ_F_SHOULD_IPI flag and replaces it with code that
respects the queue-wide rq_affinity flags and also implements the
rq_affinity = 1 mode.

This means I/O completion affinity can now only be tuned block-queue wide
instead of per context, which seems more sensible to me anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sysfs.c   | 42 ------------------------------------------
 block/blk-mq.c         |  8 ++++++--
 block/blk-mq.h         |  1 -
 include/linux/blk-mq.h |  1 -
 4 files changed, 6 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 9176a6984857..8145b5b25b4b 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -203,42 +203,6 @@ static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
 	return ret;
 }
 
-static ssize_t blk_mq_hw_sysfs_ipi_show(struct blk_mq_hw_ctx *hctx, char *page)
-{
-	ssize_t ret;
-
-	spin_lock(&hctx->lock);
-	ret = sprintf(page, "%u\n", !!(hctx->flags & BLK_MQ_F_SHOULD_IPI));
-	spin_unlock(&hctx->lock);
-
-	return ret;
-}
-
-static ssize_t blk_mq_hw_sysfs_ipi_store(struct blk_mq_hw_ctx *hctx,
-					 const char *page, size_t len)
-{
-	struct blk_mq_ctx *ctx;
-	unsigned long ret;
-	unsigned int i;
-
-	if (kstrtoul(page, 10, &ret)) {
-		pr_err("blk-mq-sysfs: invalid input '%s'\n", page);
-		return -EINVAL;
-	}
-
-	spin_lock(&hctx->lock);
-	if (ret)
-		hctx->flags |= BLK_MQ_F_SHOULD_IPI;
-	else
-		hctx->flags &= ~BLK_MQ_F_SHOULD_IPI;
-	spin_unlock(&hctx->lock);
-
-	hctx_for_each_ctx(hctx, ctx, i)
-		ctx->ipi_redirect = !!ret;
-
-	return len;
-}
-
 static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
 {
 	return blk_mq_tag_sysfs_show(hctx->tags, page);
@@ -307,11 +271,6 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
 	.attr = {.name = "pending", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_rq_list_show,
 };
-static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_ipi = {
-	.attr = {.name = "ipi_redirect", .mode = S_IRUGO | S_IWUSR},
-	.show = blk_mq_hw_sysfs_ipi_show,
-	.store = blk_mq_hw_sysfs_ipi_store,
-};
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
 	.attr = {.name = "tags", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_tags_show,
@@ -326,7 +285,6 @@ static struct attribute *default_hw_ctx_attrs[] = {
 	&blk_mq_hw_sysfs_run.attr,
 	&blk_mq_hw_sysfs_dispatched.attr,
 	&blk_mq_hw_sysfs_pending.attr,
-	&blk_mq_hw_sysfs_ipi.attr,
 	&blk_mq_hw_sysfs_tags.attr,
 	&blk_mq_hw_sysfs_cpus.attr,
 	NULL,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a84112c94e74..f2e92eb92803 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -326,15 +326,19 @@ static void __blk_mq_complete_request_remote(void *data)
 void __blk_mq_complete_request(struct request *rq)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	bool shared = false;
 	int cpu;
 
-	if (!ctx->ipi_redirect) {
+	if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
 		rq->q->softirq_done_fn(rq);
 		return;
 	}
 
 	cpu = get_cpu();
-	if (cpu != ctx->cpu && cpu_online(ctx->cpu)) {
+	if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
+		shared = cpus_share_cache(cpu, ctx->cpu);
+
+	if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
 		rq->csd.func = __blk_mq_complete_request_remote;
 		rq->csd.info = rq;
 		rq->csd.flags = 0;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index b41a784de50d..1ae364ceaf8b 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -11,7 +11,6 @@ struct blk_mq_ctx {
 
 	unsigned int		cpu;
 	unsigned int		index_hw;
-	unsigned int		ipi_redirect;
 
 	/* incremented at dispatch time */
 	unsigned long		rq_dispatched[2];
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ab469d525894..3b561d651a02 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -122,7 +122,6 @@ enum {
 
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
 	BLK_MQ_F_SHOULD_SORT	= 1 << 1,
-	BLK_MQ_F_SHOULD_IPI	= 1 << 2,
 
 	BLK_MQ_S_STOPPED	= 0,
 
-- 
cgit 


From 7d568a8383bbb9c1f5167781075906acb2bb1550 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 9 Apr 2014 11:07:30 -0400
Subject: kernfs: implement kernfs_root->supers list

Currently, there's no way to find out which super_blocks are
associated with a given kernfs_root.  Let's implement it - the planned
inotify extension to kernfs_notify() needs it.

Make kernfs_super_info point back to the super_block and chain it at
kernfs_root->supers.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/kernfs/dir.c             |  1 +
 fs/kernfs/kernfs-internal.h |  5 +++++
 fs/kernfs/mount.c           | 11 +++++++++++
 include/linux/kernfs.h      |  4 ++++
 4 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 78f3403300af..43aa97988c31 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -711,6 +711,7 @@ struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
 		return ERR_PTR(-ENOMEM);
 
 	ida_init(&root->ino_ida);
+	INIT_LIST_HEAD(&root->supers);
 
 	kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
 			       KERNFS_DIR);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index 8be13b2a079b..dc84a3ef9ca2 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -49,6 +49,8 @@ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn)
  * mount.c
  */
 struct kernfs_super_info {
+	struct super_block	*sb;
+
 	/*
 	 * The root associated with this super_block.  Each super_block is
 	 * identified by the root and ns it's associated with.
@@ -62,6 +64,9 @@ struct kernfs_super_info {
 	 * an array and compare kernfs_node tag against every entry.
 	 */
 	const void		*ns;
+
+	/* anchored at kernfs_root->supers, protected by kernfs_mutex */
+	struct list_head	node;
 };
 #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info))
 
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 6a5f04ac8704..f25a7c0c3cdc 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -68,6 +68,7 @@ static int kernfs_fill_super(struct super_block *sb)
 	struct inode *inode;
 	struct dentry *root;
 
+	info->sb = sb;
 	sb->s_blocksize = PAGE_CACHE_SIZE;
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 	sb->s_magic = SYSFS_MAGIC;
@@ -166,12 +167,18 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
 		*new_sb_created = !sb->s_root;
 
 	if (!sb->s_root) {
+		struct kernfs_super_info *info = kernfs_info(sb);
+
 		error = kernfs_fill_super(sb);
 		if (error) {
 			deactivate_locked_super(sb);
 			return ERR_PTR(error);
 		}
 		sb->s_flags |= MS_ACTIVE;
+
+		mutex_lock(&kernfs_mutex);
+		list_add(&info->node, &root->supers);
+		mutex_unlock(&kernfs_mutex);
 	}
 
 	return dget(sb->s_root);
@@ -190,6 +197,10 @@ void kernfs_kill_sb(struct super_block *sb)
 	struct kernfs_super_info *info = kernfs_info(sb);
 	struct kernfs_node *root_kn = sb->s_root->d_fsdata;
 
+	mutex_lock(&kernfs_mutex);
+	list_del(&info->node);
+	mutex_unlock(&kernfs_mutex);
+
 	/*
 	 * Remove the superblock from fs_supers/s_instances
 	 * so we can't find it, before freeing kernfs_super_info.
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index b0122dc6f96a..589318b73e61 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -144,6 +144,10 @@ struct kernfs_root {
 	/* private fields, do not use outside kernfs proper */
 	struct ida		ino_ida;
 	struct kernfs_syscall_ops *syscall_ops;
+
+	/* list of kernfs_super_info of this root, protected by kernfs_mutex */
+	struct list_head	supers;
+
 	wait_queue_head_t	deactivate_waitq;
 };
 
-- 
cgit 


From 86d56134f1b67d0c18025ba5cade95c048ed528d Mon Sep 17 00:00:00 2001
From: Michael Marineau <mike@marineau.org>
Date: Thu, 10 Apr 2014 14:09:31 -0700
Subject: kobject: Make support for uevent_helper optional.

Support for uevent_helper, aka hotplug, is not required on many systems
these days but it can still be enabled via sysfs or sysctl.

Reported-by: Darren Shepherd <darren.s.shepherd@gmail.com>
Signed-off-by: Michael Marineau <mike@marineau.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/Kconfig    | 17 +++++++++++------
 include/linux/kobject.h |  2 ++
 kernel/ksysfs.c         |  5 ++++-
 kernel/sysctl.c         |  4 ++--
 lib/kobject_uevent.c    |  6 ++++++
 5 files changed, 25 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 8fa8deab6449..4b7b4522b64f 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -1,10 +1,10 @@
 menu "Generic Driver Options"
 
-config UEVENT_HELPER_PATH
-	string "path to uevent helper"
-	default ""
+config UEVENT_HELPER
+	bool "Support for uevent helper"
+	default y
 	help
-	  Path to uevent helper program forked by the kernel for
+	  The uevent helper program is forked by the kernel for
 	  every uevent.
 	  Before the switch to the netlink-based uevent source, this was
 	  used to hook hotplug scripts into kernel device events. It
@@ -15,8 +15,13 @@ config UEVENT_HELPER_PATH
 	  that it creates a high system load, or on smaller systems
 	  it is known to create out-of-memory situations during bootup.
 
-	  To disable user space helper program execution at early boot
-	  time specify an empty string here. This setting can be altered
+config UEVENT_HELPER_PATH
+	string "path to uevent helper"
+	depends on UEVENT_HELPER
+	default ""
+	help
+	  To disable user space helper program execution by default,
+	  specify an empty string here. This setting can still be altered
 	  via /proc/sys/kernel/hotplug or via /sys/kernel/uevent_helper
 	  later at runtime.
 
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index f896a33e8341..2d61b909f414 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -32,8 +32,10 @@
 #define UEVENT_NUM_ENVP			32	/* number of env pointers */
 #define UEVENT_BUFFER_SIZE		2048	/* buffer for the variables */
 
+#ifdef CONFIG_UEVENT_HELPER
 /* path to the userspace helper executed on an event */
 extern char uevent_helper[];
+#endif
 
 /* counter to tag the uevent, read only except for the kobject core */
 extern u64 uevent_seqnum;
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 2495a9b14ac8..6683ccef9fff 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -37,6 +37,7 @@ static ssize_t uevent_seqnum_show(struct kobject *kobj,
 }
 KERNEL_ATTR_RO(uevent_seqnum);
 
+#ifdef CONFIG_UEVENT_HELPER
 /* uevent helper program, used during early boot */
 static ssize_t uevent_helper_show(struct kobject *kobj,
 				  struct kobj_attribute *attr, char *buf)
@@ -56,7 +57,7 @@ static ssize_t uevent_helper_store(struct kobject *kobj,
 	return count;
 }
 KERNEL_ATTR_RW(uevent_helper);
-
+#endif
 
 #ifdef CONFIG_PROFILING
 static ssize_t profiling_show(struct kobject *kobj,
@@ -189,7 +190,9 @@ EXPORT_SYMBOL_GPL(kernel_kobj);
 static struct attribute * kernel_attrs[] = {
 	&fscaps_attr.attr,
 	&uevent_seqnum_attr.attr,
+#ifdef CONFIG_UEVENT_HELPER
 	&uevent_helper_attr.attr,
+#endif
 #ifdef CONFIG_PROFILING
 	&profiling_attr.attr,
 #endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 74f5b580fe34..bc966a8ffc3e 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -643,7 +643,7 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &one,
 	},
 #endif
-
+#ifdef CONFIG_UEVENT_HELPER
 	{
 		.procname	= "hotplug",
 		.data		= &uevent_helper,
@@ -651,7 +651,7 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dostring,
 	},
-
+#endif
 #ifdef CONFIG_CHR_DEV_SG
 	{
 		.procname	= "sg-big-buff",
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 4e3bd71bd949..9ebf9e20de53 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -29,7 +29,9 @@
 
 
 u64 uevent_seqnum;
+#ifdef CONFIG_UEVENT_HELPER
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
+#endif
 #ifdef CONFIG_NET
 struct uevent_sock {
 	struct list_head list;
@@ -109,6 +111,7 @@ static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data)
 }
 #endif
 
+#ifdef CONFIG_UEVENT_HELPER
 static int kobj_usermode_filter(struct kobject *kobj)
 {
 	const struct kobj_ns_type_operations *ops;
@@ -147,6 +150,7 @@ static void cleanup_uevent_env(struct subprocess_info *info)
 {
 	kfree(info->data);
 }
+#endif
 
 /**
  * kobject_uevent_env - send an uevent with environmental data
@@ -323,6 +327,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 #endif
 	mutex_unlock(&uevent_sock_mutex);
 
+#ifdef CONFIG_UEVENT_HELPER
 	/* call uevent_helper, usually only enabled during early boot */
 	if (uevent_helper[0] && !kobj_usermode_filter(kobj)) {
 		struct subprocess_info *info;
@@ -347,6 +352,7 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
 			env = NULL;	/* freed by cleanup_uevent_env */
 		}
 	}
+#endif
 
 exit:
 	kfree(devpath);
-- 
cgit 


From 842b597ee0a7e1aa5a3148164ffdba00ec17f614 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 25 Apr 2014 18:28:02 -0400
Subject: cgroup: implement cgroup.populated for the default hierarchy

cgroup users often need a way to determine when a cgroup's
subhierarchy becomes empty so that it can be cleaned up.  cgroup
currently provides release_agent for it; unfortunately, this mechanism
is riddled with issues.

* It delivers events by forking and execing a userland binary
  specified as the release_agent.  This is a long deprecated method of
  notification delivery.  It's extremely heavy, slow and cumbersome to
  integrate with larger infrastructure.

* There is a single monitoring point at the root.  There's no way to
  delegate management of a subtree.

* The event isn't recursive.  It triggers when a cgroup doesn't have
  any tasks or child cgroups.  Events for internal nodes trigger only
  after all children are removed.  This again makes it impossible to
  delegate management of a subtree.

* Events are filtered from the kernel side.  "notify_on_release" file
  is used to subscribe to or suppress release event.  This is
  unnecessarily complicated and probably done this way because event
  delivery itself was expensive.

This patch implements interface file "cgroup.populated" which can be
used to monitor whether the cgroup's subhierarchy has tasks in it or
not.  Its value is 0 if there is no task in the cgroup and its
descendants; otherwise, 1, and kernfs_notify() notification is
triggered when the value changes, which can be monitored through poll
and [di]notify.

This is a lot lighter and simpler and trivially allows delegating
management of subhierarchy - subhierarchy monitoring can block further
propagation simply by putting itself or another process in the root of
the subhierarchy and monitor events that it's interested in from there
without interfering with monitoring higher in the tree.

v2: Patch description updated as per Serge.

v3: "cgroup.subtree_populated" renamed to "cgroup.populated".  The
    subtree_ prefix was a bit confusing because
    "cgroup.subtree_control" uses it to denote the tree rooted at the
    cgroup sans the cgroup itself while the populated state includes
    the cgroup itself.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Serge Hallyn <serge.hallyn@ubuntu.com>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Lennart Poettering <lennart@poettering.net>
---
 include/linux/cgroup.h | 15 ++++++++++++
 kernel/cgroup.c        | 65 ++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 76 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ada239253ec7..4b38e2d6110d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -154,6 +154,14 @@ struct cgroup {
 	/* the number of attached css's */
 	int nr_css;
 
+	/*
+	 * If this cgroup contains any tasks, it contributes one to
+	 * populated_cnt.  All children with non-zero populated_cnt of
+	 * their own contribute one.  The count is zero iff there's no task
+	 * in this cgroup or its subtree.
+	 */
+	int populated_cnt;
+
 	atomic_t refcnt;
 
 	/*
@@ -166,6 +174,7 @@ struct cgroup {
 	struct cgroup *parent;		/* my parent */
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
 	struct kernfs_node *control_kn;	/* kn for "cgroup.subtree_control" */
+	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
 	/*
 	 * Monotonically increasing unique serial number which defines a
@@ -264,6 +273,12 @@ enum {
 	 *
 	 * - "cgroup.clone_children" is removed.
 	 *
+	 * - "cgroup.populated" is available.  Its value is 0 if
+	 *   the cgroup and its descendants contain no task; otherwise, 1.
+	 *   The file also generates kernfs notification which can be
+	 *   monitored through poll and [di]notify when the value of the
+	 *   file changes.
+	 *
 	 * - If mount is requested with sane_behavior but without any
 	 *   subsystem, the default unified hierarchy is mounted.
 	 *
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 809dd903ceb8..0f986f7afee4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -411,6 +411,43 @@ static struct css_set init_css_set = {
 
 static int css_set_count	= 1;	/* 1 for init_css_set */
 
+/**
+ * cgroup_update_populated - update populated count of a cgroup
+ * @cgrp: the target cgroup
+ * @populated: inc or dec populated count
+ *
+ * @cgrp is either getting the first task (css_set) or losing the last.
+ * Update @cgrp->populated_cnt accordingly.  The count is propagated
+ * towards root so that a given cgroup's populated_cnt is zero iff the
+ * cgroup and all its descendants are empty.
+ *
+ * @cgrp's interface file "cgroup.populated" is zero if
+ * @cgrp->populated_cnt is zero and 1 otherwise.  When @cgrp->populated_cnt
+ * changes from or to zero, userland is notified that the content of the
+ * interface file has changed.  This can be used to detect when @cgrp and
+ * its descendants become populated or empty.
+ */
+static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
+{
+	lockdep_assert_held(&css_set_rwsem);
+
+	do {
+		bool trigger;
+
+		if (populated)
+			trigger = !cgrp->populated_cnt++;
+		else
+			trigger = !--cgrp->populated_cnt;
+
+		if (!trigger)
+			break;
+
+		if (cgrp->populated_kn)
+			kernfs_notify(cgrp->populated_kn);
+		cgrp = cgrp->parent;
+	} while (cgrp);
+}
+
 /*
  * hash table for cgroup groups. This improves the performance to find
  * an existing css_set. This hash doesn't (currently) take into
@@ -456,10 +493,13 @@ static void put_css_set_locked(struct css_set *cset, bool taskexit)
 		list_del(&link->cgrp_link);
 
 		/* @cgrp can't go away while we're holding css_set_rwsem */
-		if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) {
-			if (taskexit)
-				set_bit(CGRP_RELEASABLE, &cgrp->flags);
-			check_for_release(cgrp);
+		if (list_empty(&cgrp->cset_links)) {
+			cgroup_update_populated(cgrp, false);
+			if (notify_on_release(cgrp)) {
+				if (taskexit)
+					set_bit(CGRP_RELEASABLE, &cgrp->flags);
+				check_for_release(cgrp);
+			}
 		}
 
 		kfree(link);
@@ -668,7 +708,11 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
 	link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
 	link->cset = cset;
 	link->cgrp = cgrp;
+
+	if (list_empty(&cgrp->cset_links))
+		cgroup_update_populated(cgrp, true);
 	list_move(&link->cset_link, &cgrp->cset_links);
+
 	/*
 	 * Always add links to the tail of the list so that the list
 	 * is sorted by order of hierarchy creation
@@ -2643,6 +2687,12 @@ err_undo_css:
 	goto out_unlock;
 }
 
+static int cgroup_populated_show(struct seq_file *seq, void *v)
+{
+	seq_printf(seq, "%d\n", (bool)seq_css(seq)->cgroup->populated_cnt);
+	return 0;
+}
+
 static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 				 size_t nbytes, loff_t off)
 {
@@ -2809,6 +2859,8 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
 
 	if (cft->seq_show == cgroup_subtree_control_show)
 		cgrp->control_kn = kn;
+	else if (cft->seq_show == cgroup_populated_show)
+		cgrp->populated_kn = kn;
 	return 0;
 }
 
@@ -3918,6 +3970,11 @@ static struct cftype cgroup_base_files[] = {
 		.seq_show = cgroup_subtree_control_show,
 		.write_string = cgroup_subtree_control_write,
 	},
+	{
+		.name = "cgroup.populated",
+		.flags = CFTYPE_ONLY_ON_DFL | CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_populated_show,
+	},
 
 	/*
 	 * Historical crazy stuff.  These don't have "cgroup."  prefix and
-- 
cgit 


From ea7e586bdd331fd6fba2b6f9fd3777928c2814d8 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 13 Apr 2014 20:08:00 +0100
Subject: iio: st_sensors: move regulator retrieveal to core

Currently the pressure sensor has code to retrieve and enable two
regulators for Vdd and Vdd IO, but actually these voltage inputs
are found on all of these ST sensors, so move the regulator
handling to the core and make sure all the ST sensors call these
functions on probe() and remove() to enable/disable power.

Also move over to obtaining the regulator from the *parent* device of
the IIO device: as the IIO device is created on-the-fly in this very
subsystem, it is very unlikely to ever have any regulators attached
to it whatsoever. It is much more likely that the parent is a
platform device, possibly instantiated from a device tree, which in
turn has Vdd and Vdd IO supplies assigned to it.

Cc: Lee Jones <lee.jones@linaro.org>
Cc: Denis CIOCCA <denis.ciocca@st.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/accel/st_accel_core.c               |  4 +++
 drivers/iio/common/st_sensors/st_sensors_core.c | 37 +++++++++++++++++++++++
 drivers/iio/gyro/st_gyro_core.c                 |  4 +++
 drivers/iio/magnetometer/st_magn_core.c         |  4 +++
 drivers/iio/pressure/st_pressure_core.c         | 39 ++-----------------------
 include/linux/iio/common/st_sensors.h           |  4 +++
 6 files changed, 55 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 4e06fcf5b891..a2abf7c2ce3b 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -459,6 +459,8 @@ int st_accel_common_probe(struct iio_dev *indio_dev,
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->info = &accel_info;
 
+	st_sensors_power_enable(indio_dev);
+
 	err = st_sensors_check_device_support(indio_dev,
 				ARRAY_SIZE(st_accel_sensors), st_accel_sensors);
 	if (err < 0)
@@ -515,6 +517,8 @@ void st_accel_common_remove(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *adata = iio_priv(indio_dev);
 
+	st_sensors_power_disable(indio_dev);
+
 	iio_device_unregister(indio_dev);
 	if (adata->get_irq_data_ready(indio_dev) > 0)
 		st_sensors_deallocate_trigger(indio_dev);
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index 7ba1ef270213..e8b932fed70e 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/iio/iio.h>
+#include <linux/regulator/consumer.h>
 #include <asm/unaligned.h>
 
 #include <linux/iio/common/st_sensors.h>
@@ -198,6 +199,42 @@ int st_sensors_set_axis_enable(struct iio_dev *indio_dev, u8 axis_enable)
 }
 EXPORT_SYMBOL(st_sensors_set_axis_enable);
 
+void st_sensors_power_enable(struct iio_dev *indio_dev)
+{
+	struct st_sensor_data *pdata = iio_priv(indio_dev);
+	int err;
+
+	/* Regulators not mandatory, but if requested we should enable them. */
+	pdata->vdd = devm_regulator_get_optional(indio_dev->dev.parent, "vdd");
+	if (!IS_ERR(pdata->vdd)) {
+		err = regulator_enable(pdata->vdd);
+		if (err != 0)
+			dev_warn(&indio_dev->dev,
+				 "Failed to enable specified Vdd supply\n");
+	}
+
+	pdata->vdd_io = devm_regulator_get_optional(indio_dev->dev.parent, "vddio");
+	if (!IS_ERR(pdata->vdd_io)) {
+		err = regulator_enable(pdata->vdd_io);
+		if (err != 0)
+			dev_warn(&indio_dev->dev,
+				 "Failed to enable specified Vdd_IO supply\n");
+	}
+}
+EXPORT_SYMBOL(st_sensors_power_enable);
+
+void st_sensors_power_disable(struct iio_dev *indio_dev)
+{
+	struct st_sensor_data *pdata = iio_priv(indio_dev);
+
+	if (!IS_ERR(pdata->vdd))
+		regulator_disable(pdata->vdd);
+
+	if (!IS_ERR(pdata->vdd_io))
+		regulator_disable(pdata->vdd_io);
+}
+EXPORT_SYMBOL(st_sensors_power_disable);
+
 static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev,
 				       struct st_sensors_platform_data *pdata)
 {
diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c
index bc71f4d1e2ce..ed74a9069989 100644
--- a/drivers/iio/gyro/st_gyro_core.c
+++ b/drivers/iio/gyro/st_gyro_core.c
@@ -311,6 +311,8 @@ int st_gyro_common_probe(struct iio_dev *indio_dev,
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->info = &gyro_info;
 
+	st_sensors_power_enable(indio_dev);
+
 	err = st_sensors_check_device_support(indio_dev,
 				ARRAY_SIZE(st_gyro_sensors), st_gyro_sensors);
 	if (err < 0)
@@ -363,6 +365,8 @@ void st_gyro_common_remove(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *gdata = iio_priv(indio_dev);
 
+	st_sensors_power_disable(indio_dev);
+
 	iio_device_unregister(indio_dev);
 	if (gdata->get_irq_data_ready(indio_dev) > 0)
 		st_sensors_deallocate_trigger(indio_dev);
diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index 8e33a7682d33..240a21dd0c61 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -355,6 +355,8 @@ int st_magn_common_probe(struct iio_dev *indio_dev,
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->info = &magn_info;
 
+	st_sensors_power_enable(indio_dev);
+
 	err = st_sensors_check_device_support(indio_dev,
 				ARRAY_SIZE(st_magn_sensors), st_magn_sensors);
 	if (err < 0)
@@ -406,6 +408,8 @@ void st_magn_common_remove(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *mdata = iio_priv(indio_dev);
 
+	st_sensors_power_disable(indio_dev);
+
 	iio_device_unregister(indio_dev);
 	if (mdata->get_irq_data_ready(indio_dev) > 0)
 		st_sensors_deallocate_trigger(indio_dev);
diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index 013becbe8f47..cd7e01f3a93b 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -23,7 +23,6 @@
 #include <linux/iio/sysfs.h>
 #include <linux/iio/trigger.h>
 #include <linux/iio/buffer.h>
-#include <linux/regulator/consumer.h>
 #include <asm/unaligned.h>
 
 #include <linux/iio/common/st_sensors.h>
@@ -387,40 +386,6 @@ static const struct iio_trigger_ops st_press_trigger_ops = {
 #define ST_PRESS_TRIGGER_OPS NULL
 #endif
 
-static void st_press_power_enable(struct iio_dev *indio_dev)
-{
-	struct st_sensor_data *pdata = iio_priv(indio_dev);
-	int err;
-
-	/* Regulators not mandatory, but if requested we should enable them. */
-	pdata->vdd = devm_regulator_get_optional(&indio_dev->dev, "vdd");
-	if (!IS_ERR(pdata->vdd)) {
-		err = regulator_enable(pdata->vdd);
-		if (err != 0)
-			dev_warn(&indio_dev->dev,
-				 "Failed to enable specified Vdd supply\n");
-	}
-
-	pdata->vdd_io = devm_regulator_get_optional(&indio_dev->dev, "vddio");
-	if (!IS_ERR(pdata->vdd_io)) {
-		err = regulator_enable(pdata->vdd_io);
-		if (err != 0)
-			dev_warn(&indio_dev->dev,
-				 "Failed to enable specified Vdd_IO supply\n");
-	}
-}
-
-static void st_press_power_disable(struct iio_dev *indio_dev)
-{
-	struct st_sensor_data *pdata = iio_priv(indio_dev);
-
-	if (!IS_ERR(pdata->vdd))
-		regulator_disable(pdata->vdd);
-
-	if (!IS_ERR(pdata->vdd_io))
-		regulator_disable(pdata->vdd_io);
-}
-
 int st_press_common_probe(struct iio_dev *indio_dev,
 				struct st_sensors_platform_data *plat_data)
 {
@@ -431,7 +396,7 @@ int st_press_common_probe(struct iio_dev *indio_dev,
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->info = &press_info;
 
-	st_press_power_enable(indio_dev);
+	st_sensors_power_enable(indio_dev);
 
 	err = st_sensors_check_device_support(indio_dev,
 					      ARRAY_SIZE(st_press_sensors),
@@ -493,7 +458,7 @@ void st_press_common_remove(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *pdata = iio_priv(indio_dev);
 
-	st_press_power_disable(indio_dev);
+	st_sensors_power_disable(indio_dev);
 
 	iio_device_unregister(indio_dev);
 	if (pdata->get_irq_data_ready(indio_dev) > 0)
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 3c005eb3a0a4..96f51f0e0096 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -269,6 +269,10 @@ int st_sensors_set_enable(struct iio_dev *indio_dev, bool enable);
 
 int st_sensors_set_axis_enable(struct iio_dev *indio_dev, u8 axis_enable);
 
+void st_sensors_power_enable(struct iio_dev *indio_dev);
+
+void st_sensors_power_disable(struct iio_dev *indio_dev);
+
 int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr);
 
 int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable);
-- 
cgit 


From 4af619ae2ccf47a2b3a108e1926736484721370f Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Thu, 24 Apr 2014 19:09:05 +0200
Subject: at86rf230: use irq_get_trigger_type

This patch removes the platform data for the irq_type. Instead, we use
the irq_get_trigger_type function to get these flags, which should
already be configured by the interrupt controller.

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ieee802154/at86rf230.c | 28 ++++++++--------------------
 include/linux/spi/at86rf230.h      | 14 --------------
 2 files changed, 8 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c
index e36f194673a4..17b9c9aea9be 100644
--- a/drivers/net/ieee802154/at86rf230.c
+++ b/drivers/net/ieee802154/at86rf230.c
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
+#include <linux/irq.h>
 #include <linux/gpio.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
@@ -970,8 +971,7 @@ static int at86rf230_irq_polarity(struct at86rf230_local *lp, int pol)
 
 static int at86rf230_hw_init(struct at86rf230_local *lp)
 {
-	struct at86rf230_platform_data *pdata = lp->spi->dev.platform_data;
-	int rc, irq_pol;
+	int rc, irq_pol, irq_type;
 	u8 status;
 	u8 csma_seed[2];
 
@@ -983,8 +983,9 @@ static int at86rf230_hw_init(struct at86rf230_local *lp)
 	if (rc)
 		return rc;
 
+	irq_type = irq_get_trigger_type(lp->spi->irq);
 	/* configure irq polarity, defaults to high active */
-	if (pdata->irq_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW))
+	if (irq_type & (IRQF_TRIGGER_FALLING | IRQF_TRIGGER_LOW))
 		irq_pol = IRQ_ACTIVE_LOW;
 	else
 		irq_pol = IRQ_ACTIVE_HIGH;
@@ -1032,7 +1033,6 @@ static struct at86rf230_platform_data *
 at86rf230_get_pdata(struct spi_device *spi)
 {
 	struct at86rf230_platform_data *pdata;
-	const char *irq_type;
 
 	if (!IS_ENABLED(CONFIG_OF) || !spi->dev.of_node)
 		return spi->dev.platform_data;
@@ -1044,19 +1044,6 @@ at86rf230_get_pdata(struct spi_device *spi)
 	pdata->rstn = of_get_named_gpio(spi->dev.of_node, "reset-gpio", 0);
 	pdata->slp_tr = of_get_named_gpio(spi->dev.of_node, "sleep-gpio", 0);
 
-	pdata->irq_type = IRQF_TRIGGER_RISING;
-	of_property_read_string(spi->dev.of_node, "irq-type", &irq_type);
-	if (!strcmp(irq_type, "level-high"))
-		pdata->irq_type = IRQF_TRIGGER_HIGH;
-	else if (!strcmp(irq_type, "level-low"))
-		pdata->irq_type = IRQF_TRIGGER_LOW;
-	else if (!strcmp(irq_type, "edge-rising"))
-		pdata->irq_type = IRQF_TRIGGER_RISING;
-	else if (!strcmp(irq_type, "edge-falling"))
-		pdata->irq_type = IRQF_TRIGGER_FALLING;
-	else
-		dev_warn(&spi->dev, "wrong irq-type specified using edge-rising\n");
-
 	spi->dev.platform_data = pdata;
 done:
 	return pdata;
@@ -1071,7 +1058,7 @@ static int at86rf230_probe(struct spi_device *spi)
 	u8 part = 0, version = 0, status;
 	irq_handler_t irq_handler;
 	work_func_t irq_worker;
-	int rc;
+	int rc, irq_type;
 	const char *chip;
 	struct ieee802154_ops *ops = NULL;
 
@@ -1176,7 +1163,8 @@ static int at86rf230_probe(struct spi_device *spi)
 	dev->extra_tx_headroom = 0;
 	dev->flags = IEEE802154_HW_OMIT_CKSUM | IEEE802154_HW_AACK;
 
-	if (pdata->irq_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) {
+	irq_type = irq_get_trigger_type(spi->irq);
+	if (irq_type & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING)) {
 		irq_worker = at86rf230_irqwork;
 		irq_handler = at86rf230_isr;
 	} else {
@@ -1203,7 +1191,7 @@ static int at86rf230_probe(struct spi_device *spi)
 		goto err_hw_init;
 
 	rc = request_irq(spi->irq, irq_handler,
-			 IRQF_SHARED | pdata->irq_type,
+			 IRQF_SHARED | irq_type,
 			 dev_name(&spi->dev), lp);
 	if (rc)
 		goto err_hw_init;
diff --git a/include/linux/spi/at86rf230.h b/include/linux/spi/at86rf230.h
index aa327a8105ad..b2b1afbb3202 100644
--- a/include/linux/spi/at86rf230.h
+++ b/include/linux/spi/at86rf230.h
@@ -26,20 +26,6 @@ struct at86rf230_platform_data {
 	int rstn;
 	int slp_tr;
 	int dig2;
-
-	/* Setting the irq_type will configure the driver to request
-	 * the platform irq trigger type according to the given value
-	 * and configure the interrupt polarity of the device to the
-	 * corresponding polarity.
-	 *
-	 * Allowed values are: IRQF_TRIGGER_RISING, IRQF_TRIGGER_FALLING,
-	 *                     IRQF_TRIGGER_HIGH and IRQF_TRIGGER_LOW
-	 *
-	 * Setting it to 0, the driver does not touch the trigger type
-	 * configuration of the interrupt and sets the interrupt polarity
-	 * of the device to high active (the default value).
-	 */
-	int irq_type;
 };
 
 #endif
-- 
cgit 


From e2dcdfe95c0bd67e37db6057edd9c4ee1f1c7b17 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 28 Apr 2014 11:15:08 +0930
Subject: virtio: virtio_break_device() to mark all virtqueues broken.

Good for post-apocalyptic scenarios, like S/390 hotplug.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 15 +++++++++++++++
 include/linux/virtio.h       |  2 ++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 1e443629f76d..4d08f45a9c29 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -865,4 +865,19 @@ bool virtqueue_is_broken(struct virtqueue *_vq)
 }
 EXPORT_SYMBOL_GPL(virtqueue_is_broken);
 
+/*
+ * This should prevent the device from being used, allowing drivers to
+ * recover.  You may need to grab appropriate locks to flush.
+ */
+void virtio_break_device(struct virtio_device *dev)
+{
+	struct virtqueue *_vq;
+
+	list_for_each_entry(_vq, &dev->vqs, list) {
+		struct vring_virtqueue *vq = to_vvq(_vq);
+		vq->broken = true;
+	}
+}
+EXPORT_SYMBOL_GPL(virtio_break_device);
+
 MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index e4abb84199be..b46671e28de2 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -106,6 +106,8 @@ static inline struct virtio_device *dev_to_virtio(struct device *_dev)
 int register_virtio_device(struct virtio_device *dev);
 void unregister_virtio_device(struct virtio_device *dev);
 
+void virtio_break_device(struct virtio_device *dev);
+
 /**
  * virtio_driver - operations for a virtio I/O driver
  * @driver: underlying device driver (populate name and owner).
-- 
cgit 


From 51e158c12aca3c9ac63988611a97c05109b14dc9 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 28 Apr 2014 11:34:33 +0930
Subject: param: hand arguments after -- straight to init

The kernel passes any args it doesn't need through to init, except it
assumes anything containing '.' belongs to the kernel (for a module).
This change means all users can clearly distinguish which arguments
are for init.

For example, the kernel uses debug ("dee-bug") to mean log everything to
the console, where systemd uses the debug from the Scandinavian "day-boog"
meaning "fail to boot".  If a future version uses argv[] instead of
reading /proc/cmdline, this confusion will be avoided.

eg: test 'FOO="this is --foo"' -- 'systemd.debug="true true true"'

Gives:
argv[0] = '/debug-init'
argv[1] = 'test'
argv[2] = 'systemd.debug=true true true'
envp[0] = 'HOME=/'
envp[1] = 'TERM=linux'
envp[2] = 'FOO=this is --foo'

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/moduleparam.h |  2 +-
 init/main.c                 | 33 +++++++++++++++++++++++++++++----
 kernel/module.c             | 12 +++++++++---
 kernel/params.c             | 25 ++++++++++++++-----------
 4 files changed, 53 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 204a67743804..b1990c5524e1 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -321,7 +321,7 @@ extern bool parameq(const char *name1, const char *name2);
 extern bool parameqn(const char *name1, const char *name2, size_t n);
 
 /* Called on module insert or kernel boot */
-extern int parse_args(const char *name,
+extern char *parse_args(const char *name,
 		      char *args,
 		      const struct kernel_param *params,
 		      unsigned num,
diff --git a/init/main.c b/init/main.c
index 9c7fd4c9249f..e9d458b5d77b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -252,6 +252,27 @@ static int __init repair_env_string(char *param, char *val, const char *unused)
 	return 0;
 }
 
+/* Anything after -- gets handed straight to init. */
+static int __init set_init_arg(char *param, char *val, const char *unused)
+{
+	unsigned int i;
+
+	if (panic_later)
+		return 0;
+
+	repair_env_string(param, val, unused);
+
+	for (i = 0; argv_init[i]; i++) {
+		if (i == MAX_INIT_ARGS) {
+			panic_later = "init";
+			panic_param = param;
+			return 0;
+		}
+	}
+	argv_init[i] = param;
+	return 0;
+}
+
 /*
  * Unknown boot options get handed to init, unless they look like
  * unused parameters (modprobe will find them in /proc/cmdline).
@@ -478,7 +499,7 @@ static void __init mm_init(void)
 
 asmlinkage void __init start_kernel(void)
 {
-	char * command_line;
+	char * command_line, *after_dashes;
 	extern const struct kernel_param __start___param[], __stop___param[];
 
 	/*
@@ -519,9 +540,13 @@ asmlinkage void __init start_kernel(void)
 
 	pr_notice("Kernel command line: %s\n", boot_command_line);
 	parse_early_param();
-	parse_args("Booting kernel", static_command_line, __start___param,
-		   __stop___param - __start___param,
-		   -1, -1, &unknown_bootoption);
+	after_dashes = parse_args("Booting kernel",
+				  static_command_line, __start___param,
+				  __stop___param - __start___param,
+				  -1, -1, &unknown_bootoption);
+	if (after_dashes)
+		parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
+			   set_init_arg);
 
 	jump_label_init();
 
diff --git a/kernel/module.c b/kernel/module.c
index 11869408f79b..66e4e0d260a9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3193,6 +3193,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
 {
 	struct module *mod;
 	long err;
+	char *after_dashes;
 
 	err = module_sig_check(info);
 	if (err)
@@ -3277,10 +3278,15 @@ static int load_module(struct load_info *info, const char __user *uargs,
 		goto ddebug_cleanup;
 
 	/* Module is ready to execute: parsing args may do that. */
-	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-			 -32768, 32767, unknown_module_param_cb);
-	if (err < 0)
+	after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
+				  -32768, 32767, unknown_module_param_cb);
+	if (IS_ERR(after_dashes)) {
+		err = PTR_ERR(after_dashes);
 		goto bug_cleanup;
+	} else if (after_dashes) {
+		pr_warn("%s: parameters '%s' after `--' ignored\n",
+		       mod->name, after_dashes);
+	}
 
 	/* Link in to syfs. */
 	err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);
diff --git a/kernel/params.c b/kernel/params.c
index b00142e7f3ba..1e52ca233fd9 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -177,13 +177,13 @@ static char *next_arg(char *args, char **param, char **val)
 }
 
 /* Args looks like "foo=bar,bar2 baz=fuz wiz". */
-int parse_args(const char *doing,
-	       char *args,
-	       const struct kernel_param *params,
-	       unsigned num,
-	       s16 min_level,
-	       s16 max_level,
-	       int (*unknown)(char *param, char *val, const char *doing))
+char *parse_args(const char *doing,
+		 char *args,
+		 const struct kernel_param *params,
+		 unsigned num,
+		 s16 min_level,
+		 s16 max_level,
+		 int (*unknown)(char *param, char *val, const char *doing))
 {
 	char *param, *val;
 
@@ -198,6 +198,9 @@ int parse_args(const char *doing,
 		int irq_was_disabled;
 
 		args = next_arg(args, &param, &val);
+		/* Stop at -- */
+		if (!val && strcmp(param, "--") == 0)
+			return args;
 		irq_was_disabled = irqs_disabled();
 		ret = parse_one(param, val, doing, params, num,
 				min_level, max_level, unknown);
@@ -208,22 +211,22 @@ int parse_args(const char *doing,
 		switch (ret) {
 		case -ENOENT:
 			pr_err("%s: Unknown parameter `%s'\n", doing, param);
-			return ret;
+			return ERR_PTR(ret);
 		case -ENOSPC:
 			pr_err("%s: `%s' too large for parameter `%s'\n",
 			       doing, val ?: "", param);
-			return ret;
+			return ERR_PTR(ret);
 		case 0:
 			break;
 		default:
 			pr_err("%s: `%s' invalid for parameter `%s'\n",
 			       doing, val ?: "", param);
-			return ret;
+			return ERR_PTR(ret);
 		}
 	}
 
 	/* All parsed OK. */
-	return 0;
+	return NULL;
 }
 
 /* Lazy bastard, eh? */
-- 
cgit 


From 1c8732bb0355b929b09173464cdca7df4d516f89 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 9 Apr 2014 13:34:39 +0200
Subject: gpio: support threaded interrupts in irqchip helpers

Some off-chip GPIO expanders need to be communicated by I2C or
SPI traffic, but may still support IRQs. By the sleeping nature
of such buses, such IRQ handlers need to be threaded. Support
such handlers in the gpiochip irqchip helpers by flagging IRQs
as threaded if the .can_sleep property of the gpiochip is
true.

Helpfully deny registration of chained IRQ handlers if the
.can_sleep property is set, as such chips will invariably need
a nested handler rather than a chained handler.

Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c      | 12 ++++++++++++
 include/linux/gpio/driver.h |  5 ++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index f48817d97480..c12fe9dfd2db 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1363,6 +1363,11 @@ void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip,
 				  int parent_irq,
 				  irq_flow_handler_t parent_handler)
 {
+	if (gpiochip->can_sleep) {
+		chip_err(gpiochip, "you cannot have chained interrupts on a chip that may sleep\n");
+		return;
+	}
+
 	irq_set_chained_handler(parent_irq, parent_handler);
 	/*
 	 * The parent irqchip is already using the chip_data for this
@@ -1389,6 +1394,9 @@ static int gpiochip_irq_map(struct irq_domain *d, unsigned int irq,
 
 	irq_set_chip_data(irq, chip);
 	irq_set_chip_and_handler(irq, chip->irqchip, chip->irq_handler);
+	/* Chips that can sleep need nested thread handlers */
+	if (chip->can_sleep)
+		irq_set_nested_thread(irq, 1);
 #ifdef CONFIG_ARM
 	set_irq_flags(irq, IRQF_VALID);
 #else
@@ -1401,9 +1409,13 @@ static int gpiochip_irq_map(struct irq_domain *d, unsigned int irq,
 
 static void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq)
 {
+	struct gpio_chip *chip = d->host_data;
+
 #ifdef CONFIG_ARM
 	set_irq_flags(irq, 0);
 #endif
+	if (chip->can_sleep)
+		irq_set_nested_thread(irq, 0);
 	irq_set_chip_and_handler(irq, NULL, NULL);
 	irq_set_chip_data(irq, NULL);
 }
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 1827b43966d9..573e4f3243d0 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -51,7 +51,10 @@ struct seq_file;
  *      format specifier for an unsigned int.  It is substituted by the actual
  *      number of the gpio.
  * @can_sleep: flag must be set iff get()/set() methods sleep, as they
- *	must while accessing GPIO expander chips over I2C or SPI
+ *	must while accessing GPIO expander chips over I2C or SPI. This
+ *	implies that if the chip supports IRQs, these IRQs need to be threaded
+ *	as the chip access may sleep when e.g. reading out the IRQ status
+ *	registers.
  * @exported: flags if the gpiochip is exported for use from sysfs. Private.
  *
  * A gpio_chip can help platforms abstract various sources of GPIOs so
-- 
cgit 


From 5c81f2078b7be63be49916128cc86bc17be7f348 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 9 Apr 2014 12:50:40 +0200
Subject: gpio: tc3589x: get rid of static IRQ base

The static IRQ base is not used on any platforms with this chip
(only Ux500). Get rid of it forever, and rely on dynamic IRQ
descriptor allocation.

Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-tc3589x.c | 10 +---------
 include/linux/mfd/tc3589x.h |  1 -
 2 files changed, 1 insertion(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c
index 113e50cb1f59..4b0d8ccbe561 100644
--- a/drivers/gpio/gpio-tc3589x.c
+++ b/drivers/gpio/gpio-tc3589x.c
@@ -32,9 +32,6 @@ struct tc3589x_gpio {
 	struct device *dev;
 	struct mutex irq_lock;
 	struct irq_domain *domain;
-
-	int irq_base;
-
 	/* Caches of interrupt control registers for bus_lock */
 	u8 regs[CACHE_NR_REGS][CACHE_NR_BANKS];
 	u8 oldregs[CACHE_NR_REGS][CACHE_NR_BANKS];
@@ -290,8 +287,6 @@ static struct irq_domain_ops tc3589x_irq_ops = {
 static int tc3589x_gpio_irq_init(struct tc3589x_gpio *tc3589x_gpio,
 				struct device_node *np)
 {
-	int base = tc3589x_gpio->irq_base;
-
 	/*
 	 * If this results in a linear domain, irq_create_mapping() will
 	 * take care of allocating IRQ descriptors at runtime. When a base
@@ -299,7 +294,7 @@ static int tc3589x_gpio_irq_init(struct tc3589x_gpio *tc3589x_gpio,
 	 * domain is instantiated.
 	 */
 	tc3589x_gpio->domain = irq_domain_add_simple(np,
-			tc3589x_gpio->chip.ngpio, base, &tc3589x_irq_ops,
+			tc3589x_gpio->chip.ngpio, 0, &tc3589x_irq_ops,
 			tc3589x_gpio);
 	if (!tc3589x_gpio->domain) {
 		dev_err(tc3589x_gpio->dev, "Failed to create irqdomain\n");
@@ -348,9 +343,6 @@ static int tc3589x_gpio_probe(struct platform_device *pdev)
 	tc3589x_gpio->chip.of_node = np;
 #endif
 
-	tc3589x_gpio->irq_base = tc3589x->irq_base ?
-		tc3589x->irq_base + TC3589x_INT_GPIO(0) : 0;
-
 	/* Bring the GPIO module out of reset */
 	ret = tc3589x_set_bits(tc3589x, TC3589x_RSTCTRL,
 			       TC3589x_RSTCTRL_GPIRST, 0);
diff --git a/include/linux/mfd/tc3589x.h b/include/linux/mfd/tc3589x.h
index 6b8e1ff4672b..e6088c2e2092 100644
--- a/include/linux/mfd/tc3589x.h
+++ b/include/linux/mfd/tc3589x.h
@@ -111,7 +111,6 @@ enum tx3589x_block {
 #define TC3589x_INT_PORIRQ	7
 
 #define TC3589x_NR_INTERNAL_IRQS	8
-#define TC3589x_INT_GPIO(x)	(TC3589x_NR_INTERNAL_IRQS + (x))
 
 struct tc3589x {
 	struct mutex lock;
-- 
cgit 


From a9af65223b41cec60cd44fa95a93d10149deb143 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 24 Apr 2014 19:46:49 +0900
Subject: extcon: Add extcon_dev_allocate/free() to control the memory of
 extcon device

This patch adds APIs to control the extcon device in extcon provider drivers.
extcon_dev_allocate() allocates the memory of the extcon device and initializes
the supported cables. extcon_dev_free() decrements the reference of the
device of the extcon device and frees the memory of the extcon device. These
APIs are needed to implement the devm_extcon_dev_allocate()/free() APIs.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
---
 drivers/extcon/extcon-class.c | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/extcon.h        | 13 +++++++++++++
 2 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-class.c b/drivers/extcon/extcon-class.c
index f6df68989651..654ed52e17c2 100644
--- a/drivers/extcon/extcon-class.c
+++ b/drivers/extcon/extcon-class.c
@@ -565,6 +565,42 @@ static void dummy_sysfs_dev_release(struct device *dev)
 {
 }
 
+/*
+ * extcon_dev_allocate() - Allocate the memory of extcon device.
+ * @supported_cable:	Array of supported cable names ending with NULL.
+ *			If supported_cable is NULL, cable name related APIs
+ *			are disabled.
+ *
+ * This function allocates the memory for extcon device without allocating
+ * memory in each extcon provider driver and initialize default setting for
+ * extcon device.
+ *
+ * Return the pointer of extcon device if success or ERR_PTR(err) if fail
+ */
+struct extcon_dev *extcon_dev_allocate(const char **supported_cable)
+{
+	struct extcon_dev *edev;
+
+	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+	if (!edev)
+		return ERR_PTR(-ENOMEM);
+
+	edev->max_supported = 0;
+	edev->supported_cable = supported_cable;
+
+	return edev;
+}
+
+/*
+ * extcon_dev_free() - Free the memory of extcon device.
+ * @edev:	the extcon device to free
+ */
+void extcon_dev_free(struct extcon_dev *edev)
+{
+	kfree(edev);
+}
+EXPORT_SYMBOL_GPL(extcon_dev_free);
+
 /**
  * extcon_dev_register() - Register a new extcon device
  * @edev	: the new extcon device (should be allocated before calling)
diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 548447be2d8f..15361a2f2f19 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -191,6 +191,12 @@ extern void devm_extcon_dev_unregister(struct device *dev,
 				       struct extcon_dev *edev);
 extern struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name);
 
+/*
+ * Following APIs control the memory of extcon device.
+ */
+extern struct extcon_dev *extcon_dev_allocate(const char **cables);
+extern void extcon_dev_free(struct extcon_dev *edev);
+
 /*
  * get/set/update_state access the 32b encoded state value, which represents
  * states of all possible cables of the multistate port. For example, if one
@@ -267,6 +273,13 @@ static inline int devm_extcon_dev_register(struct device *dev,
 static inline void devm_extcon_dev_unregister(struct device *dev,
 					      struct extcon_dev *edev) { }
 
+static inline struct extcon_dev *extcon_dev_allocate(const char **cables)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline void extcon_dev_free(struct extcon_dev *edev) { }
+
 static inline u32 extcon_get_state(struct extcon_dev *edev)
 {
 	return 0;
-- 
cgit 


From 739ba1bfdb15e773999aafddbd6c59b5737797a0 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Thu, 24 Apr 2014 20:12:15 +0900
Subject: extcon: Add devm_extcon_dev_allocate/free to manage the resource of
 extcon device

This patch adds device-managed devm_extcon_dev_{allocate,free} to automatically
free the memory of the extcon_dev structure without an explicit free operation.

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
---
 drivers/extcon/extcon-class.c | 70 +++++++++++++++++++++++++++++++++++--------
 include/linux/extcon.h        | 11 +++++++
 2 files changed, 69 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-class.c b/drivers/extcon/extcon-class.c
index 654ed52e17c2..18d42c0e4581 100644
--- a/drivers/extcon/extcon-class.c
+++ b/drivers/extcon/extcon-class.c
@@ -601,6 +601,64 @@ void extcon_dev_free(struct extcon_dev *edev)
 }
 EXPORT_SYMBOL_GPL(extcon_dev_free);
 
+static int devm_extcon_dev_match(struct device *dev, void *res, void *data)
+{
+	struct extcon_dev **r = res;
+
+	if (WARN_ON(!r || !*r))
+		return 0;
+
+	return *r == data;
+}
+
+static void devm_extcon_dev_release(struct device *dev, void *res)
+{
+	extcon_dev_free(*(struct extcon_dev **)res);
+}
+
+/**
+ * devm_extcon_dev_allocate - Allocate managed extcon device
+ * @dev:		device owning the extcon device being created
+ * @supported_cable:	Array of supported cable names ending with NULL.
+ *			If supported_cable is NULL, cable name related APIs
+ *			are disabled.
+ *
+ * This function manages automatically the memory of extcon device using device
+ * resource management and simplify the control of freeing the memory of extcon
+ * device.
+ *
+ * Returns the pointer memory of allocated extcon_dev if success
+ * or ERR_PTR(err) if fail
+ */
+struct extcon_dev *devm_extcon_dev_allocate(struct device *dev,
+					    const char **supported_cable)
+{
+	struct extcon_dev **ptr, *edev;
+
+	ptr = devres_alloc(devm_extcon_dev_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	edev = extcon_dev_allocate(supported_cable);
+	if (IS_ERR(edev)) {
+		devres_free(ptr);
+		return edev;
+	}
+
+	*ptr = edev;
+	devres_add(dev, ptr);
+
+	return edev;
+}
+EXPORT_SYMBOL_GPL(devm_extcon_dev_allocate);
+
+void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev)
+{
+	WARN_ON(devres_release(dev, devm_extcon_dev_release,
+			       devm_extcon_dev_match, edev));
+}
+EXPORT_SYMBOL_GPL(devm_extcon_dev_free);
+
 /**
  * extcon_dev_register() - Register a new extcon device
  * @edev	: the new extcon device (should be allocated before calling)
@@ -860,18 +918,6 @@ static void devm_extcon_dev_unreg(struct device *dev, void *res)
 	extcon_dev_unregister(*(struct extcon_dev **)res);
 }
 
-static int devm_extcon_dev_match(struct device *dev, void *res, void *data)
-{
-	struct extcon_dev **r = res;
-
-	if (!r || !*r) {
-		WARN_ON(!r || !*r);
-		return 0;
-	}
-
-	return *r == data;
-}
-
 /**
  * devm_extcon_dev_register() - Resource-managed extcon_dev_register()
  * @dev:	device to allocate extcon device
diff --git a/include/linux/extcon.h b/include/linux/extcon.h
index 15361a2f2f19..36f49c405dfb 100644
--- a/include/linux/extcon.h
+++ b/include/linux/extcon.h
@@ -196,6 +196,9 @@ extern struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name);
  */
 extern struct extcon_dev *extcon_dev_allocate(const char **cables);
 extern void extcon_dev_free(struct extcon_dev *edev);
+extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev,
+						   const char **cables);
+extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev);
 
 /*
  * get/set/update_state access the 32b encoded state value, which represents
@@ -280,6 +283,14 @@ static inline struct extcon_dev *extcon_dev_allocate(const char **cables)
 
 static inline void extcon_dev_free(struct extcon_dev *edev) { }
 
+static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev,
+							  const char **cables)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev) { }
+
 static inline u32 extcon_get_state(struct extcon_dev *edev)
 {
 	return 0;
-- 
cgit 


From 3f79a3fb5f41e8f2229e5bf8aa725eaa79686f14 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Mon, 21 Apr 2014 20:44:53 +0900
Subject: extcon: palmas: Use devm_extcon_dev_allocate for extcon_dev

This patch uses devm_extcon_dev_allocate() to simplify memory management
of the extcon device.

Cc: Graeme Gregory <gg@slimlogic.co.uk>
Cc: Kishon Vijay Abraham I <kishon@ti.com>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Felipe Balbi <balbi@ti.com>
Tested-by: Felipe Balbi <balbi@ti.com>
---
 drivers/extcon/extcon-palmas.c | 35 ++++++++++++++++++++---------------
 include/linux/mfd/palmas.h     |  2 +-
 2 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/extcon/extcon-palmas.c b/drivers/extcon/extcon-palmas.c
index 1a770e0ee9ae..7417ce84eb2d 100644
--- a/drivers/extcon/extcon-palmas.c
+++ b/drivers/extcon/extcon-palmas.c
@@ -57,7 +57,7 @@ static irqreturn_t palmas_vbus_irq_handler(int irq, void *_palmas_usb)
 	if (vbus_line_state & PALMAS_INT3_LINE_STATE_VBUS) {
 		if (palmas_usb->linkstat != PALMAS_USB_STATE_VBUS) {
 			palmas_usb->linkstat = PALMAS_USB_STATE_VBUS;
-			extcon_set_cable_state(&palmas_usb->edev, "USB", true);
+			extcon_set_cable_state(palmas_usb->edev, "USB", true);
 			dev_info(palmas_usb->dev, "USB cable is attached\n");
 		} else {
 			dev_dbg(palmas_usb->dev,
@@ -66,7 +66,7 @@ static irqreturn_t palmas_vbus_irq_handler(int irq, void *_palmas_usb)
 	} else if (!(vbus_line_state & PALMAS_INT3_LINE_STATE_VBUS)) {
 		if (palmas_usb->linkstat == PALMAS_USB_STATE_VBUS) {
 			palmas_usb->linkstat = PALMAS_USB_STATE_DISCONNECT;
-			extcon_set_cable_state(&palmas_usb->edev, "USB", false);
+			extcon_set_cable_state(palmas_usb->edev, "USB", false);
 			dev_info(palmas_usb->dev, "USB cable is detached\n");
 		} else {
 			dev_dbg(palmas_usb->dev,
@@ -93,7 +93,7 @@ static irqreturn_t palmas_id_irq_handler(int irq, void *_palmas_usb)
 			PALMAS_USB_ID_INT_LATCH_CLR,
 			PALMAS_USB_ID_INT_EN_HI_CLR_ID_GND);
 		palmas_usb->linkstat = PALMAS_USB_STATE_ID;
-		extcon_set_cable_state(&palmas_usb->edev, "USB-HOST", true);
+		extcon_set_cable_state(palmas_usb->edev, "USB-HOST", true);
 		dev_info(palmas_usb->dev, "USB-HOST cable is attached\n");
 	} else if ((set & PALMAS_USB_ID_INT_SRC_ID_FLOAT) &&
 				(id_src & PALMAS_USB_ID_INT_SRC_ID_FLOAT)) {
@@ -101,17 +101,17 @@ static irqreturn_t palmas_id_irq_handler(int irq, void *_palmas_usb)
 			PALMAS_USB_ID_INT_LATCH_CLR,
 			PALMAS_USB_ID_INT_EN_HI_CLR_ID_FLOAT);
 		palmas_usb->linkstat = PALMAS_USB_STATE_DISCONNECT;
-		extcon_set_cable_state(&palmas_usb->edev, "USB-HOST", false);
+		extcon_set_cable_state(palmas_usb->edev, "USB-HOST", false);
 		dev_info(palmas_usb->dev, "USB-HOST cable is detached\n");
 	} else if ((palmas_usb->linkstat == PALMAS_USB_STATE_ID) &&
 				(!(set & PALMAS_USB_ID_INT_SRC_ID_GND))) {
 		palmas_usb->linkstat = PALMAS_USB_STATE_DISCONNECT;
-		extcon_set_cable_state(&palmas_usb->edev, "USB-HOST", false);
+		extcon_set_cable_state(palmas_usb->edev, "USB-HOST", false);
 		dev_info(palmas_usb->dev, "USB-HOST cable is detached\n");
 	} else if ((palmas_usb->linkstat == PALMAS_USB_STATE_DISCONNECT) &&
 				(id_src & PALMAS_USB_ID_INT_SRC_ID_GND)) {
 		palmas_usb->linkstat = PALMAS_USB_STATE_ID;
-		extcon_set_cable_state(&palmas_usb->edev, "USB-HOST", true);
+		extcon_set_cable_state(palmas_usb->edev, "USB-HOST", true);
 		dev_info(palmas_usb->dev, " USB-HOST cable is attached\n");
 	}
 
@@ -187,15 +187,20 @@ static int palmas_usb_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, palmas_usb);
 
-	palmas_usb->edev.supported_cable = palmas_extcon_cable;
-	palmas_usb->edev.dev.parent = palmas_usb->dev;
-	palmas_usb->edev.name = kstrdup(node->name, GFP_KERNEL);
-	palmas_usb->edev.mutually_exclusive = mutually_exclusive;
+	palmas_usb->edev = devm_extcon_dev_allocate(&pdev->dev,
+						    palmas_extcon_cable);
+	if (IS_ERR(palmas_usb->edev)) {
+		dev_err(&pdev->dev, "failed to allocate extcon device\n");
+		return -ENOMEM;
+	}
+	palmas_usb->edev->name = kstrdup(node->name, GFP_KERNEL);
+	palmas_usb->edev->dev.parent = palmas_usb->dev;
+	palmas_usb->edev->mutually_exclusive = mutually_exclusive;
 
-	status = devm_extcon_dev_register(&pdev->dev, &palmas_usb->edev);
+	status = devm_extcon_dev_register(&pdev->dev, palmas_usb->edev);
 	if (status) {
 		dev_err(&pdev->dev, "failed to register extcon device\n");
-		kfree(palmas_usb->edev.name);
+		kfree(palmas_usb->edev->name);
 		return status;
 	}
 
@@ -209,7 +214,7 @@ static int palmas_usb_probe(struct platform_device *pdev)
 		if (status < 0) {
 			dev_err(&pdev->dev, "can't get IRQ %d, err %d\n",
 					palmas_usb->id_irq, status);
-			kfree(palmas_usb->edev.name);
+			kfree(palmas_usb->edev->name);
 			return status;
 		}
 	}
@@ -224,7 +229,7 @@ static int palmas_usb_probe(struct platform_device *pdev)
 		if (status < 0) {
 			dev_err(&pdev->dev, "can't get IRQ %d, err %d\n",
 					palmas_usb->vbus_irq, status);
-			kfree(palmas_usb->edev.name);
+			kfree(palmas_usb->edev->name);
 			return status;
 		}
 	}
@@ -238,7 +243,7 @@ static int palmas_usb_remove(struct platform_device *pdev)
 {
 	struct palmas_usb *palmas_usb = platform_get_drvdata(pdev);
 
-	kfree(palmas_usb->edev.name);
+	kfree(palmas_usb->edev->name);
 
 	return 0;
 }
diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index 9974e387e483..b8f87b704409 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -415,7 +415,7 @@ struct palmas_usb {
 	struct palmas *palmas;
 	struct device *dev;
 
-	struct extcon_dev edev;
+	struct extcon_dev *edev;
 
 	int id_otg_irq;
 	int id_irq;
-- 
cgit 


From 8ad357551797b1edc184fb9f6a4f80a6fa626459 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Fri, 14 Mar 2014 11:00:21 +0100
Subject: KVM: s390: enable IBS for single running VCPUs

This patch enables the IBS facility when a single VCPU is running.
The facility is dynamically turned on/off as soon as other VCPUs
enter/leave the stopped state.

When this facility is operating, some instructions can be executed
faster for single-cpu guests.

Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Reviewed-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |   2 +
 arch/s390/kvm/kvm-s390.c         | 123 ++++++++++++++++++++++++++++++++++++++-
 arch/s390/kvm/trace-s390.h       |  22 +++++++
 include/linux/kvm_host.h         |   2 +
 4 files changed, 147 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 0d45f6fe734f..f0a1dc5e5d1f 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -72,6 +72,7 @@ struct sca_block {
 #define CPUSTAT_ZARCH      0x00000800
 #define CPUSTAT_MCDS       0x00000100
 #define CPUSTAT_SM         0x00000080
+#define CPUSTAT_IBS        0x00000040
 #define CPUSTAT_G          0x00000008
 #define CPUSTAT_GED        0x00000004
 #define CPUSTAT_J          0x00000002
@@ -411,6 +412,7 @@ struct kvm_arch{
 	int use_cmma;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
+	spinlock_t start_stop_lock;
 };
 
 #define KVM_HVA_ERR_BAD		(-1UL)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6c972d229ace..0a01744cbdd9 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -458,6 +458,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.css_support = 0;
 	kvm->arch.use_irqchip = 0;
 
+	spin_lock_init(&kvm->arch.start_stop_lock);
+
 	return 0;
 out_nogmap:
 	debug_unregister(kvm->arch.dbf);
@@ -996,8 +998,15 @@ bool kvm_s390_cmma_enabled(struct kvm *kvm)
 	return true;
 }
 
+static bool ibs_enabled(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
+}
+
 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 {
+retry:
+	s390_vcpu_unblock(vcpu);
 	/*
 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
 	 * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
@@ -1005,15 +1014,34 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
 	 * already finished. We might race against a second unmapper that
 	 * wants to set the blocking bit. Lets just retry the request loop.
 	 */
-	while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
+	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
 		int rc;
 		rc = gmap_ipte_notify(vcpu->arch.gmap,
 				      vcpu->arch.sie_block->prefix,
 				      PAGE_SIZE * 2);
 		if (rc)
 			return rc;
-		s390_vcpu_unblock(vcpu);
+		goto retry;
+	}
+
+	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
+		if (!ibs_enabled(vcpu)) {
+			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
+			atomic_set_mask(CPUSTAT_IBS,
+					&vcpu->arch.sie_block->cpuflags);
+		}
+		goto retry;
 	}
+
+	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
+		if (ibs_enabled(vcpu)) {
+			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
+			atomic_clear_mask(CPUSTAT_IBS,
+					  &vcpu->arch.sie_block->cpuflags);
+		}
+		goto retry;
+	}
+
 	return 0;
 }
 
@@ -1362,16 +1390,107 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	return kvm_s390_store_status_unloaded(vcpu, addr);
 }
 
+static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
+}
+
+static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
+	kvm_make_request(KVM_REQ_DISABLE_IBS, vcpu);
+	exit_sie_sync(vcpu);
+}
+
+static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
+{
+	unsigned int i;
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		__disable_ibs_on_vcpu(vcpu);
+	}
+}
+
+static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
+{
+	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
+	kvm_make_request(KVM_REQ_ENABLE_IBS, vcpu);
+	exit_sie_sync(vcpu);
+}
+
 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
 {
+	int i, online_vcpus, started_vcpus = 0;
+
+	if (!is_vcpu_stopped(vcpu))
+		return;
+
 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
+	/* Only one cpu at a time may enter/leave the STOPPED state. */
+	spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
+	for (i = 0; i < online_vcpus; i++) {
+		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
+			started_vcpus++;
+	}
+
+	if (started_vcpus == 0) {
+		/* we're the only active VCPU -> speed it up */
+		__enable_ibs_on_vcpu(vcpu);
+	} else if (started_vcpus == 1) {
+		/*
+		 * As we are starting a second VCPU, we have to disable
+		 * the IBS facility on all VCPUs to remove potentially
+		 * outstanding ENABLE requests.
+		 */
+		__disable_ibs_on_all_vcpus(vcpu->kvm);
+	}
+
 	atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	/*
+	 * Another VCPU might have used IBS while we were offline.
+	 * Let's play safe and flush the VCPU at startup.
+	 */
+	vcpu->arch.sie_block->ihcpu  = 0xffff;
+	spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+	return;
 }
 
 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
 {
+	int i, online_vcpus, started_vcpus = 0;
+	struct kvm_vcpu *started_vcpu = NULL;
+
+	if (is_vcpu_stopped(vcpu))
+		return;
+
 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
+	/* Only one cpu at a time may enter/leave the STOPPED state. */
+	spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+
 	atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+	__disable_ibs_on_vcpu(vcpu);
+
+	for (i = 0; i < online_vcpus; i++) {
+		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
+			started_vcpus++;
+			started_vcpu = vcpu->kvm->vcpus[i];
+		}
+	}
+
+	if (started_vcpus == 1) {
+		/*
+		 * As we only have one VCPU left, we want to enable the
+		 * IBS facility for that VCPU to speed it up.
+		 */
+		__enable_ibs_on_vcpu(started_vcpu);
+	}
+
+	spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+	return;
 }
 
 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 34d4f8af3a1d..647e9d6a4818 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -244,6 +244,28 @@ TRACE_EVENT(kvm_s390_enable_css,
 		      __entry->kvm)
 	);
 
+/*
+ * Trace point for enabling and disabling interlocking-and-broadcasting
+ * suppression.
+ */
+TRACE_EVENT(kvm_s390_enable_disable_ibs,
+	    TP_PROTO(unsigned int id, int state),
+	    TP_ARGS(id, state),
+
+	    TP_STRUCT__entry(
+		    __field(unsigned int, id)
+		    __field(int, state)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->id = id;
+		    __entry->state = state;
+		    ),
+
+	    TP_printk("%s ibs on cpu %d",
+		      __entry->state ? "enabling" : "disabling", __entry->id)
+	);
+
 
 #endif /* _TRACE_KVMS390_H */
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 820fc2e1d9df..1e125b055327 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -134,6 +134,8 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_EPR_EXIT          20
 #define KVM_REQ_SCAN_IOAPIC       21
 #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22
+#define KVM_REQ_ENABLE_IBS        23
+#define KVM_REQ_DISABLE_IBS       24
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID		0
 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
-- 
cgit 


From a235c0916543d8b886405f8871dc644124c7cf78 Mon Sep 17 00:00:00 2001
From: Iulia Manda <iulia.manda21@gmail.com>
Date: Wed, 12 Mar 2014 18:37:24 +0200
Subject: rcu: Remove "extern" from function declaration in
 include/linux/rcupdate.h

Because functions have the extern storage class specifier by default,
this keyword can be removed. It is redundant to use it explicitly.

Signed-off-by: Iulia Manda <iulia.manda21@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 00a7fd61b3c6..fdc422f3d61d 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -328,7 +328,7 @@ extern struct lockdep_map rcu_lock_map;
 extern struct lockdep_map rcu_bh_lock_map;
 extern struct lockdep_map rcu_sched_lock_map;
 extern struct lockdep_map rcu_callback_map;
-extern int debug_lockdep_rcu_enabled(void);
+int debug_lockdep_rcu_enabled(void);
 
 /**
  * rcu_read_lock_held() - might we be in RCU read-side critical section?
-- 
cgit 


From 71a9b26963f8c2d0df6f782e2b29ccefc22d4fba Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 31 Mar 2014 13:13:02 -0700
Subject: rcu: Document RCU_INIT_POINTER()'s lack of ordering guarantees

Although rcu_assign_pointer() provides ordering guarantees,
RCU_INIT_POINTER() does not.  This commit makes that explicit
in the docbook comment header.

Reported-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index fdc422f3d61d..3c5ef02ea580 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -949,6 +949,9 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
  * pointers, but you must use rcu_assign_pointer() to initialize the
  * external-to-structure pointer -after- you have completely initialized
  * the reader-accessible portions of the linked structure.
+ *
+ * Note that unlike rcu_assign_pointer(), RCU_INIT_POINTER() provides no
+ * ordering guarantees for either the CPU or the compiler.
  */
 #define RCU_INIT_POINTER(p, v) \
 	do { \
-- 
cgit 


From 683399eddb9fff742b1a14c5a5d03e12bfc0afff Mon Sep 17 00:00:00 2001
From: Mathieu Poirier <mathieu.poirier@linaro.org>
Date: Sun, 20 Apr 2014 18:57:36 -0600
Subject: netfilter: nfnetlink_acct: Adding quota support to accounting
 framework

nfacct objects already support accounting at the byte and packet
level.  As such it is a natural extension to add the possibility to
define a ceiling limit for both metrics.

All the support for quotas itself is added to the nfnetlink accounting
framework to stay coherent with current accounting object management.
Quota limit checks are implemented in xt_nfacct filter where
statistic collection is already done.

Pablo Neira Ayuso has also contributed to this feature.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink_acct.h      |  8 ++-
 include/uapi/linux/netfilter/nfnetlink.h      |  2 +
 include/uapi/linux/netfilter/nfnetlink_acct.h |  9 +++
 net/netfilter/nfnetlink_acct.c                | 85 +++++++++++++++++++++++++++
 net/netfilter/xt_nfacct.c                     |  5 +-
 5 files changed, 107 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
index b2e85e59f760..6ec975748742 100644
--- a/include/linux/netfilter/nfnetlink_acct.h
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -3,11 +3,17 @@
 
 #include <uapi/linux/netfilter/nfnetlink_acct.h>
 
+enum {
+	NFACCT_NO_QUOTA		= -1,
+	NFACCT_UNDERQUOTA,
+	NFACCT_OVERQUOTA,
+};
 
 struct nf_acct;
 
 struct nf_acct *nfnl_acct_find_get(const char *filter_name);
 void nfnl_acct_put(struct nf_acct *acct);
 void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
-
+extern int nfnl_acct_overquota(const struct sk_buff *skb,
+			      struct nf_acct *nfacct);
 #endif /* _NFNL_ACCT_H */
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 596ddd45253c..354a7e5e50f2 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -20,6 +20,8 @@ enum nfnetlink_groups {
 #define NFNLGRP_CONNTRACK_EXP_DESTROY	NFNLGRP_CONNTRACK_EXP_DESTROY
 	NFNLGRP_NFTABLES,
 #define NFNLGRP_NFTABLES                NFNLGRP_NFTABLES
+	NFNLGRP_ACCT_QUOTA,
+#define NFNLGRP_ACCT_QUOTA		NFNLGRP_ACCT_QUOTA
 	__NFNLGRP_MAX,
 };
 #define NFNLGRP_MAX	(__NFNLGRP_MAX - 1)
diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h
index c7b6269e760b..51404ec19022 100644
--- a/include/uapi/linux/netfilter/nfnetlink_acct.h
+++ b/include/uapi/linux/netfilter/nfnetlink_acct.h
@@ -10,15 +10,24 @@ enum nfnl_acct_msg_types {
 	NFNL_MSG_ACCT_GET,
 	NFNL_MSG_ACCT_GET_CTRZERO,
 	NFNL_MSG_ACCT_DEL,
+	NFNL_MSG_ACCT_OVERQUOTA,
 	NFNL_MSG_ACCT_MAX
 };
 
+enum nfnl_acct_flags {
+	NFACCT_F_QUOTA_PKTS	= (1 << 0),
+	NFACCT_F_QUOTA_BYTES	= (1 << 1),
+	NFACCT_F_OVERQUOTA	= (1 << 2), /* can't be set from userspace */
+};
+
 enum nfnl_acct_type {
 	NFACCT_UNSPEC,
 	NFACCT_NAME,
 	NFACCT_PKTS,
 	NFACCT_BYTES,
 	NFACCT_USE,
+	NFACCT_FLAGS,
+	NFACCT_QUOTA,
 	__NFACCT_MAX
 };
 #define NFACCT_MAX (__NFACCT_MAX - 1)
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c7b6d466a662..70e86bbb3637 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -32,18 +32,24 @@ static LIST_HEAD(nfnl_acct_list);
 struct nf_acct {
 	atomic64_t		pkts;
 	atomic64_t		bytes;
+	unsigned long		flags;
 	struct list_head	head;
 	atomic_t		refcnt;
 	char			name[NFACCT_NAME_MAX];
 	struct rcu_head		rcu_head;
+	char			data[0];
 };
 
+#define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
+
 static int
 nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
 {
 	struct nf_acct *nfacct, *matching = NULL;
 	char *acct_name;
+	unsigned int size = 0;
+	u32 flags = 0;
 
 	if (!tb[NFACCT_NAME])
 		return -EINVAL;
@@ -68,15 +74,39 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
 			/* reset counters if you request a replacement. */
 			atomic64_set(&matching->pkts, 0);
 			atomic64_set(&matching->bytes, 0);
+			smp_mb__before_clear_bit();
+			/* reset overquota flag if quota is enabled. */
+			if ((matching->flags & NFACCT_F_QUOTA))
+				clear_bit(NFACCT_F_OVERQUOTA, &matching->flags);
 			return 0;
 		}
 		return -EBUSY;
 	}
 
 	nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
+	if (tb[NFACCT_FLAGS]) {
+		flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS]));
+		if (flags & ~NFACCT_F_QUOTA)
+			return -EOPNOTSUPP;
+		if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA)
+			return -EINVAL;
+		if (flags & NFACCT_F_OVERQUOTA)
+			return -EINVAL;
+
+		size += sizeof(u64);
+	}
+
+	nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL);
 	if (nfacct == NULL)
 		return -ENOMEM;
 
+	if (flags & NFACCT_F_QUOTA) {
+		u64 *quota = (u64 *)nfacct->data;
+
+		*quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA]));
+		nfacct->flags = flags;
+	}
+
 	strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
 
 	if (tb[NFACCT_BYTES]) {
@@ -117,6 +147,9 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
 		pkts = atomic64_xchg(&acct->pkts, 0);
 		bytes = atomic64_xchg(&acct->bytes, 0);
+		smp_mb__before_clear_bit();
+		if (acct->flags & NFACCT_F_QUOTA)
+			clear_bit(NFACCT_F_OVERQUOTA, &acct->flags);
 	} else {
 		pkts = atomic64_read(&acct->pkts);
 		bytes = atomic64_read(&acct->bytes);
@@ -125,7 +158,13 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
 	    nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) ||
 	    nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
 		goto nla_put_failure;
+	if (acct->flags & NFACCT_F_QUOTA) {
+		u64 *quota = (u64 *)acct->data;
 
+		if (nla_put_be32(skb, NFACCT_FLAGS, htonl(acct->flags)) ||
+		    nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota)))
+			goto nla_put_failure;
+	}
 	nlmsg_end(skb, nlh);
 	return skb->len;
 
@@ -270,6 +309,8 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
 	[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
 	[NFACCT_BYTES] = { .type = NLA_U64 },
 	[NFACCT_PKTS] = { .type = NLA_U64 },
+	[NFACCT_FLAGS] = { .type = NLA_U32 },
+	[NFACCT_QUOTA] = { .type = NLA_U64 },
 };
 
 static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
@@ -336,6 +377,50 @@ void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
 }
 EXPORT_SYMBOL_GPL(nfnl_acct_update);
 
+static void nfnl_overquota_report(struct nf_acct *nfacct)
+{
+	int ret;
+	struct sk_buff *skb;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (skb == NULL)
+		return;
+
+	ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0,
+				  nfacct);
+	if (ret <= 0) {
+		kfree_skb(skb);
+		return;
+	}
+	netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
+			  GFP_ATOMIC);
+}
+
+int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
+{
+	u64 now;
+	u64 *quota;
+	int ret = NFACCT_UNDERQUOTA;
+
+	/* no place here if we don't have a quota */
+	if (!(nfacct->flags & NFACCT_F_QUOTA))
+		return NFACCT_NO_QUOTA;
+
+	quota = (u64 *)nfacct->data;
+	now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ?
+	       atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes);
+
+	ret = now > *quota;
+
+	if (now >= *quota &&
+	    !test_and_set_bit(NFACCT_F_OVERQUOTA, &nfacct->flags)) {
+		nfnl_overquota_report(nfacct);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
+
 static int __init nfnl_acct_init(void)
 {
 	int ret;
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index b3be0ef21f19..8c646ed9c921 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -21,11 +21,14 @@ MODULE_ALIAS("ip6t_nfacct");
 
 static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
+	int overquota;
 	const struct xt_nfacct_match_info *info = par->targinfo;
 
 	nfnl_acct_update(skb, info->nfacct);
 
-	return true;
+	overquota = nfnl_acct_overquota(skb, info->nfacct);
+
+	return overquota == NFACCT_UNDERQUOTA ? false : true;
 }
 
 static int
-- 
cgit 


From 3046365bb470f0ec2f7cf5cb07a8ee7e4b490103 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 29 Apr 2014 00:51:00 +0100
Subject: devres: introduce API "devm_kmemdup"

Introduce devm_kmemdup, which uses resource managed kmalloc.
There have been several requests from maintainers to add this instead
of using kmemdup.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 Documentation/driver-model/devres.txt |  1 +
 drivers/base/devres.c                 | 21 +++++++++++++++++++++
 include/linux/device.h                |  2 ++
 3 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 4f7897e99cba..499951873997 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -236,6 +236,7 @@ certainly invest a bit more effort into libata core layer).
 MEM
   devm_kzalloc()
   devm_kfree()
+  devm_kmemdup()
 
 IIO
   devm_iio_device_alloc()
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index db4e264eecb6..d0914cba2413 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -831,3 +831,24 @@ void devm_kfree(struct device *dev, void *p)
 	WARN_ON(rc);
 }
 EXPORT_SYMBOL_GPL(devm_kfree);
+
+/**
+ * devm_kmemdup - Resource-managed kmemdup
+ * @dev: Device this memory belongs to
+ * @src: Memory region to duplicate
+ * @len: Memory region length
+ * @gfp: GFP mask to use
+ *
+ * Duplicate a region of memory using resource-managed kmalloc
+ */
+void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp)
+{
+	void *p;
+
+	p = devm_kmalloc(dev, len, gfp);
+	if (p)
+		memcpy(p, src, len);
+
+	return p;
+}
+EXPORT_SYMBOL_GPL(devm_kmemdup);
diff --git a/include/linux/device.h b/include/linux/device.h
index d1d1c055b48e..ab871588da89 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -623,6 +623,8 @@ static inline void *devm_kcalloc(struct device *dev,
 }
 extern void devm_kfree(struct device *dev, void *p);
 extern char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp);
+extern void *devm_kmemdup(struct device *dev, const void *src, size_t len,
+			  gfp_t gfp);
 
 void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res);
 void __iomem *devm_request_and_ioremap(struct device *dev,
-- 
cgit 


From 9fbfb4b37ed23f71aa9484484266381c6c6964cb Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 29 Apr 2014 00:51:00 +0100
Subject: IIO: core: Introduce read_raw_multi

This callback is introduced to overcome some limitations of existing
read_raw callback. The functionality of both existing read_raw and
read_raw_multi is similar, both are used to request values from the
device. The current read_raw callback allows only two return values.
The new read_raw_multi allows returning multiple values. Instead of
passing just address of val and val2, it passes length and pointer
to values. Depending on the type and length of passed buffer, iio
client drivers can return multiple values.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/iio_core.h           |  2 +-
 drivers/iio/industrialio-core.c  | 65 ++++++++++++++++++++++++++--------------
 drivers/iio/industrialio-event.c |  6 ++--
 drivers/iio/inkern.c             | 16 ++++++++--
 include/linux/iio/iio.h          | 17 +++++++++++
 include/linux/iio/types.h        |  1 +
 6 files changed, 80 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/iio_core.h b/drivers/iio/iio_core.h
index f6db6af36ba6..5f0ea77fe717 100644
--- a/drivers/iio/iio_core.h
+++ b/drivers/iio/iio_core.h
@@ -35,7 +35,7 @@ int __iio_add_chan_devattr(const char *postfix,
 			   struct list_head *attr_list);
 void iio_free_chan_devattr_list(struct list_head *attr_list);
 
-ssize_t iio_format_value(char *buf, unsigned int type, int val, int val2);
+ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals);
 
 /* Event interface flags */
 #define IIO_BUSY_BIT_POS 1
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 184444db62ac..59540859bfae 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -373,41 +373,53 @@ EXPORT_SYMBOL_GPL(iio_enum_write);
  * @buf: The buffer to which the formated value gets written
  * @type: One of the IIO_VAL_... constants. This decides how the val and val2
  *        parameters are formatted.
- * @val: First part of the value, exact meaning depends on the type parameter.
- * @val2: Second part of the value, exact meaning depends on the type parameter.
+ * @vals: pointer to the values, exact meaning depends on the type parameter.
  */
-ssize_t iio_format_value(char *buf, unsigned int type, int val, int val2)
+ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals)
 {
 	unsigned long long tmp;
 	bool scale_db = false;
 
 	switch (type) {
 	case IIO_VAL_INT:
-		return sprintf(buf, "%d\n", val);
+		return sprintf(buf, "%d\n", vals[0]);
 	case IIO_VAL_INT_PLUS_MICRO_DB:
 		scale_db = true;
 	case IIO_VAL_INT_PLUS_MICRO:
-		if (val2 < 0)
-			return sprintf(buf, "-%ld.%06u%s\n", abs(val), -val2,
+		if (vals[1] < 0)
+			return sprintf(buf, "-%ld.%06u%s\n", abs(vals[0]),
+					-vals[1],
 				scale_db ? " dB" : "");
 		else
-			return sprintf(buf, "%d.%06u%s\n", val, val2,
+			return sprintf(buf, "%d.%06u%s\n", vals[0], vals[1],
 				scale_db ? " dB" : "");
 	case IIO_VAL_INT_PLUS_NANO:
-		if (val2 < 0)
-			return sprintf(buf, "-%ld.%09u\n", abs(val), -val2);
+		if (vals[1] < 0)
+			return sprintf(buf, "-%ld.%09u\n", abs(vals[0]),
+					-vals[1]);
 		else
-			return sprintf(buf, "%d.%09u\n", val, val2);
+			return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
 	case IIO_VAL_FRACTIONAL:
-		tmp = div_s64((s64)val * 1000000000LL, val2);
-		val2 = do_div(tmp, 1000000000LL);
-		val = tmp;
-		return sprintf(buf, "%d.%09u\n", val, val2);
+		tmp = div_s64((s64)vals[0] * 1000000000LL, vals[1]);
+		vals[1] = do_div(tmp, 1000000000LL);
+		vals[0] = tmp;
+		return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
 	case IIO_VAL_FRACTIONAL_LOG2:
-		tmp = (s64)val * 1000000000LL >> val2;
-		val2 = do_div(tmp, 1000000000LL);
-		val = tmp;
-		return sprintf(buf, "%d.%09u\n", val, val2);
+		tmp = (s64)vals[0] * 1000000000LL >> vals[1];
+		vals[1] = do_div(tmp, 1000000000LL);
+		vals[0] = tmp;
+		return sprintf(buf, "%d.%09u\n", vals[0], vals[1]);
+	case IIO_VAL_INT_MULTIPLE:
+	{
+		int i;
+		int len = 0;
+
+		for (i = 0; i < size; ++i)
+			len += snprintf(&buf[len], PAGE_SIZE - len, "%d ",
+								vals[i]);
+		len += snprintf(&buf[len], PAGE_SIZE - len, "\n");
+		return len;
+	}
 	default:
 		return 0;
 	}
@@ -419,14 +431,23 @@ static ssize_t iio_read_channel_info(struct device *dev,
 {
 	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
 	struct iio_dev_attr *this_attr = to_iio_dev_attr(attr);
-	int val, val2;
-	int ret = indio_dev->info->read_raw(indio_dev, this_attr->c,
-					    &val, &val2, this_attr->address);
+	int vals[INDIO_MAX_RAW_ELEMENTS];
+	int ret;
+	int val_len = 2;
+
+	if (indio_dev->info->read_raw_multi)
+		ret = indio_dev->info->read_raw_multi(indio_dev, this_attr->c,
+							INDIO_MAX_RAW_ELEMENTS,
+							vals, &val_len,
+							this_attr->address);
+	else
+		ret = indio_dev->info->read_raw(indio_dev, this_attr->c,
+				    &vals[0], &vals[1], this_attr->address);
 
 	if (ret < 0)
 		return ret;
 
-	return iio_format_value(buf, ret, val, val2);
+	return iio_format_value(buf, ret, val_len, vals);
 }
 
 /**
diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c
index dddfb0f90d34..258a973a1fb8 100644
--- a/drivers/iio/industrialio-event.c
+++ b/drivers/iio/industrialio-event.c
@@ -270,7 +270,7 @@ static ssize_t iio_ev_value_show(struct device *dev,
 {
 	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
 	struct iio_dev_attr *this_attr = to_iio_dev_attr(attr);
-	int val, val2;
+	int val, val2, val_arr[2];
 	int ret;
 
 	ret = indio_dev->info->read_event_value(indio_dev,
@@ -279,7 +279,9 @@ static ssize_t iio_ev_value_show(struct device *dev,
 		&val, &val2);
 	if (ret < 0)
 		return ret;
-	return iio_format_value(buf, ret, val, val2);
+	val_arr[0] = val;
+	val_arr[1] = val2;
+	return iio_format_value(buf, ret, 2, val_arr);
 }
 
 static ssize_t iio_ev_value_store(struct device *dev,
diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index adeba5a0ecf7..d833d55052ea 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -417,12 +417,24 @@ static int iio_channel_read(struct iio_channel *chan, int *val, int *val2,
 	enum iio_chan_info_enum info)
 {
 	int unused;
+	int vals[INDIO_MAX_RAW_ELEMENTS];
+	int ret;
+	int val_len = 2;
 
 	if (val2 == NULL)
 		val2 = &unused;
 
-	return chan->indio_dev->info->read_raw(chan->indio_dev, chan->channel,
-						val, val2, info);
+	if (chan->indio_dev->info->read_raw_multi) {
+		ret = chan->indio_dev->info->read_raw_multi(chan->indio_dev,
+					chan->channel, INDIO_MAX_RAW_ELEMENTS,
+					vals, &val_len, info);
+		*val = vals[0];
+		*val2 = vals[1];
+	} else
+		ret = chan->indio_dev->info->read_raw(chan->indio_dev,
+					chan->channel, val, val2, info);
+
+	return ret;
 }
 
 int iio_read_channel_raw(struct iio_channel *chan, int *val)
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 5f2d00e7e488..5629c92eeadf 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -288,6 +288,8 @@ static inline s64 iio_get_time_ns(void)
 #define INDIO_ALL_BUFFER_MODES					\
 	(INDIO_BUFFER_TRIGGERED | INDIO_BUFFER_HARDWARE)
 
+#define INDIO_MAX_RAW_ELEMENTS		4
+
 struct iio_trigger; /* forward declaration */
 struct iio_dev;
 
@@ -302,6 +304,14 @@ struct iio_dev;
  *			the channel in question.  Return value will specify the
  *			type of value returned by the device. val and val2 will
  *			contain the elements making up the returned value.
+ * @read_raw_multi:	function to return values from the device.
+ *			mask specifies which value. Note 0 means a reading of
+ *			the channel in question.  Return value will specify the
+ *			type of value returned by the device. vals pointer
+ *			contains the elements making up the returned value.
+ *			max_len specifies maximum number of elements
+ *			vals pointer can contain. val_len is used to return
+ *			length of valid elements in vals.
  * @write_raw:		function to write a value to the device.
  *			Parameters are the same as for read_raw.
  * @write_raw_get_fmt:	callback function to query the expected
@@ -328,6 +338,13 @@ struct iio_info {
 			int *val2,
 			long mask);
 
+	int (*read_raw_multi)(struct iio_dev *indio_dev,
+			struct iio_chan_spec const *chan,
+			int max_len,
+			int *vals,
+			int *val_len,
+			long mask);
+
 	int (*write_raw)(struct iio_dev *indio_dev,
 			 struct iio_chan_spec const *chan,
 			 int val,
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 084d882fe01b..a13c2241abce 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -79,6 +79,7 @@ enum iio_event_direction {
 #define IIO_VAL_INT_PLUS_MICRO 2
 #define IIO_VAL_INT_PLUS_NANO 3
 #define IIO_VAL_INT_PLUS_MICRO_DB 4
+#define IIO_VAL_INT_MULTIPLE 5
 #define IIO_VAL_FRACTIONAL 10
 #define IIO_VAL_FRACTIONAL_LOG2 11
 
-- 
cgit 


From 0ee8546ac01864b6e12e65199142e00db59c9809 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 29 Apr 2014 00:51:00 +0100
Subject: IIO: core: Modify scan element type

The current scan element type uses the following format:
  [be|le]:[s|u]bits/storagebits[>>shift].
To specify multiple elements in this type, a repeat value has been added.
So the new format is:
  [be|le]:[s|u]bits/storagebitsXr[>>shift].
Here r specifies how many times the real/storage bits repeat.

When the repeat value r is 0 or 1, the "Xr" part is not used in the format,
and the output is the same as the existing format.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-buffer.c | 41 +++++++++++++++++++++++++++++++++------
 include/linux/iio/iio.h           |  7 +++++++
 2 files changed, 42 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index e472cff6eeae..36b1ae92e239 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -150,7 +150,16 @@ static ssize_t iio_show_fixed_type(struct device *dev,
 		type = IIO_BE;
 #endif
 	}
-	return sprintf(buf, "%s:%c%d/%d>>%u\n",
+	if (this_attr->c->scan_type.repeat > 1)
+		return sprintf(buf, "%s:%c%d/%dX%d>>%u\n",
+		       iio_endian_prefix[type],
+		       this_attr->c->scan_type.sign,
+		       this_attr->c->scan_type.realbits,
+		       this_attr->c->scan_type.storagebits,
+		       this_attr->c->scan_type.repeat,
+		       this_attr->c->scan_type.shift);
+		else
+			return sprintf(buf, "%s:%c%d/%d>>%u\n",
 		       iio_endian_prefix[type],
 		       this_attr->c->scan_type.sign,
 		       this_attr->c->scan_type.realbits,
@@ -475,14 +484,22 @@ static int iio_compute_scan_bytes(struct iio_dev *indio_dev,
 	for_each_set_bit(i, mask,
 			 indio_dev->masklength) {
 		ch = iio_find_channel_from_si(indio_dev, i);
-		length = ch->scan_type.storagebits / 8;
+		if (ch->scan_type.repeat > 1)
+			length = ch->scan_type.storagebits / 8 *
+				ch->scan_type.repeat;
+		else
+			length = ch->scan_type.storagebits / 8;
 		bytes = ALIGN(bytes, length);
 		bytes += length;
 	}
 	if (timestamp) {
 		ch = iio_find_channel_from_si(indio_dev,
 					      indio_dev->scan_index_timestamp);
-		length = ch->scan_type.storagebits / 8;
+		if (ch->scan_type.repeat > 1)
+			length = ch->scan_type.storagebits / 8 *
+				ch->scan_type.repeat;
+		else
+			length = ch->scan_type.storagebits / 8;
 		bytes = ALIGN(bytes, length);
 		bytes += length;
 	}
@@ -959,7 +976,11 @@ static int iio_buffer_update_demux(struct iio_dev *indio_dev,
 					       indio_dev->masklength,
 					       in_ind + 1);
 			ch = iio_find_channel_from_si(indio_dev, in_ind);
-			length = ch->scan_type.storagebits/8;
+			if (ch->scan_type.repeat > 1)
+				length = ch->scan_type.storagebits / 8 *
+					ch->scan_type.repeat;
+			else
+				length = ch->scan_type.storagebits / 8;
 			/* Make sure we are aligned */
 			in_loc += length;
 			if (in_loc % length)
@@ -971,7 +992,11 @@ static int iio_buffer_update_demux(struct iio_dev *indio_dev,
 			goto error_clear_mux_table;
 		}
 		ch = iio_find_channel_from_si(indio_dev, in_ind);
-		length = ch->scan_type.storagebits/8;
+		if (ch->scan_type.repeat > 1)
+			length = ch->scan_type.storagebits / 8 *
+				ch->scan_type.repeat;
+		else
+			length = ch->scan_type.storagebits / 8;
 		if (out_loc % length)
 			out_loc += length - out_loc % length;
 		if (in_loc % length)
@@ -992,7 +1017,11 @@ static int iio_buffer_update_demux(struct iio_dev *indio_dev,
 		}
 		ch = iio_find_channel_from_si(indio_dev,
 			indio_dev->scan_index_timestamp);
-		length = ch->scan_type.storagebits/8;
+		if (ch->scan_type.repeat > 1)
+			length = ch->scan_type.storagebits / 8 *
+				ch->scan_type.repeat;
+		else
+			length = ch->scan_type.storagebits / 8;
 		if (out_loc % length)
 			out_loc += length - out_loc % length;
 		if (in_loc % length)
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 5629c92eeadf..ccde91725f98 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -177,6 +177,12 @@ struct iio_event_spec {
  *			shift:		Shift right by this before masking out
  *					realbits.
  *			endianness:	little or big endian
+ *			repeat:		Number of times real/storage bits
+ *					repeats. When the repeat element is
+ *					more than 1, then the type element in
+ *					sysfs will show a repeat value.
+ *					Otherwise, the number of repetitions is
+ *					omitted.
  * @info_mask_separate: What information is to be exported that is specific to
  *			this channel.
  * @info_mask_shared_by_type: What information is to be exported that is shared
@@ -219,6 +225,7 @@ struct iio_chan_spec {
 		u8	realbits;
 		u8	storagebits;
 		u8	shift;
+		u8	repeat;
 		enum iio_endian endianness;
 	} scan_type;
 	long			info_mask_separate;
-- 
cgit 


From 5082f405b74ad1b69aa9595555ce55b75b59b2ec Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 29 Apr 2014 00:51:00 +0100
Subject: IIO: core: Add quaternion modifier

Added quaternion in the list of supported modifiers.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-core.c | 1 +
 include/linux/iio/types.h       | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 59540859bfae..de8b1c2ed4b4 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -84,6 +84,7 @@ static const char * const iio_modifier_names[] = {
 	[IIO_MOD_LIGHT_RED] = "red",
 	[IIO_MOD_LIGHT_GREEN] = "green",
 	[IIO_MOD_LIGHT_BLUE] = "blue",
+	[IIO_MOD_QUATERNION] = "quaternion",
 };
 
 /* relies on pairs of these shared then separate */
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index a13c2241abce..4fdab2e843b4 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -53,6 +53,7 @@ enum iio_modifier {
 	IIO_MOD_LIGHT_RED,
 	IIO_MOD_LIGHT_GREEN,
 	IIO_MOD_LIGHT_BLUE,
+	IIO_MOD_QUATERNION,
 };
 
 enum iio_event_type {
-- 
cgit 


From fc18dddc0625cd1fdf6a823e85138ff05848a85f Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 29 Apr 2014 00:51:00 +0100
Subject: iio: hid-sensors: Added device rotation support

Added usage id processing for device rotation. This uses the IIO
interfaces for triggered buffers to present data to user
mode. It uses the HID sensor framework for registering callback
events from the sensor hub.
Data is exported to user space in quaternion rotation
format.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/orientation/Kconfig               |  12 +
 drivers/iio/orientation/Makefile              |   1 +
 drivers/iio/orientation/hid-sensor-rotation.c | 348 ++++++++++++++++++++++++++
 include/linux/hid-sensor-ids.h                |   1 +
 4 files changed, 362 insertions(+)
 create mode 100644 drivers/iio/orientation/hid-sensor-rotation.c

(limited to 'include/linux')

diff --git a/drivers/iio/orientation/Kconfig b/drivers/iio/orientation/Kconfig
index 58c62c837e12..e3aa1e58d920 100644
--- a/drivers/iio/orientation/Kconfig
+++ b/drivers/iio/orientation/Kconfig
@@ -16,4 +16,16 @@ config HID_SENSOR_INCLINOMETER_3D
 	  Say yes here to build support for the HID SENSOR
 	  Inclinometer 3D.
 
+config HID_SENSOR_DEVICE_ROTATION
+	depends on HID_SENSOR_HUB
+	select IIO_BUFFER
+	select IIO_TRIGGERED_BUFFER
+	select HID_SENSOR_IIO_COMMON
+	select HID_SENSOR_IIO_TRIGGER
+	tristate "HID Device Rotation"
+	help
+	  Say yes here to build support for the HID SENSOR
+	  device rotation. The output of a device rotation sensor
+	  is presented using quaternion format.
+
 endmenu
diff --git a/drivers/iio/orientation/Makefile b/drivers/iio/orientation/Makefile
index 2c97572ee919..4734dabbde13 100644
--- a/drivers/iio/orientation/Makefile
+++ b/drivers/iio/orientation/Makefile
@@ -4,3 +4,4 @@
 
 # When adding new entries keep the list in alphabetical order
 obj-$(CONFIG_HID_SENSOR_INCLINOMETER_3D) += hid-sensor-incl-3d.o
+obj-$(CONFIG_HID_SENSOR_DEVICE_ROTATION) += hid-sensor-rotation.o
diff --git a/drivers/iio/orientation/hid-sensor-rotation.c b/drivers/iio/orientation/hid-sensor-rotation.c
new file mode 100644
index 000000000000..51387bbc1ce1
--- /dev/null
+++ b/drivers/iio/orientation/hid-sensor-rotation.c
@@ -0,0 +1,348 @@
+/*
+ * HID Sensors Driver
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/slab.h>
+#include <linux/hid-sensor-hub.h>
+#include <linux/iio/iio.h>
+#include <linux/iio/sysfs.h>
+#include <linux/iio/buffer.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iio/triggered_buffer.h>
+#include "../common/hid-sensors/hid-sensor-trigger.h"
+
+struct dev_rot_state {
+	struct hid_sensor_hub_callbacks callbacks;
+	struct hid_sensor_common common_attributes;
+	struct hid_sensor_hub_attribute_info quaternion;
+	u32 sampled_vals[4];
+};
+
+/* Channel definitions */
+static const struct iio_chan_spec dev_rot_channels[] = {
+	{
+		.type = IIO_ROT,
+		.modified = 1,
+		.channel2 = IIO_MOD_QUATERNION,
+		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
+		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SAMP_FREQ) |
+					BIT(IIO_CHAN_INFO_HYSTERESIS)
+	}
+};
+
+/* Adjust channel real bits based on report descriptor */
+static void dev_rot_adjust_channel_bit_mask(struct iio_chan_spec *chan,
+						int size)
+{
+	chan->scan_type.sign = 's';
+	/* Real storage bits will change based on the report desc. */
+	chan->scan_type.realbits = size * 8;
+	/* Maximum size of a sample to capture is u32 */
+	chan->scan_type.storagebits = sizeof(u32) * 8;
+	chan->scan_type.repeat = 4;
+}
+
+/* Channel read_raw handler */
+static int dev_rot_read_raw(struct iio_dev *indio_dev,
+				struct iio_chan_spec const *chan,
+				int size, int *vals, int *val_len,
+				long mask)
+{
+	struct dev_rot_state *rot_state = iio_priv(indio_dev);
+	int ret_type;
+	int i;
+
+	vals[0] = 0;
+	vals[1] = 0;
+
+	switch (mask) {
+	case IIO_CHAN_INFO_RAW:
+		if (size >= 4) {
+			for (i = 0; i < 4; ++i)
+				vals[i] = rot_state->sampled_vals[i];
+			ret_type = IIO_VAL_INT_MULTIPLE;
+			*val_len =  4;
+		} else
+			ret_type = -EINVAL;
+		break;
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		ret_type = hid_sensor_read_samp_freq_value(
+			&rot_state->common_attributes, &vals[0], &vals[1]);
+		break;
+	case IIO_CHAN_INFO_HYSTERESIS:
+		ret_type = hid_sensor_read_raw_hyst_value(
+			&rot_state->common_attributes, &vals[0], &vals[1]);
+		break;
+	default:
+		ret_type = -EINVAL;
+		break;
+	}
+
+	return ret_type;
+}
+
+/* Channel write_raw handler */
+static int dev_rot_write_raw(struct iio_dev *indio_dev,
+			       struct iio_chan_spec const *chan,
+			       int val,
+			       int val2,
+			       long mask)
+{
+	struct dev_rot_state *rot_state = iio_priv(indio_dev);
+	int ret;
+
+	switch (mask) {
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		ret = hid_sensor_write_samp_freq_value(
+				&rot_state->common_attributes, val, val2);
+		break;
+	case IIO_CHAN_INFO_HYSTERESIS:
+		ret = hid_sensor_write_raw_hyst_value(
+				&rot_state->common_attributes, val, val2);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static const struct iio_info dev_rot_info = {
+	.driver_module = THIS_MODULE,
+	.read_raw_multi = &dev_rot_read_raw,
+	.write_raw = &dev_rot_write_raw,
+};
+
+/* Function to push data to buffer */
+static void hid_sensor_push_data(struct iio_dev *indio_dev, u8 *data, int len)
+{
+	dev_dbg(&indio_dev->dev, "hid_sensor_push_data >>\n");
+	iio_push_to_buffers(indio_dev, (u8 *)data);
+	dev_dbg(&indio_dev->dev, "hid_sensor_push_data <<\n");
+
+}
+
+/* Callback handler to send event after all samples are received and captured */
+static int dev_rot_proc_event(struct hid_sensor_hub_device *hsdev,
+				unsigned usage_id,
+				void *priv)
+{
+	struct iio_dev *indio_dev = platform_get_drvdata(priv);
+	struct dev_rot_state *rot_state = iio_priv(indio_dev);
+
+	dev_dbg(&indio_dev->dev, "dev_rot_proc_event [%d]\n",
+				rot_state->common_attributes.data_ready);
+
+	if (rot_state->common_attributes.data_ready)
+		hid_sensor_push_data(indio_dev,
+				(u8 *)rot_state->sampled_vals,
+				sizeof(rot_state->sampled_vals));
+
+	return 0;
+}
+
+/* Capture samples in local storage */
+static int dev_rot_capture_sample(struct hid_sensor_hub_device *hsdev,
+				unsigned usage_id,
+				size_t raw_len, char *raw_data,
+				void *priv)
+{
+	struct iio_dev *indio_dev = platform_get_drvdata(priv);
+	struct dev_rot_state *rot_state = iio_priv(indio_dev);
+
+	if (usage_id == HID_USAGE_SENSOR_ORIENT_QUATERNION) {
+		memcpy(rot_state->sampled_vals, raw_data,
+					sizeof(rot_state->sampled_vals));
+		dev_dbg(&indio_dev->dev, "Recd Quat len:%zu::%zu\n", raw_len,
+					sizeof(rot_state->sampled_vals));
+	}
+
+	return 0;
+}
+
+/* Parse report which is specific to an usage id*/
+static int dev_rot_parse_report(struct platform_device *pdev,
+				struct hid_sensor_hub_device *hsdev,
+				struct iio_chan_spec *channels,
+				unsigned usage_id,
+				struct dev_rot_state *st)
+{
+	int ret;
+
+	ret = sensor_hub_input_get_attribute_info(hsdev,
+				HID_INPUT_REPORT,
+				usage_id,
+				HID_USAGE_SENSOR_ORIENT_QUATERNION,
+				&st->quaternion);
+	if (ret)
+		return ret;
+
+	dev_rot_adjust_channel_bit_mask(&channels[0],
+		st->quaternion.size / 4);
+
+	dev_dbg(&pdev->dev, "dev_rot %x:%x\n", st->quaternion.index,
+		st->quaternion.report_id);
+
+	dev_dbg(&pdev->dev, "dev_rot: attrib size %d\n",
+				st->quaternion.size);
+
+	/* Set Sensitivity field ids, when there is no individual modifier */
+	if (st->common_attributes.sensitivity.index < 0) {
+		sensor_hub_input_get_attribute_info(hsdev,
+			HID_FEATURE_REPORT, usage_id,
+			HID_USAGE_SENSOR_DATA_MOD_CHANGE_SENSITIVITY_ABS |
+			HID_USAGE_SENSOR_DATA_ORIENTATION,
+			&st->common_attributes.sensitivity);
+		dev_dbg(&pdev->dev, "Sensitivity index:report %d:%d\n",
+			st->common_attributes.sensitivity.index,
+			st->common_attributes.sensitivity.report_id);
+	}
+
+	return 0;
+}
+
+/* Function to initialize the processing for usage id */
+static int hid_dev_rot_probe(struct platform_device *pdev)
+{
+	int ret;
+	static char *name = "dev_rotation";
+	struct iio_dev *indio_dev;
+	struct dev_rot_state *rot_state;
+	struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data;
+	struct iio_chan_spec *channels;
+
+	indio_dev = devm_iio_device_alloc(&pdev->dev,
+					  sizeof(struct dev_rot_state));
+	if (indio_dev == NULL)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, indio_dev);
+
+	rot_state = iio_priv(indio_dev);
+	rot_state->common_attributes.hsdev = hsdev;
+	rot_state->common_attributes.pdev = pdev;
+
+	ret = hid_sensor_parse_common_attributes(hsdev,
+				HID_USAGE_SENSOR_DEVICE_ORIENTATION,
+				&rot_state->common_attributes);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to setup common attributes\n");
+		return ret;
+	}
+
+	channels = devm_kmemdup(&pdev->dev, dev_rot_channels,
+					sizeof(dev_rot_channels), GFP_KERNEL);
+	if (!channels) {
+		dev_err(&pdev->dev, "failed to duplicate channels\n");
+		return -ENOMEM;
+	}
+
+	ret = dev_rot_parse_report(pdev, hsdev, channels,
+			HID_USAGE_SENSOR_DEVICE_ORIENTATION, rot_state);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to setup attributes\n");
+		return ret;
+	}
+
+	indio_dev->channels = channels;
+	indio_dev->num_channels = ARRAY_SIZE(dev_rot_channels);
+	indio_dev->dev.parent = &pdev->dev;
+	indio_dev->info = &dev_rot_info;
+	indio_dev->name = name;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+
+	ret = iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+		NULL, NULL);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to initialize trigger buffer\n");
+		return ret;
+	}
+	rot_state->common_attributes.data_ready = false;
+	ret = hid_sensor_setup_trigger(indio_dev, name,
+					&rot_state->common_attributes);
+	if (ret) {
+		dev_err(&pdev->dev, "trigger setup failed\n");
+		goto error_unreg_buffer_funcs;
+	}
+
+	ret = iio_device_register(indio_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "device register failed\n");
+		goto error_remove_trigger;
+	}
+
+	rot_state->callbacks.send_event = dev_rot_proc_event;
+	rot_state->callbacks.capture_sample = dev_rot_capture_sample;
+	rot_state->callbacks.pdev = pdev;
+	ret = sensor_hub_register_callback(hsdev,
+					HID_USAGE_SENSOR_DEVICE_ORIENTATION,
+					&rot_state->callbacks);
+	if (ret) {
+		dev_err(&pdev->dev, "callback reg failed\n");
+		goto error_iio_unreg;
+	}
+
+	return 0;
+
+error_iio_unreg:
+	iio_device_unregister(indio_dev);
+error_remove_trigger:
+	hid_sensor_remove_trigger(&rot_state->common_attributes);
+error_unreg_buffer_funcs:
+	iio_triggered_buffer_cleanup(indio_dev);
+	return ret;
+}
+
+/* Function to deinitialize the processing for usage id */
+static int hid_dev_rot_remove(struct platform_device *pdev)
+{
+	struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data;
+	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
+	struct dev_rot_state *rot_state = iio_priv(indio_dev);
+
+	sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_DEVICE_ORIENTATION);
+	iio_device_unregister(indio_dev);
+	hid_sensor_remove_trigger(&rot_state->common_attributes);
+	iio_triggered_buffer_cleanup(indio_dev);
+
+	return 0;
+}
+
+static struct platform_device_id hid_dev_rot_ids[] = {
+	{
+		/* Format: HID-SENSOR-usage_id_in_hex_lowercase */
+		.name = "HID-SENSOR-20008a",
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, hid_dev_rot_ids);
+
+static struct platform_driver hid_dev_rot_platform_driver = {
+	.id_table = hid_dev_rot_ids,
+	.driver = {
+		.name	= KBUILD_MODNAME,
+		.owner	= THIS_MODULE,
+	},
+	.probe		= hid_dev_rot_probe,
+	.remove		= hid_dev_rot_remove,
+};
+module_platform_driver(hid_dev_rot_platform_driver);
+
+MODULE_DESCRIPTION("HID Sensor Device Rotation");
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h
index 14ead9e8eda8..109f0e633e01 100644
--- a/include/linux/hid-sensor-ids.h
+++ b/include/linux/hid-sensor-ids.h
@@ -76,6 +76,7 @@
 #define HID_USAGE_SENSOR_ORIENT_TILT_Y				0x200480
 #define HID_USAGE_SENSOR_ORIENT_TILT_Z				0x200481
 
+#define HID_USAGE_SENSOR_DEVICE_ORIENTATION			0x20008A
 #define HID_USAGE_SENSOR_ORIENT_ROTATION_MATRIX			0x200482
 #define HID_USAGE_SENSOR_ORIENT_QUATERNION			0x200483
 #define HID_USAGE_SENSOR_ORIENT_MAGN_FLUX			0x200484
-- 
cgit 


From 27e289dce29764e488c1e13e9aa6950cad1f4aab Mon Sep 17 00:00:00 2001
From: Stratos Karafotis <stratosk@semaphore.gr>
Date: Fri, 25 Apr 2014 23:15:23 +0300
Subject: cpufreq: Introduce macros for cpufreq_frequency_table iteration

Many cpufreq drivers need to iterate over the cpufreq_frequency_table
for various tasks.

This patch introduces two macros which can be used for iteration over
cpufreq_frequency_table keeping a common coding style across drivers:

- cpufreq_for_each_entry: iterate over each entry of the table
- cpufreq_for_each_valid_entry: iterate over each entry that contains
a valid frequency.

It should have no functional changes.

Signed-off-by: Stratos Karafotis <stratosk@semaphore.gr>
Acked-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/cpu-freq/cpu-drivers.txt | 19 +++++++++++++++++++
 drivers/cpufreq/cpufreq.c              | 11 +++++++++++
 include/linux/cpufreq.h                | 21 +++++++++++++++++++++
 3 files changed, 51 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
index 48da5fdcb9f1..b045fe54986a 100644
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ b/Documentation/cpu-freq/cpu-drivers.txt
@@ -228,3 +228,22 @@ is the corresponding frequency table helper for the ->target
 stage. Just pass the values to this function, and the unsigned int
 index returns the number of the frequency table entry which contains
 the frequency the CPU shall be set to.
+
+The following macros can be used as iterators over cpufreq_frequency_table:
+
+cpufreq_for_each_entry(pos, table) - iterates over all entries of frequency
+table.
+
+cpufreq_for_each_valid_entry(pos, table) - iterates over all entries,
+excluding CPUFREQ_ENTRY_INVALID frequencies.
+Use arguments "pos" - a cpufreq_frequency_table * as a loop cursor and
+"table" - the cpufreq_frequency_table * you want to iterate over.
+
+For example:
+
+	struct cpufreq_frequency_table *pos, *driver_freq_table;
+
+	cpufreq_for_each_entry(pos, driver_freq_table) {
+		/* Do something with pos */
+		pos->frequency = ...
+	}
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index abda6609d3e7..a517da996aaf 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -237,6 +237,17 @@ void cpufreq_cpu_put(struct cpufreq_policy *policy)
 }
 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
 
+bool cpufreq_next_valid(struct cpufreq_frequency_table **pos)
+{
+	while ((*pos)->frequency != CPUFREQ_TABLE_END)
+		if ((*pos)->frequency != CPUFREQ_ENTRY_INVALID)
+			return true;
+		else
+			(*pos)++;
+	return false;
+}
+EXPORT_SYMBOL_GPL(cpufreq_next_valid);
+
 /*********************************************************************
  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
  *********************************************************************/
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 5ae5100c1f24..77a5fa191502 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -468,6 +468,27 @@ struct cpufreq_frequency_table {
 				    * order */
 };
 
+bool cpufreq_next_valid(struct cpufreq_frequency_table **pos);
+
+/*
+ * cpufreq_for_each_entry -	iterate over a cpufreq_frequency_table
+ * @pos:	the cpufreq_frequency_table * to use as a loop cursor.
+ * @table:	the cpufreq_frequency_table * to iterate over.
+ */
+
+#define cpufreq_for_each_entry(pos, table)	\
+	for (pos = table; pos->frequency != CPUFREQ_TABLE_END; pos++)
+
+/*
+ * cpufreq_for_each_valid_entry -     iterate over a cpufreq_frequency_table
+ *	excluding CPUFREQ_ENTRY_INVALID frequencies.
+ * @pos:        the cpufreq_frequency_table * to use as a loop cursor.
+ * @table:      the cpufreq_frequency_table * to iterate over.
+ */
+
+#define cpufreq_for_each_valid_entry(pos, table)	\
+	for (pos = table; cpufreq_next_valid(&pos); pos++)
+
 int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
 				    struct cpufreq_frequency_table *table);
 
-- 
cgit 


From cdae05a0f0f7d15837dfd6f4200e8caea03c9cbf Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 28 Apr 2014 10:49:43 +0000
Subject: dmaengine: edma: Make reading the position of active channels work

As Joel pointed out, edma_get_position() uses memcpy_fromio() to read
the parameter ram. That's not synchronized with the internal update as
it does a byte by byte copy. We need to do a 32bit read to get a
consistent value.

Further, reading both destination and source is pointless. In DEV_TO_MEM
transfers we are only interested in the destination, in MEM_TO_DEV we
care about the source. In MEM_TO_MEM it really does not matter which
one you read.

Simple solution: Remove the pointers, select dest/source via a bool
and return the read value.

Remove the export of this function while at it. The only potential
user is the dmaengine and that's always builtin.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Sekhar Nori <nsekhar@ti.com>
Signed-off-by: Joel Fernandes <joelf@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 arch/arm/common/edma.c             | 24 +++++++++---------------
 include/linux/platform_data/edma.h |  2 +-
 2 files changed, 10 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 0b37f7734d0f..25fa735abc6c 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -994,29 +994,23 @@ void edma_set_dest(unsigned slot, dma_addr_t dest_port,
 EXPORT_SYMBOL(edma_set_dest);
 
 /**
- * edma_get_position - returns the current transfer points
+ * edma_get_position - returns the current transfer point
  * @slot: parameter RAM slot being examined
- * @src: pointer to source port position
- * @dst: pointer to destination port position
+ * @dst:  true selects the dest position, false the source
  *
- * Returns current source and destination addresses for a particular
- * parameter RAM slot.  Its channel should not be active when this is called.
+ * Returns the position of the current active slot
  */
-void edma_get_position(unsigned slot, dma_addr_t *src, dma_addr_t *dst)
+dma_addr_t edma_get_position(unsigned slot, bool dst)
 {
-	struct edmacc_param temp;
-	unsigned ctlr;
+	u32 offs, ctlr = EDMA_CTLR(slot);
 
-	ctlr = EDMA_CTLR(slot);
 	slot = EDMA_CHAN_SLOT(slot);
 
-	edma_read_slot(EDMA_CTLR_CHAN(ctlr, slot), &temp);
-	if (src != NULL)
-		*src = temp.src;
-	if (dst != NULL)
-		*dst = temp.dst;
+	offs = PARM_OFFSET(slot);
+	offs += dst ? PARM_DST : PARM_SRC;
+
+	return edma_read(ctlr, offs);
 }
-EXPORT_SYMBOL(edma_get_position);
 
 /**
  * edma_set_src_index - configure DMA source address indexing
diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index 923f8a3e4ce0..12f134b1493c 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -130,7 +130,7 @@ void edma_set_src(unsigned slot, dma_addr_t src_port,
 				enum address_mode mode, enum fifo_width);
 void edma_set_dest(unsigned slot, dma_addr_t dest_port,
 				 enum address_mode mode, enum fifo_width);
-void edma_get_position(unsigned slot, dma_addr_t *src, dma_addr_t *dst);
+dma_addr_t edma_get_position(unsigned slot, bool dst);
 void edma_set_src_index(unsigned slot, s16 src_bidx, s16 src_cidx);
 void edma_set_dest_index(unsigned slot, s16 dest_bidx, s16 dest_cidx);
 void edma_set_transfer_params(unsigned slot, u16 acnt, u16 bcnt, u16 ccnt,
-- 
cgit 


From 9cd4360de6090a6daf7fbe024e34953f2ae60ef2 Mon Sep 17 00:00:00 2001
From: Srikanth Thokala <sthokal@xilinx.com>
Date: Wed, 23 Apr 2014 20:23:26 +0530
Subject: dma: Add Xilinx AXI Video Direct Memory Access Engine driver support

This is the driver for the AXI Video Direct Memory Access (AXI
VDMA) core, which is a soft Xilinx IP core that provides high-
bandwidth direct memory access between memory and AXI4-Stream
type video target peripherals. The core provides efficient two
dimensional DMA operations with independent asynchronous read
and write channel operation.

This module works on Zynq (ARM Based SoC) and Microblaze platforms.

Signed-off-by: Srikanth Thokala <sthokal@xilinx.com>
Acked-by: Jassi Brar <jassisinghbrar@gmail.com>
Reviewed-by: Levente Kurusa <levex@linux.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/Kconfig              |   14 +
 drivers/dma/Makefile             |    1 +
 drivers/dma/xilinx/Makefile      |    1 +
 drivers/dma/xilinx/xilinx_vdma.c | 1379 ++++++++++++++++++++++++++++++++++++++
 include/linux/amba/xilinx_dma.h  |   47 ++
 5 files changed, 1442 insertions(+)
 create mode 100644 drivers/dma/xilinx/Makefile
 create mode 100644 drivers/dma/xilinx/xilinx_vdma.c
 create mode 100644 include/linux/amba/xilinx_dma.h

(limited to 'include/linux')

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 5c5863842de9..b30b7ed89fb2 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -361,6 +361,20 @@ config FSL_EDMA
 	  multiplexing capability for DMA request sources(slot).
 	  This module can be found on Freescale Vybrid and LS-1 SoCs.
 
+config XILINX_VDMA
+	tristate "Xilinx AXI VDMA Engine"
+	depends on (ARCH_ZYNQ || MICROBLAZE)
+	select DMA_ENGINE
+	help
+	  Enable support for Xilinx AXI VDMA Soft IP.
+
+	  This engine provides high-bandwidth direct memory access
+	  between memory and AXI4-Stream video type target
+	  peripherals including peripherals which support AXI4-
+	  Stream Video Protocol.  It has two stream interfaces/
+	  channels, Memory Mapped to Stream (MM2S) and Stream to
+	  Memory Mapped (S2MM) for the data transfers.
+
 config DMA_ENGINE
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 5150c82c9caf..c779e1eb2db2 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -46,3 +46,4 @@ obj-$(CONFIG_K3_DMA) += k3dma.o
 obj-$(CONFIG_MOXART_DMA) += moxart-dma.o
 obj-$(CONFIG_FSL_EDMA) += fsl-edma.o
 obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o
+obj-y += xilinx/
diff --git a/drivers/dma/xilinx/Makefile b/drivers/dma/xilinx/Makefile
new file mode 100644
index 000000000000..3c4e9f2fea28
--- /dev/null
+++ b/drivers/dma/xilinx/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_XILINX_VDMA) += xilinx_vdma.o
diff --git a/drivers/dma/xilinx/xilinx_vdma.c b/drivers/dma/xilinx/xilinx_vdma.c
new file mode 100644
index 000000000000..42a13e8d4607
--- /dev/null
+++ b/drivers/dma/xilinx/xilinx_vdma.c
@@ -0,0 +1,1379 @@
+/*
+ * DMA driver for Xilinx Video DMA Engine
+ *
+ * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved.
+ *
+ * Based on the Freescale DMA driver.
+ *
+ * Description:
+ * The AXI Video Direct Memory Access (AXI VDMA) core is a soft Xilinx IP
+ * core that provides high-bandwidth direct memory access between memory
+ * and AXI4-Stream type video target peripherals. The core provides efficient
+ * two dimensional DMA operations with independent asynchronous read (MM2S)
+ * and write (S2MM) channel operation. It can be configured to have either
+ * one channel or two channels. If configured as two channels, one is to
+ * transmit to the video device (MM2S) and another is to receive from the
+ * video device (S2MM). Initialization, status, interrupt and management
+ * registers are accessed through an AXI4-Lite slave interface.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/amba/xilinx_dma.h>
+#include <linux/bitops.h>
+#include <linux/dmapool.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_address.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/slab.h>
+
+#include "../dmaengine.h"
+
+/* Register/Descriptor Offsets */
+#define XILINX_VDMA_MM2S_CTRL_OFFSET		0x0000
+#define XILINX_VDMA_S2MM_CTRL_OFFSET		0x0030
+#define XILINX_VDMA_MM2S_DESC_OFFSET		0x0050
+#define XILINX_VDMA_S2MM_DESC_OFFSET		0x00a0
+
+/* Control Registers */
+#define XILINX_VDMA_REG_DMACR			0x0000
+#define XILINX_VDMA_DMACR_DELAY_MAX		0xff
+#define XILINX_VDMA_DMACR_DELAY_SHIFT		24
+#define XILINX_VDMA_DMACR_FRAME_COUNT_MAX	0xff
+#define XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT	16
+#define XILINX_VDMA_DMACR_ERR_IRQ		BIT(14)
+#define XILINX_VDMA_DMACR_DLY_CNT_IRQ		BIT(13)
+#define XILINX_VDMA_DMACR_FRM_CNT_IRQ		BIT(12)
+#define XILINX_VDMA_DMACR_MASTER_SHIFT		8
+#define XILINX_VDMA_DMACR_FSYNCSRC_SHIFT	5
+#define XILINX_VDMA_DMACR_FRAMECNT_EN		BIT(4)
+#define XILINX_VDMA_DMACR_GENLOCK_EN		BIT(3)
+#define XILINX_VDMA_DMACR_RESET			BIT(2)
+#define XILINX_VDMA_DMACR_CIRC_EN		BIT(1)
+#define XILINX_VDMA_DMACR_RUNSTOP		BIT(0)
+#define XILINX_VDMA_DMACR_FSYNCSRC_MASK		GENMASK(6, 5)
+
+#define XILINX_VDMA_REG_DMASR			0x0004
+#define XILINX_VDMA_DMASR_EOL_LATE_ERR		BIT(15)
+#define XILINX_VDMA_DMASR_ERR_IRQ		BIT(14)
+#define XILINX_VDMA_DMASR_DLY_CNT_IRQ		BIT(13)
+#define XILINX_VDMA_DMASR_FRM_CNT_IRQ		BIT(12)
+#define XILINX_VDMA_DMASR_SOF_LATE_ERR		BIT(11)
+#define XILINX_VDMA_DMASR_SG_DEC_ERR		BIT(10)
+#define XILINX_VDMA_DMASR_SG_SLV_ERR		BIT(9)
+#define XILINX_VDMA_DMASR_EOF_EARLY_ERR		BIT(8)
+#define XILINX_VDMA_DMASR_SOF_EARLY_ERR		BIT(7)
+#define XILINX_VDMA_DMASR_DMA_DEC_ERR		BIT(6)
+#define XILINX_VDMA_DMASR_DMA_SLAVE_ERR		BIT(5)
+#define XILINX_VDMA_DMASR_DMA_INT_ERR		BIT(4)
+#define XILINX_VDMA_DMASR_IDLE			BIT(1)
+#define XILINX_VDMA_DMASR_HALTED		BIT(0)
+#define XILINX_VDMA_DMASR_DELAY_MASK		GENMASK(31, 24)
+#define XILINX_VDMA_DMASR_FRAME_COUNT_MASK	GENMASK(23, 16)
+
+#define XILINX_VDMA_REG_CURDESC			0x0008
+#define XILINX_VDMA_REG_TAILDESC		0x0010
+#define XILINX_VDMA_REG_REG_INDEX		0x0014
+#define XILINX_VDMA_REG_FRMSTORE		0x0018
+#define XILINX_VDMA_REG_THRESHOLD		0x001c
+#define XILINX_VDMA_REG_FRMPTR_STS		0x0024
+#define XILINX_VDMA_REG_PARK_PTR		0x0028
+#define XILINX_VDMA_PARK_PTR_WR_REF_SHIFT	8
+#define XILINX_VDMA_PARK_PTR_RD_REF_SHIFT	0
+#define XILINX_VDMA_REG_VDMA_VERSION		0x002c
+
+/* Register Direct Mode Registers */
+#define XILINX_VDMA_REG_VSIZE			0x0000
+#define XILINX_VDMA_REG_HSIZE			0x0004
+
+#define XILINX_VDMA_REG_FRMDLY_STRIDE		0x0008
+#define XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT	24
+#define XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT	0
+
+#define XILINX_VDMA_REG_START_ADDRESS(n)	(0x000c + 4 * (n))
+
+/* HW specific definitions */
+#define XILINX_VDMA_MAX_CHANS_PER_DEVICE	0x2
+
+#define XILINX_VDMA_DMAXR_ALL_IRQ_MASK	\
+		(XILINX_VDMA_DMASR_FRM_CNT_IRQ | \
+		 XILINX_VDMA_DMASR_DLY_CNT_IRQ | \
+		 XILINX_VDMA_DMASR_ERR_IRQ)
+
+#define XILINX_VDMA_DMASR_ALL_ERR_MASK	\
+		(XILINX_VDMA_DMASR_EOL_LATE_ERR | \
+		 XILINX_VDMA_DMASR_SOF_LATE_ERR | \
+		 XILINX_VDMA_DMASR_SG_DEC_ERR | \
+		 XILINX_VDMA_DMASR_SG_SLV_ERR | \
+		 XILINX_VDMA_DMASR_EOF_EARLY_ERR | \
+		 XILINX_VDMA_DMASR_SOF_EARLY_ERR | \
+		 XILINX_VDMA_DMASR_DMA_DEC_ERR | \
+		 XILINX_VDMA_DMASR_DMA_SLAVE_ERR | \
+		 XILINX_VDMA_DMASR_DMA_INT_ERR)
+
+/*
+ * Recoverable errors are DMA Internal error, SOF Early, EOF Early
+ * and SOF Late. They are only recoverable when C_FLUSH_ON_FSYNC
+ * is enabled in the h/w system.
+ */
+#define XILINX_VDMA_DMASR_ERR_RECOVER_MASK	\
+		(XILINX_VDMA_DMASR_SOF_LATE_ERR | \
+		 XILINX_VDMA_DMASR_EOF_EARLY_ERR | \
+		 XILINX_VDMA_DMASR_SOF_EARLY_ERR | \
+		 XILINX_VDMA_DMASR_DMA_INT_ERR)
+
+/* Axi VDMA Flush on Fsync bits */
+#define XILINX_VDMA_FLUSH_S2MM		3
+#define XILINX_VDMA_FLUSH_MM2S		2
+#define XILINX_VDMA_FLUSH_BOTH		1
+
+/* Delay loop counter to prevent hardware failure */
+#define XILINX_VDMA_LOOP_COUNT		1000000
+
+/**
+ * struct xilinx_vdma_desc_hw - Hardware Descriptor
+ * @next_desc: Next Descriptor Pointer @0x00
+ * @pad1: Reserved @0x04
+ * @buf_addr: Buffer address @0x08
+ * @pad2: Reserved @0x0C
+ * @vsize: Vertical Size @0x10
+ * @hsize: Horizontal Size @0x14
+ * @stride: Number of bytes between the first
+ *	    pixels of each horizontal line @0x18
+ */
+struct xilinx_vdma_desc_hw {
+	u32 next_desc;
+	u32 pad1;
+	u32 buf_addr;
+	u32 pad2;
+	u32 vsize;
+	u32 hsize;
+	u32 stride;
+} __aligned(64);
+
+/**
+ * struct xilinx_vdma_tx_segment - Descriptor segment
+ * @hw: Hardware descriptor
+ * @node: Node in the descriptor segments list
+ * @phys: Physical address of segment
+ */
+struct xilinx_vdma_tx_segment {
+	struct xilinx_vdma_desc_hw hw;
+	struct list_head node;
+	dma_addr_t phys;
+} __aligned(64);
+
+/**
+ * struct xilinx_vdma_tx_descriptor - Per Transaction structure
+ * @async_tx: Async transaction descriptor
+ * @segments: TX segments list
+ * @node: Node in the channel descriptors list
+ */
+struct xilinx_vdma_tx_descriptor {
+	struct dma_async_tx_descriptor async_tx;
+	struct list_head segments;
+	struct list_head node;
+};
+
+/**
+ * struct xilinx_vdma_chan - Driver specific VDMA channel structure
+ * @xdev: Driver specific device structure
+ * @ctrl_offset: Control registers offset
+ * @desc_offset: TX descriptor registers offset
+ * @lock: Descriptor operation lock
+ * @pending_list: Descriptors waiting
+ * @active_desc: Active descriptor
+ * @allocated_desc: Allocated descriptor
+ * @done_list: Complete descriptors
+ * @common: DMA common channel
+ * @desc_pool: Descriptors pool
+ * @dev: The dma device
+ * @irq: Channel IRQ
+ * @id: Channel ID
+ * @direction: Transfer direction
+ * @num_frms: Number of frames
+ * @has_sg: Support scatter transfers
+ * @genlock: Support genlock mode
+ * @err: Channel has errors
+ * @tasklet: Cleanup work after irq
+ * @config: Device configuration info
+ * @flush_on_fsync: Flush on Frame sync
+ */
+struct xilinx_vdma_chan {
+	struct xilinx_vdma_device *xdev;
+	u32 ctrl_offset;
+	u32 desc_offset;
+	spinlock_t lock;
+	struct list_head pending_list;
+	struct xilinx_vdma_tx_descriptor *active_desc;
+	struct xilinx_vdma_tx_descriptor *allocated_desc;
+	struct list_head done_list;
+	struct dma_chan common;
+	struct dma_pool *desc_pool;
+	struct device *dev;
+	int irq;
+	int id;
+	enum dma_transfer_direction direction;
+	int num_frms;
+	bool has_sg;
+	bool genlock;
+	bool err;
+	struct tasklet_struct tasklet;
+	struct xilinx_vdma_config config;
+	bool flush_on_fsync;
+};
+
+/**
+ * struct xilinx_vdma_device - VDMA device structure
+ * @regs: I/O mapped base address
+ * @dev: Device Structure
+ * @common: DMA device structure
+ * @chan: Driver specific VDMA channel
+ * @has_sg: Specifies whether Scatter-Gather is present or not
+ * @flush_on_fsync: Flush on frame sync
+ */
+struct xilinx_vdma_device {
+	void __iomem *regs;
+	struct device *dev;
+	struct dma_device common;
+	struct xilinx_vdma_chan *chan[XILINX_VDMA_MAX_CHANS_PER_DEVICE];
+	bool has_sg;
+	u32 flush_on_fsync;
+};
+
+/* Macros */
+#define to_xilinx_chan(chan) \
+	container_of(chan, struct xilinx_vdma_chan, common)
+#define to_vdma_tx_descriptor(tx) \
+	container_of(tx, struct xilinx_vdma_tx_descriptor, async_tx)
+
+/* IO accessors */
+static inline u32 vdma_read(struct xilinx_vdma_chan *chan, u32 reg)
+{
+	return ioread32(chan->xdev->regs + reg);
+}
+
+static inline void vdma_write(struct xilinx_vdma_chan *chan, u32 reg, u32 value)
+{
+	iowrite32(value, chan->xdev->regs + reg);
+}
+
+static inline void vdma_desc_write(struct xilinx_vdma_chan *chan, u32 reg,
+				   u32 value)
+{
+	vdma_write(chan, chan->desc_offset + reg, value);
+}
+
+static inline u32 vdma_ctrl_read(struct xilinx_vdma_chan *chan, u32 reg)
+{
+	return vdma_read(chan, chan->ctrl_offset + reg);
+}
+
+static inline void vdma_ctrl_write(struct xilinx_vdma_chan *chan, u32 reg,
+				   u32 value)
+{
+	vdma_write(chan, chan->ctrl_offset + reg, value);
+}
+
+static inline void vdma_ctrl_clr(struct xilinx_vdma_chan *chan, u32 reg,
+				 u32 clr)
+{
+	vdma_ctrl_write(chan, reg, vdma_ctrl_read(chan, reg) & ~clr);
+}
+
+static inline void vdma_ctrl_set(struct xilinx_vdma_chan *chan, u32 reg,
+				 u32 set)
+{
+	vdma_ctrl_write(chan, reg, vdma_ctrl_read(chan, reg) | set);
+}
+
+/* -----------------------------------------------------------------------------
+ * Descriptors and segments alloc and free
+ */
+
+/**
+ * xilinx_vdma_alloc_tx_segment - Allocate transaction segment
+ * @chan: Driver specific VDMA channel
+ *
+ * Return: The allocated segment on success and NULL on failure.
+ */
+static struct xilinx_vdma_tx_segment *
+xilinx_vdma_alloc_tx_segment(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_tx_segment *segment;
+	dma_addr_t phys;
+
+	segment = dma_pool_alloc(chan->desc_pool, GFP_ATOMIC, &phys);
+	if (!segment)
+		return NULL;
+
+	memset(segment, 0, sizeof(*segment));
+	segment->phys = phys;
+
+	return segment;
+}
+
+/**
+ * xilinx_vdma_free_tx_segment - Free transaction segment
+ * @chan: Driver specific VDMA channel
+ * @segment: VDMA transaction segment
+ */
+static void xilinx_vdma_free_tx_segment(struct xilinx_vdma_chan *chan,
+					struct xilinx_vdma_tx_segment *segment)
+{
+	dma_pool_free(chan->desc_pool, segment, segment->phys);
+}
+
+/**
+ * xilinx_vdma_alloc_tx_descriptor - Allocate transaction descriptor
+ * @chan: Driver specific VDMA channel
+ *
+ * Return: The allocated descriptor on success and NULL on failure.
+ */
+static struct xilinx_vdma_tx_descriptor *
+xilinx_vdma_alloc_tx_descriptor(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_tx_descriptor *desc;
+	unsigned long flags;
+
+	if (chan->allocated_desc)
+		return chan->allocated_desc;
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return NULL;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	chan->allocated_desc = desc;
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	INIT_LIST_HEAD(&desc->segments);
+
+	return desc;
+}
+
+/**
+ * xilinx_vdma_free_tx_descriptor - Free transaction descriptor
+ * @chan: Driver specific VDMA channel
+ * @desc: VDMA transaction descriptor
+ */
+static void
+xilinx_vdma_free_tx_descriptor(struct xilinx_vdma_chan *chan,
+			       struct xilinx_vdma_tx_descriptor *desc)
+{
+	struct xilinx_vdma_tx_segment *segment, *next;
+
+	if (!desc)
+		return;
+
+	list_for_each_entry_safe(segment, next, &desc->segments, node) {
+		list_del(&segment->node);
+		xilinx_vdma_free_tx_segment(chan, segment);
+	}
+
+	kfree(desc);
+}
+
+/* Required functions */
+
+/**
+ * xilinx_vdma_free_desc_list - Free descriptors list
+ * @chan: Driver specific VDMA channel
+ * @list: List to parse and delete the descriptor
+ */
+static void xilinx_vdma_free_desc_list(struct xilinx_vdma_chan *chan,
+					struct list_head *list)
+{
+	struct xilinx_vdma_tx_descriptor *desc, *next;
+
+	list_for_each_entry_safe(desc, next, list, node) {
+		list_del(&desc->node);
+		xilinx_vdma_free_tx_descriptor(chan, desc);
+	}
+}
+
+/**
+ * xilinx_vdma_free_descriptors - Free channel descriptors
+ * @chan: Driver specific VDMA channel
+ */
+static void xilinx_vdma_free_descriptors(struct xilinx_vdma_chan *chan)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	xilinx_vdma_free_desc_list(chan, &chan->pending_list);
+	xilinx_vdma_free_desc_list(chan, &chan->done_list);
+
+	xilinx_vdma_free_tx_descriptor(chan, chan->active_desc);
+	chan->active_desc = NULL;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_vdma_free_chan_resources - Free channel resources
+ * @dchan: DMA channel
+ */
+static void xilinx_vdma_free_chan_resources(struct dma_chan *dchan)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+
+	dev_dbg(chan->dev, "Free all channel resources.\n");
+
+	xilinx_vdma_free_descriptors(chan);
+	dma_pool_destroy(chan->desc_pool);
+	chan->desc_pool = NULL;
+}
+
+/**
+ * xilinx_vdma_chan_desc_cleanup - Clean channel descriptors
+ * @chan: Driver specific VDMA channel
+ */
+static void xilinx_vdma_chan_desc_cleanup(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_tx_descriptor *desc, *next;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	list_for_each_entry_safe(desc, next, &chan->done_list, node) {
+		dma_async_tx_callback callback;
+		void *callback_param;
+
+		/* Remove from the list of running transactions */
+		list_del(&desc->node);
+
+		/* Run the link descriptor callback function */
+		callback = desc->async_tx.callback;
+		callback_param = desc->async_tx.callback_param;
+		if (callback) {
+			spin_unlock_irqrestore(&chan->lock, flags);
+			callback(callback_param);
+			spin_lock_irqsave(&chan->lock, flags);
+		}
+
+		/* Run any dependencies, then free the descriptor */
+		dma_run_dependencies(&desc->async_tx);
+		xilinx_vdma_free_tx_descriptor(chan, desc);
+	}
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_vdma_do_tasklet - Completion tasklet, cleans up done descriptors
+ * @data: Pointer to the Xilinx VDMA channel structure
+ */
+static void xilinx_vdma_do_tasklet(unsigned long data)
+{
+	struct xilinx_vdma_chan *chan = (struct xilinx_vdma_chan *)data;
+
+	xilinx_vdma_chan_desc_cleanup(chan);
+}
+
+/**
+ * xilinx_vdma_alloc_chan_resources - Allocate channel resources
+ * @dchan: DMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_alloc_chan_resources(struct dma_chan *dchan)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+
+	/* Has this channel already been allocated? */
+	if (chan->desc_pool)
+		return 0;
+
+	/*
+	 * We need the descriptor to be aligned to 64bytes
+	 * for meeting Xilinx VDMA specification requirement.
+	 */
+	chan->desc_pool = dma_pool_create("xilinx_vdma_desc_pool",
+				chan->dev,
+				sizeof(struct xilinx_vdma_tx_segment),
+				__alignof__(struct xilinx_vdma_tx_segment), 0);
+	if (!chan->desc_pool) {
+		dev_err(chan->dev,
+			"unable to allocate channel %d descriptor pool\n",
+			chan->id);
+		return -ENOMEM;
+	}
+
+	dma_cookie_init(dchan);
+	return 0;
+}
+
+/**
+ * xilinx_vdma_tx_status - Get VDMA transaction status
+ * @dchan: DMA channel
+ * @cookie: Transaction identifier
+ * @txstate: Transaction state
+ *
+ * Return: DMA transaction status
+ */
+static enum dma_status xilinx_vdma_tx_status(struct dma_chan *dchan,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	return dma_cookie_status(dchan, cookie, txstate);
+}
+
+/**
+ * xilinx_vdma_is_running - Check if VDMA channel is running
+ * @chan: Driver specific VDMA channel
+ *
+ * Return: true if DMASR.HALTED is clear and DMACR.RUNSTOP is set, else false.
+ */
+static bool xilinx_vdma_is_running(struct xilinx_vdma_chan *chan)
+{
+	return !(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) &
+		 XILINX_VDMA_DMASR_HALTED) &&
+		(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) &
+		 XILINX_VDMA_DMACR_RUNSTOP);
+}
+
+/**
+ * xilinx_vdma_is_idle - Check if VDMA channel is idle
+ * @chan: Driver specific VDMA channel
+ *
+ * Return: true if the DMASR IDLE bit is set, false if not.
+ */
+static bool xilinx_vdma_is_idle(struct xilinx_vdma_chan *chan)
+{
+	return vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) &
+		XILINX_VDMA_DMASR_IDLE;
+}
+
+/**
+ * xilinx_vdma_halt - Halt VDMA channel
+ * @chan: Driver specific VDMA channel
+ *
+ * Clears RUNSTOP and polls DMASR; sets chan->err if HALTED never asserts.
+ */
+static void xilinx_vdma_halt(struct xilinx_vdma_chan *chan)
+{
+	int loop = XILINX_VDMA_LOOP_COUNT;
+
+	vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP);
+
+	/* Wait for the hardware to halt; loop reaches 0 only on timeout */
+	do {
+		if (vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) &
+		    XILINX_VDMA_DMASR_HALTED)
+			break;
+	} while (--loop);
+
+	if (!loop) {
+		dev_err(chan->dev, "Cannot stop channel %p: %x\n",
+			chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR));
+		chan->err = true;
+	}
+}
+
+/**
+ * xilinx_vdma_start - Start VDMA channel
+ * @chan: Driver specific VDMA channel
+ *
+ * Sets RUNSTOP and polls DMASR; sets chan->err if HALTED never clears.
+ */
+static void xilinx_vdma_start(struct xilinx_vdma_chan *chan)
+{
+	int loop = XILINX_VDMA_LOOP_COUNT;
+
+	vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RUNSTOP);
+
+	/* Wait for the hardware to start; loop reaches 0 only on timeout */
+	do {
+		if (!(vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR) &
+		      XILINX_VDMA_DMASR_HALTED))
+			break;
+	} while (--loop);
+
+	if (!loop) {
+		dev_err(chan->dev, "Cannot start channel %p: %x\n",
+			chan, vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR));
+
+		chan->err = true;
+	}
+}
+
+/**
+ * xilinx_vdma_start_transfer - Starts VDMA transfer
+ * @chan: Driver specific channel struct pointer
+ */
+static void xilinx_vdma_start_transfer(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_config *config = &chan->config;
+	struct xilinx_vdma_tx_descriptor *desc;
+	unsigned long flags;
+	u32 reg;
+	struct xilinx_vdma_tx_segment *head, *tail = NULL;
+
+	if (chan->err)
+		return;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	/* There's already an active descriptor, bail out. */
+	if (chan->active_desc)
+		goto out_unlock;
+
+	if (list_empty(&chan->pending_list))
+		goto out_unlock;
+
+	desc = list_first_entry(&chan->pending_list,
+				struct xilinx_vdma_tx_descriptor, node);
+
+	/* If it is SG mode and hardware is busy, cannot submit */
+	if (chan->has_sg && xilinx_vdma_is_running(chan) &&
+	    !xilinx_vdma_is_idle(chan)) {
+		dev_dbg(chan->dev, "DMA controller still busy\n");
+		goto out_unlock;
+	}
+
+	/*
+	 * If hardware is idle, then all descriptors on the running lists are
+	 * done, start new transfers
+	 */
+	if (chan->has_sg) {
+		head = list_first_entry(&desc->segments,
+					struct xilinx_vdma_tx_segment, node);
+		tail = list_entry(desc->segments.prev,
+				  struct xilinx_vdma_tx_segment, node);
+
+		vdma_ctrl_write(chan, XILINX_VDMA_REG_CURDESC, head->phys);
+	}
+
+	/* Configure the hardware using info in the config structure */
+	reg = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR);
+
+	if (config->frm_cnt_en)
+		reg |= XILINX_VDMA_DMACR_FRAMECNT_EN;
+	else
+		reg &= ~XILINX_VDMA_DMACR_FRAMECNT_EN;
+
+	/*
+	 * With SG, start with circular mode, so that BDs can be fetched.
+	 * In direct register mode, if not parking, enable circular mode
+	 */
+	if (chan->has_sg || !config->park)
+		reg |= XILINX_VDMA_DMACR_CIRC_EN;
+
+	if (config->park)
+		reg &= ~XILINX_VDMA_DMACR_CIRC_EN;
+
+	vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, reg);
+
+	if (config->park && (config->park_frm >= 0) &&
+			(config->park_frm < chan->num_frms)) {
+		if (chan->direction == DMA_MEM_TO_DEV)
+			vdma_write(chan, XILINX_VDMA_REG_PARK_PTR,
+				config->park_frm <<
+					XILINX_VDMA_PARK_PTR_RD_REF_SHIFT);
+		else
+			vdma_write(chan, XILINX_VDMA_REG_PARK_PTR,
+				config->park_frm <<
+					XILINX_VDMA_PARK_PTR_WR_REF_SHIFT);
+	}
+
+	/* Start the hardware */
+	xilinx_vdma_start(chan);
+
+	if (chan->err)
+		goto out_unlock;
+
+	/* Start the transfer */
+	if (chan->has_sg) {
+		vdma_ctrl_write(chan, XILINX_VDMA_REG_TAILDESC, tail->phys);
+	} else {
+		struct xilinx_vdma_tx_segment *segment, *last = NULL;
+		int i = 0;
+
+		list_for_each_entry(segment, &desc->segments, node) {
+			vdma_desc_write(chan,
+					XILINX_VDMA_REG_START_ADDRESS(i++),
+					segment->hw.buf_addr);
+			last = segment;
+		}
+
+		if (!last)
+			goto out_unlock;
+
+		/* HW expects these parameters to be same for one transaction */
+		vdma_desc_write(chan, XILINX_VDMA_REG_HSIZE, last->hw.hsize);
+		vdma_desc_write(chan, XILINX_VDMA_REG_FRMDLY_STRIDE,
+				last->hw.stride);
+		vdma_desc_write(chan, XILINX_VDMA_REG_VSIZE, last->hw.vsize);
+	}
+
+	list_del(&desc->node);
+	chan->active_desc = desc;
+
+out_unlock:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_vdma_issue_pending - Issue pending transactions
+ * @dchan: DMA channel
+ */
+static void xilinx_vdma_issue_pending(struct dma_chan *dchan)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+
+	xilinx_vdma_start_transfer(chan);
+}
+
+/**
+ * xilinx_vdma_complete_descriptor - Mark the active descriptor as complete
+ * @chan : xilinx DMA channel
+ *
+ * CONTEXT: hardirq
+ */
+static void xilinx_vdma_complete_descriptor(struct xilinx_vdma_chan *chan)
+{
+	struct xilinx_vdma_tx_descriptor *desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	desc = chan->active_desc;
+	if (!desc) {
+		dev_dbg(chan->dev, "no running descriptors\n");
+		goto out_unlock;
+	}
+
+	dma_cookie_complete(&desc->async_tx);
+	list_add_tail(&desc->node, &chan->done_list);
+
+	chan->active_desc = NULL;
+
+out_unlock:
+	spin_unlock_irqrestore(&chan->lock, flags);
+}
+
+/**
+ * xilinx_vdma_reset - Reset VDMA channel
+ * @chan: Driver specific VDMA channel
+ *
+ * Return: '0' on success, -ETIMEDOUT if the RESET bit never clears
+ */
+static int xilinx_vdma_reset(struct xilinx_vdma_chan *chan)
+{
+	int loop = XILINX_VDMA_LOOP_COUNT;
+	u32 tmp;
+
+	vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR, XILINX_VDMA_DMACR_RESET);
+
+	tmp = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) &
+		XILINX_VDMA_DMACR_RESET;
+
+	/* Wait for reset to finish; loop reaches 0 only on timeout */
+	do {
+		tmp = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR) &
+			XILINX_VDMA_DMACR_RESET;
+	} while (tmp && --loop);
+
+	if (!loop) {
+		dev_err(chan->dev, "reset timeout, cr %x, sr %x\n",
+			vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR),
+			vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR));
+		return -ETIMEDOUT;
+	}
+
+	chan->err = false;
+
+	return 0;
+}
+
+/**
+ * xilinx_vdma_chan_reset - Reset VDMA channel and enable interrupts
+ * @chan: Driver specific VDMA channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_chan_reset(struct xilinx_vdma_chan *chan)
+{
+	int err;
+
+	/* Reset VDMA */
+	err = xilinx_vdma_reset(chan);
+	if (err)
+		return err;
+
+	/* Enable interrupts */
+	vdma_ctrl_set(chan, XILINX_VDMA_REG_DMACR,
+		      XILINX_VDMA_DMAXR_ALL_IRQ_MASK);
+
+	return 0;
+}
+
+/**
+ * xilinx_vdma_irq_handler - VDMA Interrupt handler
+ * @irq: IRQ number
+ * @data: Pointer to the Xilinx VDMA channel structure
+ *
+ * Return: IRQ_HANDLED/IRQ_NONE
+ */
+static irqreturn_t xilinx_vdma_irq_handler(int irq, void *data)
+{
+	struct xilinx_vdma_chan *chan = data;
+	u32 status;
+
+	/* Read the status and ack the interrupts. */
+	status = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMASR);
+	if (!(status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK))
+		return IRQ_NONE;
+
+	vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR,
+			status & XILINX_VDMA_DMAXR_ALL_IRQ_MASK);
+
+	if (status & XILINX_VDMA_DMASR_ERR_IRQ) {
+		/*
+		 * An error occurred. If C_FLUSH_ON_FSYNC is enabled and the
+		 * error is recoverable, ignore it. Otherwise flag the error.
+		 *
+		 * Only recoverable errors can be cleared in the DMASR register,
+		 * so make sure not to write 1 to any other error bit.
+		 */
+		u32 errors = status & XILINX_VDMA_DMASR_ALL_ERR_MASK;
+		vdma_ctrl_write(chan, XILINX_VDMA_REG_DMASR,
+				errors & XILINX_VDMA_DMASR_ERR_RECOVER_MASK);
+
+		if (!chan->flush_on_fsync ||
+		    (errors & ~XILINX_VDMA_DMASR_ERR_RECOVER_MASK)) {
+			dev_err(chan->dev,
+				"Channel %p has errors %x, cdr %x tdr %x\n",
+				chan, errors,
+				vdma_ctrl_read(chan, XILINX_VDMA_REG_CURDESC),
+				vdma_ctrl_read(chan, XILINX_VDMA_REG_TAILDESC));
+			chan->err = true;
+		}
+	}
+
+	if (status & XILINX_VDMA_DMASR_DLY_CNT_IRQ) {
+		/*
+		 * Device takes too long to do the transfer when user requires
+		 * responsiveness.
+		 */
+		dev_dbg(chan->dev, "Inter-packet latency too long\n");
+	}
+
+	if (status & XILINX_VDMA_DMASR_FRM_CNT_IRQ) {
+		xilinx_vdma_complete_descriptor(chan);
+		xilinx_vdma_start_transfer(chan);
+	}
+
+	tasklet_schedule(&chan->tasklet);
+	return IRQ_HANDLED;
+}
+
+/**
+ * xilinx_vdma_tx_submit - Submit DMA transaction
+ * @tx: Async transaction descriptor
+ *
+ * Return: cookie value on success and failure value on error
+ */
+static dma_cookie_t xilinx_vdma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct xilinx_vdma_tx_descriptor *desc = to_vdma_tx_descriptor(tx);
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(tx->chan);
+	dma_cookie_t cookie;
+	unsigned long flags;
+	int err;
+
+	if (chan->err) {
+		/*
+		 * If reset fails, need to hard reset the system.
+		 * Channel is no longer functional
+		 */
+		err = xilinx_vdma_chan_reset(chan);
+		if (err < 0)
+			return err;
+	}
+
+	spin_lock_irqsave(&chan->lock, flags);
+
+	cookie = dma_cookie_assign(tx);
+
+	/* Append the transaction to the pending transactions queue. */
+	list_add_tail(&desc->node, &chan->pending_list);
+
+	/* Descriptor is queued now; clear the channel's prepared-desc slot */
+	chan->allocated_desc = NULL;
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+
+	return cookie;
+}
+
+/**
+ * xilinx_vdma_dma_prep_interleaved - prepare a DMA_SLAVE transaction
+ * @dchan: DMA channel
+ * @xt: Interleaved template pointer
+ * @flags: transfer ack flags
+ *
+ * Return: Async transaction descriptor on success and NULL on failure
+ */
+static struct dma_async_tx_descriptor *
+xilinx_vdma_dma_prep_interleaved(struct dma_chan *dchan,
+				 struct dma_interleaved_template *xt,
+				 unsigned long flags)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+	struct xilinx_vdma_tx_descriptor *desc;
+	struct xilinx_vdma_tx_segment *segment, *head, *tail;
+	struct xilinx_vdma_desc_hw *hw;
+	unsigned long irq_flags;
+
+	if (!is_slave_direction(xt->dir) || !xt->numf || !xt->sgl[0].size)
+		return NULL;
+
+	/* Allocate a transaction descriptor. */
+	desc = xilinx_vdma_alloc_tx_descriptor(chan);
+	if (!desc)
+		return NULL;
+
+	dma_async_tx_descriptor_init(&desc->async_tx, &chan->common);
+	desc->async_tx.tx_submit = xilinx_vdma_tx_submit;
+	async_tx_ack(&desc->async_tx);
+
+	/* Allocate the link descriptor from DMA pool */
+	segment = xilinx_vdma_alloc_tx_segment(chan);
+	if (!segment)
+		goto error;
+
+	/* Fill in the hardware descriptor */
+	hw = &segment->hw;
+	hw->vsize = xt->numf;
+	hw->hsize = xt->sgl[0].size;
+	hw->stride = xt->sgl[0].icg << XILINX_VDMA_FRMDLY_STRIDE_STRIDE_SHIFT;
+	hw->stride |= chan->config.frm_dly <<
+			XILINX_VDMA_FRMDLY_STRIDE_FRMDLY_SHIFT;
+
+	if (xt->dir != DMA_MEM_TO_DEV)
+		hw->buf_addr = xt->dst_start;
+	else
+		hw->buf_addr = xt->src_start;
+
+	/* Chain after the current tail; a fresh descriptor has no tail yet */
+	if (!list_empty(&desc->segments)) {
+		tail = list_last_entry(&desc->segments,
+				       struct xilinx_vdma_tx_segment, node);
+		tail->hw.next_desc = segment->phys;
+	}
+
+	/* Insert the segment into the descriptor segments list. */
+	list_add_tail(&segment->node, &desc->segments);
+
+	/* Link the last hardware descriptor with the first. */
+	head = list_first_entry(&desc->segments,
+				struct xilinx_vdma_tx_segment, node);
+	segment->hw.next_desc = head->phys;
+
+	return &desc->async_tx;
+
+error:
+	/* Drop the channel's reference before freeing the descriptor */
+	spin_lock_irqsave(&chan->lock, irq_flags);
+	chan->allocated_desc = NULL;
+	spin_unlock_irqrestore(&chan->lock, irq_flags);
+	xilinx_vdma_free_tx_descriptor(chan, desc);
+	return NULL;
+}
+
+/**
+ * xilinx_vdma_terminate_all - Halt the channel and free descriptors
+ * @chan: Driver specific VDMA Channel pointer
+ */
+static void xilinx_vdma_terminate_all(struct xilinx_vdma_chan *chan)
+{
+	/* Halt the DMA engine */
+	xilinx_vdma_halt(chan);
+
+	/* Remove and free all of the descriptors in the lists */
+	xilinx_vdma_free_descriptors(chan);
+}
+
+/**
+ * xilinx_vdma_channel_set_config - Configure VDMA channel
+ * Run-time configuration for Axi VDMA, supports:
+ * . reset the channel (cfg->reset; nothing else is applied in that case)
+ * . configure interrupt coalescing and inter-packet delay threshold
+ * . start/stop parking
+ * . enable genlock
+ *
+ * @dchan: DMA channel
+ * @cfg: VDMA device configuration pointer
+ *
+ * Return: '0' on success and failure value on error
+ */
+int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
+					struct xilinx_vdma_config *cfg)
+{
+	struct xilinx_vdma_chan *chan = to_xilinx_chan(dchan);
+	u32 dmacr;
+
+	if (cfg->reset)
+		return xilinx_vdma_chan_reset(chan);
+
+	dmacr = vdma_ctrl_read(chan, XILINX_VDMA_REG_DMACR);
+
+	chan->config.frm_dly = cfg->frm_dly;
+	chan->config.park = cfg->park;
+
+	/* genlock settings */
+	chan->config.gen_lock = cfg->gen_lock;
+	chan->config.master = cfg->master;
+
+	if (cfg->gen_lock && chan->genlock) {
+		dmacr |= XILINX_VDMA_DMACR_GENLOCK_EN;
+		dmacr |= cfg->master << XILINX_VDMA_DMACR_MASTER_SHIFT;
+	}
+
+	chan->config.frm_cnt_en = cfg->frm_cnt_en;
+	chan->config.park_frm = cfg->park ? cfg->park_frm : -1;
+
+	chan->config.coalesc = cfg->coalesc;
+	chan->config.delay = cfg->delay;
+
+	/* Clear each field before ORing in the new value (no stale bits) */
+	if (cfg->coalesc <= XILINX_VDMA_DMACR_FRAME_COUNT_MAX) {
+		dmacr &= ~((u32)XILINX_VDMA_DMACR_FRAME_COUNT_MAX <<
+			   XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT);
+		dmacr |= cfg->coalesc << XILINX_VDMA_DMACR_FRAME_COUNT_SHIFT;
+	}
+
+	if (cfg->delay <= XILINX_VDMA_DMACR_DELAY_MAX) {
+		dmacr &= ~((u32)XILINX_VDMA_DMACR_DELAY_MAX <<
+			   XILINX_VDMA_DMACR_DELAY_SHIFT);
+		dmacr |= cfg->delay << XILINX_VDMA_DMACR_DELAY_SHIFT;
+	}
+
+	/* FSync Source selection */
+	dmacr &= ~XILINX_VDMA_DMACR_FSYNCSRC_MASK;
+	dmacr |= cfg->ext_fsync << XILINX_VDMA_DMACR_FSYNCSRC_SHIFT;
+
+	vdma_ctrl_write(chan, XILINX_VDMA_REG_DMACR, dmacr);
+
+	return 0;
+}
+EXPORT_SYMBOL(xilinx_vdma_channel_set_config);
+
+/**
+ * xilinx_vdma_device_control - Handle a dmaengine control command
+ * @dchan: DMA Channel pointer
+ * @cmd: DMA control command; only DMA_TERMINATE_ALL is handled
+ * @arg: Command argument, not used by this driver
+ *
+ * Return: '0' on success, -ENXIO for any other command
+ */
+static int xilinx_vdma_device_control(struct dma_chan *dchan,
+				      enum dma_ctrl_cmd cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case DMA_TERMINATE_ALL:
+		/* Halt the channel and free the descriptors it holds */
+		xilinx_vdma_terminate_all(to_xilinx_chan(dchan));
+		return 0;
+	default:
+		return -ENXIO;
+	}
+}
+
+/* -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+/**
+ * xilinx_vdma_chan_remove - Per Channel remove function
+ * @chan: Driver specific VDMA channel to tear down
+ */
+static void xilinx_vdma_chan_remove(struct xilinx_vdma_chan *chan)
+{
+	/* Disable all interrupts in the channel control register */
+	vdma_ctrl_clr(chan, XILINX_VDMA_REG_DMACR,
+		      XILINX_VDMA_DMAXR_ALL_IRQ_MASK);
+
+	if (chan->irq > 0)
+		free_irq(chan->irq, chan);
+
+	tasklet_kill(&chan->tasklet);
+	/* Unlink the channel from the DMA engine channels list */
+	list_del(&chan->common.device_node);
+}
+
+/**
+ * xilinx_vdma_chan_probe - Per Channel Probing
+ * Gets the channel features from the device tree entry and
+ * initializes the channel state, its IRQ and its tasklet
+ *
+ * @xdev: Driver specific device structure
+ * @node: Device tree node of the channel
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_chan_probe(struct xilinx_vdma_device *xdev,
+				  struct device_node *node)
+{
+	struct xilinx_vdma_chan *chan;
+	bool has_dre = false;
+	u32 value, width;
+	int err;
+
+	/* Allocate and initialize the channel structure */
+	chan = devm_kzalloc(xdev->dev, sizeof(*chan), GFP_KERNEL);
+	if (!chan)
+		return -ENOMEM;
+
+	chan->dev = xdev->dev;
+	chan->xdev = xdev;
+	chan->has_sg = xdev->has_sg;
+
+	spin_lock_init(&chan->lock);
+	INIT_LIST_HEAD(&chan->pending_list);
+	INIT_LIST_HEAD(&chan->done_list);
+
+	/* Retrieve the channel properties from the device tree */
+	has_dre = of_property_read_bool(node, "xlnx,include-dre");
+
+	chan->genlock = of_property_read_bool(node, "xlnx,genlock-mode");
+
+	err = of_property_read_u32(node, "xlnx,datawidth", &value);
+	if (err) {
+		dev_err(xdev->dev, "missing xlnx,datawidth property\n");
+		return err;
+	}
+	width = value >> 3; /* Convert bits to bytes */
+
+	/* If data width is greater than 8 bytes, DRE is not in hw */
+	if (width > 8)
+		has_dre = false;
+
+	if (!has_dre)
+		xdev->common.copy_align = fls(width - 1);
+
+	if (of_device_is_compatible(node, "xlnx,axi-vdma-mm2s-channel")) {
+		chan->direction = DMA_MEM_TO_DEV;
+		chan->id = 0;
+
+		chan->ctrl_offset = XILINX_VDMA_MM2S_CTRL_OFFSET;
+		chan->desc_offset = XILINX_VDMA_MM2S_DESC_OFFSET;
+
+		if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH ||
+		    xdev->flush_on_fsync == XILINX_VDMA_FLUSH_MM2S)
+			chan->flush_on_fsync = true;
+	} else if (of_device_is_compatible(node,
+					    "xlnx,axi-vdma-s2mm-channel")) {
+		chan->direction = DMA_DEV_TO_MEM;
+		chan->id = 1;
+
+		chan->ctrl_offset = XILINX_VDMA_S2MM_CTRL_OFFSET;
+		chan->desc_offset = XILINX_VDMA_S2MM_DESC_OFFSET;
+
+		if (xdev->flush_on_fsync == XILINX_VDMA_FLUSH_BOTH ||
+		    xdev->flush_on_fsync == XILINX_VDMA_FLUSH_S2MM)
+			chan->flush_on_fsync = true;
+	} else {
+		dev_err(xdev->dev, "Invalid channel compatible node\n");
+		return -EINVAL;
+	}
+
+	/* Map the channel IRQ from the device tree and request it */
+	chan->irq = irq_of_parse_and_map(node, 0);
+	err = request_irq(chan->irq, xilinx_vdma_irq_handler, IRQF_SHARED,
+			  "xilinx-vdma-controller", chan);
+	if (err) {
+		dev_err(xdev->dev, "unable to request IRQ %d\n", chan->irq);
+		return err;
+	}
+
+	/* Initialize the tasklet */
+	tasklet_init(&chan->tasklet, xilinx_vdma_do_tasklet,
+			(unsigned long)chan);
+
+	/*
+	 * Initialize the DMA channel and add it to the DMA engine channels
+	 * list.
+	 */
+	chan->common.device = &xdev->common;
+
+	list_add_tail(&chan->common.device_node, &xdev->common.channels);
+	xdev->chan[chan->id] = chan;
+
+	/* Reset the channel; it is already linked into xdev->chan[] here */
+	err = xilinx_vdma_chan_reset(chan);
+	if (err < 0) {
+		dev_err(xdev->dev, "Reset channel failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * of_dma_xilinx_xlate - Translation function
+ * @dma_spec: Pointer to DMA specifier; args[0] is the channel index
+ * @ofdma: Pointer to DMA controller data (holds the xilinx_vdma_device)
+ *
+ * Return: DMA channel pointer on success and NULL on error
+ */
+static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec,
+						struct of_dma *ofdma)
+{
+	struct xilinx_vdma_device *xdev = ofdma->of_dma_data;
+	u32 chan_id = dma_spec->args[0];
+
+	/* Reject out-of-range indices and slots with no probed channel */
+	if (chan_id >= XILINX_VDMA_MAX_CHANS_PER_DEVICE || !xdev->chan[chan_id])
+		return NULL;
+	return dma_get_slave_channel(&xdev->chan[chan_id]->common);
+}
+
+/**
+ * xilinx_vdma_probe - Driver probe function
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: '0' on success and failure value on error
+ */
+static int xilinx_vdma_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct xilinx_vdma_device *xdev;
+	struct device_node *child;
+	struct resource *io;
+	u32 num_frames;
+	int i, err;
+
+	/* Allocate and initialize the DMA engine structure */
+	xdev = devm_kzalloc(&pdev->dev, sizeof(*xdev), GFP_KERNEL);
+	if (!xdev)
+		return -ENOMEM;
+
+	xdev->dev = &pdev->dev;
+
+	/* Request and map I/O memory */
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	xdev->regs = devm_ioremap_resource(&pdev->dev, io);
+	if (IS_ERR(xdev->regs))
+		return PTR_ERR(xdev->regs);
+
+	/* Retrieve the DMA engine properties from the device tree */
+	xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg");
+
+	err = of_property_read_u32(node, "xlnx,num-fstores", &num_frames);
+	if (err < 0) {
+		dev_err(xdev->dev, "missing xlnx,num-fstores property\n");
+		return err;
+	}
+
+	err = of_property_read_u32(node, "xlnx,flush-fsync",
+					&xdev->flush_on_fsync);
+	if (err < 0)
+		dev_warn(xdev->dev, "missing xlnx,flush-fsync property\n");
+
+	/* Initialize the DMA engine */
+	xdev->common.dev = &pdev->dev;
+	INIT_LIST_HEAD(&xdev->common.channels);
+	dma_cap_set(DMA_SLAVE, xdev->common.cap_mask);
+	dma_cap_set(DMA_PRIVATE, xdev->common.cap_mask);
+
+	xdev->common.device_alloc_chan_resources =
+				xilinx_vdma_alloc_chan_resources;
+	xdev->common.device_free_chan_resources =
+				xilinx_vdma_free_chan_resources;
+	xdev->common.device_prep_interleaved_dma =
+				xilinx_vdma_dma_prep_interleaved;
+	xdev->common.device_control = xilinx_vdma_device_control;
+	xdev->common.device_tx_status = xilinx_vdma_tx_status;
+	xdev->common.device_issue_pending = xilinx_vdma_issue_pending;
+
+	platform_set_drvdata(pdev, xdev);
+
+	/* Initialize the channels; an early exit must drop the child ref */
+	for_each_child_of_node(node, child) {
+		err = xilinx_vdma_chan_probe(xdev, child);
+		if (err < 0) {
+			of_node_put(child);
+			goto error;
+		}
+	}
+
+	for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++)
+		if (xdev->chan[i])
+			xdev->chan[i]->num_frms = num_frames;
+
+	/* Register the DMA engine with the core */
+	err = dma_async_device_register(&xdev->common);
+	if (err < 0)
+		goto error;
+
+	err = of_dma_controller_register(node, of_dma_xilinx_xlate, xdev);
+	if (err < 0) {
+		dev_err(&pdev->dev, "Unable to register DMA to DT\n");
+		dma_async_device_unregister(&xdev->common);
+		goto error;
+	}
+
+	dev_info(&pdev->dev, "Xilinx AXI VDMA Engine Driver Probed!!\n");
+	return 0;
+
+error:
+	for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++)
+		if (xdev->chan[i])
+			xilinx_vdma_chan_remove(xdev->chan[i]);
+	return err;
+}
+
+/**
+ * xilinx_vdma_remove - Driver remove function, undoes xilinx_vdma_probe()
+ * @pdev: Pointer to the platform_device structure
+ *
+ * Return: Always '0'
+ */
+static int xilinx_vdma_remove(struct platform_device *pdev)
+{
+	struct xilinx_vdma_device *xdev = platform_get_drvdata(pdev);
+	int i;
+
+	of_dma_controller_free(pdev->dev.of_node);
+
+	dma_async_device_unregister(&xdev->common);
+	/* Remove every channel slot that was populated during probe */
+	for (i = 0; i < XILINX_VDMA_MAX_CHANS_PER_DEVICE; i++)
+		if (xdev->chan[i])
+			xilinx_vdma_chan_remove(xdev->chan[i]);
+
+	return 0;
+}
+
+static const struct of_device_id xilinx_vdma_of_ids[] = {
+	{ .compatible = "xlnx,axi-vdma-1.00.a",},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, xilinx_vdma_of_ids);
+
+static struct platform_driver xilinx_vdma_driver = {
+	.driver = {
+		.name = "xilinx-vdma",
+		.owner = THIS_MODULE,
+		.of_match_table = xilinx_vdma_of_ids,
+	},
+	.probe = xilinx_vdma_probe,
+	.remove = xilinx_vdma_remove,
+};
+module_platform_driver(xilinx_vdma_driver);
+
+MODULE_AUTHOR("Xilinx, Inc.");
+MODULE_DESCRIPTION("Xilinx VDMA driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/amba/xilinx_dma.h b/include/linux/amba/xilinx_dma.h
new file mode 100644
index 000000000000..34b98f276ed0
--- /dev/null
+++ b/include/linux/amba/xilinx_dma.h
@@ -0,0 +1,47 @@
+/*
+ * Xilinx DMA Engine drivers support header file
+ *
+ * Copyright (C) 2010-2014 Xilinx, Inc. All rights reserved.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __DMA_XILINX_DMA_H
+#define __DMA_XILINX_DMA_H
+
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+
+/**
+ * struct xilinx_vdma_config - VDMA Configuration structure
+ * @frm_dly: Frame delay
+ * @gen_lock: Non-zero to enable gen-lock mode (if the channel supports it)
+ * @master: Gen-lock master that the channel syncs to
+ * @frm_cnt_en: Frame count enable flag
+ * @park: Non-zero to park the channel on @park_frm
+ * @park_frm: Frame to park on; ignored unless @park is set
+ * @coalesc: Interrupt coalescing threshold (frame count)
+ * @delay: Interrupt delay counter
+ * @reset: Non-zero to reset the channel instead of applying other fields
+ * @ext_fsync: External Frame Sync source selection
+ */
+struct xilinx_vdma_config {
+	int frm_dly;
+	int gen_lock;
+	int master;
+	int frm_cnt_en;
+	int park;
+	int park_frm;
+	int coalesc;
+	int delay;
+	int reset;
+	int ext_fsync;
+};
+
+int xilinx_vdma_channel_set_config(struct dma_chan *dchan,
+					struct xilinx_vdma_config *cfg);
+
+#endif
-- 
cgit 


From ccf3356e6b3d2802ea452c0091314605a9e7b7a0 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 3 Apr 2014 13:38:32 -0500
Subject: of/fdt: consolidate built-in dtb section variables

Unify the various architectures' __dtb_start and __dtb_end declarations by
moving them into of_fdt.h.

Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Vineet Gupta <vgupta@synopsys.com>
Acked-by: James Hogan <james.hogan@imgtec.com>
Tested-by: Michal Simek <michal.simek@xilinx.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Chris Zankel <chris@zankel.net>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: linux-metag@vger.kernel.org
Cc: linux-mips@linux-mips.org
Cc: linux@lists.openrisc.net
Cc: linux-xtensa@linux-xtensa.org
Tested-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Stephen Chivers <schivers@csc.com>
---
 arch/arc/include/asm/sections.h             | 1 -
 arch/metag/kernel/setup.c                   | 4 ----
 arch/mips/include/asm/mips-boards/generic.h | 2 --
 arch/mips/lantiq/prom.h                     | 2 --
 arch/mips/netlogic/xlp/dt.c                 | 2 +-
 arch/mips/ralink/of.c                       | 2 --
 arch/openrisc/kernel/vmlinux.h              | 2 --
 arch/xtensa/kernel/setup.c                  | 1 -
 include/linux/of_fdt.h                      | 3 +++
 9 files changed, 4 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arc/include/asm/sections.h b/arch/arc/include/asm/sections.h
index 764f1e3ba752..09db952e14bd 100644
--- a/arch/arc/include/asm/sections.h
+++ b/arch/arc/include/asm/sections.h
@@ -12,6 +12,5 @@
 #include <asm-generic/sections.h>
 
 extern char __arc_dccm_base[];
-extern char __dtb_start[];
 
 #endif
diff --git a/arch/metag/kernel/setup.c b/arch/metag/kernel/setup.c
index 129c7cdda1ce..31cf53d0eba2 100644
--- a/arch/metag/kernel/setup.c
+++ b/arch/metag/kernel/setup.c
@@ -105,10 +105,6 @@
 
 extern char _heap_start[];
 
-#ifdef CONFIG_METAG_BUILTIN_DTB
-extern u32 __dtb_start[];
-#endif
-
 #ifdef CONFIG_DA_CONSOLE
 /* Our early channel based console driver */
 extern struct console dash_console;
diff --git a/arch/mips/include/asm/mips-boards/generic.h b/arch/mips/include/asm/mips-boards/generic.h
index b969491aa98d..c904c24550f6 100644
--- a/arch/mips/include/asm/mips-boards/generic.h
+++ b/arch/mips/include/asm/mips-boards/generic.h
@@ -67,8 +67,6 @@
 
 extern int mips_revision_sconid;
 
-extern char __dtb_start[];
-
 #ifdef CONFIG_PCI
 extern void mips_pcibios_init(void);
 #else
diff --git a/arch/mips/lantiq/prom.h b/arch/mips/lantiq/prom.h
index 69a4c582338d..bfd2d58c1d69 100644
--- a/arch/mips/lantiq/prom.h
+++ b/arch/mips/lantiq/prom.h
@@ -26,6 +26,4 @@ struct ltq_soc_info {
 extern void ltq_soc_detect(struct ltq_soc_info *i);
 extern void ltq_soc_init(void);
 
-extern char __dtb_start[];
-
 #endif
diff --git a/arch/mips/netlogic/xlp/dt.c b/arch/mips/netlogic/xlp/dt.c
index 7f9615a712fb..bdde33147bce 100644
--- a/arch/mips/netlogic/xlp/dt.c
+++ b/arch/mips/netlogic/xlp/dt.c
@@ -42,7 +42,7 @@
 #include <asm/prom.h>
 
 extern u32 __dtb_xlp_evp_begin[], __dtb_xlp_svp_begin[],
-	__dtb_xlp_fvp_begin[], __dtb_xlp_gvp_begin[], __dtb_start[];
+	__dtb_xlp_fvp_begin[], __dtb_xlp_gvp_begin[];
 static void *xlp_fdt_blob;
 
 void __init *xlp_dt_init(void *fdtp)
diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c
index 91d7060d5aea..251395210e23 100644
--- a/arch/mips/ralink/of.c
+++ b/arch/mips/ralink/of.c
@@ -28,8 +28,6 @@
 __iomem void *rt_sysc_membase;
 __iomem void *rt_memc_membase;
 
-extern char __dtb_start[];
-
 __iomem void *plat_of_remap_node(const char *node)
 {
 	struct resource res;
diff --git a/arch/openrisc/kernel/vmlinux.h b/arch/openrisc/kernel/vmlinux.h
index 70b9ce41835c..bbcdf21b0b35 100644
--- a/arch/openrisc/kernel/vmlinux.h
+++ b/arch/openrisc/kernel/vmlinux.h
@@ -5,6 +5,4 @@
 extern char __initrd_start, __initrd_end;
 #endif
 
-extern u32 __dtb_start[];
-
 #endif
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 84fe931bb60e..89986e55d594 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -73,7 +73,6 @@ extern int initrd_below_start_ok;
 #endif
 
 #ifdef CONFIG_OF
-extern u32 __dtb_start[];
 void *dtb_start = __dtb_start;
 #endif
 
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index ddd7219af8ac..d4d0efe534b9 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -80,6 +80,9 @@ extern int __initdata dt_root_addr_cells;
 extern int __initdata dt_root_size_cells;
 extern struct boot_param_header *initial_boot_params;
 
+extern char __dtb_start[];
+extern char __dtb_end[];
+
 /* For scanning the flat device-tree at boot time */
 extern char *find_flat_dt_string(u32 offset);
 extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname,
-- 
cgit 


From bba04d965d06abbbe10afd3687742389107e198e Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Sat, 29 Mar 2014 14:14:17 -0500
Subject: of/fdt: remove unused of_scan_flat_dt_by_path

of_scan_flat_dt_by_path is unused anywhere in the kernel, so remove it.

Signed-off-by: Rob Herring <robh@kernel.org>
Tested-by: Michal Simek <michal.simek@xilinx.com>
Tested-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Stephen Chivers <schivers@csc.com>
---
 drivers/of/fdt.c       | 67 --------------------------------------------------
 include/linux/of_fdt.h |  3 ---
 2 files changed, 70 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 63bdcee473fa..9c8535291909 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -671,73 +671,6 @@ struct fdt_scan_status {
 	void *data;
 };
 
-/**
- * fdt_scan_node_by_path - iterator for of_scan_flat_dt_by_path function
- */
-static int __init fdt_scan_node_by_path(unsigned long node, const char *uname,
-					int depth, void *data)
-{
-	struct fdt_scan_status *st = data;
-
-	/*
-	 * if scan at the requested fdt node has been completed,
-	 * return -ENXIO to abort further scanning
-	 */
-	if (depth <= st->depth)
-		return -ENXIO;
-
-	/* requested fdt node has been found, so call iterator function */
-	if (st->found)
-		return st->iterator(node, uname, depth, st->data);
-
-	/* check if scanning automata is entering next level of fdt nodes */
-	if (depth == st->depth + 1 &&
-	    strncmp(st->name, uname, st->namelen) == 0 &&
-	    uname[st->namelen] == 0) {
-		st->depth += 1;
-		if (st->name[st->namelen] == 0) {
-			st->found = 1;
-		} else {
-			const char *next = st->name + st->namelen + 1;
-			st->name = next;
-			st->namelen = strcspn(next, "/");
-		}
-		return 0;
-	}
-
-	/* scan next fdt node */
-	return 0;
-}
-
-/**
- * of_scan_flat_dt_by_path - scan flattened tree blob and call callback on each
- *			     child of the given path.
- * @path: path to start searching for children
- * @it: callback function
- * @data: context data pointer
- *
- * This function is used to scan the flattened device-tree starting from the
- * node given by path. It is used to extract information (like reserved
- * memory), which is required on ealy boot before we can unflatten the tree.
- */
-int __init of_scan_flat_dt_by_path(const char *path,
-	int (*it)(unsigned long node, const char *name, int depth, void *data),
-	void *data)
-{
-	struct fdt_scan_status st = {path, 0, -1, 0, it, data};
-	int ret = 0;
-
-	if (initial_boot_params)
-                ret = of_scan_flat_dt(fdt_scan_node_by_path, &st);
-
-	if (!st.found)
-		return -ENOENT;
-	else if (ret == -ENXIO)	/* scan has been completed */
-		return 0;
-	else
-		return ret;
-}
-
 const char * __init of_flat_dt_get_machine_name(void)
 {
 	const char *name;
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index d4d0efe534b9..991ec74b4e11 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -93,9 +93,6 @@ extern void *of_get_flat_dt_prop(unsigned long node, const char *name,
 extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern int of_flat_dt_match(unsigned long node, const char *const *matches);
 extern unsigned long of_get_flat_dt_root(void);
-extern int of_scan_flat_dt_by_path(const char *path,
-	int (*it)(unsigned long node, const char *name, int depth, void *data),
-	void *data);
 
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data);
-- 
cgit 


From 9d0c4dfedd96ee54fc075b16d02f82499c8cc3a6 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 1 Apr 2014 23:49:03 -0500
Subject: of/fdt: update of_get_flat_dt_prop in prep for libfdt

Make of_get_flat_dt_prop arguments compatible with libfdt fdt_getprop
call in preparation to convert FDT code to use libfdt. Make the return
value const and the property length ptr type an int.

Signed-off-by: Rob Herring <robh@kernel.org>
Tested-by: Michal Simek <michal.simek@xilinx.com>
Tested-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Stephen Chivers <schivers@csc.com>
---
 arch/arc/kernel/devtree.c              |  2 +-
 arch/arm/kernel/devtree.c              |  2 +-
 arch/arm/mach-exynos/exynos.c          |  2 +-
 arch/arm/mach-vexpress/platsmp.c       |  2 +-
 arch/arm/plat-samsung/s5p-dev-mfc.c    |  4 ++--
 arch/microblaze/kernel/prom.c          |  8 +++----
 arch/powerpc/kernel/epapr_paravirt.c   |  2 +-
 arch/powerpc/kernel/fadump.c           |  4 ++--
 arch/powerpc/kernel/prom.c             | 24 +++++++++++----------
 arch/powerpc/kernel/rtas.c             |  2 +-
 arch/powerpc/mm/hash_utils_64.c        | 22 +++++++++----------
 arch/powerpc/platforms/52xx/efika.c    |  4 ++--
 arch/powerpc/platforms/chrp/setup.c    |  4 ++--
 arch/powerpc/platforms/powernv/opal.c  | 12 +++++------
 arch/powerpc/platforms/pseries/setup.c |  4 ++--
 arch/xtensa/kernel/setup.c             |  2 +-
 drivers/of/fdt.c                       | 39 +++++++++++++++++-----------------
 drivers/of/of_reserved_mem.c           |  4 ++--
 include/linux/of_fdt.h                 |  8 +++----
 19 files changed, 77 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c
index b6dc4e21fd32..0b3ef4025d89 100644
--- a/arch/arc/kernel/devtree.c
+++ b/arch/arc/kernel/devtree.c
@@ -42,7 +42,7 @@ const struct machine_desc * __init setup_machine_fdt(void *dt)
 	const struct machine_desc *mdesc;
 	unsigned long dt_root;
 	void *clk;
-	unsigned long len;
+	int len;
 
 	if (!early_init_dt_scan(dt))
 		return NULL;
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index dff9cc0e9bd6..38f4711b4995 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -247,7 +247,7 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
 
 	if (!mdesc) {
 		const char *prop;
-		long size;
+		int size;
 		unsigned long dt_root;
 
 		early_print("\nError: unrecognized/unsupported "
diff --git a/arch/arm/mach-exynos/exynos.c b/arch/arm/mach-exynos/exynos.c
index b32a907d021d..77293d39dfc9 100644
--- a/arch/arm/mach-exynos/exynos.c
+++ b/arch/arm/mach-exynos/exynos.c
@@ -250,7 +250,7 @@ static int __init exynos_fdt_map_chipid(unsigned long node, const char *uname,
 {
 	struct map_desc iodesc;
 	__be32 *reg;
-	unsigned long len;
+	int len;
 
 	if (!of_flat_dt_is_compatible(node, "samsung,exynos4210-chipid") &&
 		!of_flat_dt_is_compatible(node, "samsung,exynos5440-clock"))
diff --git a/arch/arm/mach-vexpress/platsmp.c b/arch/arm/mach-vexpress/platsmp.c
index 993c9ae5dc5e..b4a5f0d8390d 100644
--- a/arch/arm/mach-vexpress/platsmp.c
+++ b/arch/arm/mach-vexpress/platsmp.c
@@ -53,7 +53,7 @@ static int __init vexpress_dt_find_scu(unsigned long node,
 {
 	if (of_flat_dt_match(node, vexpress_dt_cortex_a9_match)) {
 		phys_addr_t phys_addr;
-		__be32 *reg = of_get_flat_dt_prop(node, "reg", NULL);
+		const __be32 *reg = of_get_flat_dt_prop(node, "reg", NULL);
 
 		if (WARN_ON(!reg))
 			return -EINVAL;
diff --git a/arch/arm/plat-samsung/s5p-dev-mfc.c b/arch/arm/plat-samsung/s5p-dev-mfc.c
index 98087b655df0..469b86260fe3 100644
--- a/arch/arm/plat-samsung/s5p-dev-mfc.c
+++ b/arch/arm/plat-samsung/s5p-dev-mfc.c
@@ -125,8 +125,8 @@ device_initcall(s5p_mfc_memory_init);
 int __init s5p_fdt_alloc_mfc_mem(unsigned long node, const char *uname,
 				int depth, void *data)
 {
-	__be32 *prop;
-	unsigned long len;
+	const __be32 *prop;
+	int len;
 	struct s5p_mfc_dt_meminfo mfc_mem;
 
 	if (!data)
diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c
index abdfb10e7eca..c76630603058 100644
--- a/arch/microblaze/kernel/prom.c
+++ b/arch/microblaze/kernel/prom.c
@@ -43,13 +43,13 @@
 #include <asm/pci-bridge.h>
 
 #ifdef CONFIG_EARLY_PRINTK
-static char *stdout;
+static const char *stdout;
 
 static int __init early_init_dt_scan_chosen_serial(unsigned long node,
 				const char *uname, int depth, void *data)
 {
-	unsigned long l;
-	char *p;
+	int l;
+	const char *p;
 
 	pr_debug("%s: depth: %d, uname: %s\n", __func__, depth, uname);
 
@@ -80,7 +80,7 @@ static int __init early_init_dt_scan_chosen_serial(unsigned long node,
 				(strncmp(p, "xlnx,opb-uartlite", 17) == 0) ||
 				(strncmp(p, "xlnx,axi-uartlite", 17) == 0) ||
 				(strncmp(p, "xlnx,mdm", 8) == 0)) {
-			unsigned int *addrp;
+			const unsigned int *addrp;
 
 			*(u32 *)data = UARTLITE;
 
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 7898be90f2dc..d64e92b22dd8 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -36,7 +36,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
 					   int depth, void *data)
 {
 	const u32 *insts;
-	unsigned long len;
+	int len;
 	int i;
 
 	insts = of_get_flat_dt_prop(node, "hcall-instructions", &len);
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 2230fd0ca3e4..7213d930918d 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -55,9 +55,9 @@ int crash_mem_ranges;
 int __init early_init_dt_scan_fw_dump(unsigned long node,
 			const char *uname, int depth, void *data)
 {
-	__be32 *sections;
+	const __be32 *sections;
 	int i, num_sections;
-	unsigned long size;
+	int size;
 	const int *token;
 
 	if (depth != 1 || strcmp(uname, "rtas") != 0)
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index d65754935652..483273e5c3e0 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -163,7 +163,7 @@ static struct ibm_pa_feature {
 	{CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
 };
 
-static void __init scan_features(unsigned long node, unsigned char *ftrs,
+static void __init scan_features(unsigned long node, const unsigned char *ftrs,
 				 unsigned long tablelen,
 				 struct ibm_pa_feature *fp,
 				 unsigned long ft_size)
@@ -202,8 +202,8 @@ static void __init scan_features(unsigned long node, unsigned char *ftrs,
 
 static void __init check_cpu_pa_features(unsigned long node)
 {
-	unsigned char *pa_ftrs;
-	unsigned long tablelen;
+	const unsigned char *pa_ftrs;
+	int tablelen;
 
 	pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen);
 	if (pa_ftrs == NULL)
@@ -216,7 +216,7 @@ static void __init check_cpu_pa_features(unsigned long node)
 #ifdef CONFIG_PPC_STD_MMU_64
 static void __init check_cpu_slb_size(unsigned long node)
 {
-	__be32 *slb_size_ptr;
+	const __be32 *slb_size_ptr;
 
 	slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL);
 	if (slb_size_ptr != NULL) {
@@ -257,7 +257,7 @@ static struct feature_property {
 static inline void identical_pvr_fixup(unsigned long node)
 {
 	unsigned int pvr;
-	char *model = of_get_flat_dt_prop(node, "model", NULL);
+	const char *model = of_get_flat_dt_prop(node, "model", NULL);
 
 	/*
 	 * Since 440GR(x)/440EP(x) processors have the same pvr,
@@ -295,11 +295,11 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 					  const char *uname, int depth,
 					  void *data)
 {
-	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
 	const __be32 *prop;
 	const __be32 *intserv;
 	int i, nthreads;
-	unsigned long len;
+	int len;
 	int found = -1;
 	int found_thread = 0;
 
@@ -392,7 +392,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
 					 int depth, void *data)
 {
-	unsigned long *lprop; /* All these set by kernel, so no need to convert endian */
+	const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */
 
 	/* Use common scan routine to determine if this is the chosen node */
 	if (early_init_dt_scan_chosen(node, uname, depth, data) == 0)
@@ -443,8 +443,9 @@ int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
  */
 static int __init early_init_dt_scan_drconf_memory(unsigned long node)
 {
-	__be32 *dm, *ls, *usm;
-	unsigned long l, n, flags;
+	const __be32 *dm, *ls, *usm;
+	int l;
+	unsigned long n, flags;
 	u64 base, size, memblock_size;
 	unsigned int is_kexec_kdump = 0, rngs;
 
@@ -564,7 +565,8 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 
 static void __init early_reserve_mem_dt(void)
 {
-	unsigned long i, len, dt_root;
+	unsigned long i, dt_root;
+	int len;
 	const __be32 *prop;
 
 	early_init_fdt_scan_reserved_mem();
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8cd5ed049b5d..8b4c857c1421 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1142,7 +1142,7 @@ void __init rtas_initialize(void)
 int __init early_init_dt_scan_rtas(unsigned long node,
 		const char *uname, int depth, void *data)
 {
-	u32 *basep, *entryp, *sizep;
+	const u32 *basep, *entryp, *sizep;
 
 	if (depth != 1 || strcmp(uname, "rtas") != 0)
 		return 0;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index d766d6ee33fe..59cc19a23a7a 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -265,9 +265,9 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
 					 const char *uname, int depth,
 					 void *data)
 {
-	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	__be32 *prop;
-	unsigned long size = 0;
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be32 *prop;
+	int size = 0;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
@@ -320,9 +320,9 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 					  const char *uname, int depth,
 					  void *data)
 {
-	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	__be32 *prop;
-	unsigned long size = 0;
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be32 *prop;
+	int size = 0;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
@@ -402,9 +402,9 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
 static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
 					const char *uname, int depth,
 					void *data) {
-	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	__be64 *addr_prop;
-	__be32 *page_count_prop;
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be64 *addr_prop;
+	const __be32 *page_count_prop;
 	unsigned int expected_pages;
 	long unsigned int phys_addr;
 	long unsigned int block_size;
@@ -546,8 +546,8 @@ static int __init htab_dt_scan_pftsize(unsigned long node,
 				       const char *uname, int depth,
 				       void *data)
 {
-	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	__be32 *prop;
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be32 *prop;
 
 	/* We are scanning "cpu" nodes only */
 	if (type == NULL || strcmp(type, "cpu") != 0)
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 18c104820198..6e19b0ad5d26 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -199,8 +199,8 @@ static void __init efika_setup_arch(void)
 
 static int __init efika_probe(void)
 {
-	char *model = of_get_flat_dt_prop(of_get_flat_dt_root(),
-					  "model", NULL);
+	const char *model = of_get_flat_dt_prop(of_get_flat_dt_root(),
+						"model", NULL);
 
 	if (model == NULL)
 		return 0;
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index c665d7de6c99..7044fd36197b 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -574,8 +574,8 @@ chrp_init2(void)
 
 static int __init chrp_probe(void)
 {
- 	char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(),
- 					  "device_type", NULL);
+	const char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(),
+						"device_type", NULL);
  	if (dtype == NULL)
  		return 0;
  	if (strcmp(dtype, "chrp"))
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 49d2f00019e5..c1329846bfa3 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -61,7 +61,7 @@ int __init early_init_dt_scan_opal(unsigned long node,
 				   const char *uname, int depth, void *data)
 {
 	const void *basep, *entryp, *sizep;
-	unsigned long basesz, entrysz, runtimesz;
+	int basesz, entrysz, runtimesz;
 
 	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
 		return 0;
@@ -77,11 +77,11 @@ int __init early_init_dt_scan_opal(unsigned long node,
 	opal.entry = of_read_number(entryp, entrysz/4);
 	opal.size = of_read_number(sizep, runtimesz/4);
 
-	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%ld)\n",
+	pr_debug("OPAL Base  = 0x%llx (basep=%p basesz=%d)\n",
 		 opal.base, basep, basesz);
-	pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%ld)\n",
+	pr_debug("OPAL Entry = 0x%llx (entryp=%p basesz=%d)\n",
 		 opal.entry, entryp, entrysz);
-	pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%ld)\n",
+	pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
 		 opal.size, sizep, runtimesz);
 
 	powerpc_firmware_features |= FW_FEATURE_OPAL;
@@ -102,7 +102,7 @@ int __init early_init_dt_scan_opal(unsigned long node,
 int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
 				   const char *uname, int depth, void *data)
 {
-	unsigned long i, psize, size;
+	int i, psize, size;
 	const __be32 *prop;
 
 	if (depth != 1 || strcmp(uname, "ibm,opal") != 0)
@@ -359,7 +359,7 @@ int opal_get_chars(uint32_t vtermno, char *buf, int count)
 	if ((be64_to_cpu(evt) & OPAL_EVENT_CONSOLE_INPUT) == 0)
 		return 0;
 	len = cpu_to_be64(count);
-	rc = opal_console_read(vtermno, &len, buf);	
+	rc = opal_console_read(vtermno, &len, buf);
 	if (rc == OPAL_SUCCESS)
 		return be64_to_cpu(len);
 	return 0;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 2db8cc691bf4..099d2df976a2 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -665,7 +665,7 @@ static int __init pseries_probe_fw_features(unsigned long node,
 					    void *data)
 {
 	const char *prop;
-	unsigned long len;
+	int len;
 	static int hypertas_found;
 	static int vec5_found;
 
@@ -698,7 +698,7 @@ static int __init pseries_probe_fw_features(unsigned long node,
 static int __init pSeries_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();
- 	char *dtype = of_get_flat_dt_prop(root, "device_type", NULL);
+	const char *dtype = of_get_flat_dt_prop(root, "device_type", NULL);
 
  	if (dtype == NULL)
  		return 0;
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 89986e55d594..1991a3d0b2f8 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -220,7 +220,7 @@ static int __init xtensa_dt_io_area(unsigned long node, const char *uname,
 		int depth, void *data)
 {
 	const __be32 *ranges;
-	unsigned long len;
+	int len;
 
 	if (depth > 1)
 		return 0;
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 9c8535291909..1d1582bb81fb 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -35,7 +35,7 @@ char *of_fdt_get_string(struct boot_param_header *blob, u32 offset)
  */
 void *of_fdt_get_property(struct boot_param_header *blob,
 		       unsigned long node, const char *name,
-		       unsigned long *size)
+		       int *size)
 {
 	unsigned long p = node;
 
@@ -85,7 +85,8 @@ int of_fdt_is_compatible(struct boot_param_header *blob,
 		      unsigned long node, const char *compat)
 {
 	const char *cp;
-	unsigned long cplen, l, score = 0;
+	int cplen;
+	unsigned long l, score = 0;
 
 	cp = of_fdt_get_property(blob, node, "compatible", &cplen);
 	if (cp == NULL)
@@ -444,8 +445,8 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
 {
 	int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32);
 	phys_addr_t base, size;
-	unsigned long len;
-	__be32 *prop;
+	int len;
+	const __be32 *prop;
 	int nomap, first = 1;
 
 	prop = of_get_flat_dt_prop(node, "reg", &len);
@@ -488,7 +489,7 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
  */
 static int __init __reserved_mem_check_root(unsigned long node)
 {
-	__be32 *prop;
+	const __be32 *prop;
 
 	prop = of_get_flat_dt_prop(node, "#size-cells", NULL);
 	if (!prop || be32_to_cpup(prop) != dt_root_size_cells)
@@ -638,8 +639,8 @@ unsigned long __init of_get_flat_dt_root(void)
  * This function can be used within scan_flattened_dt callback to get
  * access to properties
  */
-void *__init of_get_flat_dt_prop(unsigned long node, const char *name,
-				 unsigned long *size)
+const void *__init of_get_flat_dt_prop(unsigned long node, const char *name,
+				       int *size)
 {
 	return of_fdt_get_property(initial_boot_params, node, name, size);
 }
@@ -710,7 +711,7 @@ const void * __init of_flat_dt_match_machine(const void *default_match,
 	}
 	if (!best_data) {
 		const char *prop;
-		long size;
+		int size;
 
 		pr_err("\n unrecognized device tree list:\n[ ");
 
@@ -739,8 +740,8 @@ const void * __init of_flat_dt_match_machine(const void *default_match,
 static void __init early_init_dt_check_for_initrd(unsigned long node)
 {
 	u64 start, end;
-	unsigned long len;
-	__be32 *prop;
+	int len;
+	const __be32 *prop;
 
 	pr_debug("Looking for initrd properties... ");
 
@@ -773,7 +774,7 @@ static inline void early_init_dt_check_for_initrd(unsigned long node)
 int __init early_init_dt_scan_root(unsigned long node, const char *uname,
 				   int depth, void *data)
 {
-	__be32 *prop;
+	const __be32 *prop;
 
 	if (depth != 0)
 		return 0;
@@ -795,9 +796,9 @@ int __init early_init_dt_scan_root(unsigned long node, const char *uname,
 	return 1;
 }
 
-u64 __init dt_mem_next_cell(int s, __be32 **cellp)
+u64 __init dt_mem_next_cell(int s, const __be32 **cellp)
 {
-	__be32 *p = *cellp;
+	const __be32 *p = *cellp;
 
 	*cellp = p + s;
 	return of_read_number(p, s);
@@ -809,9 +810,9 @@ u64 __init dt_mem_next_cell(int s, __be32 **cellp)
 int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
 				     int depth, void *data)
 {
-	char *type = of_get_flat_dt_prop(node, "device_type", NULL);
-	__be32 *reg, *endp;
-	unsigned long l;
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be32 *reg, *endp;
+	int l;
 
 	/* We are scanning "memory" nodes only */
 	if (type == NULL) {
@@ -832,7 +833,7 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
 
 	endp = reg + (l / sizeof(__be32));
 
-	pr_debug("memory scan node %s, reg size %ld, data: %x %x %x %x,\n",
+	pr_debug("memory scan node %s, reg size %d, data: %x %x %x %x,\n",
 	    uname, l, reg[0], reg[1], reg[2], reg[3]);
 
 	while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
@@ -855,8 +856,8 @@ int __init early_init_dt_scan_memory(unsigned long node, const char *uname,
 int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data)
 {
-	unsigned long l;
-	char *p;
+	int l;
+	const char *p;
 
 	pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname);
 
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index daaaf935911d..e420eb52e5c9 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -95,8 +95,8 @@ static int __init __reserved_mem_alloc_size(unsigned long node,
 	int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32);
 	phys_addr_t start = 0, end = 0;
 	phys_addr_t base = 0, align = 0, size;
-	unsigned long len;
-	__be32 *prop;
+	int len;
+	const __be32 *prop;
 	int nomap;
 	int ret;
 
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 991ec74b4e11..b36a50d6af37 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -66,7 +66,7 @@ extern char *of_fdt_get_string(struct boot_param_header *blob, u32 offset);
 extern void *of_fdt_get_property(struct boot_param_header *blob,
 				 unsigned long node,
 				 const char *name,
-				 unsigned long *size);
+				 int *size);
 extern int of_fdt_is_compatible(struct boot_param_header *blob,
 				unsigned long node,
 				const char *compat);
@@ -88,8 +88,8 @@ extern char *find_flat_dt_string(u32 offset);
 extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname,
 				     int depth, void *data),
 			   void *data);
-extern void *of_get_flat_dt_prop(unsigned long node, const char *name,
-				 unsigned long *size);
+extern const void *of_get_flat_dt_prop(unsigned long node, const char *name,
+				       int *size);
 extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern int of_flat_dt_match(unsigned long node, const char *const *matches);
 extern unsigned long of_get_flat_dt_root(void);
@@ -103,7 +103,7 @@ extern void early_init_dt_add_memory_arch(u64 base, u64 size);
 extern int early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size,
 					     bool no_map);
 extern void * early_init_dt_alloc_memory_arch(u64 size, u64 align);
-extern u64 dt_mem_next_cell(int s, __be32 **cellp);
+extern u64 dt_mem_next_cell(int s, const __be32 **cellp);
 
 /* Early flat tree scan hooks */
 extern int early_init_dt_scan_root(unsigned long node, const char *uname,
-- 
cgit 


From e6a6928c3ea1d0195ed75a091e345696b916c09b Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 2 Apr 2014 15:10:14 -0500
Subject: of/fdt: Convert FDT functions to use libfdt

The kernel FDT functions predate libfdt and are much more limited in
functionality. Also, the kernel functions and libfdt functions are
not compatible with each other because they have different definitions
of node offsets. To avoid this incompatibility and in preparation to
add more FDT parsing functions which will need libfdt, let's first
convert the existing code to use libfdt.

The FDT unflattening, top-level FDT scanning, and property retrieval
functions are converted to use libfdt. The scanning code should be
re-worked to be more efficient and understandable by using libfdt to
find nodes directly by path or compatible strings.

Signed-off-by: Rob Herring <robh@kernel.org>
Tested-by: Michal Simek <michal.simek@xilinx.com>
Tested-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Stephen Chivers <schivers@csc.com>
---
 drivers/of/Kconfig     |   1 +
 drivers/of/Makefile    |   2 +
 drivers/of/fdt.c       | 209 ++++++++++++++-----------------------------------
 include/linux/of_fdt.h |   1 -
 4 files changed, 60 insertions(+), 153 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 889005fa4d04..2dcb0541012d 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -20,6 +20,7 @@ config OF_SELFTEST
 config OF_FLATTREE
 	bool
 	select DTC
+	select LIBFDT
 
 config OF_EARLY_FLATTREE
 	bool
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index ed9660adad77..9891232f999e 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -10,3 +10,5 @@ obj-$(CONFIG_OF_PCI)	+= of_pci.o
 obj-$(CONFIG_OF_PCI_IRQ)  += of_pci_irq.o
 obj-$(CONFIG_OF_MTD)	+= of_mtd.o
 obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
+
+CFLAGS_fdt.o = -I$(src)/../../scripts/dtc/libfdt
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 1d1582bb81fb..8e820a2b106d 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -19,58 +19,11 @@
 #include <linux/string.h>
 #include <linux/errno.h>
 #include <linux/slab.h>
+#include <linux/libfdt.h>
 
 #include <asm/setup.h>  /* for COMMAND_LINE_SIZE */
 #include <asm/page.h>
 
-char *of_fdt_get_string(struct boot_param_header *blob, u32 offset)
-{
-	return ((char *)blob) +
-		be32_to_cpu(blob->off_dt_strings) + offset;
-}
-
-/**
- * of_fdt_get_property - Given a node in the given flat blob, return
- * the property ptr
- */
-void *of_fdt_get_property(struct boot_param_header *blob,
-		       unsigned long node, const char *name,
-		       int *size)
-{
-	unsigned long p = node;
-
-	do {
-		u32 tag = be32_to_cpup((__be32 *)p);
-		u32 sz, noff;
-		const char *nstr;
-
-		p += 4;
-		if (tag == OF_DT_NOP)
-			continue;
-		if (tag != OF_DT_PROP)
-			return NULL;
-
-		sz = be32_to_cpup((__be32 *)p);
-		noff = be32_to_cpup((__be32 *)(p + 4));
-		p += 8;
-		if (be32_to_cpu(blob->version) < 0x10)
-			p = ALIGN(p, sz >= 8 ? 8 : 4);
-
-		nstr = of_fdt_get_string(blob, noff);
-		if (nstr == NULL) {
-			pr_warning("Can't find property index name !\n");
-			return NULL;
-		}
-		if (strcmp(name, nstr) == 0) {
-			if (size)
-				*size = sz;
-			return (void *)p;
-		}
-		p += sz;
-		p = ALIGN(p, 4);
-	} while (1);
-}
-
 /**
  * of_fdt_is_compatible - Return true if given node from the given blob has
  * compat in its compatible list
@@ -88,7 +41,7 @@ int of_fdt_is_compatible(struct boot_param_header *blob,
 	int cplen;
 	unsigned long l, score = 0;
 
-	cp = of_fdt_get_property(blob, node, "compatible", &cplen);
+	cp = fdt_getprop(blob, node, "compatible", &cplen);
 	if (cp == NULL)
 		return 0;
 	while (cplen > 0) {
@@ -147,28 +100,27 @@ static void *unflatten_dt_alloc(void **mem, unsigned long size,
  */
 static void * unflatten_dt_node(struct boot_param_header *blob,
 				void *mem,
-				void **p,
+				int *poffset,
 				struct device_node *dad,
 				struct device_node ***allnextpp,
 				unsigned long fpsize)
 {
+	const __be32 *p;
 	struct device_node *np;
 	struct property *pp, **prev_pp = NULL;
-	char *pathp;
-	u32 tag;
+	const char *pathp;
 	unsigned int l, allocl;
+	static int depth = 0;
+	int old_depth;
+	int offset;
 	int has_name = 0;
 	int new_format = 0;
 
-	tag = be32_to_cpup(*p);
-	if (tag != OF_DT_BEGIN_NODE) {
-		pr_err("Weird tag at start of node: %x\n", tag);
+	pathp = fdt_get_name(blob, *poffset, &l);
+	if (!pathp)
 		return mem;
-	}
-	*p += 4;
-	pathp = *p;
-	l = allocl = strlen(pathp) + 1;
-	*p = PTR_ALIGN(*p + l, 4);
+
+	allocl = l++;
 
 	/* version 0x10 has a more compact unit name here instead of the full
 	 * path. we accumulate the full path size using "fpsize", we'll rebuild
@@ -186,7 +138,7 @@ static void * unflatten_dt_node(struct boot_param_header *blob,
 			fpsize = 1;
 			allocl = 2;
 			l = 1;
-			*pathp = '\0';
+			pathp = "";
 		} else {
 			/* account for '/' and path size minus terminal 0
 			 * already in 'l'
@@ -233,32 +185,23 @@ static void * unflatten_dt_node(struct boot_param_header *blob,
 		}
 	}
 	/* process properties */
-	while (1) {
-		u32 sz, noff;
-		char *pname;
-
-		tag = be32_to_cpup(*p);
-		if (tag == OF_DT_NOP) {
-			*p += 4;
-			continue;
-		}
-		if (tag != OF_DT_PROP)
+	for (offset = fdt_first_property_offset(blob, *poffset);
+	     (offset >= 0);
+	     (offset = fdt_next_property_offset(blob, offset))) {
+		const char *pname;
+		u32 sz;
+
+		if (!(p = fdt_getprop_by_offset(blob, offset, &pname, &sz))) {
+			offset = -FDT_ERR_INTERNAL;
 			break;
-		*p += 4;
-		sz = be32_to_cpup(*p);
-		noff = be32_to_cpup(*p + 4);
-		*p += 8;
-		if (be32_to_cpu(blob->version) < 0x10)
-			*p = PTR_ALIGN(*p, sz >= 8 ? 8 : 4);
-
-		pname = of_fdt_get_string(blob, noff);
+		}
+
 		if (pname == NULL) {
 			pr_info("Can't find property name in list !\n");
 			break;
 		}
 		if (strcmp(pname, "name") == 0)
 			has_name = 1;
-		l = strlen(pname) + 1;
 		pp = unflatten_dt_alloc(&mem, sizeof(struct property),
 					__alignof__(struct property));
 		if (allnextpp) {
@@ -270,26 +213,25 @@ static void * unflatten_dt_node(struct boot_param_header *blob,
 			if ((strcmp(pname, "phandle") == 0) ||
 			    (strcmp(pname, "linux,phandle") == 0)) {
 				if (np->phandle == 0)
-					np->phandle = be32_to_cpup((__be32*)*p);
+					np->phandle = be32_to_cpup(p);
 			}
 			/* And we process the "ibm,phandle" property
 			 * used in pSeries dynamic device tree
 			 * stuff */
 			if (strcmp(pname, "ibm,phandle") == 0)
-				np->phandle = be32_to_cpup((__be32 *)*p);
-			pp->name = pname;
+				np->phandle = be32_to_cpup(p);
+			pp->name = (char *)pname;
 			pp->length = sz;
-			pp->value = *p;
+			pp->value = (__be32 *)p;
 			*prev_pp = pp;
 			prev_pp = &pp->next;
 		}
-		*p = PTR_ALIGN((*p) + sz, 4);
 	}
 	/* with version 0x10 we may not have the name property, recreate
 	 * it here from the unit name if absent
 	 */
 	if (!has_name) {
-		char *p1 = pathp, *ps = pathp, *pa = NULL;
+		const char *p1 = pathp, *ps = pathp, *pa = NULL;
 		int sz;
 
 		while (*p1) {
@@ -326,19 +268,18 @@ static void * unflatten_dt_node(struct boot_param_header *blob,
 		if (!np->type)
 			np->type = "<NULL>";
 	}
-	while (tag == OF_DT_BEGIN_NODE || tag == OF_DT_NOP) {
-		if (tag == OF_DT_NOP)
-			*p += 4;
-		else
-			mem = unflatten_dt_node(blob, mem, p, np, allnextpp,
-						fpsize);
-		tag = be32_to_cpup(*p);
-	}
-	if (tag != OF_DT_END_NODE) {
-		pr_err("Weird tag at end of node: %x\n", tag);
-		return mem;
-	}
-	*p += 4;
+
+	old_depth = depth;
+	*poffset = fdt_next_node(blob, *poffset, &depth);
+	if (depth < 0)
+		depth = 0;
+	while (*poffset > 0 && depth > old_depth)
+		mem = unflatten_dt_node(blob, mem, poffset, np, allnextpp,
+					fpsize);
+
+	if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND)
+		pr_err("unflatten: error %d processing FDT\n", *poffset);
+
 	return mem;
 }
 
@@ -359,7 +300,8 @@ static void __unflatten_device_tree(struct boot_param_header *blob,
 			     void * (*dt_alloc)(u64 size, u64 align))
 {
 	unsigned long size;
-	void *start, *mem;
+	int start;
+	void *mem;
 	struct device_node **allnextp = mynodes;
 
 	pr_debug(" -> unflatten_device_tree()\n");
@@ -380,7 +322,7 @@ static void __unflatten_device_tree(struct boot_param_header *blob,
 	}
 
 	/* First pass, scan for size */
-	start = ((void *)blob) + be32_to_cpu(blob->off_dt_struct);
+	start = 0;
 	size = (unsigned long)unflatten_dt_node(blob, 0, &start, NULL, NULL, 0);
 	size = ALIGN(size, 4);
 
@@ -395,10 +337,8 @@ static void __unflatten_device_tree(struct boot_param_header *blob,
 	pr_debug("  unflattening %p...\n", mem);
 
 	/* Second pass, do actual unflattening */
-	start = ((void *)blob) + be32_to_cpu(blob->off_dt_struct);
+	start = 0;
 	unflatten_dt_node(blob, mem, &start, NULL, &allnextp, 0);
-	if (be32_to_cpup(start) != OF_DT_END)
-		pr_warning("Weird tag at end of tree: %08x\n", be32_to_cpup(start));
 	if (be32_to_cpup(mem + size) != 0xdeadbeef)
 		pr_warning("End of tree marker overwritten: %08x\n",
 			   be32_to_cpup(mem + size));
@@ -574,47 +514,19 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
 				     void *data),
 			   void *data)
 {
-	unsigned long p = ((unsigned long)initial_boot_params) +
-		be32_to_cpu(initial_boot_params->off_dt_struct);
-	int rc = 0;
-	int depth = -1;
-
-	do {
-		u32 tag = be32_to_cpup((__be32 *)p);
-		const char *pathp;
-
-		p += 4;
-		if (tag == OF_DT_END_NODE) {
-			depth--;
-			continue;
-		}
-		if (tag == OF_DT_NOP)
-			continue;
-		if (tag == OF_DT_END)
-			break;
-		if (tag == OF_DT_PROP) {
-			u32 sz = be32_to_cpup((__be32 *)p);
-			p += 8;
-			if (be32_to_cpu(initial_boot_params->version) < 0x10)
-				p = ALIGN(p, sz >= 8 ? 8 : 4);
-			p += sz;
-			p = ALIGN(p, 4);
-			continue;
-		}
-		if (tag != OF_DT_BEGIN_NODE) {
-			pr_err("Invalid tag %x in flat device tree!\n", tag);
-			return -EINVAL;
-		}
-		depth++;
-		pathp = (char *)p;
-		p = ALIGN(p + strlen(pathp) + 1, 4);
+	const void *blob = initial_boot_params;
+	const char *pathp;
+	int offset, rc = 0, depth = -1;
+
+        for (offset = fdt_next_node(blob, -1, &depth);
+             offset >= 0 && depth >= 0 && !rc;
+             offset = fdt_next_node(blob, offset, &depth)) {
+
+		pathp = fdt_get_name(blob, offset, NULL);
 		if (*pathp == '/')
 			pathp = kbasename(pathp);
-		rc = it(p, pathp, depth, data);
-		if (rc != 0)
-			break;
-	} while (1);
-
+		rc = it(offset, pathp, depth, data);
+	}
 	return rc;
 }
 
@@ -623,14 +535,7 @@ int __init of_scan_flat_dt(int (*it)(unsigned long node,
  */
 unsigned long __init of_get_flat_dt_root(void)
 {
-	unsigned long p = ((unsigned long)initial_boot_params) +
-		be32_to_cpu(initial_boot_params->off_dt_struct);
-
-	while (be32_to_cpup((__be32 *)p) == OF_DT_NOP)
-		p += 4;
-	BUG_ON(be32_to_cpup((__be32 *)p) != OF_DT_BEGIN_NODE);
-	p += 4;
-	return ALIGN(p + strlen((char *)p) + 1, 4);
+	return 0;
 }
 
 /**
@@ -642,7 +547,7 @@ unsigned long __init of_get_flat_dt_root(void)
 const void *__init of_get_flat_dt_prop(unsigned long node, const char *name,
 				       int *size)
 {
-	return of_fdt_get_property(initial_boot_params, node, name, size);
+	return fdt_getprop(initial_boot_params, node, name, size);
 }
 
 /**
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index b36a50d6af37..26cef9ac55c5 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -84,7 +84,6 @@ extern char __dtb_start[];
 extern char __dtb_end[];
 
 /* For scanning the flat device-tree at boot time */
-extern char *find_flat_dt_string(u32 offset);
 extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname,
 				     int depth, void *data),
 			   void *data);
-- 
cgit 


From c972de14971f1482ab482f0a7abc85679a23326a Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 1 Apr 2014 22:48:01 -0500
Subject: of/fdt: use libfdt accessors for header data

With libfdt support, we can take advantage of helper accessors in libfdt
for accessing the FDT header data. This makes the code more readable and
makes the FDT blob structure more opaque to the kernel. This also
prepares for removing struct boot_param_header completely.

Signed-off-by: Rob Herring <robh@kernel.org>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Tested-by: Michal Simek <michal.simek@xilinx.com>
Tested-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Stephen Chivers <schivers@csc.com>
---
 drivers/of/fdt.c       | 26 ++++++++++++--------------
 include/linux/of_fdt.h |  8 ++++----
 2 files changed, 16 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 8e820a2b106d..0b38a6aa8603 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -34,7 +34,7 @@
  * On match, returns a non-zero value with smaller values returned for more
  * specific compatible values.
  */
-int of_fdt_is_compatible(struct boot_param_header *blob,
+int of_fdt_is_compatible(const void *blob,
 		      unsigned long node, const char *compat)
 {
 	const char *cp;
@@ -59,7 +59,7 @@ int of_fdt_is_compatible(struct boot_param_header *blob,
 /**
  * of_fdt_match - Return true if node matches a list of compatible values
  */
-int of_fdt_match(struct boot_param_header *blob, unsigned long node,
+int of_fdt_match(const void *blob, unsigned long node,
                  const char *const *compat)
 {
 	unsigned int tmp, score = 0;
@@ -98,7 +98,7 @@ static void *unflatten_dt_alloc(void **mem, unsigned long size,
  * @allnextpp: pointer to ->allnext from last allocated device_node
  * @fpsize: Size of the node path up at the current depth.
  */
-static void * unflatten_dt_node(struct boot_param_header *blob,
+static void * unflatten_dt_node(void *blob,
 				void *mem,
 				int *poffset,
 				struct device_node *dad,
@@ -295,7 +295,7 @@ static void * unflatten_dt_node(struct boot_param_header *blob,
  * @dt_alloc: An allocator that provides a virtual address to memory
  * for the resulting tree
  */
-static void __unflatten_device_tree(struct boot_param_header *blob,
+static void __unflatten_device_tree(void *blob,
 			     struct device_node **mynodes,
 			     void * (*dt_alloc)(u64 size, u64 align))
 {
@@ -312,11 +312,11 @@ static void __unflatten_device_tree(struct boot_param_header *blob,
 	}
 
 	pr_debug("Unflattening device tree:\n");
-	pr_debug("magic: %08x\n", be32_to_cpu(blob->magic));
-	pr_debug("size: %08x\n", be32_to_cpu(blob->totalsize));
-	pr_debug("version: %08x\n", be32_to_cpu(blob->version));
+	pr_debug("magic: %08x\n", fdt_magic(blob));
+	pr_debug("size: %08x\n", fdt_totalsize(blob));
+	pr_debug("version: %08x\n", fdt_version(blob));
 
-	if (be32_to_cpu(blob->magic) != OF_DT_HEADER) {
+	if (fdt_check_header(blob)) {
 		pr_err("Invalid device tree blob header\n");
 		return;
 	}
@@ -363,9 +363,7 @@ static void *kernel_tree_alloc(u64 size, u64 align)
 void of_fdt_unflatten_tree(unsigned long *blob,
 			struct device_node **mynodes)
 {
-	struct boot_param_header *device_tree =
-		(struct boot_param_header *)blob;
-	__unflatten_device_tree(device_tree, mynodes, &kernel_tree_alloc);
+	__unflatten_device_tree(blob, mynodes, &kernel_tree_alloc);
 }
 EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree);
 
@@ -852,7 +850,7 @@ bool __init early_init_dt_scan(void *params)
 	initial_boot_params = params;
 
 	/* check device tree validity */
-	if (be32_to_cpu(initial_boot_params->magic) != OF_DT_HEADER) {
+	if (fdt_check_header(params)) {
 		initial_boot_params = NULL;
 		return false;
 	}
@@ -907,9 +905,9 @@ void __init unflatten_and_copy_device_tree(void)
 		return;
 	}
 
-	size = __be32_to_cpu(initial_boot_params->totalsize);
+	size = fdt_totalsize(initial_boot_params);
 	dt = early_init_dt_alloc_memory_arch(size,
-		__alignof__(struct boot_param_header));
+					     roundup_pow_of_two(FDT_V17_SIZE));
 
 	if (dt) {
 		memcpy(dt, initial_boot_params, size);
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 26cef9ac55c5..348dae2c8a3c 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -62,15 +62,15 @@ struct boot_param_header {
 struct device_node;
 
 /* For scanning an arbitrary device-tree at any time */
-extern char *of_fdt_get_string(struct boot_param_header *blob, u32 offset);
-extern void *of_fdt_get_property(struct boot_param_header *blob,
+extern char *of_fdt_get_string(const void *blob, u32 offset);
+extern void *of_fdt_get_property(const void *blob,
 				 unsigned long node,
 				 const char *name,
 				 int *size);
-extern int of_fdt_is_compatible(struct boot_param_header *blob,
+extern int of_fdt_is_compatible(const void *blob,
 				unsigned long node,
 				const char *compat);
-extern int of_fdt_match(struct boot_param_header *blob, unsigned long node,
+extern int of_fdt_match(const void *blob, unsigned long node,
 			const char *const *compat);
 extern void of_fdt_unflatten_tree(unsigned long *blob,
 			       struct device_node **mynodes);
-- 
cgit 


From c0556d3f2c3f42eaed049139ce6f0899ecdb0217 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 22 Apr 2014 12:55:10 -0500
Subject: of/fdt: introduce of_get_flat_dt_size

Add a wrapper function to retrieve the FDT size from the FDT header. This
is primarily to avoid libfdt include paths for the whole kernel.

Signed-off-by: Rob Herring <robh@kernel.org>
Tested-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Stephen Chivers <schivers@csc.com>
---
 drivers/of/fdt.c       | 8 ++++++++
 include/linux/of_fdt.h | 1 +
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index d9e64504cda0..358bcf0500d2 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -553,6 +553,14 @@ unsigned long __init of_get_flat_dt_root(void)
 	return 0;
 }
 
+/**
+ * of_get_flat_dt_size - Return the total size of the FDT
+ */
+int __init of_get_flat_dt_size(void)
+{
+	return fdt_totalsize(initial_boot_params);
+}
+
 /**
  * of_get_flat_dt_prop - Given a node in the flat blob, return the property ptr
  *
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 348dae2c8a3c..e10099c95999 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -92,6 +92,7 @@ extern const void *of_get_flat_dt_prop(unsigned long node, const char *name,
 extern int of_flat_dt_is_compatible(unsigned long node, const char *name);
 extern int of_flat_dt_match(unsigned long node, const char *const *matches);
 extern unsigned long of_get_flat_dt_root(void);
+extern int of_get_flat_dt_size(void);
 
 extern int early_init_dt_scan_chosen(unsigned long node, const char *uname,
 				     int depth, void *data);
-- 
cgit 


From 1daa0c4ced334f18f458aba6ace7e01e8cdc2ecf Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 31 Mar 2014 15:25:04 -0500
Subject: of/fdt: convert initial_boot_params to opaque pointer

Now that all accesses to FDT header data have been converted to accessor
helpers, initial_boot_params can become an opaque pointer.

Signed-off-by: Rob Herring <robh@kernel.org>
Tested-by: Michal Simek <michal.simek@xilinx.com>
Tested-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Stephen Chivers <schivers@csc.com>
---
 drivers/of/fdt.c       | 2 +-
 include/linux/of_fdt.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 358bcf0500d2..a6f83ea107ae 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -372,7 +372,7 @@ EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree);
 int __initdata dt_root_addr_cells;
 int __initdata dt_root_size_cells;
 
-struct boot_param_header *initial_boot_params;
+void *initial_boot_params;
 
 #ifdef CONFIG_OF_EARLY_FLATTREE
 
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index e10099c95999..1f882e1da728 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -78,7 +78,7 @@ extern void of_fdt_unflatten_tree(unsigned long *blob,
 /* TBD: Temporary export of fdt globals - remove when code fully merged */
 extern int __initdata dt_root_addr_cells;
 extern int __initdata dt_root_size_cells;
-extern struct boot_param_header *initial_boot_params;
+extern void *initial_boot_params;
 
 extern char __dtb_start[];
 extern char __dtb_end[];
-- 
cgit 


From c3fc952d2fbe3ec78defd70cf73d5d76d27092ec Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 1 Apr 2014 22:55:14 -0500
Subject: of: push struct boot_param_header and defines into powerpc

Now powerpc is the only user of struct boot_param_header and FDT defines,
so they can be moved into the powerpc architecture code.

Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: linuxppc-dev@lists.ozlabs.org
Tested-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Stephen Chivers <schivers@csc.com>
---
 arch/powerpc/include/asm/prom.h | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/of_fdt.h          | 37 -------------------------------------
 2 files changed, 39 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index d977b9b78696..74b79f07f041 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -26,6 +26,45 @@
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
 
+#define OF_DT_BEGIN_NODE	0x1		/* Start of node, full name */
+#define OF_DT_END_NODE		0x2		/* End node */
+#define OF_DT_PROP		0x3		/* Property: name off, size,
+						 * content */
+#define OF_DT_NOP		0x4		/* nop */
+#define OF_DT_END		0x9
+
+#define OF_DT_VERSION		0x10
+
+/*
+ * This is what gets passed to the kernel by prom_init or kexec
+ *
+ * The dt struct contains the device tree structure, full pathes and
+ * property contents. The dt strings contain a separate block with just
+ * the strings for the property names, and is fully page aligned and
+ * self contained in a page, so that it can be kept around by the kernel,
+ * each property name appears only once in this page (cheap compression)
+ *
+ * the mem_rsvmap contains a map of reserved ranges of physical memory,
+ * passing it here instead of in the device-tree itself greatly simplifies
+ * the job of everybody. It's just a list of u64 pairs (base/size) that
+ * ends when size is 0
+ */
+struct boot_param_header {
+	__be32	magic;			/* magic word OF_DT_HEADER */
+	__be32	totalsize;		/* total size of DT block */
+	__be32	off_dt_struct;		/* offset to structure */
+	__be32	off_dt_strings;		/* offset to strings */
+	__be32	off_mem_rsvmap;		/* offset to memory reserve map */
+	__be32	version;		/* format version */
+	__be32	last_comp_version;	/* last compatible version */
+	/* version 2 fields below */
+	__be32	boot_cpuid_phys;	/* Physical CPU id we're booting on */
+	/* version 3 fields below */
+	__be32	dt_strings_size;	/* size of the DT strings block */
+	/* version 17 fields below */
+	__be32	dt_struct_size;		/* size of the DT structure block */
+};
+
 /*
  * OF address retreival & translation
  */
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 1f882e1da728..5c0ab057eecf 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -17,45 +17,8 @@
 
 /* Definitions used by the flattened device tree */
 #define OF_DT_HEADER		0xd00dfeed	/* marker */
-#define OF_DT_BEGIN_NODE	0x1		/* Start of node, full name */
-#define OF_DT_END_NODE		0x2		/* End node */
-#define OF_DT_PROP		0x3		/* Property: name off, size,
-						 * content */
-#define OF_DT_NOP		0x4		/* nop */
-#define OF_DT_END		0x9
-
-#define OF_DT_VERSION		0x10
 
 #ifndef __ASSEMBLY__
-/*
- * This is what gets passed to the kernel by prom_init or kexec
- *
- * The dt struct contains the device tree structure, full pathes and
- * property contents. The dt strings contain a separate block with just
- * the strings for the property names, and is fully page aligned and
- * self contained in a page, so that it can be kept around by the kernel,
- * each property name appears only once in this page (cheap compression)
- *
- * the mem_rsvmap contains a map of reserved ranges of physical memory,
- * passing it here instead of in the device-tree itself greatly simplifies
- * the job of everybody. It's just a list of u64 pairs (base/size) that
- * ends when size is 0
- */
-struct boot_param_header {
-	__be32	magic;			/* magic word OF_DT_HEADER */
-	__be32	totalsize;		/* total size of DT block */
-	__be32	off_dt_struct;		/* offset to structure */
-	__be32	off_dt_strings;		/* offset to strings */
-	__be32	off_mem_rsvmap;		/* offset to memory reserve map */
-	__be32	version;		/* format version */
-	__be32	last_comp_version;	/* last compatible version */
-	/* version 2 fields below */
-	__be32	boot_cpuid_phys;	/* Physical CPU id we're booting on */
-	/* version 3 fields below */
-	__be32	dt_strings_size;	/* size of the DT strings block */
-	/* version 17 fields below */
-	__be32	dt_struct_size;		/* size of the DT structure block */
-};
 
 #if defined(CONFIG_OF_FLATTREE)
 
-- 
cgit 


From 37cfdaf782590e277d9352626dba4496734e0375 Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <iivanov@mm-sol.com>
Date: Mon, 28 Apr 2014 16:34:06 +0300
Subject: usb: phy: msm: Move global regulators variables to driver state

Eliminating global variables allows the driver to handle multiple
device instances.

Signed-off-by: Ivan T. Ivanov <iivanov@mm-sol.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/phy/phy-msm-usb.c | 82 ++++++++++++++++++++-----------------------
 include/linux/usb/msm_hsusb.h |  3 ++
 2 files changed, 42 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 5b37b81f2ef6..878f67d29ed5 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -58,47 +58,43 @@
 #define USB_PHY_VDD_DIG_VOL_MIN	1000000 /* uV */
 #define USB_PHY_VDD_DIG_VOL_MAX	1320000 /* uV */
 
-static struct regulator *hsusb_3p3;
-static struct regulator *hsusb_1p8;
-static struct regulator *hsusb_vddcx;
-
 static int msm_hsusb_init_vddcx(struct msm_otg *motg, int init)
 {
 	int ret = 0;
 
 	if (init) {
-		hsusb_vddcx = regulator_get(motg->phy.dev, "HSUSB_VDDCX");
-		if (IS_ERR(hsusb_vddcx)) {
+		motg->vddcx = regulator_get(motg->phy.dev, "HSUSB_VDDCX");
+		if (IS_ERR(motg->vddcx)) {
 			dev_err(motg->phy.dev, "unable to get hsusb vddcx\n");
-			return PTR_ERR(hsusb_vddcx);
+			return PTR_ERR(motg->vddcx);
 		}
 
-		ret = regulator_set_voltage(hsusb_vddcx,
+		ret = regulator_set_voltage(motg->vddcx,
 				USB_PHY_VDD_DIG_VOL_MIN,
 				USB_PHY_VDD_DIG_VOL_MAX);
 		if (ret) {
 			dev_err(motg->phy.dev, "unable to set the voltage "
 					"for hsusb vddcx\n");
-			regulator_put(hsusb_vddcx);
+			regulator_put(motg->vddcx);
 			return ret;
 		}
 
-		ret = regulator_enable(hsusb_vddcx);
+		ret = regulator_enable(motg->vddcx);
 		if (ret) {
 			dev_err(motg->phy.dev, "unable to enable hsusb vddcx\n");
-			regulator_put(hsusb_vddcx);
+			regulator_put(motg->vddcx);
 		}
 	} else {
-		ret = regulator_set_voltage(hsusb_vddcx, 0,
+		ret = regulator_set_voltage(motg->vddcx, 0,
 			USB_PHY_VDD_DIG_VOL_MAX);
 		if (ret)
 			dev_err(motg->phy.dev, "unable to set the voltage "
 					"for hsusb vddcx\n");
-		ret = regulator_disable(hsusb_vddcx);
+		ret = regulator_disable(motg->vddcx);
 		if (ret)
 			dev_err(motg->phy.dev, "unable to disable hsusb vddcx\n");
 
-		regulator_put(hsusb_vddcx);
+		regulator_put(motg->vddcx);
 	}
 
 	return ret;
@@ -109,38 +105,38 @@ static int msm_hsusb_ldo_init(struct msm_otg *motg, int init)
 	int rc = 0;
 
 	if (init) {
-		hsusb_3p3 = regulator_get(motg->phy.dev, "HSUSB_3p3");
-		if (IS_ERR(hsusb_3p3)) {
+		motg->v3p3 = regulator_get(motg->phy.dev, "HSUSB_3p3");
+		if (IS_ERR(motg->v3p3)) {
 			dev_err(motg->phy.dev, "unable to get hsusb 3p3\n");
-			return PTR_ERR(hsusb_3p3);
+			return PTR_ERR(motg->v3p3);
 		}
 
-		rc = regulator_set_voltage(hsusb_3p3, USB_PHY_3P3_VOL_MIN,
+		rc = regulator_set_voltage(motg->v3p3, USB_PHY_3P3_VOL_MIN,
 				USB_PHY_3P3_VOL_MAX);
 		if (rc) {
 			dev_err(motg->phy.dev, "unable to set voltage level "
 					"for hsusb 3p3\n");
 			goto put_3p3;
 		}
-		rc = regulator_enable(hsusb_3p3);
+		rc = regulator_enable(motg->v3p3);
 		if (rc) {
 			dev_err(motg->phy.dev, "unable to enable the hsusb 3p3\n");
 			goto put_3p3;
 		}
-		hsusb_1p8 = regulator_get(motg->phy.dev, "HSUSB_1p8");
-		if (IS_ERR(hsusb_1p8)) {
+		motg->v1p8 = regulator_get(motg->phy.dev, "HSUSB_1p8");
+		if (IS_ERR(motg->v1p8)) {
 			dev_err(motg->phy.dev, "unable to get hsusb 1p8\n");
-			rc = PTR_ERR(hsusb_1p8);
+			rc = PTR_ERR(motg->v1p8);
 			goto disable_3p3;
 		}
-		rc = regulator_set_voltage(hsusb_1p8, USB_PHY_1P8_VOL_MIN,
+		rc = regulator_set_voltage(motg->v1p8, USB_PHY_1P8_VOL_MIN,
 				USB_PHY_1P8_VOL_MAX);
 		if (rc) {
 			dev_err(motg->phy.dev, "unable to set voltage level "
 					"for hsusb 1p8\n");
 			goto put_1p8;
 		}
-		rc = regulator_enable(hsusb_1p8);
+		rc = regulator_enable(motg->v1p8);
 		if (rc) {
 			dev_err(motg->phy.dev, "unable to enable the hsusb 1p8\n");
 			goto put_1p8;
@@ -149,54 +145,54 @@ static int msm_hsusb_ldo_init(struct msm_otg *motg, int init)
 		return 0;
 	}
 
-	regulator_disable(hsusb_1p8);
+	regulator_disable(motg->v1p8);
 put_1p8:
-	regulator_put(hsusb_1p8);
+	regulator_put(motg->v1p8);
 disable_3p3:
-	regulator_disable(hsusb_3p3);
+	regulator_disable(motg->v3p3);
 put_3p3:
-	regulator_put(hsusb_3p3);
+	regulator_put(motg->v3p3);
 	return rc;
 }
 
-static int msm_hsusb_ldo_set_mode(int on)
+static int msm_hsusb_ldo_set_mode(struct msm_otg *motg, int on)
 {
 	int ret = 0;
 
-	if (!hsusb_1p8 || IS_ERR(hsusb_1p8)) {
+	if (!motg->v1p8 || IS_ERR(motg->v1p8)) {
 		pr_err("%s: HSUSB_1p8 is not initialized\n", __func__);
 		return -ENODEV;
 	}
 
-	if (!hsusb_3p3 || IS_ERR(hsusb_3p3)) {
+	if (!motg->v3p3 || IS_ERR(motg->v3p3)) {
 		pr_err("%s: HSUSB_3p3 is not initialized\n", __func__);
 		return -ENODEV;
 	}
 
 	if (on) {
-		ret = regulator_set_optimum_mode(hsusb_1p8,
+		ret = regulator_set_optimum_mode(motg->v1p8,
 				USB_PHY_1P8_HPM_LOAD);
 		if (ret < 0) {
 			pr_err("%s: Unable to set HPM of the regulator "
 				"HSUSB_1p8\n", __func__);
 			return ret;
 		}
-		ret = regulator_set_optimum_mode(hsusb_3p3,
+		ret = regulator_set_optimum_mode(motg->v3p3,
 				USB_PHY_3P3_HPM_LOAD);
 		if (ret < 0) {
 			pr_err("%s: Unable to set HPM of the regulator "
 				"HSUSB_3p3\n", __func__);
-			regulator_set_optimum_mode(hsusb_1p8,
+			regulator_set_optimum_mode(motg->v1p8,
 				USB_PHY_1P8_LPM_LOAD);
 			return ret;
 		}
 	} else {
-		ret = regulator_set_optimum_mode(hsusb_1p8,
+		ret = regulator_set_optimum_mode(motg->v1p8,
 				USB_PHY_1P8_LPM_LOAD);
 		if (ret < 0)
 			pr_err("%s: Unable to set LPM of the regulator "
 				"HSUSB_1p8\n", __func__);
-		ret = regulator_set_optimum_mode(hsusb_3p3,
+		ret = regulator_set_optimum_mode(motg->v3p3,
 				USB_PHY_3P3_LPM_LOAD);
 		if (ret < 0)
 			pr_err("%s: Unable to set LPM of the regulator "
@@ -417,7 +413,7 @@ static int msm_otg_reset(struct usb_phy *phy)
 #ifdef CONFIG_PM
 
 #define USB_PHY_SUSP_DIG_VOL  500000
-static int msm_hsusb_config_vddcx(int high)
+static int msm_hsusb_config_vddcx(struct msm_otg *motg, int high)
 {
 	int max_vol = USB_PHY_VDD_DIG_VOL_MAX;
 	int min_vol;
@@ -428,7 +424,7 @@ static int msm_hsusb_config_vddcx(int high)
 	else
 		min_vol = USB_PHY_SUSP_DIG_VOL;
 
-	ret = regulator_set_voltage(hsusb_vddcx, min_vol, max_vol);
+	ret = regulator_set_voltage(motg->vddcx, min_vol, max_vol);
 	if (ret) {
 		pr_err("%s: unable to set the voltage for regulator "
 			"HSUSB_VDDCX\n", __func__);
@@ -518,8 +514,8 @@ static int msm_otg_suspend(struct msm_otg *motg)
 
 	if (motg->pdata->phy_type == SNPS_28NM_INTEGRATED_PHY &&
 			motg->pdata->otg_control == OTG_PMIC_CONTROL) {
-		msm_hsusb_ldo_set_mode(0);
-		msm_hsusb_config_vddcx(0);
+		msm_hsusb_ldo_set_mode(motg, 0);
+		msm_hsusb_config_vddcx(motg, 0);
 	}
 
 	if (device_may_wakeup(phy->dev))
@@ -555,8 +551,8 @@ static int msm_otg_resume(struct msm_otg *motg)
 
 	if (motg->pdata->phy_type == SNPS_28NM_INTEGRATED_PHY &&
 			motg->pdata->otg_control == OTG_PMIC_CONTROL) {
-		msm_hsusb_ldo_set_mode(1);
-		msm_hsusb_config_vddcx(1);
+		msm_hsusb_ldo_set_mode(motg, 1);
+		msm_hsusb_config_vddcx(motg, 1);
 		writel(readl(USB_PHY_CTRL) & ~PHY_RETEN, USB_PHY_CTRL);
 	}
 
@@ -1521,7 +1517,7 @@ static int __init msm_otg_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "hsusb vreg configuration failed\n");
 		goto vddcx_exit;
 	}
-	ret = msm_hsusb_ldo_set_mode(1);
+	ret = msm_hsusb_ldo_set_mode(motg, 1);
 	if (ret) {
 		dev_err(&pdev->dev, "hsusb vreg enable failed\n");
 		goto ldo_exit;
diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h
index 32754835a39b..8705b0164684 100644
--- a/include/linux/usb/msm_hsusb.h
+++ b/include/linux/usb/msm_hsusb.h
@@ -183,6 +183,9 @@ struct msm_otg {
 	enum usb_chg_state chg_state;
 	enum usb_chg_type chg_type;
 	u8 dcd_retries;
+	struct regulator *v3p3;
+	struct regulator *v1p8;
+	struct regulator *vddcx;
 };
 
 #endif
-- 
cgit 


From 971232cf7c7a71ad3cbf433f592eee3ae1a578ac Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <iivanov@mm-sol.com>
Date: Mon, 28 Apr 2014 16:34:11 +0300
Subject: usb: phy: msm: Replace custom enum usb_mode_type with enum
 usb_dr_mode

Use enum usb_dr_mode and drop default usb_dr_mode from platform data.

The USB DT bindings state: dr_mode: "...In case this attribute isn't
passed via DT, USB DRD controllers should default to OTG...",
so remove the redundant field.

Signed-off-by: Ivan T. Ivanov <iivanov@mm-sol.com>
Acked-by: David Brown <davidb@codeaurora.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 arch/arm/mach-msm/board-msm7x30.c |  2 +-
 arch/arm/mach-msm/board-qsd8x50.c |  2 +-
 drivers/usb/phy/phy-msm-usb.c     | 41 ++++++++++++++++-----------------------
 include/linux/usb/msm_hsusb.h     | 20 +------------------
 4 files changed, 20 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-msm/board-msm7x30.c b/arch/arm/mach-msm/board-msm7x30.c
index 46de789ad3ae..0c4c200e1221 100644
--- a/arch/arm/mach-msm/board-msm7x30.c
+++ b/arch/arm/mach-msm/board-msm7x30.c
@@ -95,7 +95,7 @@ static int hsusb_phy_clk_reset(struct clk *phy_clk)
 
 static struct msm_otg_platform_data msm_otg_pdata = {
 	.phy_init_seq		= hsusb_phy_init_seq,
-	.mode                   = USB_PERIPHERAL,
+	.mode                   = USB_DR_MODE_PERIPHERAL,
 	.otg_control		= OTG_PHY_CONTROL,
 	.link_clk_reset		= hsusb_link_clk_reset,
 	.phy_clk_reset		= hsusb_phy_clk_reset,
diff --git a/arch/arm/mach-msm/board-qsd8x50.c b/arch/arm/mach-msm/board-qsd8x50.c
index 9169ec324a43..4c748616ef47 100644
--- a/arch/arm/mach-msm/board-qsd8x50.c
+++ b/arch/arm/mach-msm/board-qsd8x50.c
@@ -116,7 +116,7 @@ static int hsusb_phy_clk_reset(struct clk *phy_clk)
 
 static struct msm_otg_platform_data msm_otg_pdata = {
 	.phy_init_seq		= hsusb_phy_init_seq,
-	.mode                   = USB_PERIPHERAL,
+	.mode                   = USB_DR_MODE_PERIPHERAL,
 	.otg_control		= OTG_PHY_CONTROL,
 	.link_clk_reset		= hsusb_link_clk_reset,
 	.phy_clk_reset		= hsusb_phy_clk_reset,
diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 874c51a85683..7eb2abf3f874 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -348,10 +348,10 @@ static int msm_otg_reset(struct usb_phy *phy)
 
 	if (pdata->otg_control == OTG_PHY_CONTROL) {
 		val = readl(USB_OTGSC);
-		if (pdata->mode == USB_OTG) {
+		if (pdata->mode == USB_DR_MODE_OTG) {
 			ulpi_val = ULPI_INT_IDGRD | ULPI_INT_SESS_VALID;
 			val |= OTGSC_IDIE | OTGSC_BSVIE;
-		} else if (pdata->mode == USB_PERIPHERAL) {
+		} else if (pdata->mode == USB_DR_MODE_PERIPHERAL) {
 			ulpi_val = ULPI_INT_SESS_VALID;
 			val |= OTGSC_BSVIE;
 		}
@@ -637,7 +637,7 @@ static int msm_otg_set_host(struct usb_otg *otg, struct usb_bus *host)
 	 * Fail host registration if this board can support
 	 * only peripheral configuration.
 	 */
-	if (motg->pdata->mode == USB_PERIPHERAL) {
+	if (motg->pdata->mode == USB_DR_MODE_PERIPHERAL) {
 		dev_info(otg->phy->dev, "Host mode is not supported\n");
 		return -ENODEV;
 	}
@@ -666,7 +666,7 @@ static int msm_otg_set_host(struct usb_otg *otg, struct usb_bus *host)
 	 * Kick the state machine work, if peripheral is not supported
 	 * or peripheral is already registered with us.
 	 */
-	if (motg->pdata->mode == USB_HOST || otg->gadget) {
+	if (motg->pdata->mode == USB_DR_MODE_HOST || otg->gadget) {
 		pm_runtime_get_sync(otg->phy->dev);
 		schedule_work(&motg->sm_work);
 	}
@@ -710,7 +710,7 @@ static int msm_otg_set_peripheral(struct usb_otg *otg,
 	 * Fail peripheral registration if this board can support
 	 * only host configuration.
 	 */
-	if (motg->pdata->mode == USB_HOST) {
+	if (motg->pdata->mode == USB_DR_MODE_HOST) {
 		dev_info(otg->phy->dev, "Peripheral mode is not supported\n");
 		return -ENODEV;
 	}
@@ -735,7 +735,7 @@ static int msm_otg_set_peripheral(struct usb_otg *otg,
 	 * Kick the state machine work, if host is not supported
 	 * or host is already registered with us.
 	 */
-	if (motg->pdata->mode == USB_PERIPHERAL || otg->host) {
+	if (motg->pdata->mode == USB_DR_MODE_PERIPHERAL || otg->host) {
 		pm_runtime_get_sync(otg->phy->dev);
 		schedule_work(&motg->sm_work);
 	}
@@ -1056,7 +1056,7 @@ static void msm_otg_init_sm(struct msm_otg *motg)
 	u32 otgsc = readl(USB_OTGSC);
 
 	switch (pdata->mode) {
-	case USB_OTG:
+	case USB_DR_MODE_OTG:
 		if (pdata->otg_control == OTG_PHY_CONTROL) {
 			if (otgsc & OTGSC_ID)
 				set_bit(ID, &motg->inputs);
@@ -1068,21 +1068,14 @@ static void msm_otg_init_sm(struct msm_otg *motg)
 			else
 				clear_bit(B_SESS_VLD, &motg->inputs);
 		} else if (pdata->otg_control == OTG_USER_CONTROL) {
-			if (pdata->default_mode == USB_HOST) {
-				clear_bit(ID, &motg->inputs);
-			} else if (pdata->default_mode == USB_PERIPHERAL) {
-				set_bit(ID, &motg->inputs);
-				set_bit(B_SESS_VLD, &motg->inputs);
-			} else {
 				set_bit(ID, &motg->inputs);
 				clear_bit(B_SESS_VLD, &motg->inputs);
-			}
 		}
 		break;
-	case USB_HOST:
+	case USB_DR_MODE_HOST:
 		clear_bit(ID, &motg->inputs);
 		break;
-	case USB_PERIPHERAL:
+	case USB_DR_MODE_PERIPHERAL:
 		set_bit(ID, &motg->inputs);
 		if (otgsc & OTGSC_BSV)
 			set_bit(B_SESS_VLD, &motg->inputs);
@@ -1258,7 +1251,7 @@ static ssize_t msm_otg_mode_write(struct file *file, const char __user *ubuf,
 	char buf[16];
 	struct usb_otg *otg = motg->phy.otg;
 	int status = count;
-	enum usb_mode_type req_mode;
+	enum usb_dr_mode req_mode;
 
 	memset(buf, 0x00, sizeof(buf));
 
@@ -1268,18 +1261,18 @@ static ssize_t msm_otg_mode_write(struct file *file, const char __user *ubuf,
 	}
 
 	if (!strncmp(buf, "host", 4)) {
-		req_mode = USB_HOST;
+		req_mode = USB_DR_MODE_HOST;
 	} else if (!strncmp(buf, "peripheral", 10)) {
-		req_mode = USB_PERIPHERAL;
+		req_mode = USB_DR_MODE_PERIPHERAL;
 	} else if (!strncmp(buf, "none", 4)) {
-		req_mode = USB_NONE;
+		req_mode = USB_DR_MODE_UNKNOWN;
 	} else {
 		status = -EINVAL;
 		goto out;
 	}
 
 	switch (req_mode) {
-	case USB_NONE:
+	case USB_DR_MODE_UNKNOWN:
 		switch (otg->phy->state) {
 		case OTG_STATE_A_HOST:
 		case OTG_STATE_B_PERIPHERAL:
@@ -1290,7 +1283,7 @@ static ssize_t msm_otg_mode_write(struct file *file, const char __user *ubuf,
 			goto out;
 		}
 		break;
-	case USB_PERIPHERAL:
+	case USB_DR_MODE_PERIPHERAL:
 		switch (otg->phy->state) {
 		case OTG_STATE_B_IDLE:
 		case OTG_STATE_A_HOST:
@@ -1301,7 +1294,7 @@ static ssize_t msm_otg_mode_write(struct file *file, const char __user *ubuf,
 			goto out;
 		}
 		break;
-	case USB_HOST:
+	case USB_DR_MODE_HOST:
 		switch (otg->phy->state) {
 		case OTG_STATE_B_IDLE:
 		case OTG_STATE_B_PERIPHERAL:
@@ -1511,7 +1504,7 @@ static int msm_otg_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, motg);
 	device_init_wakeup(&pdev->dev, 1);
 
-	if (motg->pdata->mode == USB_OTG &&
+	if (motg->pdata->mode == USB_DR_MODE_OTG &&
 			motg->pdata->otg_control == OTG_USER_CONTROL) {
 		ret = msm_otg_debugfs_init(motg);
 		if (ret)
diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h
index 8705b0164684..72c5830455bf 100644
--- a/include/linux/usb/msm_hsusb.h
+++ b/include/linux/usb/msm_hsusb.h
@@ -22,21 +22,6 @@
 #include <linux/usb/otg.h>
 #include <linux/clk.h>
 
-/**
- * Supported USB modes
- *
- * USB_PERIPHERAL       Only peripheral mode is supported.
- * USB_HOST             Only host mode is supported.
- * USB_OTG              OTG mode is supported.
- *
- */
-enum usb_mode_type {
-	USB_NONE = 0,
-	USB_PERIPHERAL,
-	USB_HOST,
-	USB_OTG,
-};
-
 /**
  * OTG control
  *
@@ -121,8 +106,6 @@ enum usb_chg_type {
  * @power_budget: VBUS power budget in mA (0 will be treated as 500mA).
  * @mode: Supported mode (OTG/peripheral/host).
  * @otg_control: OTG switch controlled by user/Id pin
- * @default_mode: Default operational mode. Applicable only if
- *              OTG switch is controller by user.
  * @pclk_src_name: pclk is derived from ebi1_usb_clk in case of 7x27 and 8k
  *              dfab_usb_hs_clk in case of 8660 and 8960.
  */
@@ -130,9 +113,8 @@ struct msm_otg_platform_data {
 	int *phy_init_seq;
 	void (*vbus_power)(bool on);
 	unsigned power_budget;
-	enum usb_mode_type mode;
+	enum usb_dr_mode mode;
 	enum otg_control_type otg_control;
-	enum usb_mode_type default_mode;
 	enum msm_usb_phy_type phy_type;
 	void (*setup_gpio)(enum usb_otg_state state);
 	char *pclk_src_name;
-- 
cgit 


From ff0e4a68c931dc34e43c081d1b6a895a9aaf8a2b Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <iivanov@mm-sol.com>
Date: Mon, 28 Apr 2014 16:34:12 +0300
Subject: usb: phy: msm: Remove unused pclk_src_name

There are no references to 'pclk_src_name' in platform code,
so it is unused.

Signed-off-by: Ivan T. Ivanov <iivanov@mm-sol.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/phy/phy-msm-usb.c | 26 +-------------------------
 include/linux/usb/msm_hsusb.h |  5 -----
 2 files changed, 1 insertion(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 7eb2abf3f874..c2361bfd2002 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -464,9 +464,6 @@ static int msm_otg_suspend(struct msm_otg *motg)
 	if (!IS_ERR(motg->core_clk))
 		clk_disable_unprepare(motg->core_clk);
 
-	if (!IS_ERR(motg->pclk_src))
-		clk_disable_unprepare(motg->pclk_src);
-
 	if (motg->pdata->phy_type == SNPS_28NM_INTEGRATED_PHY &&
 			motg->pdata->otg_control == OTG_PMIC_CONTROL) {
 		msm_hsusb_ldo_set_mode(motg, 0);
@@ -496,9 +493,6 @@ static int msm_otg_resume(struct msm_otg *motg)
 	if (!atomic_read(&motg->in_lpm))
 		return 0;
 
-	if (!IS_ERR(motg->pclk_src))
-		clk_prepare_enable(motg->pclk_src);
-
 	clk_prepare_enable(motg->pclk);
 	clk_prepare_enable(motg->clk);
 	if (!IS_ERR(motg->core_clk))
@@ -1396,17 +1390,8 @@ static int msm_otg_probe(struct platform_device *pdev)
 	 * If USB Core is running its protocol engine based on CORE CLK,
 	 * CORE CLK  must be running at >55Mhz for correct HSUSB
 	 * operation and USB core cannot tolerate frequency changes on
-	 * CORE CLK. For such USB cores, vote for maximum clk frequency
-	 * on pclk source
+	 * CORE CLK.
 	 */
-	 motg->pclk_src = ERR_PTR(-ENOENT);
-	 if (motg->pdata->pclk_src_name) {
-		motg->pclk_src = devm_clk_get(&pdev->dev,
-					motg->pdata->pclk_src_name);
-		if (IS_ERR(motg->pclk_src))
-			return PTR_ERR(motg->pclk_src);
-	}
-
 	motg->pclk = devm_clk_get(&pdev->dev, "usb_hs_pclk");
 	if (IS_ERR(motg->pclk)) {
 		dev_err(&pdev->dev, "failed to get usb_hs_pclk\n");
@@ -1446,10 +1431,6 @@ static int msm_otg_probe(struct platform_device *pdev)
 	motg->v1p8  = regs[2].consumer;
 
 	clk_set_rate(motg->clk, 60000000);
-	if (!IS_ERR(motg->pclk_src)) {
-		clk_set_rate(motg->pclk_src, INT_MAX);
-		clk_prepare_enable(motg->pclk_src);
-	}
 
 	clk_prepare_enable(motg->clk);
 	clk_prepare_enable(motg->pclk);
@@ -1525,8 +1506,6 @@ disable_clks:
 	clk_disable_unprepare(motg->clk);
 	if (!IS_ERR(motg->core_clk))
 		clk_disable_unprepare(motg->core_clk);
-	if (!IS_ERR(motg->pclk_src))
-		clk_disable_unprepare(motg->pclk_src);
 	return ret;
 }
 
@@ -1571,9 +1550,6 @@ static int msm_otg_remove(struct platform_device *pdev)
 	clk_disable_unprepare(motg->clk);
 	if (!IS_ERR(motg->core_clk))
 		clk_disable_unprepare(motg->core_clk);
-	if (!IS_ERR(motg->pclk_src))
-		clk_disable_unprepare(motg->pclk_src);
-
 	msm_hsusb_ldo_init(motg, 0);
 
 	pm_runtime_set_suspended(&pdev->dev);
diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h
index 72c5830455bf..262ed80a0b9e 100644
--- a/include/linux/usb/msm_hsusb.h
+++ b/include/linux/usb/msm_hsusb.h
@@ -106,8 +106,6 @@ enum usb_chg_type {
  * @power_budget: VBUS power budget in mA (0 will be treated as 500mA).
  * @mode: Supported mode (OTG/peripheral/host).
  * @otg_control: OTG switch controlled by user/Id pin
- * @pclk_src_name: pclk is derived from ebi1_usb_clk in case of 7x27 and 8k
- *              dfab_usb_hs_clk in case of 8660 and 8960.
  */
 struct msm_otg_platform_data {
 	int *phy_init_seq;
@@ -117,7 +115,6 @@ struct msm_otg_platform_data {
 	enum otg_control_type otg_control;
 	enum msm_usb_phy_type phy_type;
 	void (*setup_gpio)(enum usb_otg_state state);
-	char *pclk_src_name;
 	int (*link_clk_reset)(struct clk *link_clk, bool assert);
 	int (*phy_clk_reset)(struct clk *phy_clk);
 };
@@ -129,7 +126,6 @@ struct msm_otg_platform_data {
  * @irq: IRQ number assigned for HSUSB controller.
  * @clk: clock struct of usb_hs_clk.
  * @pclk: clock struct of usb_hs_pclk.
- * @pclk_src: pclk source for voting.
  * @phy_reset_clk: clock struct of usb_phy_clk.
  * @core_clk: clock struct of usb_hs_core_clk.
  * @regs: ioremapped register base address.
@@ -150,7 +146,6 @@ struct msm_otg {
 	int irq;
 	struct clk *clk;
 	struct clk *pclk;
-	struct clk *pclk_src;
 	struct clk *phy_reset_clk;
 	struct clk *core_clk;
 	void __iomem *regs;
-- 
cgit 


From 8364f9af237f47fa128bd4e4f7b45beef890c994 Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <iivanov@mm-sol.com>
Date: Mon, 28 Apr 2014 16:34:15 +0300
Subject: usb: phy: msm: Add device tree support and binding information

Allows controller to be specified via device tree.

Signed-off-by: Ivan T. Ivanov <iivanov@mm-sol.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 .../devicetree/bindings/usb/msm-hsusb.txt          |  67 ++++++++++++
 drivers/usb/phy/phy-msm-usb.c                      | 113 +++++++++++++++++----
 include/linux/usb/msm_hsusb.h                      |   6 +-
 3 files changed, 165 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/usb/msm-hsusb.txt b/Documentation/devicetree/bindings/usb/msm-hsusb.txt
index 5ea26c631e3a..ee4123de3de4 100644
--- a/Documentation/devicetree/bindings/usb/msm-hsusb.txt
+++ b/Documentation/devicetree/bindings/usb/msm-hsusb.txt
@@ -15,3 +15,70 @@ Example EHCI controller device node:
 		usb-phy = <&usb_otg>;
 	};
 
+USB PHY with optional OTG:
+
+Required properties:
+- compatible:   Should contain:
+  "qcom,usb-otg-ci" for chipsets with ChipIdea 45nm PHY
+  "qcom,usb-otg-snps" for chipsets with Synopsys 28nm PHY
+
+- reg:          Offset and length of the register set in the memory map
+- interrupts:   interrupt-specifier for the OTG interrupt.
+
+- clocks:       A list of phandle + clock-specifier pairs for the
+                clocks listed in clock-names
+- clock-names:  Should contain the following:
+  "phy"         USB PHY reference clock
+  "core"        Protocol engine clock
+  "iface"       Interface bus clock
+  "alt_core"    Protocol engine clock for targets with asynchronous
+                reset methodology. (optional)
+
+- vddcx-supply: phandle to the regulator for the vdd supply for
+                digital circuit operation.
+- v1p8-supply:  phandle to the regulator for the 1.8V supply
+- v3p3-supply:  phandle to the regulator for the 3.3V supply
+
+- resets:       A list of phandle + reset-specifier pairs for the
+                resets listed in reset-names
+- reset-names:  Should contain the following:
+  "phy"         USB PHY controller reset
+  "link"        USB LINK controller reset
+
+- qcom,otg-control: OTG control (VBUS and ID notifications) can be one of
+                1 - PHY control
+                2 - PMIC control
+
+Optional properties:
+- dr_mode:      One of "host", "peripheral" or "otg". Defaults to "otg"
+
+- qcom,phy-init-sequence: PHY configuration sequence values. This is related to Device
+                Mode Eye Diagram test. Start address at which these values will be
+                written is ULPI_EXT_VENDOR_SPECIFIC. Value of -1 is reserved as
+                "do not overwrite default value at this address".
+                For example: qcom,phy-init-sequence = < -1 0x63 >;
+                Will update only value at address ULPI_EXT_VENDOR_SPECIFIC + 1.
+
+Example HSUSB OTG controller device node:
+
+    usb@f9a55000 {
+        compatible = "qcom,usb-otg-snps";
+        reg = <0xf9a55000 0x400>;
+        interrupts = <0 134 0>;
+        dr_mode = "peripheral";
+
+        clocks = <&gcc GCC_XO_CLK>, <&gcc GCC_USB_HS_SYSTEM_CLK>,
+                <&gcc GCC_USB_HS_AHB_CLK>;
+
+        clock-names = "phy", "core", "iface";
+
+        vddcx-supply = <&pm8841_s2_corner>;
+        v1p8-supply = <&pm8941_l6>;
+        v3p3-supply = <&pm8941_l24>;
+
+        resets = <&gcc GCC_USB2A_PHY_BCR>, <&gcc GCC_USB_HS_BCR>;
+        reset-names = "phy", "link";
+
+        qcom,otg-control = <1>;
+        qcom,phy-init-sequence = < -1 0x63 >;
+	};
diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 7e968aa143ce..1bf2d4ee29d2 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -30,9 +30,12 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 
 #include <linux/usb.h>
 #include <linux/usb/otg.h>
+#include <linux/usb/of.h>
 #include <linux/usb/ulpi.h>
 #include <linux/usb/gadget.h>
 #include <linux/usb/hcd.h>
@@ -217,16 +220,16 @@ static struct usb_phy_io_ops msm_otg_io_ops = {
 static void ulpi_init(struct msm_otg *motg)
 {
 	struct msm_otg_platform_data *pdata = motg->pdata;
-	int *seq = pdata->phy_init_seq;
+	int *seq = pdata->phy_init_seq, idx;
+	u32 addr = ULPI_EXT_VENDOR_SPECIFIC;
 
-	if (!seq)
-		return;
+	for (idx = 0; idx < pdata->phy_init_sz; idx++) {
+		if (seq[idx] == -1)
+			continue;
 
-	while (seq[0] >= 0) {
 		dev_vdbg(motg->phy.dev, "ulpi: write 0x%02x to 0x%02x\n",
-				seq[0], seq[1]);
-		ulpi_write(&motg->phy, seq[0], seq[1]);
-		seq += 2;
+				seq[idx], addr + idx);
+		ulpi_write(&motg->phy, seq[idx], addr + idx);
 	}
 }
 
@@ -1343,26 +1346,96 @@ static void msm_otg_debugfs_cleanup(void)
 	debugfs_remove(msm_otg_dbg_root);
 }
 
+static struct of_device_id msm_otg_dt_match[] = {
+	{
+		.compatible = "qcom,usb-otg-ci",
+		.data = (void *) CI_45NM_INTEGRATED_PHY
+	},
+	{
+		.compatible = "qcom,usb-otg-snps",
+		.data = (void *) SNPS_28NM_INTEGRATED_PHY
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, msm_otg_dt_match);
+
+static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg)
+{
+	struct msm_otg_platform_data *pdata;
+	const struct of_device_id *id;
+	struct device_node *node = pdev->dev.of_node;
+	struct property *prop;
+	int len, ret, words;
+	u32 val;
+
+	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return -ENOMEM;
+
+	motg->pdata = pdata;
+
+	id = of_match_device(msm_otg_dt_match, &pdev->dev);
+	pdata->phy_type = (int) id->data;
+
+	pdata->mode = of_usb_get_dr_mode(node);
+	if (pdata->mode == USB_DR_MODE_UNKNOWN)
+		pdata->mode = USB_DR_MODE_OTG;
+
+	pdata->otg_control = OTG_PHY_CONTROL;
+	if (!of_property_read_u32(node, "qcom,otg-control", &val))
+		if (val == OTG_PMIC_CONTROL)
+			pdata->otg_control = val;
+
+	prop = of_find_property(node, "qcom,phy-init-sequence", &len);
+	if (!prop || !len)
+		return 0;
+
+	words = len / sizeof(u32);
+
+	if (words >= ULPI_EXT_VENDOR_SPECIFIC) {
+		dev_warn(&pdev->dev, "Too big PHY init sequence %d\n", words);
+		return 0;
+	}
+
+	pdata->phy_init_seq = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+	if (!pdata->phy_init_seq) {
+		dev_warn(&pdev->dev, "No space for PHY init sequence\n");
+		return 0;
+	}
+
+	ret = of_property_read_u32_array(node, "qcom,phy-init-sequence",
+					 pdata->phy_init_seq, words);
+	if (!ret)
+		pdata->phy_init_sz = words;
+
+	return 0;
+}
+
 static int msm_otg_probe(struct platform_device *pdev)
 {
 	struct regulator_bulk_data regs[3];
 	int ret = 0;
+	struct device_node *np = pdev->dev.of_node;
+	struct msm_otg_platform_data *pdata;
 	struct resource *res;
 	struct msm_otg *motg;
 	struct usb_phy *phy;
 
-	dev_info(&pdev->dev, "msm_otg probe\n");
-	if (!dev_get_platdata(&pdev->dev)) {
-		dev_err(&pdev->dev, "No platform data given. Bailing out\n");
-		return -ENODEV;
-	}
-
 	motg = devm_kzalloc(&pdev->dev, sizeof(struct msm_otg), GFP_KERNEL);
 	if (!motg) {
 		dev_err(&pdev->dev, "unable to allocate msm_otg\n");
 		return -ENOMEM;
 	}
 
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata) {
+		if (!np)
+			return -ENXIO;
+		ret = msm_otg_read_dt(pdev, motg);
+		if (ret)
+			return ret;
+	}
+
 	motg->phy.otg = devm_kzalloc(&pdev->dev, sizeof(struct usb_otg),
 				     GFP_KERNEL);
 	if (!motg->phy.otg) {
@@ -1370,17 +1443,17 @@ static int msm_otg_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
-	motg->pdata = dev_get_platdata(&pdev->dev);
 	phy = &motg->phy;
 	phy->dev = &pdev->dev;
 
-	motg->phy_reset_clk = devm_clk_get(&pdev->dev, "usb_phy_clk");
+	motg->phy_reset_clk = devm_clk_get(&pdev->dev,
+					   np ? "phy" : "usb_phy_clk");
 	if (IS_ERR(motg->phy_reset_clk)) {
 		dev_err(&pdev->dev, "failed to get usb_phy_clk\n");
 		return PTR_ERR(motg->phy_reset_clk);
 	}
 
-	motg->clk = devm_clk_get(&pdev->dev, "usb_hs_clk");
+	motg->clk = devm_clk_get(&pdev->dev, np ? "core" : "usb_hs_clk");
 	if (IS_ERR(motg->clk)) {
 		dev_err(&pdev->dev, "failed to get usb_hs_clk\n");
 		return PTR_ERR(motg->clk);
@@ -1392,7 +1465,7 @@ static int msm_otg_probe(struct platform_device *pdev)
 	 * operation and USB core cannot tolerate frequency changes on
 	 * CORE CLK.
 	 */
-	motg->pclk = devm_clk_get(&pdev->dev, "usb_hs_pclk");
+	motg->pclk = devm_clk_get(&pdev->dev, np ? "iface" : "usb_hs_pclk");
 	if (IS_ERR(motg->pclk)) {
 		dev_err(&pdev->dev, "failed to get usb_hs_pclk\n");
 		return PTR_ERR(motg->pclk);
@@ -1403,7 +1476,8 @@ static int msm_otg_probe(struct platform_device *pdev)
 	 * clock is introduced to remove the dependency on AXI
 	 * bus frequency.
 	 */
-	motg->core_clk = devm_clk_get(&pdev->dev, "usb_hs_core_clk");
+	motg->core_clk = devm_clk_get(&pdev->dev,
+				      np ? "alt_core" : "usb_hs_core_clk");
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	motg->regs = devm_ioremap(&pdev->dev, res->start, resource_size(res));
@@ -1486,7 +1560,7 @@ static int msm_otg_probe(struct platform_device *pdev)
 	device_init_wakeup(&pdev->dev, 1);
 
 	if (motg->pdata->mode == USB_DR_MODE_OTG &&
-			motg->pdata->otg_control == OTG_USER_CONTROL) {
+		motg->pdata->otg_control == OTG_USER_CONTROL) {
 		ret = msm_otg_debugfs_init(motg);
 		if (ret)
 			dev_dbg(&pdev->dev, "Can not create mode change file\n");
@@ -1639,6 +1713,7 @@ static struct platform_driver msm_otg_driver = {
 		.name = DRIVER_NAME,
 		.owner = THIS_MODULE,
 		.pm = &msm_otg_dev_pm_ops,
+		.of_match_table = msm_otg_dt_match,
 	},
 };
 
diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h
index 262ed80a0b9e..bd68299c278e 100644
--- a/include/linux/usb/msm_hsusb.h
+++ b/include/linux/usb/msm_hsusb.h
@@ -100,8 +100,9 @@ enum usb_chg_type {
 /**
  * struct msm_otg_platform_data - platform device data
  *              for msm_otg driver.
- * @phy_init_seq: PHY configuration sequence. val, reg pairs
- *              terminated by -1.
+ * @phy_init_seq: PHY configuration sequence values. Value of -1 is reserved as
+ *              "do not overwrite default value at this address".
+ * @phy_init_sz: PHY configuration sequence size.
  * @vbus_power: VBUS power on/off routine.
  * @power_budget: VBUS power budget in mA (0 will be treated as 500mA).
  * @mode: Supported mode (OTG/peripheral/host).
@@ -109,6 +110,7 @@ enum usb_chg_type {
  */
 struct msm_otg_platform_data {
 	int *phy_init_seq;
+	int phy_init_sz;
 	void (*vbus_power)(bool on);
 	unsigned power_budget;
 	enum usb_dr_mode mode;
-- 
cgit 


From a27345434134080273e0597e1d9721ff9e6ca67f Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <iivanov@mm-sol.com>
Date: Mon, 28 Apr 2014 16:34:16 +0300
Subject: usb: phy: msm: Use reset framework for LINK and PHY resets

Using the reset framework eliminates the need for platform-specific
callbacks and enables reset lines to be specified in DT files.

Signed-off-by: Ivan T. Ivanov <iivanov@mm-sol.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/phy/phy-msm-usb.c | 29 +++++++++++++++++++++--------
 include/linux/usb/msm_hsusb.h |  3 +++
 2 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 1bf2d4ee29d2..a6abb1b3a7f0 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -32,6 +32,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/reset.h>
 
 #include <linux/usb.h>
 #include <linux/usb/otg.h>
@@ -235,12 +236,15 @@ static void ulpi_init(struct msm_otg *motg)
 
 static int msm_otg_link_clk_reset(struct msm_otg *motg, bool assert)
 {
-	int ret = 0;
+	int ret;
 
-	if (!motg->pdata->link_clk_reset)
-		return ret;
+	if (motg->pdata->link_clk_reset)
+		ret = motg->pdata->link_clk_reset(motg->clk, assert);
+	else if (assert)
+		ret = reset_control_assert(motg->link_rst);
+	else
+		ret = reset_control_deassert(motg->link_rst);
 
-	ret = motg->pdata->link_clk_reset(motg->clk, assert);
 	if (ret)
 		dev_err(motg->phy.dev, "usb link clk reset %s failed\n",
 			assert ? "assert" : "deassert");
@@ -250,12 +254,13 @@ static int msm_otg_link_clk_reset(struct msm_otg *motg, bool assert)
 
 static int msm_otg_phy_clk_reset(struct msm_otg *motg)
 {
-	int ret = 0;
+	int ret;
 
-	if (!motg->pdata->phy_clk_reset)
-		return ret;
+	if (motg->pdata->phy_clk_reset)
+		ret = motg->pdata->phy_clk_reset(motg->phy_reset_clk);
+	else
+		ret = reset_control_reset(motg->phy_rst);
 
-	ret = motg->pdata->phy_clk_reset(motg->phy_reset_clk);
 	if (ret)
 		dev_err(motg->phy.dev, "usb phy clk reset failed\n");
 
@@ -1377,6 +1382,14 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg)
 	id = of_match_device(msm_otg_dt_match, &pdev->dev);
 	pdata->phy_type = (int) id->data;
 
+	motg->link_rst = devm_reset_control_get(&pdev->dev, "link");
+	if (IS_ERR(motg->link_rst))
+		return PTR_ERR(motg->link_rst);
+
+	motg->phy_rst = devm_reset_control_get(&pdev->dev, "phy");
+	if (IS_ERR(motg->phy_rst))
+		return PTR_ERR(motg->phy_rst);
+
 	pdata->mode = of_usb_get_dr_mode(node);
 	if (pdata->mode == USB_DR_MODE_UNKNOWN)
 		pdata->mode = USB_DR_MODE_OTG;
diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h
index bd68299c278e..4e5d9168f52e 100644
--- a/include/linux/usb/msm_hsusb.h
+++ b/include/linux/usb/msm_hsusb.h
@@ -165,6 +165,9 @@ struct msm_otg {
 	struct regulator *v3p3;
 	struct regulator *v1p8;
 	struct regulator *vddcx;
+
+	struct reset_control *phy_rst;
+	struct reset_control *link_rst;
 };
 
 #endif
-- 
cgit 


From cfa3ff5dfe6a11ac8bc4a080416984ab00b0980c Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <iivanov@mm-sol.com>
Date: Mon, 28 Apr 2014 16:34:17 +0300
Subject: usb: phy: msm: Add support for secondary PHY control

Add support for using the 2nd HSPHY with the USB2 core.
Some platforms may have a configuration that allows the USB controller
to work with either of the two HSPHYs present. By default the driver
configures the USB core to use the primary HSPHY. Add support to allow
the user to select the 2nd HSPHY using a DT parameter.

Signed-off-by: Ivan T. Ivanov <iivanov@mm-sol.com>
Cc: Manu Gautam <mgautam@codeaurora.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 .../devicetree/bindings/usb/msm-hsusb.txt          |  6 ++++++
 drivers/usb/phy/phy-msm-usb.c                      | 24 ++++++++++++++++++++--
 include/linux/usb/msm_hsusb.h                      |  1 +
 include/linux/usb/msm_hsusb_hw.h                   |  1 +
 4 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/usb/msm-hsusb.txt b/Documentation/devicetree/bindings/usb/msm-hsusb.txt
index ee4123de3de4..066966706ca1 100644
--- a/Documentation/devicetree/bindings/usb/msm-hsusb.txt
+++ b/Documentation/devicetree/bindings/usb/msm-hsusb.txt
@@ -59,6 +59,12 @@ Optional properties:
                 For example: qcom,phy-init-sequence = < -1 0x63 >;
                 Will update only value at address ULPI_EXT_VENDOR_SPECIFIC + 1.
 
+- qcom,phy-num: Select number of pico-phy to use, can be one of
+                0 - PHY one, default
+                1 - Second PHY
+                Some platforms may have configuration to allow USB
+                controller work with any of the two HSPHYs present.
+
 Example HSUSB OTG controller device node:
 
     usb@f9a55000 {
diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index a6abb1b3a7f0..8d57045ac938 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -314,6 +314,9 @@ static int msm_otg_phy_reset(struct msm_otg *motg)
 	if (!retries)
 		return -ETIMEDOUT;
 
+	if (motg->phy_number)
+		writel(readl(USB_PHY_CTRL2) | BIT(16), USB_PHY_CTRL2);
+
 	dev_info(motg->phy.dev, "phy_reset: success\n");
 	return 0;
 }
@@ -368,6 +371,9 @@ static int msm_otg_reset(struct usb_phy *phy)
 		ulpi_write(phy, ulpi_val, ULPI_USB_INT_EN_FALL);
 	}
 
+	if (motg->phy_number)
+		writel(readl(USB_PHY_CTRL2) | BIT(16), USB_PHY_CTRL2);
+
 	return 0;
 }
 
@@ -404,6 +410,7 @@ static int msm_otg_suspend(struct msm_otg *motg)
 	struct usb_phy *phy = &motg->phy;
 	struct usb_bus *bus = phy->otg->host;
 	struct msm_otg_platform_data *pdata = motg->pdata;
+	void __iomem *addr;
 	int cnt = 0;
 
 	if (atomic_read(&motg->in_lpm))
@@ -463,9 +470,13 @@ static int msm_otg_suspend(struct msm_otg *motg)
 	 */
 	writel(readl(USB_USBCMD) | ASYNC_INTR_CTRL | ULPI_STP_CTRL, USB_USBCMD);
 
+	addr = USB_PHY_CTRL;
+	if (motg->phy_number)
+		addr = USB_PHY_CTRL2;
+
 	if (motg->pdata->phy_type == SNPS_28NM_INTEGRATED_PHY &&
 			motg->pdata->otg_control == OTG_PMIC_CONTROL)
-		writel(readl(USB_PHY_CTRL) | PHY_RETEN, USB_PHY_CTRL);
+		writel(readl(addr) | PHY_RETEN, addr);
 
 	clk_disable_unprepare(motg->pclk);
 	clk_disable_unprepare(motg->clk);
@@ -495,6 +506,7 @@ static int msm_otg_resume(struct msm_otg *motg)
 {
 	struct usb_phy *phy = &motg->phy;
 	struct usb_bus *bus = phy->otg->host;
+	void __iomem *addr;
 	int cnt = 0;
 	unsigned temp;
 
@@ -508,9 +520,14 @@ static int msm_otg_resume(struct msm_otg *motg)
 
 	if (motg->pdata->phy_type == SNPS_28NM_INTEGRATED_PHY &&
 			motg->pdata->otg_control == OTG_PMIC_CONTROL) {
+
+		addr = USB_PHY_CTRL;
+		if (motg->phy_number)
+			addr = USB_PHY_CTRL2;
+
 		msm_hsusb_ldo_set_mode(motg, 1);
 		msm_hsusb_config_vddcx(motg, 1);
-		writel(readl(USB_PHY_CTRL) & ~PHY_RETEN, USB_PHY_CTRL);
+		writel(readl(addr) & ~PHY_RETEN, addr);
 	}
 
 	temp = readl(USB_USBCMD);
@@ -1399,6 +1416,9 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg)
 		if (val == OTG_PMIC_CONTROL)
 			pdata->otg_control = val;
 
+	if (!of_property_read_u32(node, "qcom,phy-num", &val) && val < 2)
+		motg->phy_number = val;
+
 	prop = of_find_property(node, "qcom,phy-init-sequence", &len);
 	if (!prop || !len)
 		return 0;
diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h
index 4e5d9168f52e..4628f1a4713e 100644
--- a/include/linux/usb/msm_hsusb.h
+++ b/include/linux/usb/msm_hsusb.h
@@ -158,6 +158,7 @@ struct msm_otg {
 	atomic_t in_lpm;
 	int async_int;
 	unsigned cur_power;
+	int phy_number;
 	struct delayed_work chg_work;
 	enum usb_chg_state chg_state;
 	enum usb_chg_type chg_type;
diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h
index 6e97a2d3d39f..e6d703567155 100644
--- a/include/linux/usb/msm_hsusb_hw.h
+++ b/include/linux/usb/msm_hsusb_hw.h
@@ -25,6 +25,7 @@
 #define USB_OTGSC            (MSM_USB_BASE + 0x01A4)
 #define USB_USBMODE          (MSM_USB_BASE + 0x01A8)
 #define USB_PHY_CTRL         (MSM_USB_BASE + 0x0240)
+#define USB_PHY_CTRL2        (MSM_USB_BASE + 0x0278)
 
 #define USBCMD_RESET   2
 #define USB_USBINTR          (MSM_USB_BASE + 0x0148)
-- 
cgit 


From d69c6f5df376ea40df5886468b155f515fddfbb2 Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <iivanov@mm-sol.com>
Date: Mon, 28 Apr 2014 16:34:18 +0300
Subject: usb: phy: msm: Correct USB PHY Reset sequence for newer platform

On a few legacy platforms, the USB PHY has a dedicated reset clk.
It is used to reset the USB PHY after putting it into low power
mode and for calibration of the USB PHY. Putting the USB PHY into low
power mode causes ulpi read/write timeouts, as expected. The USB PHY
reset clk is not available on newer platforms.

For 28nm PHY, reset USB PHY after resetting USB LINK.
Also reset USB PHY using USB_PHY_PON bit with USB_OTG_HS_PHY_CTRL
register after programming USB PHY Override registers as suggested
with hardware programming guidelines.

Signed-off-by: Ivan T. Ivanov <iivanov@mm-sol.com>
Signed-off-by: Tim Bird <tim.bird@sonymobile.com>
Cc: Mayank Rana <mrana@codeaurora.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/phy/phy-msm-usb.c    | 140 ++++++++++++++++++++++++---------------
 include/linux/usb/msm_hsusb_hw.h |   5 ++
 2 files changed, 93 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 8d57045ac938..bb339963f8bb 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -48,6 +48,7 @@
 #define DRIVER_NAME	"msm_otg"
 
 #define ULPI_IO_TIMEOUT_USEC	(10 * 1000)
+#define LINK_RESET_TIMEOUT_USEC	(250 * 1000)
 
 #define USB_PHY_3P3_VOL_MIN	3050000 /* uV */
 #define USB_PHY_3P3_VOL_MAX	3300000 /* uV */
@@ -267,77 +268,35 @@ static int msm_otg_phy_clk_reset(struct msm_otg *motg)
 	return ret;
 }
 
-static int msm_otg_phy_reset(struct msm_otg *motg)
+static int msm_link_reset(struct msm_otg *motg)
 {
 	u32 val;
 	int ret;
-	int retries;
 
 	ret = msm_otg_link_clk_reset(motg, 1);
-	if (ret)
-		return ret;
-	ret = msm_otg_phy_clk_reset(motg);
-	if (ret)
-		return ret;
-	ret = msm_otg_link_clk_reset(motg, 0);
 	if (ret)
 		return ret;
 
-	val = readl(USB_PORTSC) & ~PORTSC_PTS_MASK;
-	writel(val | PORTSC_PTS_ULPI, USB_PORTSC);
-
-	for (retries = 3; retries > 0; retries--) {
-		ret = ulpi_write(&motg->phy, ULPI_FUNC_CTRL_SUSPENDM,
-				ULPI_CLR(ULPI_FUNC_CTRL));
-		if (!ret)
-			break;
-		ret = msm_otg_phy_clk_reset(motg);
-		if (ret)
-			return ret;
-	}
-	if (!retries)
-		return -ETIMEDOUT;
+	/* wait for 1ms delay as suggested in HPG. */
+	usleep_range(1000, 1200);
 
-	/* This reset calibrates the phy, if the above write succeeded */
-	ret = msm_otg_phy_clk_reset(motg);
+	ret = msm_otg_link_clk_reset(motg, 0);
 	if (ret)
 		return ret;
 
-	for (retries = 3; retries > 0; retries--) {
-		ret = ulpi_read(&motg->phy, ULPI_DEBUG);
-		if (ret != -ETIMEDOUT)
-			break;
-		ret = msm_otg_phy_clk_reset(motg);
-		if (ret)
-			return ret;
-	}
-	if (!retries)
-		return -ETIMEDOUT;
-
 	if (motg->phy_number)
 		writel(readl(USB_PHY_CTRL2) | BIT(16), USB_PHY_CTRL2);
 
-	dev_info(motg->phy.dev, "phy_reset: success\n");
+	val = readl(USB_PORTSC) & ~PORTSC_PTS_MASK;
+	writel(val | PORTSC_PTS_ULPI, USB_PORTSC);
+
 	return 0;
 }
 
-#define LINK_RESET_TIMEOUT_USEC		(250 * 1000)
 static int msm_otg_reset(struct usb_phy *phy)
 {
 	struct msm_otg *motg = container_of(phy, struct msm_otg, phy);
-	struct msm_otg_platform_data *pdata = motg->pdata;
 	int cnt = 0;
-	int ret;
-	u32 val = 0;
-	u32 ulpi_val = 0;
-
-	ret = msm_otg_phy_reset(motg);
-	if (ret) {
-		dev_err(phy->dev, "phy_reset failed\n");
-		return ret;
-	}
-
-	ulpi_init(motg);
 
 	writel(USBCMD_RESET, USB_USBCMD);
 	while (cnt < LINK_RESET_TIMEOUT_USEC) {
@@ -351,11 +310,86 @@ static int msm_otg_reset(struct usb_phy *phy)
 
 	/* select ULPI phy */
 	writel(0x80000000, USB_PORTSC);
+	writel(0x0, USB_AHBBURST);
+	writel(0x08, USB_AHBMODE);
+
+	if (motg->phy_number)
+		writel(readl(USB_PHY_CTRL2) | BIT(16), USB_PHY_CTRL2);
+	return 0;
+}
+
+static void msm_phy_reset(struct msm_otg *motg)
+{
+	void __iomem *addr;
+
+	if (motg->pdata->phy_type != SNPS_28NM_INTEGRATED_PHY) {
+		msm_otg_phy_clk_reset(motg);
+		return;
+	}
+
+	addr = USB_PHY_CTRL;
+	if (motg->phy_number)
+		addr = USB_PHY_CTRL2;
+
+	/* Assert USB PHY_POR */
+	writel(readl(addr) | PHY_POR_ASSERT, addr);
+
+	/*
+	 * wait for minimum 10 microseconds as suggested in HPG.
+	 * Use a slightly larger value since the exact value didn't
+	 * work 100% of the time.
+	 */
+	udelay(12);
+
+	/* Deassert USB PHY_POR */
+	writel(readl(addr) & ~PHY_POR_ASSERT, addr);
+}
+
+static int msm_usb_reset(struct usb_phy *phy)
+{
+	struct msm_otg *motg = container_of(phy, struct msm_otg, phy);
+	int ret;
+
+	if (!IS_ERR(motg->core_clk))
+		clk_prepare_enable(motg->core_clk);
+
+	ret = msm_link_reset(motg);
+	if (ret) {
+		dev_err(phy->dev, "phy_reset failed\n");
+		return ret;
+	}
+
+	ret = msm_otg_reset(&motg->phy);
+	if (ret) {
+		dev_err(phy->dev, "link reset failed\n");
+		return ret;
+	}
 
 	msleep(100);
 
-	writel(0x0, USB_AHBBURST);
-	writel(0x00, USB_AHBMODE);
+	/* Reset USB PHY after performing USB Link RESET */
+	msm_phy_reset(motg);
+
+	if (!IS_ERR(motg->core_clk))
+		clk_disable_unprepare(motg->core_clk);
+
+	return 0;
+}
+
+static int msm_phy_init(struct usb_phy *phy)
+{
+	struct msm_otg *motg = container_of(phy, struct msm_otg, phy);
+	struct msm_otg_platform_data *pdata = motg->pdata;
+	u32 val, ulpi_val = 0;
+
+	/* Program USB PHY Override registers. */
+	ulpi_init(motg);
+
+	/*
+	 * It is recommended in HPG to reset USB PHY after programming
+	 * USB PHY Override registers.
+	 */
+	msm_phy_reset(motg);
 
 	if (pdata->otg_control == OTG_PHY_CONTROL) {
 		val = readl(USB_OTGSC);
@@ -1574,7 +1608,7 @@ static int msm_otg_probe(struct platform_device *pdev)
 		goto disable_ldo;
 	}
 
-	phy->init = msm_otg_reset;
+	phy->init = msm_phy_init;
 	phy->set_power = msm_otg_set_power;
 
 	phy->io_ops = &msm_otg_io_ops;
@@ -1583,6 +1617,8 @@ static int msm_otg_probe(struct platform_device *pdev)
 	phy->otg->set_host = msm_otg_set_host;
 	phy->otg->set_peripheral = msm_otg_set_peripheral;
 
+	msm_usb_reset(phy);
+
 	ret = usb_add_phy(&motg->phy, USB_PHY_TYPE_USB2);
 	if (ret) {
 		dev_err(&pdev->dev, "usb_add_phy failed\n");
diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h
index e6d703567155..575c74397e52 100644
--- a/include/linux/usb/msm_hsusb_hw.h
+++ b/include/linux/usb/msm_hsusb_hw.h
@@ -42,9 +42,14 @@
 #define ULPI_DATA(n)          ((n) & 255)
 #define ULPI_DATA_READ(n)     (((n) >> 8) & 255)
 
+/* synopsys 28nm phy registers */
+#define ULPI_PWR_CLK_MNG_REG	0x88
+#define OTG_COMP_DISABLE	BIT(0)
+
 #define ASYNC_INTR_CTRL         (1 << 29) /* Enable async interrupt */
 #define ULPI_STP_CTRL           (1 << 30) /* Block communication with PHY */
 #define PHY_RETEN               (1 << 1) /* PHY retention enable/disable */
+#define PHY_POR_ASSERT		(1 << 0) /* USB2 28nm PHY POR ASSERT */
 
 /* OTG definitions */
 #define OTGSC_INTSTS_MASK	(0x7f << 16)
-- 
cgit 


From 9f27984b9e098ce0a35b210ec0315c76108494e4 Mon Sep 17 00:00:00 2001
From: Tim Bird <tbird20d@gmail.com>
Date: Mon, 28 Apr 2014 16:34:19 +0300
Subject: usb: phy: msm: Fix PTS definitions for MSM USB controller

Fix the value used for Parallel Transceiver Select (PTS) for the MSM USB
controller.  This is a standard chipidea PORTSC definition, where
a PHY_TYPE of 10b (<<30) is ULPI and 11b (<<30) is SERIAL.
Fix the definitions and use them correctly in the driver code.

Signed-off-by: Tim Bird <tim.bird@sonymobile.com>
Signed-off-by: Ivan T. Ivanov <iivanov@mm-sol.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/phy/phy-msm-usb.c    | 8 +++++---
 include/linux/usb/msm_hsusb_hw.h | 5 +++--
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index bb339963f8bb..db8d96377620 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -287,8 +287,9 @@ static int msm_link_reset(struct msm_otg *motg)
 	if (motg->phy_number)
 		writel(readl(USB_PHY_CTRL2) | BIT(16), USB_PHY_CTRL2);
 
+	/* put transceiver in serial mode as part of reset */
 	val = readl(USB_PORTSC) & ~PORTSC_PTS_MASK;
-	writel(val | PORTSC_PTS_ULPI, USB_PORTSC);
+	writel(val | PORTSC_PTS_SERIAL, USB_PORTSC);
 
 	return 0;
 }
@@ -308,8 +309,9 @@ static int msm_otg_reset(struct usb_phy *phy)
 	if (cnt >= LINK_RESET_TIMEOUT_USEC)
 		return -ETIMEDOUT;
 
-	/* select ULPI phy */
-	writel(0x80000000, USB_PORTSC);
+	/* select ULPI phy and clear other status/control bits in PORTSC */
+	writel(PORTSC_PTS_ULPI, USB_PORTSC);
+
 	writel(0x0, USB_AHBBURST);
 	writel(0x08, USB_AHBMODE);
 
diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h
index 575c74397e52..98d3dd8976e5 100644
--- a/include/linux/usb/msm_hsusb_hw.h
+++ b/include/linux/usb/msm_hsusb_hw.h
@@ -31,8 +31,9 @@
 #define USB_USBINTR          (MSM_USB_BASE + 0x0148)
 
 #define PORTSC_PHCD            (1 << 23) /* phy suspend mode */
-#define PORTSC_PTS_MASK         (3 << 30)
-#define PORTSC_PTS_ULPI         (3 << 30)
+#define PORTSC_PTS_MASK        (3 << 30)
+#define PORTSC_PTS_ULPI        (2 << 30)
+#define PORTSC_PTS_SERIAL      (3 << 30)
 
 #define USB_ULPI_VIEWPORT    (MSM_USB_BASE + 0x0170)
 #define ULPI_RUN              (1 << 30)
-- 
cgit 


From 30bf8667cef5655ddfaedf043f13d03606844213 Mon Sep 17 00:00:00 2001
From: Tim Bird <tbird20d@gmail.com>
Date: Mon, 28 Apr 2014 16:34:20 +0300
Subject: usb: phy: msm: Select secondary PHY via TCSR

Select the secondary PHY using the TCSR register, if phy-num=1
in the DTS (or phy_number is set in the platform data).  The
SOC has 2 PHYs which can be used with the OTG port, and this
code allows configuring the correct one.

Note: This resolves the problem I was seeing where I couldn't
get the USB driver working at all on a dragonboard, from cold
boot.  This patch depends on patch 5/14 from Ivan's msm USB
patch set.  It does not use DT for the register address, as
there's no evidence that this address changes between SoC
versions.

Signed-off-by: Tim Bird <tim.bird@sonymobile.com>
Signed-off-by: Ivan T. Ivanov <iivanov@mm-sol.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/phy/phy-msm-usb.c    | 14 ++++++++++++++
 include/linux/usb/msm_hsusb_hw.h |  3 +++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index db8d96377620..9437bcf8c367 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -1489,6 +1489,7 @@ static int msm_otg_probe(struct platform_device *pdev)
 	struct resource *res;
 	struct msm_otg *motg;
 	struct usb_phy *phy;
+	void __iomem *phy_select;
 
 	motg = devm_kzalloc(&pdev->dev, sizeof(struct msm_otg), GFP_KERNEL);
 	if (!motg) {
@@ -1553,6 +1554,19 @@ static int msm_otg_probe(struct platform_device *pdev)
 	if (IS_ERR(motg->regs))
 		return PTR_ERR(motg->regs);
 
+	/*
+	 * NOTE: The PHYs can be multiplexed between the chipidea controller
+	 * and the dwc3 controller, using a single bit. It is important that
+	 * the dwc3 driver does not set this bit in an incompatible way.
+	 */
+	if (motg->phy_number) {
+		phy_select = devm_ioremap_nocache(&pdev->dev, USB2_PHY_SEL, 4);
+		if (IS_ERR(phy_select))
+			return PTR_ERR(phy_select);
+		/* Enable second PHY with the OTG port */
+		writel_relaxed(0x1, phy_select);
+	}
+
 	dev_info(&pdev->dev, "OTG regs = %p\n", motg->regs);
 
 	motg->irq = platform_get_irq(pdev, 0);
diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h
index 98d3dd8976e5..a29f6030afb1 100644
--- a/include/linux/usb/msm_hsusb_hw.h
+++ b/include/linux/usb/msm_hsusb_hw.h
@@ -16,6 +16,9 @@
 #ifndef __LINUX_USB_GADGET_MSM72K_UDC_H__
 #define __LINUX_USB_GADGET_MSM72K_UDC_H__
 
+/* USB phy selector - in TCSR address range */
+#define USB2_PHY_SEL         0xfd4ab000
+
 #define USB_AHBBURST         (MSM_USB_BASE + 0x0090)
 #define USB_AHBMODE          (MSM_USB_BASE + 0x0098)
 #define USB_CAPLENGTH        (MSM_USB_BASE + 0x0100) /* 8 bit */
-- 
cgit 


From 01799b622217ffebdc95e8e0aedbd4cff6a35a50 Mon Sep 17 00:00:00 2001
From: "Ivan T. Ivanov" <iivanov@mm-sol.com>
Date: Mon, 28 Apr 2014 16:34:22 +0300
Subject: usb: phy: msm: Vote for corner of VDD CX instead of voltage of VDD CX

New platforms use the RBCPR hardware feature, so voting for an
absolute voltage of VDD CX is not required. Hence vote for a corner of
VDD CX, which uses the nominal corner voltage on VDD CX.

Signed-off-by: Ivan T. Ivanov <iivanov@mm-sol.com>
Cc: Mayank Rana <mrana@codeaurora.org>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 .../devicetree/bindings/usb/msm-hsusb.txt          |  5 ++++
 drivers/usb/phy/phy-msm-usb.c                      | 35 +++++++++++++++++-----
 include/linux/usb/msm_hsusb.h                      |  1 +
 3 files changed, 33 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/usb/msm-hsusb.txt b/Documentation/devicetree/bindings/usb/msm-hsusb.txt
index 066966706ca1..2826f2af503a 100644
--- a/Documentation/devicetree/bindings/usb/msm-hsusb.txt
+++ b/Documentation/devicetree/bindings/usb/msm-hsusb.txt
@@ -65,6 +65,10 @@ Optional properties:
                 Some platforms may have configuration to allow USB
                 controller work with any of the two HSPHYs present.
 
+- qcom,vdd-levels: This property must be a list of three integer values
+                (no, min, max) where each value represents either a voltage
+                in microvolts or a value corresponding to voltage corner.
+
 Example HSUSB OTG controller device node:
 
     usb@f9a55000 {
@@ -87,4 +91,5 @@ Example HSUSB OTG controller device node:
 
         qcom,otg-control = <1>;
         qcom,phy-init-sequence = < -1 0x63 >;
+        qcom,vdd-levels = <1 5 7>;
 	};
diff --git a/drivers/usb/phy/phy-msm-usb.c b/drivers/usb/phy/phy-msm-usb.c
index 366527ecbdd1..8e7956eb8a77 100644
--- a/drivers/usb/phy/phy-msm-usb.c
+++ b/drivers/usb/phy/phy-msm-usb.c
@@ -62,6 +62,13 @@
 
 #define USB_PHY_VDD_DIG_VOL_MIN	1000000 /* uV */
 #define USB_PHY_VDD_DIG_VOL_MAX	1320000 /* uV */
+#define USB_PHY_SUSP_DIG_VOL	500000  /* uV */
+
+enum vdd_levels {
+	VDD_LEVEL_NONE = 0,
+	VDD_LEVEL_MIN,
+	VDD_LEVEL_MAX,
+};
 
 static int msm_hsusb_init_vddcx(struct msm_otg *motg, int init)
 {
@@ -69,8 +76,8 @@ static int msm_hsusb_init_vddcx(struct msm_otg *motg, int init)
 
 	if (init) {
 		ret = regulator_set_voltage(motg->vddcx,
-				USB_PHY_VDD_DIG_VOL_MIN,
-				USB_PHY_VDD_DIG_VOL_MAX);
+				motg->vdd_levels[VDD_LEVEL_MIN],
+				motg->vdd_levels[VDD_LEVEL_MAX]);
 		if (ret) {
 			dev_err(motg->phy.dev, "Cannot set vddcx voltage\n");
 			return ret;
@@ -81,7 +88,7 @@ static int msm_hsusb_init_vddcx(struct msm_otg *motg, int init)
 			dev_err(motg->phy.dev, "unable to enable hsusb vddcx\n");
 	} else {
 		ret = regulator_set_voltage(motg->vddcx, 0,
-			USB_PHY_VDD_DIG_VOL_MAX);
+				motg->vdd_levels[VDD_LEVEL_MAX]);
 		if (ret)
 			dev_err(motg->phy.dev, "Cannot set vddcx voltage\n");
 		ret = regulator_disable(motg->vddcx);
@@ -435,17 +442,16 @@ static int msm_phy_init(struct usb_phy *phy)
 
 #ifdef CONFIG_PM
 
-#define USB_PHY_SUSP_DIG_VOL  500000
 static int msm_hsusb_config_vddcx(struct msm_otg *motg, int high)
 {
-	int max_vol = USB_PHY_VDD_DIG_VOL_MAX;
+	int max_vol = motg->vdd_levels[VDD_LEVEL_MAX];
 	int min_vol;
 	int ret;
 
 	if (high)
-		min_vol = USB_PHY_VDD_DIG_VOL_MIN;
+		min_vol = motg->vdd_levels[VDD_LEVEL_MIN];
 	else
-		min_vol = USB_PHY_SUSP_DIG_VOL;
+		min_vol = motg->vdd_levels[VDD_LEVEL_NONE];
 
 	ret = regulator_set_voltage(motg->vddcx, min_vol, max_vol);
 	if (ret) {
@@ -1441,7 +1447,7 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg)
 	struct device_node *node = pdev->dev.of_node;
 	struct property *prop;
 	int len, ret, words;
-	u32 val;
+	u32 val, tmp[3];
 
 	pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
 	if (!pdata)
@@ -1472,6 +1478,19 @@ static int msm_otg_read_dt(struct platform_device *pdev, struct msm_otg *motg)
 	if (!of_property_read_u32(node, "qcom,phy-num", &val) && val < 2)
 		motg->phy_number = val;
 
+	motg->vdd_levels[VDD_LEVEL_NONE] = USB_PHY_SUSP_DIG_VOL;
+	motg->vdd_levels[VDD_LEVEL_MIN] = USB_PHY_VDD_DIG_VOL_MIN;
+	motg->vdd_levels[VDD_LEVEL_MAX] = USB_PHY_VDD_DIG_VOL_MAX;
+
+	if (of_get_property(node, "qcom,vdd-levels", &len) &&
+	    len == sizeof(tmp)) {
+		of_property_read_u32_array(node, "qcom,vdd-levels",
+					   tmp, len / sizeof(*tmp));
+		motg->vdd_levels[VDD_LEVEL_NONE] = tmp[VDD_LEVEL_NONE];
+		motg->vdd_levels[VDD_LEVEL_MIN] = tmp[VDD_LEVEL_MIN];
+		motg->vdd_levels[VDD_LEVEL_MAX] = tmp[VDD_LEVEL_MAX];
+	}
+
 	prop = of_find_property(node, "qcom,phy-init-sequence", &len);
 	if (!prop || !len)
 		return 0;
diff --git a/include/linux/usb/msm_hsusb.h b/include/linux/usb/msm_hsusb.h
index 4628f1a4713e..b0a39243295a 100644
--- a/include/linux/usb/msm_hsusb.h
+++ b/include/linux/usb/msm_hsusb.h
@@ -169,6 +169,7 @@ struct msm_otg {
 
 	struct reset_control *phy_rst;
 	struct reset_control *link_rst;
+	int vdd_levels[3];
 };
 
 #endif
-- 
cgit 


From 0302f71c0aa59571ac306f93068fbbfe65ea349b Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Mon, 30 Dec 2013 12:12:12 -0500
Subject: efi: add helper function to get UEFI params from FDT

ARM and ARM64 architectures use the device tree to pass UEFI parameters
from stub to kernel. These parameters are things known to the stub but
not discoverable by the kernel after the stub calls ExitBootServices().
There is a helper function in:

   drivers/firmware/efi/fdt.c

which the stub uses to add the UEFI parameters to the device tree.
This patch adds a complementary helper function which UEFI runtime
support may use to retrieve the parameters from the device tree.
If an architecture wants to use this helper, it should select
CONFIG_EFI_PARAMS_FROM_FDT.

Signed-off-by: Mark Salter <msalter@redhat.com>
Signed-off-by: Leif Lindholm <leif.lindholm@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efi/Kconfig |  7 ++++
 drivers/firmware/efi/efi.c   | 79 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/efi.h          |  9 +++++
 3 files changed, 95 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig
index 1e75f48b61f8..d420ae2d3413 100644
--- a/drivers/firmware/efi/Kconfig
+++ b/drivers/firmware/efi/Kconfig
@@ -47,6 +47,13 @@ config EFI_RUNTIME_MAP
 
 	  See also Documentation/ABI/testing/sysfs-firmware-efi-runtime-map.
 
+config EFI_PARAMS_FROM_FDT
+	bool
+	help
+	  Select this config option from the architecture Kconfig if
+	  the EFI runtime support gets system table address, memory
+          map address, and other parameters from the device tree.
+
 endmenu
 
 config UEFI_CPER
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index af20f1712337..cd36deb619fa 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -20,6 +20,8 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/efi.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
 #include <linux/io.h>
 
 struct efi __read_mostly efi = {
@@ -318,3 +320,80 @@ int __init efi_config_init(efi_config_table_type_t *arch_tables)
 
 	return 0;
 }
+
+#ifdef CONFIG_EFI_PARAMS_FROM_FDT
+
+#define UEFI_PARAM(name, prop, field)			   \
+	{						   \
+		{ name },				   \
+		{ prop },				   \
+		offsetof(struct efi_fdt_params, field),    \
+		FIELD_SIZEOF(struct efi_fdt_params, field) \
+	}
+
+static __initdata struct {
+	const char name[32];
+	const char propname[32];
+	int offset;
+	int size;
+} dt_params[] = {
+	UEFI_PARAM("System Table", "linux,uefi-system-table", system_table),
+	UEFI_PARAM("MemMap Address", "linux,uefi-mmap-start", mmap),
+	UEFI_PARAM("MemMap Size", "linux,uefi-mmap-size", mmap_size),
+	UEFI_PARAM("MemMap Desc. Size", "linux,uefi-mmap-desc-size", desc_size),
+	UEFI_PARAM("MemMap Desc. Version", "linux,uefi-mmap-desc-ver", desc_ver)
+};
+
+struct param_info {
+	int verbose;
+	void *params;
+};
+
+static int __init fdt_find_uefi_params(unsigned long node, const char *uname,
+				       int depth, void *data)
+{
+	struct param_info *info = data;
+	void *prop, *dest;
+	unsigned long len;
+	u64 val;
+	int i;
+
+	if (depth != 1 ||
+	    (strcmp(uname, "chosen") != 0 && strcmp(uname, "chosen@0") != 0))
+		return 0;
+
+	pr_info("Getting parameters from FDT:\n");
+
+	for (i = 0; i < ARRAY_SIZE(dt_params); i++) {
+		prop = of_get_flat_dt_prop(node, dt_params[i].propname, &len);
+		if (!prop) {
+			pr_err("Can't find %s in device tree!\n",
+			       dt_params[i].name);
+			return 0;
+		}
+		dest = info->params + dt_params[i].offset;
+
+		val = of_read_number(prop, len / sizeof(u32));
+
+		if (dt_params[i].size == sizeof(u32))
+			*(u32 *)dest = val;
+		else
+			*(u64 *)dest = val;
+
+		if (info->verbose)
+			pr_info("  %s: 0x%0*llx\n", dt_params[i].name,
+				dt_params[i].size * 2, val);
+	}
+	return 1;
+}
+
+int __init efi_get_fdt_params(struct efi_fdt_params *params, int verbose)
+{
+	struct param_info info;
+
+	info.verbose = verbose;
+	info.params = params;
+
+	return of_scan_flat_dt(fdt_find_uefi_params, &info);
+}
+#endif /* CONFIG_EFI_PARAMS_FROM_FDT */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 6a4d8e27d1d7..cd0172e796cb 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -664,6 +664,14 @@ struct efi_memory_map {
 	unsigned long desc_size;
 };
 
+struct efi_fdt_params {
+	u64 system_table;
+	u64 mmap;
+	u32 mmap_size;
+	u32 desc_size;
+	u32 desc_ver;
+};
+
 typedef struct {
 	u32 revision;
 	u32 parent_handle;
@@ -861,6 +869,7 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource,
 extern void efi_get_time(struct timespec *now);
 extern int efi_set_rtc_mmss(const struct timespec *now);
 extern void efi_reserve_boot_services(void);
+extern int efi_get_fdt_params(struct efi_fdt_params *params, int verbose);
 extern struct efi_memory_map memmap;
 
 /* Iterate through an efi_memory_map */
-- 
cgit 


From 263b4a30bfdb0d756ae9c70c6ff2eef1eb951770 Mon Sep 17 00:00:00 2001
From: Roy Franz <roy.franz@linaro.org>
Date: Wed, 8 Jan 2014 17:54:19 -0800
Subject: efi: Add shared FDT related functions for ARM/ARM64

Both ARM and ARM64 stubs will update the device tree that they pass to
the kernel.  In both cases they primarily need to add the same UEFI
related information, so the function can be shared.  Create a new FDT
related file for this to avoid use of architecture #ifdefs in
efi-stub-helper.c.

Signed-off-by: Roy Franz <roy.franz@linaro.org>
[ Fixed memory node deletion code. ]
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Leif Lindholm <leif.lindholm@linaro.org>
Acked-by: Grant Likely <grant.likely@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 drivers/firmware/efi/fdt.c | 285 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/efi.h        |   3 +
 2 files changed, 288 insertions(+)
 create mode 100644 drivers/firmware/efi/fdt.c

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/fdt.c b/drivers/firmware/efi/fdt.c
new file mode 100644
index 000000000000..5c6a8e8a9580
--- /dev/null
+++ b/drivers/firmware/efi/fdt.c
@@ -0,0 +1,285 @@
+/*
+ * FDT related Helper functions used by the EFI stub on multiple
+ * architectures. This should be #included by the EFI stub
+ * implementation files.
+ *
+ * Copyright 2013 Linaro Limited; author Roy Franz
+ *
+ * This file is part of the Linux kernel, and is made available
+ * under the terms of the GNU General Public License version 2.
+ *
+ */
+
+static efi_status_t update_fdt(efi_system_table_t *sys_table, void *orig_fdt,
+			       unsigned long orig_fdt_size,
+			       void *fdt, int new_fdt_size, char *cmdline_ptr,
+			       u64 initrd_addr, u64 initrd_size,
+			       efi_memory_desc_t *memory_map,
+			       unsigned long map_size, unsigned long desc_size,
+			       u32 desc_ver)
+{
+	int node, prev;
+	int status;
+	u32 fdt_val32;
+	u64 fdt_val64;
+
+	/*
+	 * Copy definition of linux_banner here.  Since this code is
+	 * built as part of the decompressor for ARM v7, pulling
+	 * in version.c where linux_banner is defined for the
+	 * kernel brings other kernel dependencies with it.
+	 */
+	const char linux_banner[] =
+	    "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+	    LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
+
+	/* Do some checks on provided FDT, if it exists*/
+	if (orig_fdt) {
+		if (fdt_check_header(orig_fdt)) {
+			pr_efi_err(sys_table, "Device Tree header not valid!\n");
+			return EFI_LOAD_ERROR;
+		}
+		/*
+		 * We don't get the size of the FDT if we get it from a
+		 * configuration table.
+		 */
+		if (orig_fdt_size && fdt_totalsize(orig_fdt) > orig_fdt_size) {
+			pr_efi_err(sys_table, "Truncated device tree! foo!\n");
+			return EFI_LOAD_ERROR;
+		}
+	}
+
+	if (orig_fdt)
+		status = fdt_open_into(orig_fdt, fdt, new_fdt_size);
+	else
+		status = fdt_create_empty_tree(fdt, new_fdt_size);
+
+	if (status != 0)
+		goto fdt_set_fail;
+
+	/*
+	 * Delete any memory nodes present. We must delete nodes which
+	 * early_init_dt_scan_memory may try to use.
+	 */
+	prev = 0;
+	for (;;) {
+		const char *type, *name;
+		int len;
+
+		node = fdt_next_node(fdt, prev, NULL);
+		if (node < 0)
+			break;
+
+		type = fdt_getprop(fdt, node, "device_type", &len);
+		if (type && strncmp(type, "memory", len) == 0) {
+			fdt_del_node(fdt, node);
+			continue;
+		}
+
+		prev = node;
+	}
+
+	node = fdt_subnode_offset(fdt, 0, "chosen");
+	if (node < 0) {
+		node = fdt_add_subnode(fdt, 0, "chosen");
+		if (node < 0) {
+			status = node; /* node is error code when negative */
+			goto fdt_set_fail;
+		}
+	}
+
+	if ((cmdline_ptr != NULL) && (strlen(cmdline_ptr) > 0)) {
+		status = fdt_setprop(fdt, node, "bootargs", cmdline_ptr,
+				     strlen(cmdline_ptr) + 1);
+		if (status)
+			goto fdt_set_fail;
+	}
+
+	/* Set initrd address/end in device tree, if present */
+	if (initrd_size != 0) {
+		u64 initrd_image_end;
+		u64 initrd_image_start = cpu_to_fdt64(initrd_addr);
+
+		status = fdt_setprop(fdt, node, "linux,initrd-start",
+				     &initrd_image_start, sizeof(u64));
+		if (status)
+			goto fdt_set_fail;
+		initrd_image_end = cpu_to_fdt64(initrd_addr + initrd_size);
+		status = fdt_setprop(fdt, node, "linux,initrd-end",
+				     &initrd_image_end, sizeof(u64));
+		if (status)
+			goto fdt_set_fail;
+	}
+
+	/* Add FDT entries for EFI runtime services in chosen node. */
+	node = fdt_subnode_offset(fdt, 0, "chosen");
+	fdt_val64 = cpu_to_fdt64((u64)(unsigned long)sys_table);
+	status = fdt_setprop(fdt, node, "linux,uefi-system-table",
+			     &fdt_val64, sizeof(fdt_val64));
+	if (status)
+		goto fdt_set_fail;
+
+	fdt_val64 = cpu_to_fdt64((u64)(unsigned long)memory_map);
+	status = fdt_setprop(fdt, node, "linux,uefi-mmap-start",
+			     &fdt_val64,  sizeof(fdt_val64));
+	if (status)
+		goto fdt_set_fail;
+
+	fdt_val32 = cpu_to_fdt32(map_size);
+	status = fdt_setprop(fdt, node, "linux,uefi-mmap-size",
+			     &fdt_val32,  sizeof(fdt_val32));
+	if (status)
+		goto fdt_set_fail;
+
+	fdt_val32 = cpu_to_fdt32(desc_size);
+	status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-size",
+			     &fdt_val32, sizeof(fdt_val32));
+	if (status)
+		goto fdt_set_fail;
+
+	fdt_val32 = cpu_to_fdt32(desc_ver);
+	status = fdt_setprop(fdt, node, "linux,uefi-mmap-desc-ver",
+			     &fdt_val32, sizeof(fdt_val32));
+	if (status)
+		goto fdt_set_fail;
+
+	/*
+	 * Add kernel version banner so stub/kernel match can be
+	 * verified.
+	 */
+	status = fdt_setprop_string(fdt, node, "linux,uefi-stub-kern-ver",
+			     linux_banner);
+	if (status)
+		goto fdt_set_fail;
+
+	return EFI_SUCCESS;
+
+fdt_set_fail:
+	if (status == -FDT_ERR_NOSPACE)
+		return EFI_BUFFER_TOO_SMALL;
+
+	return EFI_LOAD_ERROR;
+}
+
+#ifndef EFI_FDT_ALIGN
+#define EFI_FDT_ALIGN EFI_PAGE_SIZE
+#endif
+
+/*
+ * Allocate memory for a new FDT, then add EFI, commandline, and
+ * initrd related fields to the FDT.  This routine increases the
+ * FDT allocation size until the allocated memory is large
+ * enough.  EFI allocations are in EFI_PAGE_SIZE granules,
+ * which are fixed at 4K bytes, so in most cases the first
+ * allocation should succeed.
+ * EFI boot services are exited at the end of this function.
+ * There must be no allocations between the get_memory_map()
+ * call and the exit_boot_services() call, so the exiting of
+ * boot services is very tightly tied to the creation of the FDT
+ * with the final memory map in it.
+ */
+
+efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
+					    void *handle,
+					    unsigned long *new_fdt_addr,
+					    unsigned long max_addr,
+					    u64 initrd_addr, u64 initrd_size,
+					    char *cmdline_ptr,
+					    unsigned long fdt_addr,
+					    unsigned long fdt_size)
+{
+	unsigned long map_size, desc_size;
+	u32 desc_ver;
+	unsigned long mmap_key;
+	efi_memory_desc_t *memory_map;
+	unsigned long new_fdt_size;
+	efi_status_t status;
+
+	/*
+	 * Estimate size of new FDT, and allocate memory for it. We
+	 * will allocate a bigger buffer if this ends up being too
+	 * small, so a rough guess is OK here.
+	 */
+	new_fdt_size = fdt_size + EFI_PAGE_SIZE;
+	while (1) {
+		status = efi_high_alloc(sys_table, new_fdt_size, EFI_FDT_ALIGN,
+					new_fdt_addr, max_addr);
+		if (status != EFI_SUCCESS) {
+			pr_efi_err(sys_table, "Unable to allocate memory for new device tree.\n");
+			goto fail;
+		}
+
+		/*
+		 * Now that we have done our final memory allocation (and free)
+		 * we can get the memory map key  needed for
+		 * exit_boot_services().
+		 */
+		status = efi_get_memory_map(sys_table, &memory_map, &map_size,
+					    &desc_size, &desc_ver, &mmap_key);
+		if (status != EFI_SUCCESS)
+			goto fail_free_new_fdt;
+
+		status = update_fdt(sys_table,
+				    (void *)fdt_addr, fdt_size,
+				    (void *)*new_fdt_addr, new_fdt_size,
+				    cmdline_ptr, initrd_addr, initrd_size,
+				    memory_map, map_size, desc_size, desc_ver);
+
+		/* Succeeding the first time is the expected case. */
+		if (status == EFI_SUCCESS)
+			break;
+
+		if (status == EFI_BUFFER_TOO_SMALL) {
+			/*
+			 * We need to allocate more space for the new
+			 * device tree, so free existing buffer that is
+			 * too small.  Also free memory map, as we will need
+			 * to get new one that reflects the free/alloc we do
+			 * on the device tree buffer.
+			 */
+			efi_free(sys_table, new_fdt_size, *new_fdt_addr);
+			sys_table->boottime->free_pool(memory_map);
+			new_fdt_size += EFI_PAGE_SIZE;
+		} else {
+			pr_efi_err(sys_table, "Unable to constuct new device tree.\n");
+			goto fail_free_mmap;
+		}
+	}
+
+	/* Now we are ready to exit_boot_services.*/
+	status = sys_table->boottime->exit_boot_services(handle, mmap_key);
+
+
+	if (status == EFI_SUCCESS)
+		return status;
+
+	pr_efi_err(sys_table, "Exit boot services failed.\n");
+
+fail_free_mmap:
+	sys_table->boottime->free_pool(memory_map);
+
+fail_free_new_fdt:
+	efi_free(sys_table, new_fdt_size, *new_fdt_addr);
+
+fail:
+	return EFI_LOAD_ERROR;
+}
+
+static void *get_fdt(efi_system_table_t *sys_table)
+{
+	efi_guid_t fdt_guid = DEVICE_TREE_GUID;
+	efi_config_table_t *tables;
+	void *fdt;
+	int i;
+
+	tables = (efi_config_table_t *) sys_table->tables;
+	fdt = NULL;
+
+	for (i = 0; i < sys_table->nr_tables; i++)
+		if (efi_guidcmp(tables[i].guid, fdt_guid) == 0) {
+			fdt = (void *) tables[i].table;
+			break;
+	 }
+
+	return fdt;
+}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index cd0172e796cb..41bbf8ba4ba8 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -575,6 +575,9 @@ typedef efi_status_t efi_query_variable_store_t(u32 attributes, unsigned long si
 #define EFI_FILE_SYSTEM_GUID \
     EFI_GUID(  0x964e5b22, 0x6459, 0x11d2, 0x8e, 0x39, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b )
 
+#define DEVICE_TREE_GUID \
+    EFI_GUID(  0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0 )
+
 typedef struct {
 	efi_guid_t guid;
 	u64 table;
-- 
cgit 


From 774b514390b1eb8476bc759262790762bd1ef45a Mon Sep 17 00:00:00 2001
From: Maxime COQUELIN <maxime.coquelin@st.com>
Date: Wed, 29 Jan 2014 17:24:07 +0100
Subject: clk: divider: Add round to closest divider

In some cases, we want to be able to round the divider to the closest one,
instead of rounding up.

This patch adds a new CLK_DIVIDER_ROUND_CLOSEST flag to specify the divider
has to round to closest div, keeping rounding up as the default behaviour.

Signed-off-by: Maxime Coquelin <maxime.coquelin@st.com>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk-divider.c    | 69 ++++++++++++++++++++++++++++++++++++++++++--
 include/linux/clk-provider.h |  3 ++
 2 files changed, 70 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index 4637697c139f..c57294563a98 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -43,6 +43,17 @@ static unsigned int _get_table_maxdiv(const struct clk_div_table *table)
 	return maxdiv;
 }
 
+static unsigned int _get_table_mindiv(const struct clk_div_table *table)
+{
+	unsigned int mindiv = UINT_MAX;
+	const struct clk_div_table *clkt;
+
+	for (clkt = table; clkt->div; clkt++)
+		if (clkt->div < mindiv)
+			mindiv = clkt->div;
+	return mindiv;
+}
+
 static unsigned int _get_maxdiv(struct clk_divider *divider)
 {
 	if (divider->flags & CLK_DIVIDER_ONE_BASED)
@@ -162,6 +173,24 @@ static int _round_up_table(const struct clk_div_table *table, int div)
 	return up;
 }
 
+static int _round_down_table(const struct clk_div_table *table, int div)
+{
+	const struct clk_div_table *clkt;
+	int down = _get_table_mindiv(table);
+
+	for (clkt = table; clkt->div; clkt++) {
+		if (clkt->div == div)
+			return clkt->div;
+		else if (clkt->div > div)
+			continue;
+
+		if ((div - clkt->div) < (div - down))
+			down = clkt->div;
+	}
+
+	return down;
+}
+
 static int _div_round_up(struct clk_divider *divider,
 		unsigned long parent_rate, unsigned long rate)
 {
@@ -175,6 +204,42 @@ static int _div_round_up(struct clk_divider *divider,
 	return div;
 }
 
+static int _div_round_closest(struct clk_divider *divider,
+		unsigned long parent_rate, unsigned long rate)
+{
+	int up, down, div;
+
+	up = down = div = DIV_ROUND_CLOSEST(parent_rate, rate);
+
+	if (divider->flags & CLK_DIVIDER_POWER_OF_TWO) {
+		up = __roundup_pow_of_two(div);
+		down = __rounddown_pow_of_two(div);
+	} else if (divider->table) {
+		up = _round_up_table(divider->table, div);
+		down = _round_down_table(divider->table, div);
+	}
+
+	return (up - div) <= (div - down) ? up : down;
+}
+
+static int _div_round(struct clk_divider *divider, unsigned long parent_rate,
+		unsigned long rate)
+{
+	if (divider->flags & CLK_DIVIDER_ROUND_CLOSEST)
+		return _div_round_closest(divider, parent_rate, rate);
+
+	return _div_round_up(divider, parent_rate, rate);
+}
+
+static bool _is_best_div(struct clk_divider *divider,
+		int rate, int now, int best)
+{
+	if (divider->flags & CLK_DIVIDER_ROUND_CLOSEST)
+		return abs(rate - now) < abs(rate - best);
+
+	return now <= rate && now > best;
+}
+
 static int clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
 		unsigned long *best_parent_rate)
 {
@@ -190,7 +255,7 @@ static int clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
 
 	if (!(__clk_get_flags(hw->clk) & CLK_SET_RATE_PARENT)) {
 		parent_rate = *best_parent_rate;
-		bestdiv = _div_round_up(divider, parent_rate, rate);
+		bestdiv = _div_round(divider, parent_rate, rate);
 		bestdiv = bestdiv == 0 ? 1 : bestdiv;
 		bestdiv = bestdiv > maxdiv ? maxdiv : bestdiv;
 		return bestdiv;
@@ -217,7 +282,7 @@ static int clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
 		parent_rate = __clk_round_rate(__clk_get_parent(hw->clk),
 				MULT_ROUND_UP(rate, i));
 		now = DIV_ROUND_UP(parent_rate, i);
-		if (now <= rate && now > best) {
+		if (_is_best_div(divider, rate, now, best)) {
 			bestdiv = i;
 			best = now;
 			*best_parent_rate = parent_rate;
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 511917416fb0..59e2eb58f555 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -312,6 +312,8 @@ struct clk_div_table {
  *   of this register, and mask of divider bits are in higher 16-bit of this
  *   register.  While setting the divider bits, higher 16-bit should also be
  *   updated to indicate changing divider bits.
+ * CLK_DIVIDER_ROUND_CLOSEST - Round the best calculated divider to the
+ *	closest integer instead of rounding it up.
  */
 struct clk_divider {
 	struct clk_hw	hw;
@@ -327,6 +329,7 @@ struct clk_divider {
 #define CLK_DIVIDER_POWER_OF_TWO	BIT(1)
 #define CLK_DIVIDER_ALLOW_ZERO		BIT(2)
 #define CLK_DIVIDER_HIWORD_MASK		BIT(3)
+#define CLK_DIVIDER_ROUND_CLOSEST	BIT(4)
 
 extern const struct clk_ops clk_divider_ops;
 struct clk *clk_register_divider(struct device *dev, const char *name,
-- 
cgit 


From c1b3156f121fd301191e0b4c5fa2fec42cd17871 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Mon, 28 Apr 2014 18:43:32 +0200
Subject: drbd: use blk_set_stacking_limits()

...instead of directly assigning to q->limits.discard_zeroes_data

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/drbd/drbd_nl.c | 12 ++++++------
 include/linux/drbd.h         |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index a187c5b0da27..b4fc401b587f 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1114,15 +1114,18 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_
 	struct request_queue * const q = device->rq_queue;
 	unsigned int max_hw_sectors = max_bio_size >> 9;
 	unsigned int max_segments = 0;
+	struct request_queue *b = NULL;
 
 	if (get_ldev_if_state(device, D_ATTACHING)) {
-		struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;
+		b = device->ldev->backing_bdev->bd_disk->queue;
 
 		max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
 		rcu_read_lock();
 		max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
 		rcu_read_unlock();
-		put_ldev(device);
+
+		blk_set_stacking_limits(&q->limits);
+		blk_queue_max_write_same_sectors(q, 0);
 	}
 
 	blk_queue_logical_block_size(q, 512);
@@ -1131,14 +1134,11 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_
 	blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
 	blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
 
-	if (get_ldev_if_state(device, D_ATTACHING)) {
-		struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue;
+	if (b) {
 		struct drbd_connection *connection = first_peer_device(device)->connection;
 
 		if (blk_queue_discard(b) &&
 		    (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
-			/* inherit from backing queue */
-			q->limits.discard_zeroes_data = 1;
 			/* For now, don't allow more than one activity log extent worth of data
 			 * to be discarded in one go. We may need to rework drbd_al_begin_io()
 			 * to allow for even larger discard ranges */
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 3dbe9bd57a09..fffd4b8563cb 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -52,7 +52,7 @@
 #endif
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.4.3"
+#define REL_VERSION "8.4.4"
 #define API_VERSION 1
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 101
-- 
cgit 


From 02df6fe145715f1d3858c0c65aed991f148b70b4 Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Mon, 28 Apr 2014 18:43:33 +0200
Subject: drbd: Test cstate while holding req_lock

In case a connection transitions into C_TIMEOUT within the timer
function (request_timer_fn()) we need to make sure that the receiver
thread (potentially running on a different CPU) sees the updated
cstate later on.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/drbd/drbd_nl.c | 3 ++-
 include/linux/drbd.h         | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index b4fc401b587f..f4d3aff89aa1 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -442,12 +442,13 @@ bool conn_try_outdate_peer(struct drbd_connection *connection)
 	char *ex_to_string;
 	int r;
 
+	spin_lock_irq(&connection->resource->req_lock);
 	if (connection->cstate >= C_WF_REPORT_PARAMS) {
 		drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
+		spin_unlock_irq(&connection->resource->req_lock);
 		return false;
 	}
 
-	spin_lock_irq(&connection->resource->req_lock);
 	connect_cnt = connection->connect_cnt;
 	spin_unlock_irq(&connection->resource->req_lock);
 
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index fffd4b8563cb..3dbe9bd57a09 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -52,7 +52,7 @@
 #endif
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.4.4"
+#define REL_VERSION "8.4.3"
 #define API_VERSION 1
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 101
-- 
cgit 


From 52c324f8a87b336496d0f5e9d8dff1aa32bb08cd Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 1 May 2014 00:13:47 +0200
Subject: cpuidle: Combine cpuidle_enabled() with cpuidle_select()

Since both cpuidle_enabled() and cpuidle_select() are only called by
cpuidle_idle_call(), it is not really useful to keep them separate
and combining them will help to avoid complicating cpuidle_idle_call()
even further if governors are changed to return error codes sometimes.

This code modification shouldn't lead to any functional changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c | 26 ++++++--------------------
 include/linux/cpuidle.h   |  5 -----
 kernel/sched/idle.c       | 20 +++++++-------------
 3 files changed, 13 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 8236746e46bb..f38359f64cc6 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -64,26 +64,6 @@ int cpuidle_play_dead(void)
 	return -ENODEV;
 }
 
-/**
- * cpuidle_enabled - check if the cpuidle framework is ready
- * @dev: cpuidle device for this cpu
- * @drv: cpuidle driver for this cpu
- *
- * Return 0 on success, otherwise:
- * -NODEV : the cpuidle framework is not available
- * -EBUSY : the cpuidle framework is not initialized
- */
-int cpuidle_enabled(struct cpuidle_driver *drv, struct cpuidle_device *dev)
-{
-	if (off || !initialized)
-		return -ENODEV;
-
-	if (!drv || !dev || !dev->enabled)
-		return -EBUSY;
-
-	return 0;
-}
-
 /**
  * cpuidle_enter_state - enter the state and update stats
  * @dev: cpuidle device for this cpu
@@ -138,6 +118,12 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
  */
 int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
+	if (off || !initialized)
+		return -ENODEV;
+
+	if (!drv || !dev || !dev->enabled)
+		return -EBUSY;
+
 	return cpuidle_curr_governor->select(drv, dev);
 }
 
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index b0238cba440b..a8d5bd391a26 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -120,8 +120,6 @@ struct cpuidle_driver {
 #ifdef CONFIG_CPU_IDLE
 extern void disable_cpuidle(void);
 
-extern int cpuidle_enabled(struct cpuidle_driver *drv,
-			  struct cpuidle_device *dev);
 extern int cpuidle_select(struct cpuidle_driver *drv,
 			  struct cpuidle_device *dev);
 extern int cpuidle_enter(struct cpuidle_driver *drv,
@@ -149,9 +147,6 @@ extern int cpuidle_play_dead(void);
 extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
 #else
 static inline void disable_cpuidle(void) { }
-static inline int cpuidle_enabled(struct cpuidle_driver *drv,
-				  struct cpuidle_device *dev)
-{return -ENODEV; }
 static inline int cpuidle_select(struct cpuidle_driver *drv,
 				 struct cpuidle_device *dev)
 {return -ENODEV; }
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 8f4390a079c7..a8f12247ce7c 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -101,19 +101,13 @@ static int cpuidle_idle_call(void)
 	rcu_idle_enter();
 
 	/*
-	 * Check if the cpuidle framework is ready, otherwise fallback
-	 * to the default arch specific idle method
+	 * Ask the cpuidle framework to choose a convenient idle state.
+	 * Fall back to the default arch specific idle method on errors.
 	 */
-	ret = cpuidle_enabled(drv, dev);
-
-	if (!ret) {
-		/*
-		 * Ask the governor to choose an idle state it thinks
-		 * it is convenient to go to. There is *always* a
-		 * convenient idle state
-		 */
-		next_state = cpuidle_select(drv, dev);
+	next_state = cpuidle_select(drv, dev);
 
+	ret = next_state;
+	if (ret >= 0) {
 		/*
 		 * The idle task must be scheduled, it is pointless to
 		 * go to idle, just update no idle residency and get
@@ -140,7 +134,7 @@ static int cpuidle_idle_call(void)
 					CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
 					&dev->cpu);
 
-			if (!ret) {
+			if (ret >= 0) {
 				trace_cpu_idle_rcuidle(next_state, dev->cpu);
 
 				/*
@@ -175,7 +169,7 @@ static int cpuidle_idle_call(void)
 	 * We can't use the cpuidle framework, let's use the default
 	 * idle routine
 	 */
-	if (ret)
+	if (ret < 0)
 		arch_cpu_idle();
 
 	__current_set_polling();
-- 
cgit 


From 034cd97ebda4062eb4402a6cf963ccd262caa86a Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Mon, 14 Apr 2014 15:28:35 +0200
Subject: PCI/MSI: Remove pci_enable_msi_block()

There are no users of pci_enable_msi_block() function left.  Obsolete it in
favor of pci_enable_msi_range() and pci_enable_msi_exact() functions.

Previously, we called arch_setup_msi_irqs() once, requesting the same
vector count we passed to arch_msi_check_device().  Now we may call it
several times: if it returns failure, we may retry and request fewer
vectors.

We don't keep track of the vector count we initially passed to
arch_msi_check_device().  We only keep track of the number of vectors
successfully set up by arch_setup_msi_irqs(), and this is what we use to
clean things up when disabling MSI.  Therefore, we assume that
arch_msi_check_device() does nothing that will have to be cleaned up later.

[bhelgaas: changelog]
Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/msi.c   | 79 ++++++++++++++++++++++-------------------------------
 include/linux/pci.h |  5 +---
 2 files changed, 34 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 04130c3f9cf6..36dd0caa1759 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -879,50 +879,6 @@ int pci_msi_vec_count(struct pci_dev *dev)
 }
 EXPORT_SYMBOL(pci_msi_vec_count);
 
-/**
- * pci_enable_msi_block - configure device's MSI capability structure
- * @dev: device to configure
- * @nvec: number of interrupts to configure
- *
- * Allocate IRQs for a device with the MSI capability.
- * This function returns a negative errno if an error occurs.  If it
- * is unable to allocate the number of interrupts requested, it returns
- * the number of interrupts it might be able to allocate.  If it successfully
- * allocates at least the number of interrupts requested, it returns 0 and
- * updates the @dev's irq member to the lowest new interrupt number; the
- * other interrupt numbers allocated to this device are consecutive.
- */
-int pci_enable_msi_block(struct pci_dev *dev, int nvec)
-{
-	int status, maxvec;
-
-	if (dev->current_state != PCI_D0)
-		return -EINVAL;
-
-	maxvec = pci_msi_vec_count(dev);
-	if (maxvec < 0)
-		return maxvec;
-	if (nvec > maxvec)
-		return maxvec;
-
-	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
-	if (status)
-		return status;
-
-	WARN_ON(!!dev->msi_enabled);
-
-	/* Check whether driver already requested MSI-X irqs */
-	if (dev->msix_enabled) {
-		dev_info(&dev->dev, "can't enable MSI "
-			 "(MSI-X already enabled)\n");
-		return -EINVAL;
-	}
-
-	status = msi_capability_init(dev, nvec);
-	return status;
-}
-EXPORT_SYMBOL(pci_enable_msi_block);
-
 void pci_msi_shutdown(struct pci_dev *dev)
 {
 	struct msi_desc *desc;
@@ -1128,14 +1084,45 @@ void pci_msi_init_pci_dev(struct pci_dev *dev)
  **/
 int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
 {
-	int nvec = maxvec;
+	int nvec;
 	int rc;
 
+	if (dev->current_state != PCI_D0)
+		return -EINVAL;
+
+	WARN_ON(!!dev->msi_enabled);
+
+	/* Check whether driver already requested MSI-X irqs */
+	if (dev->msix_enabled) {
+		dev_info(&dev->dev,
+			 "can't enable MSI (MSI-X already enabled)\n");
+		return -EINVAL;
+	}
+
 	if (maxvec < minvec)
 		return -ERANGE;
 
+	nvec = pci_msi_vec_count(dev);
+	if (nvec < 0)
+		return nvec;
+	else if (nvec < minvec)
+		return -EINVAL;
+	else if (nvec > maxvec)
+		nvec = maxvec;
+
+	do {
+		rc = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
+		if (rc < 0) {
+			return rc;
+		} else if (rc > 0) {
+			if (rc < minvec)
+				return -ENOSPC;
+			nvec = rc;
+		}
+	} while (rc);
+
 	do {
-		rc = pci_enable_msi_block(dev, nvec);
+		rc = msi_capability_init(dev, nvec);
 		if (rc < 0) {
 			return rc;
 		} else if (rc > 0) {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index aab57b4abe7f..499755e6dab5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1158,7 +1158,6 @@ struct msix_entry {
 
 #ifdef CONFIG_PCI_MSI
 int pci_msi_vec_count(struct pci_dev *dev);
-int pci_enable_msi_block(struct pci_dev *dev, int nvec);
 void pci_msi_shutdown(struct pci_dev *dev);
 void pci_disable_msi(struct pci_dev *dev);
 int pci_msix_vec_count(struct pci_dev *dev);
@@ -1188,8 +1187,6 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev,
 }
 #else
 static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
-static inline int pci_enable_msi_block(struct pci_dev *dev, int nvec)
-{ return -ENOSYS; }
 static inline void pci_msi_shutdown(struct pci_dev *dev) { }
 static inline void pci_disable_msi(struct pci_dev *dev) { }
 static inline int pci_msix_vec_count(struct pci_dev *dev) { return -ENOSYS; }
@@ -1244,7 +1241,7 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { }
 static inline void pcie_ecrc_get_policy(char *str) { }
 #endif
 
-#define pci_enable_msi(pdev)	pci_enable_msi_block(pdev, 1)
+#define pci_enable_msi(pdev)	pci_enable_msi_exact(pdev, 1)
 
 #ifdef CONFIG_HT_IRQ
 /* The functions a driver should call */
-- 
cgit 


From ee3468739ed83d862dbbd90397aff5258f8f2c8e Mon Sep 17 00:00:00 2001
From: Brian W Hart <hartb@linux.vnet.ibm.com>
Date: Thu, 1 May 2014 14:32:35 -0500
Subject: fbdev/fb.h: silence warning with -Wsign-compare
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Silence the warning when building with -Wsign-compare when fb.h is
included:

include/linux/fb.h: In function ‘__fb_pad_aligned_buffer’:
include/linux/fb.h:650:17: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
   for (j = 0; j < s_pitch; j++)
                 ^

Signed-off-by: Brian W Hart <hartb@linux.vnet.ibm.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 include/linux/fb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fb.h b/include/linux/fb.h
index 506242979eea..b6bfda99add3 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -642,7 +642,7 @@ static inline void unlock_fb_info(struct fb_info *info)
 static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch,
 					   u8 *src, u32 s_pitch, u32 height)
 {
-	int i, j;
+	u32 i, j;
 
 	d_pitch -= s_pitch;
 
-- 
cgit 


From dfbb85cab5f0819d0424a3637b03e7892704fa42 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Wed, 2 Apr 2014 20:17:00 -0700
Subject: DMA: shdma: add cyclic transfer support

This patch adds cyclic transfer support and enables dmaengine_prep_dma_cyclic()

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
[reflown changelog for readability]
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 drivers/dma/sh/shdma-base.c | 72 ++++++++++++++++++++++++++++++++++++++++-----
 include/linux/shdma-base.h  |  1 +
 2 files changed, 66 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c
index 6786ecbd5ed4..974794cdb6ed 100644
--- a/drivers/dma/sh/shdma-base.c
+++ b/drivers/dma/sh/shdma-base.c
@@ -304,6 +304,7 @@ static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all)
 	dma_async_tx_callback callback = NULL;
 	void *param = NULL;
 	unsigned long flags;
+	LIST_HEAD(cyclic_list);
 
 	spin_lock_irqsave(&schan->chan_lock, flags);
 	list_for_each_entry_safe(desc, _desc, &schan->ld_queue, node) {
@@ -369,10 +370,16 @@ static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all)
 		if (((desc->mark == DESC_COMPLETED ||
 		      desc->mark == DESC_WAITING) &&
 		     async_tx_test_ack(&desc->async_tx)) || all) {
-			/* Remove from ld_queue list */
-			desc->mark = DESC_IDLE;
 
-			list_move(&desc->node, &schan->ld_free);
+			if (all || !desc->cyclic) {
+				/* Remove from ld_queue list */
+				desc->mark = DESC_IDLE;
+				list_move(&desc->node, &schan->ld_free);
+			} else {
+				/* reuse as cyclic */
+				desc->mark = DESC_SUBMITTED;
+				list_move_tail(&desc->node, &cyclic_list);
+			}
 
 			if (list_empty(&schan->ld_queue)) {
 				dev_dbg(schan->dev, "Bring down channel %d\n", schan->id);
@@ -389,6 +396,8 @@ static dma_async_tx_callback __ld_cleanup(struct shdma_chan *schan, bool all)
 		 */
 		schan->dma_chan.completed_cookie = schan->dma_chan.cookie;
 
+	list_splice_tail(&cyclic_list, &schan->ld_queue);
+
 	spin_unlock_irqrestore(&schan->chan_lock, flags);
 
 	if (callback)
@@ -521,7 +530,7 @@ static struct shdma_desc *shdma_add_desc(struct shdma_chan *schan,
  */
 static struct dma_async_tx_descriptor *shdma_prep_sg(struct shdma_chan *schan,
 	struct scatterlist *sgl, unsigned int sg_len, dma_addr_t *addr,
-	enum dma_transfer_direction direction, unsigned long flags)
+	enum dma_transfer_direction direction, unsigned long flags, bool cyclic)
 {
 	struct scatterlist *sg;
 	struct shdma_desc *first = NULL, *new = NULL /* compiler... */;
@@ -569,7 +578,11 @@ static struct dma_async_tx_descriptor *shdma_prep_sg(struct shdma_chan *schan,
 			if (!new)
 				goto err_get_desc;
 
-			new->chunks = chunks--;
+			new->cyclic = cyclic;
+			if (cyclic)
+				new->chunks = 1;
+			else
+				new->chunks = chunks--;
 			list_add_tail(&new->node, &tx_list);
 		} while (len);
 	}
@@ -612,7 +625,8 @@ static struct dma_async_tx_descriptor *shdma_prep_memcpy(
 	sg_dma_address(&sg) = dma_src;
 	sg_dma_len(&sg) = len;
 
-	return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM, flags);
+	return shdma_prep_sg(schan, &sg, 1, &dma_dest, DMA_MEM_TO_MEM,
+			     flags, false);
 }
 
 static struct dma_async_tx_descriptor *shdma_prep_slave_sg(
@@ -640,7 +654,50 @@ static struct dma_async_tx_descriptor *shdma_prep_slave_sg(
 	slave_addr = ops->slave_addr(schan);
 
 	return shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
-			      direction, flags);
+			     direction, flags, false);
+}
+
+struct dma_async_tx_descriptor *shdma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+	size_t period_len, enum dma_transfer_direction direction,
+	unsigned long flags, void *context)
+{
+	struct shdma_chan *schan = to_shdma_chan(chan);
+	struct shdma_dev *sdev = to_shdma_dev(schan->dma_chan.device);
+	const struct shdma_ops *ops = sdev->ops;
+	unsigned int sg_len = buf_len / period_len;
+	int slave_id = schan->slave_id;
+	dma_addr_t slave_addr;
+	struct scatterlist sgl[sg_len];
+	int i;
+
+	if (!chan)
+		return NULL;
+
+	BUG_ON(!schan->desc_num);
+
+	/* Someone calling slave DMA on a generic channel? */
+	if (slave_id < 0 || (buf_len < period_len)) {
+		dev_warn(schan->dev,
+			"%s: bad parameter: buf_len=%zu, period_len=%zu, id=%d\n",
+			__func__, buf_len, period_len, slave_id);
+		return NULL;
+	}
+
+	slave_addr = ops->slave_addr(schan);
+
+	sg_init_table(sgl, sg_len);
+	for (i = 0; i < sg_len; i++) {
+		dma_addr_t src = buf_addr + (period_len * i);
+
+		sg_set_page(&sgl[i], pfn_to_page(PFN_DOWN(src)), period_len,
+			    offset_in_page(src));
+		sg_dma_address(&sgl[i]) = src;
+		sg_dma_len(&sgl[i]) = period_len;
+	}
+
+	return shdma_prep_sg(schan, sgl, sg_len, &slave_addr,
+			     direction, flags, true);
 
 static int shdma_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd,
@@ -915,6 +972,7 @@ int shdma_init(struct device *dev, struct shdma_dev *sdev,
 
 	/* Compulsory for DMA_SLAVE fields */
 	dma_dev->device_prep_slave_sg = shdma_prep_slave_sg;
+	dma_dev->device_prep_dma_cyclic = shdma_prep_dma_cyclic;
 	dma_dev->device_control = shdma_control;
 
 	dma_dev->dev = dev;
diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h
index f92c0a43c54c..abdf1f229dc3 100644
--- a/include/linux/shdma-base.h
+++ b/include/linux/shdma-base.h
@@ -54,6 +54,7 @@ struct shdma_desc {
 	dma_cookie_t cookie;
 	int chunks;
 	int mark;
+	bool cyclic;			/* used as cyclic transfer */
 };
 
 struct shdma_chan {
-- 
cgit 


From 6d48f44b7b2af67b33c1ae5994b8f642685c8bc8 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Wed, 30 Apr 2014 15:23:33 +0300
Subject: mdio_bus: implement devm_mdiobus_alloc/devm_mdiobus_free

Add a resource managed devm_mdiobus_alloc[_size]()/devm_mdiobus_free()
to automatically clean up MDIO bus allocations made by MDIO drivers,
thus leading to simplified MDIO driver code.

Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Acked-and-tested-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/driver-model/devres.txt |  5 +++
 drivers/net/phy/mdio_bus.c            | 67 +++++++++++++++++++++++++++++++++++
 include/linux/phy.h                   |  7 ++++
 3 files changed, 79 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 4f7897e99cba..c74e04494ade 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -308,3 +308,8 @@ SLAVE DMA ENGINE
 
 SPI
   devm_spi_register_master()
+
+MDIO
+  devm_mdiobus_alloc()
+  devm_mdiobus_alloc_size()
+  devm_mdiobus_free()
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 76f54b32a120..68a9a3867c0f 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -69,6 +69,73 @@ struct mii_bus *mdiobus_alloc_size(size_t size)
 }
 EXPORT_SYMBOL(mdiobus_alloc_size);
 
+static void _devm_mdiobus_free(struct device *dev, void *res)
+{
+	mdiobus_free(*(struct mii_bus **)res);
+}
+
+static int devm_mdiobus_match(struct device *dev, void *res, void *data)
+{
+	struct mii_bus **r = res;
+
+	if (WARN_ON(!r || !*r))
+		return 0;
+
+	return *r == data;
+}
+
+/**
+ * devm_mdiobus_alloc_size - Resource-managed mdiobus_alloc_size()
+ * @dev:		Device to allocate mii_bus for
+ * @sizeof_priv:	Space to allocate for private structure.
+ *
+ * Managed mdiobus_alloc_size. mii_bus allocated with this function is
+ * automatically freed on driver detach.
+ *
+ * If an mii_bus allocated with this function needs to be freed separately,
+ * devm_mdiobus_free() must be used.
+ *
+ * RETURNS:
+ * Pointer to allocated mii_bus on success, NULL on failure.
+ */
+struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv)
+{
+	struct mii_bus **ptr, *bus;
+
+	ptr = devres_alloc(_devm_mdiobus_free, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	/* the devres entry only stores a pointer to the bus allocated here */
+	bus = mdiobus_alloc_size(sizeof_priv);
+	if (bus) {
+		*ptr = bus;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return bus;
+}
+EXPORT_SYMBOL_GPL(devm_mdiobus_alloc_size);
+
+/**
+ * devm_mdiobus_free - Resource-managed mdiobus_free()
+ * @dev:		Device this mii_bus belongs to
+ * @bus:		the mii_bus associated with the device
+ *
+ * Free mii_bus allocated with devm_mdiobus_alloc_size().
+ */
+void devm_mdiobus_free(struct device *dev, struct mii_bus *bus)
+{
+	int rc;
+
+	rc = devres_release(dev, _devm_mdiobus_free,
+			    devm_mdiobus_match, bus);
+	WARN_ON(rc);
+}
+EXPORT_SYMBOL_GPL(devm_mdiobus_free);
+
 /**
  * mdiobus_release - mii_bus device release callback
  * @d: the target struct device that contains the mii_bus
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 51d15f684e7e..864ddafad8cc 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -198,6 +198,13 @@ static inline struct mii_bus *mdiobus_alloc(void)
 int mdiobus_register(struct mii_bus *bus);
 void mdiobus_unregister(struct mii_bus *bus);
 void mdiobus_free(struct mii_bus *bus);
+struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv);
+static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev)
+{
+	return devm_mdiobus_alloc_size(dev, 0);
+}
+
+void devm_mdiobus_free(struct device *dev, struct mii_bus *bus);
 struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
 int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
 int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
-- 
cgit 


From e114a710aa5058c0ba4aa1dfb105132aefeb5e04 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 30 Apr 2014 11:58:13 -0700
Subject: tcp: fix cwnd limited checking to improve congestion control

Yuchung discovered tcp_is_cwnd_limited() was returning false in
slow start phase even if the application filled the socket write queue.

All congestion modules take into account tcp_is_cwnd_limited()
before increasing cwnd, so this behavior limits slow start from
probing the bandwidth at full speed.

The problem is that even if write queue is full (aka we are _not_
application limited), cwnd can be under utilized if TSO should auto
defer or TCP Small queues decided to hold packets.

So the in_flight can be kept to smaller value, and we can get to the
point tcp_is_cwnd_limited() returns false.

With TCP Small Queues and FQ/pacing, this issue is more visible.

We fix this by having tcp_cwnd_validate(), which is supposed to track
such things, take into account unsent_segs, the number of segs that we
are not sending at the moment due to TSO or TSQ, but intend to send
real soon. Then when we are cwnd-limited, remember this fact while we
are processing the window of ACKs that comes back.

For example, suppose we have a brand new connection with cwnd=10; we
are in slow start, and we send a flight of 9 packets. By the time we
have received ACKs for all 9 packets we want our cwnd to be 18.
We implement this by setting tp->lsnd_pending to 9, and
considering ourselves to be cwnd-limited while cwnd is less than
twice tp->lsnd_pending (2*9 -> 18).

This makes tcp_is_cwnd_limited() more understandable, by removing
the GSO/TSO kludge, that tried to work around the issue.

Note the in_flight parameter can be removed in a followup cleanup
patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 +
 include/net/tcp.h     | 22 +++++++++++++++++++++-
 net/ipv4/tcp_cong.c   | 20 --------------------
 net/ipv4/tcp_output.c | 21 ++++++++++++++-------
 4 files changed, 36 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 239946868142..4e37c71ecd74 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -230,6 +230,7 @@ struct tcp_sock {
 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
+	u32	lsnd_pending;	/* packets inflight or unsent since last xmit */
 	u32	prior_cwnd;	/* Congestion window at start of Recovery. */
 	u32	prr_delivered;	/* Number of newly delivered packets to
 				 * receiver in Recovery. */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 163d2b467d78..a9fe7bc4f4bb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -974,7 +974,27 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
 {
 	return tp->snd_una + tp->snd_wnd;
 }
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight);
+
+/* We follow the spirit of RFC2861 to validate cwnd but implement a more
+ * flexible approach. The RFC suggests cwnd should not be raised unless
+ * it was fully used previously. But we allow cwnd to grow as long as the
+ * application has used half the cwnd.
+ * Example :
+ *    cwnd is 10 (IW10), but application sends 9 frames.
+ *    We allow cwnd to reach 18 when all frames are ACKed.
+ * This check is safe because it's as aggressive as slow start which already
+ * risks 100% overshoot. The advantage is that we discourage application to
+ * either send more filler packets or data to artificially blow up the cwnd
+ * usage, and allow application-limited process to probe bw more aggressively.
+ *
+ * TODO: remove in_flight once we can fix all callers, and their callers...
+ */
+static inline bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
+{
+	const struct tcp_sock *tp = tcp_sk(sk);
+
+	return tp->snd_cwnd < 2 * tp->lsnd_pending;
+}
 
 static inline void tcp_check_probe_timer(struct sock *sk)
 {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2b9464c93b88..a93b41ba05ff 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -276,26 +276,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
 	return err;
 }
 
-/* RFC2861 Check whether we are limited by application or congestion window
- * This is the inverse of cwnd check in tcp_tso_should_defer
- */
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	u32 left;
-
-	if (in_flight >= tp->snd_cwnd)
-		return true;
-
-	left = tp->snd_cwnd - in_flight;
-	if (sk_can_gso(sk) &&
-	    left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
-	    left < tp->xmit_size_goal_segs)
-		return true;
-	return left <= tcp_max_tso_deferred_mss(tp);
-}
-EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
-
 /* Slow start is used when congestion window is no greater than the slow start
  * threshold. We base on RFC2581 and also handle stretch ACKs properly.
  * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 20847de991ea..f9181a133462 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1402,12 +1402,13 @@ static void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-/* Congestion window validation. (RFC2861) */
-static void tcp_cwnd_validate(struct sock *sk)
+static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (tp->packets_out >= tp->snd_cwnd) {
+	tp->lsnd_pending = tp->packets_out + unsent_segs;
+
+	if (tcp_is_cwnd_limited(sk, 0)) {
 		/* Network is feed fully. */
 		tp->snd_cwnd_used = 0;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -1880,7 +1881,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	unsigned int tso_segs, sent_pkts;
+	unsigned int tso_segs, sent_pkts, unsent_segs = 0;
 	int cwnd_quota;
 	int result;
 
@@ -1924,7 +1925,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 				break;
 		} else {
 			if (!push_one && tcp_tso_should_defer(sk, skb))
-				break;
+				goto compute_unsent_segs;
 		}
 
 		/* TCP Small Queues :
@@ -1949,8 +1950,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			 * there is no smp_mb__after_set_bit() yet
 			 */
 			smp_mb__after_clear_bit();
-			if (atomic_read(&sk->sk_wmem_alloc) > limit)
+			if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+				u32 unsent_bytes;
+
+compute_unsent_segs:
+				unsent_bytes = tp->write_seq - tp->snd_nxt;
+				unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now);
 				break;
+			}
 		}
 
 		limit = mss_now;
@@ -1990,7 +1997,7 @@ repair:
 		/* Send one loss probe per tail loss episode. */
 		if (push_one != 2)
 			tcp_schedule_loss_probe(sk);
-		tcp_cwnd_validate(sk);
+		tcp_cwnd_validate(sk, unsent_segs);
 		return false;
 	}
 	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
-- 
cgit 


From 638b43b347216bab1a989b036a92eb7d9d9ee421 Mon Sep 17 00:00:00 2001
From: Peter Meerwald <pmeerw@pmeerw.net>
Date: Wed, 5 Feb 2014 16:57:00 +0000
Subject: iio: Add TEMP_AMBIENT and TEMP_OBJECT channel modifiers

useful for contactless temperature sensors to distinguish
between the ambient temperature and the temperature of the object

Signed-off-by: Peter Meerwald <pmeerw@pmeerw.net>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/industrialio-core.c | 2 ++
 include/linux/iio/types.h       | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index de8b1c2ed4b4..4b1f375c5659 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -85,6 +85,8 @@ static const char * const iio_modifier_names[] = {
 	[IIO_MOD_LIGHT_GREEN] = "green",
 	[IIO_MOD_LIGHT_BLUE] = "blue",
 	[IIO_MOD_QUATERNION] = "quaternion",
+	[IIO_MOD_TEMP_AMBIENT] = "ambient",
+	[IIO_MOD_TEMP_OBJECT] = "object",
 };
 
 /* relies on pairs of these shared then separate */
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 4fdab2e843b4..d480631eabc2 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -54,6 +54,8 @@ enum iio_modifier {
 	IIO_MOD_LIGHT_GREEN,
 	IIO_MOD_LIGHT_BLUE,
 	IIO_MOD_QUATERNION,
+	IIO_MOD_TEMP_AMBIENT,
+	IIO_MOD_TEMP_OBJECT,
 };
 
 enum iio_event_type {
-- 
cgit 


From 1e77d0a1ed7417d2a5a52a7b8d32aea1833faa6c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 7 Mar 2013 14:53:45 +0100
Subject: genirq: Sanitize spurious interrupt detection of threaded irqs

Till reported that the spurious interrupt detection of threaded
interrupts is broken in two ways:

- note_interrupt() is called for each action thread of a shared
  interrupt line. That's wrong as we are only interested whether none
  of the device drivers felt responsible for the interrupt, but by
  calling multiple times for a single interrupt line we account
  IRQ_NONE even if one of the drivers felt responsible.

- note_interrupt() when called from the thread handler is not
  serialized. That leaves the members of irq_desc which are used for
  the spurious detection unprotected.

To solve this we need to defer the spurious detection of a threaded
interrupt to the next hardware interrupt context where we have
implicit serialization.

If note_interrupt is called with action_ret == IRQ_WAKE_THREAD, we
check whether the previous interrupt requested a deferred check. If
not, we request a deferred check for the next hardware interrupt and
return.

If set, we check whether one of the interrupt threads signaled
success. Depending on this information we feed the result into the
spurious detector.

If one primary handler of a shared interrupt returns IRQ_HANDLED we
disable the deferred check of irq threads on the same line, as we have
found at least one device driver who cared.

Reported-by: Till Straumann <strauman@slac.stanford.edu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Austin Schuh <austin@peloton-tech.com>
Cc: Oliver Hartkopp <socketcan@hartkopp.net>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Cc: Pavel Pisa <pisa@cmp.felk.cvut.cz>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: linux-can@vger.kernel.org
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1303071450130.22263@ionos
---
 include/linux/irqdesc.h |   4 ++
 kernel/irq/manage.c     |   4 +-
 kernel/irq/spurious.c   | 106 ++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 108 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 26e2661d3935..472c021a2d4f 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -27,6 +27,8 @@ struct irq_desc;
  * @irq_count:		stats field to detect stalled irqs
  * @last_unhandled:	aging timer for unhandled count
  * @irqs_unhandled:	stats field for spurious unhandled interrupts
+ * @threads_handled:	stats field for deferred spurious detection of threaded handlers
+ * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
  * @lock:		locking for SMP
  * @affinity_hint:	hint to user space for preferred irq affinity
  * @affinity_notify:	context for notification of affinity changes
@@ -52,6 +54,8 @@ struct irq_desc {
 	unsigned int		irq_count;	/* For detecting broken IRQs */
 	unsigned long		last_unhandled;	/* Aging timer for unhandled count */
 	unsigned int		irqs_unhandled;
+	atomic_t		threads_handled;
+	int			threads_handled_last;
 	raw_spinlock_t		lock;
 	struct cpumask		*percpu_enabled;
 #ifdef CONFIG_SMP
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index d34131ca372b..3dc6a61bf06a 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -886,8 +886,8 @@ static int irq_thread(void *data)
 		irq_thread_check_affinity(desc, action);
 
 		action_ret = handler_fn(desc, action);
-		if (!noirqdebug)
-			note_interrupt(action->irq, desc, action_ret);
+		if (action_ret == IRQ_HANDLED)
+			atomic_inc(&desc->threads_handled);
 
 		wake_threads_waitq(desc);
 	}
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index a1d8cc63b56e..e2514b0e439e 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -270,6 +270,8 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
 	return action && (action->flags & IRQF_IRQPOLL);
 }
 
+#define SPURIOUS_DEFERRED	0x80000000
+
 void note_interrupt(unsigned int irq, struct irq_desc *desc,
 		    irqreturn_t action_ret)
 {
@@ -277,15 +279,111 @@ void note_interrupt(unsigned int irq, struct irq_desc *desc,
 	    irq_settings_is_polled(desc))
 		return;
 
-	/* we get here again via the threaded handler */
-	if (action_ret == IRQ_WAKE_THREAD)
-		return;
-
 	if (bad_action_ret(action_ret)) {
 		report_bad_irq(irq, desc, action_ret);
 		return;
 	}
 
+	/*
+	 * We cannot call note_interrupt from the threaded handler
+	 * because we need to look at the compound of all handlers
+	 * (primary and threaded). Aside of that in the threaded
+	 * shared case we have no serialization against an incoming
+	 * hardware interrupt while we are dealing with a threaded
+	 * result.
+	 *
+	 * So in case a thread is woken, we just note the fact and
+	 * defer the analysis to the next hardware interrupt.
+	 *
+	 * The threaded handlers store whether they successfully
+	 * handled an interrupt and we check whether that number
+	 * changed versus the last invocation.
+	 *
+	 * We could handle all interrupts with the delayed by one
+	 * mechanism, but for the non forced threaded case we'd just
+	 * add pointless overhead to the straight hardirq interrupts
+	 * for the sake of a few lines less code.
+	 */
+	if (action_ret & IRQ_WAKE_THREAD) {
+		/*
+		 * There is a thread woken. Check whether one of the
+		 * shared primary handlers returned IRQ_HANDLED. If
+		 * not we defer the spurious detection to the next
+		 * interrupt.
+		 */
+		if (action_ret == IRQ_WAKE_THREAD) {
+			int handled;
+			/*
+			 * We use bit 31 of threads_handled_last to
+			 * denote the deferred spurious detection
+			 * active. No locking necessary as
+			 * threads_handled_last is only accessed here
+			 * and we have the guarantee that hard
+			 * interrupts are not reentrant.
+			 */
+			if (!(desc->threads_handled_last & SPURIOUS_DEFERRED)) {
+				desc->threads_handled_last |= SPURIOUS_DEFERRED;
+				return;
+			}
+			/*
+			 * Check whether one of the threaded handlers
+			 * returned IRQ_HANDLED since the last
+			 * interrupt happened.
+			 *
+			 * For simplicity we just set bit 31, as it is
+			 * set in threads_handled_last as well. So we
+			 * avoid extra masking. And we really do not
+			 * care about the high bits of the handled
+			 * count. We just care about the count being
+			 * different than the one we saw before.
+			 */
+			handled = atomic_read(&desc->threads_handled);
+			handled |= SPURIOUS_DEFERRED;
+			if (handled != desc->threads_handled_last) {
+				action_ret = IRQ_HANDLED;
+				/*
+				 * Note: We keep the SPURIOUS_DEFERRED
+				 * bit set. We are handling the
+				 * previous invocation right now.
+				 * Keep it for the current one, so the
+				 * next hardware interrupt will
+				 * account for it.
+				 */
+				desc->threads_handled_last = handled;
+			} else {
+				/*
+				 * None of the threaded handlers felt
+				 * responsible for the last interrupt
+				 *
+				 * We keep the SPURIOUS_DEFERRED bit
+				 * set in threads_handled_last as we
+				 * need to account for the current
+				 * interrupt as well.
+				 */
+				action_ret = IRQ_NONE;
+			}
+		} else {
+			/*
+			 * One of the primary handlers returned
+			 * IRQ_HANDLED. So we don't care about the
+			 * threaded handlers on the same line. Clear
+			 * the deferred detection bit.
+			 *
+			 * In theory we could/should check whether the
+			 * deferred bit is set and take the result of
+			 * the previous run into account here as
+			 * well. But it's really not worth the
+			 * trouble. If every other interrupt is
+			 * handled we never trigger the spurious
+			 * detector. And if this is just the one out
+			 * of 100k unhandled ones which is handled
+			 * then we merely delay the spurious detection
+			 * by one hard interrupt. Not a real problem.
+			 */
+			desc->threads_handled_last &= ~SPURIOUS_DEFERRED;
+		}
+	}
+
 	if (unlikely(action_ret == IRQ_NONE)) {
 		/*
 		 * If we are seeing only the odd spurious IRQ caused by
-- 
cgit 


From d3ba720dd58cdf6630fee4b89482c465d5ad0d0f Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Tue, 8 Apr 2014 18:45:53 -0700
Subject: Drivers: hv: Eliminate the channel spinlock in the callback path

By ensuring that we set the callback handler to NULL in the channel close
path on the same CPU that the channel is bound to, we can eliminate this lock
acquisition and release in a performance critical path.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c      | 16 ++++++++++++----
 drivers/hv/channel_mgmt.c | 11 +++++++----
 drivers/hv/connection.c   | 11 ++++-------
 include/linux/hyperv.h    |  2 ++
 4 files changed, 25 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 602ca86a6488..740edec161bb 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -471,18 +471,26 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
 }
 EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
 
+static void reset_channel_cb(void *arg)
+{
+	struct vmbus_channel *channel = arg;
+
+	channel->onchannel_callback = NULL;
+}
+
 static void vmbus_close_internal(struct vmbus_channel *channel)
 {
 	struct vmbus_channel_close_channel *msg;
 	int ret;
-	unsigned long flags;
 
 	channel->state = CHANNEL_OPEN_STATE;
 	channel->sc_creation_callback = NULL;
 	/* Stop callback and cancel the timer asap */
-	spin_lock_irqsave(&channel->inbound_lock, flags);
-	channel->onchannel_callback = NULL;
-	spin_unlock_irqrestore(&channel->inbound_lock, flags);
+	if (channel->target_cpu != smp_processor_id())
+		smp_call_function_single(channel->target_cpu, reset_channel_cb,
+					 channel, true);
+	else
+		reset_channel_cb(channel);
 
 	/* Send a closing message */
 
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index fa920469bf10..6f7fdd9a7e77 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -365,7 +365,7 @@ static u32  next_vp;
  * performance critical channels (IDE, SCSI and Network) will be uniformly
  * distributed across all available CPUs.
  */
-static u32 get_vp_index(uuid_le *type_guid)
+static void init_vp_index(struct vmbus_channel *channel, uuid_le *type_guid)
 {
 	u32 cur_cpu;
 	int i;
@@ -387,10 +387,13 @@ static u32 get_vp_index(uuid_le *type_guid)
 		 * Also if the channel is not a performance critical
 		 * channel, bind it to cpu 0.
 		 */
-		return 0;
+		channel->target_cpu = 0;
+		channel->target_vp = 0;
+		return;
 	}
 	cur_cpu = (++next_vp % max_cpus);
-	return hv_context.vp_index[cur_cpu];
+	channel->target_cpu = cur_cpu;
+	channel->target_vp = hv_context.vp_index[cur_cpu];
 }
 
 /*
@@ -438,7 +441,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 				offer->connection_id;
 	}
 
-	newchannel->target_vp = get_vp_index(&offer->offer.if_type);
+	init_vp_index(newchannel, &offer->offer.if_type);
 
 	memcpy(&newchannel->offermsg, offer,
 	       sizeof(struct vmbus_channel_offer_channel));
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 2e7801af466e..df2363ea017f 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -277,7 +277,6 @@ struct vmbus_channel *relid2channel(u32 relid)
 static void process_chn_event(u32 relid)
 {
 	struct vmbus_channel *channel;
-	unsigned long flags;
 	void *arg;
 	bool read_state;
 	u32 bytes_to_read;
@@ -296,13 +295,12 @@ static void process_chn_event(u32 relid)
 	/*
 	 * A channel once created is persistent even when there
 	 * is no driver handling the device. An unloading driver
-	 * sets the onchannel_callback to NULL under the
-	 * protection of the channel inbound_lock. Thus, checking
-	 * and invoking the driver specific callback takes care of
-	 * orderly unloading of the driver.
+	 * sets the onchannel_callback to NULL on the same CPU
+	 * as where this interrupt is handled (in an interrupt context).
+	 * Thus, checking and invoking the driver specific callback takes
+	 * care of orderly unloading of the driver.
 	 */
 
-	spin_lock_irqsave(&channel->inbound_lock, flags);
 	if (channel->onchannel_callback != NULL) {
 		arg = channel->channel_callback_context;
 		read_state = channel->batched_reading;
@@ -327,7 +325,6 @@ static void process_chn_event(u32 relid)
 		pr_err("no channel callback for relid - %u\n", relid);
 	}
 
-	spin_unlock_irqrestore(&channel->inbound_lock, flags);
 }
 
 /*
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2d7b4f139c32..a274e089df78 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -696,6 +696,8 @@ struct vmbus_channel {
 	 * preserve the earlier behavior.
 	 */
 	u32 target_vp;
+	/* The corresponding CPUID in the guest */
+	u32 target_cpu;
 	/*
 	 * Support for sub-channels. For high performance devices,
 	 * it will be useful to have multiple sub-channels to support
-- 
cgit 


From 3a28fa35d6658703cd26f9c16aaea0eae06afd40 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Tue, 8 Apr 2014 18:45:54 -0700
Subject: Drivers: hv: vmbus: Implement per-CPU mapping of relid to channel

Currently the mapping of the relID to channel is done under the protection of a
single spin lock. Starting with ws2012, each channel is bound to a specific VCPU
in the guest. Use this binding to eliminate the spin lock by setting up
per-cpu state for mapping relId to the channel.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel_mgmt.c | 41 ++++++++++++++++++++++++++++++++++++++++-
 drivers/hv/connection.c   | 24 +++++++++++++++++++++++-
 drivers/hv/hv.c           |  2 ++
 drivers/hv/hyperv_vmbus.h |  5 +++++
 include/linux/hyperv.h    |  5 +++++
 5 files changed, 75 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 6f7fdd9a7e77..6c8b032cacba 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -149,6 +149,7 @@ static struct vmbus_channel *alloc_channel(void)
 	spin_lock_init(&channel->sc_lock);
 
 	INIT_LIST_HEAD(&channel->sc_list);
+	INIT_LIST_HEAD(&channel->percpu_list);
 
 	channel->controlwq = create_workqueue("hv_vmbus_ctl");
 	if (!channel->controlwq) {
@@ -188,7 +189,20 @@ static void free_channel(struct vmbus_channel *channel)
 	queue_work(vmbus_connection.work_queue, &channel->work);
 }
 
+static void percpu_channel_enq(void *arg)
+{
+	struct vmbus_channel *channel = arg;
+	int cpu = smp_processor_id();
+
+	list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
+}
 
+static void percpu_channel_deq(void *arg)
+{
+	struct vmbus_channel *channel = arg;
+
+	list_del(&channel->percpu_list);
+}
 
 /*
  * vmbus_process_rescind_offer -
@@ -210,6 +224,12 @@ static void vmbus_process_rescind_offer(struct work_struct *work)
 	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
 	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
 
+	if (channel->target_cpu != smp_processor_id())
+		smp_call_function_single(channel->target_cpu,
+					 percpu_channel_deq, channel, true);
+	else
+		percpu_channel_deq(channel);
+
 	if (channel->primary_channel == NULL) {
 		spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
 		list_del(&channel->listentry);
@@ -245,6 +265,7 @@ static void vmbus_process_offer(struct work_struct *work)
 							work);
 	struct vmbus_channel *channel;
 	bool fnew = true;
+	bool enq = false;
 	int ret;
 	unsigned long flags;
 
@@ -264,12 +285,22 @@ static void vmbus_process_offer(struct work_struct *work)
 		}
 	}
 
-	if (fnew)
+	if (fnew) {
 		list_add_tail(&newchannel->listentry,
 			      &vmbus_connection.chn_list);
+		enq = true;
+	}
 
 	spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
 
+	if (enq) {
+		if (newchannel->target_cpu != smp_processor_id())
+			smp_call_function_single(newchannel->target_cpu,
+						 percpu_channel_enq,
+						 newchannel, true);
+		else
+			percpu_channel_enq(newchannel);
+	}
 	if (!fnew) {
 		/*
 		 * Check to see if this is a sub-channel.
@@ -282,6 +313,14 @@ static void vmbus_process_offer(struct work_struct *work)
 			spin_lock_irqsave(&channel->sc_lock, flags);
 			list_add_tail(&newchannel->sc_list, &channel->sc_list);
 			spin_unlock_irqrestore(&channel->sc_lock, flags);
+
+			if (newchannel->target_cpu != smp_processor_id())
+				smp_call_function_single(newchannel->target_cpu,
+							 percpu_channel_enq,
+							 newchannel, true);
+			else
+				percpu_channel_enq(newchannel);
+
 			newchannel->state = CHANNEL_OPEN_STATE;
 			if (channel->sc_creation_callback != NULL)
 				channel->sc_creation_callback(newchannel);
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index df2363ea017f..7f10c151632a 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -234,6 +234,28 @@ cleanup:
 	return ret;
 }
 
+/*
+ * Map the given relid to the corresponding channel based on the
+ * per-cpu list of channels that have been affinitized to this CPU.
+ * This will be used in the channel callback path as we can do this
+ * mapping in a lock-free fashion.
+ */
+static struct vmbus_channel *pcpu_relid2channel(u32 relid)
+{
+	struct vmbus_channel *channel;
+	struct vmbus_channel *found_channel  = NULL;
+	int cpu = smp_processor_id();
+	struct list_head *pcpu_head = &hv_context.percpu_list[cpu];
+
+	list_for_each_entry(channel, pcpu_head, percpu_list) {
+		if (channel->offermsg.child_relid == relid) {
+			found_channel = channel;
+			break;
+		}
+	}
+
+	return found_channel;
+}
 
 /*
  * relid2channel - Get the channel object given its
@@ -285,7 +307,7 @@ static void process_chn_event(u32 relid)
 	 * Find the channel based on this relid and invokes the
 	 * channel callback to process the event
 	 */
-	channel = relid2channel(relid);
+	channel = pcpu_relid2channel(relid);
 
 	if (!channel) {
 		pr_err("channel not found for relid - %u\n", relid);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index bcb49502c3bf..edfc8488cb03 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -383,6 +383,8 @@ void hv_synic_init(void *arg)
 	 */
 	rdmsrl(HV_X64_MSR_VP_INDEX, vp_index);
 	hv_context.vp_index[cpu] = (u32)vp_index;
+
+	INIT_LIST_HEAD(&hv_context.percpu_list[cpu]);
 	return;
 }
 
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 860134da8039..18d1a8404cbc 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -510,6 +510,11 @@ struct hv_context {
 	 * basis.
 	 */
 	struct tasklet_struct *event_dpc[NR_CPUS];
+	/*
+	 * To optimize the mapping of relid to channel, maintain
+	 * per-cpu list of the channels based on their CPU affinity.
+	 */
+	struct list_head percpu_list[NR_CPUS];
 };
 
 extern struct hv_context hv_context;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index a274e089df78..08cfaff8a072 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -734,6 +734,11 @@ struct vmbus_channel {
 	 * Support per-channel state for use by vmbus drivers.
 	 */
 	void *per_channel_state;
+	/*
+	 * To support per-cpu lookup mapping of relid to channel,
+	 * link up channels based on their CPU affinity.
+	 */
+	struct list_head percpu_list;
 };
 
 static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
-- 
cgit 


From 425f3740cf8e93fac6318ed862bcc3081b818f0b Mon Sep 17 00:00:00 2001
From: Alan <alan@linux.intel.com>
Date: Mon, 28 Apr 2014 20:47:36 +0100
Subject: goldfish: Add a 64bit write helper

The base code imported from the Google tree is ifdef heaven. Prepare to fix
this by adding a helper function.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/goldfish.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 include/linux/goldfish.h

(limited to 'include/linux')

diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h
new file mode 100644
index 000000000000..9cc28902b54c
--- /dev/null
+++ b/include/linux/goldfish.h
@@ -0,0 +1,15 @@
+#ifndef __LINUX_GOLDFISH_H
+#define __LINUX_GOLDFISH_H
+
+/* Helpers for Goldfish virtual platform */
+
+static inline void gf_write64(unsigned long data,
+		void __iomem *portl, void __iomem *porth)
+{
+	writel((u32)data, portl);
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	writel(data>>32, porth);
+#endif
+}
+
+#endif /* __LINUX_GOLDFISH_H */
-- 
cgit 


From 69dfa00ccb72a37f3810687ca110e5a8154c6eed Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 4 May 2014 15:09:13 -0400
Subject: cgroup: make flags and subsys_masks unsigned int

There's no reason to use atomic bitops for cgroup_subsys_state->flags,
cgroup_root->flags and various subsys_masks.  This patch updates those
to use bitwise and/or operations instead and converts them from
unsigned long to unsigned int.

This makes the fields occupy (marginally) smaller space and makes it
clear that they don't require atomicity.

This patch doesn't cause any behavior difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  8 ++++----
 kernel/cgroup.c        | 37 ++++++++++++++++++-------------------
 2 files changed, 22 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 4b38e2d6110d..c6c703f2486b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -62,7 +62,7 @@ struct cgroup_subsys_state {
 	/* the parent css */
 	struct cgroup_subsys_state *parent;
 
-	unsigned long flags;
+	unsigned int flags;
 
 	/* percpu_ref killing and RCU release */
 	struct rcu_head rcu_head;
@@ -185,7 +185,7 @@ struct cgroup {
 	u64 serial_nr;
 
 	/* the bitmask of subsystems enabled on the child cgroups */
-	unsigned long child_subsys_mask;
+	unsigned int child_subsys_mask;
 
 	/* Private pointers for each registered subsystem */
 	struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
@@ -312,7 +312,7 @@ struct cgroup_root {
 	struct kernfs_root *kf_root;
 
 	/* The bitmask of subsystems attached to this hierarchy */
-	unsigned long subsys_mask;
+	unsigned int subsys_mask;
 
 	/* Unique id for this hierarchy. */
 	int hierarchy_id;
@@ -327,7 +327,7 @@ struct cgroup_root {
 	struct list_head root_list;
 
 	/* Hierarchy-specific flags */
-	unsigned long flags;
+	unsigned int flags;
 
 	/* IDs for cgroups in this hierarchy */
 	struct idr cgroup_idr;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3873267c9ee3..21667f396a1e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -181,7 +181,7 @@ static struct cftype cgroup_base_files[];
 
 static void cgroup_put(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
-			     unsigned long ss_mask);
+			     unsigned int ss_mask);
 static void cgroup_destroy_css_killed(struct cgroup *cgrp);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
@@ -963,7 +963,7 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
  * update of a tasks cgroup pointer by cgroup_attach_task()
  */
 
-static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask);
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned int subsys_mask);
 static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
 static const struct file_operations proc_cgroupstats_operations;
 
@@ -1079,7 +1079,7 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
  * @cgrp: target cgroup
  * @subsys_mask: mask of the subsystem ids whose files should be removed
  */
-static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask)
+static void cgroup_clear_dir(struct cgroup *cgrp, unsigned int subsys_mask)
 {
 	struct cgroup_subsys *ss;
 	int i;
@@ -1087,15 +1087,14 @@ static void cgroup_clear_dir(struct cgroup *cgrp, unsigned long subsys_mask)
 	for_each_subsys(ss, i) {
 		struct cftype *cfts;
 
-		if (!test_bit(i, &subsys_mask))
+		if (!(subsys_mask & (1 << i)))
 			continue;
 		list_for_each_entry(cfts, &ss->cfts, node)
 			cgroup_addrm_files(cgrp, cfts, false);
 	}
 }
 
-static int rebind_subsystems(struct cgroup_root *dst_root,
-			     unsigned long ss_mask)
+static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
 {
 	struct cgroup_subsys *ss;
 	int ssid, i, ret;
@@ -1128,7 +1127,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root,
 		 * Just warn about it and continue.
 		 */
 		if (cgrp_dfl_root_visible) {
-			pr_warn("failed to create files (%d) while rebinding 0x%lx to default root\n",
+			pr_warn("failed to create files (%d) while rebinding 0x%x to default root\n",
 				ret, ss_mask);
 			pr_warn("you may retry by moving them to a different hierarchy and unbinding\n");
 		}
@@ -1214,8 +1213,8 @@ static int cgroup_show_options(struct seq_file *seq,
 }
 
 struct cgroup_sb_opts {
-	unsigned long subsys_mask;
-	unsigned long flags;
+	unsigned int subsys_mask;
+	unsigned int flags;
 	char *release_agent;
 	bool cpuset_clone_children;
 	char *name;
@@ -1227,12 +1226,12 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 {
 	char *token, *o = data;
 	bool all_ss = false, one_ss = false;
-	unsigned long mask = (unsigned long)-1;
+	unsigned int mask = -1U;
 	struct cgroup_subsys *ss;
 	int i;
 
 #ifdef CONFIG_CPUSETS
-	mask = ~(1UL << cpuset_cgrp_id);
+	mask = ~(1U << cpuset_cgrp_id);
 #endif
 
 	memset(opts, 0, sizeof(*opts));
@@ -1313,7 +1312,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 			/* Mutually exclusive option 'all' + subsystem name */
 			if (all_ss)
 				return -EINVAL;
-			set_bit(i, &opts->subsys_mask);
+			opts->subsys_mask |= (1 << i);
 			one_ss = true;
 
 			break;
@@ -1342,7 +1341,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 		if (all_ss || (!one_ss && !opts->none && !opts->name))
 			for_each_subsys(ss, i)
 				if (!ss->disabled)
-					set_bit(i, &opts->subsys_mask);
+					opts->subsys_mask |= (1 << i);
 
 		/*
 		 * We either have to specify by name or by subsystems. (So
@@ -1373,7 +1372,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 	int ret = 0;
 	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
 	struct cgroup_sb_opts opts;
-	unsigned long added_mask, removed_mask;
+	unsigned int added_mask, removed_mask;
 
 	if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) {
 		pr_err("sane_behavior: remount is not allowed\n");
@@ -1398,7 +1397,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 	/* Don't allow flags or name to change at remount */
 	if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
 	    (opts.name && strcmp(opts.name, root->name))) {
-		pr_err("option or name mismatch, new: 0x%lx \"%s\", old: 0x%lx \"%s\"\n",
+		pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n",
 		       opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "",
 		       root->flags & CGRP_ROOT_OPTION_MASK, root->name);
 		ret = -EINVAL;
@@ -1522,7 +1521,7 @@ static void init_cgroup_root(struct cgroup_root *root,
 		set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
 }
 
-static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask)
+static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 {
 	LIST_HEAD(tmp_links);
 	struct cgroup *root_cgrp = &root->cgrp;
@@ -2507,7 +2506,7 @@ out_finish:
 static int cgroup_subtree_control_write(struct cgroup_subsys_state *dummy_css,
 					struct cftype *cft, char *buffer)
 {
-	unsigned long enable_req = 0, disable_req = 0, enable, disable;
+	unsigned int enable_req = 0, disable_req = 0, enable, disable;
 	struct cgroup *cgrp = dummy_css->cgroup, *child;
 	struct cgroup_subsys *ss;
 	char *tok, *p;
@@ -3998,7 +3997,7 @@ static struct cftype cgroup_base_files[] = {
  *
  * On failure, no file is added.
  */
-static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
+static int cgroup_populate_dir(struct cgroup *cgrp, unsigned int subsys_mask)
 {
 	struct cgroup_subsys *ss;
 	int i, ret = 0;
@@ -4007,7 +4006,7 @@ static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask)
 	for_each_subsys(ss, i) {
 		struct cftype *cfts;
 
-		if (!test_bit(i, &subsys_mask))
+		if (!(subsys_mask & (1 << i)))
 			continue;
 
 		list_for_each_entry(cfts, &ss->cfts, node) {
-- 
cgit 


From 7d699ddb2b181a2c76e5ea18b1bdf102c4bebe4b Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 4 May 2014 15:09:13 -0400
Subject: cgroup, memcg: allocate cgroup ID from 1

Currently, cgroup->id is allocated from 0, which is always assigned to
the root cgroup; unfortunately, memcg wants to use ID 0 to indicate
invalid IDs and ends up incrementing all IDs by one.

It's reasonable to reserve 0 for special purposes.  This patch updates
cgroup core so that ID 0 is not used and the root cgroups get ID 1.
The ID incrementing is removed from memcg.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 4 ++--
 kernel/cgroup.c        | 4 ++--
 mm/memcontrol.c        | 8 ++------
 3 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c6c703f2486b..793f70a48820 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -144,8 +144,8 @@ struct cgroup {
 	/*
 	 * idr allocated in-hierarchy ID.
 	 *
-	 * The ID of the root cgroup is always 0, and a new cgroup
-	 * will be assigned with a smallest available ID.
+	 * ID 0 is not used, the ID of the root cgroup is always 1, and a
+	 * new cgroup will be assigned with a smallest available ID.
 	 *
 	 * Allocating/Removing ID must be protected by cgroup_mutex.
 	 */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 21667f396a1e..3fa0463e74bb 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1531,7 +1531,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 	lockdep_assert_held(&cgroup_tree_mutex);
 	lockdep_assert_held(&cgroup_mutex);
 
-	ret = idr_alloc(&root->cgroup_idr, root_cgrp, 0, 1, GFP_KERNEL);
+	ret = idr_alloc(&root->cgroup_idr, root_cgrp, 1, 2, GFP_KERNEL);
 	if (ret < 0)
 		goto out;
 	root_cgrp->id = ret;
@@ -4225,7 +4225,7 @@ static long cgroup_create(struct cgroup *parent, const char *name,
 	 * Temporarily set the pointer to NULL, so idr_find() won't return
 	 * a half-baked cgroup.
 	 */
-	cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL);
+	cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_KERNEL);
 	if (cgrp->id < 0) {
 		err = -ENOMEM;
 		goto err_unlock;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 29501f040568..1d0b29715b73 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -527,18 +527,14 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
-	/*
-	 * The ID of the root cgroup is 0, but memcg treat 0 as an
-	 * invalid ID, so we return (cgroup_id + 1).
-	 */
-	return memcg->css.cgroup->id + 1;
+	return memcg->css.cgroup->id;
 }
 
 static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 {
 	struct cgroup_subsys_state *css;
 
-	css = css_from_id(id - 1, &memory_cgrp_subsys);
+	css = css_from_id(id, &memory_cgrp_subsys);
 	return mem_cgroup_from_css(css);
 }
 
-- 
cgit 


From 15a4c835e4ed3e60dd68727cd1907e3dd89563f4 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 4 May 2014 15:09:14 -0400
Subject: cgroup, memcg: implement css->id and convert css_from_id() to use it

Until now, cgroup->id has been used to identify all the associated
csses and css_from_id() takes cgroup ID and returns the matching css
by looking up the cgroup and then dereferencing the css associated
with it; however, now that the lifetimes of cgroup and css are
separate, this is incorrect and breaks on the unified hierarchy when a
controller is disabled and enabled back again before the previous
instance is released.

This patch adds css->id which is a subsystem-unique ID and converts
css_from_id() to look up by the new css->id instead.  memcg is the
only user of css_from_id() and also converted to use css->id instead.

For traditional hierarchies, this shouldn't make any functional
difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jianyu Zhan <nasa4836@gmail.com>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  9 ++++++++
 kernel/cgroup.c        | 59 ++++++++++++++++++++++++++++++++------------------
 mm/memcontrol.c        |  4 ++--
 3 files changed, 49 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 793f70a48820..2dfabb3b749a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -62,6 +62,12 @@ struct cgroup_subsys_state {
 	/* the parent css */
 	struct cgroup_subsys_state *parent;
 
+	/*
+	 * Subsys-unique ID.  0 is unused and root is always 1.  The
+	 * matching css can be looked up using css_from_id().
+	 */
+	int id;
+
 	unsigned int flags;
 
 	/* percpu_ref killing and RCU release */
@@ -655,6 +661,9 @@ struct cgroup_subsys {
 	/* link to parent, protected by cgroup_lock() */
 	struct cgroup_root *root;
 
+	/* idr for css->id */
+	struct idr css_idr;
+
 	/*
 	 * List of cftypes.  Each entry is the first entry of an array
 	 * terminated by zero length name.
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f1c98c527b2d..a1a20e8c973a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -100,8 +100,8 @@ static DECLARE_RWSEM(css_set_rwsem);
 #endif
 
 /*
- * Protects cgroup_idr so that IDs can be released without grabbing
- * cgroup_mutex.
+ * Protects cgroup_idr and css_idr so that IDs can be released without
+ * grabbing cgroup_mutex.
  */
 static DEFINE_SPINLOCK(cgroup_idr_lock);
 
@@ -1089,12 +1089,6 @@ static void cgroup_put(struct cgroup *cgrp)
 	if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp)))
 		return;
 
-	/*
-	 * XXX: cgrp->id is only used to look up css's.  As cgroup and
-	 * css's lifetimes will be decoupled, it should be made
-	 * per-subsystem and moved to css->id so that lookups are
-	 * successful until the target css is released.
-	 */
 	cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
 	cgrp->id = -1;
 
@@ -4104,8 +4098,11 @@ static void css_release(struct percpu_ref *ref)
 {
 	struct cgroup_subsys_state *css =
 		container_of(ref, struct cgroup_subsys_state, refcnt);
+	struct cgroup_subsys *ss = css->ss;
+
+	RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
+	cgroup_idr_remove(&ss->css_idr, css->id);
 
-	RCU_INIT_POINTER(css->cgroup->subsys[css->ss->id], NULL);
 	call_rcu(&css->rcu_head, css_free_rcu_fn);
 }
 
@@ -4195,9 +4192,17 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 	if (err)
 		goto err_free_css;
 
+	err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_NOWAIT);
+	if (err < 0)
+		goto err_free_percpu_ref;
+	css->id = err;
+
 	err = cgroup_populate_dir(cgrp, 1 << ss->id);
 	if (err)
-		goto err_free_percpu_ref;
+		goto err_free_id;
+
+	/* @css is ready to be brought online now, make it visible */
+	cgroup_idr_replace(&ss->css_idr, css, css->id);
 
 	err = online_css(css);
 	if (err)
@@ -4216,6 +4221,8 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 
 err_clear_dir:
 	cgroup_clear_dir(css->cgroup, 1 << css->ss->id);
+err_free_id:
+	cgroup_idr_remove(&ss->css_idr, css->id);
 err_free_percpu_ref:
 	percpu_ref_cancel_init(&css->refcnt);
 err_free_css:
@@ -4642,7 +4649,7 @@ static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
 	.rename			= cgroup_rename,
 };
 
-static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
+static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
 {
 	struct cgroup_subsys_state *css;
 
@@ -4651,6 +4658,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	mutex_lock(&cgroup_tree_mutex);
 	mutex_lock(&cgroup_mutex);
 
+	idr_init(&ss->css_idr);
 	INIT_LIST_HEAD(&ss->cfts);
 
 	/* Create the root cgroup state for this subsystem */
@@ -4659,6 +4667,13 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	/* We don't handle early failures gracefully */
 	BUG_ON(IS_ERR(css));
 	init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
+	if (early) {
+		/* idr_alloc() can't be called safely during early init */
+		css->id = 1;
+	} else {
+		css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
+		BUG_ON(css->id < 0);
+	}
 
 	/* Update the init_css_set to contain a subsys
 	 * pointer to this state - since the subsystem is
@@ -4709,7 +4724,7 @@ int __init cgroup_init_early(void)
 		ss->name = cgroup_subsys_name[i];
 
 		if (ss->early_init)
-			cgroup_init_subsys(ss);
+			cgroup_init_subsys(ss, true);
 	}
 	return 0;
 }
@@ -4741,8 +4756,16 @@ int __init cgroup_init(void)
 	mutex_unlock(&cgroup_tree_mutex);
 
 	for_each_subsys(ss, ssid) {
-		if (!ss->early_init)
-			cgroup_init_subsys(ss);
+		if (ss->early_init) {
+			struct cgroup_subsys_state *css =
+				init_css_set.subsys[ss->id];
+
+			css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
+						   GFP_KERNEL);
+			BUG_ON(css->id < 0);
+		} else {
+			cgroup_init_subsys(ss, false);
+		}
 
 		list_add_tail(&init_css_set.e_cset_node[ssid],
 			      &cgrp_dfl_root.cgrp.e_csets[ssid]);
@@ -5196,14 +5219,8 @@ struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
  */
 struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
 {
-	struct cgroup *cgrp;
-
 	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	cgrp = idr_find(&ss->root->cgroup_idr, id);
-	if (cgrp)
-		return cgroup_css(cgrp, ss);
-	return NULL;
+	return idr_find(&ss->css_idr, id);
 }
 
 #ifdef CONFIG_CGROUP_DEBUG
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1d0b29715b73..c3f82f69ef58 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -527,7 +527,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 
 static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
 {
-	return memcg->css.cgroup->id;
+	return memcg->css.id;
 }
 
 static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
@@ -6401,7 +6401,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
 
-	if (css->cgroup->id > MEM_CGROUP_ID_MAX)
+	if (css->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
 
 	if (!parent)
-- 
cgit 


From 5bcfedf06f7fdf9efcf65dc11198e9012f7530f4 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 1 May 2014 18:34:18 +0200
Subject: net: filter: simplify label names from jump-table

This patch simplifies label naming for the BPF jump-table.
When we define labels via DL(), we just concatenate/textify
the combination of instruction opcode which consists of the
class, subclass, word size, target register and so on. Each
time we leave BPF_ prefix intact, so that e.g. the preprocessor
generates a label BPF_ALU_BPF_ADD_BPF_X for DL(BPF_ALU, BPF_ADD,
BPF_X) whereas a label name of ALU_ADD_X is much easier
to grasp. Pure cleanup only.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |   3 +
 net/core/filter.c      | 308 ++++++++++++++++++++++++-------------------------
 2 files changed, 157 insertions(+), 154 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 759abf78dd61..b042d1db127f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -37,6 +37,9 @@
 #define BPF_CALL	0x80	/* function call */
 #define BPF_EXIT	0x90	/* function return */
 
+/* Placeholder/dummy for 0 */
+#define BPF_0		0
+
 /* BPF has 10 general purpose 64-bit registers and stack frame. */
 #define MAX_BPF_REG	11
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 7c4db3dd3d1e..a1784e95b049 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -156,94 +156,94 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
 	static const void *jumptable[256] = {
 		[0 ... 255] = &&default_label,
 		/* Now overwrite non-defaults ... */
-#define DL(A, B, C)	[A|B|C] = &&A##_##B##_##C
-		DL(BPF_ALU, BPF_ADD, BPF_X),
-		DL(BPF_ALU, BPF_ADD, BPF_K),
-		DL(BPF_ALU, BPF_SUB, BPF_X),
-		DL(BPF_ALU, BPF_SUB, BPF_K),
-		DL(BPF_ALU, BPF_AND, BPF_X),
-		DL(BPF_ALU, BPF_AND, BPF_K),
-		DL(BPF_ALU, BPF_OR, BPF_X),
-		DL(BPF_ALU, BPF_OR, BPF_K),
-		DL(BPF_ALU, BPF_LSH, BPF_X),
-		DL(BPF_ALU, BPF_LSH, BPF_K),
-		DL(BPF_ALU, BPF_RSH, BPF_X),
-		DL(BPF_ALU, BPF_RSH, BPF_K),
-		DL(BPF_ALU, BPF_XOR, BPF_X),
-		DL(BPF_ALU, BPF_XOR, BPF_K),
-		DL(BPF_ALU, BPF_MUL, BPF_X),
-		DL(BPF_ALU, BPF_MUL, BPF_K),
-		DL(BPF_ALU, BPF_MOV, BPF_X),
-		DL(BPF_ALU, BPF_MOV, BPF_K),
-		DL(BPF_ALU, BPF_DIV, BPF_X),
-		DL(BPF_ALU, BPF_DIV, BPF_K),
-		DL(BPF_ALU, BPF_MOD, BPF_X),
-		DL(BPF_ALU, BPF_MOD, BPF_K),
-		DL(BPF_ALU, BPF_NEG, 0),
-		DL(BPF_ALU, BPF_END, BPF_TO_BE),
-		DL(BPF_ALU, BPF_END, BPF_TO_LE),
-		DL(BPF_ALU64, BPF_ADD, BPF_X),
-		DL(BPF_ALU64, BPF_ADD, BPF_K),
-		DL(BPF_ALU64, BPF_SUB, BPF_X),
-		DL(BPF_ALU64, BPF_SUB, BPF_K),
-		DL(BPF_ALU64, BPF_AND, BPF_X),
-		DL(BPF_ALU64, BPF_AND, BPF_K),
-		DL(BPF_ALU64, BPF_OR, BPF_X),
-		DL(BPF_ALU64, BPF_OR, BPF_K),
-		DL(BPF_ALU64, BPF_LSH, BPF_X),
-		DL(BPF_ALU64, BPF_LSH, BPF_K),
-		DL(BPF_ALU64, BPF_RSH, BPF_X),
-		DL(BPF_ALU64, BPF_RSH, BPF_K),
-		DL(BPF_ALU64, BPF_XOR, BPF_X),
-		DL(BPF_ALU64, BPF_XOR, BPF_K),
-		DL(BPF_ALU64, BPF_MUL, BPF_X),
-		DL(BPF_ALU64, BPF_MUL, BPF_K),
-		DL(BPF_ALU64, BPF_MOV, BPF_X),
-		DL(BPF_ALU64, BPF_MOV, BPF_K),
-		DL(BPF_ALU64, BPF_ARSH, BPF_X),
-		DL(BPF_ALU64, BPF_ARSH, BPF_K),
-		DL(BPF_ALU64, BPF_DIV, BPF_X),
-		DL(BPF_ALU64, BPF_DIV, BPF_K),
-		DL(BPF_ALU64, BPF_MOD, BPF_X),
-		DL(BPF_ALU64, BPF_MOD, BPF_K),
-		DL(BPF_ALU64, BPF_NEG, 0),
-		DL(BPF_JMP, BPF_CALL, 0),
-		DL(BPF_JMP, BPF_JA, 0),
-		DL(BPF_JMP, BPF_JEQ, BPF_X),
-		DL(BPF_JMP, BPF_JEQ, BPF_K),
-		DL(BPF_JMP, BPF_JNE, BPF_X),
-		DL(BPF_JMP, BPF_JNE, BPF_K),
-		DL(BPF_JMP, BPF_JGT, BPF_X),
-		DL(BPF_JMP, BPF_JGT, BPF_K),
-		DL(BPF_JMP, BPF_JGE, BPF_X),
-		DL(BPF_JMP, BPF_JGE, BPF_K),
-		DL(BPF_JMP, BPF_JSGT, BPF_X),
-		DL(BPF_JMP, BPF_JSGT, BPF_K),
-		DL(BPF_JMP, BPF_JSGE, BPF_X),
-		DL(BPF_JMP, BPF_JSGE, BPF_K),
-		DL(BPF_JMP, BPF_JSET, BPF_X),
-		DL(BPF_JMP, BPF_JSET, BPF_K),
-		DL(BPF_JMP, BPF_EXIT, 0),
-		DL(BPF_STX, BPF_MEM, BPF_B),
-		DL(BPF_STX, BPF_MEM, BPF_H),
-		DL(BPF_STX, BPF_MEM, BPF_W),
-		DL(BPF_STX, BPF_MEM, BPF_DW),
-		DL(BPF_STX, BPF_XADD, BPF_W),
-		DL(BPF_STX, BPF_XADD, BPF_DW),
-		DL(BPF_ST, BPF_MEM, BPF_B),
-		DL(BPF_ST, BPF_MEM, BPF_H),
-		DL(BPF_ST, BPF_MEM, BPF_W),
-		DL(BPF_ST, BPF_MEM, BPF_DW),
-		DL(BPF_LDX, BPF_MEM, BPF_B),
-		DL(BPF_LDX, BPF_MEM, BPF_H),
-		DL(BPF_LDX, BPF_MEM, BPF_W),
-		DL(BPF_LDX, BPF_MEM, BPF_DW),
-		DL(BPF_LD, BPF_ABS, BPF_W),
-		DL(BPF_LD, BPF_ABS, BPF_H),
-		DL(BPF_LD, BPF_ABS, BPF_B),
-		DL(BPF_LD, BPF_IND, BPF_W),
-		DL(BPF_LD, BPF_IND, BPF_H),
-		DL(BPF_LD, BPF_IND, BPF_B),
+#define DL(A, B, C)	[BPF_##A|BPF_##B|BPF_##C] = &&A##_##B##_##C
+		DL(ALU, ADD, X),
+		DL(ALU, ADD, K),
+		DL(ALU, SUB, X),
+		DL(ALU, SUB, K),
+		DL(ALU, AND, X),
+		DL(ALU, AND, K),
+		DL(ALU, OR, X),
+		DL(ALU, OR, K),
+		DL(ALU, LSH, X),
+		DL(ALU, LSH, K),
+		DL(ALU, RSH, X),
+		DL(ALU, RSH, K),
+		DL(ALU, XOR, X),
+		DL(ALU, XOR, K),
+		DL(ALU, MUL, X),
+		DL(ALU, MUL, K),
+		DL(ALU, MOV, X),
+		DL(ALU, MOV, K),
+		DL(ALU, DIV, X),
+		DL(ALU, DIV, K),
+		DL(ALU, MOD, X),
+		DL(ALU, MOD, K),
+		DL(ALU, NEG, 0),
+		DL(ALU, END, TO_BE),
+		DL(ALU, END, TO_LE),
+		DL(ALU64, ADD, X),
+		DL(ALU64, ADD, K),
+		DL(ALU64, SUB, X),
+		DL(ALU64, SUB, K),
+		DL(ALU64, AND, X),
+		DL(ALU64, AND, K),
+		DL(ALU64, OR, X),
+		DL(ALU64, OR, K),
+		DL(ALU64, LSH, X),
+		DL(ALU64, LSH, K),
+		DL(ALU64, RSH, X),
+		DL(ALU64, RSH, K),
+		DL(ALU64, XOR, X),
+		DL(ALU64, XOR, K),
+		DL(ALU64, MUL, X),
+		DL(ALU64, MUL, K),
+		DL(ALU64, MOV, X),
+		DL(ALU64, MOV, K),
+		DL(ALU64, ARSH, X),
+		DL(ALU64, ARSH, K),
+		DL(ALU64, DIV, X),
+		DL(ALU64, DIV, K),
+		DL(ALU64, MOD, X),
+		DL(ALU64, MOD, K),
+		DL(ALU64, NEG, 0),
+		DL(JMP, CALL, 0),
+		DL(JMP, JA, 0),
+		DL(JMP, JEQ, X),
+		DL(JMP, JEQ, K),
+		DL(JMP, JNE, X),
+		DL(JMP, JNE, K),
+		DL(JMP, JGT, X),
+		DL(JMP, JGT, K),
+		DL(JMP, JGE, X),
+		DL(JMP, JGE, K),
+		DL(JMP, JSGT, X),
+		DL(JMP, JSGT, K),
+		DL(JMP, JSGE, X),
+		DL(JMP, JSGE, K),
+		DL(JMP, JSET, X),
+		DL(JMP, JSET, K),
+		DL(JMP, EXIT, 0),
+		DL(STX, MEM, B),
+		DL(STX, MEM, H),
+		DL(STX, MEM, W),
+		DL(STX, MEM, DW),
+		DL(STX, XADD, W),
+		DL(STX, XADD, DW),
+		DL(ST, MEM, B),
+		DL(ST, MEM, H),
+		DL(ST, MEM, W),
+		DL(ST, MEM, DW),
+		DL(LDX, MEM, B),
+		DL(LDX, MEM, H),
+		DL(LDX, MEM, W),
+		DL(LDX, MEM, DW),
+		DL(LD, ABS, W),
+		DL(LD, ABS, H),
+		DL(LD, ABS, B),
+		DL(LD, IND, W),
+		DL(LD, IND, H),
+		DL(LD, IND, B),
 #undef DL
 	};
 
@@ -257,93 +257,93 @@ select_insn:
 
 	/* ALU */
 #define ALU(OPCODE, OP)			\
-	BPF_ALU64_##OPCODE##_BPF_X:	\
+	ALU64_##OPCODE##_X:		\
 		A = A OP X;		\
 		CONT;			\
-	BPF_ALU_##OPCODE##_BPF_X:	\
+	ALU_##OPCODE##_X:		\
 		A = (u32) A OP (u32) X;	\
 		CONT;			\
-	BPF_ALU64_##OPCODE##_BPF_K:	\
+	ALU64_##OPCODE##_K:		\
 		A = A OP K;		\
 		CONT;			\
-	BPF_ALU_##OPCODE##_BPF_K:	\
+	ALU_##OPCODE##_K:		\
 		A = (u32) A OP (u32) K;	\
 		CONT;
 
-	ALU(BPF_ADD,  +)
-	ALU(BPF_SUB,  -)
-	ALU(BPF_AND,  &)
-	ALU(BPF_OR,   |)
-	ALU(BPF_LSH, <<)
-	ALU(BPF_RSH, >>)
-	ALU(BPF_XOR,  ^)
-	ALU(BPF_MUL,  *)
+	ALU(ADD,  +)
+	ALU(SUB,  -)
+	ALU(AND,  &)
+	ALU(OR,   |)
+	ALU(LSH, <<)
+	ALU(RSH, >>)
+	ALU(XOR,  ^)
+	ALU(MUL,  *)
 #undef ALU
-	BPF_ALU_BPF_NEG_0:
+	ALU_NEG_0:
 		A = (u32) -A;
 		CONT;
-	BPF_ALU64_BPF_NEG_0:
+	ALU64_NEG_0:
 		A = -A;
 		CONT;
-	BPF_ALU_BPF_MOV_BPF_X:
+	ALU_MOV_X:
 		A = (u32) X;
 		CONT;
-	BPF_ALU_BPF_MOV_BPF_K:
+	ALU_MOV_K:
 		A = (u32) K;
 		CONT;
-	BPF_ALU64_BPF_MOV_BPF_X:
+	ALU64_MOV_X:
 		A = X;
 		CONT;
-	BPF_ALU64_BPF_MOV_BPF_K:
+	ALU64_MOV_K:
 		A = K;
 		CONT;
-	BPF_ALU64_BPF_ARSH_BPF_X:
+	ALU64_ARSH_X:
 		(*(s64 *) &A) >>= X;
 		CONT;
-	BPF_ALU64_BPF_ARSH_BPF_K:
+	ALU64_ARSH_K:
 		(*(s64 *) &A) >>= K;
 		CONT;
-	BPF_ALU64_BPF_MOD_BPF_X:
+	ALU64_MOD_X:
 		if (unlikely(X == 0))
 			return 0;
 		tmp = A;
 		A = do_div(tmp, X);
 		CONT;
-	BPF_ALU_BPF_MOD_BPF_X:
+	ALU_MOD_X:
 		if (unlikely(X == 0))
 			return 0;
 		tmp = (u32) A;
 		A = do_div(tmp, (u32) X);
 		CONT;
-	BPF_ALU64_BPF_MOD_BPF_K:
+	ALU64_MOD_K:
 		tmp = A;
 		A = do_div(tmp, K);
 		CONT;
-	BPF_ALU_BPF_MOD_BPF_K:
+	ALU_MOD_K:
 		tmp = (u32) A;
 		A = do_div(tmp, (u32) K);
 		CONT;
-	BPF_ALU64_BPF_DIV_BPF_X:
+	ALU64_DIV_X:
 		if (unlikely(X == 0))
 			return 0;
 		do_div(A, X);
 		CONT;
-	BPF_ALU_BPF_DIV_BPF_X:
+	ALU_DIV_X:
 		if (unlikely(X == 0))
 			return 0;
 		tmp = (u32) A;
 		do_div(tmp, (u32) X);
 		A = (u32) tmp;
 		CONT;
-	BPF_ALU64_BPF_DIV_BPF_K:
+	ALU64_DIV_K:
 		do_div(A, K);
 		CONT;
-	BPF_ALU_BPF_DIV_BPF_K:
+	ALU_DIV_K:
 		tmp = (u32) A;
 		do_div(tmp, (u32) K);
 		A = (u32) tmp;
 		CONT;
-	BPF_ALU_BPF_END_BPF_TO_BE:
+	ALU_END_TO_BE:
 		switch (K) {
 		case 16:
 			A = (__force u16) cpu_to_be16(A);
@@ -356,7 +356,7 @@ select_insn:
 			break;
 		}
 		CONT;
-	BPF_ALU_BPF_END_BPF_TO_LE:
+	ALU_END_TO_LE:
 		switch (K) {
 		case 16:
 			A = (__force u16) cpu_to_le16(A);
@@ -371,7 +371,7 @@ select_insn:
 		CONT;
 
 	/* CALL */
-	BPF_JMP_BPF_CALL_0:
+	JMP_CALL_0:
 		/* Function call scratches R1-R5 registers, preserves R6-R9,
 		 * and stores return value into R0.
 		 */
@@ -380,122 +380,122 @@ select_insn:
 		CONT;
 
 	/* JMP */
-	BPF_JMP_BPF_JA_0:
+	JMP_JA_0:
 		insn += insn->off;
 		CONT;
-	BPF_JMP_BPF_JEQ_BPF_X:
+	JMP_JEQ_X:
 		if (A == X) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JEQ_BPF_K:
+	JMP_JEQ_K:
 		if (A == K) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JNE_BPF_X:
+	JMP_JNE_X:
 		if (A != X) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JNE_BPF_K:
+	JMP_JNE_K:
 		if (A != K) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JGT_BPF_X:
+	JMP_JGT_X:
 		if (A > X) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JGT_BPF_K:
+	JMP_JGT_K:
 		if (A > K) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JGE_BPF_X:
+	JMP_JGE_X:
 		if (A >= X) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JGE_BPF_K:
+	JMP_JGE_K:
 		if (A >= K) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSGT_BPF_X:
-		if (((s64)A) > ((s64)X)) {
+	JMP_JSGT_X:
+		if (((s64) A) > ((s64) X)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSGT_BPF_K:
-		if (((s64)A) > ((s64)K)) {
+	JMP_JSGT_K:
+		if (((s64) A) > ((s64) K)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSGE_BPF_X:
-		if (((s64)A) >= ((s64)X)) {
+	JMP_JSGE_X:
+		if (((s64) A) >= ((s64) X)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSGE_BPF_K:
-		if (((s64)A) >= ((s64)K)) {
+	JMP_JSGE_K:
+		if (((s64) A) >= ((s64) K)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSET_BPF_X:
+	JMP_JSET_X:
 		if (A & X) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_JSET_BPF_K:
+	JMP_JSET_K:
 		if (A & K) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
-	BPF_JMP_BPF_EXIT_0:
+	JMP_EXIT_0:
 		return R0;
 
 	/* STX and ST and LDX*/
 #define LDST(SIZEOP, SIZE)					\
-	BPF_STX_BPF_MEM_##SIZEOP:				\
+	STX_MEM_##SIZEOP:					\
 		*(SIZE *)(unsigned long) (A + insn->off) = X;	\
 		CONT;						\
-	BPF_ST_BPF_MEM_##SIZEOP:				\
+	ST_MEM_##SIZEOP:					\
 		*(SIZE *)(unsigned long) (A + insn->off) = K;	\
 		CONT;						\
-	BPF_LDX_BPF_MEM_##SIZEOP:				\
+	LDX_MEM_##SIZEOP:					\
 		A = *(SIZE *)(unsigned long) (X + insn->off);	\
 		CONT;
 
-	LDST(BPF_B,   u8)
-	LDST(BPF_H,  u16)
-	LDST(BPF_W,  u32)
-	LDST(BPF_DW, u64)
+	LDST(B,   u8)
+	LDST(H,  u16)
+	LDST(W,  u32)
+	LDST(DW, u64)
 #undef LDST
-	BPF_STX_BPF_XADD_BPF_W: /* lock xadd *(u32 *)(A + insn->off) += X */
+	STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */
 		atomic_add((u32) X, (atomic_t *)(unsigned long)
 			   (A + insn->off));
 		CONT;
-	BPF_STX_BPF_XADD_BPF_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
+	STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
 		atomic64_add((u64) X, (atomic64_t *)(unsigned long)
 			     (A + insn->off));
 		CONT;
-	BPF_LD_BPF_ABS_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
+	LD_ABS_W: /* R0 = ntohl(*(u32 *) (skb->data + K)) */
 		off = K;
 load_word:
 		/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
@@ -524,7 +524,7 @@ load_word:
 			CONT;
 		}
 		return 0;
-	BPF_LD_BPF_ABS_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
+	LD_ABS_H: /* R0 = ntohs(*(u16 *) (skb->data + K)) */
 		off = K;
 load_half:
 		ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
@@ -533,7 +533,7 @@ load_half:
 			CONT;
 		}
 		return 0;
-	BPF_LD_BPF_ABS_BPF_B: /* R0 = *(u8 *) (ctx + K) */
+	LD_ABS_B: /* R0 = *(u8 *) (ctx + K) */
 		off = K;
 load_byte:
 		ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
@@ -542,13 +542,13 @@ load_byte:
 			CONT;
 		}
 		return 0;
-	BPF_LD_BPF_IND_BPF_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
+	LD_IND_W: /* R0 = ntohl(*(u32 *) (skb->data + X + K)) */
 		off = K + X;
 		goto load_word;
-	BPF_LD_BPF_IND_BPF_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
+	LD_IND_H: /* R0 = ntohs(*(u16 *) (skb->data + X + K)) */
 		off = K + X;
 		goto load_half;
-	BPF_LD_BPF_IND_BPF_B: /* R0 = *(u8 *) (skb->data + X + K) */
+	LD_IND_B: /* R0 = *(u8 *) (skb->data + X + K) */
 		off = K + X;
 		goto load_byte;
 
-- 
cgit 


From 30743837dd204d2b04fd4e9d3db78cc7b118c81a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 1 May 2014 18:34:19 +0200
Subject: net: filter: make register naming more comprehensible

The current code is a bit hard to parse on which registers can be used,
how they are mapped and all play together. It makes much more sense to
define this a bit more clearly so that the code is a bit more intuitive.
This patch cleans this up, and makes naming a bit more consistent among
the code. This also allows for moving some of the defines into the header
file. The clearing of the A and X registers in __sk_run_filter() does not
get a particular register name assigned, as they do not have an 'official'
function but rather just result from the concrete initial mapping of old
BPF programs. Since we already use lowercase letters for the BPF helper
functions used by BPF_CALL, be consistent here as well. No functional changes.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  44 ++++++++--
 net/core/filter.c      | 215 +++++++++++++++++++++++++------------------------
 2 files changed, 145 insertions(+), 114 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index b042d1db127f..ed1efab10b8f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -40,16 +40,47 @@
 /* Placeholder/dummy for 0 */
 #define BPF_0		0
 
+/* Register numbers */
+enum {
+	BPF_REG_0 = 0,
+	BPF_REG_1,
+	BPF_REG_2,
+	BPF_REG_3,
+	BPF_REG_4,
+	BPF_REG_5,
+	BPF_REG_6,
+	BPF_REG_7,
+	BPF_REG_8,
+	BPF_REG_9,
+	BPF_REG_10,
+	__MAX_BPF_REG,
+};
+
 /* BPF has 10 general purpose 64-bit registers and stack frame. */
-#define MAX_BPF_REG	11
+#define MAX_BPF_REG	__MAX_BPF_REG
+
+/* ArgX, context and stack frame pointer register positions. Note,
+ * Arg1, Arg2, Arg3, etc are used as argument mappings of function
+ * calls in BPF_CALL instruction.
+ */
+#define BPF_REG_ARG1	BPF_REG_1
+#define BPF_REG_ARG2	BPF_REG_2
+#define BPF_REG_ARG3	BPF_REG_3
+#define BPF_REG_ARG4	BPF_REG_4
+#define BPF_REG_ARG5	BPF_REG_5
+#define BPF_REG_CTX	BPF_REG_6
+#define BPF_REG_FP	BPF_REG_10
+
+/* Additional register mappings for converted user programs. */
+#define BPF_REG_A	BPF_REG_0
+#define BPF_REG_X	BPF_REG_7
+#define BPF_REG_TMP	BPF_REG_8
 
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
-/* Arg1, context and stack frame pointer register positions. */
-#define ARG1_REG	1
-#define CTX_REG		6
-#define FP_REG		10
+/* Macro to invoke filter function. */
+#define SK_RUN_FILTER(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
 
 struct sock_filter_int {
 	__u8	code;		/* opcode */
@@ -100,9 +131,6 @@ static inline unsigned int sk_filter_size(unsigned int proglen)
 #define sk_filter_proglen(fprog)			\
 		(fprog->len * sizeof(fprog->filter[0]))
 
-#define SK_RUN_FILTER(filter, ctx)			\
-		(*filter->bpf_func)(ctx, filter->insnsi)
-
 int sk_filter(struct sock *sk, struct sk_buff *skb);
 
 u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
diff --git a/net/core/filter.c b/net/core/filter.c
index a1784e95b049..c93ca8d66f37 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -45,6 +45,27 @@
 #include <linux/seccomp.h>
 #include <linux/if_vlan.h>
 
+/* Registers */
+#define R0	regs[BPF_REG_0]
+#define R1	regs[BPF_REG_1]
+#define R2	regs[BPF_REG_2]
+#define R3	regs[BPF_REG_3]
+#define R4	regs[BPF_REG_4]
+#define R5	regs[BPF_REG_5]
+#define R6	regs[BPF_REG_6]
+#define R7	regs[BPF_REG_7]
+#define R8	regs[BPF_REG_8]
+#define R9	regs[BPF_REG_9]
+#define R10	regs[BPF_REG_10]
+
+/* Named registers */
+#define A	regs[insn->a_reg]
+#define X	regs[insn->x_reg]
+#define FP	regs[BPF_REG_FP]
+#define ARG1	regs[BPF_REG_ARG1]
+#define CTX	regs[BPF_REG_CTX]
+#define K	insn->imm
+
 /* No hurry in this branch
  *
  * Exported for the bpf jit load helper.
@@ -122,13 +143,6 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	return 0;
 }
 
-/* Register mappings for user programs. */
-#define A_REG		0
-#define X_REG		7
-#define TMP_REG		8
-#define ARG2_REG	2
-#define ARG3_REG	3
-
 /**
  *	__sk_run_filter - run a filter on a given context
  *	@ctx: buffer to run the filter on
@@ -142,17 +156,6 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
 {
 	u64 stack[MAX_BPF_STACK / sizeof(u64)];
 	u64 regs[MAX_BPF_REG], tmp;
-	void *ptr;
-	int off;
-
-#define K  insn->imm
-#define A  regs[insn->a_reg]
-#define X  regs[insn->x_reg]
-#define R0 regs[0]
-
-#define CONT	 ({insn++; goto select_insn; })
-#define CONT_JMP ({insn++; goto select_insn; })
-
 	static const void *jumptable[256] = {
 		[0 ... 255] = &&default_label,
 		/* Now overwrite non-defaults ... */
@@ -246,11 +249,18 @@ unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
 		DL(LD, IND, B),
 #undef DL
 	};
+	void *ptr;
+	int off;
 
-	regs[FP_REG]  = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
-	regs[ARG1_REG] = (u64) (unsigned long) ctx;
-	regs[A_REG] = 0;
-	regs[X_REG] = 0;
+#define CONT	 ({ insn++; goto select_insn; })
+#define CONT_JMP ({ insn++; goto select_insn; })
+
+	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
+	ARG1 = (u64) (unsigned long) ctx;
+
+	/* Register for user BPF programs need to be reset first. */
+	regs[BPF_REG_A] = 0;
+	regs[BPF_REG_X] = 0;
 
 select_insn:
 	goto *jumptable[insn->code];
@@ -375,8 +385,7 @@ select_insn:
 		/* Function call scratches R1-R5 registers, preserves R6-R9,
 		 * and stores return value into R0.
 		 */
-		R0 = (__bpf_call_base + insn->imm)(regs[1], regs[2], regs[3],
-						   regs[4], regs[5]);
+		R0 = (__bpf_call_base + insn->imm)(R1, R2, R3, R4, R5);
 		CONT;
 
 	/* JMP */
@@ -500,7 +509,7 @@ select_insn:
 load_word:
 		/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are only
 		 * appearing in the programs where ctx == skb. All programs
-		 * keep 'ctx' in regs[CTX_REG] == R6, sk_convert_filter()
+		 * keep 'ctx' in regs[BPF_REG_CTX] == R6, sk_convert_filter()
 		 * saves it in R6, internal BPF verifier will check that
 		 * R6 == ctx.
 		 *
@@ -556,13 +565,6 @@ load_byte:
 		/* If we ever reach this, we have a bug somewhere. */
 		WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
 		return 0;
-#undef CONT_JMP
-#undef CONT
-
-#undef R0
-#undef X
-#undef A
-#undef K
 }
 
 u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
@@ -594,14 +596,14 @@ static unsigned int pkt_type_offset(void)
 	return -1;
 }
 
-static u64 __skb_get_pay_offset(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
 
 	return __skb_get_poff(skb);
 }
 
-static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
 	struct nlattr *nla;
@@ -612,17 +614,17 @@ static u64 __skb_get_nlattr(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
 	if (skb->len < sizeof(struct nlattr))
 		return 0;
 
-	if (A > skb->len - sizeof(struct nlattr))
+	if (a > skb->len - sizeof(struct nlattr))
 		return 0;
 
-	nla = nla_find((struct nlattr *) &skb->data[A], skb->len - A, X);
+	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
 	if (nla)
 		return (void *) nla - (void *) skb->data;
 
 	return 0;
 }
 
-static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	struct sk_buff *skb = (struct sk_buff *)(long) ctx;
 	struct nlattr *nla;
@@ -633,27 +635,27 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
 	if (skb->len < sizeof(struct nlattr))
 		return 0;
 
-	if (A > skb->len - sizeof(struct nlattr))
+	if (a > skb->len - sizeof(struct nlattr))
 		return 0;
 
-	nla = (struct nlattr *) &skb->data[A];
-	if (nla->nla_len > skb->len - A)
+	nla = (struct nlattr *) &skb->data[a];
+	if (nla->nla_len > skb->len - a)
 		return 0;
 
-	nla = nla_find_nested(nla, X);
+	nla = nla_find_nested(nla, x);
 	if (nla)
 		return (void *) nla - (void *) skb->data;
 
 	return 0;
 }
 
-static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	return raw_smp_processor_id();
 }
 
 /* note that this only generates 32-bit random numbers */
-static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5)
+static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 {
 	return (u64)prandom_u32();
 }
@@ -668,28 +670,28 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
 
 		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = offsetof(struct sk_buff, protocol);
 		insn++;
 
 		/* A = ntohs(A) [emitting a nop or swap16] */
 		insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
-		insn->a_reg = A_REG;
+		insn->a_reg = BPF_REG_A;
 		insn->imm = 16;
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
 		insn->code = BPF_LDX | BPF_MEM | BPF_B;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = pkt_type_offset();
 		if (insn->off < 0)
 			return false;
 		insn++;
 
 		insn->code = BPF_ALU | BPF_AND | BPF_K;
-		insn->a_reg = A_REG;
+		insn->a_reg = BPF_REG_A;
 		insn->imm = PKT_TYPE_MAX;
 		break;
 
@@ -699,13 +701,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 			insn->code = BPF_LDX | BPF_MEM | BPF_DW;
 		else
 			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = TMP_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_TMP;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = offsetof(struct sk_buff, dev);
 		insn++;
 
 		insn->code = BPF_JMP | BPF_JNE | BPF_K;
-		insn->a_reg = TMP_REG;
+		insn->a_reg = BPF_REG_TMP;
 		insn->imm = 0;
 		insn->off = 1;
 		insn++;
@@ -716,8 +718,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
 
-		insn->a_reg = A_REG;
-		insn->x_reg = TMP_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_TMP;
 
 		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
 			insn->code = BPF_LDX | BPF_MEM | BPF_W;
@@ -732,8 +734,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 
 		insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = offsetof(struct sk_buff, mark);
 		break;
 
@@ -741,8 +743,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 
 		insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = offsetof(struct sk_buff, hash);
 		break;
 
@@ -750,8 +752,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
 
 		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = offsetof(struct sk_buff, queue_mapping);
 		break;
 
@@ -760,8 +762,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 
 		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = A_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_CTX;
 		insn->off = offsetof(struct sk_buff, vlan_tci);
 		insn++;
 
@@ -769,16 +771,16 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 
 		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
 			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = A_REG;
+			insn->a_reg = BPF_REG_A;
 			insn->imm = ~VLAN_TAG_PRESENT;
 		} else {
 			insn->code = BPF_ALU | BPF_RSH | BPF_K;
-			insn->a_reg = A_REG;
+			insn->a_reg = BPF_REG_A;
 			insn->imm = 12;
 			insn++;
 
 			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = A_REG;
+			insn->a_reg = BPF_REG_A;
 			insn->imm = 1;
 		}
 		break;
@@ -790,20 +792,20 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_RANDOM:
 		/* arg1 = ctx */
 		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = ARG1_REG;
-		insn->x_reg = CTX_REG;
+		insn->a_reg = BPF_REG_ARG1;
+		insn->x_reg = BPF_REG_CTX;
 		insn++;
 
 		/* arg2 = A */
 		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = ARG2_REG;
-		insn->x_reg = A_REG;
+		insn->a_reg = BPF_REG_ARG2;
+		insn->x_reg = BPF_REG_A;
 		insn++;
 
 		/* arg3 = X */
 		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = ARG3_REG;
-		insn->x_reg = X_REG;
+		insn->a_reg = BPF_REG_ARG3;
+		insn->x_reg = BPF_REG_X;
 		insn++;
 
 		/* Emit call(ctx, arg2=A, arg3=X) */
@@ -829,8 +831,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 
 	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
 		insn->code = BPF_ALU | BPF_XOR | BPF_X;
-		insn->a_reg = A_REG;
-		insn->x_reg = X_REG;
+		insn->a_reg = BPF_REG_A;
+		insn->x_reg = BPF_REG_X;
 		break;
 
 	default:
@@ -880,7 +882,7 @@ int sk_convert_filter(struct sock_filter *prog, int len,
 	u8 bpf_src;
 
 	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
-	BUILD_BUG_ON(FP_REG + 1 != MAX_BPF_REG);
+	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
 
 	if (len <= 0 || len >= BPF_MAXINSNS)
 		return -EINVAL;
@@ -897,8 +899,8 @@ do_pass:
 
 	if (new_insn) {
 		new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		new_insn->a_reg = CTX_REG;
-		new_insn->x_reg = ARG1_REG;
+		new_insn->a_reg = BPF_REG_CTX;
+		new_insn->x_reg = BPF_REG_ARG1;
 	}
 	new_insn++;
 
@@ -948,8 +950,8 @@ do_pass:
 				break;
 
 			insn->code = fp->code;
-			insn->a_reg = A_REG;
-			insn->x_reg = X_REG;
+			insn->a_reg = BPF_REG_A;
+			insn->x_reg = BPF_REG_X;
 			insn->imm = fp->k;
 			break;
 
@@ -983,16 +985,16 @@ do_pass:
 				 * in compare insn.
 				 */
 				insn->code = BPF_ALU | BPF_MOV | BPF_K;
-				insn->a_reg = TMP_REG;
+				insn->a_reg = BPF_REG_TMP;
 				insn->imm = fp->k;
 				insn++;
 
-				insn->a_reg = A_REG;
-				insn->x_reg = TMP_REG;
+				insn->a_reg = BPF_REG_A;
+				insn->x_reg = BPF_REG_TMP;
 				bpf_src = BPF_X;
 			} else {
-				insn->a_reg = A_REG;
-				insn->x_reg = X_REG;
+				insn->a_reg = BPF_REG_A;
+				insn->x_reg = BPF_REG_X;
 				insn->imm = fp->k;
 				bpf_src = BPF_SRC(fp->code);
 			}
@@ -1027,33 +1029,33 @@ do_pass:
 		/* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
 		case BPF_LDX | BPF_MSH | BPF_B:
 			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = TMP_REG;
-			insn->x_reg = A_REG;
+			insn->a_reg = BPF_REG_TMP;
+			insn->x_reg = BPF_REG_A;
 			insn++;
 
 			insn->code = BPF_LD | BPF_ABS | BPF_B;
-			insn->a_reg = A_REG;
+			insn->a_reg = BPF_REG_A;
 			insn->imm = fp->k;
 			insn++;
 
 			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = A_REG;
+			insn->a_reg = BPF_REG_A;
 			insn->imm = 0xf;
 			insn++;
 
 			insn->code = BPF_ALU | BPF_LSH | BPF_K;
-			insn->a_reg = A_REG;
+			insn->a_reg = BPF_REG_A;
 			insn->imm = 2;
 			insn++;
 
 			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = X_REG;
-			insn->x_reg = A_REG;
+			insn->a_reg = BPF_REG_X;
+			insn->x_reg = BPF_REG_A;
 			insn++;
 
 			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = A_REG;
-			insn->x_reg = TMP_REG;
+			insn->a_reg = BPF_REG_A;
+			insn->x_reg = BPF_REG_TMP;
 			break;
 
 		/* RET_K, RET_A are remaped into 2 insns. */
@@ -1063,7 +1065,7 @@ do_pass:
 				     (BPF_RVAL(fp->code) == BPF_K ?
 				      BPF_K : BPF_X);
 			insn->a_reg = 0;
-			insn->x_reg = A_REG;
+			insn->x_reg = BPF_REG_A;
 			insn->imm = fp->k;
 			insn++;
 
@@ -1074,8 +1076,9 @@ do_pass:
 		case BPF_ST:
 		case BPF_STX:
 			insn->code = BPF_STX | BPF_MEM | BPF_W;
-			insn->a_reg = FP_REG;
-			insn->x_reg = fp->code == BPF_ST ? A_REG : X_REG;
+			insn->a_reg = BPF_REG_FP;
+			insn->x_reg = fp->code == BPF_ST ?
+				      BPF_REG_A : BPF_REG_X;
 			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
 			break;
 
@@ -1084,8 +1087,8 @@ do_pass:
 		case BPF_LDX | BPF_MEM:
 			insn->code = BPF_LDX | BPF_MEM | BPF_W;
 			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      A_REG : X_REG;
-			insn->x_reg = FP_REG;
+				      BPF_REG_A : BPF_REG_X;
+			insn->x_reg = BPF_REG_FP;
 			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
 			break;
 
@@ -1094,22 +1097,22 @@ do_pass:
 		case BPF_LDX | BPF_IMM:
 			insn->code = BPF_ALU | BPF_MOV | BPF_K;
 			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      A_REG : X_REG;
+				      BPF_REG_A : BPF_REG_X;
 			insn->imm = fp->k;
 			break;
 
 		/* X = A */
 		case BPF_MISC | BPF_TAX:
 			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = X_REG;
-			insn->x_reg = A_REG;
+			insn->a_reg = BPF_REG_X;
+			insn->x_reg = BPF_REG_A;
 			break;
 
 		/* A = X */
 		case BPF_MISC | BPF_TXA:
 			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = A_REG;
-			insn->x_reg = X_REG;
+			insn->a_reg = BPF_REG_A;
+			insn->x_reg = BPF_REG_X;
 			break;
 
 		/* A = skb->len or X = skb->len */
@@ -1117,16 +1120,16 @@ do_pass:
 		case BPF_LDX | BPF_W | BPF_LEN:
 			insn->code = BPF_LDX | BPF_MEM | BPF_W;
 			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      A_REG : X_REG;
-			insn->x_reg = CTX_REG;
+				      BPF_REG_A : BPF_REG_X;
+			insn->x_reg = BPF_REG_CTX;
 			insn->off = offsetof(struct sk_buff, len);
 			break;
 
 		/* access seccomp_data fields */
 		case BPF_LDX | BPF_ABS | BPF_W:
 			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->a_reg = A_REG;
-			insn->x_reg = CTX_REG;
+			insn->a_reg = BPF_REG_A;
+			insn->x_reg = BPF_REG_CTX;
 			insn->off = fp->k;
 			break;
 
-- 
cgit 


From 9da93f9b7cdf8ab28da6b364cdc1fafc8670b4dc Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@sandeen.net>
Date: Mon, 5 May 2014 17:25:50 +1000
Subject: xfs: fix Q_XQUOTARM ioctl

The Q_XQUOTARM quotactl was not working properly, because
we weren't passing around proper flags.  The xfs_fs_set_xstate()
ioctl handler used the same flags for Q_XQUOTAON/OFF as
well as for Q_XQUOTARM, but Q_XQUOTAON/OFF look for
XFS_UQUOTA_ACCT, XFS_UQUOTA_ENFD, XFS_GQUOTA_ACCT etc,
i.e. quota type + state, while Q_XQUOTARM looks only for
the type of quota, i.e. XFS_DQ_USER, XFS_DQ_GROUP etc.

Unfortunately these flag spaces overlap a bit, so we
got semi-random results for Q_XQUOTARM; i.e. the value
for XFS_DQ_USER == XFS_UQUOTA_ACCT, etc.  yeargh.

Add a new quotactl op vector specifically for the QUOTARM
operation, since it operates with a different flag space.

This has been broken more or less forever, AFAICT.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Acked-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/quota/quota.c      | 14 +++++++++++++-
 fs/xfs/xfs_quotaops.c | 29 +++++++++++++++++++++++++----
 include/linux/quota.h |  1 +
 3 files changed, 39 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 2b363e23f36e..ff3f0b3cfdb3 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -278,6 +278,17 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
 	return ret;
 }
 
+static int quota_rmxquota(struct super_block *sb, void __user *addr)
+{
+	__u32 flags;	/* copied in from the user buffer at @addr */
+
+	if (copy_from_user(&flags, addr, sizeof(flags)))
+		return -EFAULT;
+	if (!sb->s_qcop->rm_xquota)	/* no ->rm_xquota handler installed */
+		return -ENOSYS;
+	return sb->s_qcop->rm_xquota(sb, flags);
+}
+
 /* Copy parameters and call proper function */
 static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 		       void __user *addr, struct path *path)
@@ -316,8 +327,9 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
 		return sb->s_qcop->quota_sync(sb, type);
 	case Q_XQUOTAON:
 	case Q_XQUOTAOFF:
-	case Q_XQUOTARM:
 		return quota_setxstate(sb, cmd, addr);
+	case Q_XQUOTARM:
+		return quota_rmxquota(sb, addr);
 	case Q_XGETQSTAT:
 		return quota_getxstate(sb, addr);
 	case Q_XGETQSTATV:
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index af33cafe69b6..2ad1b9822e92 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -100,15 +100,35 @@ xfs_fs_set_xstate(
 		if (!XFS_IS_QUOTA_ON(mp))
 			return -EINVAL;
 		return -xfs_qm_scall_quotaoff(mp, flags);
-	case Q_XQUOTARM:
-		if (XFS_IS_QUOTA_ON(mp))
-			return -EINVAL;
-		return -xfs_qm_scall_trunc_qfiles(mp, flags);
 	}
 
 	return -EINVAL;
 }
 
+STATIC int
+xfs_fs_rm_xquota(
+	struct super_block	*sb,
+	unsigned int		uflags)
+{
+	struct xfs_mount	*mp = XFS_M(sb);
+	unsigned int		flags = 0;
+
+	if (sb->s_flags & MS_RDONLY)
+		return -EROFS;
+
+	if (XFS_IS_QUOTA_ON(mp))
+		return -EINVAL;
+	/* Translate the FS_*_QUOTA type bits into the XFS_DQ_* flag space. */
+	if (uflags & FS_USER_QUOTA)
+		flags |= XFS_DQ_USER;
+	if (uflags & FS_GROUP_QUOTA)
+		flags |= XFS_DQ_GROUP;
+	if (uflags & FS_PROJ_QUOTA)
+		flags |= XFS_DQ_PROJ;
+
+	return -xfs_qm_scall_trunc_qfiles(mp, flags);
+}
+
 STATIC int
 xfs_fs_get_dqblk(
 	struct super_block	*sb,
@@ -149,6 +169,7 @@ const struct quotactl_ops xfs_quotactl_operations = {
 	.get_xstatev		= xfs_fs_get_xstatev,
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
+	.rm_xquota		= xfs_fs_rm_xquota,
 	.get_dqblk		= xfs_fs_get_dqblk,
 	.set_dqblk		= xfs_fs_set_dqblk,
 };
diff --git a/include/linux/quota.h b/include/linux/quota.h
index cc7494a35429..0f3c5d38da1f 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -329,6 +329,7 @@ struct quotactl_ops {
 	int (*get_xstate)(struct super_block *, struct fs_quota_stat *);
 	int (*set_xstate)(struct super_block *, unsigned int, int);
 	int (*get_xstatev)(struct super_block *, struct fs_quota_statv *);
+	int (*rm_xquota)(struct super_block *, unsigned int);
 };
 
 struct quota_format_type {
-- 
cgit 


From 5d02edfc3957446fd625c0b018e14c6631a791f4 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Sat, 19 Apr 2014 00:22:00 +0100
Subject: iio: hid-sensors: Convert units and exponent

The HID sensor hub specifies a default unit and alternative units. These,
along with the unit exponent, can be used to adjust the scale. This change
converts HID sensor data units to IIO-defined units for each
sensor type, so that user space can simply use:
"(data + offset) * scale" to get the final result.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 .../iio/common/hid-sensors/hid-sensor-attributes.c | 114 +++++++++++++++++++++
 include/linux/hid-sensor-hub.h                     |   4 +
 2 files changed, 118 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
index 75b54730a963..e61b1faa1e06 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
@@ -26,6 +26,40 @@
 #include <linux/iio/iio.h>
 #include <linux/iio/sysfs.h>
 
+static struct {
+	u32 usage_id;
+	int unit; /* 0 for default others from HID sensor spec */
+	int scale_val0; /* scale, whole number */
+	int scale_val1; /* scale, fraction in micros */
+} unit_conversion[] = {
+	{HID_USAGE_SENSOR_ACCEL_3D, 0, 9, 806650},
+	{HID_USAGE_SENSOR_ACCEL_3D,
+		HID_USAGE_SENSOR_UNITS_METERS_PER_SEC_SQRD, 1, 0},
+	{HID_USAGE_SENSOR_ACCEL_3D,
+		HID_USAGE_SENSOR_UNITS_G, 9, 806650},
+
+	{HID_USAGE_SENSOR_GYRO_3D, 0, 0, 17453},
+	{HID_USAGE_SENSOR_GYRO_3D,
+		HID_USAGE_SENSOR_UNITS_RADIANS_PER_SECOND, 1, 0},
+	{HID_USAGE_SENSOR_GYRO_3D,
+		HID_USAGE_SENSOR_UNITS_DEGREES_PER_SECOND, 0, 17453},
+
+	{HID_USAGE_SENSOR_COMPASS_3D, 0, 0, 1000},
+	{HID_USAGE_SENSOR_COMPASS_3D, HID_USAGE_SENSOR_UNITS_GAUSS, 1, 0},
+
+	{HID_USAGE_SENSOR_INCLINOMETER_3D, 0, 0, 17453},
+	{HID_USAGE_SENSOR_INCLINOMETER_3D,
+		HID_USAGE_SENSOR_UNITS_DEGREES, 0, 17453},
+	{HID_USAGE_SENSOR_INCLINOMETER_3D,
+		HID_USAGE_SENSOR_UNITS_RADIANS, 1, 0},
+
+	{HID_USAGE_SENSOR_ALS, 0, 1, 0},
+	{HID_USAGE_SENSOR_ALS, HID_USAGE_SENSOR_UNITS_LUX, 1, 0},
+
+	{HID_USAGE_SENSOR_PRESSURE, 0, 100000, 0},
+	{HID_USAGE_SENSOR_PRESSURE, HID_USAGE_SENSOR_UNITS_PASCAL, 1, 0},
+};
+
 static int pow_10(unsigned power)
 {
 	int i;
@@ -209,6 +243,86 @@ int hid_sensor_write_raw_hyst_value(struct hid_sensor_common *st,
 }
 EXPORT_SYMBOL(hid_sensor_write_raw_hyst_value);
 
+/*
+ * This function applies the unit exponent to the scale (micro precision).
+ * For example (input scale0.scale1, output val0 whole part, val1 micros):
+ * 9.806650 ->exp:2-> val0[980]val1[665000]
+ * 9.000806 ->exp:2-> val0[900]val1[80600]
+ * 0.174535 ->exp:2-> val0[17]val1[453500]
+ * 1.001745 ->exp:0-> val0[1]val1[1745]
+ * 1.001745 ->exp:2-> val0[100]val1[174500]
+ * 1.001745 ->exp:4-> val0[10017]val1[450000]
+ * 9.806650 ->exp:-2-> val0[0]val1[98066]
+ */
+static void adjust_exponent_micro(int *val0, int *val1, int scale0,
+				  int scale1, int exp)
+{
+	int i;
+	int x;
+	int res;
+	int rem;
+
+	if (exp > 0) {
+		*val0 = scale0 * pow_10(exp);
+		res = 0;
+		if (exp > 6) { /* scale1 is ignored for shifts beyond 6 digits */
+			*val1 = 0;
+			return;
+		}
+		for (i = 0; i < exp; ++i) { /* top exp digits of scale1 -> res */
+			x = scale1 / pow_10(5 - i);
+			res += (pow_10(exp - 1 - i) * x);
+			scale1 = scale1 % pow_10(5 - i);
+		}
+		*val0 += res;
+			*val1 = scale1 * pow_10(exp); /* remaining low digits */
+	} else if (exp < 0) {
+		exp = abs(exp);
+		if (exp > 6) { /* shifts beyond 6 digits are reported as zero */
+			*val0 = *val1 = 0;
+			return;
+		}
+		*val0 = scale0 / pow_10(exp);
+		rem = scale0 % pow_10(exp); /* digits of scale0 moving into micros */
+		res = 0;
+		for (i = 0; i < (6 - exp); ++i) { /* keep top (6 - exp) digits */
+			x = scale1 / pow_10(5 - i);
+			res += (pow_10(5 - exp - i) * x);
+			scale1 = scale1 % pow_10(5 - i);
+		}
+		*val1 = rem * pow_10(6 - exp) + res;
+	} else { /* exp == 0: scale is passed through unchanged */
+		*val0 = scale0;
+		*val1 = scale1;
+	}
+}
+
+/* Look up the scale for usage_id + unit; apply unit_expo; fill val0/val1. */
+int hid_sensor_format_scale(u32 usage_id,
+			struct hid_sensor_hub_attribute_info *attr_info,
+			int *val0, int *val1)
+{
+	int i;
+	int exp;
+
+	*val0 = 1; /* scale of 1.0 is reported when no table entry matches */
+	*val1 = 0;
+	for (i = 0; i < ARRAY_SIZE(unit_conversion); ++i) {
+		if (unit_conversion[i].usage_id == usage_id &&
+			unit_conversion[i].unit == attr_info->units) {
+			exp  = hid_sensor_convert_exponent(
+						attr_info->unit_expo);
+			adjust_exponent_micro(val0, val1,
+					unit_conversion[i].scale_val0,
+					unit_conversion[i].scale_val1, exp);
+			break;
+		}
+	}
+
+	return IIO_VAL_INT_PLUS_MICRO;
+}
+EXPORT_SYMBOL(hid_sensor_format_scale);
+
 int hid_sensor_parse_common_attributes(struct hid_sensor_hub_device *hsdev,
 					u32 usage_id,
 					struct hid_sensor_common *st)
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index b70cfd7ff29c..89626b23c246 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -223,4 +223,8 @@ int hid_sensor_read_samp_freq_value(struct hid_sensor_common *st,
 int hid_sensor_get_usage_index(struct hid_sensor_hub_device *hsdev,
 				u32 report_id, int field_index, u32 usage_id);
 
+int hid_sensor_format_scale(u32 usage_id,
+			    struct hid_sensor_hub_attribute_info *attr_info,
+			    int *val0, int *val1);
+
 #endif
-- 
cgit 


From 9030924510a9e7d4b7d218749533840075879f2f Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Sat, 19 Apr 2014 00:22:00 +0100
Subject: iio: hid-sensors: Add api to get poll value

Added an interface to get the poll value in milliseconds. This value is
changed by changing the sampling frequency. This API allows clients
to wait for at least the poll interval before reading a new sample.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 .../iio/common/hid-sensors/hid-sensor-attributes.c   | 20 ++++++++++++++++++++
 include/linux/hid-sensor-hub.h                       |  2 ++
 2 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
index e61b1faa1e06..372964635ccf 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-attributes.c
@@ -147,6 +147,26 @@ static u32 convert_to_vtf_format(int size, int exp, int val1, int val2)
 	return value;
 }
 
+s32 hid_sensor_read_poll_value(struct hid_sensor_common *st)
+{
+	s32 value = 0;
+	int ret;
+
+	ret = sensor_hub_get_feature(st->hsdev,
+		st->poll.report_id,
+		st->poll.index, &value);
+	/* -EINVAL on failure; values in seconds are converted to milliseconds. */
+	if (ret < 0 || value < 0) {
+		return -EINVAL;
+	} else {
+		if (st->poll.units == HID_USAGE_SENSOR_UNITS_SECOND)
+			value = value * 1000;
+	}
+
+	return value;
+}
+EXPORT_SYMBOL(hid_sensor_read_poll_value);
+
 int hid_sensor_read_samp_freq_value(struct hid_sensor_common *st,
 				int *val1, int *val2)
 {
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index 89626b23c246..88d8d636a68f 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -227,4 +227,6 @@ int hid_sensor_format_scale(u32 usage_id,
 			    struct hid_sensor_hub_attribute_info *attr_info,
 			    int *val0, int *val1);
 
+s32 hid_sensor_read_poll_value(struct hid_sensor_common *st);
+
 #endif
-- 
cgit 


From 56ff6be608659ac06d4e3cc5827476efa29d610f Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Sat, 19 Apr 2014 00:22:00 +0100
Subject: iio: hid-sensors: Add API to power on/off

Added an API to allow client drivers to turn sensors ON and OFF for a
quick read. Changed data_ready from a boolean to a counting variable,
so that the sensor is powered off only when the last user releases it.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 drivers/iio/accel/hid-sensor-accel-3d.c             |  7 +++----
 drivers/iio/common/hid-sensors/hid-sensor-trigger.c | 17 +++++++++++++----
 drivers/iio/common/hid-sensors/hid-sensor-trigger.h |  1 +
 drivers/iio/gyro/hid-sensor-gyro-3d.c               |  7 +++----
 drivers/iio/light/hid-sensor-als.c                  |  7 +++----
 drivers/iio/light/hid-sensor-prox.c                 |  7 +++----
 drivers/iio/magnetometer/hid-sensor-magn-3d.c       |  7 +++----
 drivers/iio/orientation/hid-sensor-incl-3d.c        |  7 +++----
 drivers/iio/orientation/hid-sensor-rotation.c       |  8 +++-----
 drivers/iio/pressure/hid-sensor-press.c             |  7 +++----
 include/linux/hid-sensor-hub.h                      |  2 +-
 11 files changed, 39 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/accel/hid-sensor-accel-3d.c b/drivers/iio/accel/hid-sensor-accel-3d.c
index ca50a91752d8..cf61c87a47e9 100644
--- a/drivers/iio/accel/hid-sensor-accel-3d.c
+++ b/drivers/iio/accel/hid-sensor-accel-3d.c
@@ -201,9 +201,8 @@ static int accel_3d_proc_event(struct hid_sensor_hub_device *hsdev,
 	struct iio_dev *indio_dev = platform_get_drvdata(priv);
 	struct accel_3d_state *accel_state = iio_priv(indio_dev);
 
-	dev_dbg(&indio_dev->dev, "accel_3d_proc_event [%d]\n",
-				accel_state->common_attributes.data_ready);
-	if (accel_state->common_attributes.data_ready)
+	dev_dbg(&indio_dev->dev, "accel_3d_proc_event\n");
+	if (atomic_read(&accel_state->common_attributes.data_ready))
 		hid_sensor_push_data(indio_dev,
 				accel_state->accel_val,
 				sizeof(accel_state->accel_val));
@@ -342,7 +341,7 @@ static int hid_accel_3d_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to initialize trigger buffer\n");
 		goto error_free_dev_mem;
 	}
-	accel_state->common_attributes.data_ready = false;
+	atomic_set(&accel_state->common_attributes.data_ready, 0);
 	ret = hid_sensor_setup_trigger(indio_dev, name,
 					&accel_state->common_attributes);
 	if (ret < 0) {
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
index dbefbdaf7cd1..73282cee0c81 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -28,16 +28,17 @@
 #include <linux/iio/sysfs.h>
 #include "hid-sensor-trigger.h"
 
-static int hid_sensor_data_rdy_trigger_set_state(struct iio_trigger *trig,
-						bool state)
+int hid_sensor_power_state(struct hid_sensor_common *st, bool state)
 {
-	struct hid_sensor_common *st = iio_trigger_get_drvdata(trig);
 	int state_val;
 	int report_val;
 
 	if (state) {
 		if (sensor_hub_device_open(st->hsdev))
 			return -EIO;
+
+		atomic_inc(&st->data_ready);
+
 		state_val = hid_sensor_get_usage_index(st->hsdev,
 			st->power_state.report_id,
 			st->power_state.index,
@@ -47,6 +48,8 @@ static int hid_sensor_data_rdy_trigger_set_state(struct iio_trigger *trig,
 			st->report_state.index,
 			HID_USAGE_SENSOR_PROP_REPORTING_STATE_ALL_EVENTS_ENUM);
 	} else {
+		if (!atomic_dec_and_test(&st->data_ready))
+			return 0;
 		sensor_hub_device_close(st->hsdev);
 		state_val = hid_sensor_get_usage_index(st->hsdev,
 			st->power_state.report_id,
@@ -57,7 +60,6 @@ static int hid_sensor_data_rdy_trigger_set_state(struct iio_trigger *trig,
 			st->report_state.index,
 			HID_USAGE_SENSOR_PROP_REPORTING_STATE_NO_EVENTS_ENUM);
 	}
-	st->data_ready = state;
 
 	if (state_val >= 0) {
 		state_val += st->power_state.logical_minimum;
@@ -75,6 +77,13 @@ static int hid_sensor_data_rdy_trigger_set_state(struct iio_trigger *trig,
 
 	return 0;
 }
+EXPORT_SYMBOL(hid_sensor_power_state);
+
+static int hid_sensor_data_rdy_trigger_set_state(struct iio_trigger *trig,
+						bool state)
+{ /* forwards to hid_sensor_power_state() using the trigger's drvdata */
+	return hid_sensor_power_state(iio_trigger_get_drvdata(trig), state);
+}
 
 void hid_sensor_remove_trigger(struct hid_sensor_common *attrb)
 {
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.h b/drivers/iio/common/hid-sensors/hid-sensor-trigger.h
index ca02f7811aa8..0f8e78c249d3 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.h
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.h
@@ -22,5 +22,6 @@
 int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name,
 				struct hid_sensor_common *attrb);
 void hid_sensor_remove_trigger(struct hid_sensor_common *attrb);
+int hid_sensor_power_state(struct hid_sensor_common *st, bool state);
 
 #endif
diff --git a/drivers/iio/gyro/hid-sensor-gyro-3d.c b/drivers/iio/gyro/hid-sensor-gyro-3d.c
index 53ac06040fbe..392c30b8cd74 100644
--- a/drivers/iio/gyro/hid-sensor-gyro-3d.c
+++ b/drivers/iio/gyro/hid-sensor-gyro-3d.c
@@ -201,9 +201,8 @@ static int gyro_3d_proc_event(struct hid_sensor_hub_device *hsdev,
 	struct iio_dev *indio_dev = platform_get_drvdata(priv);
 	struct gyro_3d_state *gyro_state = iio_priv(indio_dev);
 
-	dev_dbg(&indio_dev->dev, "gyro_3d_proc_event [%d]\n",
-				gyro_state->common_attributes.data_ready);
-	if (gyro_state->common_attributes.data_ready)
+	dev_dbg(&indio_dev->dev, "gyro_3d_proc_event\n");
+	if (atomic_read(&gyro_state->common_attributes.data_ready))
 		hid_sensor_push_data(indio_dev,
 				gyro_state->gyro_val,
 				sizeof(gyro_state->gyro_val));
@@ -339,7 +338,7 @@ static int hid_gyro_3d_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to initialize trigger buffer\n");
 		goto error_free_dev_mem;
 	}
-	gyro_state->common_attributes.data_ready = false;
+	atomic_set(&gyro_state->common_attributes.data_ready, 0);
 	ret = hid_sensor_setup_trigger(indio_dev, name,
 					&gyro_state->common_attributes);
 	if (ret < 0) {
diff --git a/drivers/iio/light/hid-sensor-als.c b/drivers/iio/light/hid-sensor-als.c
index 39b50be9d456..e124b395f320 100644
--- a/drivers/iio/light/hid-sensor-als.c
+++ b/drivers/iio/light/hid-sensor-als.c
@@ -180,9 +180,8 @@ static int als_proc_event(struct hid_sensor_hub_device *hsdev,
 	struct iio_dev *indio_dev = platform_get_drvdata(priv);
 	struct als_state *als_state = iio_priv(indio_dev);
 
-	dev_dbg(&indio_dev->dev, "als_proc_event [%d]\n",
-				als_state->common_attributes.data_ready);
-	if (als_state->common_attributes.data_ready)
+	dev_dbg(&indio_dev->dev, "als_proc_event\n");
+	if (atomic_read(&als_state->common_attributes.data_ready))
 		hid_sensor_push_data(indio_dev,
 				&als_state->illum,
 				sizeof(als_state->illum));
@@ -305,7 +304,7 @@ static int hid_als_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to initialize trigger buffer\n");
 		goto error_free_dev_mem;
 	}
-	als_state->common_attributes.data_ready = false;
+	atomic_set(&als_state->common_attributes.data_ready, 0);
 	ret = hid_sensor_setup_trigger(indio_dev, name,
 				&als_state->common_attributes);
 	if (ret < 0) {
diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c
index 1894ab196f97..07e98ec8e9f1 100644
--- a/drivers/iio/light/hid-sensor-prox.c
+++ b/drivers/iio/light/hid-sensor-prox.c
@@ -176,9 +176,8 @@ static int prox_proc_event(struct hid_sensor_hub_device *hsdev,
 	struct iio_dev *indio_dev = platform_get_drvdata(priv);
 	struct prox_state *prox_state = iio_priv(indio_dev);
 
-	dev_dbg(&indio_dev->dev, "prox_proc_event [%d]\n",
-				prox_state->common_attributes.data_ready);
-	if (prox_state->common_attributes.data_ready)
+	dev_dbg(&indio_dev->dev, "prox_proc_event\n");
+	if (atomic_read(&prox_state->common_attributes.data_ready))
 		hid_sensor_push_data(indio_dev,
 				&prox_state->human_presence,
 				sizeof(prox_state->human_presence));
@@ -297,7 +296,7 @@ static int hid_prox_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to initialize trigger buffer\n");
 		goto error_free_dev_mem;
 	}
-	prox_state->common_attributes.data_ready = false;
+	atomic_set(&prox_state->common_attributes.data_ready, 0);
 	ret = hid_sensor_setup_trigger(indio_dev, name,
 				&prox_state->common_attributes);
 	if (ret) {
diff --git a/drivers/iio/magnetometer/hid-sensor-magn-3d.c b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
index 131ced0dcb1c..54eea6a17061 100644
--- a/drivers/iio/magnetometer/hid-sensor-magn-3d.c
+++ b/drivers/iio/magnetometer/hid-sensor-magn-3d.c
@@ -202,9 +202,8 @@ static int magn_3d_proc_event(struct hid_sensor_hub_device *hsdev,
 	struct iio_dev *indio_dev = platform_get_drvdata(priv);
 	struct magn_3d_state *magn_state = iio_priv(indio_dev);
 
-	dev_dbg(&indio_dev->dev, "magn_3d_proc_event [%d]\n",
-				magn_state->common_attributes.data_ready);
-	if (magn_state->common_attributes.data_ready)
+	dev_dbg(&indio_dev->dev, "magn_3d_proc_event\n");
+	if (atomic_read(&magn_state->common_attributes.data_ready))
 		hid_sensor_push_data(indio_dev,
 				magn_state->magn_val,
 				sizeof(magn_state->magn_val));
@@ -343,7 +342,7 @@ static int hid_magn_3d_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to initialize trigger buffer\n");
 		goto error_free_dev_mem;
 	}
-	magn_state->common_attributes.data_ready = false;
+	atomic_set(&magn_state->common_attributes.data_ready, 0);
 	ret = hid_sensor_setup_trigger(indio_dev, name,
 					&magn_state->common_attributes);
 	if (ret < 0) {
diff --git a/drivers/iio/orientation/hid-sensor-incl-3d.c b/drivers/iio/orientation/hid-sensor-incl-3d.c
index f0c465cc192a..bf11678dd04e 100644
--- a/drivers/iio/orientation/hid-sensor-incl-3d.c
+++ b/drivers/iio/orientation/hid-sensor-incl-3d.c
@@ -200,9 +200,8 @@ static int incl_3d_proc_event(struct hid_sensor_hub_device *hsdev,
 	struct iio_dev *indio_dev = platform_get_drvdata(priv);
 	struct incl_3d_state *incl_state = iio_priv(indio_dev);
 
-	dev_dbg(&indio_dev->dev, "incl_3d_proc_event [%d]\n",
-				incl_state->common_attributes.data_ready);
-	if (incl_state->common_attributes.data_ready)
+	dev_dbg(&indio_dev->dev, "incl_3d_proc_event\n");
+	if (atomic_read(&incl_state->common_attributes.data_ready))
 		hid_sensor_push_data(indio_dev,
 				(u8 *)incl_state->incl_val,
 				sizeof(incl_state->incl_val));
@@ -358,7 +357,7 @@ static int hid_incl_3d_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to initialize trigger buffer\n");
 		goto error_free_dev_mem;
 	}
-	incl_state->common_attributes.data_ready = false;
+	atomic_set(&incl_state->common_attributes.data_ready, 0);
 	ret = hid_sensor_setup_trigger(indio_dev, name,
 					&incl_state->common_attributes);
 	if (ret) {
diff --git a/drivers/iio/orientation/hid-sensor-rotation.c b/drivers/iio/orientation/hid-sensor-rotation.c
index 51387bbc1ce1..dccf848e8b0f 100644
--- a/drivers/iio/orientation/hid-sensor-rotation.c
+++ b/drivers/iio/orientation/hid-sensor-rotation.c
@@ -145,10 +145,8 @@ static int dev_rot_proc_event(struct hid_sensor_hub_device *hsdev,
 	struct iio_dev *indio_dev = platform_get_drvdata(priv);
 	struct dev_rot_state *rot_state = iio_priv(indio_dev);
 
-	dev_dbg(&indio_dev->dev, "dev_rot_proc_event [%d]\n",
-				rot_state->common_attributes.data_ready);
-
-	if (rot_state->common_attributes.data_ready)
+	dev_dbg(&indio_dev->dev, "dev_rot_proc_event\n");
+	if (atomic_read(&rot_state->common_attributes.data_ready))
 		hid_sensor_push_data(indio_dev,
 				(u8 *)rot_state->sampled_vals,
 				sizeof(rot_state->sampled_vals));
@@ -272,7 +270,7 @@ static int hid_dev_rot_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to initialize trigger buffer\n");
 		return ret;
 	}
-	rot_state->common_attributes.data_ready = false;
+	atomic_set(&rot_state->common_attributes.data_ready, 0);
 	ret = hid_sensor_setup_trigger(indio_dev, name,
 					&rot_state->common_attributes);
 	if (ret) {
diff --git a/drivers/iio/pressure/hid-sensor-press.c b/drivers/iio/pressure/hid-sensor-press.c
index ff69da4443b8..39df50c45dab 100644
--- a/drivers/iio/pressure/hid-sensor-press.c
+++ b/drivers/iio/pressure/hid-sensor-press.c
@@ -180,9 +180,8 @@ static int press_proc_event(struct hid_sensor_hub_device *hsdev,
 	struct iio_dev *indio_dev = platform_get_drvdata(priv);
 	struct press_state *press_state = iio_priv(indio_dev);
 
-	dev_dbg(&indio_dev->dev, "press_proc_event [%d]\n",
-				press_state->common_attributes.data_ready);
-	if (press_state->common_attributes.data_ready)
+	dev_dbg(&indio_dev->dev, "press_proc_event\n");
+	if (atomic_read(&press_state->common_attributes.data_ready))
 		hid_sensor_push_data(indio_dev,
 				&press_state->press_data,
 				sizeof(press_state->press_data));
@@ -307,7 +306,7 @@ static int hid_press_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to initialize trigger buffer\n");
 		goto error_free_dev_mem;
 	}
-	press_state->common_attributes.data_ready = false;
+	atomic_set(&press_state->common_attributes.data_ready, 0);
 	ret = hid_sensor_setup_trigger(indio_dev, name,
 				&press_state->common_attributes);
 	if (ret) {
diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h
index 88d8d636a68f..51f7ccadf923 100644
--- a/include/linux/hid-sensor-hub.h
+++ b/include/linux/hid-sensor-hub.h
@@ -189,7 +189,7 @@ struct hid_sensor_common {
 	struct hid_sensor_hub_device *hsdev;
 	struct platform_device *pdev;
 	unsigned usage_id;
-	bool data_ready;
+	atomic_t data_ready;
 	struct iio_trigger *trigger;
 	struct hid_sensor_hub_attribute_info poll;
 	struct hid_sensor_hub_attribute_info report_state;
-- 
cgit 


From 8c1eb25326552bfe6912ea160dfb3de0207a7550 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@linux-m68k.org>
Date: Tue, 11 Mar 2014 11:23:50 +0100
Subject: of: Spelling s/anonymouns/anonymous/

Signed-off-by: Geert Uytterhoeven <geert+renesas@linux-m68k.org>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: devicetree@vger.kernel.org
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/of_platform.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index 05cb4a928252..8cdd53bf1114 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -37,7 +37,7 @@
  * Note: Using an auxdata lookup table should be considered a last resort when
  * converting a platform to use the DT.  Normally the automatically generated
  * device name will not matter, and drivers should obtain data from the device
- * node instead of from an anonymouns platform_data pointer.
+ * node instead of from an anonymous platform_data pointer.
  */
 struct of_dev_auxdata {
 	char *compatible;
-- 
cgit 


From 719d93cd5f5c5c8775b7a38192069e8e1d1ac46e Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Thu, 16 Jan 2014 13:44:20 +0100
Subject: kvm/irqchip: Speed up KVM_SET_GSI_ROUTING

When starting lots of dataplane devices the bootup takes very long on
Christian's s390 with irqfd patches. With larger setups he is even
able to trigger some timeouts in some components.  Turns out that the
KVM_SET_GSI_ROUTING ioctl takes very long (strace claims up to 0.1 sec)
when having multiple CPUs.  This is caused by the  synchronize_rcu and
the HZ=100 of s390.  By changing the code to use a private srcu we can
speed things up.  This patch reduces the boot time till mounting root
from 8 to 2 seconds on my s390 guest with 100 disks.

Uses of hlist_for_each_entry_rcu, hlist_add_head_rcu, hlist_del_init_rcu
are fine because they do not have lockdep checks (hlist_for_each_entry_rcu
uses rcu_dereference_raw rather than rcu_dereference, and write-sides
do not do rcu lockdep at all).

Note that we're hardly relying on the "sleepable" part of srcu.  We just
want SRCU's faster detection of grace periods.

Testing was done by Andrew Theurer using netperf tests STREAM, MAERTS
and RR.  The difference between results "before" and "after" the patch
has mean -0.2% and standard deviation 0.6%.  Using a paired t-test on the
data points says that there is a 2.5% probability that the patch is the
cause of the performance difference (rather than a random fluctuation).

(Restricting the t-test to RR, which is the most likely to be affected,
changes the numbers to respectively -0.3% mean, 0.7% stdev, and 8%
probability that the numbers actually say something about the patch.
The probability increases mostly because there are fewer data points).

Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com> # s390
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/eventfd.c       | 25 +++++++++++++++----------
 virt/kvm/irq_comm.c      | 17 +++++++++--------
 virt/kvm/irqchip.c       | 31 ++++++++++++++++---------------
 virt/kvm/kvm_main.c      | 16 ++++++++++------
 5 files changed, 51 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1e125b055327..970c68197c69 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -370,6 +370,7 @@ struct kvm {
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
 	struct srcu_struct srcu;
+	struct srcu_struct irq_srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	u32 bsp_vcpu_id;
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 912ec5a95e2c..20c3af7692c5 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -31,6 +31,7 @@
 #include <linux/list.h>
 #include <linux/eventfd.h>
 #include <linux/kernel.h>
+#include <linux/srcu.h>
 #include <linux/slab.h>
 
 #include "iodev.h"
@@ -118,19 +119,22 @@ static void
 irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 {
 	struct _irqfd_resampler *resampler;
+	struct kvm *kvm;
 	struct _irqfd *irqfd;
+	int idx;
 
 	resampler = container_of(kian, struct _irqfd_resampler, notifier);
+	kvm = resampler->kvm;
 
-	kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
 		    resampler->notifier.gsi, 0, false);
 
-	rcu_read_lock();
+	idx = srcu_read_lock(&kvm->irq_srcu);
 
 	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
 		eventfd_signal(irqfd->resamplefd, 1);
 
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 static void
@@ -142,7 +146,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
 	mutex_lock(&kvm->irqfds.resampler_lock);
 
 	list_del_rcu(&irqfd->resampler_link);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 
 	if (list_empty(&resampler->list)) {
 		list_del(&resampler->link);
@@ -221,17 +225,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 	unsigned long flags = (unsigned long)key;
 	struct kvm_kernel_irq_routing_entry *irq;
 	struct kvm *kvm = irqfd->kvm;
+	int idx;
 
 	if (flags & POLLIN) {
-		rcu_read_lock();
-		irq = rcu_dereference(irqfd->irq_entry);
+		idx = srcu_read_lock(&kvm->irq_srcu);
+		irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu);
 		/* An event has been signaled, inject an interrupt */
 		if (irq)
 			kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
 					false);
 		else
 			schedule_work(&irqfd->inject);
-		rcu_read_unlock();
+		srcu_read_unlock(&kvm->irq_srcu, idx);
 	}
 
 	if (flags & POLLHUP) {
@@ -363,7 +368,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 		}
 
 		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
-		synchronize_rcu();
+		synchronize_srcu(&kvm->irq_srcu);
 
 		mutex_unlock(&kvm->irqfds.resampler_lock);
 	}
@@ -465,7 +470,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 			 * another thread calls kvm_irq_routing_update before
 			 * we flush workqueue below (we synchronize with
 			 * kvm_irq_routing_update using irqfds.lock).
-			 * It is paired with synchronize_rcu done by caller
+			 * It is paired with synchronize_srcu done by caller
 			 * of that function.
 			 */
 			rcu_assign_pointer(irqfd->irq_entry, NULL);
@@ -524,7 +529,7 @@ kvm_irqfd_release(struct kvm *kvm)
 
 /*
  * Change irq_routing and irqfd.
- * Caller must invoke synchronize_rcu afterwards.
+ * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
  */
 void kvm_irq_routing_update(struct kvm *kvm,
 			    struct kvm_irq_routing_table *irq_rt)
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e2e6b4473a96..ced4a542a031 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -163,6 +163,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	struct kvm_kernel_irq_routing_entry *e;
 	int ret = -EINVAL;
 	struct kvm_irq_routing_table *irq_rt;
+	int idx;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
 
@@ -174,8 +175,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 	 * Since there's no easy way to do this, we only support injecting MSI
 	 * which is limited to 1:1 GSI mapping.
 	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	if (irq < irq_rt->nr_rt_entries)
 		hlist_for_each_entry(e, &irq_rt->map[irq], link) {
 			if (likely(e->type == KVM_IRQ_ROUTING_MSI))
@@ -184,7 +185,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
 				ret = -EWOULDBLOCK;
 			break;
 		}
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 	return ret;
 }
 
@@ -253,22 +254,22 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
 	mutex_lock(&kvm->irq_lock);
 	hlist_del_rcu(&kimn->link);
 	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 }
 
 void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
 			     bool mask)
 {
 	struct kvm_irq_mask_notifier *kimn;
-	int gsi;
+	int idx, gsi;
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
 			if (kimn->irq == gsi)
 				kimn->func(kimn, mask);
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 20dc9e4a8f6c..b43c275775cd 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -26,6 +26,7 @@
 
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
+#include <linux/srcu.h>
 #include <linux/export.h>
 #include <trace/events/kvm.h>
 #include "irq.h"
@@ -33,19 +34,19 @@
 bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-	int gsi;
+	int gsi, idx;
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
 			if (kian->gsi == gsi) {
-				rcu_read_unlock();
+				srcu_read_unlock(&kvm->irq_srcu, idx);
 				return true;
 			}
 
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	return false;
 }
@@ -54,18 +55,18 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 {
 	struct kvm_irq_ack_notifier *kian;
-	int gsi;
+	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
 
-	rcu_read_lock();
-	gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
 	if (gsi != -1)
 		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
 					 link)
 			if (kian->gsi == gsi)
 				kian->irq_acked(kian);
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -85,7 +86,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
 	mutex_lock(&kvm->irq_lock);
 	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
-	synchronize_rcu();
+	synchronize_srcu(&kvm->irq_srcu);
 #ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
 #endif
@@ -115,7 +116,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 		bool line_status)
 {
 	struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
-	int ret = -1, i = 0;
+	int ret = -1, i = 0, idx;
 	struct kvm_irq_routing_table *irq_rt;
 
 	trace_kvm_set_irq(irq, level, irq_source_id);
@@ -124,12 +125,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
 	 * IOAPIC.  So set the bit in both. The guest will ignore
 	 * writes to the unused one.
 	 */
-	rcu_read_lock();
-	irq_rt = rcu_dereference(kvm->irq_routing);
+	idx = srcu_read_lock(&kvm->irq_srcu);
+	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
 	if (irq < irq_rt->nr_rt_entries)
 		hlist_for_each_entry(e, &irq_rt->map[irq], link)
 			irq_set[i++] = *e;
-	rcu_read_unlock();
+	srcu_read_unlock(&kvm->irq_srcu, idx);
 
 	while(i--) {
 		int r;
@@ -226,7 +227,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	kvm_irq_routing_update(kvm, new);
 	mutex_unlock(&kvm->irq_lock);
 
-	synchronize_rcu();
+	synchronize_srcu_expedited(&kvm->irq_srcu);
 
 	new = old;
 	r = 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index fa70c6e642b4..95b4c2b3906a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -457,11 +457,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
 
 	r = kvm_arch_init_vm(kvm, type);
 	if (r)
-		goto out_err_nodisable;
+		goto out_err_no_disable;
 
 	r = hardware_enable_all();
 	if (r)
-		goto out_err_nodisable;
+		goto out_err_no_disable;
 
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
 	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
@@ -473,10 +473,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	r = -ENOMEM;
 	kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
 	if (!kvm->memslots)
-		goto out_err_nosrcu;
+		goto out_err_no_srcu;
 	kvm_init_memslots_id(kvm);
 	if (init_srcu_struct(&kvm->srcu))
-		goto out_err_nosrcu;
+		goto out_err_no_srcu;
+	if (init_srcu_struct(&kvm->irq_srcu))
+		goto out_err_no_irq_srcu;
 	for (i = 0; i < KVM_NR_BUSES; i++) {
 		kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
 					GFP_KERNEL);
@@ -505,10 +507,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	return kvm;
 
 out_err:
+	cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
 	cleanup_srcu_struct(&kvm->srcu);
-out_err_nosrcu:
+out_err_no_srcu:
 	hardware_disable_all();
-out_err_nodisable:
+out_err_no_disable:
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kfree(kvm->buses[i]);
 	kfree(kvm->memslots);
-- 
cgit 


From 8757ad65d30f009fe0beeb2d70d3cd834cb998f2 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Fri, 11 Apr 2014 10:37:39 -0400
Subject: NVMe: Update copyright headers

Make the copyright dates accurate and remove the final paragraph that
includes the address of the FSF.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme-core.c | 4 ----
 drivers/block/nvme-scsi.c | 6 +-----
 include/linux/nvme.h      | 6 +-----
 include/uapi/linux/nvme.h | 6 +-----
 4 files changed, 3 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 7c64fa756cce..eacd64e36be3 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #include <linux/nvme.h>
diff --git a/drivers/block/nvme-scsi.c b/drivers/block/nvme-scsi.c
index 2c3f5be06da1..da3b252dea6f 100644
--- a/drivers/block/nvme-scsi.c
+++ b/drivers/block/nvme-scsi.c
@@ -1,6 +1,6 @@
 /*
  * NVM Express device driver
- * Copyright (c) 2011, Intel Corporation.
+ * Copyright (c) 2011-2014, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 /*
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index a50173ca1d72..cfd084cab22b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1,6 +1,6 @@
 /*
  * Definitions for the NVM Express interface
- * Copyright (c) 2011-2013, Intel Corporation.
+ * Copyright (c) 2011-2014, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #ifndef _LINUX_NVME_H
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index 096fe1c6f83d..ad9014e49693 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -1,6 +1,6 @@
 /*
  * Definitions for the NVM Express interface
- * Copyright (c) 2011-2013, Intel Corporation.
+ * Copyright (c) 2011-2014, Intel Corporation.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -10,10 +10,6 @@
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
 #ifndef _UAPI_LINUX_NVME_H
-- 
cgit 


From a7d2ce2832d84e0182585f63bf96ca7323b3aee7 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 29 Apr 2014 11:41:28 -0600
Subject: NVMe: Configure support for block flush

This configures an nvme request_queue as flush capable if the device
has a volatile write cache present.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme-core.c | 3 +++
 include/linux/nvme.h      | 1 +
 include/uapi/linux/nvme.h | 1 +
 3 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 025dd4cad4a6..e7c4fdb6a651 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -1897,6 +1897,8 @@ static struct nvme_ns *nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid,
 	blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift);
 	if (dev->max_hw_sectors)
 		blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors);
+	if (dev->vwc & NVME_CTRL_VWC_PRESENT)
+		blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA);
 
 	disk->major = nvme_major;
 	disk->first_minor = 0;
@@ -2201,6 +2203,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	nn = le32_to_cpup(&ctrl->nn);
 	dev->oncs = le16_to_cpup(&ctrl->oncs);
 	dev->abort_limit = ctrl->acl + 1;
+	dev->vwc = ctrl->vwc;
 	memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn));
 	memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn));
 	memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr));
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index cfd084cab22b..6266373d3147 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -99,6 +99,7 @@ struct nvme_dev {
 	u32 stripe_size;
 	u16 oncs;
 	u16 abort_limit;
+	u8 vwc;
 	u8 initialized;
 };
 
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index ad9014e49693..f090336d5bad 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -73,6 +73,7 @@ enum {
 	NVME_CTRL_ONCS_COMPARE			= 1 << 0,
 	NVME_CTRL_ONCS_WRITE_UNCORRECTABLE	= 1 << 1,
 	NVME_CTRL_ONCS_DSM			= 1 << 2,
+	NVME_CTRL_VWC_PRESENT			= 1 << 0,
 };
 
 struct nvme_lbaf {
-- 
cgit 


From 53562be74bd06bbe74d2acf3caca5398f8eeb160 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 29 Apr 2014 11:41:29 -0600
Subject: NVMe: Flush with data support

It is possible a filesystem may send a flush flagged bio with write
data. There is no such composite NVMe command, so the driver sends flush
and write separately.

The device is allowed to execute these commands in any order, so it was
possible the driver ends the bio after the write completes, but while the
flush is still active. We don't want to let a filesystem believe flush
succeeded before it really has; this could cause data corruption on a
power loss between these events. To fix, this patch splits the flush
and write into chained bios.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme-core.c | 44 ++++++++++++++++++++++++--------------------
 include/linux/nvme.h      |  1 -
 2 files changed, 24 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index e7c4fdb6a651..cd8a8bc711cc 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -197,16 +197,13 @@ static int alloc_cmdid_killable(struct nvme_queue *nvmeq, void *ctx,
 #define CMD_CTX_CANCELLED	(0x30C + CMD_CTX_BASE)
 #define CMD_CTX_COMPLETED	(0x310 + CMD_CTX_BASE)
 #define CMD_CTX_INVALID		(0x314 + CMD_CTX_BASE)
-#define CMD_CTX_FLUSH		(0x318 + CMD_CTX_BASE)
-#define CMD_CTX_ABORT		(0x31C + CMD_CTX_BASE)
+#define CMD_CTX_ABORT		(0x318 + CMD_CTX_BASE)
 
 static void special_completion(struct nvme_queue *nvmeq, void *ctx,
 						struct nvme_completion *cqe)
 {
 	if (ctx == CMD_CTX_CANCELLED)
 		return;
-	if (ctx == CMD_CTX_FLUSH)
-		return;
 	if (ctx == CMD_CTX_ABORT) {
 		++nvmeq->dev->abort_limit;
 		return;
@@ -629,16 +626,6 @@ static int nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	return 0;
 }
 
-int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns)
-{
-	int cmdid = alloc_cmdid(nvmeq, (void *)CMD_CTX_FLUSH,
-					special_completion, NVME_IO_TIMEOUT);
-	if (unlikely(cmdid < 0))
-		return cmdid;
-
-	return nvme_submit_flush(nvmeq, ns, cmdid);
-}
-
 static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 {
 	struct bio *bio = iod->private;
@@ -654,7 +641,7 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 
 	if (bio->bi_rw & REQ_DISCARD)
 		return nvme_submit_discard(nvmeq, ns, bio, iod, cmdid);
-	if ((bio->bi_rw & REQ_FLUSH) && !iod->nents)
+	if (bio->bi_rw & REQ_FLUSH)
 		return nvme_submit_flush(nvmeq, ns, cmdid);
 
 	control = 0;
@@ -688,6 +675,26 @@ static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod)
 	return 0;
 }
 
+static int nvme_split_flush_data(struct nvme_queue *nvmeq, struct bio *bio)
+{
+	struct bio *split = bio_clone(bio, GFP_ATOMIC);
+	if (!split)
+		return -ENOMEM;
+
+	split->bi_iter.bi_size = 0;
+	split->bi_phys_segments = 0;
+	bio->bi_rw &= ~REQ_FLUSH;
+	bio_chain(split, bio);
+
+	if (!waitqueue_active(&nvmeq->sq_full))
+		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
+	bio_list_add(&nvmeq->sq_cong, split);
+	bio_list_add(&nvmeq->sq_cong, bio);
+	wake_up_process(nvme_thread);
+
+	return 0;
+}
+
 /*
  * Called with local interrupts disabled and the q_lock held.  May not sleep.
  */
@@ -698,11 +705,8 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	int psegs = bio_phys_segments(ns->queue, bio);
 	int result;
 
-	if ((bio->bi_rw & REQ_FLUSH) && psegs) {
-		result = nvme_submit_flush_data(nvmeq, ns);
-		if (result)
-			return result;
-	}
+	if ((bio->bi_rw & REQ_FLUSH) && psegs)
+		return nvme_split_flush_data(nvmeq, bio);
 
 	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
 	if (!iod)
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 6266373d3147..1813cfdb7e80 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -156,7 +156,6 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
 			struct nvme_iod *iod);
 int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *);
-int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns);
 int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
 							u32 *result);
 int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
-- 
cgit 


From 76ba0aae673075c77a8b775e9133c8e8b1a44563 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 2 May 2014 16:29:18 -0700
Subject: net: Generalize checksum_init functions

Create a general __skb_checksum_validate function (actually a
macro) to subsume the various checksum_init functions. This
function can either init the checksum, or do the full validation
(logically checksum_init+skb_check_complete)-- a flag specifies
if full validation is performed. Also, there is a flag to the function
to indicate that zero checksums are allowed (to support optional
UDP checksums).

Added several stub functions for calling __skb_checksum_validate.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 08074a810164..3ca0dda5a42e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2741,6 +2741,99 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb)
 	       0 : __skb_checksum_complete(skb);
 }
 
+/* Check if we need to perform checksum complete validation.
+ *
+ * Returns true if checksum complete is needed, false otherwise
+ * (either checksum is unnecessary or zero checksum is allowed).
+ */
+static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
+						  bool zero_okay,
+						  __sum16 check)
+{
+	if (skb_csum_unnecessary(skb)) {
+		return false;
+	} else if (zero_okay && !check) {
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		return false;
+	}
+
+	return true;
+}
+
+/* For small packets <= CHECKSUM_BREAK perform checksum complete directly
+ * in checksum_init.
+ */
+#define CHECKSUM_BREAK 76
+
+/* Validate (init) checksum based on checksum complete.
+ *
+ * Return values:
+ *   0: checksum is validated or try to in skb_checksum_complete. In the latter
+ *	case the ip_summed will not be CHECKSUM_UNNECESSARY and the pseudo
+ *	checksum is stored in skb->csum for use in __skb_checksum_complete
+ *   non-zero: value of invalid checksum
+ *
+ */
+static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
+						       bool complete,
+						       __wsum psum)
+{
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		if (!csum_fold(csum_add(psum, skb->csum))) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			return 0;
+		}
+	}
+
+	skb->csum = psum;
+
+	if (complete || skb->len <= CHECKSUM_BREAK)
+		return __skb_checksum_complete(skb);
+
+	return 0;
+}
+
+static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
+{
+	return 0;
+}
+
+/* Perform checksum validate (init). Note that this is a macro since we only
+ * want to calculate the pseudo header which is an input function if necessary.
+ * First we try to validate without any computation (checksum unnecessary) and
+ * then calculate based on checksum complete calling the function to compute
+ * pseudo header.
+ *
+ * Return values:
+ *   0: checksum is validated or try to in skb_checksum_complete
+ *   non-zero: value of invalid checksum
+ */
+#define __skb_checksum_validate(skb, proto, complete,			\
+				zero_okay, check, compute_pseudo)	\
+({									\
+	__sum16 __ret = 0;						\
+	if (__skb_checksum_validate_needed(skb, zero_okay, check))	\
+		__ret = __skb_checksum_validate_complete(skb,		\
+				complete, compute_pseudo(skb, proto));	\
+	__ret;								\
+})
+
+#define skb_checksum_init(skb, proto, compute_pseudo)			\
+	__skb_checksum_validate(skb, proto, false, false, 0, compute_pseudo)
+
+#define skb_checksum_init_zero_check(skb, proto, check, compute_pseudo)	\
+	__skb_checksum_validate(skb, proto, false, true, check, compute_pseudo)
+
+#define skb_checksum_validate(skb, proto, compute_pseudo)		\
+	__skb_checksum_validate(skb, proto, true, false, 0, compute_pseudo)
+
+#define skb_checksum_validate_zero_check(skb, proto, check,		\
+					 compute_pseudo)		\
+	__skb_checksum_validate(skb, proto, true, true, check, compute_pseudo)
+
+#define skb_checksum_simple_validate(skb)				\
+	__skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)
+
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 void nf_conntrack_destroy(struct nf_conntrack *nfct);
 static inline void nf_conntrack_put(struct nf_conntrack *nfct)
-- 
cgit 


From 8febcaa2aac184d7e729acb75e9c4b80b04ad1b9 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Thu, 24 Apr 2014 11:30:01 -0400
Subject: device: introduce per device dma_pfn_offset

On a few architectures, there are restrictions on the DMA-able area of
system RAM. This means that devices need to know about these restrictions
so that the dma_masks can be updated accordingly and the DMA address
translation helpers can add/subtract the DMA offset.

In most cases, DMA address translation can be performed using the offset
of the bus address space relative to the physical address space, as follows:

PFN->DMA:  __pfn_to_phys(pfn + [-]dma_pfn_offset)
DMA->PFN:  __phys_to_pfn(dma_addr) + [-]dma_pfn_offset

So we introduce a per-device dma_pfn_offset which can be populated
by architecture init code while creating the devices.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Olof Johansson <olof@lixom.net>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
---
 include/linux/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 233bbbeb768d..85a52d698f78 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -691,6 +691,7 @@ struct acpi_dev_node {
  * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all
  * 		hardware supports 64-bit addresses for consistent allocations
  * 		such descriptors.
+ * @dma_pfn_offset: offset of the DMA memory range relative to RAM
  * @dma_parms:	A low level driver may set these to teach IOMMU code about
  * 		segment limitations.
  * @dma_pools:	Dma pools (if dma'ble device).
@@ -756,6 +757,7 @@ struct device {
 					     not all hardware supports
 					     64 bit addresses for consistent
 					     allocations such descriptors. */
+	unsigned long	dma_pfn_offset;
 
 	struct device_dma_parameters *dma_parms;
 
-- 
cgit 


From 18308c94723e162ed121942335bc186e66820a7a Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 24 Apr 2014 11:30:02 -0400
Subject: of: introduce of_dma_get_range() helper

of_dma_get_range() finds the "dma-ranges" property for the
specified device and parses it.
 dma-ranges format:
   DMA addr (dma_addr)          : naddr cells
   CPU addr (phys_addr_t)       : pna cells
   size                         : nsize cells

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Olof Johansson <olof@lixom.net>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
---
 drivers/of/address.c       | 87 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/of_address.h |  8 +++++
 2 files changed, 95 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/address.c b/drivers/of/address.c
index cb4242a69cd5..c54baee87d93 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -721,3 +721,90 @@ void __iomem *of_iomap(struct device_node *np, int index)
 	return ioremap(res.start, resource_size(&res));
 }
 EXPORT_SYMBOL(of_iomap);
+
+/**
+ * of_dma_get_range - Get DMA range info
+ * @np:		device node to get DMA range info
+ * @dma_addr:	pointer to store initial DMA address of DMA range
+ * @paddr:	pointer to store initial CPU address of DMA range
+ * @size:	pointer to store size of DMA range
+ *
+ * Look in bottom up direction for the first "dma-ranges" property
+ * and parse it.
+ *  dma-ranges format:
+ *	DMA addr (dma_addr)	: naddr cells
+ *	CPU addr (phys_addr_t)	: pna cells
+ *	size			: nsize cells
+ *
+ * It returns -ENODEV if "dma-ranges" property was not found
+ * for this device in DT.
+ */
+int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *size)
+{
+	struct device_node *node = of_node_get(np);
+	const __be32 *ranges = NULL;
+	int len, naddr, nsize, pna;
+	int ret = 0;
+	u64 dmaaddr;
+
+	if (!node)
+		return -EINVAL;
+
+	while (1) {
+		naddr = of_n_addr_cells(node);
+		nsize = of_n_size_cells(node);
+		node = of_get_next_parent(node);
+		if (!node)
+			break;
+
+		ranges = of_get_property(node, "dma-ranges", &len);
+
+		/* Ignore empty ranges, they imply no translation required */
+		if (ranges && len > 0)
+			break;
+
+		/*
+		 * At least empty ranges has to be defined for parent node if
+		 * DMA is supported
+		 */
+		if (!ranges)
+			break;
+	}
+
+	if (!ranges) {
+		pr_debug("%s: no dma-ranges found for node(%s)\n",
+			 __func__, np->full_name);
+		ret = -ENODEV;
+		goto out;
+	}
+
+	len /= sizeof(u32);
+
+	pna = of_n_addr_cells(node);
+
+	/* dma-ranges format:
+	 * DMA addr	: naddr cells
+	 * CPU addr	: pna cells
+	 * size		: nsize cells
+	 */
+	dmaaddr = of_read_number(ranges, naddr);
+	*paddr = of_translate_dma_address(np, ranges);
+	if (*paddr == OF_BAD_ADDR) {
+		pr_err("%s: translation of DMA address(%llx) to CPU address failed node(%s)\n",
+		       __func__, dmaaddr, np->full_name);
+		ret = -EINVAL;
+		goto out;
+	}
+	*dma_addr = dmaaddr;
+
+	*size = of_read_number(ranges + naddr + pna, nsize);
+
+	pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n",
+		 *dma_addr, *paddr, *size);
+
+out:
+	of_node_put(node);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(of_dma_get_range);
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 5f6ed6b182b8..4d7b325af2ca 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -63,6 +63,8 @@ extern int of_pci_range_parser_init(struct of_pci_range_parser *parser,
 extern struct of_pci_range *of_pci_range_parser_one(
 					struct of_pci_range_parser *parser,
 					struct of_pci_range *range);
+extern int of_dma_get_range(struct device_node *np, u64 *dma_addr,
+				u64 *paddr, u64 *size);
 #else /* CONFIG_OF_ADDRESS */
 static inline struct device_node *of_find_matching_node_by_address(
 					struct device_node *from,
@@ -90,6 +92,12 @@ static inline struct of_pci_range *of_pci_range_parser_one(
 {
 	return NULL;
 }
+
+static inline int of_dma_get_range(struct device_node *np, u64 *dma_addr,
+				u64 *paddr, u64 *size)
+{
+	return -ENODEV;
+}
 #endif /* CONFIG_OF_ADDRESS */
 
 #ifdef CONFIG_OF
-- 
cgit 


From 92ea637edea36e58236e3124f199161da6f5c5de Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Thu, 24 Apr 2014 11:30:03 -0400
Subject: of: introduce of_dma_is_coherent() helper

The of_dma_is_coherent() helper parses the given DT device
node to see if the "dma-coherent" property is supported and
returns true or false accordingly.

If the arch is always coherent or always noncoherent, then the default
DMA ops has to be specified accordingly.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Olof Johansson <olof@lixom.net>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 drivers/of/address.c       | 23 +++++++++++++++++++++++
 include/linux/of_address.h |  6 ++++++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/address.c b/drivers/of/address.c
index c54baee87d93..d244b2859aac 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -808,3 +808,26 @@ out:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(of_dma_get_range);
+
+/**
+ * of_dma_is_coherent - Check if device is coherent
+ * @np:	device node
+ *
+ * Returns true if a "dma-coherent" property is found on this node or on
+ * any node reached by walking up via of_get_next_parent(), else false.
+ */
+bool of_dma_is_coherent(struct device_node *np)
+{
+	struct device_node *node = of_node_get(np);
+
+	while (node) {
+		if (of_property_read_bool(node, "dma-coherent")) {
+			of_node_put(node);
+			return true;
+		}
+		node = of_get_next_parent(node);
+	}
+	of_node_put(node);
+	return false;
+}
+EXPORT_SYMBOL_GPL(of_dma_is_coherent);
\ No newline at end of file
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 4d7b325af2ca..839a3521b28e 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -65,6 +65,7 @@ extern struct of_pci_range *of_pci_range_parser_one(
 					struct of_pci_range *range);
 extern int of_dma_get_range(struct device_node *np, u64 *dma_addr,
 				u64 *paddr, u64 *size);
+extern bool of_dma_is_coherent(struct device_node *np);
 #else /* CONFIG_OF_ADDRESS */
 static inline struct device_node *of_find_matching_node_by_address(
 					struct device_node *from,
@@ -98,6 +99,11 @@ static inline int of_dma_get_range(struct device_node *np, u64 *dma_addr,
 {
 	return -ENODEV;
 }
+
+static inline bool of_dma_is_coherent(struct device_node *np)
+{
+	return false;
+}
 #endif /* CONFIG_OF_ADDRESS */
 
 #ifdef CONFIG_OF
-- 
cgit 


From 95713978b0a2929b72933235bb07c0a793e71afa Mon Sep 17 00:00:00 2001
From: Emilio López <emilio@elopez.com.ar>
Date: Fri, 2 May 2014 17:57:16 +0200
Subject: clk: sunxi: Implement MMC phase control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

HdG: add header exporting clk_sunxi_mmc_phase_control

Signed-off-by: Emilio López <emilio@elopez.com.ar>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/sunxi/clk-sunxi.c | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/clk/sunxi.h     | 22 ++++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 include/linux/clk/sunxi.h

(limited to 'include/linux')

diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c
index bd7dc733c1ca..59f90401b900 100644
--- a/drivers/clk/sunxi/clk-sunxi.c
+++ b/drivers/clk/sunxi/clk-sunxi.c
@@ -506,6 +506,42 @@ CLK_OF_DECLARE(sun7i_a20_gmac, "allwinner,sun7i-a20-gmac-clk",
 
 
+/**
+ * clk_sunxi_mmc_phase_control() - configures MMC clock phase control
+ */
+
+void clk_sunxi_mmc_phase_control(struct clk_hw *hw, u8 sample, u8 output)
+{
+	#define to_clk_composite(_hw) container_of(_hw, struct clk_composite, hw)
+	#define to_clk_factors(_hw) container_of(_hw, struct clk_factors, hw)
+
+	struct clk_composite *composite = to_clk_composite(hw);
+	struct clk_hw *rate_hw = composite->rate_hw;
+	struct clk_factors *factors = to_clk_factors(rate_hw);
+	unsigned long flags = 0;
+	u32 reg;
+
+	if (factors->lock)
+		spin_lock_irqsave(factors->lock, flags);
+
+	reg = readl(factors->reg);
+
+	/* set sample clock phase control */
+	reg &= ~(0x7 << 20);
+	reg |= ((sample & 0x7) << 20);
+
+	/* set output clock phase control */
+	reg &= ~(0x7 << 8);
+	reg |= ((output & 0x7) << 8);
+
+	writel(reg, factors->reg);
+
+	if (factors->lock)
+		spin_unlock_irqrestore(factors->lock, flags);
+}
+EXPORT_SYMBOL(clk_sunxi_mmc_phase_control);
+
+
 /**
  * sunxi_factors_clk_setup() - Setup function for factor clocks
  */
diff --git a/include/linux/clk/sunxi.h b/include/linux/clk/sunxi.h
new file mode 100644
index 000000000000..1ef5c899e458
--- /dev/null
+++ b/include/linux/clk/sunxi.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2013 - Hans de Goede <hdegoede@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_CLK_SUNXI_H_
+#define __LINUX_CLK_SUNXI_H_
+
+#include <linux/clk.h>
+
+void clk_sunxi_mmc_phase_control(struct clk_hw *hw, u8 sample, u8 output);
+
+#endif
-- 
cgit 


From bdffd893a0e9c431304142d12d9a0a21d365c502 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <cl@linux.com>
Date: Tue, 29 Apr 2014 14:17:40 -0500
Subject: tracing: Replace __get_cpu_var uses with this_cpu_ptr

Replace uses of &__get_cpu_var for address calculation with this_cpu_ptr.

Link: http://lkml.kernel.org/p/alpine.DEB.2.10.1404291415560.18364@gentwo.org

Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/kprobes.h | 2 +-
 kernel/trace/ftrace.c   | 4 ++--
 kernel/trace/trace.c    | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 925eaf28fca9..7bd2ad01e39c 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -355,7 +355,7 @@ static inline void reset_current_kprobe(void)
 
 static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void)
 {
-	return (&__get_cpu_var(kprobe_ctlblk));
+	return this_cpu_ptr(&kprobe_ctlblk);
 }
 
 int register_kprobe(struct kprobe *p);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 9eb1aa03a18d..38e5cf73b9ae 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -822,7 +822,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,
 
 	local_irq_save(flags);
 
-	stat = &__get_cpu_var(ftrace_profile_stats);
+	stat = this_cpu_ptr(&ftrace_profile_stats);
 	if (!stat->hash || !ftrace_profile_enabled)
 		goto out;
 
@@ -853,7 +853,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	stat = &__get_cpu_var(ftrace_profile_stats);
+	stat = this_cpu_ptr(&ftrace_profile_stats);
 	if (!stat->hash || !ftrace_profile_enabled)
 		goto out;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4c392c8238bf..05431696b10c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1726,7 +1726,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
 	 */
 	barrier();
 	if (use_stack == 1) {
-		trace.entries		= &__get_cpu_var(ftrace_stack).calls[0];
+		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
 		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;
 
 		if (regs)
-- 
cgit 


From e7c24607b5d68a4cdc56e09d70a3c8bae5f0519f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 10 Apr 2014 20:54:51 -0400
Subject: kill iov_iter_copy_from_user()

all callers can use copy_page_from_iter() and it actually simplifies
them.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ceph/file.c         |  3 +--
 fs/cifs/file.c         |  7 +++----
 include/linux/uio.h    |  2 --
 mm/iov_iter.c          | 27 ---------------------------
 mm/process_vm_access.c |  6 +-----
 5 files changed, 5 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 88a6df4cbe6d..ef9115e4a6fa 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -737,13 +737,12 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
 		left = len;
 		for (n = 0; n < num_pages; n++) {
 			size_t plen = min_t(size_t, left, PAGE_SIZE);
-			ret = iov_iter_copy_from_user(pages[n], &i, 0, plen);
+			ret = copy_page_from_iter(pages[n], 0, plen, &i);
 			if (ret != plen) {
 				ret = -EFAULT;
 				break;
 			}
 			left -= ret;
-			iov_iter_advance(&i, ret);
 		}
 
 		if (ret < 0) {
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5ed03e0b8b40..2900d150654e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2444,11 +2444,10 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
 
 		save_len = cur_len;
 		for (i = 0; i < nr_pages; i++) {
-			bytes = min_t(const size_t, cur_len, PAGE_SIZE);
-			copied = iov_iter_copy_from_user(wdata->pages[i], &it,
-							 0, bytes);
+			bytes = min_t(size_t, cur_len, PAGE_SIZE);
+			copied = copy_page_from_iter(wdata->pages[i], 0, bytes,
+						     &it);
 			cur_len -= copied;
-			iov_iter_advance(&it, copied);
 			/*
 			 * If we didn't copy as much as we expected, then that
 			 * may mean we trod into an unmapped area. Stop copying
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 199bcc34241b..abbe83ded630 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -62,8 +62,6 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to);
 
 size_t iov_iter_copy_from_user_atomic(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes);
-size_t iov_iter_copy_from_user(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 10e46cd721de..22ec1ef068a8 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -129,33 +129,6 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 }
 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
-/*
- * This has the same sideeffects and return value as
- * iov_iter_copy_from_user_atomic().
- * The difference is that it attempts to resolve faults.
- * Page must not be locked.
- */
-size_t iov_iter_copy_from_user(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes)
-{
-	char *kaddr;
-	size_t copied;
-
-	kaddr = kmap(page);
-	if (likely(i->nr_segs == 1)) {
-		int left;
-		char __user *buf = i->iov->iov_base + i->iov_offset;
-		left = __copy_from_user(kaddr + offset, buf, bytes);
-		copied = bytes - left;
-	} else {
-		copied = __iovec_copy_from_user_inatomic(kaddr + offset,
-						i->iov, i->iov_offset, bytes);
-	}
-	kunmap(page);
-	return copied;
-}
-EXPORT_SYMBOL(iov_iter_copy_from_user);
-
 void iov_iter_advance(struct iov_iter *i, size_t bytes)
 {
 	BUG_ON(i->count < bytes);
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 8505c9262b35..f32b1fbbfe69 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -46,11 +46,7 @@ static int process_vm_rw_pages(struct page **pages,
 			copy = len;
 
 		if (vm_write) {
-			if (copy > iov_iter_count(iter))
-				copy = iov_iter_count(iter);
-			copied = iov_iter_copy_from_user(page, iter,
-					offset, copy);
-			iov_iter_advance(iter, copied);
+			copied = copy_page_from_iter(page, offset, copy, iter);
 			set_page_dirty_lock(page);
 		} else {
 			copied = copy_page_to_iter(page, offset, copy, iter);
-- 
cgit 


From f8579f8673b7ecdb7a81d5d5bb1d981093d9aa94 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 3 Mar 2014 22:03:20 -0500
Subject: generic_file_direct_write(): switch to iov_iter

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/file.c    |  6 ++----
 fs/fuse/file.c     |  6 ++----
 fs/ocfs2/file.c    |  6 +++---
 fs/xfs/xfs_file.c  |  5 +++--
 include/linux/fs.h |  4 ++--
 mm/filemap.c       | 15 +++++++--------
 6 files changed, 19 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ae6af072b635..9fe20c2052af 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1669,15 +1669,13 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 	loff_t endbyte;
 	int err;
 
-	written = generic_file_direct_write(iocb, iov, &nr_segs, pos,
-					    count, ocount);
+	iov_iter_init(&i, iov, nr_segs, count, 0);
+	written = generic_file_direct_write(iocb, &i, pos, count, ocount);
 
 	if (written < 0 || written == count)
 		return written;
 
 	pos += written;
-	count -= written;
-	iov_iter_init(&i, iov, nr_segs, count, written);
 	written_buffered = __btrfs_buffered_write(file, &i, pos);
 	if (written_buffered < 0) {
 		err = written_buffered;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 96d513e01a5d..126deb5d0a9c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1235,15 +1235,13 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		goto out;
 
 	if (file->f_flags & O_DIRECT) {
-		written = generic_file_direct_write(iocb, iov, &nr_segs, pos, 
-						    count, ocount);
+		iov_iter_init(&i, iov, nr_segs, count, 0);
+		written = generic_file_direct_write(iocb, &i, pos, count, ocount);
 		if (written < 0 || written == count)
 			goto out;
 
 		pos += written;
-		count -= written;
 
-		iov_iter_init(&i, iov, nr_segs, count, written);
 		written_buffered = fuse_perform_write(file, mapping, &i, pos);
 		if (written_buffered < 0) {
 			err = written_buffered;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8970dcf74de5..d6d78c2aa96e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2251,6 +2251,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	int full_coherency = !(osb->s_mount_opt &
 			       OCFS2_MOUNT_COHERENCY_BUFFERED);
 	int unaligned_dio = 0;
+	struct iov_iter from;
 
 	trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
 		(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -2365,16 +2366,15 @@ relock:
 	if (ret)
 		goto out_dio;
 
+	iov_iter_init(&from, iov, nr_segs, count, 0);
 	if (direct_io) {
-		written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
+		written = generic_file_direct_write(iocb, &from, *ppos,
 						    count, ocount);
 		if (written < 0) {
 			ret = written;
 			goto out_dio;
 		}
 	} else {
-		struct iov_iter from;
-		iov_iter_init(&from, iov, nr_segs, count, 0);
 		current->backing_dev_info = file->f_mapping->backing_dev_info;
 		written = generic_perform_write(file, &from, *ppos);
 		if (likely(written >= 0))
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 951a2321ee01..8617497867c7 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -641,6 +641,7 @@ xfs_file_dio_aio_write(
 	int			iolock;
 	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
 					mp->m_rtdev_targp : mp->m_ddev_targp;
+	struct iov_iter		from;
 
 	/* DIO must be aligned to device logical sector size */
 	if ((pos | count) & target->bt_logical_sectormask)
@@ -698,8 +699,8 @@ xfs_file_dio_aio_write(
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-	ret = generic_file_direct_write(iocb, iovp,
-			&nr_segs, pos, count, ocount);
+	iov_iter_init(&from, iovp, nr_segs, count, 0);
+	ret = generic_file_direct_write(iocb, &from, pos, count, ocount);
 
 out:
 	xfs_rw_iunlock(ip, iolock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 878031227c57..262f96e579b8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2407,8 +2407,8 @@ int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isbl
 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
-extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *,
-		unsigned long *, loff_t, size_t, size_t);
+extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *,
+		loff_t, size_t, size_t);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
diff --git a/mm/filemap.c b/mm/filemap.c
index 000a220e2a41..a840890ed39f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2385,9 +2385,8 @@ int pagecache_write_end(struct file *file, struct address_space *mapping,
 EXPORT_SYMBOL(pagecache_write_end);
 
 ssize_t
-generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long *nr_segs, loff_t pos,
-		size_t count, size_t ocount)
+generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
+		loff_t pos, size_t count, size_t ocount)
 {
 	struct file	*file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -2397,9 +2396,9 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	pgoff_t		end;
 
 	if (count != ocount)
-		*nr_segs = iov_shorten((struct iovec *)iov, *nr_segs, count);
+		from->nr_segs = iov_shorten((struct iovec *)from->iov, from->nr_segs, count);
 
-	write_len = iov_length(iov, *nr_segs);
+	write_len = iov_length(from->iov, from->nr_segs);
 	end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
 
 	written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
@@ -2426,7 +2425,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 		}
 	}
 
-	written = mapping->a_ops->direct_IO(WRITE, iocb, iov, pos, *nr_segs);
+	written = mapping->a_ops->direct_IO(WRITE, iocb, from->iov, pos, from->nr_segs);
 
 	/*
 	 * Finally, try again to invalidate clean pages which might have been
@@ -2443,6 +2442,7 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 
 	if (written > 0) {
 		pos += written;
+		iov_iter_advance(from, written);
 		if (pos > i_size_read(inode) && !S_ISBLK(inode->i_mode)) {
 			i_size_write(inode, pos);
 			mark_inode_dirty(inode);
@@ -2645,11 +2645,10 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		loff_t endbyte;
 
-		written = generic_file_direct_write(iocb, iov, &from.nr_segs, pos,
+		written = generic_file_direct_write(iocb, &from, pos,
 							count, ocount);
 		if (written < 0 || written == count)
 			goto out;
-		iov_iter_advance(&from, written);
 
 		/*
 		 * direct-io write to a hole: fall through to buffered I/O
-- 
cgit 


From cb66a7a1f149ff705fa37cad6d1252b046e0ad4f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 4 Mar 2014 15:24:06 -0500
Subject: kill generic_segment_checks()

All callers of ->aio_read() and ->aio_write() have iov/nr_segs already
checked, so calling generic_segment_checks() after that is just an odd
way to spell iov_length().

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/staging/lustre/lustre/llite/file.c | 10 ++----
 fs/btrfs/file.c                            |  7 +---
 fs/ceph/file.c                             | 13 ++------
 fs/fuse/file.c                             |  7 +---
 fs/ntfs/file.c                             |  5 +--
 fs/ocfs2/file.c                            |  7 +---
 fs/xfs/xfs_file.c                          |  9 ++---
 include/linux/fs.h                         |  2 --
 mm/filemap.c                               | 53 ++----------------------------
 mm/shmem.c                                 |  7 ++--
 10 files changed, 16 insertions(+), 104 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index 8e844a6371e0..220bd8390a84 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -1180,9 +1180,7 @@ static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t	     result;
 	int		 refcheck;
 
-	result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-	if (result)
-		return result;
+	count = iov_length(iov, nr_segs);
 
 	env = cl_env_get(&refcheck);
 	if (IS_ERR(env))
@@ -1235,14 +1233,10 @@ static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct lu_env      *env;
 	struct vvp_io_args *args;
-	size_t	      count = 0;
+	size_t	      count = iov_length(iov, nr_segs);
 	ssize_t	     result;
 	int		 refcheck;
 
-	result = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
-	if (result)
-		return result;
-
 	env = cl_env_get(&refcheck);
 	if (IS_ERR(env))
 		return PTR_ERR(env);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1dafe0701daf..a0a94a30d85a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1726,12 +1726,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 
 	mutex_lock(&inode->i_mutex);
 
-	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-	if (err) {
-		mutex_unlock(&inode->i_mutex);
-		goto out;
-	}
-	count = ocount;
+	count = ocount = iov_length(iov, nr_segs);
 
 	current->backing_dev_info = inode->i_mapping->backing_dev_info;
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ef9115e4a6fa..21a56c27b74c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -828,12 +828,8 @@ again:
 		     inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
 		     ceph_cap_string(got));
 
-		if (!read) {
-			ret = generic_segment_checks(iov, &nr_segs,
-							&len, VERIFY_WRITE);
-			if (ret)
-				goto out;
-		}
+		if (!read)
+			len = iov_length(iov, nr_segs);
 
 		iov_iter_init(&i, iov, nr_segs, len, read);
 
@@ -855,7 +851,6 @@ again:
 
 		ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
 	}
-out:
 	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
 	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
 	ceph_put_cap_refs(ci, got);
@@ -911,9 +906,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
 	mutex_lock(&inode->i_mutex);
 
-	err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
-	if (err)
-		goto out;
+	count = iov_length(iov, nr_segs);
 
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = file->f_mapping->backing_dev_info;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 126deb5d0a9c..9c7f346879e7 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1208,12 +1208,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
 	WARN_ON(iocb->ki_pos != pos);
 
-	ocount = 0;
-	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-	if (err)
-		return err;
-
-	count = ocount;
+	count = ocount = iov_length(iov, nr_segs);
 	mutex_lock(&inode->i_mutex);
 
 	/* We can write back this queue in page reclaim */
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index db9bd8a31725..b6fa457d8d01 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2091,10 +2091,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
 	size_t count;		/* after file limit checks */
 	ssize_t written, err;
 
-	count = 0;
-	err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
-	if (err)
-		return err;
+	count = iov_length(iov, nr_segs);
 	pos = *ppos;
 	/* We can write back this queue in page reclaim. */
 	current->backing_dev_info = mapping->backing_dev_info;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index d6d78c2aa96e..d33c4ced0baf 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2355,12 +2355,7 @@ relock:
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb, rw_level);
 
-	ret = generic_segment_checks(iov, &nr_segs, &ocount,
-				     VERIFY_READ);
-	if (ret)
-		goto out_dio;
-
-	count = ocount;
+	count = ocount = iov_length(iov, nr_segs);
 	ret = generic_write_checks(file, ppos, &count,
 				   S_ISBLK(inode->i_mode));
 	if (ret)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8617497867c7..f0f8084a67be 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -253,9 +253,7 @@ xfs_file_aio_read(
 	if (file->f_mode & FMODE_NOCMTIME)
 		ioflags |= IO_INVIS;
 
-	ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
-	if (ret < 0)
-		return ret;
+	size = iov_length(iovp, nr_segs);
 
 	if (unlikely(ioflags & IO_ISDIRECT)) {
 		xfs_buftarg_t	*target =
@@ -777,10 +775,7 @@ xfs_file_aio_write(
 
 	BUG_ON(iocb->ki_pos != pos);
 
-	ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
-	if (ret)
-		return ret;
-
+	ocount = iov_length(iovp, nr_segs);
 	if (ocount == 0)
 		return 0;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 262f96e579b8..796de742fe4a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2412,8 +2412,6 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *,
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
-extern int generic_segment_checks(const struct iovec *iov,
-		unsigned long *nr_segs, size_t *count, int access_flags);
 
 /* fs/block_dev.c */
 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
diff --git a/mm/filemap.c b/mm/filemap.c
index a840890ed39f..7c1417b0bd7b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1663,45 +1663,6 @@ out:
 	return written ? written : error;
 }
 
-/*
- * Performs necessary checks before doing a write
- * @iov:	io vector request
- * @nr_segs:	number of segments in the iovec
- * @count:	number of bytes to write
- * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE
- *
- * Adjust number of segments and amount of bytes to write (nr_segs should be
- * properly initialized first). Returns appropriate error code that caller
- * should return or zero in case that write should be allowed.
- */
-int generic_segment_checks(const struct iovec *iov,
-			unsigned long *nr_segs, size_t *count, int access_flags)
-{
-	unsigned long   seg;
-	size_t cnt = 0;
-	for (seg = 0; seg < *nr_segs; seg++) {
-		const struct iovec *iv = &iov[seg];
-
-		/*
-		 * If any segment has a negative length, or the cumulative
-		 * length ever wraps negative then return -EINVAL.
-		 */
-		cnt += iv->iov_len;
-		if (unlikely((ssize_t)(cnt|iv->iov_len) < 0))
-			return -EINVAL;
-		if (access_ok(access_flags, iv->iov_base, iv->iov_len))
-			continue;
-		if (seg == 0)
-			return -EFAULT;
-		*nr_segs = seg;
-		cnt -= iv->iov_len;	/* This segment is no good */
-		break;
-	}
-	*count = cnt;
-	return 0;
-}
-EXPORT_SYMBOL(generic_segment_checks);
-
 /**
  * generic_file_aio_read - generic filesystem read routine
  * @iocb:	kernel I/O control block
@@ -1717,15 +1678,12 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
 	struct file *filp = iocb->ki_filp;
-	ssize_t retval;
+	ssize_t retval = 0;
 	size_t count;
 	loff_t *ppos = &iocb->ki_pos;
 	struct iov_iter i;
 
-	count = 0;
-	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-	if (retval)
-		return retval;
+	count = iov_length(iov, nr_segs);
 	iov_iter_init(&i, iov, nr_segs, count, 0);
 
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
@@ -2615,12 +2573,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t		status;
 	struct iov_iter from;
 
-	ocount = 0;
-	err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-	if (err)
-		return err;
-
-	count = ocount;
+	count = ocount = iov_length(iov, nr_segs);
 
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
diff --git a/mm/shmem.c b/mm/shmem.c
index 9f70e02111c6..2a93e625adaf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1412,14 +1412,11 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb,
 	unsigned long offset;
 	enum sgp_type sgp = SGP_READ;
 	int error = 0;
-	ssize_t retval;
-	size_t count;
+	ssize_t retval = 0;
+	size_t count = iov_length(iov, nr_segs);
 	loff_t *ppos = &iocb->ki_pos;
 	struct iov_iter iter;
 
-	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);
-	if (retval)
-		return retval;
 	iov_iter_init(&iter, iov, nr_segs, count, 0);
 
 	/*
-- 
cgit 


From d8d3d94b80aa1a1c0ca75c58b8abdc7356f38418 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 4 Mar 2014 21:27:34 -0500
Subject: pass iov_iter to ->direct_IO()

unmodified, for now

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking          |  3 +--
 Documentation/filesystems/vfs.txt          |  3 +--
 drivers/staging/lustre/lustre/llite/rw26.c | 17 ++++++++---------
 fs/9p/vfs_addr.c                           |  5 ++---
 fs/block_dev.c                             |  9 +++++----
 fs/btrfs/inode.c                           | 12 ++++++------
 fs/ceph/addr.c                             |  4 ++--
 fs/cifs/file.c                             |  4 ++--
 fs/exofs/inode.c                           |  2 +-
 fs/ext2/inode.c                            | 11 ++++++-----
 fs/ext3/inode.c                            | 16 +++++++---------
 fs/ext4/inode.c                            | 11 +++++------
 fs/f2fs/data.c                             |  8 ++++----
 fs/fat/inode.c                             | 13 +++++++------
 fs/fuse/file.c                             | 10 +++++-----
 fs/gfs2/aops.c                             | 11 +++++------
 fs/hfs/inode.c                             |  8 ++++----
 fs/hfsplus/inode.c                         |  6 +++---
 fs/jfs/inode.c                             |  8 ++++----
 fs/nfs/direct.c                            |  8 ++++----
 fs/nilfs2/inode.c                          | 10 +++++-----
 fs/ocfs2/aops.c                            |  7 +++----
 fs/reiserfs/inode.c                        |  9 ++++-----
 fs/udf/file.c                              |  4 ++--
 fs/udf/inode.c                             |  8 ++++----
 fs/xfs/xfs_aops.c                          | 15 +++++++--------
 include/linux/fs.h                         |  3 +--
 include/linux/nfs_fs.h                     |  3 +--
 mm/filemap.c                               |  9 ++++-----
 mm/page_io.c                               |  6 ++++--
 30 files changed, 117 insertions(+), 126 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index eba790134253..9b0d5a33c8bf 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -196,8 +196,7 @@ prototypes:
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, int);
 	void (*freepage)(struct page *);
-	int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs);
+	int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
 				unsigned long *);
 	int (*migratepage)(struct address_space *, struct page *, struct page *);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 617f6d70c077..1846374a5add 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -589,8 +589,7 @@ struct address_space_operations {
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, int);
 	void (*freepage)(struct page *);
-	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs);
+	ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
 	struct page* (*get_xip_page)(struct address_space *, sector_t,
 			int);
 	/* migrate the contents of a page to the specified target */
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 7e3e0967993b..66e05c6f4d27 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -363,15 +363,14 @@ static ssize_t ll_direct_IO_26_seg(const struct lu_env *env, struct cl_io *io,
 #define MAX_DIO_SIZE ((MAX_MALLOC / sizeof(struct brw_page) * PAGE_CACHE_SIZE) & \
 		      ~(DT_MAX_BRW_SIZE - 1))
 static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
-			       const struct iovec *iov, loff_t file_offset,
-			       unsigned long nr_segs)
+			       struct iov_iter *iter, loff_t file_offset)
 {
 	struct lu_env *env;
 	struct cl_io *io;
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	struct ccc_object *obj = cl_inode2ccc(inode);
-	long count = iov_length(iov, nr_segs);
+	long count = iov_length(iter->iov, iter->nr_segs);
 	long tot_bytes = 0, result = 0;
 	struct ll_inode_info *lli = ll_i2info(inode);
 	unsigned long seg = 0;
@@ -392,9 +391,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 	       MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
 
 	/* Check that all user buffers are aligned as well */
-	for (seg = 0; seg < nr_segs; seg++) {
-		if (((unsigned long)iov[seg].iov_base & ~CFS_PAGE_MASK) ||
-		    (iov[seg].iov_len & ~CFS_PAGE_MASK))
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		if (((unsigned long)iter->iov[seg].iov_base & ~CFS_PAGE_MASK) ||
+		    (iter->iov[seg].iov_len & ~CFS_PAGE_MASK))
 			return -EINVAL;
 	}
 
@@ -411,9 +410,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 		mutex_lock(&inode->i_mutex);
 
 	LASSERT(obj->cob_transient_pages == 0);
-	for (seg = 0; seg < nr_segs; seg++) {
-		long iov_left = iov[seg].iov_len;
-		unsigned long user_addr = (unsigned long)iov[seg].iov_base;
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		long iov_left = iter->iov[seg].iov_len;
+		unsigned long user_addr = (unsigned long)iter->iov[seg].iov_base;
 
 		if (rw == READ) {
 			if (file_offset >= i_size_read(inode))
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index c71e88602ff4..cc1cfae726b3 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -259,8 +259,7 @@ static int v9fs_launder_page(struct page *page)
  *
  */
 static ssize_t
-v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-	       loff_t pos, unsigned long nr_segs)
+v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
 	/*
 	 * FIXME
@@ -269,7 +268,7 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	 */
 	p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n",
 		 iocb->ki_filp->f_path.dentry->d_name.name,
-		 (long long)pos, nr_segs);
+		 (long long)pos, iter->nr_segs);
 
 	return -EINVAL;
 }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 552a8d13bc32..938fc707d769 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -165,14 +165,15 @@ blkdev_get_block(struct inode *inode, sector_t iblock,
 }
 
 static ssize_t
-blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs)
+blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+			loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 
-	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
-				    nr_segs, blkdev_get_block, NULL, NULL, 0);
+	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter->iov,
+				    offset, iter->nr_segs, blkdev_get_block,
+				    NULL, NULL, 0);
 }
 
 int __sync_blockdev(struct block_device *bdev, int wait)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5f805bc944fa..30a6cc51f32c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7433,8 +7433,7 @@ out:
 }
 
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
-			const struct iovec *iov, loff_t offset,
-			unsigned long nr_segs)
+			struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -7444,8 +7443,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 	bool relock = false;
 	ssize_t ret;
 
-	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-			    offset, nr_segs))
+	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter->iov,
+			    offset, iter->nr_segs))
 		return 0;
 
 	atomic_inc(&inode->i_dio_count);
@@ -7457,7 +7456,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 	 * we need to flush the dirty pages again to make absolutely sure
 	 * that any outstanding dirty pages are on disk.
 	 */
-	count = iov_length(iov, nr_segs);
+	count = iov_length(iter->iov, iter->nr_segs);
 	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
 		     &BTRFS_I(inode)->runtime_flags))
 		filemap_fdatawrite_range(inode->i_mapping, offset, count);
@@ -7484,7 +7483,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 
 	ret = __blockdev_direct_IO(rw, iocb, inode,
 			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
-			iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
+			iter->iov, offset, iter->nr_segs,
+			btrfs_get_blocks_direct, NULL,
 			btrfs_submit_direct, flags);
 	if (rw & WRITE) {
 		if (ret < 0 && ret != -EIOCBQUEUED)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b53278c9fd97..342ca5e423f9 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1187,8 +1187,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
  * never get called.
  */
 static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
-			      const struct iovec *iov,
-			      loff_t pos, unsigned long nr_segs)
+			      struct iov_iter *iter,
+			      loff_t pos)
 {
 	WARN_ON(1);
 	return -EINVAL;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 2900d150654e..a4ccc39e6c11 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3702,8 +3702,8 @@ void cifs_oplock_break(struct work_struct *work)
  * Direct IO is not yet supported in the cached mode. 
  */
 static ssize_t
-cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov,
-               loff_t pos, unsigned long nr_segs)
+cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
+               loff_t pos)
 {
         /*
          * FIXME
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index d1c244d67667..3f9cafd73931 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -964,7 +964,7 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset,
 
  /* TODO: Should be easy enough to do proprly */
 static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	return 0;
 }
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index b1d2a4675d42..47fbe760a7f8 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -850,18 +850,19 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
 }
 
 static ssize_t
-ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs)
+ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+			loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 ext2_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				 iter->nr_segs, ext2_get_block);
 	if (ret < 0 && (rw & WRITE))
-		ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
+		ext2_write_failed(mapping, offset +
+				  iov_length(iter->iov, iter->nr_segs));
 	return ret;
 }
 
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index f5157d0d1b43..7a5c501dc31b 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1820,8 +1820,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
  * VFS code falls back into buffered path in that case so we are safe.
  */
 static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
-			const struct iovec *iov, loff_t offset,
-			unsigned long nr_segs)
+			struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -1829,10 +1828,10 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 	handle_t *handle;
 	ssize_t ret;
 	int orphan = 0;
-	size_t count = iov_length(iov, nr_segs);
+	size_t count = iov_length(iter->iov, iter->nr_segs);
 	int retries = 0;
 
-	trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+	trace_ext3_direct_IO_enter(inode, offset, count, rw);
 
 	if (rw == WRITE) {
 		loff_t final_size = offset + count;
@@ -1856,15 +1855,15 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 	}
 
 retry:
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 ext3_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				 iter->nr_segs, ext3_get_block);
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again.
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + count;
 
 		if (end > isize)
 			ext3_truncate_failed_direct_write(inode);
@@ -1909,8 +1908,7 @@ retry:
 			ret = err;
 	}
 out:
-	trace_ext3_direct_IO_exit(inode, offset,
-				iov_length(iov, nr_segs), rw, ret);
+	trace_ext3_direct_IO_exit(inode, offset, count, rw, ret);
 	return ret;
 }
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d7b7462a0e13..f51db730da39 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3222,8 +3222,7 @@ retake_lock:
 }
 
 static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
-			      const struct iovec *iov, loff_t offset,
-			      unsigned long nr_segs)
+			      struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -3239,13 +3238,13 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
 	if (ext4_has_inline_data(inode))
 		return 0;
 
-	trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+	trace_ext4_direct_IO_enter(inode, offset, iov_length(iter->iov, iter->nr_segs), rw);
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+		ret = ext4_ext_direct_IO(rw, iocb, iter->iov, offset, iter->nr_segs);
 	else
-		ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+		ret = ext4_ind_direct_IO(rw, iocb, iter->iov, offset, iter->nr_segs);
 	trace_ext4_direct_IO_exit(inode, offset,
-				iov_length(iov, nr_segs), rw, ret);
+				iov_length(iter->iov, iter->nr_segs), rw, ret);
 	return ret;
 }
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 45abd60e2bff..3a6ef121c095 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1010,7 +1010,7 @@ static int check_direct_IO(struct inode *inode, int rw,
 }
 
 static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -1019,11 +1019,11 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
 	if (f2fs_has_inline_data(inode))
 		return 0;
 
-	if (check_direct_IO(inode, rw, iov, offset, nr_segs))
+	if (check_direct_IO(inode, rw, iter->iov, offset, iter->nr_segs))
 		return 0;
 
-	return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-							get_data_block);
+	return blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				  iter->nr_segs, get_data_block);
 }
 
 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index b3361fe2bcb5..d5237a199055 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -185,8 +185,8 @@ static int fat_write_end(struct file *file, struct address_space *mapping,
 }
 
 static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
-			     const struct iovec *iov,
-			     loff_t offset, unsigned long nr_segs)
+			     struct iov_iter *iter,
+			     loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -203,7 +203,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
 		 *
 		 * Return 0, and fallback to normal buffered write.
 		 */
-		loff_t size = offset + iov_length(iov, nr_segs);
+		loff_t size = offset + iov_length(iter->iov, iter->nr_segs);
 		if (MSDOS_I(inode)->mmu_private < size)
 			return 0;
 	}
@@ -212,10 +212,11 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
 	 * FAT need to use the DIO_LOCKING for avoiding the race
 	 * condition of fat_get_block() and ->truncate().
 	 */
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 fat_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				 iter->nr_segs, fat_get_block);
 	if (ret < 0 && (rw & WRITE))
-		fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
+		fat_write_failed(mapping, offset +
+			         iov_length(iter->iov, iter->nr_segs));
 
 	return ret;
 }
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9c7f346879e7..17d96f36df15 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2890,8 +2890,8 @@ static inline loff_t fuse_round_up(loff_t off)
 }
 
 static ssize_t
-fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs)
+fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+			loff_t offset)
 {
 	ssize_t ret = 0;
 	struct file *file = iocb->ki_filp;
@@ -2900,7 +2900,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	loff_t pos = 0;
 	struct inode *inode;
 	loff_t i_size;
-	size_t count = iov_length(iov, nr_segs);
+	size_t count = iov_length(iter->iov, iter->nr_segs);
 	struct fuse_io_priv *io;
 
 	pos = offset;
@@ -2944,9 +2944,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 		io->async = false;
 
 	if (rw == WRITE)
-		ret = __fuse_direct_write(io, iov, nr_segs, &pos);
+		ret = __fuse_direct_write(io, iter->iov, iter->nr_segs, &pos);
 	else
-		ret = __fuse_direct_read(io, iov, nr_segs, &pos, count);
+		ret = __fuse_direct_read(io, iter->iov, iter->nr_segs, &pos, count);
 
 	if (io->async) {
 		fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ce62dcac90b6..e84ddaa42104 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1041,8 +1041,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
 
 
 static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
-			      const struct iovec *iov, loff_t offset,
-			      unsigned long nr_segs)
+			      struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
@@ -1082,7 +1081,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 	 */
 	if (mapping->nrpages) {
 		loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
-		loff_t len = iov_length(iov, nr_segs);
+		loff_t len = iov_length(iter->iov, iter->nr_segs);
 		loff_t end = PAGE_ALIGN(offset + len) - 1;
 
 		rv = 0;
@@ -1097,9 +1096,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 			truncate_inode_pages_range(mapping, lstart, end);
 	}
 
-	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				  offset, nr_segs, gfs2_get_block_direct,
-				  NULL, NULL, 0);
+	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
+				  iter->iov, offset, iter->nr_segs,
+				  gfs2_get_block_direct, NULL, NULL, 0);
 out:
 	gfs2_glock_dq(&gh);
 	gfs2_holder_uninit(&gh);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 9e2fecd62f62..09cff13528c5 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -125,15 +125,15 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
 }
 
 static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file_inode(file)->i_mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 hfs_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				 iter->nr_segs, hfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
@@ -141,7 +141,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + iov_length(iter->iov, iter->nr_segs);
 
 		if (end > isize)
 			hfs_write_failed(mapping, end);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index a4f45bd88a63..7f894a5b5eaf 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -123,14 +123,14 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
 }
 
 static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
-		const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+		struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file_inode(file)->i_mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, iter->nr_segs,
 				 hfsplus_get_block);
 
 	/*
@@ -139,7 +139,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + iov_length(iter->iov, iter->nr_segs);
 
 		if (end > isize)
 			hfsplus_write_failed(mapping, end);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 6f8fe72c2a7a..7052744d5107 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -331,15 +331,15 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block)
 }
 
 static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
-	const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+	struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = file->f_mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				 jfs_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				 iter->nr_segs, jfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
@@ -347,7 +347,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + iov_length(iter->iov, iter->nr_segs);
 
 		if (end > isize)
 			jfs_write_failed(mapping, end);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b8797ae6831f..e9cde3935001 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -121,20 +121,20 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
  * shunt off direct read and write requests before the VFS gets them,
  * so this method is only ever called for swap.
  */
-ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
 {
 #ifndef CONFIG_NFS_SWAP
 	dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
-			iocb->ki_filp, (long long) pos, nr_segs);
+			iocb->ki_filp, (long long) pos, iter->nr_segs);
 
 	return -EINVAL;
 #else
 	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
 
 	if (rw == READ || rw == KERNEL_READ)
-		return nfs_file_direct_read(iocb, iov, nr_segs, pos,
+		return nfs_file_direct_read(iocb, iter->iov, iter->nr_segs, pos,
 				rw == READ ? true : false);
-	return nfs_file_direct_write(iocb, iov, nr_segs, pos,
+	return nfs_file_direct_write(iocb, iter->iov, iter->nr_segs, pos,
 				rw == WRITE ? true : false);
 #endif /* CONFIG_NFS_SWAP */
 }
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b9c5726120e3..1c0e8fedc095 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -298,8 +298,8 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
 }
 
 static ssize_t
-nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-		loff_t offset, unsigned long nr_segs)
+nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+		loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -310,8 +310,8 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 		return 0;
 
 	/* Needs synchronization with the cleaner */
-	size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				  nilfs_get_block);
+	size = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				  iter->nr_segs, nilfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
@@ -319,7 +319,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	 */
 	if (unlikely((rw & WRITE) && size < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + iov_length(iter->iov, iter->nr_segs);
 
 		if (end > isize)
 			nilfs_write_failed(mapping, end);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index d310d12a9adc..799fd0afcb35 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -599,9 +599,8 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
 
 static ssize_t ocfs2_direct_IO(int rw,
 			       struct kiocb *iocb,
-			       const struct iovec *iov,
-			       loff_t offset,
-			       unsigned long nr_segs)
+			       struct iov_iter *iter,
+			       loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file_inode(file)->i_mapping->host;
@@ -618,7 +617,7 @@ static ssize_t ocfs2_direct_IO(int rw,
 		return 0;
 
 	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
-				    iov, offset, nr_segs,
+				    iter->iov, offset, iter->nr_segs,
 				    ocfs2_direct_IO_get_blocks,
 				    ocfs2_dio_end_io, NULL, 0);
 }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index bc8b8009897d..17bf4c41a509 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3083,15 +3083,14 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
 /* We thank Mingming Cao for helping us understand in great detail what
    to do in this section of the code. */
 static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
-				  const struct iovec *iov, loff_t offset,
-				  unsigned long nr_segs)
+				  struct iov_iter *iter, loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
-				  reiserfs_get_blocks_direct_io);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
+				 iter->nr_segs, reiserfs_get_blocks_direct_io);
 
 	/*
 	 * In case of error extending write may have instantiated a few
@@ -3099,7 +3098,7 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
 	 */
 	if (unlikely((rw & WRITE) && ret < 0)) {
 		loff_t isize = i_size_read(inode);
-		loff_t end = offset + iov_length(iov, nr_segs);
+		loff_t end = offset + iov_length(iter->iov, iter->nr_segs);
 
 		if ((end > isize) && inode_newsize_ok(inode, isize) == 0) {
 			truncate_setsize(inode, isize);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index d2c170f8b035..ade886401658 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -119,8 +119,8 @@ static int udf_adinicb_write_end(struct file *file,
 }
 
 static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb,
-				     const struct iovec *iov,
-				     loff_t offset, unsigned long nr_segs)
+				     struct iov_iter *iter,
+				     loff_t offset)
 {
 	/* Fallback to buffered I/O. */
 	return 0;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 5d643706212f..5b184c7f7dcb 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -217,18 +217,18 @@ static int udf_write_begin(struct file *file, struct address_space *mapping,
 }
 
 static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
-			     const struct iovec *iov,
-			     loff_t offset, unsigned long nr_segs)
+			     struct iov_iter *iter,
+			     loff_t offset)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	struct inode *inode = mapping->host;
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, iter->nr_segs,
 				  udf_get_block);
 	if (unlikely(ret < 0 && (rw & WRITE)))
-		udf_write_failed(mapping, offset + iov_length(iov, nr_segs));
+		udf_write_failed(mapping, offset + iov_length(iter->iov, iter->nr_segs));
 	return ret;
 }
 
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0479c32c5eb1..330d7b1c4be3 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1449,9 +1449,8 @@ STATIC ssize_t
 xfs_vm_direct_IO(
 	int			rw,
 	struct kiocb		*iocb,
-	const struct iovec	*iov,
-	loff_t			offset,
-	unsigned long		nr_segs)
+	struct iov_iter		*iter,
+	loff_t			offset)
 {
 	struct inode		*inode = iocb->ki_filp->f_mapping->host;
 	struct block_device	*bdev = xfs_find_bdev_for_inode(inode);
@@ -1459,7 +1458,7 @@ xfs_vm_direct_IO(
 	ssize_t			ret;
 
 	if (rw & WRITE) {
-		size_t size = iov_length(iov, nr_segs);
+		size_t size = iov_length(iter->iov, iter->nr_segs);
 
 		/*
 		 * We cannot preallocate a size update transaction here as we
@@ -1471,16 +1470,16 @@ xfs_vm_direct_IO(
 		if (offset + size > XFS_I(inode)->i_d.di_size)
 			ioend->io_isdirect = 1;
 
-		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-					    offset, nr_segs,
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter->iov,
+					    offset, iter->nr_segs,
 					    xfs_get_blocks_direct,
 					    xfs_end_io_direct_write, NULL,
 					    DIO_ASYNC_EXTEND);
 		if (ret != -EIOCBQUEUED && iocb->private)
 			goto out_destroy_ioend;
 	} else {
-		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
-					    offset, nr_segs,
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter->iov,
+					    offset, iter->nr_segs,
 					    xfs_get_blocks_direct,
 					    NULL, NULL, 0);
 	}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 796de742fe4a..399a338c92b5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -343,8 +343,7 @@ struct address_space_operations {
 	void (*invalidatepage) (struct page *, unsigned int, unsigned int);
 	int (*releasepage) (struct page *, gfp_t);
 	void (*freepage)(struct page *);
-	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
-			loff_t offset, unsigned long nr_segs);
+	ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int,
 						void **, unsigned long *);
 	/*
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fa6918b0f829..5a0d78ec739d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -459,8 +459,7 @@ extern int nfs3_removexattr (struct dentry *, const char *name);
 /*
  * linux/fs/nfs/direct.c
  */
-extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,
-			unsigned long);
+extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
 			const struct iovec *iov, unsigned long nr_segs,
 			loff_t pos, bool uio);
diff --git a/mm/filemap.c b/mm/filemap.c
index 7c1417b0bd7b..139641274f1e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1699,10 +1699,9 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 		size = i_size_read(inode);
 		retval = filemap_write_and_wait_range(mapping, pos,
 					pos + iov_length(iov, nr_segs) - 1);
-		if (!retval) {
-			retval = mapping->a_ops->direct_IO(READ, iocb,
-							   iov, pos, nr_segs);
-		}
+		if (!retval)
+			retval = mapping->a_ops->direct_IO(READ, iocb, &i, pos);
+
 		if (retval > 0) {
 			*ppos = pos + retval;
 			count -= retval;
@@ -2383,7 +2382,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 		}
 	}
 
-	written = mapping->a_ops->direct_IO(WRITE, iocb, from->iov, pos, from->nr_segs);
+	written = mapping->a_ops->direct_IO(WRITE, iocb, from, pos);
 
 	/*
 	 * Finally, try again to invalidate clean pages which might have been
diff --git a/mm/page_io.c b/mm/page_io.c
index 7c59ef681381..0ed0644c73db 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -263,16 +263,18 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 			.iov_base = kmap(page),
 			.iov_len  = PAGE_SIZE,
 		};
+		struct iov_iter from;
 
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
 		kiocb.ki_nbytes = PAGE_SIZE;
+		iov_iter_init(&from, &iov, 1, PAGE_SIZE, 0);
 
 		set_page_writeback(page);
 		unlock_page(page);
 		ret = mapping->a_ops->direct_IO(KERNEL_WRITE,
-						&kiocb, &iov,
-						kiocb.ki_pos, 1);
+						&kiocb, &from,
+						kiocb.ki_pos);
 		kunmap(page);
 		if (ret == PAGE_SIZE) {
 			count_vm_event(PSWPOUT);
-- 
cgit 


From 619d30b4b8c488042b4a720ca79dccc346d1a516 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 4 Mar 2014 21:53:33 -0500
Subject: convert the guts of nfs_direct_IO() to iov_iter

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/nfs/direct.c        | 46 +++++++++++++++++++++-------------------------
 fs/nfs/file.c          | 18 ++++++++++++------
 include/linux/nfs_fs.h |  4 ++--
 3 files changed, 35 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e9cde3935001..21723149668b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -132,9 +132,9 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t
 	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
 
 	if (rw == READ || rw == KERNEL_READ)
-		return nfs_file_direct_read(iocb, iter->iov, iter->nr_segs, pos,
+		return nfs_file_direct_read(iocb, iter, pos,
 				rw == READ ? true : false);
-	return nfs_file_direct_write(iocb, iter->iov, iter->nr_segs, pos,
+	return nfs_file_direct_write(iocb, iter, pos,
 				rw == WRITE ? true : false);
 #endif /* CONFIG_NFS_SWAP */
 }
@@ -414,8 +414,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 }
 
 static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
-					      const struct iovec *iov,
-					      unsigned long nr_segs,
+					      struct iov_iter *iter,
 					      loff_t pos, bool uio)
 {
 	struct nfs_pageio_descriptor desc;
@@ -430,8 +429,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	desc.pg_dreq = dreq;
 	atomic_inc(&inode->i_dio_count);
 
-	for (seg = 0; seg < nr_segs; seg++) {
-		const struct iovec *vec = &iov[seg];
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		const struct iovec *vec = &iter->iov[seg];
 		result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
 		if (result < 0)
 			break;
@@ -461,8 +460,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 /**
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
- * @iov: vector of user buffers into which to read data
- * @nr_segs: size of iov vector
+ * @iter: vector of user buffers into which to read data
  * @pos: byte offset in file where reading starts
  *
  * We use this function for direct reads instead of calling
@@ -479,8 +477,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
  * client must read the updated atime from the server back into its
  * cache.
  */
-ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
+				loff_t pos, bool uio)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -490,7 +488,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t result = -EINVAL;
 	size_t count;
 
-	count = iov_length(iov, nr_segs);
+	count = iov_length(iter->iov, iter->nr_segs);
 	nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
 
 	dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
@@ -513,7 +511,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 		goto out_unlock;
 
 	dreq->inode = inode;
-	dreq->bytes_left = iov_length(iov, nr_segs);
+	dreq->bytes_left = count;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	l_ctx = nfs_get_lock_context(dreq->ctx);
 	if (IS_ERR(l_ctx)) {
@@ -524,8 +522,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	NFS_I(inode)->read_io += iov_length(iov, nr_segs);
-	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
+	NFS_I(inode)->read_io += count;
+	result = nfs_direct_read_schedule_iovec(dreq, iter, pos, uio);
 
 	mutex_unlock(&inode->i_mutex);
 
@@ -864,8 +862,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
 };
 
 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
-					       const struct iovec *iov,
-					       unsigned long nr_segs,
+					       struct iov_iter *iter,
 					       loff_t pos, bool uio)
 {
 	struct nfs_pageio_descriptor desc;
@@ -880,9 +877,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	get_dreq(dreq);
 	atomic_inc(&inode->i_dio_count);
 
-	NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs);
-	for (seg = 0; seg < nr_segs; seg++) {
-		const struct iovec *vec = &iov[seg];
+	NFS_I(dreq->inode)->write_io += iov_length(iter->iov, iter->nr_segs);
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		const struct iovec *vec = &iter->iov[seg];
 		result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
 		if (result < 0)
 			break;
@@ -911,8 +908,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 /**
  * nfs_file_direct_write - file direct write operation for NFS files
  * @iocb: target I/O control block
- * @iov: vector of user buffers from which to write data
- * @nr_segs: size of iov vector
+ * @iter: vector of user buffers from which to write data
  * @pos: byte offset in file where writing starts
  *
  * We use this function for direct writes instead of calling
@@ -930,8 +926,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
  * Note that O_APPEND is not supported for NFS direct writes, as there
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+				loff_t pos, bool uio)
 {
 	ssize_t result = -EINVAL;
 	struct file *file = iocb->ki_filp;
@@ -942,7 +938,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	loff_t end;
 	size_t count;
 
-	count = iov_length(iov, nr_segs);
+	count = iov_length(iter->iov, iter->nr_segs);
 	end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
 
 	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
@@ -993,7 +989,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
+	result = nfs_direct_write_schedule_iovec(dreq, iter, pos, uio);
 
 	if (mapping->nrpages) {
 		invalidate_inode_pages2_range(mapping,
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 284ca901fe16..3d01b152894e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -169,14 +169,18 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
+	size_t count = iov_length(iov, nr_segs);
 	ssize_t result;
+	struct iov_iter to;
+
+	iov_iter_init(&to, iov, nr_segs, count, 0);
 
 	if (iocb->ki_filp->f_flags & O_DIRECT)
-		return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
+		return nfs_file_direct_read(iocb, &to, pos, true);
 
-	dprintk("NFS: read(%pD2, %lu@%lu)\n",
+	dprintk("NFS: read(%pD2, %zu@%lu)\n",
 		iocb->ki_filp,
-		(unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
+		count, (unsigned long) pos);
 
 	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
@@ -643,16 +647,18 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 	unsigned long written = 0;
 	ssize_t result;
 	size_t count = iov_length(iov, nr_segs);
+	struct iov_iter from;
+	iov_iter_init(&from, iov, nr_segs, count, 0);
 
 	result = nfs_key_timeout_notify(file, inode);
 	if (result)
 		return result;
 
 	if (file->f_flags & O_DIRECT)
-		return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
+		return nfs_file_direct_write(iocb, &from, pos, true);
 
-	dprintk("NFS: write(%pD2, %lu@%Ld)\n",
-		file, (unsigned long) count, (long long) pos);
+	dprintk("NFS: write(%pD2, %zu@%Ld)\n",
+		file, count, (long long) pos);
 
 	result = -EBUSY;
 	if (IS_SWAPFILE(inode))
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 5a0d78ec739d..0a82b6fbae8a 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -461,10 +461,10 @@ extern int nfs3_removexattr (struct dentry *, const char *name);
  */
 extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t nfs_file_direct_read(struct kiocb *iocb,
-			const struct iovec *iov, unsigned long nr_segs,
+			struct iov_iter *iter,
 			loff_t pos, bool uio);
 extern ssize_t nfs_file_direct_write(struct kiocb *iocb,
-			const struct iovec *iov, unsigned long nr_segs,
+			struct iov_iter *iter,
 			loff_t pos, bool uio);
 
 /*
-- 
cgit 


From 31b140398ce56ab41646eda7f02bcb78d6a4c916 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 5 Mar 2014 01:33:16 -0500
Subject: switch {__,}blockdev_direct_IO() to iov_iter

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c      |  4 ++--
 fs/btrfs/inode.c    |  3 +--
 fs/direct-io.c      | 33 ++++++++++++++++-----------------
 fs/ext2/inode.c     |  3 +--
 fs/ext3/inode.c     |  3 +--
 fs/ext4/indirect.c  |  7 +++----
 fs/ext4/inode.c     |  4 ++--
 fs/f2fs/data.c      |  4 ++--
 fs/fat/inode.c      |  3 +--
 fs/gfs2/aops.c      |  2 +-
 fs/hfs/inode.c      |  3 +--
 fs/hfsplus/inode.c  |  2 +-
 fs/jfs/inode.c      |  3 +--
 fs/nilfs2/inode.c   |  4 ++--
 fs/ocfs2/aops.c     |  2 +-
 fs/reiserfs/inode.c |  4 ++--
 fs/udf/inode.c      |  3 +--
 fs/xfs/xfs_aops.c   | 10 ++++------
 include/linux/fs.h  | 12 ++++++------
 19 files changed, 49 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 938fc707d769..937e3011ed58 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -171,8 +171,8 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 
-	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter->iov,
-				    offset, iter->nr_segs, blkdev_get_block,
+	return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter,
+				    offset, blkdev_get_block,
 				    NULL, NULL, 0);
 }
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c46a025d0c4b..b0b8fa0efba3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7483,8 +7483,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 
 	ret = __blockdev_direct_IO(rw, iocb, inode,
 			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
-			iter->iov, offset, iter->nr_segs,
-			btrfs_get_blocks_direct, NULL,
+			iter, offset, btrfs_get_blocks_direct, NULL,
 			btrfs_submit_direct, flags);
 	if (rw & WRITE) {
 		if (ret < 0 && ret != -EIOCBQUEUED)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 31ba0935e32e..1c677899b989 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1107,8 +1107,8 @@ static inline int drop_refcount(struct dio *dio)
  */
 static inline ssize_t
 do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset, 
-	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	struct block_device *bdev, struct iov_iter *iter, loff_t offset, 
+	get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags)
 {
 	int seg;
@@ -1143,9 +1143,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	}
 
 	/* Check the memory alignment.  Blocks cannot straddle pages */
-	for (seg = 0; seg < nr_segs; seg++) {
-		addr = (unsigned long)iov[seg].iov_base;
-		size = iov[seg].iov_len;
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		addr = (unsigned long)iter->iov[seg].iov_base;
+		size = iter->iov[seg].iov_len;
 		end += size;
 		if (unlikely((addr & blocksize_mask) ||
 			     (size & blocksize_mask))) {
@@ -1256,18 +1256,18 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	if (unlikely(sdio.blkfactor))
 		sdio.pages_in_io = 2;
 
-	for (seg = 0; seg < nr_segs; seg++) {
-		user_addr = (unsigned long)iov[seg].iov_base;
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		user_addr = (unsigned long)iter->iov[seg].iov_base;
 		sdio.pages_in_io +=
-			((user_addr + iov[seg].iov_len + PAGE_SIZE-1) /
+			((user_addr + iter->iov[seg].iov_len + PAGE_SIZE-1) /
 				PAGE_SIZE - user_addr / PAGE_SIZE);
 	}
 
 	blk_start_plug(&plug);
 
-	for (seg = 0; seg < nr_segs; seg++) {
-		user_addr = (unsigned long)iov[seg].iov_base;
-		sdio.size += bytes = iov[seg].iov_len;
+	for (seg = 0; seg < iter->nr_segs; seg++) {
+		user_addr = (unsigned long)iter->iov[seg].iov_base;
+		sdio.size += bytes = iter->iov[seg].iov_len;
 
 		/* Index into the first page of the first block */
 		sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
@@ -1288,7 +1288,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 
 		retval = do_direct_IO(dio, &sdio, &map_bh);
 
-		dio->result += iov[seg].iov_len -
+		dio->result += iter->iov[seg].iov_len -
 			((sdio.final_block_in_request - sdio.block_in_file) <<
 					blkbits);
 
@@ -1365,8 +1365,8 @@ out:
 
 ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset,
-	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	struct block_device *bdev, struct iov_iter *iter, loff_t offset,
+	get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags)
 {
 	/*
@@ -1381,9 +1381,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	prefetch(bdev->bd_queue);
 	prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
 
-	return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				     nr_segs, get_block, end_io,
-				     submit_io, flags);
+	return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset,
+				     get_block, end_io, submit_io, flags);
 }
 
 EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 116e809aa7cb..36d35c36311d 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -859,8 +859,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				 iter->nr_segs, ext2_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block);
 	if (ret < 0 && (rw & WRITE))
 		ext2_write_failed(mapping, offset + count);
 	return ret;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 8582ae2c80b0..4d32133a76c4 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1855,8 +1855,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
 	}
 
 retry:
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				 iter->nr_segs, ext3_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block);
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again.
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 123898a6af05..8a57e9fcd1b9 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -686,14 +686,13 @@ retry:
 			goto locked;
 		}
 		ret = __blockdev_direct_IO(rw, iocb, inode,
-				 inode->i_sb->s_bdev, iter->iov,
-				 offset, iter->nr_segs,
+				 inode->i_sb->s_bdev, iter, offset,
 				 ext4_get_block, NULL, NULL, 0);
 		inode_dio_done(inode);
 	} else {
 locked:
-		ret = blockdev_direct_IO(rw, iocb, inode, iter->iov,
-				 offset, iter->nr_segs, ext4_get_block);
+		ret = blockdev_direct_IO(rw, iocb, inode, iter,
+				 offset, ext4_get_block);
 
 		if (unlikely((rw & WRITE) && ret < 0)) {
 			loff_t isize = i_size_read(inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2b993579a968..e5718385a037 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3166,8 +3166,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 		dio_flags = DIO_LOCKING;
 	}
 	ret = __blockdev_direct_IO(rw, iocb, inode,
-				   inode->i_sb->s_bdev, iter->iov,
-				   offset, iter->nr_segs,
+				   inode->i_sb->s_bdev, iter,
+				   offset,
 				   get_block_func,
 				   ext4_end_io_dio,
 				   NULL,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 3a6ef121c095..151488f27755 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1022,8 +1022,8 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
 	if (check_direct_IO(inode, rw, iter->iov, offset, iter->nr_segs))
 		return 0;
 
-	return blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				  iter->nr_segs, get_data_block);
+	return blockdev_direct_IO(rw, iocb, inode, iter, offset,
+				  get_data_block);
 }
 
 static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 154a6f9d3189..385cce464e82 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -213,8 +213,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
 	 * FAT need to use the DIO_LOCKING for avoiding the race
 	 * condition of fat_get_block() and ->truncate().
 	 */
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				 iter->nr_segs, fat_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block);
 	if (ret < 0 && (rw & WRITE))
 		fat_write_failed(mapping, offset + count);
 
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 228a12d2afa9..910838951d66 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -1097,7 +1097,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
 	}
 
 	rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
-				  iter->iov, offset, iter->nr_segs,
+				  iter, offset,
 				  gfs2_get_block_direct, NULL, NULL, 0);
 out:
 	gfs2_glock_dq(&gh);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index dc69e8f31581..f5fb09ebc850 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -133,8 +133,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				 iter->nr_segs, hfs_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index e6b1251af47a..76b930ff58ae 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -131,7 +131,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, iter->nr_segs,
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, 
 				 hfsplus_get_block);
 
 	/*
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 6cde5928693b..bd3df1ca3c9b 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -339,8 +339,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				 iter->nr_segs, jfs_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 7aaf913e8709..6252b173a465 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -311,8 +311,8 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 		return 0;
 
 	/* Needs synchronization with the cleaner */
-	size = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				  iter->nr_segs, nilfs_get_block);
+	size = blockdev_direct_IO(rw, iocb, inode, iter, offset,
+				  nilfs_get_block);
 
 	/*
 	 * In case of error extending write may have instantiated a few
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 799fd0afcb35..4a231a166cf8 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -617,7 +617,7 @@ static ssize_t ocfs2_direct_IO(int rw,
 		return 0;
 
 	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
-				    iter->iov, offset, iter->nr_segs,
+				    iter, offset,
 				    ocfs2_direct_IO_get_blocks,
 				    ocfs2_dio_end_io, NULL, 0);
 }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 723affe921f1..b8003e8dd1f4 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3090,8 +3090,8 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset,
-				 iter->nr_segs, reiserfs_get_blocks_direct_io);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset,
+				 reiserfs_get_blocks_direct_io);
 
 	/*
 	 * In case of error extending write may have instantiated a few
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 28984baf6194..236cd48184c2 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -226,8 +226,7 @@ static ssize_t udf_direct_IO(int rw, struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t ret;
 
-	ret = blockdev_direct_IO(rw, iocb, inode, iter->iov, offset, iter->nr_segs,
-				  udf_get_block);
+	ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block);
 	if (unlikely(ret < 0 && (rw & WRITE)))
 		udf_write_failed(mapping, offset + count);
 	return ret;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 6462b3186784..08d13e395252 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1470,17 +1470,15 @@ xfs_vm_direct_IO(
 		if (offset + size > XFS_I(inode)->i_d.di_size)
 			ioend->io_isdirect = 1;
 
-		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter->iov,
-					    offset, iter->nr_segs,
-					    xfs_get_blocks_direct,
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+					    offset, xfs_get_blocks_direct,
 					    xfs_end_io_direct_write, NULL,
 					    DIO_ASYNC_EXTEND);
 		if (ret != -EIOCBQUEUED && iocb->private)
 			goto out_destroy_ioend;
 	} else {
-		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter->iov,
-					    offset, iter->nr_segs,
-					    xfs_get_blocks_direct,
+		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter,
+					    offset, xfs_get_blocks_direct,
 					    NULL, NULL, 0);
 	}
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 399a338c92b5..946a9484844f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2474,16 +2474,16 @@ enum {
 void dio_end_io(struct bio *bio, int error);
 
 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset,
-	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	struct block_device *bdev, struct iov_iter *iter, loff_t offset,
+	get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags);
 
 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
-		struct inode *inode, const struct iovec *iov, loff_t offset,
-		unsigned long nr_segs, get_block_t get_block)
+		struct inode *inode, struct iov_iter *iter, loff_t offset,
+		get_block_t get_block)
 {
-	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				    offset, nr_segs, get_block, NULL, NULL,
+	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iter,
+				    offset, get_block, NULL, NULL,
 				    DIO_LOCKING | DIO_SKIP_HOLES);
 }
 #endif
-- 
cgit 


From 886a39115005ced8b15ab067c9c2a8d546b40a5e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 5 Mar 2014 13:50:45 -0500
Subject: new primitive: iov_iter_alignment()

Returns a value that is as badly aligned as the worst remaining segment
in the iov_iter (the OR of the remaining segments' base addresses and
lengths).  Use it instead of the open-coded equivalents.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/staging/lustre/lustre/llite/rw26.c |  7 ++-----
 fs/direct-io.c                             | 27 +++++----------------------
 include/linux/uio.h                        |  2 ++
 mm/iov_iter.c                              | 25 +++++++++++++++++++++++++
 4 files changed, 34 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index 38a5b580e7f0..f718585c9e08 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -391,11 +391,8 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 	       MAX_DIO_SIZE >> PAGE_CACHE_SHIFT);
 
 	/* Check that all user buffers are aligned as well */
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		if (((unsigned long)iter->iov[seg].iov_base & ~CFS_PAGE_MASK) ||
-		    (iter->iov[seg].iov_len & ~CFS_PAGE_MASK))
-			return -EINVAL;
-	}
+	if (iov_iter_alignment(iter) & ~CFS_PAGE_MASK)
+		return -EINVAL;
 
 	env = cl_env_get(&refcheck);
 	LASSERT(!IS_ERR(env));
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1c677899b989..adfa1fb33456 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1112,19 +1112,18 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	dio_submit_t submit_io,	int flags)
 {
 	int seg;
-	size_t size;
-	unsigned long addr;
 	unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
 	unsigned blkbits = i_blkbits;
 	unsigned blocksize_mask = (1 << blkbits) - 1;
 	ssize_t retval = -EINVAL;
-	loff_t end = offset;
+	loff_t end = offset + iov_iter_count(iter);
 	struct dio *dio;
 	struct dio_submit sdio = { 0, };
 	unsigned long user_addr;
 	size_t bytes;
 	struct buffer_head map_bh = { 0, };
 	struct blk_plug plug;
+	unsigned long align = offset | iov_iter_alignment(iter);
 
 	if (rw & WRITE)
 		rw = WRITE_ODIRECT;
@@ -1134,32 +1133,16 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	 * the early prefetch in the caller enough time.
 	 */
 
-	if (offset & blocksize_mask) {
+	if (align & blocksize_mask) {
 		if (bdev)
 			blkbits = blksize_bits(bdev_logical_block_size(bdev));
 		blocksize_mask = (1 << blkbits) - 1;
-		if (offset & blocksize_mask)
+		if (align & blocksize_mask)
 			goto out;
 	}
 
-	/* Check the memory alignment.  Blocks cannot straddle pages */
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		addr = (unsigned long)iter->iov[seg].iov_base;
-		size = iter->iov[seg].iov_len;
-		end += size;
-		if (unlikely((addr & blocksize_mask) ||
-			     (size & blocksize_mask))) {
-			if (bdev)
-				blkbits = blksize_bits(
-					 bdev_logical_block_size(bdev));
-			blocksize_mask = (1 << blkbits) - 1;
-			if ((addr & blocksize_mask) || (size & blocksize_mask))
-				goto out;
-		}
-	}
-
 	/* watch out for a 0 len io from a tricksy fs */
-	if (rw == READ && end == offset)
+	if (rw == READ && !iov_iter_count(iter))
 		return 0;
 
 	dio = kmem_cache_alloc(dio_cache, GFP_KERNEL);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index abbe83ded630..4ee17413fe1b 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -67,6 +67,7 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
+unsigned long iov_iter_alignment(const struct iov_iter *i);
 
 static inline void iov_iter_init(struct iov_iter *i,
 			const struct iovec *iov, unsigned long nr_segs,
@@ -88,4 +89,5 @@ static inline size_t iov_iter_count(struct iov_iter *i)
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
 
+
 #endif
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 22ec1ef068a8..2f762cc21080 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -195,3 +195,28 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 		return min(i->count, iov->iov_len - i->iov_offset);
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
+
+unsigned long iov_iter_alignment(const struct iov_iter *i)
+{
+	const struct iovec *iov = i->iov;
+	unsigned long res;
+	size_t size = i->count;
+	size_t n;
+
+	if (!size)
+		return 0;
+
+	res = (unsigned long)iov->iov_base + i->iov_offset;
+	n = iov->iov_len - i->iov_offset;
+	if (n >= size)
+		return res | size;
+	size -= n;
+	res |= n;
+	while (size > (++iov)->iov_len) {
+		res |= (unsigned long)iov->iov_base | iov->iov_len;
+		size -= iov->iov_len;
+	}
+	res |= (unsigned long)iov->iov_base | size;
+	return res;
+}
+EXPORT_SYMBOL(iov_iter_alignment);
-- 
cgit 


From ed978a811ec528dbe40243605c3afab55892f722 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 5 Mar 2014 22:53:04 -0500
Subject: new helper: generic_file_read_iter()

iov_iter-using variant of generic_file_aio_read().  Some callers
converted.  Note that it's still not quite there for use as ->read_iter() -
we depend on having zero iter->iov_offset in O_DIRECT case.  Fortunately,
that's true for all converted callers (and for generic_file_aio_read() itself).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ceph/file.c     | 15 +-----------
 fs/nfs/file.c      |  2 +-
 include/linux/fs.h |  1 +
 mm/filemap.c       | 67 +++++++++++++++++++++++++++---------------------------
 4 files changed, 37 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d8f383d59449..910a3022eb27 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -833,24 +833,11 @@ again:
 		/* hmm, this isn't really async... */
 		ret = ceph_sync_read(iocb, &i, &checkeof);
 	} else {
-		/*
-		 * We can't modify the content of iov,
-		 * so we only read from beginning.
-		 *
-		 * When we switch generic_file_aio_read() to iov_iter, the
-		 * if () below will be removed -- AV
-		 */
-		if (read) {
-			iocb->ki_pos = pos;
-			len = iocb->ki_nbytes;
-			read = 0;
-			iov_iter_init(&i, iov, nr_segs, len, 0);
-		}
 		dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
 		     inode, ceph_vinop(inode), pos, (unsigned)len,
 		     ceph_cap_string(got));
 
-		ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
+		ret = generic_file_read_iter(iocb, &i);
 	}
 	dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
 	     inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3d01b152894e..a352bc6d613f 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -184,7 +184,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 
 	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
 	if (!result) {
-		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
+		result = generic_file_read_iter(iocb, &to);
 		if (result > 0)
 			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
 	}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 946a9484844f..d096ebc7f348 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2404,6 +2404,7 @@ extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
 		unsigned long size, pgoff_t pgoff);
 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *,
diff --git a/mm/filemap.c b/mm/filemap.c
index 866f4ae8223b..a7f79e90209c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1663,55 +1663,34 @@ out:
 	return written ? written : error;
 }
 
-/**
- * generic_file_aio_read - generic filesystem read routine
- * @iocb:	kernel I/O control block
- * @iov:	io vector request
- * @nr_segs:	number of segments in the iovec
- * @pos:	current file position
- *
- * This is the "read()" routine for all filesystems
- * that can use the page cache directly.
- */
 ssize_t
-generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
+generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
-	struct file *filp = iocb->ki_filp;
+	struct file *file = iocb->ki_filp;
 	ssize_t retval = 0;
-	size_t count;
 	loff_t *ppos = &iocb->ki_pos;
-	struct iov_iter i;
-
-	count = iov_length(iov, nr_segs);
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	loff_t pos = *ppos;
 
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
-	if (filp->f_flags & O_DIRECT) {
+	if (file->f_flags & O_DIRECT) {
+		struct address_space *mapping = file->f_mapping;
+		struct inode *inode = mapping->host;
+		size_t count = iov_iter_count(iter);
 		loff_t size;
-		struct address_space *mapping;
-		struct inode *inode;
 
-		mapping = filp->f_mapping;
-		inode = mapping->host;
 		if (!count)
 			goto out; /* skip atime */
 		size = i_size_read(inode);
 		retval = filemap_write_and_wait_range(mapping, pos,
 					pos + count - 1);
 		if (!retval) {
-			struct iov_iter data = i;
+			struct iov_iter data = *iter;
 			retval = mapping->a_ops->direct_IO(READ, iocb, &data, pos);
 		}
 
 		if (retval > 0) {
 			*ppos = pos + retval;
-			count -= retval;
-			/*
-			 * If we did a short DIO read we need to skip the
-			 * section of the iov that we've already read data into.
-			 */
-			iov_iter_advance(&i, retval);
+			iov_iter_advance(iter, retval);
 		}
 
 		/*
@@ -1722,16 +1701,38 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 		 * and return.  Otherwise fallthrough to buffered io for
 		 * the rest of the read.
 		 */
-		if (retval < 0 || !count || *ppos >= size) {
-			file_accessed(filp);
+		if (retval < 0 || !iov_iter_count(iter) || *ppos >= size) {
+			file_accessed(file);
 			goto out;
 		}
 	}
 
-	retval = do_generic_file_read(filp, ppos, &i, retval);
+	retval = do_generic_file_read(file, ppos, iter, retval);
 out:
 	return retval;
 }
+EXPORT_SYMBOL(generic_file_read_iter);
+
+/**
+ * generic_file_aio_read - generic filesystem read routine
+ * @iocb:	kernel I/O control block
+ * @iov:	io vector request
+ * @nr_segs:	number of segments in the iovec
+ * @pos:	current file position
+ *
+ * This is the "read()" routine for all filesystems
+ * that can use the page cache directly.
+ */
+ssize_t
+generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
+{
+	size_t count = iov_length(iov, nr_segs);
+	struct iov_iter i;
+
+	iov_iter_init(&i, iov, nr_segs, count, 0);
+	return generic_file_read_iter(iocb, &i);
+}
 EXPORT_SYMBOL(generic_file_aio_read);
 
 #ifdef CONFIG_MMU
-- 
cgit 


From 71d8e532b1549a478e6a6a8a44f309d050294d00 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 5 Mar 2014 19:28:09 -0500
Subject: start adding the tag to iov_iter

For now, just use the same thing we pass to ->direct_IO() - it's all
iovec-based at the moment.  Pass it explicitly to iov_iter_init() and
account for kvec vs. iovec in there, by the same kludge NFS ->direct_IO()
uses.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/file.c        |  2 +-
 fs/ceph/file.c         |  8 ++++----
 fs/cifs/file.c         |  4 ++--
 fs/fuse/file.c         |  6 +++---
 fs/nfs/file.c          |  4 ++--
 fs/ocfs2/file.c        |  2 +-
 fs/pipe.c              |  2 +-
 fs/splice.c            |  2 +-
 fs/xfs/xfs_file.c      |  4 ++--
 include/linux/uio.h    | 15 +++------------
 mm/filemap.c           |  4 ++--
 mm/iov_iter.c          | 15 +++++++++++++++
 mm/page_io.c           |  2 +-
 mm/process_vm_access.c |  4 ++--
 mm/shmem.c             |  2 +-
 15 files changed, 41 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a0a94a30d85a..f8cee205618a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1740,7 +1740,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		goto out;
 	}
 
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 
 	err = file_remove_suid(file);
 	if (err) {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 910a3022eb27..5b93cadedfbe 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -582,7 +582,7 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
 		CEPH_OSD_FLAG_ONDISK |
 		CEPH_OSD_FLAG_WRITE;
 
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 
 	while (iov_iter_count(&i) > 0) {
 		void __user *data = i.iov->iov_base + i.iov_offset;
@@ -703,7 +703,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
 		CEPH_OSD_FLAG_WRITE |
 		CEPH_OSD_FLAG_ACK;
 
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 
 	while ((len = iov_iter_count(&i)) > 0) {
 		size_t left;
@@ -808,7 +808,7 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	int checkeof = 0, read = 0;
 	struct iov_iter i;
 
-	iov_iter_init(&i, iov, nr_segs, len, 0);
+	iov_iter_init(&i, READ, iov, nr_segs, len);
 
 again:
 	dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
@@ -961,7 +961,7 @@ retry_snap:
 		 * are pending vmtruncate. So write and vmtruncate
 		 * can not run at the same time
 		 */
-		iov_iter_init(&from, iov, nr_segs, count, 0);
+		iov_iter_init(&from, WRITE, iov, nr_segs, count);
 		written = generic_perform_write(file, &from, pos);
 		if (likely(written >= 0))
 			iocb->ki_pos = pos + written;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index a4ccc39e6c11..15201c21ac88 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2424,7 +2424,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
 	else
 		pid = current->tgid;
 
-	iov_iter_init(&it, iov, nr_segs, len, 0);
+	iov_iter_init(&it, WRITE, iov, nr_segs, len);
 	do {
 		size_t save_len;
 
@@ -2854,7 +2854,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
 	if (!len)
 		return 0;
 
-	iov_iter_init(&to, iov, nr_segs, len, 0);
+	iov_iter_init(&to, READ, iov, nr_segs, len);
 
 	INIT_LIST_HEAD(&rdata_list);
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index fc54d04a41e2..4a5519ca253f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1217,7 +1217,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
 	if (err)
 		goto out;
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 
 	if (count == 0)
 		goto out;
@@ -1386,7 +1386,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
 	struct fuse_req *req;
 	struct iov_iter ii;
 
-	iov_iter_init(&ii, iov, nr_segs, count, 0);
+	iov_iter_init(&ii, write ? WRITE : READ, iov, nr_segs, count);
 
 	if (io->async)
 		req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii));
@@ -2367,7 +2367,7 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
 	if (!bytes)
 		return 0;
 
-	iov_iter_init(&ii, iov, nr_segs, bytes, 0);
+	iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes);
 
 	while (iov_iter_count(&ii)) {
 		struct page *page = pages[page_idx++];
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index a352bc6d613f..ead8f44f7973 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -173,7 +173,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t result;
 	struct iov_iter to;
 
-	iov_iter_init(&to, iov, nr_segs, count, 0);
+	iov_iter_init(&to, READ, iov, nr_segs, count);
 
 	if (iocb->ki_filp->f_flags & O_DIRECT)
 		return nfs_file_direct_read(iocb, &to, pos, true);
@@ -648,7 +648,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t result;
 	size_t count = iov_length(iov, nr_segs);
 	struct iov_iter from;
-	iov_iter_init(&from, iov, nr_segs, count, 0);
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
 
 	result = nfs_key_timeout_notify(file, inode);
 	if (result)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index d33c4ced0baf..9ce9ed7615c1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2361,7 +2361,7 @@ relock:
 	if (ret)
 		goto out_dio;
 
-	iov_iter_init(&from, iov, nr_segs, count, 0);
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
 	if (direct_io) {
 		written = generic_file_direct_write(iocb, &from, *ppos,
 						    count, ocount);
diff --git a/fs/pipe.c b/fs/pipe.c
index 034bffac3f97..cd4ccf07e772 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -287,7 +287,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 	if (unlikely(total_len == 0))
 		return 0;
 
-	iov_iter_init(&iter, iov, nr_segs, total_len, 0);
+	iov_iter_init(&iter, READ, iov, nr_segs, total_len);
 
 	do_wakeup = 0;
 	ret = 0;
diff --git a/fs/splice.c b/fs/splice.c
index 9bc07d2b53cf..f99e420744c7 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1548,7 +1548,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,
 	if (ret <= 0)
 		return ret;
 
-	iov_iter_init(&iter, iov, nr_segs, count, 0);
+	iov_iter_init(&iter, READ, iov, nr_segs, count);
 
 	sd.len = 0;
 	sd.total_len = count;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f0f8084a67be..762bb3e148a6 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -697,7 +697,7 @@ xfs_file_dio_aio_write(
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-	iov_iter_init(&from, iovp, nr_segs, count, 0);
+	iov_iter_init(&from, WRITE, iovp, nr_segs, count);
 	ret = generic_file_direct_write(iocb, &from, pos, count, ocount);
 
 out:
@@ -731,7 +731,7 @@ xfs_file_buffered_aio_write(
 	if (ret)
 		goto out;
 
-	iov_iter_init(&from, iovp, nr_segs, count, 0);
+	iov_iter_init(&from, WRITE, iovp, nr_segs, count);
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
 
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 4ee17413fe1b..b80bbe197d13 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -20,6 +20,7 @@ struct kvec {
 };
 
 struct iov_iter {
+	int type;
 	const struct iovec *iov;
 	unsigned long nr_segs;
 	size_t iov_offset;
@@ -68,18 +69,8 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
-
-static inline void iov_iter_init(struct iov_iter *i,
-			const struct iovec *iov, unsigned long nr_segs,
-			size_t count, size_t written)
-{
-	i->iov = iov;
-	i->nr_segs = nr_segs;
-	i->iov_offset = 0;
-	i->count = count + written;
-
-	iov_iter_advance(i, written);
-}
+void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
+			unsigned long nr_segs, size_t count);
 
 static inline size_t iov_iter_count(struct iov_iter *i)
 {
diff --git a/mm/filemap.c b/mm/filemap.c
index a7f79e90209c..3aeaf2df4135 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1730,7 +1730,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
 	size_t count = iov_length(iov, nr_segs);
 	struct iov_iter i;
 
-	iov_iter_init(&i, iov, nr_segs, count, 0);
+	iov_iter_init(&i, READ, iov, nr_segs, count);
 	return generic_file_read_iter(iocb, &i);
 }
 EXPORT_SYMBOL(generic_file_aio_read);
@@ -2596,7 +2596,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (err)
 		goto out;
 
-	iov_iter_init(&from, iov, nr_segs, count, 0);
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
 
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
 	if (unlikely(file->f_flags & O_DIRECT)) {
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 2f762cc21080..e2c9a2db4350 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -220,3 +220,18 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 	return res;
 }
 EXPORT_SYMBOL(iov_iter_alignment);
+
+void iov_iter_init(struct iov_iter *i, int direction,
+			const struct iovec *iov, unsigned long nr_segs,
+			size_t count)
+{
+	/* It will get better.  Eventually... */
+	if (segment_eq(get_fs(), KERNEL_DS))
+		direction |= REQ_KERNEL;
+	i->type = direction;
+	i->iov = iov;
+	i->nr_segs = nr_segs;
+	i->iov_offset = 0;
+	i->count = count;
+}
+EXPORT_SYMBOL(iov_iter_init);
diff --git a/mm/page_io.c b/mm/page_io.c
index 0ed0644c73db..313bfedb75d1 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -268,7 +268,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
 		kiocb.ki_nbytes = PAGE_SIZE;
-		iov_iter_init(&from, &iov, 1, PAGE_SIZE, 0);
+		iov_iter_init(&from, KERNEL_WRITE, &iov, 1, PAGE_SIZE);
 
 		set_page_writeback(page);
 		unlock_page(page);
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index f32b1fbbfe69..5077afcd9e11 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -274,7 +274,7 @@ static ssize_t process_vm_rw(pid_t pid,
 	if (rc <= 0)
 		goto free_iovecs;
 
-	iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
+	iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
 
 	rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
 				   iovstack_r, &iov_r);
@@ -337,7 +337,7 @@ compat_process_vm_rw(compat_pid_t pid,
 						  &iov_l);
 	if (rc <= 0)
 		goto free_iovecs;
-	iov_iter_init(&iter, iov_l, liovcnt, rc, 0);
+	iov_iter_init(&iter, vm_write ? WRITE : READ, iov_l, liovcnt, rc);
 	rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
 					  UIO_FASTIOV, iovstack_r,
 					  &iov_r);
diff --git a/mm/shmem.c b/mm/shmem.c
index 2a93e625adaf..e0b76696c3f9 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1417,7 +1417,7 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb,
 	loff_t *ppos = &iocb->ki_pos;
 	struct iov_iter iter;
 
-	iov_iter_init(&iter, iov, nr_segs, count, 0);
+	iov_iter_init(&iter, READ, iov, nr_segs, count);
 
 	/*
 	 * Might this read be for a stacking filesystem?  Then when reading
-- 
cgit 


From 7b2c99d15559e285384c742db52316802e24b0bd Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 15 Mar 2014 04:05:57 -0400
Subject: new helper: iov_iter_get_pages()

iov_iter_get_pages(iter, pages, maxsize, &start) grabs references pinning
the pages of up to maxsize of (contiguous) data from iter.  Returns the
amount of memory grabbed or -error.  In case of success, the requested
area begins at offset start in pages[0] and runs through pages[1], etc.
Less than requested amount might be returned - either because the contiguous
area in the beginning of iterator is smaller than requested, or because
the kernel failed to pin that many pages.

direct-io.c switched to using iov_iter_get_pages()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/direct-io.c      | 111 ++++++++++++++++++----------------------------------
 include/linux/uio.h |   2 +
 mm/iov_iter.c       |  27 +++++++++++++
 3 files changed, 67 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 387d91989c45..4b410d58faae 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -77,7 +77,6 @@ struct dio_submit {
 	unsigned blocks_available;	/* At block_in_file.  changes */
 	int reap_counter;		/* rate limit reaping */
 	sector_t final_block_in_request;/* doesn't change */
-	unsigned first_block_in_page;	/* doesn't change, Used only once */
 	int boundary;			/* prev block is at a boundary */
 	get_block_t *get_block;		/* block mapping function */
 	dio_submit_t *submit_io;	/* IO submition function */
@@ -98,19 +97,14 @@ struct dio_submit {
 	sector_t cur_page_block;	/* Where it starts */
 	loff_t cur_page_fs_offset;	/* Offset in file */
 
-	/*
-	 * Page fetching state. These variables belong to dio_refill_pages().
-	 */
-	int curr_page;			/* changes */
-	int total_pages;		/* doesn't change */
-	unsigned long curr_user_address;/* changes */
-
+	struct iov_iter *iter;
 	/*
 	 * Page queue.  These variables belong to dio_refill_pages() and
 	 * dio_get_page().
 	 */
 	unsigned head;			/* next page to process */
 	unsigned tail;			/* last valid page + 1 */
+	size_t from, to;
 };
 
 /* dio_state communicated between submission path and end_io */
@@ -163,15 +157,10 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio)
  */
 static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 {
-	int ret;
-	int nr_pages;
+	ssize_t ret;
 
-	nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES);
-	ret = get_user_pages_fast(
-		sdio->curr_user_address,		/* Where from? */
-		nr_pages,			/* How many pages? */
-		dio->rw == READ,		/* Write to memory? */
-		&dio->pages[0]);		/* Put results here */
+	ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE,
+				&sdio->from);
 
 	if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
 		struct page *page = ZERO_PAGE(0);
@@ -186,18 +175,19 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 		dio->pages[0] = page;
 		sdio->head = 0;
 		sdio->tail = 1;
-		ret = 0;
-		goto out;
+		sdio->from = 0;
+		sdio->to = PAGE_SIZE;
+		return 0;
 	}
 
 	if (ret >= 0) {
-		sdio->curr_user_address += ret * PAGE_SIZE;
-		sdio->curr_page += ret;
+		iov_iter_advance(sdio->iter, ret);
+		ret += sdio->from;
 		sdio->head = 0;
-		sdio->tail = ret;
-		ret = 0;
+		sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE;
+		sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1;
+		return 0;
 	}
-out:
 	return ret;	
 }
 
@@ -208,8 +198,9 @@ out:
  * L1 cache.
  */
 static inline struct page *dio_get_page(struct dio *dio,
-		struct dio_submit *sdio)
+		struct dio_submit *sdio, size_t *from, size_t *to)
 {
+	int n;
 	if (dio_pages_present(sdio) == 0) {
 		int ret;
 
@@ -218,7 +209,10 @@ static inline struct page *dio_get_page(struct dio *dio,
 			return ERR_PTR(ret);
 		BUG_ON(dio_pages_present(sdio) == 0);
 	}
-	return dio->pages[sdio->head++];
+	n = sdio->head++;
+	*from = n ? 0 : sdio->from;
+	*to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE;
+	return dio->pages[n];
 }
 
 /**
@@ -422,8 +416,8 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
  */
 static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
 {
-	while (dio_pages_present(sdio))
-		page_cache_release(dio_get_page(dio, sdio));
+	while (sdio->head < sdio->tail)
+		page_cache_release(dio->pages[sdio->head++]);
 }
 
 /*
@@ -912,23 +906,18 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
 			struct buffer_head *map_bh)
 {
 	const unsigned blkbits = sdio->blkbits;
-	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
-	struct page *page;
-	unsigned block_in_page;
 	int ret = 0;
 
-	/* The I/O can start at any block offset within the first page */
-	block_in_page = sdio->first_block_in_page;
-
 	while (sdio->block_in_file < sdio->final_block_in_request) {
-		page = dio_get_page(dio, sdio);
+		struct page *page;
+		size_t from, to;
+		page = dio_get_page(dio, sdio, &from, &to);
 		if (IS_ERR(page)) {
 			ret = PTR_ERR(page);
 			goto out;
 		}
 
-		while (block_in_page < blocks_per_page) {
-			unsigned offset_in_page = block_in_page << blkbits;
+		while (from < to) {
 			unsigned this_chunk_bytes;	/* # of bytes mapped */
 			unsigned this_chunk_blocks;	/* # of blocks */
 			unsigned u;
@@ -999,10 +988,9 @@ do_holes:
 					page_cache_release(page);
 					goto out;
 				}
-				zero_user(page, block_in_page << blkbits,
-						1 << blkbits);
+				zero_user(page, from, 1 << blkbits);
 				sdio->block_in_file++;
-				block_in_page++;
+				from += 1 << blkbits;
 				dio->result += 1 << blkbits;
 				goto next_block;
 			}
@@ -1020,7 +1008,7 @@ do_holes:
 			 * can add to this page
 			 */
 			this_chunk_blocks = sdio->blocks_available;
-			u = (PAGE_SIZE - offset_in_page) >> blkbits;
+			u = (to - from) >> blkbits;
 			if (this_chunk_blocks > u)
 				this_chunk_blocks = u;
 			u = sdio->final_block_in_request - sdio->block_in_file;
@@ -1032,7 +1020,7 @@ do_holes:
 			if (this_chunk_blocks == sdio->blocks_available)
 				sdio->boundary = buffer_boundary(map_bh);
 			ret = submit_page_section(dio, sdio, page,
-						  offset_in_page,
+						  from,
 						  this_chunk_bytes,
 						  sdio->next_block_for_io,
 						  map_bh);
@@ -1043,9 +1031,9 @@ do_holes:
 			sdio->next_block_for_io += this_chunk_blocks;
 
 			sdio->block_in_file += this_chunk_blocks;
-			block_in_page += this_chunk_blocks;
+			from += this_chunk_bytes;
+			dio->result += this_chunk_bytes;
 			sdio->blocks_available -= this_chunk_blocks;
-			dio->result += this_chunk_blocks << blkbits;
 next_block:
 			BUG_ON(sdio->block_in_file > sdio->final_block_in_request);
 			if (sdio->block_in_file == sdio->final_block_in_request)
@@ -1054,7 +1042,6 @@ next_block:
 
 		/* Drop the ref which was taken in get_user_pages() */
 		page_cache_release(page);
-		block_in_page = 0;
 	}
 out:
 	return ret;
@@ -1122,7 +1109,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct dio *dio;
 	struct dio_submit sdio = { 0, };
 	unsigned long user_addr;
-	size_t bytes;
 	struct buffer_head map_bh = { 0, };
 	struct blk_plug plug;
 	unsigned long align = offset | iov_iter_alignment(iter);
@@ -1234,6 +1220,10 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	spin_lock_init(&dio->bio_lock);
 	dio->refcount = 1;
 
+	sdio.iter = iter;
+	sdio.final_block_in_request =
+		(offset + iov_iter_count(iter)) >> blkbits;
+
 	/*
 	 * In case of non-aligned buffers, we may need 2 more
 	 * pages since we need to zero out first and last block.
@@ -1250,34 +1240,9 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 
 	blk_start_plug(&plug);
 
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		user_addr = (unsigned long)iter->iov[seg].iov_base;
-		sdio.size += bytes = iter->iov[seg].iov_len;
-
-		/* Index into the first page of the first block */
-		sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
-		sdio.final_block_in_request = sdio.block_in_file +
-						(bytes >> blkbits);
-		/* Page fetching state */
-		sdio.head = 0;
-		sdio.tail = 0;
-		sdio.curr_page = 0;
-
-		sdio.total_pages = 0;
-		if (user_addr & (PAGE_SIZE-1)) {
-			sdio.total_pages++;
-			bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
-		}
-		sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
-		sdio.curr_user_address = user_addr;
-
-		retval = do_direct_IO(dio, &sdio, &map_bh);
-
-		if (retval) {
-			dio_cleanup(dio, &sdio);
-			break;
-		}
-	} /* end iovec loop */
+	retval = do_direct_IO(dio, &sdio, &map_bh);
+	if (retval)
+		dio_cleanup(dio, &sdio);
 
 	if (retval == -ENOTBLK) {
 		/*
diff --git a/include/linux/uio.h b/include/linux/uio.h
index b80bbe197d13..341986116d83 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -71,6 +71,8 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
+ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
+			size_t maxsize, size_t *start);
 
 static inline size_t iov_iter_count(struct iov_iter *i)
 {
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index e2c9a2db4350..45204cd5ccd8 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -235,3 +235,30 @@ void iov_iter_init(struct iov_iter *i, int direction,
 	i->count = count;
 }
 EXPORT_SYMBOL(iov_iter_init);
+
+ssize_t iov_iter_get_pages(struct iov_iter *i,
+		   struct page **pages, size_t maxsize,
+		   size_t *start)
+{
+	size_t offset = i->iov_offset;
+	const struct iovec *iov = i->iov;
+	size_t len;
+	unsigned long addr;
+	int n;
+	int res;
+
+	len = iov->iov_len - offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	addr = (unsigned long)iov->iov_base + offset;
+	len += *start = addr & (PAGE_SIZE - 1);
+	addr &= ~(PAGE_SIZE - 1);
+	n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
+	res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
+	if (unlikely(res < 0))
+		return res;
+	return (res == n ? len : res * PAGE_SIZE) - *start;
+}
+EXPORT_SYMBOL(iov_iter_get_pages);
-- 
cgit 


From f67da30c1d5fc9e341bc8121708874bfd7b31e45 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 19 Mar 2014 01:16:16 -0400
Subject: new helper: iov_iter_npages()

counts the pages covered by iov_iter, up to given limit.
do_blockdev_direct_IO() and fuse_iter_npages() switched to
it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/direct-io.c      |  9 +--------
 fs/fuse/file.c      | 16 ++--------------
 include/linux/uio.h |  1 +
 mm/iov_iter.c       | 27 +++++++++++++++++++++++++++
 4 files changed, 31 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 4b410d58faae..98040ba388ac 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1100,7 +1100,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	get_block_t get_block, dio_iodone_t end_io,
 	dio_submit_t submit_io,	int flags)
 {
-	int seg;
 	unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
 	unsigned blkbits = i_blkbits;
 	unsigned blocksize_mask = (1 << blkbits) - 1;
@@ -1108,7 +1107,6 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	loff_t end = offset + iov_iter_count(iter);
 	struct dio *dio;
 	struct dio_submit sdio = { 0, };
-	unsigned long user_addr;
 	struct buffer_head map_bh = { 0, };
 	struct blk_plug plug;
 	unsigned long align = offset | iov_iter_alignment(iter);
@@ -1231,12 +1229,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	if (unlikely(sdio.blkfactor))
 		sdio.pages_in_io = 2;
 
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		user_addr = (unsigned long)iter->iov[seg].iov_base;
-		sdio.pages_in_io +=
-			((user_addr + iter->iov[seg].iov_len + PAGE_SIZE-1) /
-				PAGE_SIZE - user_addr / PAGE_SIZE);
-	}
+	sdio.pages_in_io += iov_iter_npages(iter, INT_MAX);
 
 	blk_start_plug(&plug);
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7db564d18dc6..7026014717bc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1310,7 +1310,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 
 	while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
 		unsigned npages;
-		size_t start, end, frag_size;
+		size_t start;
 		unsigned n = req->max_pages - req->num_pages;
 		ssize_t ret = iov_iter_get_pages(ii,
 					&req->pages[req->num_pages],
@@ -1344,19 +1344,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 
 static inline int fuse_iter_npages(const struct iov_iter *ii_p)
 {
-	struct iov_iter ii = *ii_p;
-	int npages = 0;
-
-	while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) {
-		unsigned long user_addr = fuse_get_user_addr(&ii);
-		unsigned offset = user_addr & ~PAGE_MASK;
-		size_t frag_size = iov_iter_single_seg_count(&ii);
-
-		npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		iov_iter_advance(&ii, frag_size);
-	}
-
-	return min(npages, FUSE_MAX_PAGES_PER_REQ);
+	return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ);
 }
 
 ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 341986116d83..2f8825b06680 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -73,6 +73,7 @@ void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 			size_t maxsize, size_t *start);
+int iov_iter_npages(const struct iov_iter *i, int maxpages);
 
 static inline size_t iov_iter_count(struct iov_iter *i)
 {
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 45204cd5ccd8..0b677f8f9bad 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -262,3 +262,30 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 	return (res == n ? len : res * PAGE_SIZE) - *start;
 }
 EXPORT_SYMBOL(iov_iter_get_pages);
+
+int iov_iter_npages(const struct iov_iter *i, int maxpages)
+{
+	size_t offset = i->iov_offset;
+	size_t size = i->count;
+	const struct iovec *iov = i->iov;
+	int npages = 0;
+	int n;
+
+	for (n = 0; size && n < i->nr_segs; n++, iov++) {
+		unsigned long addr = (unsigned long)iov->iov_base + offset;
+		size_t len = iov->iov_len - offset;
+		offset = 0;
+		if (unlikely(!len))	/* empty segment */
+			continue;
+		if (len > size)
+			len = size;
+		npages += (addr + len + PAGE_SIZE - 1) / PAGE_SIZE
+			  - addr / PAGE_SIZE;
+		if (npages >= maxpages)	/* don't bother going further */
+			return maxpages;
+		size -= len;
+		offset = 0;
+	}
+	return min(npages, maxpages);
+}
+EXPORT_SYMBOL(iov_iter_npages);
-- 
cgit 


From 91f79c43d1b54d7154b118860d81b39bad07dfff Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 21 Mar 2014 04:58:33 -0400
Subject: new helper: iov_iter_get_pages_alloc()

same as iov_iter_get_pages(), except that pages array is allocated
(kmalloc if possible, vmalloc if that fails) and left for caller to
free.  Lustre and NFS ->direct_IO() switched to it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/staging/lustre/lustre/llite/rw26.c |  92 ++++-----
 fs/nfs/direct.c                            | 290 +++++++++--------------------
 include/linux/uio.h                        |   2 +
 mm/iov_iter.c                              |  40 ++++
 4 files changed, 167 insertions(+), 257 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index f718585c9e08..6b5994577b6b 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -218,14 +218,11 @@ static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
 	int i;
 
 	for (i = 0; i < npages; i++) {
-		if (pages[i] == NULL)
-			break;
 		if (do_dirty)
 			set_page_dirty_lock(pages[i]);
 		page_cache_release(pages[i]);
 	}
-
-	OBD_FREE_LARGE(pages, npages * sizeof(*pages));
+	kvfree(pages);
 }
 
 ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
@@ -370,10 +367,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	struct ccc_object *obj = cl_inode2ccc(inode);
-	long count = iov_iter_count(iter);
-	long tot_bytes = 0, result = 0;
+	ssize_t count = iov_iter_count(iter);
+	ssize_t tot_bytes = 0, result = 0;
 	struct ll_inode_info *lli = ll_i2info(inode);
-	unsigned long seg = 0;
 	long size = MAX_DIO_SIZE;
 	int refcheck;
 
@@ -407,63 +403,49 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 		mutex_lock(&inode->i_mutex);
 
 	LASSERT(obj->cob_transient_pages == 0);
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		long iov_left = iter->iov[seg].iov_len;
-		unsigned long user_addr = (unsigned long)iter->iov[seg].iov_base;
+	while (iov_iter_count(iter)) {
+		struct page **pages;
+		size_t offs;
 
+		count = min_t(size_t, iov_iter_count(iter), size);
 		if (rw == READ) {
 			if (file_offset >= i_size_read(inode))
 				break;
-			if (file_offset + iov_left > i_size_read(inode))
-				iov_left = i_size_read(inode) - file_offset;
+			if (file_offset + count > i_size_read(inode))
+				count = i_size_read(inode) - file_offset;
 		}
 
-		while (iov_left > 0) {
-			struct page **pages;
-			int page_count, max_pages = 0;
-			long bytes;
-
-			bytes = min(size, iov_left);
-			page_count = ll_get_user_pages(rw, user_addr, bytes,
-						       &pages, &max_pages);
-			if (likely(page_count > 0)) {
-				if (unlikely(page_count <  max_pages))
-					bytes = page_count << PAGE_CACHE_SHIFT;
-				result = ll_direct_IO_26_seg(env, io, rw, inode,
-							     file->f_mapping,
-							     bytes, file_offset,
-							     pages, page_count);
-				ll_free_user_pages(pages, max_pages, rw==READ);
-			} else if (page_count == 0) {
-				GOTO(out, result = -EFAULT);
-			} else {
-				result = page_count;
-			}
-			if (unlikely(result <= 0)) {
-				/* If we can't allocate a large enough buffer
-				 * for the request, shrink it to a smaller
-				 * PAGE_SIZE multiple and try again.
-				 * We should always be able to kmalloc for a
-				 * page worth of page pointers = 4MB on i386. */
-				if (result == -ENOMEM &&
-				    size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
-					   PAGE_CACHE_SIZE) {
-					size = ((((size / 2) - 1) |
-						 ~CFS_PAGE_MASK) + 1) &
-						CFS_PAGE_MASK;
-					CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
-					       size);
-					continue;
-				}
-
-				GOTO(out, result);
+		result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
+		if (likely(result > 0)) {
+			int n = (result + offs + PAGE_SIZE - 1) / PAGE_SIZE;
+			result = ll_direct_IO_26_seg(env, io, rw, inode,
+						     file->f_mapping,
+						     result, file_offset,
+						     pages, n);
+			ll_free_user_pages(pages, n, rw==READ);
+		}
+		if (unlikely(result <= 0)) {
+			/* If we can't allocate a large enough buffer
+			 * for the request, shrink it to a smaller
+			 * PAGE_SIZE multiple and try again.
+			 * We should always be able to kmalloc for a
+			 * page worth of page pointers = 4MB on i386. */
+			if (result == -ENOMEM &&
+			    size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
+				   PAGE_CACHE_SIZE) {
+				size = ((((size / 2) - 1) |
+					 ~CFS_PAGE_MASK) + 1) &
+					CFS_PAGE_MASK;
+				CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
+				       size);
+				continue;
 			}
 
-			tot_bytes += result;
-			file_offset += result;
-			iov_left -= result;
-			user_addr += result;
+			GOTO(out, result);
 		}
+		iov_iter_advance(iter, result);
+		tot_bytes += result;
+		file_offset += result;
 	}
 out:
 	LASSERT(obj->cob_transient_pages == 0);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1d34f454989e..b122fe21fea0 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -322,60 +322,37 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
  * handled automatically by nfs_direct_read_result().  Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
-						const struct iovec *iov,
-						loff_t pos, bool uio)
-{
-	struct nfs_direct_req *dreq = desc->pg_dreq;
-	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
-	unsigned long user_addr = (unsigned long)iov->iov_base;
-	size_t count = iov->iov_len;
-	size_t rsize = NFS_SERVER(inode)->rsize;
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-	struct page **pagevec = NULL;
-	unsigned int npages;
-
-	do {
-		size_t bytes;
-		int i;
 
-		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
+static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+					      struct iov_iter *iter,
+					      loff_t pos)
+{
+	struct nfs_pageio_descriptor desc;
+	struct inode *inode = dreq->inode;
+	ssize_t result = -EINVAL;
+	size_t requested_bytes = 0;
+	size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
 
-		result = -ENOMEM;
-		npages = nfs_page_array_len(pgbase, bytes);
-		if (!pagevec)
-			pagevec = kmalloc(npages * sizeof(struct page *),
-					  GFP_KERNEL);
-		if (!pagevec)
-			break;
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-					npages, 1, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 1, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
+	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
+			     &nfs_direct_read_completion_ops);
+	get_dreq(dreq);
+	desc.pg_dreq = dreq;
+	atomic_inc(&inode->i_dio_count);
 
-		if ((unsigned)result < npages) {
-			bytes = result * PAGE_SIZE;
-			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pagevec, result);
-				break;
-			}
-			bytes -= pgbase;
-			npages = result;
-		}
+	while (iov_iter_count(iter)) {
+		struct page **pagevec;
+		size_t bytes;
+		size_t pgbase;
+		unsigned npages, i;
 
+		result = iov_iter_get_pages_alloc(iter, &pagevec, 
+						  rsize, &pgbase);
+		if (result < 0)
+			break;
+	
+		bytes = result;
+		iov_iter_advance(iter, bytes);
+		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
 		for (i = 0; i < npages; i++) {
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
@@ -389,55 +366,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 			}
 			req->wb_index = pos >> PAGE_SHIFT;
 			req->wb_offset = pos & ~PAGE_MASK;
-			if (!nfs_pageio_add_request(desc, req)) {
-				result = desc->pg_error;
+			if (!nfs_pageio_add_request(&desc, req)) {
+				result = desc.pg_error;
 				nfs_release_request(req);
 				break;
 			}
 			pgbase = 0;
 			bytes -= req_len;
-			started += req_len;
-			user_addr += req_len;
+			requested_bytes += req_len;
 			pos += req_len;
-			count -= req_len;
 			dreq->bytes_left -= req_len;
 		}
-		/* The nfs_page now hold references to these pages */
 		nfs_direct_release_pages(pagevec, npages);
-	} while (count != 0 && result >= 0);
-
-	kfree(pagevec);
-
-	if (started)
-		return started;
-	return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
-static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
-					      struct iov_iter *iter,
-					      loff_t pos, bool uio)
-{
-	struct nfs_pageio_descriptor desc;
-	struct inode *inode = dreq->inode;
-	ssize_t result = -EINVAL;
-	size_t requested_bytes = 0;
-	unsigned long seg;
-
-	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
-			     &nfs_direct_read_completion_ops);
-	get_dreq(dreq);
-	desc.pg_dreq = dreq;
-	atomic_inc(&inode->i_dio_count);
-
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		const struct iovec *vec = &iter->iov[seg];
-		result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
+		kvfree(pagevec);
 		if (result < 0)
 			break;
-		requested_bytes += result;
-		if ((size_t)result < vec->iov_len)
-			break;
-		pos += vec->iov_len;
 	}
 
 	nfs_pageio_complete(&desc);
@@ -521,7 +464,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 		dreq->iocb = iocb;
 
 	NFS_I(inode)->read_io += count;
-	result = nfs_direct_read_schedule_iovec(dreq, iter, pos, uio);
+	result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
 
 	mutex_unlock(&inode->i_mutex);
 
@@ -677,109 +620,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 }
 #endif
 
-/*
- * NB: Return the value of the first error return code.  Subsequent
- *     errors after the first one are ignored.
- */
-/*
- * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
- * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
- * bail and stop sending more writes.  Write length accounting is
- * handled automatically by nfs_direct_write_result().  Otherwise, if
- * no requests have been sent, just return an error.
- */
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
-						 const struct iovec *iov,
-						 loff_t pos, bool uio)
-{
-	struct nfs_direct_req *dreq = desc->pg_dreq;
-	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
-	unsigned long user_addr = (unsigned long)iov->iov_base;
-	size_t count = iov->iov_len;
-	size_t wsize = NFS_SERVER(inode)->wsize;
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-	struct page **pagevec = NULL;
-	unsigned int npages;
-
-	do {
-		size_t bytes;
-		int i;
-
-		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
-
-		result = -ENOMEM;
-		npages = nfs_page_array_len(pgbase, bytes);
-		if (!pagevec)
-			pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
-		if (!pagevec)
-			break;
-
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-						npages, 0, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 0, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
-
-		if ((unsigned)result < npages) {
-			bytes = result * PAGE_SIZE;
-			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pagevec, result);
-				break;
-			}
-			bytes -= pgbase;
-			npages = result;
-		}
-
-		for (i = 0; i < npages; i++) {
-			struct nfs_page *req;
-			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
-
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
-						 pgbase, req_len);
-			if (IS_ERR(req)) {
-				result = PTR_ERR(req);
-				break;
-			}
-			nfs_lock_request(req);
-			req->wb_index = pos >> PAGE_SHIFT;
-			req->wb_offset = pos & ~PAGE_MASK;
-			if (!nfs_pageio_add_request(desc, req)) {
-				result = desc->pg_error;
-				nfs_unlock_and_release_request(req);
-				break;
-			}
-			pgbase = 0;
-			bytes -= req_len;
-			started += req_len;
-			user_addr += req_len;
-			pos += req_len;
-			count -= req_len;
-			dreq->bytes_left -= req_len;
-		}
-		/* The nfs_page now hold references to these pages */
-		nfs_direct_release_pages(pagevec, npages);
-	} while (count != 0 && result >= 0);
-
-	kfree(pagevec);
-
-	if (started)
-		return started;
-	return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
 static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_direct_req *dreq = hdr->dreq;
@@ -859,15 +699,27 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
 	.completion = nfs_direct_write_completion,
 };
 
+
+/*
+ * NB: Return the value of the first error return code.  Subsequent
+ *     errors after the first one are ignored.
+ */
+/*
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes.  Write length accounting is
+ * handled automatically by nfs_direct_write_result().  Otherwise, if
+ * no requests have been sent, just return an error.
+ */
 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 					       struct iov_iter *iter,
-					       loff_t pos, bool uio)
+					       loff_t pos)
 {
 	struct nfs_pageio_descriptor desc;
 	struct inode *inode = dreq->inode;
 	ssize_t result = 0;
 	size_t requested_bytes = 0;
-	unsigned long seg;
+	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
 
 	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
 			      &nfs_direct_write_completion_ops);
@@ -875,16 +727,50 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	get_dreq(dreq);
 	atomic_inc(&inode->i_dio_count);
 
-	NFS_I(dreq->inode)->write_io += iov_iter_count(iter);
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		const struct iovec *vec = &iter->iov[seg];
-		result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
+	NFS_I(inode)->write_io += iov_iter_count(iter);
+	while (iov_iter_count(iter)) {
+		struct page **pagevec;
+		size_t bytes;
+		size_t pgbase;
+		unsigned npages, i;
+
+		result = iov_iter_get_pages_alloc(iter, &pagevec, 
+						  wsize, &pgbase);
 		if (result < 0)
 			break;
-		requested_bytes += result;
-		if ((size_t)result < vec->iov_len)
+
+		bytes = result;
+		iov_iter_advance(iter, bytes);
+		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
+		for (i = 0; i < npages; i++) {
+			struct nfs_page *req;
+			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+
+			req = nfs_create_request(dreq->ctx, inode,
+						 pagevec[i],
+						 pgbase, req_len);
+			if (IS_ERR(req)) {
+				result = PTR_ERR(req);
+				break;
+			}
+			nfs_lock_request(req);
+			req->wb_index = pos >> PAGE_SHIFT;
+			req->wb_offset = pos & ~PAGE_MASK;
+			if (!nfs_pageio_add_request(&desc, req)) {
+				result = desc.pg_error;
+				nfs_unlock_and_release_request(req);
+				break;
+			}
+			pgbase = 0;
+			bytes -= req_len;
+			requested_bytes += req_len;
+			pos += req_len;
+			dreq->bytes_left -= req_len;
+		}
+		nfs_direct_release_pages(pagevec, npages);
+		kvfree(pagevec);
+		if (result < 0)
 			break;
-		pos += vec->iov_len;
 	}
 	nfs_pageio_complete(&desc);
 
@@ -985,7 +871,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	result = nfs_direct_write_schedule_iovec(dreq, iter, pos, uio);
+	result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
 
 	if (mapping->nrpages) {
 		invalidate_inode_pages2_range(mapping,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 2f8825b06680..4876e9f2a58f 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -73,6 +73,8 @@ void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 			size_t maxsize, size_t *start);
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
+			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
 
 static inline size_t iov_iter_count(struct iov_iter *i)
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 0b677f8f9bad..a5c691c1a283 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -1,6 +1,8 @@
 #include <linux/export.h>
 #include <linux/uio.h>
 #include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
@@ -263,6 +265,44 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 }
 EXPORT_SYMBOL(iov_iter_get_pages);
 
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	size_t offset = i->iov_offset;
+	const struct iovec *iov = i->iov;
+	size_t len;
+	unsigned long addr;
+	void *p;
+	int n;
+	int res;
+
+	len = iov->iov_len - offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	addr = (unsigned long)iov->iov_base + offset;
+	len += *start = addr & (PAGE_SIZE - 1);
+	addr &= ~(PAGE_SIZE - 1);
+	n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
+	
+	p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
+	if (!p)
+		p = vmalloc(n * sizeof(struct page *));
+	if (!p)
+		return -ENOMEM;
+
+	res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
+	if (unlikely(res < 0)) {
+		kvfree(p);
+		return res;
+	}
+	*pages = p;
+	return (res == n ? len : res * PAGE_SIZE) - *start;
+}
+EXPORT_SYMBOL(iov_iter_get_pages_alloc);
+
 int iov_iter_npages(const struct iov_iter *i, int maxpages)
 {
 	size_t offset = i->iov_offset;
-- 
cgit 


From 0c949334a9e2581646c6ff0d1470a805b1e5be99 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 22 Mar 2014 06:51:37 -0400
Subject: iov_iter_truncate()

Now It Can Be Done(tm) - we don't need to do iov_shorten() in
generic_file_direct_write() anymore, now that all ->direct_IO()
instances are converted to proper iov_iter methods and honour
iter->count and iter->iov_offset properly.

Get rid of count/ocount arguments of generic_file_direct_write(),
while we are at it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/file.c     | 17 ++++++++---------
 fs/fuse/file.c      | 18 ++++++++----------
 fs/ocfs2/file.c     | 10 +++++-----
 fs/xfs/xfs_file.c   |  9 +++++----
 include/linux/fs.h  |  3 +--
 include/linux/uio.h |  6 ++++++
 mm/filemap.c        | 19 +++++++------------
 7 files changed, 40 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f8cee205618a..ea63a51c148c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1659,8 +1659,7 @@ again:
 
 static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 				    struct iov_iter *from,
-				    loff_t pos,
-				    size_t count, size_t ocount)
+				    loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
 	ssize_t written;
@@ -1668,9 +1667,9 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
 	loff_t endbyte;
 	int err;
 
-	written = generic_file_direct_write(iocb, from, pos, count, ocount);
+	written = generic_file_direct_write(iocb, from, pos);
 
-	if (written < 0 || written == count)
+	if (written < 0 || !iov_iter_count(from))
 		return written;
 
 	pos += written;
@@ -1720,13 +1719,14 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	u64 end_pos;
 	ssize_t num_written = 0;
 	ssize_t err = 0;
-	size_t count, ocount;
+	size_t count;
 	bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
 	struct iov_iter i;
 
 	mutex_lock(&inode->i_mutex);
 
-	count = ocount = iov_length(iov, nr_segs);
+	count = iov_length(iov, nr_segs);
+	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 
 	current->backing_dev_info = inode->i_mapping->backing_dev_info;
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
@@ -1740,7 +1740,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		goto out;
 	}
 
-	iov_iter_init(&i, WRITE, iov, nr_segs, count);
+	iov_iter_truncate(&i, count);
 
 	err = file_remove_suid(file);
 	if (err) {
@@ -1783,8 +1783,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		atomic_inc(&BTRFS_I(inode)->sync_writers);
 
 	if (unlikely(file->f_flags & O_DIRECT)) {
-		num_written = __btrfs_direct_write(iocb, &i,
-						   pos, count, ocount);
+		num_written = __btrfs_direct_write(iocb, &i, pos);
 	} else {
 		num_written = __btrfs_buffered_write(file, &i, pos);
 		if (num_written > 0)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7026014717bc..66d2d5de19d2 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1188,8 +1188,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
-	size_t count = 0;
-	size_t ocount = 0;
+	size_t count;
 	ssize_t written = 0;
 	ssize_t written_buffered = 0;
 	struct inode *inode = mapping->host;
@@ -1208,7 +1207,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
 	WARN_ON(iocb->ki_pos != pos);
 
-	count = ocount = iov_length(iov, nr_segs);
+	count = iov_length(iov, nr_segs);
+	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 	mutex_lock(&inode->i_mutex);
 
 	/* We can write back this queue in page reclaim */
@@ -1217,11 +1217,11 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
 	if (err)
 		goto out;
-	iov_iter_init(&i, WRITE, iov, nr_segs, count);
 
 	if (count == 0)
 		goto out;
 
+	iov_iter_truncate(&i, count);
 	err = file_remove_suid(file);
 	if (err)
 		goto out;
@@ -1231,8 +1231,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		goto out;
 
 	if (file->f_flags & O_DIRECT) {
-		written = generic_file_direct_write(iocb, &i, pos, count, ocount);
-		if (written < 0 || written == count)
+		written = generic_file_direct_write(iocb, &i, pos);
+		if (written < 0 || !iov_iter_count(&i))
 			goto out;
 
 		pos += written;
@@ -1469,8 +1469,7 @@ static ssize_t __fuse_direct_write(struct fuse_io_priv *io,
 
 	res = generic_write_checks(file, ppos, &count, 0);
 	if (!res) {
-		if (iter->count > count)
-			iter->count = count;
+		iov_iter_truncate(iter, count);
 		res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE);
 	}
 
@@ -2896,8 +2895,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
 		if (offset >= i_size)
 			return 0;
 		count = min_t(loff_t, count, fuse_round_up(i_size - offset));
-		if (iter->count > count)
-			iter->count = count;
+		iov_iter_truncate(iter, count);
 	}
 
 	io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9ce9ed7615c1..06b6a16d9776 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2241,7 +2241,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	int ret, direct_io, appending, rw_level, have_alloc_sem  = 0;
 	int can_do_direct, has_refcount = 0;
 	ssize_t written = 0;
-	size_t ocount;		/* original count */
 	size_t count;		/* after file limit checks */
 	loff_t old_size, *ppos = &iocb->ki_pos;
 	u32 old_clusters;
@@ -2253,6 +2252,9 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 	int unaligned_dio = 0;
 	struct iov_iter from;
 
+	count = iov_length(iov, nr_segs);
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
+
 	trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry,
 		(unsigned long long)OCFS2_I(inode)->ip_blkno,
 		file->f_path.dentry->d_name.len,
@@ -2355,16 +2357,14 @@ relock:
 	/* communicate with ocfs2_dio_end_io */
 	ocfs2_iocb_set_rw_locked(iocb, rw_level);
 
-	count = ocount = iov_length(iov, nr_segs);
 	ret = generic_write_checks(file, ppos, &count,
 				   S_ISBLK(inode->i_mode));
 	if (ret)
 		goto out_dio;
 
-	iov_iter_init(&from, WRITE, iov, nr_segs, count);
+	iov_iter_truncate(&from, count);
 	if (direct_io) {
-		written = generic_file_direct_write(iocb, &from, *ppos,
-						    count, ocount);
+		written = generic_file_direct_write(iocb, &from, *ppos);
 		if (written < 0) {
 			ret = written;
 			goto out_dio;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 762bb3e148a6..c997aa2751b2 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -626,7 +626,7 @@ xfs_file_dio_aio_write(
 	const struct iovec	*iovp,
 	unsigned long		nr_segs,
 	loff_t			pos,
-	size_t			ocount)
+	size_t			count)
 {
 	struct file		*file = iocb->ki_filp;
 	struct address_space	*mapping = file->f_mapping;
@@ -634,7 +634,6 @@ xfs_file_dio_aio_write(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct xfs_mount	*mp = ip->i_mount;
 	ssize_t			ret = 0;
-	size_t			count = ocount;
 	int			unaligned_io = 0;
 	int			iolock;
 	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
@@ -645,6 +644,8 @@ xfs_file_dio_aio_write(
 	if ((pos | count) & target->bt_logical_sectormask)
 		return -XFS_ERROR(EINVAL);
 
+	iov_iter_init(&from, WRITE, iovp, nr_segs, count);
+
 	/* "unaligned" here means not aligned to a filesystem block */
 	if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
 		unaligned_io = 1;
@@ -676,6 +677,7 @@ xfs_file_dio_aio_write(
 	ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
 	if (ret)
 		goto out;
+	iov_iter_truncate(&from, count);
 
 	if (mapping->nrpages) {
 		ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
@@ -697,8 +699,7 @@ xfs_file_dio_aio_write(
 	}
 
 	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
-	iov_iter_init(&from, WRITE, iovp, nr_segs, count);
-	ret = generic_file_direct_write(iocb, &from, pos, count, ocount);
+	ret = generic_file_direct_write(iocb, &from, pos);
 
 out:
 	xfs_rw_iunlock(ip, iolock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d096ebc7f348..8153396d19b4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2407,8 +2407,7 @@ extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsig
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
-extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *,
-		loff_t, size_t, size_t);
+extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 4876e9f2a58f..532f59d0adbb 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -82,6 +82,12 @@ static inline size_t iov_iter_count(struct iov_iter *i)
 	return i->count;
 }
 
+static inline void iov_iter_truncate(struct iov_iter *i, size_t count)
+{
+	if (i->count > count)
+		i->count = count;
+}
+
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 3aeaf2df4135..c0404b763a17 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2345,8 +2345,7 @@ int pagecache_write_end(struct file *file, struct address_space *mapping,
 EXPORT_SYMBOL(pagecache_write_end);
 
 ssize_t
-generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
-		loff_t pos, size_t count, size_t ocount)
+generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
 {
 	struct file	*file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
@@ -2356,10 +2355,7 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	pgoff_t		end;
 	struct iov_iter data;
 
-	if (count != ocount)
-		from->nr_segs = iov_shorten((struct iovec *)from->iov, from->nr_segs, count);
-
-	write_len = iov_length(from->iov, from->nr_segs);
+	write_len = iov_iter_count(from);
 	end = (pos + write_len - 1) >> PAGE_CACHE_SHIFT;
 
 	written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 1);
@@ -2568,7 +2564,6 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space * mapping = file->f_mapping;
-	size_t ocount;		/* original count */
 	size_t count;		/* after file limit checks */
 	struct inode 	*inode = mapping->host;
 	loff_t		pos = iocb->ki_pos;
@@ -2577,7 +2572,8 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t		status;
 	struct iov_iter from;
 
-	count = ocount = iov_length(iov, nr_segs);
+	count = iov_length(iov, nr_segs);
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
 
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
@@ -2588,6 +2584,8 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (count == 0)
 		goto out;
 
+	iov_iter_truncate(&from, count);
+
 	err = file_remove_suid(file);
 	if (err)
 		goto out;
@@ -2596,14 +2594,11 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (err)
 		goto out;
 
-	iov_iter_init(&from, WRITE, iov, nr_segs, count);
-
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		loff_t endbyte;
 
-		written = generic_file_direct_write(iocb, &from, pos,
-							count, ocount);
+		written = generic_file_direct_write(iocb, &from, pos);
 		if (written < 0 || written == count)
 			goto out;
 
-- 
cgit 


From 7f7f25e82d54870df24d415a7007fbd327da027b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 11 Feb 2014 17:49:24 -0500
Subject: replace checking for ->read/->aio_read presence with check in
 ->f_mode

Since we are about to introduce new methods (read_iter/write_iter), the
tests in a bunch of places would have to grow inconveniently.  Check
once (at open() time) and store results in ->f_mode as FMODE_CAN_READ
and FMODE_CAN_WRITE respectively.  It might end up being a temporary measure -
once everything switches from ->aio_{read,write} to ->{read,write}_iter
it might make sense to return to open-coded checks.  We'll see...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/mtd/nand/nandsim.c          |  4 ++--
 drivers/usb/gadget/storage_common.c |  4 ++--
 fs/file_table.c                     |  4 ++++
 fs/open.c                           |  4 ++++
 fs/read_write.c                     | 14 +++++++-------
 include/linux/fs.h                  |  4 ++++
 6 files changed, 23 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c
index 42e8a770e631..4f0d83648e5a 100644
--- a/drivers/mtd/nand/nandsim.c
+++ b/drivers/mtd/nand/nandsim.c
@@ -575,12 +575,12 @@ static int alloc_device(struct nandsim *ns)
 		cfile = filp_open(cache_file, O_CREAT | O_RDWR | O_LARGEFILE, 0600);
 		if (IS_ERR(cfile))
 			return PTR_ERR(cfile);
-		if (!cfile->f_op->read && !cfile->f_op->aio_read) {
+		if (!(cfile->f_mode & FMODE_CAN_READ)) {
 			NS_ERR("alloc_device: cache file not readable\n");
 			err = -EINVAL;
 			goto err_close;
 		}
-		if (!cfile->f_op->write && !cfile->f_op->aio_write) {
+		if (!(cfile->f_mode & FMODE_CAN_WRITE)) {
 			NS_ERR("alloc_device: cache file not writeable\n");
 			err = -EINVAL;
 			goto err_close;
diff --git a/drivers/usb/gadget/storage_common.c b/drivers/usb/gadget/storage_common.c
index ec20a1f50c2d..a8898df131ed 100644
--- a/drivers/usb/gadget/storage_common.c
+++ b/drivers/usb/gadget/storage_common.c
@@ -220,11 +220,11 @@ int fsg_lun_open(struct fsg_lun *curlun, const char *filename)
 	 * If we can't read the file, it's no good.
 	 * If we can't write the file, use it read-only.
 	 */
-	if (!(filp->f_op->read || filp->f_op->aio_read)) {
+	if (!(filp->f_mode & FMODE_CAN_READ)) {
 		LINFO(curlun, "file not readable: %s\n", filename);
 		goto out;
 	}
-	if (!(filp->f_op->write || filp->f_op->aio_write))
+	if (!(filp->f_mode & FMODE_CAN_WRITE))
 		ro = 1;
 
 	size = i_size_read(inode->i_mapping->host);
diff --git a/fs/file_table.c b/fs/file_table.c
index a374f5033e97..be73cbc48c12 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -175,6 +175,10 @@ struct file *alloc_file(struct path *path, fmode_t mode,
 	file->f_path = *path;
 	file->f_inode = path->dentry->d_inode;
 	file->f_mapping = path->dentry->d_inode->i_mapping;
+	if ((mode & FMODE_READ) && likely(fop->read || fop->aio_read))
+		mode |= FMODE_CAN_READ;
+	if ((mode & FMODE_WRITE) && likely(fop->write || fop->aio_write))
+		mode |= FMODE_CAN_WRITE;
 	file->f_mode = mode;
 	file->f_op = fop;
 	if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
diff --git a/fs/open.c b/fs/open.c
index 9d64679cec73..39d3d0468ee6 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -725,6 +725,10 @@ static int do_dentry_open(struct file *f,
 	}
 	if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(inode);
+	if ((f->f_mode & FMODE_READ) && likely(f->f_op->read || f->f_op->aio_read))
+		f->f_mode |= FMODE_CAN_READ;
+	if ((f->f_mode & FMODE_WRITE) && likely(f->f_op->write || f->f_op->aio_write))
+		f->f_mode |= FMODE_CAN_WRITE;
 
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
 
diff --git a/fs/read_write.c b/fs/read_write.c
index 31c6efa43183..d29d2a361d2c 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -396,7 +396,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 
 	if (!(file->f_mode & FMODE_READ))
 		return -EBADF;
-	if (!file->f_op->read && !file->f_op->aio_read)
+	if (!(file->f_mode & FMODE_CAN_READ))
 		return -EINVAL;
 	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
 		return -EFAULT;
@@ -445,7 +445,7 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
 	const char __user *p;
 	ssize_t ret;
 
-	if (!file->f_op->write && !file->f_op->aio_write)
+	if (!(file->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
 
 	old_fs = get_fs();
@@ -472,7 +472,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
-	if (!file->f_op->write && !file->f_op->aio_write)
+	if (!(file->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
 	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
 		return -EFAULT;
@@ -785,7 +785,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 {
 	if (!(file->f_mode & FMODE_READ))
 		return -EBADF;
-	if (!file->f_op->aio_read && !file->f_op->read)
+	if (!(file->f_mode & FMODE_CAN_READ))
 		return -EINVAL;
 
 	return do_readv_writev(READ, file, vec, vlen, pos);
@@ -798,7 +798,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
 {
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
-	if (!file->f_op->aio_write && !file->f_op->write)
+	if (!(file->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
 
 	return do_readv_writev(WRITE, file, vec, vlen, pos);
@@ -964,7 +964,7 @@ static size_t compat_readv(struct file *file,
 		goto out;
 
 	ret = -EINVAL;
-	if (!file->f_op->aio_read && !file->f_op->read)
+	if (!(file->f_mode & FMODE_CAN_READ))
 		goto out;
 
 	ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
@@ -1041,7 +1041,7 @@ static size_t compat_writev(struct file *file,
 		goto out;
 
 	ret = -EINVAL;
-	if (!file->f_op->aio_write && !file->f_op->write)
+	if (!(file->f_mode & FMODE_CAN_WRITE))
 		goto out;
 
 	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8153396d19b4..75eb71357adf 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -128,6 +128,10 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define FMODE_ATOMIC_POS	((__force fmode_t)0x8000)
 /* Write access to underlying fs */
 #define FMODE_WRITER		((__force fmode_t)0x10000)
+/* Has read method(s) */
+#define FMODE_CAN_READ          ((__force fmode_t)0x20000)
+/* Has write method(s) */
+#define FMODE_CAN_WRITE         ((__force fmode_t)0x40000)
 
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x1000000)
-- 
cgit 


From 293bc9822fa9b3c9d4b7893bcb241e085580771a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 11 Feb 2014 18:37:41 -0500
Subject: new methods: ->read_iter() and ->write_iter()

Beginning to introduce those.  Just the callers for now, and it's
clumsier than it'll eventually become; once we finish converting
aio_read and aio_write instances, things will get nicer.

For now, these guys are in parallel to ->aio_read() and ->aio_write();
they take iocb and iov_iter, with everything in iov_iter already
validated.  The file offset is passed in iocb->ki_pos; iov and nr_segs
are passed in the iov_iter.

Main concerns in that series are stack footprint and ability to
split the damn thing cleanly.

[fix from Peter Ujfalusi <peter.ujfalusi@ti.com> folded]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking |  2 +
 Documentation/filesystems/vfs.txt | 10 ++++-
 fs/aio.c                          | 14 +++++-
 fs/file_table.c                   |  6 ++-
 fs/open.c                         |  6 ++-
 fs/read_write.c                   | 90 ++++++++++++++++++++++++++++++++++++---
 include/linux/fs.h                |  6 +++
 7 files changed, 121 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 9b0d5a33c8bf..b18dd1779029 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -430,6 +430,8 @@ prototypes:
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 1846374a5add..a1d0d7a30165 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -806,6 +806,8 @@ struct file_operations {
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -836,11 +838,15 @@ otherwise noted.
 
   read: called by read(2) and related system calls
 
-  aio_read: called by io_submit(2) and other asynchronous I/O operations
+  aio_read: vectored, possibly asynchronous read
+
+  read_iter: possibly asynchronous read with iov_iter as destination
 
   write: called by write(2) and related system calls
 
-  aio_write: called by io_submit(2) and other asynchronous I/O operations
+  aio_write: vectored, possibly asynchronous write
+
+  write_iter: possibly asynchronous write with iov_iter as source
 
   iterate: called when the VFS needs to read the directory contents
 
diff --git a/fs/aio.c b/fs/aio.c
index a0ed6c7d2cd2..56b28607c32d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1241,6 +1241,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 
 typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
 			    unsigned long, loff_t);
+typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *);
 
 static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
 				     int rw, char __user *buf,
@@ -1298,7 +1299,9 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
 	int rw;
 	fmode_t mode;
 	aio_rw_op *rw_op;
+	rw_iter_op *iter_op;
 	struct iovec inline_vec, *iovec = &inline_vec;
+	struct iov_iter iter;
 
 	switch (opcode) {
 	case IOCB_CMD_PREAD:
@@ -1306,6 +1309,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
 		mode	= FMODE_READ;
 		rw	= READ;
 		rw_op	= file->f_op->aio_read;
+		iter_op	= file->f_op->read_iter;
 		goto rw_common;
 
 	case IOCB_CMD_PWRITE:
@@ -1313,12 +1317,13 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
 		mode	= FMODE_WRITE;
 		rw	= WRITE;
 		rw_op	= file->f_op->aio_write;
+		iter_op	= file->f_op->write_iter;
 		goto rw_common;
 rw_common:
 		if (unlikely(!(file->f_mode & mode)))
 			return -EBADF;
 
-		if (!rw_op)
+		if (!rw_op && !iter_op)
 			return -EINVAL;
 
 		ret = (opcode == IOCB_CMD_PREADV ||
@@ -1347,7 +1352,12 @@ rw_common:
 		if (rw == WRITE)
 			file_start_write(file);
 
-		ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+		if (iter_op) {
+			iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes);
+			ret = iter_op(req, &iter);
+		} else {
+			ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+		}
 
 		if (rw == WRITE)
 			file_end_write(file);
diff --git a/fs/file_table.c b/fs/file_table.c
index be73cbc48c12..f8cc881fbbfb 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -175,9 +175,11 @@ struct file *alloc_file(struct path *path, fmode_t mode,
 	file->f_path = *path;
 	file->f_inode = path->dentry->d_inode;
 	file->f_mapping = path->dentry->d_inode->i_mapping;
-	if ((mode & FMODE_READ) && likely(fop->read || fop->aio_read))
+	if ((mode & FMODE_READ) &&
+	     likely(fop->read || fop->aio_read || fop->read_iter))
 		mode |= FMODE_CAN_READ;
-	if ((mode & FMODE_WRITE) && likely(fop->write || fop->aio_write))
+	if ((mode & FMODE_WRITE) &&
+	     likely(fop->write || fop->aio_write || fop->write_iter))
 		mode |= FMODE_CAN_WRITE;
 	file->f_mode = mode;
 	file->f_op = fop;
diff --git a/fs/open.c b/fs/open.c
index 39d3d0468ee6..36662d036237 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -725,9 +725,11 @@ static int do_dentry_open(struct file *f,
 	}
 	if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
 		i_readcount_inc(inode);
-	if ((f->f_mode & FMODE_READ) && likely(f->f_op->read || f->f_op->aio_read))
+	if ((f->f_mode & FMODE_READ) &&
+	     likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter))
 		f->f_mode |= FMODE_CAN_READ;
-	if ((f->f_mode & FMODE_WRITE) && likely(f->f_op->write || f->f_op->aio_write))
+	if ((f->f_mode & FMODE_WRITE) &&
+	     likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter))
 		f->f_mode |= FMODE_CAN_WRITE;
 
 	f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
diff --git a/fs/read_write.c b/fs/read_write.c
index d29d2a361d2c..fe2f9d5e3536 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -25,6 +25,7 @@
 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
 typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
 		unsigned long, loff_t);
+typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
 
 const struct file_operations generic_ro_fops = {
 	.llseek		= generic_file_llseek,
@@ -390,6 +391,27 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
 
 EXPORT_SYMBOL(do_sync_read);
 
+ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
+{
+	struct iovec iov = { .iov_base = buf, .iov_len = len };
+	struct kiocb kiocb;
+	struct iov_iter iter;
+	ssize_t ret;
+
+	init_sync_kiocb(&kiocb, filp);
+	kiocb.ki_pos = *ppos;
+	kiocb.ki_nbytes = len;
+	iov_iter_init(&iter, READ, &iov, 1, len);
+
+	ret = filp->f_op->read_iter(&kiocb, &iter);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&kiocb);
+	*ppos = kiocb.ki_pos;
+	return ret;
+}
+
+EXPORT_SYMBOL(new_sync_read);
+
 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 {
 	ssize_t ret;
@@ -406,8 +428,10 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
 		count = ret;
 		if (file->f_op->read)
 			ret = file->f_op->read(file, buf, count, pos);
-		else
+		else if (file->f_op->aio_read)
 			ret = do_sync_read(file, buf, count, pos);
+		else
+			ret = new_sync_read(file, buf, count, pos);
 		if (ret > 0) {
 			fsnotify_access(file);
 			add_rchar(current, ret);
@@ -439,6 +463,27 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 
 EXPORT_SYMBOL(do_sync_write);
 
+ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
+{
+	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
+	struct kiocb kiocb;
+	struct iov_iter iter;
+	ssize_t ret;
+
+	init_sync_kiocb(&kiocb, filp);
+	kiocb.ki_pos = *ppos;
+	kiocb.ki_nbytes = len;
+	iov_iter_init(&iter, WRITE, &iov, 1, len);
+
+	ret = filp->f_op->write_iter(&kiocb, &iter);
+	if (-EIOCBQUEUED == ret)
+		ret = wait_on_sync_kiocb(&kiocb);
+	*ppos = kiocb.ki_pos;
+	return ret;
+}
+
+EXPORT_SYMBOL(new_sync_write);
+
 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
 {
 	mm_segment_t old_fs;
@@ -455,8 +500,10 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t
 		count =  MAX_RW_COUNT;
 	if (file->f_op->write)
 		ret = file->f_op->write(file, p, count, pos);
-	else
+	else if (file->f_op->aio_write)
 		ret = do_sync_write(file, p, count, pos);
+	else
+		ret = new_sync_write(file, p, count, pos);
 	set_fs(old_fs);
 	if (ret > 0) {
 		fsnotify_modify(file);
@@ -483,8 +530,10 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 		file_start_write(file);
 		if (file->f_op->write)
 			ret = file->f_op->write(file, buf, count, pos);
-		else
+		else if (file->f_op->aio_write)
 			ret = do_sync_write(file, buf, count, pos);
+		else
+			ret = new_sync_write(file, buf, count, pos);
 		if (ret > 0) {
 			fsnotify_modify(file);
 			add_wchar(current, ret);
@@ -601,6 +650,25 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
 }
 EXPORT_SYMBOL(iov_shorten);
 
+static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov,
+		unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn)
+{
+	struct kiocb kiocb;
+	struct iov_iter iter;
+	ssize_t ret;
+
+	init_sync_kiocb(&kiocb, filp);
+	kiocb.ki_pos = *ppos;
+	kiocb.ki_nbytes = len;
+
+	iov_iter_init(&iter, rw, iov, nr_segs, len);
+	ret = fn(&kiocb, &iter);
+	if (ret == -EIOCBQUEUED)
+		ret = wait_on_sync_kiocb(&kiocb);
+	*ppos = kiocb.ki_pos;
+	return ret;
+}
+
 static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
 {
@@ -738,6 +806,7 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
+	iter_fn_t iter_fn;
 
 	ret = rw_copy_check_uvector(type, uvector, nr_segs,
 				    ARRAY_SIZE(iovstack), iovstack, &iov);
@@ -753,13 +822,18 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	if (type == READ) {
 		fn = file->f_op->read;
 		fnv = file->f_op->aio_read;
+		iter_fn = file->f_op->read_iter;
 	} else {
 		fn = (io_fn_t)file->f_op->write;
 		fnv = file->f_op->aio_write;
+		iter_fn = file->f_op->write_iter;
 		file_start_write(file);
 	}
 
-	if (fnv)
+	if (iter_fn)
+		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
+						pos, iter_fn);
+	else if (fnv)
 		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 						pos, fnv);
 	else
@@ -912,6 +986,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 	ssize_t ret;
 	io_fn_t fn;
 	iov_fn_t fnv;
+	iter_fn_t iter_fn;
 
 	ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
 					       UIO_FASTIOV, iovstack, &iov);
@@ -927,13 +1002,18 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 	if (type == READ) {
 		fn = file->f_op->read;
 		fnv = file->f_op->aio_read;
+		iter_fn = file->f_op->read_iter;
 	} else {
 		fn = (io_fn_t)file->f_op->write;
 		fnv = file->f_op->aio_write;
+		iter_fn = file->f_op->write_iter;
 		file_start_write(file);
 	}
 
-	if (fnv)
+	if (iter_fn)
+		ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len,
+						pos, iter_fn);
+	else if (fnv)
 		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
 						pos, fnv);
 	else
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 75eb71357adf..17535e0a4547 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1451,6 +1451,8 @@ struct block_device_operations;
 #define HAVE_COMPAT_IOCTL 1
 #define HAVE_UNLOCKED_IOCTL 1
 
+struct iov_iter;
+
 struct file_operations {
 	struct module *owner;
 	loff_t (*llseek) (struct file *, loff_t, int);
@@ -1458,6 +1460,8 @@ struct file_operations {
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
 	ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
 	int (*iterate) (struct file *, struct dir_context *);
 	unsigned int (*poll) (struct file *, struct poll_table_struct *);
 	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -2415,6 +2419,8 @@ extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
 extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
+extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
+extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
 
 /* fs/block_dev.c */
 extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
-- 
cgit 


From 8174202b34c30e0c07231bf63f18ab29af634f0b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Apr 2014 03:17:43 -0400
Subject: write_iter variants of {__,}generic_file_aio_write()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/9p/vfs_file.c        |  8 +++----
 fs/adfs/file.c          |  4 ++--
 fs/affs/file.c          |  4 ++--
 fs/bfs/file.c           |  4 ++--
 fs/ecryptfs/file.c      |  4 ++--
 fs/exofs/file.c         |  4 ++--
 fs/ext2/file.c          |  4 ++--
 fs/ext3/file.c          |  4 ++--
 fs/f2fs/file.c          |  4 ++--
 fs/fat/file.c           |  4 ++--
 fs/hfs/inode.c          |  4 ++--
 fs/hfsplus/inode.c      |  4 ++--
 fs/hostfs/hostfs_kern.c |  4 ++--
 fs/hpfs/file.c          |  4 ++--
 fs/jffs2/file.c         |  4 ++--
 fs/jfs/file.c           |  4 ++--
 fs/logfs/file.c         |  4 ++--
 fs/minix/file.c         |  4 ++--
 fs/nilfs2/file.c        |  4 ++--
 fs/omfs/file.c          |  4 ++--
 fs/ramfs/file-mmu.c     |  4 ++--
 fs/ramfs/file-nommu.c   |  4 ++--
 fs/reiserfs/file.c      |  4 ++--
 fs/sysv/file.c          |  4 ++--
 fs/ufs/file.c           |  4 ++--
 include/linux/fs.h      |  2 ++
 mm/filemap.c            | 61 ++++++++++++++++++++++++++++++-------------------
 mm/shmem.c              |  4 ++--
 mm/vmscan.c             |  2 +-
 29 files changed, 94 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 47e0597d1e9b..b9b5f979a2ca 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -763,7 +763,7 @@ err_out:
 
 buff_write:
 	mutex_unlock(&inode->i_mutex);
-	return do_sync_write(filp, data, count, offsetp);
+	return new_sync_write(filp, data, count, offsetp);
 }
 
 /**
@@ -781,7 +781,7 @@ v9fs_cached_file_write(struct file *filp, const char __user * data,
 
 	if (filp->f_flags & O_DIRECT)
 		return v9fs_direct_write(filp, data, count, offset);
-	return do_sync_write(filp, data, count, offset);
+	return new_sync_write(filp, data, count, offset);
 }
 
 
@@ -851,7 +851,7 @@ const struct file_operations v9fs_cached_file_operations = {
 	.read = v9fs_cached_file_read,
 	.write = v9fs_cached_file_write,
 	.read_iter = generic_file_read_iter,
-	.aio_write = generic_file_aio_write,
+	.write_iter = generic_file_write_iter,
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
 	.lock = v9fs_file_lock,
@@ -864,7 +864,7 @@ const struct file_operations v9fs_cached_file_operations_dotl = {
 	.read = v9fs_cached_file_read,
 	.write = v9fs_cached_file_write,
 	.read_iter = generic_file_read_iter,
-	.aio_write = generic_file_aio_write,
+	.write_iter = generic_file_write_iter,
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
 	.lock = v9fs_file_lock_dotl,
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 3bfc9efa29b4..07c9edce5aa7 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -27,8 +27,8 @@ const struct file_operations adfs_file_operations = {
 	.read_iter	= generic_file_read_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= generic_file_fsync,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.splice_read	= generic_file_splice_read,
 };
 
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 982853f17afc..9df23175e28b 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -29,8 +29,8 @@ const struct file_operations affs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.open		= affs_file_open,
 	.release	= affs_file_release,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 0aa788892f93..e7f88ace1a25 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -25,8 +25,8 @@ const struct file_operations bfs_file_operations = {
 	.llseek 	= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
 };
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index b32827c917e1..db0fad3269c0 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -353,8 +353,8 @@ const struct file_operations ecryptfs_main_fops = {
 	.llseek = generic_file_llseek,
 	.read = new_sync_read,
 	.read_iter = ecryptfs_read_update_atime,
-	.write = do_sync_write,
-	.aio_write = generic_file_aio_write,
+	.write = new_sync_write,
+	.write_iter = generic_file_write_iter,
 	.iterate = ecryptfs_readdir,
 	.unlocked_ioctl = ecryptfs_unlocked_ioctl,
 #ifdef CONFIG_COMPAT
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 90d394da7471..5b7f6be5a2d5 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -68,9 +68,9 @@ static int exofs_flush(struct file *file, fl_owner_t id)
 const struct file_operations exofs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.open		= generic_file_open,
 	.release	= exofs_release_file,
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 407305072597..970c6aca15cc 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -63,9 +63,9 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 const struct file_operations ext2_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.unlocked_ioctl = ext2_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext2_compat_ioctl,
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 5439d2f0141b..c833b1226d4d 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -51,9 +51,9 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
 const struct file_operations ext3_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.unlocked_ioctl	= ext3_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext3_compat_ioctl,
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 0e01fb0bc97c..22f4900dd8eb 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -680,9 +680,9 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 const struct file_operations f2fs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.open		= generic_file_open,
 	.mmap		= f2fs_file_mmap,
 	.fsync		= f2fs_sync_file,
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 29285e990c90..85f79a89e747 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -171,9 +171,9 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 const struct file_operations fat_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.release	= fat_file_release,
 	.unlocked_ioctl	= fat_generic_ioctl,
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 6d4055aff109..d0929bc81782 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -676,8 +676,8 @@ static const struct file_operations hfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
 	.fsync		= hfs_file_fsync,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index cccc89e47cb6..0cf786f2d046 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -343,8 +343,8 @@ static const struct file_operations hfsplus_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
 	.fsync		= hfsplus_file_fsync,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index ce0005d8ffeb..bb529f3b7f2b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -381,8 +381,8 @@ static const struct file_operations hostfs_file_fops = {
 	.read		= new_sync_read,
 	.splice_read	= generic_file_splice_read,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
-	.write		= do_sync_write,
+	.write_iter	= generic_file_write_iter,
+	.write		= new_sync_write,
 	.mmap		= generic_file_mmap,
 	.open		= hostfs_file_open,
 	.release	= hostfs_file_release,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index bacb478a4990..7f54e5f76cec 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -199,8 +199,8 @@ const struct file_operations hpfs_file_ops =
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.release	= hpfs_file_release,
 	.fsync		= hpfs_file_fsync,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 9192127d591c..64989ca9ba90 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -53,8 +53,8 @@ const struct file_operations jffs2_file_operations =
 	.open =		generic_file_open,
  	.read =		new_sync_read,
  	.read_iter =	generic_file_read_iter,
- 	.write =	do_sync_write,
- 	.aio_write =	generic_file_aio_write,
+ 	.write =	new_sync_write,
+ 	.write_iter =	generic_file_write_iter,
 	.unlocked_ioctl=jffs2_ioctl,
 	.mmap =		generic_file_readonly_mmap,
 	.fsync =	jffs2_fsync,
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index a5d8299b2208..cc744ecaf51f 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -151,10 +151,10 @@ const struct inode_operations jfs_file_inode_operations = {
 const struct file_operations jfs_file_operations = {
 	.open		= jfs_open,
 	.llseek		= generic_file_llseek,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= generic_file_splice_write,
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 1ca8026dc664..8538752df2f6 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -265,14 +265,14 @@ const struct inode_operations logfs_reg_iops = {
 
 const struct file_operations logfs_reg_fops = {
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.fsync		= logfs_fsync,
 	.unlocked_ioctl	= logfs_ioctl,
 	.llseek		= generic_file_llseek,
 	.mmap		= generic_file_readonly_mmap,
 	.open		= generic_file_open,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 };
 
 const struct address_space_operations logfs_reg_aops = {
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 607b47145325..a967de085ac0 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -16,8 +16,8 @@ const struct file_operations minix_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= generic_file_fsync,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index dcb1b0e8b435..24978153c0c4 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -153,9 +153,9 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 const struct file_operations nilfs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= generic_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.unlocked_ioctl	= nilfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= nilfs_compat_ioctl,
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 3bf28da9f0df..902e88527fce 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -338,9 +338,9 @@ static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
 const struct file_operations omfs_file_operations = {
 	.llseek = generic_file_llseek,
 	.read = new_sync_read,
-	.write = do_sync_write,
+	.write = new_sync_write,
 	.read_iter = generic_file_read_iter,
-	.aio_write = generic_file_aio_write,
+	.write_iter = generic_file_write_iter,
 	.mmap = generic_file_mmap,
 	.fsync = generic_file_fsync,
 	.splice_read = generic_file_splice_read,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 30ffb367bc0b..6ea0b9718a9d 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -33,8 +33,8 @@
 const struct file_operations ramfs_file_operations = {
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= noop_fsync,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 416db04f8464..9ed420f8f3ca 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -39,8 +39,8 @@ const struct file_operations ramfs_file_operations = {
 	.get_unmapped_area	= ramfs_nommu_get_unmapped_area,
 	.read			= new_sync_read,
 	.read_iter		= generic_file_read_iter,
-	.write			= do_sync_write,
-	.aio_write		= generic_file_aio_write,
+	.write			= new_sync_write,
+	.write_iter		= generic_file_write_iter,
 	.fsync			= noop_fsync,
 	.splice_read		= generic_file_splice_read,
 	.splice_write		= generic_file_splice_write,
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 7592d681fd8c..7c8ecd6468db 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -236,7 +236,7 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
 
 const struct file_operations reiserfs_file_operations = {
 	.read = new_sync_read,
-	.write = do_sync_write,
+	.write = new_sync_write,
 	.unlocked_ioctl = reiserfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = reiserfs_compat_ioctl,
@@ -246,7 +246,7 @@ const struct file_operations reiserfs_file_operations = {
 	.release = reiserfs_file_release,
 	.fsync = reiserfs_sync_file,
 	.read_iter = generic_file_read_iter,
-	.aio_write = generic_file_aio_write,
+	.write_iter = generic_file_write_iter,
 	.splice_read = generic_file_splice_read,
 	.splice_write = generic_file_splice_write,
 	.llseek = generic_file_llseek,
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index d99be8877388..b00811c75b24 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -23,8 +23,8 @@ const struct file_operations sysv_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= generic_file_fsync,
 	.splice_read	= generic_file_splice_read,
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index b6b402989e6b..c84ec010a676 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -37,8 +37,8 @@ const struct file_operations ufs_file_operations = {
 	.llseek		= generic_file_llseek,
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= generic_file_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.open           = generic_file_open,
 	.fsync		= generic_file_fsync,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 17535e0a4547..4b221637f09e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2414,7 +2414,9 @@ int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isbl
 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
+extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
 extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
diff --git a/mm/filemap.c b/mm/filemap.c
index c0404b763a17..d2d9eeec8bf0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2542,10 +2542,9 @@ again:
 EXPORT_SYMBOL(generic_perform_write);
 
 /**
- * __generic_file_aio_write - write data to a file
+ * __generic_file_write_iter - write data to a file
  * @iocb:	IO state structure (file, offset, etc.)
- * @iov:	vector with data to write
- * @nr_segs:	number of segments in the vector
+ * @from:	iov_iter with data to write
  *
  * This function does all the work needed for actually writing data to a
  * file. It does all basic checks, removes SUID from the file, updates
@@ -2559,21 +2558,16 @@ EXPORT_SYMBOL(generic_perform_write);
  * A caller has to handle it. This is mainly due to the fact that we want to
  * avoid syncing under i_mutex.
  */
-ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				 unsigned long nr_segs)
+ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct address_space * mapping = file->f_mapping;
-	size_t count;		/* after file limit checks */
 	struct inode 	*inode = mapping->host;
 	loff_t		pos = iocb->ki_pos;
 	ssize_t		written = 0;
 	ssize_t		err;
 	ssize_t		status;
-	struct iov_iter from;
-
-	count = iov_length(iov, nr_segs);
-	iov_iter_init(&from, WRITE, iov, nr_segs, count);
+	size_t		count = iov_iter_count(from);
 
 	/* We can write back this queue in page reclaim */
 	current->backing_dev_info = mapping->backing_dev_info;
@@ -2584,7 +2578,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (count == 0)
 		goto out;
 
-	iov_iter_truncate(&from, count);
+	iov_iter_truncate(from, count);
 
 	err = file_remove_suid(file);
 	if (err)
@@ -2598,7 +2592,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (unlikely(file->f_flags & O_DIRECT)) {
 		loff_t endbyte;
 
-		written = generic_file_direct_write(iocb, &from, pos);
+		written = generic_file_direct_write(iocb, from, pos);
 		if (written < 0 || written == count)
 			goto out;
 
@@ -2609,7 +2603,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		pos += written;
 		count -= written;
 
-		status = generic_perform_write(file, &from, pos);
+		status = generic_perform_write(file, from, pos);
 		/*
 		 * If generic_perform_write() returned a synchronous error
 		 * then we want to return the number of bytes which were
@@ -2641,7 +2635,7 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 			 */
 		}
 	} else {
-		written = generic_perform_write(file, &from, pos);
+		written = generic_perform_write(file, from, pos);
 		if (likely(written >= 0))
 			iocb->ki_pos = pos + written;
 	}
@@ -2649,30 +2643,36 @@ out:
 	current->backing_dev_info = NULL;
 	return written ? written : err;
 }
+EXPORT_SYMBOL(__generic_file_write_iter);
+
+ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+				 unsigned long nr_segs)
+{
+	size_t count = iov_length(iov, nr_segs);
+	struct iov_iter from;
+
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
+	return __generic_file_write_iter(iocb, &from);
+}
 EXPORT_SYMBOL(__generic_file_aio_write);
 
 /**
- * generic_file_aio_write - write data to a file
+ * generic_file_write_iter - write data to a file
  * @iocb:	IO state structure
- * @iov:	vector with data to write
- * @nr_segs:	number of segments in the vector
- * @pos:	position in file where to write
+ * @from:	iov_iter with data to write
  *
- * This is a wrapper around __generic_file_aio_write() to be used by most
+ * This is a wrapper around __generic_file_write_iter() to be used by most
  * filesystems. It takes care of syncing the file in case of O_SYNC file
  * and acquires i_mutex as needed.
  */
-ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
+ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	ssize_t ret;
 
-	BUG_ON(iocb->ki_pos != pos);
-
 	mutex_lock(&inode->i_mutex);
-	ret = __generic_file_aio_write(iocb, iov, nr_segs);
+	ret = __generic_file_write_iter(iocb, from);
 	mutex_unlock(&inode->i_mutex);
 
 	if (ret > 0) {
@@ -2684,6 +2684,19 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	}
 	return ret;
 }
+EXPORT_SYMBOL(generic_file_write_iter);
+
+ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+		unsigned long nr_segs, loff_t pos)
+{
+	size_t count = iov_length(iov, nr_segs);
+	struct iov_iter from;
+
+	BUG_ON(iocb->ki_pos != pos);
+
+	iov_iter_init(&from, WRITE, iov, nr_segs, count);
+	return generic_file_write_iter(iocb, &from);
+}
 EXPORT_SYMBOL(generic_file_aio_write);
 
 /**
diff --git a/mm/shmem.c b/mm/shmem.c
index edc6c7e817e9..d3e5c6fc313c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2618,9 +2618,9 @@ static const struct file_operations shmem_file_operations = {
 #ifdef CONFIG_TMPFS
 	.llseek		= shmem_file_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= shmem_file_read_iter,
-	.aio_write	= generic_file_aio_write,
+	.write_iter	= generic_file_write_iter,
 	.fsync		= noop_fsync,
 	.splice_read	= shmem_file_splice_read,
 	.splice_write	= generic_file_splice_write,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 32c661d66a45..9c2dba6ac685 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -458,7 +458,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 	 * stalls if we need to run get_block().  We could test
 	 * PagePrivate for that.
 	 *
-	 * If this process is currently in __generic_file_aio_write() against
+	 * If this process is currently in __generic_file_write_iter() against
 	 * this page's queue, we can perform writeback even if that
 	 * will block.
 	 *
-- 
cgit 


From 1456c0a87c4241d3a801651019e66983c69ad17d Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Apr 2014 03:21:50 -0400
Subject: blkdev_aio_write() - turn into blkdev_write_iter()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/char/raw.c |  4 ++--
 fs/block_dev.c     | 16 ++++++----------
 include/linux/fs.h |  3 +--
 3 files changed, 9 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index cfb607a64b85..0102dc788608 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -286,8 +286,8 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd,
 static const struct file_operations raw_fops = {
 	.read		= new_sync_read,
 	.read_iter	= generic_file_read_iter,
-	.write		= do_sync_write,
-	.aio_write	= blkdev_aio_write,
+	.write		= new_sync_write,
+	.write_iter	= blkdev_write_iter,
 	.fsync		= blkdev_fsync,
 	.open		= raw_open,
 	.release	= raw_release,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3d97f4a257ff..4e36b8ea8aa4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1509,28 +1509,24 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
  * Does not take i_mutex for the write and thus is not for general purpose
  * use.
  */
-ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
-			 unsigned long nr_segs, loff_t pos)
+ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *file = iocb->ki_filp;
 	struct blk_plug plug;
 	ssize_t ret;
 
-	BUG_ON(iocb->ki_pos != pos);
-
 	blk_start_plug(&plug);
-	ret = __generic_file_aio_write(iocb, iov, nr_segs);
+	ret = __generic_file_write_iter(iocb, from);
 	if (ret > 0) {
 		ssize_t err;
-
-		err = generic_write_sync(file, pos, ret);
+		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
 		if (err < 0)
 			ret = err;
 	}
 	blk_finish_plug(&plug);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(blkdev_aio_write);
+EXPORT_SYMBOL_GPL(blkdev_write_iter);
 
 static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
@@ -1577,9 +1573,9 @@ const struct file_operations def_blk_fops = {
 	.release	= blkdev_close,
 	.llseek		= block_llseek,
 	.read		= new_sync_read,
-	.write		= do_sync_write,
+	.write		= new_sync_write,
 	.read_iter	= blkdev_read_iter,
-	.aio_write	= blkdev_aio_write,
+	.write_iter	= blkdev_write_iter,
 	.mmap		= generic_file_mmap,
 	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= block_ioctl,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4b221637f09e..1b9b6c59abdd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2425,8 +2425,7 @@ extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo
 extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
 
 /* fs/block_dev.c */
-extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos);
+extern ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from);
 extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
 			int datasync);
 extern void block_sync_page(struct page *page);
-- 
cgit 


From a8f3550cd228b6edc5d17fce1a9af8cc7004f185 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Apr 2014 03:32:25 -0400
Subject: bury __generic_file_aio_write()

All users have now been converted to __generic_file_write_iter(), so the
old __generic_file_aio_write() wrapper can be removed.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h |  1 -
 mm/filemap.c       | 11 -----------
 2 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1b9b6c59abdd..99817c9e665e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2413,7 +2413,6 @@ extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
 extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
-extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/mm/filemap.c b/mm/filemap.c
index d2d9eeec8bf0..7dcdb9db710d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2645,17 +2645,6 @@ out:
 }
 EXPORT_SYMBOL(__generic_file_write_iter);
 
-ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				 unsigned long nr_segs)
-{
-	size_t count = iov_length(iov, nr_segs);
-	struct iov_iter from;
-
-	iov_iter_init(&from, WRITE, iov, nr_segs, count);
-	return __generic_file_write_iter(iocb, &from);
-}
-EXPORT_SYMBOL(__generic_file_aio_write);
-
 /**
  * generic_file_write_iter - write data to a file
  * @iocb:	IO state structure
-- 
cgit 


From f0d1bec9d58d4c038d0ac958c9af82be6eb18045 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Apr 2014 15:05:18 -0400
Subject: new helper: copy_page_from_iter()

parallel to copy_page_to_iter().  pipe_write() switched to it (and became
->write_iter()).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/pipe.c           | 129 ++++++++--------------------------------------------
 include/linux/uio.h |   2 +
 mm/iov_iter.c       |  78 +++++++++++++++++++++++++++++++
 3 files changed, 99 insertions(+), 110 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pipe.c b/fs/pipe.c
index 05ccb00cb407..21981e58e2a6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -116,50 +116,6 @@ void pipe_wait(struct pipe_inode_info *pipe)
 	pipe_lock(pipe);
 }
 
-static int
-pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
-			int atomic)
-{
-	unsigned long copy;
-
-	while (len > 0) {
-		while (!iov->iov_len)
-			iov++;
-		copy = min_t(unsigned long, len, iov->iov_len);
-
-		if (atomic) {
-			if (__copy_from_user_inatomic(to, iov->iov_base, copy))
-				return -EFAULT;
-		} else {
-			if (copy_from_user(to, iov->iov_base, copy))
-				return -EFAULT;
-		}
-		to += copy;
-		len -= copy;
-		iov->iov_base += copy;
-		iov->iov_len -= copy;
-	}
-	return 0;
-}
-
-/*
- * Pre-fault in the user memory, so we can use atomic copies.
- */
-static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
-{
-	while (!iov->iov_len)
-		iov++;
-
-	while (len > 0) {
-		unsigned long this_len;
-
-		this_len = min_t(unsigned long, len, iov->iov_len);
-		fault_in_pages_readable(iov->iov_base, this_len);
-		len -= this_len;
-		iov++;
-	}
-}
-
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
@@ -380,24 +336,19 @@ static inline int is_packetized(struct file *file)
 }
 
 static ssize_t
-pipe_write(struct kiocb *iocb, const struct iovec *_iov,
-	    unsigned long nr_segs, loff_t ppos)
+pipe_write(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct file *filp = iocb->ki_filp;
 	struct pipe_inode_info *pipe = filp->private_data;
-	ssize_t ret;
-	int do_wakeup;
-	struct iovec *iov = (struct iovec *)_iov;
-	size_t total_len;
+	ssize_t ret = 0;
+	int do_wakeup = 0;
+	size_t total_len = iov_iter_count(from);
 	ssize_t chars;
 
-	total_len = iov_length(iov, nr_segs);
 	/* Null write succeeds. */
 	if (unlikely(total_len == 0))
 		return 0;
 
-	do_wakeup = 0;
-	ret = 0;
 	__pipe_lock(pipe);
 
 	if (!pipe->readers) {
@@ -416,38 +367,19 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 		int offset = buf->offset + buf->len;
 
 		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
-			int error, atomic = 1;
-			void *addr;
-
-			error = ops->confirm(pipe, buf);
+			int error = ops->confirm(pipe, buf);
 			if (error)
 				goto out;
 
-			iov_fault_in_pages_read(iov, chars);
-redo1:
-			if (atomic)
-				addr = kmap_atomic(buf->page);
-			else
-				addr = kmap(buf->page);
-			error = pipe_iov_copy_from_user(offset + addr, iov,
-							chars, atomic);
-			if (atomic)
-				kunmap_atomic(addr);
-			else
-				kunmap(buf->page);
-			ret = error;
-			do_wakeup = 1;
-			if (error) {
-				if (atomic) {
-					atomic = 0;
-					goto redo1;
-				}
+			ret = copy_page_from_iter(buf->page, offset, chars, from);
+			if (unlikely(ret < chars)) {
+				error = -EFAULT;
 				goto out;
 			}
+			do_wakeup = 1;
 			buf->len += chars;
-			total_len -= chars;
 			ret = chars;
-			if (!total_len)
+			if (!iov_iter_count(from))
 				goto out;
 		}
 	}
@@ -466,8 +398,7 @@ redo1:
 			int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
 			struct pipe_buffer *buf = pipe->bufs + newbuf;
 			struct page *page = pipe->tmp_page;
-			char *src;
-			int error, atomic = 1;
+			int copied;
 
 			if (!page) {
 				page = alloc_page(GFP_HIGHUSER);
@@ -483,40 +414,19 @@ redo1:
 			 * FIXME! Is this really true?
 			 */
 			do_wakeup = 1;
-			chars = PAGE_SIZE;
-			if (chars > total_len)
-				chars = total_len;
-
-			iov_fault_in_pages_read(iov, chars);
-redo2:
-			if (atomic)
-				src = kmap_atomic(page);
-			else
-				src = kmap(page);
-
-			error = pipe_iov_copy_from_user(src, iov, chars,
-							atomic);
-			if (atomic)
-				kunmap_atomic(src);
-			else
-				kunmap(page);
-
-			if (unlikely(error)) {
-				if (atomic) {
-					atomic = 0;
-					goto redo2;
-				}
+			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
+			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
 				if (!ret)
-					ret = error;
+					ret = -EFAULT;
 				break;
 			}
-			ret += chars;
+			ret += copied;
 
 			/* Insert it into the buffer array */
 			buf->page = page;
 			buf->ops = &anon_pipe_buf_ops;
 			buf->offset = 0;
-			buf->len = chars;
+			buf->len = copied;
 			buf->flags = 0;
 			if (is_packetized(filp)) {
 				buf->ops = &packet_pipe_buf_ops;
@@ -525,8 +435,7 @@ redo2:
 			pipe->nrbufs = ++bufs;
 			pipe->tmp_page = NULL;
 
-			total_len -= chars;
-			if (!total_len)
+			if (!iov_iter_count(from))
 				break;
 		}
 		if (bufs < pipe->buffers)
@@ -1040,8 +949,8 @@ const struct file_operations pipefifo_fops = {
 	.llseek		= no_llseek,
 	.read		= new_sync_read,
 	.read_iter	= pipe_read,
-	.write		= do_sync_write,
-	.aio_write	= pipe_write,
+	.write		= new_sync_write,
+	.write_iter	= pipe_write,
 	.poll		= pipe_poll,
 	.unlocked_ioctl	= pipe_ioctl,
 	.release	= pipe_release,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 532f59d0adbb..66012352d333 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -68,6 +68,8 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
+size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index a5c691c1a283..081e3273085b 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -82,6 +82,84 @@ done:
 }
 EXPORT_SYMBOL(copy_page_to_iter);
 
+size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	size_t skip, copy, left, wanted;
+	const struct iovec *iov;
+	char __user *buf;
+	void *kaddr, *to;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	iov = i->iov;
+	skip = i->iov_offset;
+	buf = iov->iov_base + skip;
+	copy = min(bytes, iov->iov_len - skip);
+
+	if (!fault_in_pages_readable(buf, copy)) {
+		kaddr = kmap_atomic(page);
+		to = kaddr + offset;
+
+		/* first chunk, usually the only one */
+		left = __copy_from_user_inatomic(to, buf, copy);
+		copy -= left;
+		skip += copy;
+		to += copy;
+		bytes -= copy;
+
+		while (unlikely(!left && bytes)) {
+			iov++;
+			buf = iov->iov_base;
+			copy = min(bytes, iov->iov_len);
+			left = __copy_from_user_inatomic(to, buf, copy);
+			copy -= left;
+			skip = copy;
+			to += copy;
+			bytes -= copy;
+		}
+		if (likely(!bytes)) {
+			kunmap_atomic(kaddr);
+			goto done;
+		}
+		offset = to - kaddr;
+		buf += copy;
+		kunmap_atomic(kaddr);
+		copy = min(bytes, iov->iov_len - skip);
+	}
+	/* Too bad - revert to non-atomic kmap */
+	kaddr = kmap(page);
+	to = kaddr + offset;
+	left = __copy_from_user(to, buf, copy);
+	copy -= left;
+	skip += copy;
+	to += copy;
+	bytes -= copy;
+	while (unlikely(!left && bytes)) {
+		iov++;
+		buf = iov->iov_base;
+		copy = min(bytes, iov->iov_len);
+		left = __copy_from_user(to, buf, copy);
+		copy -= left;
+		skip = copy;
+		to += copy;
+		bytes -= copy;
+	}
+	kunmap(page);
+done:
+	i->count -= wanted - bytes;
+	i->nr_segs -= iov - i->iov;
+	i->iov = iov;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+EXPORT_SYMBOL(copy_page_from_iter);
+
 static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
 {
-- 
cgit 


From 2b777c9dd9ebbb2f8b6818d454cc5e6d7c1e3c8b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 3 Apr 2014 22:31:22 -0400
Subject: ceph_sync_read: stop poking into iov_iter guts

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/ceph/file.c               | 46 +++++++++++++++++---------------------------
 include/linux/ceph/libceph.h |  2 --
 net/ceph/pagevec.c           | 35 ++++-----------------------------
 3 files changed, 22 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index c9a24ba98c9a..672b0fedb17b 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -418,7 +418,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
 	struct page **pages;
 	u64 off = iocb->ki_pos;
 	int num_pages, ret;
-	size_t len = i->count;
+	size_t len = iov_iter_count(i);
 
 	dout("sync_read on file %p %llu~%u %s\n", file, off,
 	     (unsigned)len,
@@ -436,25 +436,26 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
 
 	if (file->f_flags & O_DIRECT) {
 		while (iov_iter_count(i)) {
-			void __user *data = i->iov[0].iov_base + i->iov_offset;
-			size_t len = i->iov[0].iov_len - i->iov_offset;
+			size_t start;
+			ssize_t n;
 
-			num_pages = calc_pages_for((unsigned long)data, len);
-			pages = ceph_get_direct_page_vector(data,
-							    num_pages, true);
-			if (IS_ERR(pages))
-				return PTR_ERR(pages);
+			n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start);
+			if (n < 0)
+				return n;
 
-			ret = striped_read(inode, off, len,
+			num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE;
+
+			ret = striped_read(inode, off, n,
 					   pages, num_pages, checkeof,
-					   1, (unsigned long)data & ~PAGE_MASK);
+					   1, start);
+
 			ceph_put_page_vector(pages, num_pages, true);
 
 			if (ret <= 0)
 				break;
 			off += ret;
 			iov_iter_advance(i, ret);
-			if (ret < len)
+			if (ret < n)
 				break;
 		}
 	} else {
@@ -466,25 +467,14 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
 					num_pages, checkeof, 0, 0);
 		if (ret > 0) {
 			int l, k = 0;
-			size_t left = len = ret;
+			size_t left = ret;
 
 			while (left) {
-				void __user *data = i->iov[0].iov_base
-							+ i->iov_offset;
-				l = min(i->iov[0].iov_len - i->iov_offset,
-					left);
-
-				ret = ceph_copy_page_vector_to_user(&pages[k],
-								    data, off,
-								    l);
-				if (ret > 0) {
-					iov_iter_advance(i, ret);
-					left -= ret;
-					off += ret;
-					k = calc_pages_for(iocb->ki_pos,
-							   len - left + 1) - 1;
-					BUG_ON(k >= num_pages && left);
-				} else
+				int copy = min_t(size_t, PAGE_SIZE, left);
+				l = copy_page_to_iter(pages[k++], 0, copy, i);
+				off += l;
+				left -= l;
+				if (l < copy)
 					break;
 			}
 		}
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 2f49aa4c4f7f..279b0afac1c1 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -222,8 +222,6 @@ extern void ceph_copy_to_page_vector(struct page **pages,
 extern void ceph_copy_from_page_vector(struct page **pages,
 				    void *data,
 				    loff_t off, size_t len);
-extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data,
-				    loff_t off, size_t len);
 extern void ceph_zero_page_vector_range(int off, int len, struct page **pages);
 
 
diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c
index 815a2249cfa9..555013034f7a 100644
--- a/net/ceph/pagevec.c
+++ b/net/ceph/pagevec.c
@@ -53,7 +53,10 @@ void ceph_put_page_vector(struct page **pages, int num_pages, bool dirty)
 			set_page_dirty_lock(pages[i]);
 		put_page(pages[i]);
 	}
-	kfree(pages);
+	if (is_vmalloc_addr(pages))
+		vfree(pages);
+	else
+		kfree(pages);
 }
 EXPORT_SYMBOL(ceph_put_page_vector);
 
@@ -164,36 +167,6 @@ void ceph_copy_from_page_vector(struct page **pages,
 }
 EXPORT_SYMBOL(ceph_copy_from_page_vector);
 
-/*
- * copy user data from a page vector into a user pointer
- */
-int ceph_copy_page_vector_to_user(struct page **pages,
-					 void __user *data,
-					 loff_t off, size_t len)
-{
-	int i = 0;
-	int po = off & ~PAGE_CACHE_MASK;
-	int left = len;
-	int l, bad;
-
-	while (left > 0) {
-		l = min_t(int, left, PAGE_CACHE_SIZE-po);
-		bad = copy_to_user(data, page_address(pages[i]) + po, l);
-		if (bad == l)
-			return -EFAULT;
-		data += l - bad;
-		left -= l - bad;
-		if (po) {
-			po += l - bad;
-			if (po == PAGE_CACHE_SIZE)
-				po = 0;
-		}
-		i++;
-	}
-	return len;
-}
-EXPORT_SYMBOL(ceph_copy_page_vector_to_user);
-
 /*
  * Zero an extent within a page vector.  Offset is relative to the
  * start of the first page.
-- 
cgit 


From b42b15fdad3ebb790250041d1517acebb9bd56d9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Apr 2014 12:15:19 -0400
Subject: lustre: get rid of messing with iovecs

* switch to ->read_iter/->write_iter
* keep a pointer to iov_iter instead of iov/nr_segs
* do not modify iovecs; use iov_iter_truncate()/iov_iter_advance() and
a new primitive - iov_iter_reexpand() (expand previously truncated
iterator) instead.
* (racy) check for lustre VMAs intersecting with iovecs kept for now as
iov_for_each() loop.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/staging/lustre/lustre/include/lclient.h    |  11 +--
 drivers/staging/lustre/lustre/lclient/lcommon_cl.c |  48 +---------
 drivers/staging/lustre/lustre/llite/file.c         | 106 ++++-----------------
 .../staging/lustre/lustre/llite/llite_internal.h   |   3 +-
 drivers/staging/lustre/lustre/llite/rw.c           |   3 +-
 drivers/staging/lustre/lustre/llite/vvp_io.c       |  29 +++---
 include/linux/uio.h                                |   9 ++
 7 files changed, 46 insertions(+), 163 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/lustre/lustre/include/lclient.h b/drivers/staging/lustre/lustre/include/lclient.h
index 827209ea6bd0..386a36c00f57 100644
--- a/drivers/staging/lustre/lustre/include/lclient.h
+++ b/drivers/staging/lustre/lustre/include/lclient.h
@@ -82,16 +82,7 @@ struct ccc_io {
 	/**
 	 * I/O vector information to or from which read/write is going.
 	 */
-	struct iovec *cui_iov;
-	unsigned long cui_nrsegs;
-	/**
-	 * Total iov count for left IO.
-	 */
-	unsigned long cui_tot_nrsegs;
-	/**
-	 * Old length for iov that was truncated partially.
-	 */
-	size_t cui_iov_olen;
+	struct iov_iter *cui_iter;
 	/**
 	 * Total size for the left IO.
 	 */
diff --git a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
index 6907a16dbbd1..a07d5156bc50 100644
--- a/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
+++ b/drivers/staging/lustre/lustre/lclient/lcommon_cl.c
@@ -721,31 +721,12 @@ int ccc_io_one_lock_index(const struct lu_env *env, struct cl_io *io,
 void ccc_io_update_iov(const struct lu_env *env,
 		       struct ccc_io *cio, struct cl_io *io)
 {
-	int i;
 	size_t size = io->u.ci_rw.crw_count;
 
-	cio->cui_iov_olen = 0;
-	if (!cl_is_normalio(env, io) || cio->cui_tot_nrsegs == 0)
+	if (!cl_is_normalio(env, io) || cio->cui_iter == NULL)
 		return;
 
-	for (i = 0; i < cio->cui_tot_nrsegs; i++) {
-		struct iovec *iv = &cio->cui_iov[i];
-
-		if (iv->iov_len < size)
-			size -= iv->iov_len;
-		else {
-			if (iv->iov_len > size) {
-				cio->cui_iov_olen = iv->iov_len;
-				iv->iov_len = size;
-			}
-			break;
-		}
-	}
-
-	cio->cui_nrsegs = i + 1;
-	LASSERTF(cio->cui_tot_nrsegs >= cio->cui_nrsegs,
-		 "tot_nrsegs: %lu, nrsegs: %lu\n",
-		 cio->cui_tot_nrsegs, cio->cui_nrsegs);
+	iov_iter_truncate(cio->cui_iter, size);
 }
 
 int ccc_io_one_lock(const struct lu_env *env, struct cl_io *io,
@@ -776,30 +757,7 @@ void ccc_io_advance(const struct lu_env *env,
 	if (!cl_is_normalio(env, io))
 		return;
 
-	LASSERT(cio->cui_tot_nrsegs >= cio->cui_nrsegs);
-	LASSERT(cio->cui_tot_count  >= nob);
-
-	cio->cui_iov	+= cio->cui_nrsegs;
-	cio->cui_tot_nrsegs -= cio->cui_nrsegs;
-	cio->cui_tot_count  -= nob;
-
-	/* update the iov */
-	if (cio->cui_iov_olen > 0) {
-		struct iovec *iv;
-
-		cio->cui_iov--;
-		cio->cui_tot_nrsegs++;
-		iv = &cio->cui_iov[0];
-		if (io->ci_continue) {
-			iv->iov_base += iv->iov_len;
-			LASSERT(cio->cui_iov_olen > iv->iov_len);
-			iv->iov_len = cio->cui_iov_olen - iv->iov_len;
-		} else {
-			/* restore the iov_len, in case of restart io. */
-			iv->iov_len = cio->cui_iov_olen;
-		}
-		cio->cui_iov_olen = 0;
-	}
+	iov_iter_reexpand(cio->cui_iter, cio->cui_tot_count  -= nob);
 }
 
 /**
diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c
index 220bd8390a84..3efda2540d29 100644
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@ -1105,9 +1105,7 @@ restart:
 
 		switch (vio->cui_io_subtype) {
 		case IO_NORMAL:
-			cio->cui_iov = args->u.normal.via_iov;
-			cio->cui_nrsegs = args->u.normal.via_nrsegs;
-			cio->cui_tot_nrsegs = cio->cui_nrsegs;
+			cio->cui_iter = args->u.normal.via_iter;
 			cio->cui_iocb = args->u.normal.via_iocb;
 			if ((iot == CIT_WRITE) &&
 			    !(cio->cui_fd->fd_flags & LL_FILE_GROUP_LOCKED)) {
@@ -1171,56 +1169,23 @@ out:
 	return result;
 }
 
-static ssize_t ll_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-				unsigned long nr_segs, loff_t pos)
+static ssize_t ll_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
 	struct lu_env      *env;
 	struct vvp_io_args *args;
-	size_t	      count = 0;
 	ssize_t	     result;
 	int		 refcheck;
 
-	count = iov_length(iov, nr_segs);
-
 	env = cl_env_get(&refcheck);
 	if (IS_ERR(env))
 		return PTR_ERR(env);
 
 	args = vvp_env_args(env, IO_NORMAL);
-	args->u.normal.via_iov = (struct iovec *)iov;
-	args->u.normal.via_nrsegs = nr_segs;
+	args->u.normal.via_iter = to;
 	args->u.normal.via_iocb = iocb;
 
 	result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_READ,
-				    &iocb->ki_pos, count);
-	cl_env_put(env, &refcheck);
-	return result;
-}
-
-static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
-			    loff_t *ppos)
-{
-	struct lu_env *env;
-	struct iovec  *local_iov;
-	struct kiocb  *kiocb;
-	ssize_t	result;
-	int	    refcheck;
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
-
-	local_iov = &vvp_env_info(env)->vti_local_iov;
-	kiocb = &vvp_env_info(env)->vti_kiocb;
-	local_iov->iov_base = (void __user *)buf;
-	local_iov->iov_len = count;
-	init_sync_kiocb(kiocb, file);
-	kiocb->ki_pos = *ppos;
-	kiocb->ki_nbytes = count;
-
-	result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
-	*ppos = kiocb->ki_pos;
-
+				    &iocb->ki_pos, iov_iter_count(to));
 	cl_env_put(env, &refcheck);
 	return result;
 }
@@ -1228,12 +1193,10 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
 /*
  * Write to a file (through the page cache).
  */
-static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-				 unsigned long nr_segs, loff_t pos)
+static ssize_t ll_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct lu_env      *env;
 	struct vvp_io_args *args;
-	size_t	      count = iov_length(iov, nr_segs);
 	ssize_t	     result;
 	int		 refcheck;
 
@@ -1242,46 +1205,15 @@ static ssize_t ll_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 		return PTR_ERR(env);
 
 	args = vvp_env_args(env, IO_NORMAL);
-	args->u.normal.via_iov = (struct iovec *)iov;
-	args->u.normal.via_nrsegs = nr_segs;
+	args->u.normal.via_iter = from;
 	args->u.normal.via_iocb = iocb;
 
 	result = ll_file_io_generic(env, args, iocb->ki_filp, CIT_WRITE,
-				  &iocb->ki_pos, count);
+				  &iocb->ki_pos, iov_iter_count(from));
 	cl_env_put(env, &refcheck);
 	return result;
 }
 
-static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
-			     loff_t *ppos)
-{
-	struct lu_env *env;
-	struct iovec  *local_iov;
-	struct kiocb  *kiocb;
-	ssize_t	result;
-	int	    refcheck;
-
-	env = cl_env_get(&refcheck);
-	if (IS_ERR(env))
-		return PTR_ERR(env);
-
-	local_iov = &vvp_env_info(env)->vti_local_iov;
-	kiocb = &vvp_env_info(env)->vti_kiocb;
-	local_iov->iov_base = (void __user *)buf;
-	local_iov->iov_len = count;
-	init_sync_kiocb(kiocb, file);
-	kiocb->ki_pos = *ppos;
-	kiocb->ki_nbytes = count;
-
-	result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
-	*ppos = kiocb->ki_pos;
-
-	cl_env_put(env, &refcheck);
-	return result;
-}
-
-
-
 /*
  * Send file content (through pagecache) somewhere with helper
  */
@@ -3133,10 +3065,10 @@ int ll_inode_permission(struct inode *inode, int mask)
 
 /* -o localflock - only provides locally consistent flock locks */
 struct file_operations ll_file_operations = {
-	.read	   = ll_file_read,
-	.aio_read = ll_file_aio_read,
-	.write	  = ll_file_write,
-	.aio_write = ll_file_aio_write,
+	.read	   = new_sync_read,
+	.read_iter = ll_file_read_iter,
+	.write	  = new_sync_write,
+	.write_iter = ll_file_write_iter,
 	.unlocked_ioctl = ll_file_ioctl,
 	.open	   = ll_file_open,
 	.release	= ll_file_release,
@@ -3148,10 +3080,10 @@ struct file_operations ll_file_operations = {
 };
 
 struct file_operations ll_file_operations_flock = {
-	.read	   = ll_file_read,
-	.aio_read    = ll_file_aio_read,
-	.write	  = ll_file_write,
-	.aio_write   = ll_file_aio_write,
+	.read	   = new_sync_read,
+	.read_iter    = ll_file_read_iter,
+	.write	  = new_sync_write,
+	.write_iter   = ll_file_write_iter,
 	.unlocked_ioctl = ll_file_ioctl,
 	.open	   = ll_file_open,
 	.release	= ll_file_release,
@@ -3166,10 +3098,10 @@ struct file_operations ll_file_operations_flock = {
 
 /* These are for -o noflock - to return ENOSYS on flock calls */
 struct file_operations ll_file_operations_noflock = {
-	.read	   = ll_file_read,
-	.aio_read    = ll_file_aio_read,
-	.write	  = ll_file_write,
-	.aio_write   = ll_file_aio_write,
+	.read	   = new_sync_read,
+	.read_iter    = ll_file_read_iter,
+	.write	  = new_sync_write,
+	.write_iter   = ll_file_write_iter,
 	.unlocked_ioctl = ll_file_ioctl,
 	.open	   = ll_file_open,
 	.release	= ll_file_release,
diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
index 69aba0afca41..fbb8650ead34 100644
--- a/drivers/staging/lustre/lustre/llite/llite_internal.h
+++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
@@ -974,8 +974,7 @@ struct vvp_io_args {
 	union {
 		struct {
 			struct kiocb      *via_iocb;
-			struct iovec      *via_iov;
-			unsigned long      via_nrsegs;
+			struct iov_iter   *via_iter;
 		} normal;
 		struct {
 			struct pipe_inode_info  *via_pipe;
diff --git a/drivers/staging/lustre/lustre/llite/rw.c b/drivers/staging/lustre/lustre/llite/rw.c
index 416f7a094a6d..b345dfa599f3 100644
--- a/drivers/staging/lustre/lustre/llite/rw.c
+++ b/drivers/staging/lustre/lustre/llite/rw.c
@@ -157,8 +157,7 @@ static struct ll_cl_context *ll_cl_init(struct file *file,
 		result = cl_io_rw_init(env, io, CIT_WRITE, pos, PAGE_CACHE_SIZE);
 		if (result == 0) {
 			cio->cui_fd = LUSTRE_FPRIVATE(file);
-			cio->cui_iov = NULL;
-			cio->cui_nrsegs = 0;
+			cio->cui_iter = NULL;
 			result = cl_io_iter_init(env, io);
 			if (result == 0) {
 				result = cl_io_lock(env, io);
diff --git a/drivers/staging/lustre/lustre/llite/vvp_io.c b/drivers/staging/lustre/lustre/llite/vvp_io.c
index c7d70091246e..cfe8c625ae64 100644
--- a/drivers/staging/lustre/lustre/llite/vvp_io.c
+++ b/drivers/staging/lustre/lustre/llite/vvp_io.c
@@ -211,27 +211,26 @@ static int vvp_mmap_locks(const struct lu_env *env,
 	struct cl_lock_descr   *descr = &cti->cti_descr;
 	ldlm_policy_data_t      policy;
 	unsigned long	   addr;
-	unsigned long	   seg;
 	ssize_t		 count;
 	int		     result;
+	struct iov_iter i;
+	struct iovec iov;
 
 	LASSERT(io->ci_type == CIT_READ || io->ci_type == CIT_WRITE);
 
 	if (!cl_is_normalio(env, io))
 		return 0;
 
-	if (vio->cui_iov == NULL) /* nfs or loop back device write */
+	if (vio->cui_iter == NULL) /* nfs or loop back device write */
 		return 0;
 
 	/* No MM (e.g. NFS)? No vmas too. */
 	if (mm == NULL)
 		return 0;
 
-	for (seg = 0; seg < vio->cui_nrsegs; seg++) {
-		const struct iovec *iv = &vio->cui_iov[seg];
-
-		addr = (unsigned long)iv->iov_base;
-		count = iv->iov_len;
+	iov_for_each(iov, i, *(vio->cui_iter)) {
+		addr = (unsigned long)iov.iov_base;
+		count = iov.iov_len;
 		if (count == 0)
 			continue;
 
@@ -527,9 +526,7 @@ static int vvp_io_read_start(const struct lu_env *env,
 	switch (vio->cui_io_subtype) {
 	case IO_NORMAL:
 		LASSERT(cio->cui_iocb->ki_pos == pos);
-		result = generic_file_aio_read(cio->cui_iocb,
-					       cio->cui_iov, cio->cui_nrsegs,
-					       cio->cui_iocb->ki_pos);
+		result = generic_file_read_iter(cio->cui_iocb, cio->cui_iter);
 		break;
 	case IO_SPLICE:
 		result = generic_file_splice_read(file, &pos,
@@ -595,12 +592,11 @@ static int vvp_io_write_start(const struct lu_env *env,
 
 	CDEBUG(D_VFSTRACE, "write: [%lli, %lli)\n", pos, pos + (long long)cnt);
 
-	if (cio->cui_iov == NULL) /* from a temp io in ll_cl_init(). */
+	if (cio->cui_iter == NULL) /* from a temp io in ll_cl_init(). */
 		result = 0;
 	else
-		result = generic_file_aio_write(cio->cui_iocb,
-						cio->cui_iov, cio->cui_nrsegs,
-						cio->cui_iocb->ki_pos);
+		result = generic_file_write_iter(cio->cui_iocb, cio->cui_iter);
+
 	if (result > 0) {
 		if (result < cnt)
 			io->ci_continue = 0;
@@ -1162,10 +1158,9 @@ int vvp_io_init(const struct lu_env *env, struct cl_object *obj,
 		 *  results."  -- Single Unix Spec */
 		if (count == 0)
 			result = 1;
-		else {
+		else
 			cio->cui_tot_count = count;
-			cio->cui_tot_nrsegs = 0;
-		}
+
 		/* for read/write, we store the jobid in the inode, and
 		 * it'll be fetched by osc when building RPC.
 		 *
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 66012352d333..e8a109a75de1 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -90,6 +90,15 @@ static inline void iov_iter_truncate(struct iov_iter *i, size_t count)
 		i->count = count;
 }
 
+/*
+ * reexpand a previously truncated iterator; count must be no more than how much
+ * we had shrunk it.
+ */
+static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
+{
+	i->count = count;
+}
+
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
 
-- 
cgit 


From 6abd232274fd652e4a57f486d14e52ffee6f72e9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Apr 2014 14:20:57 -0400
Subject: bury generic_file_aio_{read,write}

no callers left

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fs.h |  2 --
 mm/filemap.c       | 43 ++++++++-----------------------------------
 2 files changed, 8 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 99817c9e665e..a6448849dbce 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2411,10 +2411,8 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
 		unsigned long size, pgoff_t pgoff);
 int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
-extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
-extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t);
 extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
diff --git a/mm/filemap.c b/mm/filemap.c
index 7dcdb9db710d..2f724e3cdf24 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1663,6 +1663,14 @@ out:
 	return written ? written : error;
 }
 
+/**
+ * generic_file_read_iter - generic filesystem read routine
+ * @iocb:	kernel I/O control block
+ * @iter:	destination for the data read
+ *
+ * This is the "read_iter()" routine for all filesystems
+ * that can use the page cache directly.
+ */
 ssize_t
 generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
@@ -1713,28 +1721,6 @@ out:
 }
 EXPORT_SYMBOL(generic_file_read_iter);
 
-/**
- * generic_file_aio_read - generic filesystem read routine
- * @iocb:	kernel I/O control block
- * @iov:	io vector request
- * @nr_segs:	number of segments in the iovec
- * @pos:	current file position
- *
- * This is the "read()" routine for all filesystems
- * that can use the page cache directly.
- */
-ssize_t
-generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
-{
-	size_t count = iov_length(iov, nr_segs);
-	struct iov_iter i;
-
-	iov_iter_init(&i, READ, iov, nr_segs, count);
-	return generic_file_read_iter(iocb, &i);
-}
-EXPORT_SYMBOL(generic_file_aio_read);
-
 #ifdef CONFIG_MMU
 /**
  * page_cache_read - adds requested page to the page cache if not already there
@@ -2675,19 +2661,6 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 }
 EXPORT_SYMBOL(generic_file_write_iter);
 
-ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
-{
-	size_t count = iov_length(iov, nr_segs);
-	struct iov_iter from;
-
-	BUG_ON(iocb->ki_pos != pos);
-
-	iov_iter_init(&from, WRITE, iov, nr_segs, count);
-	return generic_file_write_iter(iocb, &from);
-}
-EXPORT_SYMBOL(generic_file_aio_write);
-
 /**
  * try_to_release_page() - release old fs-specific metadata on a page
  *
-- 
cgit 


From 62a8067a7f35dba2de501c9cb00e4cf36da90bc0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 4 Apr 2014 23:12:29 -0400
Subject: bio_vec-backed iov_iter

New variant of iov_iter - ITER_BVEC in iter->type, backed with
bio_vec array instead of iovec one.  Primitives taught to deal
with such beasts, __swap_write() switched to using that kind
of iov_iter.

Note that bio_vec is just a <page, offset, length> triple - there's
nothing block-specific about it.  I've left the definition where it
was, but took it from under ifdef CONFIG_BLOCK.

Next target: ->splice_write()...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/fuse/file.c            |   2 +-
 include/linux/blk_types.h |   4 +-
 include/linux/uio.h       |  14 +-
 mm/iov_iter.c             | 390 ++++++++++++++++++++++++++++++++++++++++++----
 mm/page_io.c              |  19 ++-
 5 files changed, 385 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 7fbc803cf51d..b2dae9d1437c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1288,7 +1288,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 	size_t nbytes = 0;  /* # bytes already packed in req */
 
 	/* Special case for kernel I/O: can copy directly into the buffer */
-	if (ii->type & REQ_KERNEL) {
+	if (ii->type & ITER_KVEC) {
 		unsigned long user_addr = fuse_get_user_addr(ii);
 		size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index aa0eaa2d0bd8..86df13b97160 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -5,8 +5,6 @@
 #ifndef __LINUX_BLK_TYPES_H
 #define __LINUX_BLK_TYPES_H
 
-#ifdef CONFIG_BLOCK
-
 #include <linux/types.h>
 
 struct bio_set;
@@ -28,6 +26,8 @@ struct bio_vec {
 	unsigned int	bv_offset;
 };
 
+#ifdef CONFIG_BLOCK
+
 struct bvec_iter {
 	sector_t		bi_sector;	/* device address in 512 byte
 						   sectors */
diff --git a/include/linux/uio.h b/include/linux/uio.h
index e8a109a75de1..e2231e47cec1 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -19,12 +19,21 @@ struct kvec {
 	size_t iov_len;
 };
 
+enum {
+	ITER_IOVEC = 0,
+	ITER_KVEC = 2,
+	ITER_BVEC = 4,
+};
+
 struct iov_iter {
 	int type;
-	const struct iovec *iov;
-	unsigned long nr_segs;
 	size_t iov_offset;
 	size_t count;
+	union {
+		const struct iovec *iov;
+		const struct bio_vec *bvec;
+	};
+	unsigned long nr_segs;
 };
 
 /*
@@ -54,6 +63,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
 }
 
 #define iov_for_each(iov, iter, start)				\
+	if (!((start).type & ITER_BVEC))			\
 	for (iter = (start);					\
 	     (iter).count &&					\
 	     ((iov = iov_iter_iovec(&(iter))), 1);		\
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index fcdaaab438b6..7b5dbd1517b5 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -4,7 +4,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
-size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
 	size_t skip, copy, left, wanted;
@@ -84,9 +84,8 @@ done:
 	i->iov_offset = skip;
 	return wanted - bytes;
 }
-EXPORT_SYMBOL(copy_page_to_iter);
 
-size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
 	size_t skip, copy, left, wanted;
@@ -166,7 +165,6 @@ done:
 	i->iov_offset = skip;
 	return wanted - bytes;
 }
-EXPORT_SYMBOL(copy_page_from_iter);
 
 static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
@@ -195,7 +193,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr,
  * were successfully copied.  If a fault is encountered then return the number of
  * bytes which were copied.
  */
-size_t iov_iter_copy_from_user_atomic(struct page *page,
+static size_t copy_from_user_atomic_iovec(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes)
 {
 	char *kaddr;
@@ -215,9 +213,8 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 
 	return copied;
 }
-EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
-void iov_iter_advance(struct iov_iter *i, size_t bytes)
+static void advance_iovec(struct iov_iter *i, size_t bytes)
 {
 	BUG_ON(i->count < bytes);
 
@@ -252,7 +249,6 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes)
 		i->nr_segs = nr_segs;
 	}
 }
-EXPORT_SYMBOL(iov_iter_advance);
 
 /*
  * Fault in the first iovec of the given iov_iter, to a maximum length
@@ -265,26 +261,16 @@ EXPORT_SYMBOL(iov_iter_advance);
  */
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 {
-	char __user *buf = i->iov->iov_base + i->iov_offset;
-	bytes = min(bytes, i->iov->iov_len - i->iov_offset);
-	return fault_in_pages_readable(buf, bytes);
+	if (!(i->type & ITER_BVEC)) {
+		char __user *buf = i->iov->iov_base + i->iov_offset;
+		bytes = min(bytes, i->iov->iov_len - i->iov_offset);
+		return fault_in_pages_readable(buf, bytes);
+	}
+	return 0;
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
-/*
- * Return the count of just the current iov_iter segment.
- */
-size_t iov_iter_single_seg_count(const struct iov_iter *i)
-{
-	const struct iovec *iov = i->iov;
-	if (i->nr_segs == 1)
-		return i->count;
-	else
-		return min(i->count, iov->iov_len - i->iov_offset);
-}
-EXPORT_SYMBOL(iov_iter_single_seg_count);
-
-unsigned long iov_iter_alignment(const struct iov_iter *i)
+static unsigned long alignment_iovec(const struct iov_iter *i)
 {
 	const struct iovec *iov = i->iov;
 	unsigned long res;
@@ -307,7 +293,6 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 	res |= (unsigned long)iov->iov_base | size;
 	return res;
 }
-EXPORT_SYMBOL(iov_iter_alignment);
 
 void iov_iter_init(struct iov_iter *i, int direction,
 			const struct iovec *iov, unsigned long nr_segs,
@@ -315,7 +300,7 @@ void iov_iter_init(struct iov_iter *i, int direction,
 {
 	/* It will get better.  Eventually... */
 	if (segment_eq(get_fs(), KERNEL_DS))
-		direction |= REQ_KERNEL;
+		direction |= ITER_KVEC;
 	i->type = direction;
 	i->iov = iov;
 	i->nr_segs = nr_segs;
@@ -324,7 +309,7 @@ void iov_iter_init(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_init);
 
-ssize_t iov_iter_get_pages(struct iov_iter *i,
+static ssize_t get_pages_iovec(struct iov_iter *i,
 		   struct page **pages, size_t maxsize,
 		   size_t *start)
 {
@@ -349,9 +334,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 		return res;
 	return (res == n ? len : res * PAGE_SIZE) - *start;
 }
-EXPORT_SYMBOL(iov_iter_get_pages);
 
-ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+static ssize_t get_pages_alloc_iovec(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
 		   size_t *start)
 {
@@ -387,9 +371,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 	*pages = p;
 	return (res == n ? len : res * PAGE_SIZE) - *start;
 }
-EXPORT_SYMBOL(iov_iter_get_pages_alloc);
 
-int iov_iter_npages(const struct iov_iter *i, int maxpages)
+static int iov_iter_npages_iovec(const struct iov_iter *i, int maxpages)
 {
 	size_t offset = i->iov_offset;
 	size_t size = i->count;
@@ -414,4 +397,347 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 	}
 	return min(npages, maxpages);
 }
+
+static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
+{
+	char *from = kmap_atomic(page);
+	memcpy(to, from + offset, len);
+	kunmap_atomic(from);
+}
+
+static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len)
+{
+	char *to = kmap_atomic(page);
+	memcpy(to + offset, from, len);
+	kunmap_atomic(to);
+}
+
+static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	size_t skip, copy, wanted;
+	const struct bio_vec *bvec;
+	void *kaddr, *from;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	bvec = i->bvec;
+	skip = i->iov_offset;
+	copy = min_t(size_t, bytes, bvec->bv_len - skip);
+
+	kaddr = kmap_atomic(page);
+	from = kaddr + offset;
+	memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy);
+	skip += copy;
+	from += copy;
+	bytes -= copy;
+	while (bytes) {
+		bvec++;
+		copy = min(bytes, (size_t)bvec->bv_len);
+		memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, copy);
+		skip = copy;
+		from += copy;
+		bytes -= copy;
+	}
+	kunmap_atomic(kaddr);
+	if (skip == bvec->bv_len) {
+		bvec++;
+		skip = 0;
+	}
+	i->count -= wanted - bytes;
+	i->nr_segs -= bvec - i->bvec;
+	i->bvec = bvec;
+	i->iov_offset = skip;
+	return wanted - bytes;
+}
+
+static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	size_t skip, copy, wanted;
+	const struct bio_vec *bvec;
+	void *kaddr, *to;
+
+	if (unlikely(bytes > i->count))
+		bytes = i->count;
+
+	if (unlikely(!bytes))
+		return 0;
+
+	wanted = bytes;
+	bvec = i->bvec;
+	skip = i->iov_offset;
+
+	kaddr = kmap_atomic(page);
+
+	to = kaddr + offset;
+
+	copy = min(bytes, bvec->bv_len - skip);
+
+	memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy);
+
+	to += copy;
+	skip += copy;
+	bytes -= copy;
+
+	while (bytes) {
+		bvec++;
+		copy = min(bytes, (size_t)bvec->bv_len);
+		memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, copy);
+		skip = copy;
+		to += copy;
+		bytes -= copy;
+	}
+	kunmap_atomic(kaddr);
+	if (skip == bvec->bv_len) {
+		bvec++;
+		skip = 0;
+	}
+	i->count -= wanted;
+	i->nr_segs -= bvec - i->bvec;
+	i->bvec = bvec;
+	i->iov_offset = skip;
+	return wanted;
+}
+
+static size_t copy_from_user_bvec(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	char *kaddr;
+	size_t left;
+	const struct bio_vec *bvec;
+	size_t base = i->iov_offset;
+
+	kaddr = kmap_atomic(page);
+	for (left = bytes, bvec = i->bvec; left; bvec++, base = 0) {
+		size_t copy = min(left, bvec->bv_len - base);
+		if (!bvec->bv_len)
+			continue;
+		memcpy_from_page(kaddr + offset, bvec->bv_page,
+				 bvec->bv_offset + base, copy);
+		offset += copy;
+		left -= copy;
+	}
+	kunmap_atomic(kaddr);
+	return bytes;
+}
+
+static void advance_bvec(struct iov_iter *i, size_t bytes)
+{
+	BUG_ON(i->count < bytes);
+
+	if (likely(i->nr_segs == 1)) {
+		i->iov_offset += bytes;
+		i->count -= bytes;
+	} else {
+		const struct bio_vec *bvec = i->bvec;
+		size_t base = i->iov_offset;
+		unsigned long nr_segs = i->nr_segs;
+
+		/*
+		 * The !bvec->bv_len check ensures we skip over unlikely
+		 * zero-length segments (without overrunning the bio_vec array).
+		 */
+		while (bytes || unlikely(i->count && !bvec->bv_len)) {
+			int copy;
+
+			copy = min(bytes, bvec->bv_len - base);
+			BUG_ON(!i->count || i->count < copy);
+			i->count -= copy;
+			bytes -= copy;
+			base += copy;
+			if (bvec->bv_len == base) {
+				bvec++;
+				nr_segs--;
+				base = 0;
+			}
+		}
+		i->bvec = bvec;
+		i->iov_offset = base;
+		i->nr_segs = nr_segs;
+	}
+}
+
+static unsigned long alignment_bvec(const struct iov_iter *i)
+{
+	const struct bio_vec *bvec = i->bvec;
+	unsigned long res;
+	size_t size = i->count;
+	size_t n;
+
+	if (!size)
+		return 0;
+
+	res = bvec->bv_offset + i->iov_offset;
+	n = bvec->bv_len - i->iov_offset;
+	if (n >= size)
+		return res | size;
+	size -= n;
+	res |= n;
+	while (size > (++bvec)->bv_len) {
+		res |= bvec->bv_offset | bvec->bv_len;
+		size -= bvec->bv_len;
+	}
+	res |= bvec->bv_offset | size;
+	return res;
+}
+
+static ssize_t get_pages_bvec(struct iov_iter *i,
+		   struct page **pages, size_t maxsize,
+		   size_t *start)
+{
+	const struct bio_vec *bvec = i->bvec;
+	size_t len = bvec->bv_len - i->iov_offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	*start = bvec->bv_offset + i->iov_offset;
+
+	get_page(*pages = bvec->bv_page);
+
+	return len;
+}
+
+static ssize_t get_pages_alloc_bvec(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	const struct bio_vec *bvec = i->bvec;
+	size_t len = bvec->bv_len - i->iov_offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	*start = bvec->bv_offset + i->iov_offset;
+
+	*pages = kmalloc(sizeof(struct page *), GFP_KERNEL);
+	if (!*pages)
+		return -ENOMEM;
+
+	get_page(**pages = bvec->bv_page);
+
+	return len;
+}
+
+static int iov_iter_npages_bvec(const struct iov_iter *i, int maxpages)
+{
+	size_t offset = i->iov_offset;
+	size_t size = i->count;
+	const struct bio_vec *bvec = i->bvec;
+	int npages = 0;
+	int n;
+
+	for (n = 0; size && n < i->nr_segs; n++, bvec++) {
+		size_t len = bvec->bv_len - offset;
+		offset = 0;
+		if (unlikely(!len))	/* empty segment */
+			continue;
+		if (len > size)
+			len = size;
+		npages++;
+		if (npages >= maxpages)	/* don't bother going further */
+			return maxpages;
+		size -= len;
+		offset = 0;
+	}
+	return min(npages, maxpages);
+}
+
+size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return copy_page_to_iter_bvec(page, offset, bytes, i);
+	else
+		return copy_page_to_iter_iovec(page, offset, bytes, i);
+}
+EXPORT_SYMBOL(copy_page_to_iter);
+
+size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+			 struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return copy_page_from_iter_bvec(page, offset, bytes, i);
+	else
+		return copy_page_from_iter_iovec(page, offset, bytes, i);
+}
+EXPORT_SYMBOL(copy_page_from_iter);
+
+size_t iov_iter_copy_from_user_atomic(struct page *page,
+		struct iov_iter *i, unsigned long offset, size_t bytes)
+{
+	if (i->type & ITER_BVEC)
+		return copy_from_user_bvec(page, i, offset, bytes);
+	else
+		return copy_from_user_atomic_iovec(page, i, offset, bytes);
+}
+EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
+
+void iov_iter_advance(struct iov_iter *i, size_t size)
+{
+	if (i->type & ITER_BVEC)
+		advance_bvec(i, size);
+	else
+		advance_iovec(i, size);
+}
+EXPORT_SYMBOL(iov_iter_advance);
+
+/*
+ * Return the count of just the current iov_iter segment.
+ */
+size_t iov_iter_single_seg_count(const struct iov_iter *i)
+{
+	if (i->nr_segs == 1)
+		return i->count;
+	else if (i->type & ITER_BVEC)
+		return min(i->count, i->iov->iov_len - i->iov_offset);
+	else
+		return min(i->count, i->bvec->bv_len - i->iov_offset);
+}
+EXPORT_SYMBOL(iov_iter_single_seg_count);
+
+unsigned long iov_iter_alignment(const struct iov_iter *i)
+{
+	if (i->type & ITER_BVEC)
+		return alignment_bvec(i);
+	else
+		return alignment_iovec(i);
+}
+EXPORT_SYMBOL(iov_iter_alignment);
+
+ssize_t iov_iter_get_pages(struct iov_iter *i,
+		   struct page **pages, size_t maxsize,
+		   size_t *start)
+{
+	if (i->type & ITER_BVEC)
+		return get_pages_bvec(i, pages, maxsize, start);
+	else
+		return get_pages_iovec(i, pages, maxsize, start);
+}
+EXPORT_SYMBOL(iov_iter_get_pages);
+
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	if (i->type & ITER_BVEC)
+		return get_pages_alloc_bvec(i, pages, maxsize, start);
+	else
+		return get_pages_alloc_iovec(i, pages, maxsize, start);
+}
+EXPORT_SYMBOL(iov_iter_get_pages_alloc);
+
+int iov_iter_npages(const struct iov_iter *i, int maxpages)
+{
+	if (i->type & ITER_BVEC)
+		return iov_iter_npages_bvec(i, maxpages);
+	else
+		return iov_iter_npages_iovec(i, maxpages);
+}
 EXPORT_SYMBOL(iov_iter_npages);
diff --git a/mm/page_io.c b/mm/page_io.c
index 313bfedb75d1..33bb38c4aad7 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -259,23 +259,28 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		struct kiocb kiocb;
 		struct file *swap_file = sis->swap_file;
 		struct address_space *mapping = swap_file->f_mapping;
-		struct iovec iov = {
-			.iov_base = kmap(page),
-			.iov_len  = PAGE_SIZE,
+		struct bio_vec bv = {
+			.bv_page = page,
+			.bv_len  = PAGE_SIZE,
+			.bv_offset = 0
+		};
+		struct iov_iter from = {
+			.type = ITER_BVEC | WRITE,
+			.count = PAGE_SIZE,
+			.iov_offset = 0,
+			.nr_segs = 1,
+			.bvec = &bv
 		};
-		struct iov_iter from;
 
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
 		kiocb.ki_nbytes = PAGE_SIZE;
-		iov_iter_init(&from, KERNEL_WRITE, &iov, 1, PAGE_SIZE);
 
 		set_page_writeback(page);
 		unlock_page(page);
-		ret = mapping->a_ops->direct_IO(KERNEL_WRITE,
+		ret = mapping->a_ops->direct_IO(ITER_BVEC | WRITE,
 						&kiocb, &from,
 						kiocb.ki_pos);
-		kunmap(page);
 		if (ret == PAGE_SIZE) {
 			count_vm_event(PSWPOUT);
 			ret = 0;
-- 
cgit 


From 9c69de4c94fcb11db919160d5fa0b48f13d1757a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2014 19:37:13 +0200
Subject: nfsd: remove <linux/nfsd/nfsfh.h>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The only real user of this header is fs/nfsd/nfsfh.h, so merge the
two.  Various lockd source files used it to indirectly get other
sunrpc or nfs headers, so fix those up.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/clnt4xdr.c         |  2 ++
 fs/lockd/clntxdr.c          |  2 ++
 fs/lockd/svcsubs.c          |  2 +-
 fs/lockd/xdr.c              |  2 ++
 fs/nfsd/nfsd.h              |  1 +
 fs/nfsd/nfsfh.h             | 59 ++++++++++++++++++++++++++++++++++++++----
 fs/nfsd/state.h             |  1 -
 include/linux/lockd/lockd.h |  2 +-
 include/linux/nfsd/export.h |  6 ++++-
 include/linux/nfsd/nfsfh.h  | 63 ---------------------------------------------
 10 files changed, 68 insertions(+), 72 deletions(-)
 delete mode 100644 include/linux/nfsd/nfsfh.h

(limited to 'include/linux')

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 00ec0b9c94d1..d3e40db28930 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -14,6 +14,8 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/lockd/lockd.h>
 
+#include <uapi/linux/nfs3.h>
+
 #define NLMDBG_FACILITY		NLMDBG_XDR
 
 #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 9a55797a1cd4..3e9f7874b975 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -15,6 +15,8 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/lockd/lockd.h>
 
+#include <uapi/linux/nfs2.h>
+
 #define NLMDBG_FACILITY		NLMDBG_XDR
 
 #if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index dc5c75930f0f..7ec6b1074d8c 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -14,12 +14,12 @@
 #include <linux/mutex.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/addr.h>
-#include <linux/nfsd/nfsfh.h>
 #include <linux/nfsd/export.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
 #include <linux/module.h>
 #include <linux/mount.h>
+#include <uapi/linux/nfs2.h>
 
 #define NLMDBG_FACILITY		NLMDBG_SVCSUBS
 
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 964666c68a86..9340e7e10ef6 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -16,6 +16,8 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/lockd/lockd.h>
 
+#include <uapi/linux/nfs2.h>
+
 #define NLMDBG_FACILITY		NLMDBG_XDR
 
 
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 479eb681c27c..7d5c310678d0 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -15,6 +15,7 @@
 #include <linux/nfs2.h>
 #include <linux/nfs3.h>
 #include <linux/nfs4.h>
+#include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/msg_prot.h>
 
 #include <linux/nfsd/debug.h>
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index ad67964d0bb1..2e89e70ac15c 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -1,9 +1,58 @@
-/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
+/*
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ *
+ * This file describes the layout of the file handles as passed
+ * over the wire.
+ */
+#ifndef _LINUX_NFSD_NFSFH_H
+#define _LINUX_NFSD_NFSFH_H
+
+#include <linux/sunrpc/svc.h>
+#include <uapi/linux/nfsd/nfsfh.h>
+
+static inline __u32 ino_t_to_u32(ino_t ino)
+{
+	return (__u32) ino;
+}
+
+static inline ino_t u32_to_ino_t(__u32 uino)
+{
+	return (ino_t) uino;
+}
 
-#ifndef _LINUX_NFSD_FH_INT_H
-#define _LINUX_NFSD_FH_INT_H
+/*
+ * This is the internal representation of an NFS handle used in knfsd.
+ * pre_mtime/post_version will be used to support wcc_attr's in NFSv3.
+ */
+typedef struct svc_fh {
+	struct knfsd_fh		fh_handle;	/* FH data */
+	struct dentry *		fh_dentry;	/* validated dentry */
+	struct svc_export *	fh_export;	/* export pointer */
+	int			fh_maxsize;	/* max size for fh_handle */
+
+	unsigned char		fh_locked;	/* inode locked by us */
+	unsigned char		fh_want_write;	/* remount protection taken */
+
+#ifdef CONFIG_NFSD_V3
+	unsigned char		fh_post_saved;	/* post-op attrs saved */
+	unsigned char		fh_pre_saved;	/* pre-op attrs saved */
+
+	/* Pre-op attributes saved during fh_lock */
+	__u64			fh_pre_size;	/* size before operation */
+	struct timespec		fh_pre_mtime;	/* mtime before oper */
+	struct timespec		fh_pre_ctime;	/* ctime before oper */
+	/*
+	 * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
+	 *  to find out if it is valid.
+	 */
+	u64			fh_pre_change;
+
+	/* Post-op attributes saved in fh_unlock */
+	struct kstat		fh_post_attr;	/* full attrs after operation */
+	u64			fh_post_change; /* nfsv4 change; see above */
+#endif /* CONFIG_NFSD_V3 */
 
-#include <linux/nfsd/nfsfh.h>
+} svc_fh;
 
 enum nfsd_fsid {
 	FSID_DEV = 0,
@@ -215,4 +264,4 @@ fh_unlock(struct svc_fh *fhp)
 	}
 }
 
-#endif /* _LINUX_NFSD_FH_INT_H */
+#endif /* _LINUX_NFSD_NFSFH_H */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 424d8f5f2317..5b3bbf24097c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -37,7 +37,6 @@
 
 #include <linux/idr.h>
 #include <linux/sunrpc/svc_xprt.h>
-#include <linux/nfsd/nfsfh.h>
 #include "nfsfh.h"
 
 typedef struct {
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index dcaad79f54ed..219d79627c05 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -17,13 +17,13 @@
 #include <linux/fs.h>
 #include <linux/kref.h>
 #include <linux/utsname.h>
-#include <linux/nfsd/nfsfh.h>
 #include <linux/lockd/bind.h>
 #include <linux/lockd/xdr.h>
 #ifdef CONFIG_LOCKD_V4
 #include <linux/lockd/xdr4.h>
 #endif
 #include <linux/lockd/debug.h>
+#include <linux/sunrpc/svc.h>
 
 /*
  * Version string
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 7898c997dfea..b12c4e526ef2 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -9,9 +9,13 @@
 #ifndef NFSD_EXPORT_H
 #define NFSD_EXPORT_H
 
-# include <linux/nfsd/nfsfh.h>
+#include <linux/sunrpc/cache.h>
 #include <uapi/linux/nfsd/export.h>
 
+struct knfsd_fh;
+struct svc_fh;
+struct svc_rqst;
+
 /*
  * FS Locations
  */
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
deleted file mode 100644
index a93593f1fa4e..000000000000
--- a/include/linux/nfsd/nfsfh.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * include/linux/nfsd/nfsfh.h
- *
- * This file describes the layout of the file handles as passed
- * over the wire.
- *
- * Earlier versions of knfsd used to sign file handles using keyed MD5
- * or SHA. I've removed this code, because it doesn't give you more
- * security than blocking external access to port 2049 on your firewall.
- *
- * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
- */
-#ifndef _LINUX_NFSD_FH_H
-#define _LINUX_NFSD_FH_H
-
-# include <linux/sunrpc/svc.h>
-#include <uapi/linux/nfsd/nfsfh.h>
-
-static inline __u32 ino_t_to_u32(ino_t ino)
-{
-	return (__u32) ino;
-}
-
-static inline ino_t u32_to_ino_t(__u32 uino)
-{
-	return (ino_t) uino;
-}
-
-/*
- * This is the internal representation of an NFS handle used in knfsd.
- * pre_mtime/post_version will be used to support wcc_attr's in NFSv3.
- */
-typedef struct svc_fh {
-	struct knfsd_fh		fh_handle;	/* FH data */
-	struct dentry *		fh_dentry;	/* validated dentry */
-	struct svc_export *	fh_export;	/* export pointer */
-	int			fh_maxsize;	/* max size for fh_handle */
-
-	unsigned char		fh_locked;	/* inode locked by us */
-	unsigned char		fh_want_write;	/* remount protection taken */
-
-#ifdef CONFIG_NFSD_V3
-	unsigned char		fh_post_saved;	/* post-op attrs saved */
-	unsigned char		fh_pre_saved;	/* pre-op attrs saved */
-
-	/* Pre-op attributes saved during fh_lock */
-	__u64			fh_pre_size;	/* size before operation */
-	struct timespec		fh_pre_mtime;	/* mtime before oper */
-	struct timespec		fh_pre_ctime;	/* ctime before oper */
-	/*
-	 * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
-	 *  to find out if it is valid.
-	 */
-	u64			fh_pre_change;
-
-	/* Post-op attributes saved in fh_unlock */
-	struct kstat		fh_post_attr;	/* full attrs after operation */
-	u64			fh_post_change; /* nfsv4 change; see above */
-#endif /* CONFIG_NFSD_V3 */
-
-} svc_fh;
-
-#endif /* _LINUX_NFSD_FH_H */
-- 
cgit 


From d430e8d530e900c923bf77718d72478b1c280592 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2014 19:37:14 +0200
Subject: nfsd: move <linux/nfsd/export.h> to fs/nfsd

There are no legitimate users outside of fs/nfsd, so move it there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/svcsubs.c          |   1 -
 fs/nfsd/export.h            | 109 ++++++++++++++++++++++++++++++++++++++++++
 fs/nfsd/nfsd.h              |   3 +-
 include/linux/nfsd/export.h | 114 --------------------------------------------
 4 files changed, 111 insertions(+), 116 deletions(-)
 create mode 100644 fs/nfsd/export.h
 delete mode 100644 include/linux/nfsd/export.h

(limited to 'include/linux')

diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 7ec6b1074d8c..b6f3b84b6e99 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -14,7 +14,6 @@
 #include <linux/mutex.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/addr.h>
-#include <linux/nfsd/export.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
 #include <linux/module.h>
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
new file mode 100644
index 000000000000..d7939a62a0ae
--- /dev/null
+++ b/fs/nfsd/export.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ */
+#ifndef NFSD_EXPORT_H
+#define NFSD_EXPORT_H
+
+#include <linux/sunrpc/cache.h>
+#include <uapi/linux/nfsd/export.h>
+
+struct knfsd_fh;
+struct svc_fh;
+struct svc_rqst;
+
+/*
+ * FS Locations
+ */
+
+#define MAX_FS_LOCATIONS	128
+
+struct nfsd4_fs_location {
+	char *hosts; /* colon separated list of hosts */
+	char *path;  /* slash separated list of path components */
+};
+
+struct nfsd4_fs_locations {
+	uint32_t locations_count;
+	struct nfsd4_fs_location *locations;
+/* If we're not actually serving this data ourselves (only providing a
+ * list of replicas that do serve it) then we set "migrated": */
+	int migrated;
+};
+
+/*
+ * We keep an array of pseudoflavors with the export, in order from most
+ * to least preferred.  For the foreseeable future, we don't expect more
+ * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3,
+ * spkm3i, and spkm3p (and using all 8 at once should be rare).
+ */
+#define MAX_SECINFO_LIST	8
+
+struct exp_flavor_info {
+	u32	pseudoflavor;
+	u32	flags;
+};
+
+struct svc_export {
+	struct cache_head	h;
+	struct auth_domain *	ex_client;
+	int			ex_flags;
+	struct path		ex_path;
+	kuid_t			ex_anon_uid;
+	kgid_t			ex_anon_gid;
+	int			ex_fsid;
+	unsigned char *		ex_uuid; /* 16 byte fsid */
+	struct nfsd4_fs_locations ex_fslocs;
+	int			ex_nflavors;
+	struct exp_flavor_info	ex_flavors[MAX_SECINFO_LIST];
+	struct cache_detail	*cd;
+};
+
+/* an "export key" (expkey) maps a filehandlefragement to an
+ * svc_export for a given client.  There can be several per export,
+ * for the different fsid types.
+ */
+struct svc_expkey {
+	struct cache_head	h;
+
+	struct auth_domain *	ek_client;
+	int			ek_fsidtype;
+	u32			ek_fsid[6];
+
+	struct path		ek_path;
+};
+
+#define EX_ISSYNC(exp)		(!((exp)->ex_flags & NFSEXP_ASYNC))
+#define EX_NOHIDE(exp)		((exp)->ex_flags & NFSEXP_NOHIDE)
+#define EX_WGATHER(exp)		((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
+
+int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp);
+__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
+
+/*
+ * Function declarations
+ */
+int			nfsd_export_init(struct net *);
+void			nfsd_export_shutdown(struct net *);
+void			nfsd_export_flush(struct net *);
+struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
+					     struct path *);
+struct svc_export *	rqst_exp_parent(struct svc_rqst *,
+					struct path *);
+struct svc_export *	rqst_find_fsidzero_export(struct svc_rqst *);
+int			exp_rootfh(struct net *, struct auth_domain *,
+					char *path, struct knfsd_fh *, int maxsize);
+__be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
+__be32			nfserrno(int errno);
+
+static inline void exp_put(struct svc_export *exp)
+{
+	cache_put(&exp->h, exp->cd);
+}
+
+static inline void exp_get(struct svc_export *exp)
+{
+	cache_get(&exp->h);
+}
+struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
+
+#endif /* NFSD_EXPORT_H */
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 7d5c310678d0..72004caad718 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -19,9 +19,10 @@
 #include <linux/sunrpc/msg_prot.h>
 
 #include <linux/nfsd/debug.h>
-#include <linux/nfsd/export.h>
 #include <linux/nfsd/stats.h>
 
+#include "export.h"
+
 /*
  * nfsd version
  */
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
deleted file mode 100644
index b12c4e526ef2..000000000000
--- a/include/linux/nfsd/export.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * include/linux/nfsd/export.h
- * 
- * Public declarations for NFS exports. The definitions for the
- * syscall interface are in nfsctl.h
- *
- * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
- */
-#ifndef NFSD_EXPORT_H
-#define NFSD_EXPORT_H
-
-#include <linux/sunrpc/cache.h>
-#include <uapi/linux/nfsd/export.h>
-
-struct knfsd_fh;
-struct svc_fh;
-struct svc_rqst;
-
-/*
- * FS Locations
- */
-
-#define MAX_FS_LOCATIONS	128
-
-struct nfsd4_fs_location {
-	char *hosts; /* colon separated list of hosts */
-	char *path;  /* slash separated list of path components */
-};
-
-struct nfsd4_fs_locations {
-	uint32_t locations_count;
-	struct nfsd4_fs_location *locations;
-/* If we're not actually serving this data ourselves (only providing a
- * list of replicas that do serve it) then we set "migrated": */
-	int migrated;
-};
-
-/*
- * We keep an array of pseudoflavors with the export, in order from most
- * to least preferred.  For the foreseeable future, we don't expect more
- * than the eight pseudoflavors null, unix, krb5, krb5i, krb5p, skpm3,
- * spkm3i, and spkm3p (and using all 8 at once should be rare).
- */
-#define MAX_SECINFO_LIST	8
-
-struct exp_flavor_info {
-	u32	pseudoflavor;
-	u32	flags;
-};
-
-struct svc_export {
-	struct cache_head	h;
-	struct auth_domain *	ex_client;
-	int			ex_flags;
-	struct path		ex_path;
-	kuid_t			ex_anon_uid;
-	kgid_t			ex_anon_gid;
-	int			ex_fsid;
-	unsigned char *		ex_uuid; /* 16 byte fsid */
-	struct nfsd4_fs_locations ex_fslocs;
-	int			ex_nflavors;
-	struct exp_flavor_info	ex_flavors[MAX_SECINFO_LIST];
-	struct cache_detail	*cd;
-};
-
-/* an "export key" (expkey) maps a filehandlefragement to an
- * svc_export for a given client.  There can be several per export,
- * for the different fsid types.
- */
-struct svc_expkey {
-	struct cache_head	h;
-
-	struct auth_domain *	ek_client;
-	int			ek_fsidtype;
-	u32			ek_fsid[6];
-
-	struct path		ek_path;
-};
-
-#define EX_ISSYNC(exp)		(!((exp)->ex_flags & NFSEXP_ASYNC))
-#define EX_NOHIDE(exp)		((exp)->ex_flags & NFSEXP_NOHIDE)
-#define EX_WGATHER(exp)		((exp)->ex_flags & NFSEXP_GATHERED_WRITES)
-
-int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp);
-__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
-
-/*
- * Function declarations
- */
-int			nfsd_export_init(struct net *);
-void			nfsd_export_shutdown(struct net *);
-void			nfsd_export_flush(struct net *);
-struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
-					     struct path *);
-struct svc_export *	rqst_exp_parent(struct svc_rqst *,
-					struct path *);
-struct svc_export *	rqst_find_fsidzero_export(struct svc_rqst *);
-int			exp_rootfh(struct net *, struct auth_domain *,
-					char *path, struct knfsd_fh *, int maxsize);
-__be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
-__be32			nfserrno(int errno);
-
-static inline void exp_put(struct svc_export *exp)
-{
-	cache_put(&exp->h, exp->cd);
-}
-
-static inline void exp_get(struct svc_export *exp)
-{
-	cache_get(&exp->h);
-}
-struct svc_export * rqst_exp_find(struct svc_rqst *, int, u32 *);
-
-#endif /* NFSD_EXPORT_H */
-- 
cgit 


From 7f94423e8fcc1e0f3416be76d3da0982f586d565 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2014 19:37:15 +0200
Subject: nfsd: move <linux/nfsd/stats.h> to fs/nfsd

There are no legitimate users outside of fs/nfsd, so move it there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfsd.h             |  2 +-
 fs/nfsd/stats.c            |  1 -
 fs/nfsd/stats.h            | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/nfsd/stats.h | 45 ---------------------------------------------
 4 files changed, 44 insertions(+), 47 deletions(-)
 create mode 100644 fs/nfsd/stats.h
 delete mode 100644 include/linux/nfsd/stats.h

(limited to 'include/linux')

diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 72004caad718..7a07f9c6ee78 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -19,8 +19,8 @@
 #include <linux/sunrpc/msg_prot.h>
 
 #include <linux/nfsd/debug.h>
-#include <linux/nfsd/stats.h>
 
+#include "stats.h"
 #include "export.h"
 
 /*
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 6d4521feb6e3..cd90878a76aa 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -24,7 +24,6 @@
 #include <linux/seq_file.h>
 #include <linux/module.h>
 #include <linux/sunrpc/stats.h>
-#include <linux/nfsd/stats.h>
 #include <net/net_namespace.h>
 
 #include "nfsd.h"
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
new file mode 100644
index 000000000000..a5c944b771c6
--- /dev/null
+++ b/fs/nfsd/stats.h
@@ -0,0 +1,43 @@
+/*
+ * Statistics for NFS server.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+#ifndef _NFSD_STATS_H
+#define _NFSD_STATS_H
+
+#include <uapi/linux/nfsd/stats.h>
+
+
+struct nfsd_stats {
+	unsigned int	rchits;		/* repcache hits */
+	unsigned int	rcmisses;	/* repcache hits */
+	unsigned int	rcnocache;	/* uncached reqs */
+	unsigned int	fh_stale;	/* FH stale error */
+	unsigned int	fh_lookup;	/* dentry cached */
+	unsigned int	fh_anon;	/* anon file dentry returned */
+	unsigned int	fh_nocache_dir;	/* filehandle not found in dcache */
+	unsigned int	fh_nocache_nondir;	/* filehandle not found in dcache */
+	unsigned int	io_read;	/* bytes returned to read requests */
+	unsigned int	io_write;	/* bytes passed in write requests */
+	unsigned int	th_cnt;		/* number of available threads */
+	unsigned int	th_usage[10];	/* number of ticks during which n perdeciles
+					 * of available threads were in use */
+	unsigned int	th_fullcnt;	/* number of times last free thread was used */
+	unsigned int	ra_size;	/* size of ra cache */
+	unsigned int	ra_depth[11];	/* number of times ra entry was found that deep
+					 * in the cache (10percentiles). [10] = not found */
+#ifdef CONFIG_NFSD_V4
+	unsigned int	nfs4_opcount[LAST_NFS4_OP + 1];	/* count of individual nfsv4 operations */
+#endif
+
+};
+
+
+extern struct nfsd_stats	nfsdstats;
+extern struct svc_stat		nfsd_svcstats;
+
+void	nfsd_stat_init(void);
+void	nfsd_stat_shutdown(void);
+
+#endif /* _NFSD_STATS_H */
diff --git a/include/linux/nfsd/stats.h b/include/linux/nfsd/stats.h
deleted file mode 100644
index e75b2544ff12..000000000000
--- a/include/linux/nfsd/stats.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * linux/include/linux/nfsd/stats.h
- *
- * Statistics for NFS server.
- *
- * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
- */
-#ifndef LINUX_NFSD_STATS_H
-#define LINUX_NFSD_STATS_H
-
-#include <uapi/linux/nfsd/stats.h>
-
-
-struct nfsd_stats {
-	unsigned int	rchits;		/* repcache hits */
-	unsigned int	rcmisses;	/* repcache hits */
-	unsigned int	rcnocache;	/* uncached reqs */
-	unsigned int	fh_stale;	/* FH stale error */
-	unsigned int	fh_lookup;	/* dentry cached */
-	unsigned int	fh_anon;	/* anon file dentry returned */
-	unsigned int	fh_nocache_dir;	/* filehandle not found in dcache */
-	unsigned int	fh_nocache_nondir;	/* filehandle not found in dcache */
-	unsigned int	io_read;	/* bytes returned to read requests */
-	unsigned int	io_write;	/* bytes passed in write requests */
-	unsigned int	th_cnt;		/* number of available threads */
-	unsigned int	th_usage[10];	/* number of ticks during which n perdeciles
-					 * of available threads were in use */
-	unsigned int	th_fullcnt;	/* number of times last free thread was used */
-	unsigned int	ra_size;	/* size of ra cache */
-	unsigned int	ra_depth[11];	/* number of times ra entry was found that deep
-					 * in the cache (10percentiles). [10] = not found */
-#ifdef CONFIG_NFSD_V4
-	unsigned int	nfs4_opcount[LAST_NFS4_OP + 1];	/* count of individual nfsv4 operations */
-#endif
-
-};
-
-
-extern struct nfsd_stats	nfsdstats;
-extern struct svc_stat		nfsd_svcstats;
-
-void	nfsd_stat_init(void);
-void	nfsd_stat_shutdown(void);
-
-#endif /* LINUX_NFSD_STATS_H */
-- 
cgit 


From 6f226e2ab1b895c8685e868af0a5f797fcaaaf57 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2014 19:37:16 +0200
Subject: nfsd: remove <linux/nfsd/debug.h>

There is almost nothing left in it, so just merge it into the only file
that includes it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfsd.h             |  9 ++++++++-
 include/linux/nfsd/debug.h | 19 -------------------
 2 files changed, 8 insertions(+), 20 deletions(-)
 delete mode 100644 include/linux/nfsd/debug.h

(limited to 'include/linux')

diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 7a07f9c6ee78..e9f2fd42d184 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -18,11 +18,18 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/msg_prot.h>
 
-#include <linux/nfsd/debug.h>
+#include <uapi/linux/nfsd/debug.h>
 
 #include "stats.h"
 #include "export.h"
 
+#undef ifdebug
+#ifdef NFSD_DEBUG
+# define ifdebug(flag)		if (nfsd_debug & NFSDDBG_##flag)
+#else
+# define ifdebug(flag)		if (0)
+#endif
+
 /*
  * nfsd version
  */
diff --git a/include/linux/nfsd/debug.h b/include/linux/nfsd/debug.h
deleted file mode 100644
index 19ef8375b577..000000000000
--- a/include/linux/nfsd/debug.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * linux/include/linux/nfsd/debug.h
- *
- * Debugging-related stuff for nfsd
- *
- * Copyright (C) 1995 Olaf Kirch <okir@monad.swb.de>
- */
-#ifndef LINUX_NFSD_DEBUG_H
-#define LINUX_NFSD_DEBUG_H
-
-#include <uapi/linux/nfsd/debug.h>
-
-# undef ifdebug
-# ifdef NFSD_DEBUG
-#  define ifdebug(flag)		if (nfsd_debug & NFSDDBG_##flag)
-# else
-#  define ifdebug(flag)		if (0)
-# endif
-#endif /* LINUX_NFSD_DEBUG_H */
-- 
cgit 


From ca654dc3a93d3b47dddc0c24a98043060bbb256b Mon Sep 17 00:00:00 2001
From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Mon, 5 May 2014 12:52:39 +0530
Subject: cpufreq: Catch double invocations of
 cpufreq_freq_transition_begin/end

Some cpufreq drivers were redundantly invoking the _begin() and _end()
APIs around frequency transitions, and this double invocation (one from
the cpufreq core and the other from the cpufreq driver) used to result
in a self-deadlock, leading to system hangs during boot. (The _begin()
API makes contending callers wait until the previous invocation is
complete. Hence, the cpufreq driver would end up waiting on itself!).

Now all such drivers have been fixed, but debugging this issue was not
very straight-forward (even lockdep didn't catch this). So let us add a
debug infrastructure to the cpufreq core to catch such issues more easily
in the future.

We add a new field called 'transition_task' to the policy structure, to keep
track of the task which is performing the frequency transition. Using this
field, we make note of this task during _begin() and print a warning if we
find a case where the same task is calling _begin() again, before completing
the previous frequency transition using the corresponding _end().

We have left out ASYNC_NOTIFICATION drivers from this debug infrastructure
for 2 reasons:

1. At the moment, we have no way to avoid a particular scenario where this
   debug infrastructure can emit false-positive warnings for such drivers.
   The scenario is depicted below:

         Task A						Task B

    /* 1st freq transition */
    Invoke _begin() {
            ...
            ...
    }

    Change the frequency

    /* 2nd freq transition */
    Invoke _begin() {
	    ...	//waiting for B to
            ... //finish _end() for
	    ... //the 1st transition
	    ...	      |				Got interrupt for successful
	    ...	      |				change of frequency (1st one).
	    ...       |
	    ...	      |				/* 1st freq transition */
	    ...	      |				Invoke _end() {
	    ...	      |					...
	    ...	      V				}
	    ...
	    ...
    }

   This scenario is actually deadlock-free because, once Task A changes the
   frequency, it is Task B's responsibility to invoke the corresponding
   _end() for the 1st frequency transition. Hence it is perfectly legal for
   Task A to go ahead and attempt another frequency transition in the meantime.
   (Of course it won't be able to proceed until Task B finishes the 1st _end(),
   but this doesn't cause a deadlock or a hang).

   The debug infrastructure cannot handle this scenario and will treat it as
   a deadlock and print a warning. To avoid this, we exclude such drivers
   from the purview of this code.

2. Luckily, we don't _need_ this infrastructure for ASYNC_NOTIFICATION drivers
   at all! The cpufreq core does not automatically invoke the _begin() and
   _end() APIs during frequency transitions in such drivers. Thus, the driver
   alone is responsible for invoking _begin()/_end() and hence there shouldn't
   be any conflicts which lead to double invocations. So, we can skip these
   drivers, since the probability that such drivers will hit this problem is
   extremely low, as outlined above.

Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 14 ++++++++++++++
 include/linux/cpufreq.h   |  1 +
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a517da996aaf..bfe82b63875f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -365,6 +365,18 @@ static void cpufreq_notify_post_transition(struct cpufreq_policy *policy,
 void cpufreq_freq_transition_begin(struct cpufreq_policy *policy,
 		struct cpufreq_freqs *freqs)
 {
+
+	/*
+	 * Catch double invocations of _begin() which lead to self-deadlock.
+	 * ASYNC_NOTIFICATION drivers are left out because the cpufreq core
+	 * doesn't invoke _begin() on their behalf, and hence the chances of
+	 * double invocations are very low. Moreover, there are scenarios
+	 * where these checks can emit false-positive warnings in these
+	 * drivers; so we avoid that by skipping them altogether.
+	 */
+	WARN_ON(!(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION)
+				&& current == policy->transition_task);
+
 wait:
 	wait_event(policy->transition_wait, !policy->transition_ongoing);
 
@@ -376,6 +388,7 @@ wait:
 	}
 
 	policy->transition_ongoing = true;
+	policy->transition_task = current;
 
 	spin_unlock(&policy->transition_lock);
 
@@ -392,6 +405,7 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy,
 	cpufreq_notify_post_transition(policy, freqs, transition_failed);
 
 	policy->transition_ongoing = false;
+	policy->transition_task = NULL;
 
 	wake_up(&policy->transition_wait);
 }
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 77a5fa191502..f3822f836e14 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -110,6 +110,7 @@ struct cpufreq_policy {
 	bool			transition_ongoing; /* Tracks transition status */
 	spinlock_t		transition_lock;
 	wait_queue_head_t	transition_wait;
+	struct task_struct	*transition_task; /* Task which is doing the transition */
 };
 
 /* Only for ACPI */
-- 
cgit 


From a0dd7b79657bd6644b914d16ce7f23468c44a7b4 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Mon, 5 May 2014 08:33:50 -0500
Subject: PM / OPP: Move cpufreq specific OPP functions out of generic OPP
 library

CPUFreq specific helper functions for OPP (Operating Performance Points)
now use generic OPP functions that allow CPUFreq to be moved back
into CPUFreq framework. This allows for independent modifications
or future enhancements as needed isolated to just CPUFreq framework
alone.

Here, we just move relevant code and documentation to make this part of
CPUFreq infrastructure.

Cc: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/cpu-freq/core.txt |  29 +++++++++++
 Documentation/power/opp.txt     |  40 ++-------------
 drivers/base/power/opp.c        |  92 ---------------------------------
 drivers/cpufreq/Makefile        |   2 +
 drivers/cpufreq/cpufreq_opp.c   | 110 ++++++++++++++++++++++++++++++++++++++++
 include/linux/cpufreq.h         |  21 ++++++++
 include/linux/pm_opp.h          |  20 --------
 7 files changed, 167 insertions(+), 147 deletions(-)
 create mode 100644 drivers/cpufreq/cpufreq_opp.c

(limited to 'include/linux')

diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt
index 0060d76b445f..70933eadc308 100644
--- a/Documentation/cpu-freq/core.txt
+++ b/Documentation/cpu-freq/core.txt
@@ -20,6 +20,7 @@ Contents:
 ---------
 1.  CPUFreq core and interfaces
 2.  CPUFreq notifiers
+3.  CPUFreq Table Generation with Operating Performance Point (OPP)
 
 1. General Information
 =======================
@@ -92,3 +93,31 @@ values:
 cpu	- number of the affected CPU
 old	- old frequency
 new	- new frequency
+
+3. CPUFreq Table Generation with Operating Performance Point (OPP)
+==================================================================
+For details about OPP, see Documentation/power/opp.txt
+
+dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with
+	cpufreq_frequency_table_cpuinfo which is provided with the list of
+	frequencies that are available for operation. This function provides
+	a ready to use conversion routine to translate the OPP layer's internal
+	information about the available frequencies into a format readily
+	providable to cpufreq.
+
+	WARNING: Do not use this function in interrupt context.
+
+	Example:
+	 soc_pm_init()
+	 {
+		/* Do things */
+		r = dev_pm_opp_init_cpufreq_table(dev, &freq_table);
+		if (!r)
+			cpufreq_frequency_table_cpuinfo(policy, freq_table);
+		/* Do other things */
+	 }
+
+	NOTE: This function is available only if CONFIG_CPU_FREQ is enabled in
+	addition to CONFIG_PM_OPP.
+
+dev_pm_opp_free_cpufreq_table - Free up the table allocated by dev_pm_opp_init_cpufreq_table
diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt
index b8a907dc0169..a9adad828cdc 100644
--- a/Documentation/power/opp.txt
+++ b/Documentation/power/opp.txt
@@ -10,8 +10,7 @@ Contents
 3. OPP Search Functions
 4. OPP Availability Control Functions
 5. OPP Data Retrieval Functions
-6. Cpufreq Table Generation
-7. Data Structures
+6. Data Structures
 
 1. Introduction
 ===============
@@ -72,7 +71,6 @@ operations until that OPP could be re-enabled if possible.
 OPP library facilitates this concept in it's implementation. The following
 operational functions operate only on available opps:
 opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq, dev_pm_opp_get_opp_count
-and dev_pm_opp_init_cpufreq_table
 
 dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer which can then
 be used for dev_pm_opp_enable/disable functions to make an opp available as required.
@@ -96,10 +94,9 @@ using RCU read locks. The opp_find_freq_{exact,ceil,floor},
 opp_get_{voltage, freq, opp_count} fall into this category.
 
 opp_{add,enable,disable} are updaters which use mutex and implement it's own
-RCU locking mechanisms. dev_pm_opp_init_cpufreq_table acts as an updater and uses
-mutex to implment RCU updater strategy. These functions should *NOT* be called
-under RCU locks and other contexts that prevent blocking functions in RCU or
-mutex operations from working.
+RCU locking mechanisms. These functions should *NOT* be called under RCU locks
+and other contexts that prevent blocking functions in RCU or mutex operations
+from working.
 
 2. Initial OPP List Registration
 ================================
@@ -311,34 +308,7 @@ dev_pm_opp_get_opp_count - Retrieve the number of available opps for a device
 		/* Do other things */
 	 }
 
-6. Cpufreq Table Generation
-===========================
-dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with
-	cpufreq_frequency_table_cpuinfo which is provided with the list of
-	frequencies that are available for operation. This function provides
-	a ready to use conversion routine to translate the OPP layer's internal
-	information about the available frequencies into a format readily
-	providable to cpufreq.
-
-	WARNING: Do not use this function in interrupt context.
-
-	Example:
-	 soc_pm_init()
-	 {
-		/* Do things */
-		r = dev_pm_opp_init_cpufreq_table(dev, &freq_table);
-		if (!r)
-			cpufreq_frequency_table_cpuinfo(policy, freq_table);
-		/* Do other things */
-	 }
-
-	NOTE: This function is available only if CONFIG_CPU_FREQ is enabled in
-	addition to CONFIG_PM as power management feature is required to
-	dynamically scale voltage and frequency in a system.
-
-dev_pm_opp_free_cpufreq_table - Free up the table allocated by dev_pm_opp_init_cpufreq_table
-
-7. Data Structures
+6. Data Structures
 ==================
 Typically an SoC contains multiple voltage domains which are variable. Each
 domain is represented by a device pointer. The relationship to OPP can be
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 38b43bb20878..d9e376a6d19d 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -15,7 +15,6 @@
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/slab.h>
-#include <linux/cpufreq.h>
 #include <linux/device.h>
 #include <linux/list.h>
 #include <linux/rculist.h>
@@ -596,97 +595,6 @@ int dev_pm_opp_disable(struct device *dev, unsigned long freq)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_disable);
 
-#ifdef CONFIG_CPU_FREQ
-/**
- * dev_pm_opp_init_cpufreq_table() - create a cpufreq table for a device
- * @dev:	device for which we do this operation
- * @table:	Cpufreq table returned back to caller
- *
- * Generate a cpufreq table for a provided device- this assumes that the
- * opp list is already initialized and ready for usage.
- *
- * This function allocates required memory for the cpufreq table. It is
- * expected that the caller does the required maintenance such as freeing
- * the table as required.
- *
- * Returns -EINVAL for bad pointers, -ENODEV if the device is not found, -ENOMEM
- * if no memory available for the operation (table is not populated), returns 0
- * if successful and table is populated.
- *
- * WARNING: It is  important for the callers to ensure refreshing their copy of
- * the table if any of the mentioned functions have been invoked in the interim.
- *
- * Locking: The internal device_opp and opp structures are RCU protected.
- * Since we just use the regular accessor functions to access the internal data
- * structures, we use RCU read lock inside this function. As a result, users of
- * this function DONOT need to use explicit locks for invoking.
- */
-int dev_pm_opp_init_cpufreq_table(struct device *dev,
-			    struct cpufreq_frequency_table **table)
-{
-	struct dev_pm_opp *opp;
-	struct cpufreq_frequency_table *freq_table = NULL;
-	int i, max_opps, ret = 0;
-	unsigned long rate;
-
-	rcu_read_lock();
-
-	max_opps = dev_pm_opp_get_opp_count(dev);
-	if (max_opps <= 0) {
-		ret = max_opps ? max_opps : -ENODATA;
-		goto out;
-	}
-
-	freq_table = kzalloc(sizeof(*freq_table) * (max_opps + 1), GFP_KERNEL);
-	if (!freq_table) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	for (i = 0, rate = 0; i < max_opps; i++, rate++) {
-		/* find next rate */
-		opp = dev_pm_opp_find_freq_ceil(dev, &rate);
-		if (IS_ERR(opp)) {
-			ret = PTR_ERR(opp);
-			goto out;
-		}
-		freq_table[i].driver_data = i;
-		freq_table[i].frequency = rate / 1000;
-	}
-
-	freq_table[i].driver_data = i;
-	freq_table[i].frequency = CPUFREQ_TABLE_END;
-
-	*table = &freq_table[0];
-
-out:
-	rcu_read_unlock();
-	if (ret)
-		kfree(freq_table);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_init_cpufreq_table);
-
-/**
- * dev_pm_opp_free_cpufreq_table() - free the cpufreq table
- * @dev:	device for which we do this operation
- * @table:	table to free
- *
- * Free up the table allocated by dev_pm_opp_init_cpufreq_table
- */
-void dev_pm_opp_free_cpufreq_table(struct device *dev,
-				struct cpufreq_frequency_table **table)
-{
-	if (!table)
-		return;
-
-	kfree(*table);
-	*table = NULL;
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
-#endif		/* CONFIG_CPU_FREQ */
-
 /**
  * dev_pm_opp_get_notifier() - find notifier_head of the device with opp
  * @dev:	device pointer used to lookup device OPPs.
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index 0dbb963c1aef..738c8b7b17dc 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -1,5 +1,7 @@
 # CPUfreq core
 obj-$(CONFIG_CPU_FREQ)			+= cpufreq.o freq_table.o
+obj-$(CONFIG_PM_OPP)			+= cpufreq_opp.o
+
 # CPUfreq stats
 obj-$(CONFIG_CPU_FREQ_STAT)             += cpufreq_stats.o
 
diff --git a/drivers/cpufreq/cpufreq_opp.c b/drivers/cpufreq/cpufreq_opp.c
new file mode 100644
index 000000000000..c0c6f4a4eccf
--- /dev/null
+++ b/drivers/cpufreq/cpufreq_opp.c
@@ -0,0 +1,110 @@
+/*
+ * Generic OPP helper interface for CPUFreq drivers
+ *
+ * Copyright (C) 2009-2014 Texas Instruments Incorporated.
+ *	Nishanth Menon
+ *	Romit Dasgupta
+ *	Kevin Hilman
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/cpufreq.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/pm_opp.h>
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+
+/**
+ * dev_pm_opp_init_cpufreq_table() - create a cpufreq table for a device
+ * @dev:	device for which we do this operation
+ * @table:	Cpufreq table returned back to caller
+ *
+ * Generate a cpufreq table for a provided device- this assumes that the
+ * opp list is already initialized and ready for usage.
+ *
+ * This function allocates required memory for the cpufreq table. It is
+ * expected that the caller does the required maintenance such as freeing
+ * the table as required.
+ *
+ * Returns -EINVAL for bad pointers, -ENODEV if the device is not found, -ENOMEM
+ * if no memory available for the operation (table is not populated), returns 0
+ * if successful and table is populated.
+ *
+ * WARNING: It is  important for the callers to ensure refreshing their copy of
+ * the table if any of the mentioned functions have been invoked in the interim.
+ *
+ * Locking: The internal device_opp and opp structures are RCU protected.
+ * Since we just use the regular accessor functions to access the internal data
+ * structures, we use RCU read lock inside this function. As a result, users of
+ * this function DONOT need to use explicit locks for invoking.
+ */
+int dev_pm_opp_init_cpufreq_table(struct device *dev,
+				  struct cpufreq_frequency_table **table)
+{
+	struct dev_pm_opp *opp;
+	struct cpufreq_frequency_table *freq_table = NULL;
+	int i, max_opps, ret = 0;
+	unsigned long rate;
+
+	rcu_read_lock();
+
+	max_opps = dev_pm_opp_get_opp_count(dev);
+	if (max_opps <= 0) {
+		ret = max_opps ? max_opps : -ENODATA;
+		goto out;
+	}
+
+	freq_table = kzalloc(sizeof(*freq_table) * (max_opps + 1), GFP_KERNEL);
+	if (!freq_table) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0, rate = 0; i < max_opps; i++, rate++) {
+		/* find next rate */
+		opp = dev_pm_opp_find_freq_ceil(dev, &rate);
+		if (IS_ERR(opp)) {
+			ret = PTR_ERR(opp);
+			goto out;
+		}
+		freq_table[i].driver_data = i;
+		freq_table[i].frequency = rate / 1000;
+	}
+
+	freq_table[i].driver_data = i;
+	freq_table[i].frequency = CPUFREQ_TABLE_END;
+
+	*table = &freq_table[0];
+
+out:
+	rcu_read_unlock();
+	if (ret)
+		kfree(freq_table);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_init_cpufreq_table);
+
+/**
+ * dev_pm_opp_free_cpufreq_table() - free the cpufreq table
+ * @dev:	device for which we do this operation
+ * @table:	table to free
+ *
+ * Free up the table allocated by dev_pm_opp_init_cpufreq_table
+ */
+void dev_pm_opp_free_cpufreq_table(struct device *dev,
+				   struct cpufreq_frequency_table **table)
+{
+	if (!table)
+		return;
+
+	kfree(*table);
+	*table = NULL;
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index f3822f836e14..9d803b529ac2 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -469,6 +469,27 @@ struct cpufreq_frequency_table {
 				    * order */
 };
 
+#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP)
+int dev_pm_opp_init_cpufreq_table(struct device *dev,
+				  struct cpufreq_frequency_table **table);
+void dev_pm_opp_free_cpufreq_table(struct device *dev,
+				   struct cpufreq_frequency_table **table);
+#else
+static inline int dev_pm_opp_init_cpufreq_table(struct device *dev,
+						struct cpufreq_frequency_table
+						**table)
+{
+	return -EINVAL;
+}
+
+static inline void dev_pm_opp_free_cpufreq_table(struct device *dev,
+						 struct cpufreq_frequency_table
+						 **table)
+{
+}
+#endif
+
+
 bool cpufreq_next_valid(struct cpufreq_frequency_table **pos);
 
 /*
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 5151b0059585..0330217abfad 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -15,7 +15,6 @@
 #define __LINUX_OPP_H__
 
 #include <linux/err.h>
-#include <linux/cpufreq.h>
 #include <linux/notifier.h>
 
 struct dev_pm_opp;
@@ -117,23 +116,4 @@ static inline int of_init_opp_table(struct device *dev)
 }
 #endif
 
-#if defined(CONFIG_CPU_FREQ) && defined(CONFIG_PM_OPP)
-int dev_pm_opp_init_cpufreq_table(struct device *dev,
-			    struct cpufreq_frequency_table **table);
-void dev_pm_opp_free_cpufreq_table(struct device *dev,
-				struct cpufreq_frequency_table **table);
-#else
-static inline int dev_pm_opp_init_cpufreq_table(struct device *dev,
-			    struct cpufreq_frequency_table **table)
-{
-	return -EINVAL;
-}
-
-static inline
-void dev_pm_opp_free_cpufreq_table(struct device *dev,
-				struct cpufreq_frequency_table **table)
-{
-}
-#endif		/* CONFIG_CPU_FREQ */
-
 #endif		/* __LINUX_OPP_H__ */
-- 
cgit 


From a6220fc19afc07fe77cfd16f5b8e568615517091 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 5 May 2014 00:51:54 +0200
Subject: PM / suspend: Always use deepest C-state in the "freeze" sleep state

If freeze_enter() is called, we want to bypass the current cpuidle
governor and always use the deepest available (that is, not disabled)
C-state, because we want to save as much energy as reasonably possible
then and runtime latency constraints don't matter at that point, since
the system is in a sleep state anyway.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Aubrey Li <aubrey.li@linux.intel.com>
---
 drivers/cpuidle/cpuidle.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/cpuidle.h   |  2 ++
 kernel/power/suspend.c    |  2 ++
 3 files changed, 48 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index f38359f64cc6..cb7019977c50 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -32,6 +32,7 @@ LIST_HEAD(cpuidle_detected_devices);
 static int enabled_devices;
 static int off __read_mostly;
 static int initialized __read_mostly;
+static bool use_deepest_state __read_mostly;
 
 int cpuidle_disabled(void)
 {
@@ -64,6 +65,45 @@ int cpuidle_play_dead(void)
 	return -ENODEV;
 }
 
+/**
+ * cpuidle_use_deepest_state - Enable/disable the "deepest idle" mode.
+ * @enable: Whether enable or disable the feature.
+ *
+ * If the "deepest idle" mode is enabled, cpuidle will ignore the governor and
+ * always use the state with the greatest exit latency (out of the states that
+ * are not disabled).
+ *
+ * This function can only be called after cpuidle_pause() to avoid races.
+ */
+void cpuidle_use_deepest_state(bool enable)
+{
+	use_deepest_state = enable;
+}
+
+/**
+ * cpuidle_find_deepest_state - Find the state of the greatest exit latency.
+ * @drv: cpuidle driver for a given CPU.
+ * @dev: cpuidle device for a given CPU.
+ */
+static int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
+				      struct cpuidle_device *dev)
+{
+	unsigned int latency_req = 0;
+	int i, ret = CPUIDLE_DRIVER_STATE_START - 1;
+
+	for (i = CPUIDLE_DRIVER_STATE_START; i < drv->state_count; i++) {
+		struct cpuidle_state *s = &drv->states[i];
+		struct cpuidle_state_usage *su = &dev->states_usage[i];
+
+		if (s->disabled || su->disable || s->exit_latency <= latency_req)
+			continue;
+
+		latency_req = s->exit_latency;
+		ret = i;
+	}
+	return ret;
+}
+
 /**
  * cpuidle_enter_state - enter the state and update stats
  * @dev: cpuidle device for this cpu
@@ -124,6 +164,9 @@ int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 	if (!drv || !dev || !dev->enabled)
 		return -EBUSY;
 
+	if (unlikely(use_deepest_state))
+		return cpuidle_find_deepest_state(drv, dev);
+
 	return cpuidle_curr_governor->select(drv, dev);
 }
 
@@ -155,7 +198,7 @@ int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev,
  */
 void cpuidle_reflect(struct cpuidle_device *dev, int index)
 {
-	if (cpuidle_curr_governor->reflect)
+	if (cpuidle_curr_governor->reflect && !unlikely(use_deepest_state))
 		cpuidle_curr_governor->reflect(dev, index);
 }
 
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index a8d5bd391a26..c51a436135c4 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -143,6 +143,7 @@ extern void cpuidle_resume(void);
 extern int cpuidle_enable_device(struct cpuidle_device *dev);
 extern void cpuidle_disable_device(struct cpuidle_device *dev);
 extern int cpuidle_play_dead(void);
+extern void cpuidle_use_deepest_state(bool enable);
 
 extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev);
 #else
@@ -175,6 +176,7 @@ static inline int cpuidle_enable_device(struct cpuidle_device *dev)
 {return -ENODEV; }
 static inline void cpuidle_disable_device(struct cpuidle_device *dev) { }
 static inline int cpuidle_play_dead(void) {return -ENODEV; }
+static inline void cpuidle_use_deepest_state(bool enable) {}
 static inline struct cpuidle_driver *cpuidle_get_cpu_driver(
 	struct cpuidle_device *dev) {return NULL; }
 #endif
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 8233cd4047d7..155721f7f909 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -54,9 +54,11 @@ static void freeze_begin(void)
 
 static void freeze_enter(void)
 {
+	cpuidle_use_deepest_state(true);
 	cpuidle_resume();
 	wait_event(suspend_freeze_wait_head, suspend_freeze_wake);
 	cpuidle_pause();
+	cpuidle_use_deepest_state(false);
 }
 
 void freeze_wake(void)
-- 
cgit 


From 6403eb1f646a49cc92f25c08f8716f8870a4a865 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Sat, 26 Apr 2014 19:59:52 +0800
Subject: f2fs: introduce help macro ADDRS_PER_PAGE()

Introduce helper macro ADDRS_PER_PAGE() to get the number of address pointers
in a direct node or inode.

Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
---
 fs/f2fs/data.c          | 6 ++----
 fs/f2fs/file.c          | 5 +----
 fs/f2fs/recovery.c      | 5 +----
 include/linux/f2fs_fs.h | 3 +++
 4 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0147de7e3973..273fe1631af9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -652,8 +652,7 @@ static int get_data_block(struct inode *inode, sector_t iblock,
 		goto put_out;
 	}
 
-	end_offset = IS_INODE(dn.node_page) ?
-			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
 	bh_result->b_size = (((size_t)1) << blkbits);
 	dn.ofs_in_node++;
 	pgofs++;
@@ -675,8 +674,7 @@ get_next:
 		if (dn.data_blkaddr == NEW_ADDR)
 			goto put_out;
 
-		end_offset = IS_INODE(dn.node_page) ?
-			ADDRS_PER_INODE(F2FS_I(inode)) : ADDRS_PER_BLOCK;
+		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
 	}
 
 	if (maxblocks > (bh_result->b_size >> blkbits)) {
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 60e7d5448a1d..bb365c932555 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -288,10 +288,7 @@ int truncate_blocks(struct inode *inode, u64 from)
 		return err;
 	}
 
-	if (IS_INODE(dn.node_page))
-		count = ADDRS_PER_INODE(F2FS_I(inode));
-	else
-		count = ADDRS_PER_BLOCK;
+	count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
 
 	count -= dn.ofs_in_node;
 	f2fs_bug_on(count < 0);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 9eb6487f383d..be1e3e881725 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -301,10 +301,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
 		goto out;
 
 	start = start_bidx_of_node(ofs_of_node(page), fi);
-	if (IS_INODE(page))
-		end = start + ADDRS_PER_INODE(fi);
-	else
-		end = start + ADDRS_PER_BLOCK;
+	end = start + ADDRS_PER_PAGE(page, fi);
 
 	f2fs_lock_op(sbi);
 
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index df53e1753a76..8c03f71307c6 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -146,6 +146,9 @@ struct f2fs_extent {
 #define ADDRS_PER_BLOCK		1018	/* Address Pointers in a Direct Block */
 #define NIDS_PER_BLOCK		1018	/* Node IDs in an Indirect Block */
 
+#define ADDRS_PER_PAGE(page, fi)	\
+	(IS_INODE(page) ? ADDRS_PER_INODE(fi) : ADDRS_PER_BLOCK)
+
 #define	NODE_DIR1_BLOCK		(DEF_ADDRS_PER_INODE + 1)
 #define	NODE_DIR2_BLOCK		(DEF_ADDRS_PER_INODE + 2)
 #define	NODE_IND1_BLOCK		(DEF_ADDRS_PER_INODE + 3)
-- 
cgit 


From 257462dbf3ed233de0dc2e489dcc58579535b33f Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Thu, 27 Feb 2014 14:53:34 +0900
Subject: pwm-backlight: switch to gpiod interface

Switch to the new gpiod interface, which allows GPIO properties such as
active low to be handled transparently and removes a whole bunch of
code.

There are still a couple of users of this driver that rely on passing
the enable GPIO number through platform data, so a fallback mechanism
using a GPIO number is still available to avoid breaking them. It will
be removed once current users have switched to the GPIO lookup tables
provided by the gpiod interface.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/video/backlight/pwm_bl.c | 69 +++++++++++++++++-----------------------
 include/linux/pwm_backlight.h    |  5 +--
 2 files changed, 30 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/pwm_bl.c b/drivers/video/backlight/pwm_bl.c
index fa7f5c35b7fb..cc49347bbcc0 100644
--- a/drivers/video/backlight/pwm_bl.c
+++ b/drivers/video/backlight/pwm_bl.c
@@ -10,8 +10,8 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/gpio/consumer.h>
 #include <linux/gpio.h>
-#include <linux/of_gpio.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -32,8 +32,7 @@ struct pwm_bl_data {
 	unsigned int		*levels;
 	bool			enabled;
 	struct regulator	*power_supply;
-	int			enable_gpio;
-	unsigned long		enable_gpio_flags;
+	struct gpio_desc	*enable_gpio;
 	unsigned int		scale;
 	int			(*notify)(struct device *,
 					  int brightness);
@@ -54,12 +53,8 @@ static void pwm_backlight_power_on(struct pwm_bl_data *pb, int brightness)
 	if (err < 0)
 		dev_err(pb->dev, "failed to enable power supply\n");
 
-	if (gpio_is_valid(pb->enable_gpio)) {
-		if (pb->enable_gpio_flags & PWM_BACKLIGHT_GPIO_ACTIVE_LOW)
-			gpio_set_value(pb->enable_gpio, 0);
-		else
-			gpio_set_value(pb->enable_gpio, 1);
-	}
+	if (pb->enable_gpio)
+		gpiod_set_value(pb->enable_gpio, 1);
 
 	pwm_enable(pb->pwm);
 	pb->enabled = true;
@@ -73,12 +68,8 @@ static void pwm_backlight_power_off(struct pwm_bl_data *pb)
 	pwm_config(pb->pwm, 0, pb->period);
 	pwm_disable(pb->pwm);
 
-	if (gpio_is_valid(pb->enable_gpio)) {
-		if (pb->enable_gpio_flags & PWM_BACKLIGHT_GPIO_ACTIVE_LOW)
-			gpio_set_value(pb->enable_gpio, 1);
-		else
-			gpio_set_value(pb->enable_gpio, 0);
-	}
+	if (pb->enable_gpio)
+		gpiod_set_value(pb->enable_gpio, 0);
 
 	regulator_disable(pb->power_supply);
 	pb->enabled = false;
@@ -148,7 +139,6 @@ static int pwm_backlight_parse_dt(struct device *dev,
 				  struct platform_pwm_backlight_data *data)
 {
 	struct device_node *node = dev->of_node;
-	enum of_gpio_flags flags;
 	struct property *prop;
 	int length;
 	u32 value;
@@ -189,14 +179,6 @@ static int pwm_backlight_parse_dt(struct device *dev,
 		data->max_brightness--;
 	}
 
-	data->enable_gpio = of_get_named_gpio_flags(node, "enable-gpios", 0,
-						    &flags);
-	if (data->enable_gpio == -EPROBE_DEFER)
-		return -EPROBE_DEFER;
-
-	if (gpio_is_valid(data->enable_gpio) && (flags & OF_GPIO_ACTIVE_LOW))
-		data->enable_gpio_flags |= PWM_BACKLIGHT_GPIO_ACTIVE_LOW;
-
 	return 0;
 }
 
@@ -256,8 +238,6 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 	} else
 		pb->scale = data->max_brightness;
 
-	pb->enable_gpio = data->enable_gpio;
-	pb->enable_gpio_flags = data->enable_gpio_flags;
 	pb->notify = data->notify;
 	pb->notify_after = data->notify_after;
 	pb->check_fb = data->check_fb;
@@ -265,26 +245,38 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 	pb->dev = &pdev->dev;
 	pb->enabled = false;
 
-	if (gpio_is_valid(pb->enable_gpio)) {
-		unsigned long flags;
-
-		if (pb->enable_gpio_flags & PWM_BACKLIGHT_GPIO_ACTIVE_LOW)
-			flags = GPIOF_OUT_INIT_HIGH;
+	pb->enable_gpio = devm_gpiod_get(&pdev->dev, "enable");
+	if (IS_ERR(pb->enable_gpio)) {
+		ret = PTR_ERR(pb->enable_gpio);
+		if (ret == -ENOENT)
+			pb->enable_gpio = NULL;
 		else
-			flags = GPIOF_OUT_INIT_LOW;
+			goto err_alloc;
+	}
 
-		ret = gpio_request_one(pb->enable_gpio, flags, "enable");
+	/*
+	 * Compatibility fallback for drivers still using the integer GPIO
+	 * platform data. Must go away soon.
+	 */
+	if (!pb->enable_gpio && gpio_is_valid(data->enable_gpio)) {
+		ret = devm_gpio_request_one(&pdev->dev, data->enable_gpio,
+					    GPIOF_OUT_INIT_HIGH, "enable");
 		if (ret < 0) {
 			dev_err(&pdev->dev, "failed to request GPIO#%d: %d\n",
-				pb->enable_gpio, ret);
+				data->enable_gpio, ret);
 			goto err_alloc;
 		}
+
+		pb->enable_gpio = gpio_to_desc(data->enable_gpio);
 	}
 
+	if (pb->enable_gpio)
+		gpiod_direction_output(pb->enable_gpio, 1);
+
 	pb->power_supply = devm_regulator_get(&pdev->dev, "power");
 	if (IS_ERR(pb->power_supply)) {
 		ret = PTR_ERR(pb->power_supply);
-		goto err_gpio;
+		goto err_alloc;
 	}
 
 	pb->pwm = devm_pwm_get(&pdev->dev, NULL);
@@ -295,7 +287,7 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 		if (IS_ERR(pb->pwm)) {
 			dev_err(&pdev->dev, "unable to request legacy PWM\n");
 			ret = PTR_ERR(pb->pwm);
-			goto err_gpio;
+			goto err_alloc;
 		}
 	}
 
@@ -320,7 +312,7 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 	if (IS_ERR(bl)) {
 		dev_err(&pdev->dev, "failed to register backlight\n");
 		ret = PTR_ERR(bl);
-		goto err_gpio;
+		goto err_alloc;
 	}
 
 	if (data->dft_brightness > data->max_brightness) {
@@ -336,9 +328,6 @@ static int pwm_backlight_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, bl);
 	return 0;
 
-err_gpio:
-	if (gpio_is_valid(pb->enable_gpio))
-		gpio_free(pb->enable_gpio);
 err_alloc:
 	if (data->exit)
 		data->exit(&pdev->dev);
diff --git a/include/linux/pwm_backlight.h b/include/linux/pwm_backlight.h
index 2de2e275b2cb..efdd9227a49c 100644
--- a/include/linux/pwm_backlight.h
+++ b/include/linux/pwm_backlight.h
@@ -6,9 +6,6 @@
 
 #include <linux/backlight.h>
 
-/* TODO: convert to gpiod_*() API once it has been merged */
-#define PWM_BACKLIGHT_GPIO_ACTIVE_LOW	(1 << 0)
-
 struct platform_pwm_backlight_data {
 	int pwm_id;
 	unsigned int max_brightness;
@@ -16,8 +13,8 @@ struct platform_pwm_backlight_data {
 	unsigned int lth_brightness;
 	unsigned int pwm_period_ns;
 	unsigned int *levels;
+	/* TODO remove once all users are switched to gpiod_* API */
 	int enable_gpio;
-	unsigned long enable_gpio_flags;
 	int (*init)(struct device *dev);
 	int (*notify)(struct device *dev, int brightness);
 	void (*notify_after)(struct device *dev, int brightness);
-- 
cgit 


From 5bfd126e80dca70431aef8fdbc1cf14535f3c338 Mon Sep 17 00:00:00 2001
From: Juri Lelli <juri.lelli@gmail.com>
Date: Tue, 15 Apr 2014 13:49:04 +0200
Subject: sched/deadline: Fix sched_yield() behavior

yield_task_dl() is broken:

 o it forces current to be throttled setting its runtime to zero;
 o it sets current's dl_se->dl_new to one, expecting that dl_task_timer()
   will queue it back with proper parameters at replenish time.

Unfortunately, dl_task_timer() has this check at the very beginning:

	if (!dl_task(p) || dl_se->dl_new)
		goto unlock;

So, it just bails out and the task is never replenished. It actually
yielded forever.

To fix this, introduce a new flag indicating that the task properly yielded
the CPU before its current runtime expired. While this is slightly overkill
at the moment, the flag will be useful in the future to discriminate between
"good" jobs (whose remaining runtime could be reclaimed, i.e. recycled)
and "bad" jobs (for which dl_throttled has been set) that needed to be
stopped.

Reported-by: yjay.kim <yjay.kim@lge.com>
Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140429103953.e68eba1b2ac3309214e3dc5a@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h   | 7 +++++--
 kernel/sched/core.c     | 1 +
 kernel/sched/deadline.c | 5 +++--
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 25f54c79f757..2a4298fb0d85 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1153,9 +1153,12 @@ struct sched_dl_entity {
 	 *
 	 * @dl_boosted tells if we are boosted due to DI. If so we are
 	 * outside bandwidth enforcement mechanism (but only until we
-	 * exit the critical section).
+	 * exit the critical section);
+	 *
+	 * @dl_yielded tells if task gave up the cpu before consuming
+	 * all its available runtime during the last job.
 	 */
-	int dl_throttled, dl_new, dl_boosted;
+	int dl_throttled, dl_new, dl_boosted, dl_yielded;
 
 	/*
 	 * Bandwidth enforcement timer. Each -deadline task has its
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9fe2190005cb..e62c65a12d5b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3124,6 +3124,7 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
 	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
 	dl_se->dl_throttled = 0;
 	dl_se->dl_new = 1;
+	dl_se->dl_yielded = 0;
 }
 
 static void __setscheduler_params(struct task_struct *p,
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b08095786cb8..800e99b99075 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -528,6 +528,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	sched_clock_tick();
 	update_rq_clock(rq);
 	dl_se->dl_throttled = 0;
+	dl_se->dl_yielded = 0;
 	if (p->on_rq) {
 		enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
 		if (task_has_dl_policy(rq->curr))
@@ -893,10 +894,10 @@ static void yield_task_dl(struct rq *rq)
 	 * We make the task go to sleep until its current deadline by
 	 * forcing its runtime to zero. This way, update_curr_dl() stops
 	 * it and the bandwidth timer will wake it up and will give it
-	 * new scheduling parameters (thanks to dl_new=1).
+	 * new scheduling parameters (thanks to dl_yielded=1).
 	 */
 	if (p->dl.runtime > 0) {
-		rq->curr->dl.dl_new = 1;
+		rq->curr->dl.dl_yielded = 1;
 		p->dl.runtime = 0;
 	}
 	update_curr_dl(rq);
-- 
cgit 


From 143e1e28cb40bed836b0a06567208bd7347c9672 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Fri, 11 Apr 2014 11:44:37 +0200
Subject: sched: Rework sched_domain topology definition

We replace the old way to configure the scheduler topology with a new method
which enables a platform to declare additional levels (if needed).

We still have a default topology table definition that can be used by platforms
that don't want more levels than the SMT, MC, CPU and NUMA ones. This table can
be overwritten by an arch which either wants to add new levels where load
balancing makes sense, like BOOK or a powergating level, or wants to change the
flags configuration of some levels.

For each level, we need a function pointer that returns the cpumask for each cpu,
a function pointer that returns the flags for the level and a name. Only flags
that describe topology can be set by an architecture. The current topology
flags are:

 SD_SHARE_CPUPOWER
 SD_SHARE_PKG_RESOURCES
 SD_NUMA
 SD_ASYM_PACKING

Then, each level must be a subset of the next one. The build sequence of the
sched_domain will take care of removing useless levels like those with 1 CPU
and those with the same CPU span and no more relevant information for
load balancing than their children.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux390@de.ibm.com
Cc: linux-ia64@vger.kernel.org
Cc: linux-s390@vger.kernel.org
Link: http://lkml.kernel.org/r/1397209481-28542-2-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/ia64/include/asm/topology.h |  24 ----
 arch/s390/include/asm/topology.h |   2 -
 arch/tile/include/asm/topology.h |  33 ------
 include/linux/sched.h            |  53 +++++++++
 include/linux/topology.h         | 128 +++------------------
 kernel/sched/core.c              | 233 ++++++++++++++++++++-------------------
 6 files changed, 186 insertions(+), 287 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 5cb55a1e606b..3202aa74e0d6 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -46,30 +46,6 @@
 
 void build_cpu_to_node_map(void);
 
-#define SD_CPU_INIT (struct sched_domain) {		\
-	.parent			= NULL,			\
-	.child			= NULL,			\
-	.groups			= NULL,			\
-	.min_interval		= 1,			\
-	.max_interval		= 4,			\
-	.busy_factor		= 64,			\
-	.imbalance_pct		= 125,			\
-	.cache_nice_tries	= 2,			\
-	.busy_idx		= 2,			\
-	.idle_idx		= 1,			\
-	.newidle_idx		= 0,			\
-	.wake_idx		= 0,			\
-	.forkexec_idx		= 0,			\
-	.flags			= SD_LOAD_BALANCE	\
-				| SD_BALANCE_NEWIDLE	\
-				| SD_BALANCE_EXEC	\
-				| SD_BALANCE_FORK	\
-				| SD_WAKE_AFFINE,	\
-	.last_balance		= jiffies,		\
-	.balance_interval	= 1,			\
-	.nr_balance_failed	= 0,			\
-}
-
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_SMP
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index 05425b18c0aa..07763bdb408d 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -64,8 +64,6 @@ static inline void s390_init_cpu_topology(void)
 };
 #endif
 
-#define SD_BOOK_INIT	SD_CPU_INIT
-
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_S390_TOPOLOGY_H */
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h
index d15c0d8d550f..938311844233 100644
--- a/arch/tile/include/asm/topology.h
+++ b/arch/tile/include/asm/topology.h
@@ -44,39 +44,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
 /* For now, use numa node -1 for global allocation. */
 #define pcibus_to_node(bus)		((void)(bus), -1)
 
-/*
- * TILE architecture has many cores integrated in one processor, so we need
- * setup bigger balance_interval for both CPU/NODE scheduling domains to
- * reduce process scheduling costs.
- */
-
-/* sched_domains SD_CPU_INIT for TILE architecture */
-#define SD_CPU_INIT (struct sched_domain) {				\
-	.min_interval		= 4,					\
-	.max_interval		= 128,					\
-	.busy_factor		= 64,					\
-	.imbalance_pct		= 125,					\
-	.cache_nice_tries	= 1,					\
-	.busy_idx		= 2,					\
-	.idle_idx		= 1,					\
-	.newidle_idx		= 0,					\
-	.wake_idx		= 0,					\
-	.forkexec_idx		= 0,					\
-									\
-	.flags			= 1*SD_LOAD_BALANCE			\
-				| 1*SD_BALANCE_NEWIDLE			\
-				| 1*SD_BALANCE_EXEC			\
-				| 1*SD_BALANCE_FORK			\
-				| 0*SD_BALANCE_WAKE			\
-				| 0*SD_WAKE_AFFINE			\
-				| 0*SD_SHARE_CPUPOWER			\
-				| 0*SD_SHARE_PKG_RESOURCES		\
-				| 0*SD_SERIALIZE			\
-				,					\
-	.last_balance		= jiffies,				\
-	.balance_interval	= 32,					\
-}
-
 /* By definition, we create nodes based on online memory. */
 #define node_has_online_mem(nid) 1
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2a4298fb0d85..656b035c30e5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -879,6 +879,27 @@ enum cpu_idle_type {
 
 extern int __weak arch_sd_sibiling_asym_packing(void);
 
+#ifdef CONFIG_SCHED_SMT
+static inline const int cpu_smt_flags(void)
+{
+	return SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+}
+#endif
+
+#ifdef CONFIG_SCHED_MC
+static inline const int cpu_core_flags(void)
+{
+	return SD_SHARE_PKG_RESOURCES;
+}
+#endif
+
+#ifdef CONFIG_NUMA
+static inline const int cpu_numa_flags(void)
+{
+	return SD_NUMA;
+}
+#endif
+
 struct sched_domain_attr {
 	int relax_domain_level;
 };
@@ -985,6 +1006,38 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
 
 bool cpus_share_cache(int this_cpu, int that_cpu);
 
+typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
+typedef const int (*sched_domain_flags_f)(void);
+
+#define SDTL_OVERLAP	0x01
+
+struct sd_data {
+	struct sched_domain **__percpu sd;
+	struct sched_group **__percpu sg;
+	struct sched_group_power **__percpu sgp;
+};
+
+struct sched_domain_topology_level {
+	sched_domain_mask_f mask;
+	sched_domain_flags_f sd_flags;
+	int		    flags;
+	int		    numa_level;
+	struct sd_data      data;
+#ifdef CONFIG_SCHED_DEBUG
+	char                *name;
+#endif
+};
+
+extern struct sched_domain_topology_level *sched_domain_topology;
+
+extern void set_sched_topology(struct sched_domain_topology_level *tl);
+
+#ifdef CONFIG_SCHED_DEBUG
+# define SD_INIT_NAME(type)		.name = #type
+#else
+# define SD_INIT_NAME(type)
+#endif
+
 #else /* CONFIG_SMP */
 
 struct sched_domain_attr;
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 7062330a1329..973671ff9e7d 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -66,121 +66,6 @@ int arch_update_cpu_topology(void);
 #define PENALTY_FOR_NODE_WITH_CPUS	(1)
 #endif
 
-/*
- * Below are the 3 major initializers used in building sched_domains:
- * SD_SIBLING_INIT, for SMT domains
- * SD_CPU_INIT, for SMP domains
- *
- * Any architecture that cares to do any tuning to these values should do so
- * by defining their own arch-specific initializer in include/asm/topology.h.
- * A definition there will automagically override these default initializers
- * and allow arch-specific performance tuning of sched_domains.
- * (Only non-zero and non-null fields need be specified.)
- */
-
-#ifdef CONFIG_SCHED_SMT
-/* MCD - Do we really need this?  It is always on if CONFIG_SCHED_SMT is,
- * so can't we drop this in favor of CONFIG_SCHED_SMT?
- */
-#define ARCH_HAS_SCHED_WAKE_IDLE
-/* Common values for SMT siblings */
-#ifndef SD_SIBLING_INIT
-#define SD_SIBLING_INIT (struct sched_domain) {				\
-	.min_interval		= 1,					\
-	.max_interval		= 2,					\
-	.busy_factor		= 64,					\
-	.imbalance_pct		= 110,					\
-									\
-	.flags			= 1*SD_LOAD_BALANCE			\
-				| 1*SD_BALANCE_NEWIDLE			\
-				| 1*SD_BALANCE_EXEC			\
-				| 1*SD_BALANCE_FORK			\
-				| 0*SD_BALANCE_WAKE			\
-				| 1*SD_WAKE_AFFINE			\
-				| 1*SD_SHARE_CPUPOWER			\
-				| 1*SD_SHARE_PKG_RESOURCES		\
-				| 0*SD_SERIALIZE			\
-				| 0*SD_PREFER_SIBLING			\
-				| arch_sd_sibling_asym_packing()	\
-				,					\
-	.last_balance		= jiffies,				\
-	.balance_interval	= 1,					\
-	.smt_gain		= 1178,	/* 15% */			\
-	.max_newidle_lb_cost	= 0,					\
-	.next_decay_max_lb_cost	= jiffies,				\
-}
-#endif
-#endif /* CONFIG_SCHED_SMT */
-
-#ifdef CONFIG_SCHED_MC
-/* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */
-#ifndef SD_MC_INIT
-#define SD_MC_INIT (struct sched_domain) {				\
-	.min_interval		= 1,					\
-	.max_interval		= 4,					\
-	.busy_factor		= 64,					\
-	.imbalance_pct		= 125,					\
-	.cache_nice_tries	= 1,					\
-	.busy_idx		= 2,					\
-	.wake_idx		= 0,					\
-	.forkexec_idx		= 0,					\
-									\
-	.flags			= 1*SD_LOAD_BALANCE			\
-				| 1*SD_BALANCE_NEWIDLE			\
-				| 1*SD_BALANCE_EXEC			\
-				| 1*SD_BALANCE_FORK			\
-				| 0*SD_BALANCE_WAKE			\
-				| 1*SD_WAKE_AFFINE			\
-				| 0*SD_SHARE_CPUPOWER			\
-				| 1*SD_SHARE_PKG_RESOURCES		\
-				| 0*SD_SERIALIZE			\
-				,					\
-	.last_balance		= jiffies,				\
-	.balance_interval	= 1,					\
-	.max_newidle_lb_cost	= 0,					\
-	.next_decay_max_lb_cost	= jiffies,				\
-}
-#endif
-#endif /* CONFIG_SCHED_MC */
-
-/* Common values for CPUs */
-#ifndef SD_CPU_INIT
-#define SD_CPU_INIT (struct sched_domain) {				\
-	.min_interval		= 1,					\
-	.max_interval		= 4,					\
-	.busy_factor		= 64,					\
-	.imbalance_pct		= 125,					\
-	.cache_nice_tries	= 1,					\
-	.busy_idx		= 2,					\
-	.idle_idx		= 1,					\
-	.newidle_idx		= 0,					\
-	.wake_idx		= 0,					\
-	.forkexec_idx		= 0,					\
-									\
-	.flags			= 1*SD_LOAD_BALANCE			\
-				| 1*SD_BALANCE_NEWIDLE			\
-				| 1*SD_BALANCE_EXEC			\
-				| 1*SD_BALANCE_FORK			\
-				| 0*SD_BALANCE_WAKE			\
-				| 1*SD_WAKE_AFFINE			\
-				| 0*SD_SHARE_CPUPOWER			\
-				| 0*SD_SHARE_PKG_RESOURCES		\
-				| 0*SD_SERIALIZE			\
-				| 1*SD_PREFER_SIBLING			\
-				,					\
-	.last_balance		= jiffies,				\
-	.balance_interval	= 1,					\
-	.max_newidle_lb_cost	= 0,					\
-	.next_decay_max_lb_cost	= jiffies,				\
-}
-#endif
-
-#ifdef CONFIG_SCHED_BOOK
-#ifndef SD_BOOK_INIT
-#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!!
-#endif
-#endif /* CONFIG_SCHED_BOOK */
-
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DECLARE_PER_CPU(int, numa_node);
 
@@ -295,4 +180,17 @@ static inline int cpu_to_mem(int cpu)
 #define topology_core_cpumask(cpu)		cpumask_of(cpu)
 #endif
 
+#ifdef CONFIG_SCHED_SMT
+static inline const struct cpumask *cpu_smt_mask(int cpu)
+{
+	return topology_thread_cpumask(cpu);
+}
+#endif
+
+static inline const struct cpumask *cpu_cpu_mask(int cpu)
+{
+	return cpumask_of_node(cpu_to_node(cpu));
+}
+
+
 #endif /* _LINUX_TOPOLOGY_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 13584f1cccfc..7d332b7899cc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5566,17 +5566,6 @@ static int __init isolated_cpu_setup(char *str)
 
 __setup("isolcpus=", isolated_cpu_setup);
 
-static const struct cpumask *cpu_cpu_mask(int cpu)
-{
-	return cpumask_of_node(cpu_to_node(cpu));
-}
-
-struct sd_data {
-	struct sched_domain **__percpu sd;
-	struct sched_group **__percpu sg;
-	struct sched_group_power **__percpu sgp;
-};
-
 struct s_data {
 	struct sched_domain ** __percpu sd;
 	struct root_domain	*rd;
@@ -5589,21 +5578,6 @@ enum s_alloc {
 	sa_none,
 };
 
-struct sched_domain_topology_level;
-
-typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
-typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
-
-#define SDTL_OVERLAP	0x01
-
-struct sched_domain_topology_level {
-	sched_domain_init_f init;
-	sched_domain_mask_f mask;
-	int		    flags;
-	int		    numa_level;
-	struct sd_data      data;
-};
-
 /*
  * Build an iteration mask that can exclude certain CPUs from the upwards
  * domain traversal.
@@ -5832,34 +5806,6 @@ int __weak arch_sd_sibling_asym_packing(void)
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
  */
 
-#ifdef CONFIG_SCHED_DEBUG
-# define SD_INIT_NAME(sd, type)		sd->name = #type
-#else
-# define SD_INIT_NAME(sd, type)		do { } while (0)
-#endif
-
-#define SD_INIT_FUNC(type)						\
-static noinline struct sched_domain *					\
-sd_init_##type(struct sched_domain_topology_level *tl, int cpu) 	\
-{									\
-	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);	\
-	*sd = SD_##type##_INIT;						\
-	SD_INIT_NAME(sd, type);						\
-	sd->private = &tl->data;					\
-	return sd;							\
-}
-
-SD_INIT_FUNC(CPU)
-#ifdef CONFIG_SCHED_SMT
- SD_INIT_FUNC(SIBLING)
-#endif
-#ifdef CONFIG_SCHED_MC
- SD_INIT_FUNC(MC)
-#endif
-#ifdef CONFIG_SCHED_BOOK
- SD_INIT_FUNC(BOOK)
-#endif
-
 static int default_relax_domain_level = -1;
 int sched_domain_level_max;
 
@@ -5947,99 +5893,156 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
 		*per_cpu_ptr(sdd->sgp, cpu) = NULL;
 }
 
-#ifdef CONFIG_SCHED_SMT
-static const struct cpumask *cpu_smt_mask(int cpu)
-{
-	return topology_thread_cpumask(cpu);
-}
-#endif
-
-/*
- * Topology list, bottom-up.
- */
-static struct sched_domain_topology_level default_topology[] = {
-#ifdef CONFIG_SCHED_SMT
-	{ sd_init_SIBLING, cpu_smt_mask, },
-#endif
-#ifdef CONFIG_SCHED_MC
-	{ sd_init_MC, cpu_coregroup_mask, },
-#endif
-#ifdef CONFIG_SCHED_BOOK
-	{ sd_init_BOOK, cpu_book_mask, },
-#endif
-	{ sd_init_CPU, cpu_cpu_mask, },
-	{ NULL, },
-};
-
-static struct sched_domain_topology_level *sched_domain_topology = default_topology;
-
-#define for_each_sd_topology(tl)			\
-	for (tl = sched_domain_topology; tl->init; tl++)
-
 #ifdef CONFIG_NUMA
-
 static int sched_domains_numa_levels;
 static int *sched_domains_numa_distance;
 static struct cpumask ***sched_domains_numa_masks;
 static int sched_domains_curr_level;
+#endif
 
-static inline int sd_local_flags(int level)
-{
-	if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE)
-		return 0;
-
-	return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
-}
+/*
+ * SD_flags allowed in topology descriptions.
+ *
+ * SD_SHARE_CPUPOWER      - describes SMT topologies
+ * SD_SHARE_PKG_RESOURCES - describes shared caches
+ * SD_NUMA                - describes NUMA topologies
+ *
+ * Odd one out:
+ * SD_ASYM_PACKING        - describes SMT quirks
+ */
+#define TOPOLOGY_SD_FLAGS		\
+	(SD_SHARE_CPUPOWER |		\
+	 SD_SHARE_PKG_RESOURCES |	\
+	 SD_NUMA |			\
+	 SD_ASYM_PACKING)
 
 static struct sched_domain *
-sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
+sd_init(struct sched_domain_topology_level *tl, int cpu)
 {
 	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
-	int level = tl->numa_level;
-	int sd_weight = cpumask_weight(
-			sched_domains_numa_masks[level][cpu_to_node(cpu)]);
+	int sd_weight, sd_flags = 0;
+
+#ifdef CONFIG_NUMA
+	/*
+	 * Ugly hack to pass state to sd_numa_mask()...
+	 */
+	sched_domains_curr_level = tl->numa_level;
+#endif
+
+	sd_weight = cpumask_weight(tl->mask(cpu));
+
+	if (tl->sd_flags)
+		sd_flags = (*tl->sd_flags)();
+	if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
+			"wrong sd_flags in topology description\n"))
+		sd_flags &= ~TOPOLOGY_SD_FLAGS;
 
 	*sd = (struct sched_domain){
 		.min_interval		= sd_weight,
 		.max_interval		= 2*sd_weight,
 		.busy_factor		= 32,
 		.imbalance_pct		= 125,
-		.cache_nice_tries	= 2,
-		.busy_idx		= 3,
-		.idle_idx		= 2,
+
+		.cache_nice_tries	= 0,
+		.busy_idx		= 0,
+		.idle_idx		= 0,
 		.newidle_idx		= 0,
 		.wake_idx		= 0,
 		.forkexec_idx		= 0,
 
 		.flags			= 1*SD_LOAD_BALANCE
 					| 1*SD_BALANCE_NEWIDLE
-					| 0*SD_BALANCE_EXEC
-					| 0*SD_BALANCE_FORK
+					| 1*SD_BALANCE_EXEC
+					| 1*SD_BALANCE_FORK
 					| 0*SD_BALANCE_WAKE
-					| 0*SD_WAKE_AFFINE
+					| 1*SD_WAKE_AFFINE
 					| 0*SD_SHARE_CPUPOWER
 					| 0*SD_SHARE_PKG_RESOURCES
-					| 1*SD_SERIALIZE
+					| 0*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
-					| 1*SD_NUMA
-					| sd_local_flags(level)
+					| 0*SD_NUMA
+					| sd_flags
 					,
+
 		.last_balance		= jiffies,
 		.balance_interval	= sd_weight,
+		.smt_gain		= 0,
 		.max_newidle_lb_cost	= 0,
 		.next_decay_max_lb_cost	= jiffies,
+#ifdef CONFIG_SCHED_DEBUG
+		.name			= tl->name,
+#endif
 	};
-	SD_INIT_NAME(sd, NUMA);
-	sd->private = &tl->data;
 
 	/*
-	 * Ugly hack to pass state to sd_numa_mask()...
+	 * Convert topological properties into behaviour.
 	 */
-	sched_domains_curr_level = tl->numa_level;
+
+	if (sd->flags & SD_SHARE_CPUPOWER) {
+		sd->imbalance_pct = 110;
+		sd->smt_gain = 1178; /* ~15% */
+		sd->flags |= arch_sd_sibling_asym_packing();
+
+	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+		sd->imbalance_pct = 117;
+		sd->cache_nice_tries = 1;
+		sd->busy_idx = 2;
+
+#ifdef CONFIG_NUMA
+	} else if (sd->flags & SD_NUMA) {
+		sd->cache_nice_tries = 2;
+		sd->busy_idx = 3;
+		sd->idle_idx = 2;
+
+		sd->flags |= SD_SERIALIZE;
+		if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
+			sd->flags &= ~(SD_BALANCE_EXEC |
+				       SD_BALANCE_FORK |
+				       SD_WAKE_AFFINE);
+		}
+
+#endif
+	} else {
+		sd->flags |= SD_PREFER_SIBLING;
+		sd->cache_nice_tries = 1;
+		sd->busy_idx = 2;
+		sd->idle_idx = 1;
+	}
+
+	sd->private = &tl->data;
 
 	return sd;
 }
 
+/*
+ * Topology list, bottom-up.
+ */
+static struct sched_domain_topology_level default_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+#ifdef CONFIG_SCHED_BOOK
+	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
+struct sched_domain_topology_level *sched_domain_topology = default_topology;
+
+#define for_each_sd_topology(tl)			\
+	for (tl = sched_domain_topology; tl->mask; tl++)
+
+void set_sched_topology(struct sched_domain_topology_level *tl)
+{
+	sched_domain_topology = tl;
+}
+
+#ifdef CONFIG_NUMA
+
 static const struct cpumask *sd_numa_mask(int cpu)
 {
 	return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
@@ -6183,7 +6186,10 @@ static void sched_init_numa(void)
 		}
 	}
 
-	tl = kzalloc((ARRAY_SIZE(default_topology) + level) *
+	/* Compute default topology size */
+	for (i = 0; sched_domain_topology[i].mask; i++);
+
+	tl = kzalloc((i + level) *
 			sizeof(struct sched_domain_topology_level), GFP_KERNEL);
 	if (!tl)
 		return;
@@ -6191,18 +6197,19 @@ static void sched_init_numa(void)
 	/*
 	 * Copy the default topology bits..
 	 */
-	for (i = 0; default_topology[i].init; i++)
-		tl[i] = default_topology[i];
+	for (i = 0; sched_domain_topology[i].mask; i++)
+		tl[i] = sched_domain_topology[i];
 
 	/*
 	 * .. and append 'j' levels of NUMA goodness.
 	 */
 	for (j = 0; j < level; i++, j++) {
 		tl[i] = (struct sched_domain_topology_level){
-			.init = sd_numa_init,
 			.mask = sd_numa_mask,
+			.sd_flags = cpu_numa_flags,
 			.flags = SDTL_OVERLAP,
 			.numa_level = j,
+			SD_INIT_NAME(NUMA)
 		};
 	}
 
@@ -6360,7 +6367,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		const struct cpumask *cpu_map, struct sched_domain_attr *attr,
 		struct sched_domain *child, int cpu)
 {
-	struct sched_domain *sd = tl->init(tl, cpu);
+	struct sched_domain *sd = sd_init(tl, cpu);
 	if (!sd)
 		return child;
 
-- 
cgit 


From 607b45e9a216e89a63351556e488eea06be0ff48 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Fri, 11 Apr 2014 11:44:39 +0200
Subject: sched, powerpc: Create a dedicated topology table

Create a dedicated topology table for handling the asymmetric feature of powerpc.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Fleming <afleming@freescale.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Preeti U. Murthy <preeti@linux.vnet.ibm.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Cc: tony.luck@intel.com
Cc: fenghua.yu@intel.com
Cc: schwidefsky@de.ibm.com
Cc: cmetcalf@tilera.com
Cc: dietmar.eggemann@arm.com
Cc: devicetree@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/1397209481-28542-4-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/kernel/smp.c | 31 +++++++++++++++++++++++--------
 include/linux/sched.h     |  2 --
 kernel/sched/core.c       |  6 ------
 3 files changed, 23 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index e2a4232c5871..10ffffef0414 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -766,6 +766,28 @@ int setup_profiling_timer(unsigned int multiplier)
 	return 0;
 }
 
+#ifdef CONFIG_SCHED_SMT
+/* cpumask of CPUs with asymetric SMT dependancy */
+static const int powerpc_smt_flags(void)
+{
+	int flags = SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+
+	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+		flags |= SD_ASYM_PACKING;
+	}
+	return flags;
+}
+#endif
+
+static struct sched_domain_topology_level powerpc_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	cpumask_var_t old_mask;
@@ -790,15 +812,8 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 	dump_numa_cpu_topology();
 
-}
+	set_sched_topology(powerpc_topology);
 
-int arch_sd_sibling_asym_packing(void)
-{
-	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
-		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
-		return SD_ASYM_PACKING;
-	}
-	return 0;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 656b035c30e5..439a153b8403 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -877,8 +877,6 @@ enum cpu_idle_type {
 #define SD_OVERLAP		0x2000	/* sched_domains of this level overlap */
 #define SD_NUMA			0x4000	/* cross-node balancing */
 
-extern int __weak arch_sd_sibiling_asym_packing(void);
-
 #ifdef CONFIG_SCHED_SMT
 static inline const int cpu_smt_flags(void)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e59e5aec745a..7e348e238bf1 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5796,11 +5796,6 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 	atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
 }
 
-int __weak arch_sd_sibling_asym_packing(void)
-{
-       return 0*SD_ASYM_PACKING;
-}
-
 /*
  * Initializers for schedule domains
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
@@ -5981,7 +5976,6 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
 	if (sd->flags & SD_SHARE_CPUPOWER) {
 		sd->imbalance_pct = 110;
 		sd->smt_gain = 1178; /* ~15% */
-		sd->flags |= arch_sd_sibling_asym_packing();
 
 	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
 		sd->imbalance_pct = 117;
-- 
cgit 


From d77b3ed5c9f8ebedf154b52b5e943c461f3d37e6 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Fri, 11 Apr 2014 11:44:40 +0200
Subject: sched: Add a new SD_SHARE_POWERDOMAIN for sched_domain

A new flag SD_SHARE_POWERDOMAIN is created to reflect whether or not groups of
CPUs in a sched_domain level can reach different power states. As an example,
the flag should be cleared at CPU level if groups of cores can be power gated
independently. This information can be used in the load balance decision or to
add a load balancing level between groups of CPUs that can power gate
independently.
This flag is part of the topology flags that can be set by arch.

Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: tony.luck@intel.com
Cc: fenghua.yu@intel.com
Cc: schwidefsky@de.ibm.com
Cc: cmetcalf@tilera.com
Cc: benh@kernel.crashing.org
Cc: preeti@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1397209481-28542-5-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |  1 +
 kernel/sched/core.c   | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 439a153b8403..accb66bfd722 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -870,6 +870,7 @@ enum cpu_idle_type {
 #define SD_BALANCE_WAKE		0x0010  /* Balance on wakeup */
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
 #define SD_SHARE_CPUPOWER	0x0080	/* Domain members share cpu power */
+#define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
 #define SD_ASYM_PACKING		0x0800  /* Place busy groups earlier in the domain */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7e348e238bf1..1c9c3b7b26af 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5261,7 +5261,8 @@ static int sd_degenerate(struct sched_domain *sd)
 			 SD_BALANCE_FORK |
 			 SD_BALANCE_EXEC |
 			 SD_SHARE_CPUPOWER |
-			 SD_SHARE_PKG_RESOURCES)) {
+			 SD_SHARE_PKG_RESOURCES |
+			 SD_SHARE_POWERDOMAIN)) {
 		if (sd->groups != sd->groups->next)
 			return 0;
 	}
@@ -5292,7 +5293,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 				SD_BALANCE_EXEC |
 				SD_SHARE_CPUPOWER |
 				SD_SHARE_PKG_RESOURCES |
-				SD_PREFER_SIBLING);
+				SD_PREFER_SIBLING |
+				SD_SHARE_POWERDOMAIN);
 		if (nr_node_ids == 1)
 			pflags &= ~SD_SERIALIZE;
 	}
@@ -5901,6 +5903,7 @@ static int sched_domains_curr_level;
  * SD_SHARE_CPUPOWER      - describes SMT topologies
  * SD_SHARE_PKG_RESOURCES - describes shared caches
  * SD_NUMA                - describes NUMA topologies
+ * SD_SHARE_POWERDOMAIN   - describes shared power domain
  *
  * Odd one out:
  * SD_ASYM_PACKING        - describes SMT quirks
@@ -5909,7 +5912,8 @@ static int sched_domains_curr_level;
 	(SD_SHARE_CPUPOWER |		\
 	 SD_SHARE_PKG_RESOURCES |	\
 	 SD_NUMA |			\
-	 SD_ASYM_PACKING)
+	 SD_ASYM_PACKING |		\
+	 SD_SHARE_POWERDOMAIN)
 
 static struct sched_domain *
 sd_init(struct sched_domain_topology_level *tl, int cpu)
-- 
cgit 


From 591c1ee465ce5372385dbc41e7d3e36cbb477bd8 Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Thu, 24 Apr 2014 11:30:04 -0400
Subject: of: configure the platform device dma parameters

Retrieve DMA configuration from DT and setup platform device's DMA
parameters. The DMA configuration in DT has to be specified using
"dma-ranges" and "dma-coherent" properties if supported.

We setup dma_pfn_offset using "dma-ranges" and dma_coherent_ops
using "dma-coherent" device tree properties.

The set_arch_dma_coherent_ops macro has to be defined by arch if
it supports coherent dma_ops. Otherwise, set_arch_dma_coherent_ops() is
declared as nop.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Olof Johansson <olof@lixom.net>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
---
 drivers/of/platform.c       | 65 ++++++++++++++++++++++++++++++++++++++++-----
 include/linux/dma-mapping.h |  7 +++++
 2 files changed, 66 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 404d1daebefa..91fa9838b56f 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -186,6 +186,64 @@ struct platform_device *of_device_alloc(struct device_node *np,
 }
 EXPORT_SYMBOL(of_device_alloc);
 
+/**
+ * of_dma_configure - Setup DMA configuration
+ * @dev:	Device to apply DMA configuration
+ *
+ * Try to get devices's DMA configuration from DT and update it
+ * accordingly.
+ *
+ * In case if platform code need to use own special DMA configuration,it
+ * can use Platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE event
+ * to fix up DMA configuration.
+ */
+static void of_dma_configure(struct platform_device *pdev)
+{
+	u64 dma_addr, paddr, size;
+	int ret;
+	struct device *dev = &pdev->dev;
+
+#if defined(CONFIG_MICROBLAZE)
+	pdev->archdata.dma_mask = 0xffffffffUL;
+#endif
+
+	/*
+	 * Set default dma-mask to 32 bit. Drivers are expected to setup
+	 * the correct supported dma_mask.
+	 */
+	dev->coherent_dma_mask = DMA_BIT_MASK(32);
+
+	/*
+	 * Set it to coherent_dma_mask by default if the architecture
+	 * code has not set it.
+	 */
+	if (!dev->dma_mask)
+		dev->dma_mask = &dev->coherent_dma_mask;
+
+	/*
+	 * if dma-coherent property exist, call arch hook to setup
+	 * dma coherent operations.
+	 */
+	if (of_dma_is_coherent(dev->of_node)) {
+		set_arch_dma_coherent_ops(dev);
+		dev_dbg(dev, "device is dma coherent\n");
+	}
+
+	/*
+	 * if dma-ranges property doesn't exist - just return else
+	 * setup the dma offset
+	 */
+	ret = of_dma_get_range(dev->of_node, &dma_addr, &paddr, &size);
+	if (ret < 0) {
+		dev_dbg(dev, "no dma range information to setup\n");
+		return;
+	}
+
+	/* DMA ranges found. Calculate and set dma_pfn_offset */
+	dev->dma_pfn_offset = PFN_DOWN(paddr - dma_addr);
+	dev_dbg(dev, "dma_pfn_offset(%#08lx)\n", dev->dma_pfn_offset);
+}
+
 /**
  * of_platform_device_create_pdata - Alloc, initialize and register an of_device
  * @np: pointer to node to create device for
@@ -211,12 +269,7 @@ static struct platform_device *of_platform_device_create_pdata(
 	if (!dev)
 		return NULL;
 
-#if defined(CONFIG_MICROBLAZE)
-	dev->archdata.dma_mask = 0xffffffffUL;
-#endif
-	dev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
-	if (!dev->dev.dma_mask)
-		dev->dev.dma_mask = &dev->dev.coherent_dma_mask;
+	of_dma_configure(dev);
 	dev->dev.bus = &platform_bus_type;
 	dev->dev.platform_data = platform_data;
 
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index fd4aee29ad10..c7d9b1b14ce7 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -123,6 +123,13 @@ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask)
 
 extern u64 dma_get_required_mask(struct device *dev);
 
+#ifndef set_arch_dma_coherent_ops
+static inline int set_arch_dma_coherent_ops(struct device *dev)
+{
+	return 0;
+}
+#endif
+
 static inline unsigned int dma_get_max_seg_size(struct device *dev)
 {
 	return dev->dma_parms ? dev->dma_parms->max_segment_size : 65536;
-- 
cgit 


From 2b53f41fa8604845f4f7c538723694a453088b15 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 7 May 2014 09:21:56 -0400
Subject: cgroup: remove unused CGRP_SANE_BEHAVIOR

This cgroup flag has never been used.  Only CGRP_ROOT_SANE_BEHAVIOR is
used.  Remove it.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 2dfabb3b749a..f482f95c2c72 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -140,8 +140,6 @@ enum {
 	 * specified at mount time and thus is implemented here.
 	 */
 	CGRP_CPUSET_CLONE_CHILDREN,
-	/* see the comment above CGRP_ROOT_SANE_BEHAVIOR for details */
-	CGRP_SANE_BEHAVIOR,
 };
 
 struct cgroup {
-- 
cgit 


From 7c65bbc7dcface00b295bbd18bce82fe1db3d633 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 6 May 2014 09:26:30 -0400
Subject: tracing: Add trace_<tracepoint>_enabled() function

There are some code paths in the kernel that need to do some preparations
before they call a tracepoint. As that code is worthless overhead when
the tracepoint is not enabled, it would be prudent to have that code
only run when the tracepoint is active. To accomplish this, all tracepoints
now get a static inline function called "trace_<tracepoint-name>_enabled()"
which returns true when the tracepoint is enabled and false otherwise.

As an added bonus, that function uses the static_key of the tracepoint
such that no branch is needed.

  if (trace_mytracepoint_enabled()) {
	arg = process_tp_arg();
	trace_mytracepoint(arg);
  }

Will keep the "process_tp_arg()" (which may be expensive to run) from
being executed when the tracepoint isn't enabled.

It's best to encapsulate the tracepoint itself in the if statement
to avoid races. For example, if you had:

  if (trace_mytracepoint_enabled())
	arg = process_tp_arg();
  trace_mytracepoint(arg);

There's a chance that the tracepoint could be enabled just after the
if statement, and arg will be undefined when calling the tracepoint.

Link: http://lkml.kernel.org/r/20140506094407.507b6435@gandalf.local.home

Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 Documentation/trace/tracepoints.txt | 24 ++++++++++++++++++++++++
 include/linux/tracepoint.h          | 10 ++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/trace/tracepoints.txt b/Documentation/trace/tracepoints.txt
index 6b018b53177a..a3efac621c5a 100644
--- a/Documentation/trace/tracepoints.txt
+++ b/Documentation/trace/tracepoints.txt
@@ -115,6 +115,30 @@ If the tracepoint has to be used in kernel modules, an
 EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be
 used to export the defined tracepoints.
 
+If you need to do a bit of work for a tracepoint parameter, and
+that work is only used for the tracepoint, that work can be encapsulated
+within an if statement with the following:
+
+	if (trace_foo_bar_enabled()) {
+		int i;
+		int tot = 0;
+
+		for (i = 0; i < count; i++)
+			tot += calculate_nuggets();
+
+		trace_foo_bar(tot);
+	}
+
+All trace_<tracepoint>() calls have a matching trace_<tracepoint>_enabled()
+function defined that returns true if the tracepoint is enabled and
+false otherwise. The trace_<tracepoint>() should always be within the
+block of the if (trace_<tracepoint>_enabled()) to prevent races between
+the tracepoint being enabled and the check being seen.
+
+The advantage of using the trace_<tracepoint>_enabled() is that it uses
+the static_key of the tracepoint to allow the if statement to be implemented
+with jump labels and avoid conditional branches.
+
 Note: The convenience macro TRACE_EVENT provides an alternative way to
       define tracepoints. Check http://lwn.net/Articles/379903,
       http://lwn.net/Articles/381064 and http://lwn.net/Articles/383362
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 9d30ee469c2a..2e2a5f7717e5 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -185,6 +185,11 @@ extern void syscall_unregfunc(void);
 	static inline void						\
 	check_trace_callback_type_##name(void (*cb)(data_proto))	\
 	{								\
+	}								\
+	static inline bool						\
+	trace_##name##_enabled(void)					\
+	{								\
+		return static_key_false(&__tracepoint_##name.key);	\
 	}
 
 /*
@@ -230,6 +235,11 @@ extern void syscall_unregfunc(void);
 	}								\
 	static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \
 	{								\
+	}								\
+	static inline bool						\
+	trace_##name##_enabled(void)					\
+	{								\
+		return false;						\
 	}
 
 #define DEFINE_TRACE_FN(name, reg, unreg)
-- 
cgit 


From 506e931f92defdc60c1dc4aa2ff4a19a5dcd8618 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 7 May 2014 10:26:44 -0600
Subject: blk-mq: add basic round-robin of what CPU to queue workqueue work on

Right now we just pick the first CPU in the mask, but that can
easily overload that one. Add some basic batching and round-robin
all the entries in the mask instead.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 45 +++++++++++++++++++++++++++++++--------------
 include/linux/blk-mq.h |  4 ++++
 2 files changed, 35 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0d379830a278..2410e0cb7aef 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -670,6 +670,30 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	}
 }
 
+/*
+ * It'd be great if the workqueue API had a way to pass
+ * in a mask and had some smarts for more clever placement.
+ * For now we just round-robin here, switching for every
+ * BLK_MQ_CPU_WORK_BATCH queued items.
+ */
+static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
+{
+	int cpu = hctx->next_cpu;
+
+	if (--hctx->next_cpu_batch <= 0) {
+		int next_cpu;
+
+		next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
+		if (next_cpu >= nr_cpu_ids)
+			next_cpu = cpumask_first(hctx->cpumask);
+
+		hctx->next_cpu = next_cpu;
+		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+	}
+
+	return cpu;
+}
+
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 {
 	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
@@ -682,13 +706,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 	else {
 		unsigned int cpu;
 
-		/*
-		 * It'd be great if the workqueue API had a way to pass
-		 * in a mask and had some smarts for more clever placement
-		 * than the first CPU. Or we could round-robin here. For now,
-		 * just queue on the first CPU.
-		 */
-		cpu = cpumask_first(hctx->cpumask);
+		cpu = blk_mq_hctx_next_cpu(hctx);
 		kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0);
 	}
 }
@@ -795,13 +813,7 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 	else {
 		unsigned int cpu;
 
-		/*
-		 * It'd be great if the workqueue API had a way to pass
-		 * in a mask and had some smarts for more clever placement
-		 * than the first CPU. Or we could round-robin here. For now,
-		 * just queue on the first CPU.
-		 */
-		cpu = cpumask_first(hctx->cpumask);
+		cpu = blk_mq_hctx_next_cpu(hctx);
 		kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo);
 	}
 }
@@ -1378,6 +1390,11 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		ctx->index_hw = hctx->nr_ctx;
 		hctx->ctxs[hctx->nr_ctx++] = ctx;
 	}
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		hctx->next_cpu = cpumask_first(hctx->cpumask);
+		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+	}
 }
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 3b561d651a02..5bd677e2dcb7 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -21,6 +21,8 @@ struct blk_mq_hw_ctx {
 	struct delayed_work	run_work;
 	struct delayed_work	delay_work;
 	cpumask_var_t		cpumask;
+	int			next_cpu;
+	int			next_cpu_batch;
 
 	unsigned long		flags;		/* BLK_MQ_F_* flags */
 
@@ -126,6 +128,8 @@ enum {
 	BLK_MQ_S_STOPPED	= 0,
 
 	BLK_MQ_MAX_DEPTH	= 2048,
+
+	BLK_MQ_CPU_WORK_BATCH	= 8,
 };
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
-- 
cgit 


From 80eded6ce8bb8bade60955660c6957d6166c44c1 Mon Sep 17 00:00:00 2001
From: Boris BREZILLON <boris.brezillon@free-electrons.com>
Date: Wed, 7 May 2014 18:02:15 +0200
Subject: clk: at91: add slow clks driver

AT91 slow clk is a clk multiplexer.

In some SoCs (sam9x5, sama5, sam9g45 families) this multiplexer can
choose among 2 sources: an internal RC oscillator circuit and an oscillator
using an external crystal.

In other SoCs (sam9260 family) the multiplexer source is hardcoded with
the OSCSEL signal.

Signed-off-by: Boris BREZILLON <boris.brezillon@free-electrons.com>
Acked-by: Mike Turquette <mturquette@linaro.org>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 drivers/clk/at91/Makefile    |   4 +-
 drivers/clk/at91/clk-slow.c  | 467 +++++++++++++++++++++++++++++++++++++++++++
 drivers/clk/at91/pmc.c       |   5 +
 drivers/clk/at91/pmc.h       |   3 +
 drivers/clk/at91/sckc.c      |  57 ++++++
 drivers/clk/at91/sckc.h      |  22 ++
 include/linux/clk/at91_pmc.h |   1 +
 7 files changed, 557 insertions(+), 2 deletions(-)
 create mode 100644 drivers/clk/at91/clk-slow.c
 create mode 100644 drivers/clk/at91/sckc.c
 create mode 100644 drivers/clk/at91/sckc.h

(limited to 'include/linux')

diff --git a/drivers/clk/at91/Makefile b/drivers/clk/at91/Makefile
index 46c1d3d0d66b..4998aee59267 100644
--- a/drivers/clk/at91/Makefile
+++ b/drivers/clk/at91/Makefile
@@ -2,8 +2,8 @@
 # Makefile for at91 specific clk
 #
 
-obj-y += pmc.o
-obj-y += clk-main.o clk-pll.o clk-plldiv.o clk-master.o
+obj-y += pmc.o sckc.o
+obj-y += clk-slow.o clk-main.o clk-pll.o clk-plldiv.o clk-master.o
 obj-y += clk-system.o clk-peripheral.o clk-programmable.o
 
 obj-$(CONFIG_HAVE_AT91_UTMI)		+= clk-utmi.o
diff --git a/drivers/clk/at91/clk-slow.c b/drivers/clk/at91/clk-slow.c
new file mode 100644
index 000000000000..0300c46ee247
--- /dev/null
+++ b/drivers/clk/at91/clk-slow.c
@@ -0,0 +1,467 @@
+/*
+ * drivers/clk/at91/clk-slow.c
+ *
+ *  Copyright (C) 2013 Boris BREZILLON <b.brezillon@overkiz.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/clk/at91_pmc.h>
+#include <linux/delay.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#include "pmc.h"
+#include "sckc.h"
+
+#define SLOW_CLOCK_FREQ		32768
+#define SLOWCK_SW_CYCLES	5
+#define SLOWCK_SW_TIME_USEC	((SLOWCK_SW_CYCLES * USEC_PER_SEC) / \
+				 SLOW_CLOCK_FREQ)
+
+#define	AT91_SCKC_CR			0x00
+#define		AT91_SCKC_RCEN		(1 << 0)
+#define		AT91_SCKC_OSC32EN	(1 << 1)
+#define		AT91_SCKC_OSC32BYP	(1 << 2)
+#define		AT91_SCKC_OSCSEL	(1 << 3)
+
+struct clk_slow_osc {
+	struct clk_hw hw;
+	void __iomem *sckcr;
+	unsigned long startup_usec;
+};
+
+#define to_clk_slow_osc(hw) container_of(hw, struct clk_slow_osc, hw)
+
+struct clk_slow_rc_osc {
+	struct clk_hw hw;
+	void __iomem *sckcr;
+	unsigned long frequency;
+	unsigned long accuracy;
+	unsigned long startup_usec;
+};
+
+#define to_clk_slow_rc_osc(hw) container_of(hw, struct clk_slow_rc_osc, hw)
+
+struct clk_sam9260_slow {
+	struct clk_hw hw;
+	struct at91_pmc *pmc;
+};
+
+#define to_clk_sam9260_slow(hw) container_of(hw, struct clk_sam9260_slow, hw)
+
+struct clk_sam9x5_slow {
+	struct clk_hw hw;
+	void __iomem *sckcr;
+	u8 parent;
+};
+
+#define to_clk_sam9x5_slow(hw) container_of(hw, struct clk_sam9x5_slow, hw)
+
+
+static int clk_slow_osc_prepare(struct clk_hw *hw)
+{
+	struct clk_slow_osc *osc = to_clk_slow_osc(hw);
+	void __iomem *sckcr = osc->sckcr;
+	u32 tmp = readl(sckcr);
+
+	if (tmp & AT91_SCKC_OSC32BYP)
+		return 0;
+
+	writel(tmp | AT91_SCKC_OSC32EN, sckcr);
+
+	usleep_range(osc->startup_usec, osc->startup_usec + 1);
+
+	return 0;
+}
+
+static void clk_slow_osc_unprepare(struct clk_hw *hw)
+{
+	struct clk_slow_osc *osc = to_clk_slow_osc(hw);
+	void __iomem *sckcr = osc->sckcr;
+	u32 tmp = readl(sckcr);
+
+	if (tmp & AT91_SCKC_OSC32BYP)
+		return;
+
+	writel(tmp & ~AT91_SCKC_OSC32EN, sckcr);
+}
+
+static int clk_slow_osc_is_prepared(struct clk_hw *hw)
+{
+	struct clk_slow_osc *osc = to_clk_slow_osc(hw);
+	void __iomem *sckcr = osc->sckcr;
+	u32 tmp = readl(sckcr);
+
+	if (tmp & AT91_SCKC_OSC32BYP)
+		return 1;
+
+	return !!(tmp & AT91_SCKC_OSC32EN);
+}
+
+static const struct clk_ops slow_osc_ops = {
+	.prepare = clk_slow_osc_prepare,
+	.unprepare = clk_slow_osc_unprepare,
+	.is_prepared = clk_slow_osc_is_prepared,
+};
+
+static struct clk * __init
+at91_clk_register_slow_osc(void __iomem *sckcr,
+			   const char *name,
+			   const char *parent_name,
+			   unsigned long startup,
+			   bool bypass)
+{
+	struct clk_slow_osc *osc;
+	struct clk *clk = NULL;
+	struct clk_init_data init;
+
+	if (!sckcr || !name || !parent_name)
+		return ERR_PTR(-EINVAL);
+
+	osc = kzalloc(sizeof(*osc), GFP_KERNEL);
+	if (!osc)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &slow_osc_ops;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+	init.flags = CLK_IGNORE_UNUSED;
+
+	osc->hw.init = &init;
+	osc->sckcr = sckcr;
+	osc->startup_usec = startup;
+
+	if (bypass)
+		writel((readl(sckcr) & ~AT91_SCKC_OSC32EN) | AT91_SCKC_OSC32BYP,
+		       sckcr);
+
+	clk = clk_register(NULL, &osc->hw);
+	if (IS_ERR(clk))
+		kfree(osc);
+
+	return clk;
+}
+
+void __init of_at91sam9x5_clk_slow_osc_setup(struct device_node *np,
+					     void __iomem *sckcr)
+{
+	struct clk *clk;
+	const char *parent_name;
+	const char *name = np->name;
+	u32 startup;
+	bool bypass;
+
+	parent_name = of_clk_get_parent_name(np, 0);
+	of_property_read_string(np, "clock-output-names", &name);
+	of_property_read_u32(np, "atmel,startup-time-usec", &startup);
+	bypass = of_property_read_bool(np, "atmel,osc-bypass");
+
+	clk = at91_clk_register_slow_osc(sckcr, name, parent_name, startup,
+					 bypass);
+	if (IS_ERR(clk))
+		return;
+
+	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+}
+
+static unsigned long clk_slow_rc_osc_recalc_rate(struct clk_hw *hw,
+						 unsigned long parent_rate)
+{
+	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
+
+	return osc->frequency;
+}
+
+static unsigned long clk_slow_rc_osc_recalc_accuracy(struct clk_hw *hw,
+						     unsigned long parent_acc)
+{
+	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
+
+	return osc->accuracy;
+}
+
+static int clk_slow_rc_osc_prepare(struct clk_hw *hw)
+{
+	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
+	void __iomem *sckcr = osc->sckcr;
+
+	writel(readl(sckcr) | AT91_SCKC_RCEN, sckcr);
+
+	usleep_range(osc->startup_usec, osc->startup_usec + 1);
+
+	return 0;
+}
+
+static void clk_slow_rc_osc_unprepare(struct clk_hw *hw)
+{
+	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
+	void __iomem *sckcr = osc->sckcr;
+
+	writel(readl(sckcr) & ~AT91_SCKC_RCEN, sckcr);
+}
+
+static int clk_slow_rc_osc_is_prepared(struct clk_hw *hw)
+{
+	struct clk_slow_rc_osc *osc = to_clk_slow_rc_osc(hw);
+
+	return !!(readl(osc->sckcr) & AT91_SCKC_RCEN);
+}
+
+static const struct clk_ops slow_rc_osc_ops = {
+	.prepare = clk_slow_rc_osc_prepare,
+	.unprepare = clk_slow_rc_osc_unprepare,
+	.is_prepared = clk_slow_rc_osc_is_prepared,
+	.recalc_rate = clk_slow_rc_osc_recalc_rate,
+	.recalc_accuracy = clk_slow_rc_osc_recalc_accuracy,
+};
+
+static struct clk * __init
+at91_clk_register_slow_rc_osc(void __iomem *sckcr,
+			      const char *name,
+			      unsigned long frequency,
+			      unsigned long accuracy,
+			      unsigned long startup)
+{
+	struct clk_slow_rc_osc *osc;
+	struct clk *clk = NULL;
+	struct clk_init_data init;
+
+	if (!sckcr || !name)
+		return ERR_PTR(-EINVAL);
+
+	osc = kzalloc(sizeof(*osc), GFP_KERNEL);
+	if (!osc)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &slow_rc_osc_ops;
+	init.parent_names = NULL;
+	init.num_parents = 0;
+	init.flags = CLK_IS_ROOT | CLK_IGNORE_UNUSED;
+
+	osc->hw.init = &init;
+	osc->sckcr = sckcr;
+	osc->frequency = frequency;
+	osc->accuracy = accuracy;
+	osc->startup_usec = startup;
+
+	clk = clk_register(NULL, &osc->hw);
+	if (IS_ERR(clk))
+		kfree(osc);
+
+	return clk;
+}
+
+void __init of_at91sam9x5_clk_slow_rc_osc_setup(struct device_node *np,
+						void __iomem *sckcr)
+{
+	struct clk *clk;
+	u32 frequency = 0;
+	u32 accuracy = 0;
+	u32 startup = 0;
+	const char *name = np->name;
+
+	of_property_read_string(np, "clock-output-names", &name);
+	of_property_read_u32(np, "clock-frequency", &frequency);
+	of_property_read_u32(np, "clock-accuracy", &accuracy);
+	of_property_read_u32(np, "atmel,startup-time-usec", &startup);
+
+	clk = at91_clk_register_slow_rc_osc(sckcr, name, frequency, accuracy,
+					    startup);
+	if (IS_ERR(clk))
+		return;
+
+	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+}
+
+static int clk_sam9x5_slow_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct clk_sam9x5_slow *slowck = to_clk_sam9x5_slow(hw);
+	void __iomem *sckcr = slowck->sckcr;
+	u32 tmp;
+
+	if (index > 1)
+		return -EINVAL;
+
+	tmp = readl(sckcr);
+
+	if ((!index && !(tmp & AT91_SCKC_OSCSEL)) ||
+	    (index && (tmp & AT91_SCKC_OSCSEL)))
+		return 0;
+
+	if (index)
+		tmp |= AT91_SCKC_OSCSEL;
+	else
+		tmp &= ~AT91_SCKC_OSCSEL;
+
+	writel(tmp, sckcr);
+
+	usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1);
+
+	return 0;
+}
+
+static u8 clk_sam9x5_slow_get_parent(struct clk_hw *hw)
+{
+	struct clk_sam9x5_slow *slowck = to_clk_sam9x5_slow(hw);
+
+	return !!(readl(slowck->sckcr) & AT91_SCKC_OSCSEL);
+}
+
+static const struct clk_ops sam9x5_slow_ops = {
+	.set_parent = clk_sam9x5_slow_set_parent,
+	.get_parent = clk_sam9x5_slow_get_parent,
+};
+
+static struct clk * __init
+at91_clk_register_sam9x5_slow(void __iomem *sckcr,
+			      const char *name,
+			      const char **parent_names,
+			      int num_parents)
+{
+	struct clk_sam9x5_slow *slowck;
+	struct clk *clk = NULL;
+	struct clk_init_data init;
+
+	if (!sckcr || !name || !parent_names || !num_parents)
+		return ERR_PTR(-EINVAL);
+
+	slowck = kzalloc(sizeof(*slowck), GFP_KERNEL);
+	if (!slowck)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &sam9x5_slow_ops;
+	init.parent_names = parent_names;
+	init.num_parents = num_parents;
+	init.flags = 0;
+
+	slowck->hw.init = &init;
+	slowck->sckcr = sckcr;
+	slowck->parent = !!(readl(sckcr) & AT91_SCKC_OSCSEL);
+
+	clk = clk_register(NULL, &slowck->hw);
+	if (IS_ERR(clk))
+		kfree(slowck);
+
+	return clk;
+}
+
+void __init of_at91sam9x5_clk_slow_setup(struct device_node *np,
+					 void __iomem *sckcr)
+{
+	struct clk *clk;
+	const char *parent_names[2];
+	int num_parents;
+	const char *name = np->name;
+	int i;
+
+	num_parents = of_count_phandle_with_args(np, "clocks", "#clock-cells");
+	if (num_parents <= 0 || num_parents > 2)
+		return;
+
+	for (i = 0; i < num_parents; ++i) {
+		parent_names[i] = of_clk_get_parent_name(np, i);
+		if (!parent_names[i])
+			return;
+	}
+
+	of_property_read_string(np, "clock-output-names", &name);
+
+	clk = at91_clk_register_sam9x5_slow(sckcr, name, parent_names,
+					    num_parents);
+	if (IS_ERR(clk))
+		return;
+
+	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+}
+
+static u8 clk_sam9260_slow_get_parent(struct clk_hw *hw)
+{
+	struct clk_sam9260_slow *slowck = to_clk_sam9260_slow(hw);
+
+	return !!(pmc_read(slowck->pmc, AT91_PMC_SR) & AT91_PMC_OSCSEL);
+}
+
+static const struct clk_ops sam9260_slow_ops = {
+	.get_parent = clk_sam9260_slow_get_parent,
+};
+
+static struct clk * __init
+at91_clk_register_sam9260_slow(struct at91_pmc *pmc,
+			       const char *name,
+			       const char **parent_names,
+			       int num_parents)
+{
+	struct clk_sam9260_slow *slowck;
+	struct clk *clk = NULL;
+	struct clk_init_data init;
+
+	if (!pmc || !name)
+		return ERR_PTR(-EINVAL);
+
+	if (!parent_names || !num_parents)
+		return ERR_PTR(-EINVAL);
+
+	slowck = kzalloc(sizeof(*slowck), GFP_KERNEL);
+	if (!slowck)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &sam9260_slow_ops;
+	init.parent_names = parent_names;
+	init.num_parents = num_parents;
+	init.flags = 0;
+
+	slowck->hw.init = &init;
+	slowck->pmc = pmc;
+
+	clk = clk_register(NULL, &slowck->hw);
+	if (IS_ERR(clk))
+		kfree(slowck);
+
+	return clk;
+}
+
+void __init of_at91sam9260_clk_slow_setup(struct device_node *np,
+					  struct at91_pmc *pmc)
+{
+	struct clk *clk;
+	const char *parent_names[2];
+	int num_parents;
+	const char *name = np->name;
+	int i;
+
+	num_parents = of_count_phandle_with_args(np, "clocks", "#clock-cells");
+	if (num_parents <= 0 || num_parents > 1)
+		return;
+
+	for (i = 0; i < num_parents; ++i) {
+		parent_names[i] = of_clk_get_parent_name(np, i);
+		if (!parent_names[i])
+			return;
+	}
+
+	of_property_read_string(np, "clock-output-names", &name);
+
+	clk = at91_clk_register_sam9260_slow(pmc, name, parent_names,
+					     num_parents);
+	if (IS_ERR(clk))
+		return;
+
+	of_clk_add_provider(np, of_clk_src_simple_get, clk);
+}
diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c
index dc5fdde98e1a..524196bb35a5 100644
--- a/drivers/clk/at91/pmc.c
+++ b/drivers/clk/at91/pmc.c
@@ -229,6 +229,11 @@ out_free_pmc:
 }
 
 static const struct of_device_id pmc_clk_ids[] __initconst = {
+	/* Slow oscillator */
+	{
+		.compatible = "atmel,at91sam9260-clk-slow",
+		.data = of_at91sam9260_clk_slow_setup,
+	},
 	/* Main clock */
 	{
 		.compatible = "atmel,at91rm9200-clk-main-osc",
diff --git a/drivers/clk/at91/pmc.h b/drivers/clk/at91/pmc.h
index 42cc7cc5e1d3..6c7625976113 100644
--- a/drivers/clk/at91/pmc.h
+++ b/drivers/clk/at91/pmc.h
@@ -58,6 +58,9 @@ static inline void pmc_write(struct at91_pmc *pmc, int offset, u32 value)
 int of_at91_get_clk_range(struct device_node *np, const char *propname,
 			  struct clk_range *range);
 
+extern void __init of_at91sam9260_clk_slow_setup(struct device_node *np,
+						 struct at91_pmc *pmc);
+
 extern void __init of_at91rm9200_clk_main_osc_setup(struct device_node *np,
 						    struct at91_pmc *pmc);
 extern void __init of_at91sam9x5_clk_main_rc_osc_setup(struct device_node *np,
diff --git a/drivers/clk/at91/sckc.c b/drivers/clk/at91/sckc.c
new file mode 100644
index 000000000000..1184d76a7ab7
--- /dev/null
+++ b/drivers/clk/at91/sckc.c
@@ -0,0 +1,57 @@
+/*
+ * drivers/clk/at91/sckc.c
+ *
+ *  Copyright (C) 2013 Boris BREZILLON <b.brezillon@overkiz.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include "sckc.h"
+
+static const struct of_device_id sckc_clk_ids[] __initconst = {
+	/* Slow clock */
+	{
+		.compatible = "atmel,at91sam9x5-clk-slow-osc",
+		.data = of_at91sam9x5_clk_slow_osc_setup,
+	},
+	{
+		.compatible = "atmel,at91sam9x5-clk-slow-rc-osc",
+		.data = of_at91sam9x5_clk_slow_rc_osc_setup,
+	},
+	{
+		.compatible = "atmel,at91sam9x5-clk-slow",
+		.data = of_at91sam9x5_clk_slow_setup,
+	},
+	{ /*sentinel*/ }
+};
+
+static void __init of_at91sam9x5_sckc_setup(struct device_node *np)
+{
+	struct device_node *childnp;
+	void (*clk_setup)(struct device_node *, void __iomem *);
+	const struct of_device_id *clk_id;
+	void __iomem *regbase = of_iomap(np, 0);
+
+	if (!regbase)
+		return;
+
+	for_each_child_of_node(np, childnp) {
+		clk_id = of_match_node(sckc_clk_ids, childnp);
+		if (!clk_id)
+			continue;
+		clk_setup = clk_id->data;
+		clk_setup(childnp, regbase);
+	}
+}
+CLK_OF_DECLARE(at91sam9x5_clk_sckc, "atmel,at91sam9x5-sckc",
+	       of_at91sam9x5_sckc_setup);
diff --git a/drivers/clk/at91/sckc.h b/drivers/clk/at91/sckc.h
new file mode 100644
index 000000000000..836fcf59820f
--- /dev/null
+++ b/drivers/clk/at91/sckc.h
@@ -0,0 +1,22 @@
+/*
+ * drivers/clk/at91/sckc.h
+ *
+ *  Copyright (C) 2013 Boris BREZILLON <b.brezillon@overkiz.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __AT91_SCKC_H_
+#define __AT91_SCKC_H_
+
+extern void __init of_at91sam9x5_clk_slow_osc_setup(struct device_node *np,
+						    void __iomem *sckcr);
+extern void __init of_at91sam9x5_clk_slow_rc_osc_setup(struct device_node *np,
+						       void __iomem *sckcr);
+extern void __init of_at91sam9x5_clk_slow_setup(struct device_node *np,
+						void __iomem *sckcr);
+
+#endif /* __AT91_SCKC_H_ */
diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h
index a6911ebbd02a..de4268d4987a 100644
--- a/include/linux/clk/at91_pmc.h
+++ b/include/linux/clk/at91_pmc.h
@@ -155,6 +155,7 @@ extern void __iomem *at91_pmc_base;
 #define		AT91_PMC_LOCKB		(1 <<  2)		/* PLLB Lock */
 #define		AT91_PMC_MCKRDY		(1 <<  3)		/* Master Clock */
 #define		AT91_PMC_LOCKU		(1 <<  6)		/* UPLL Lock [some SAM9] */
+#define		AT91_PMC_OSCSEL		(1 <<  7)		/* Slow Oscillator Selection [some SAM9] */
 #define		AT91_PMC_PCK0RDY	(1 <<  8)		/* Programmable Clock 0 */
 #define		AT91_PMC_PCK1RDY	(1 <<  9)		/* Programmable Clock 1 */
 #define		AT91_PMC_PCK2RDY	(1 << 10)		/* Programmable Clock 2 */
-- 
cgit 


From 2de0c019f34ffbe49744c453628afb270aa9adb6 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Tue, 15 Apr 2014 12:27:58 +0200
Subject: iio: adc: at91: cleanup platform_data

num_channels and registers are not used anymore since they are defined inside
the driver and assigned by matching the id_table.

Also, struct at91_adc_reg_desc is now only used inside the driver.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 drivers/iio/adc/at91_adc.c             | 19 +++++++++++++++++++
 include/linux/platform_data/at91_adc.h | 23 -----------------------
 2 files changed, 19 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c
index 89777ed9abd8..1beae65aef2c 100644
--- a/drivers/iio/adc/at91_adc.c
+++ b/drivers/iio/adc/at91_adc.c
@@ -46,6 +46,25 @@
 #define TOUCH_SAMPLE_PERIOD_US		2000	/* 2ms */
 #define TOUCH_PEN_DETECT_DEBOUNCE_US	200
 
+/**
+ * struct at91_adc_reg_desc - Various informations relative to registers
+ * @channel_base:	Base offset for the channel data registers
+ * @drdy_mask:		Mask of the DRDY field in the relevant registers
+			(Interruptions registers mostly)
+ * @status_register:	Offset of the Interrupt Status Register
+ * @trigger_register:	Offset of the Trigger setup register
+ * @mr_prescal_mask:	Mask of the PRESCAL field in the adc MR register
+ * @mr_startup_mask:	Mask of the STARTUP field in the adc MR register
+ */
+struct at91_adc_reg_desc {
+	u8	channel_base;
+	u32	drdy_mask;
+	u8	status_register;
+	u8	trigger_register;
+	u32	mr_prescal_mask;
+	u32	mr_startup_mask;
+};
+
 struct at91_adc_caps {
 	bool	has_ts;		/* Support touch screen */
 	bool	has_tsmr;	/* only at91sam9x5, sama5d3 have TSMR reg */
diff --git a/include/linux/platform_data/at91_adc.h b/include/linux/platform_data/at91_adc.h
index b3ca1e94e0c8..fcf73879dbfe 100644
--- a/include/linux/platform_data/at91_adc.h
+++ b/include/linux/platform_data/at91_adc.h
@@ -7,25 +7,6 @@
 #ifndef _AT91_ADC_H_
 #define _AT91_ADC_H_
 
-/**
- * struct at91_adc_reg_desc - Various informations relative to registers
- * @channel_base:	Base offset for the channel data registers
- * @drdy_mask:		Mask of the DRDY field in the relevant registers
-			(Interruptions registers mostly)
- * @status_register:	Offset of the Interrupt Status Register
- * @trigger_register:	Offset of the Trigger setup register
- * @mr_prescal_mask:	Mask of the PRESCAL field in the adc MR register
- * @mr_startup_mask:	Mask of the STARTUP field in the adc MR register
- */
-struct at91_adc_reg_desc {
-	u8	channel_base;
-	u32	drdy_mask;
-	u8	status_register;
-	u8	trigger_register;
-	u32	mr_prescal_mask;
-	u32	mr_startup_mask;
-};
-
 /**
  * struct at91_adc_trigger - description of triggers
  * @name:		name of the trigger advertised to the user
@@ -42,8 +23,6 @@ struct at91_adc_trigger {
 /**
  * struct at91_adc_data - platform data for ADC driver
  * @channels_used:		channels in use on the board as a bitmask
- * @num_channels:		global number of channels available on the board
- * @registers:			Registers definition on the board
  * @startup_time:		startup time of the ADC in microseconds
  * @trigger_list:		Triggers available in the ADC
  * @trigger_number:		Number of triggers available in the ADC
@@ -52,8 +31,6 @@ struct at91_adc_trigger {
  */
 struct at91_adc_data {
 	unsigned long			channels_used;
-	u8				num_channels;
-	struct at91_adc_reg_desc	*registers;
 	u8				startup_time;
 	struct at91_adc_trigger		*trigger_list;
 	u8				trigger_number;
-- 
cgit 


From 84882b060301c35ab7e2c1ef355b0bd06b764195 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Tue, 15 Apr 2014 12:27:59 +0200
Subject: iio: adc: at91_adc: Add support for touchscreens without TSMR

Old ADCs, such as those present on the sam9rl and the sam9g45, don't have a
TSMR register, so their touchscreen support has to be handled differently.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 arch/arm/mach-at91/include/mach/at91_adc.h |  13 ++
 drivers/iio/adc/at91_adc.c                 | 200 ++++++++++++++++++++++-------
 include/linux/platform_data/at91_adc.h     |   8 ++
 3 files changed, 174 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-at91/include/mach/at91_adc.h b/arch/arm/mach-at91/include/mach/at91_adc.h
index c287307b9a3b..7d80396346b2 100644
--- a/arch/arm/mach-at91/include/mach/at91_adc.h
+++ b/arch/arm/mach-at91/include/mach/at91_adc.h
@@ -20,6 +20,9 @@
 #define		AT91_ADC_START		(1 << 1)	/* Start Conversion */
 
 #define AT91_ADC_MR		0x04		/* Mode Register */
+#define		AT91_ADC_TSAMOD		(3 << 0)	/* ADC mode */
+#define		AT91_ADC_TSAMOD_ADC_ONLY_MODE		(0 << 0)	/* ADC Mode */
+#define		AT91_ADC_TSAMOD_TS_ONLY_MODE		(1 << 0)	/* Touch Screen Only Mode */
 #define		AT91_ADC_TRGEN		(1 << 0)	/* Trigger Enable */
 #define		AT91_ADC_TRGSEL		(7 << 1)	/* Trigger Selection */
 #define			AT91_ADC_TRGSEL_TC0		(0 << 1)
@@ -28,6 +31,7 @@
 #define			AT91_ADC_TRGSEL_EXTERNAL	(6 << 1)
 #define		AT91_ADC_LOWRES		(1 << 4)	/* Low Resolution */
 #define		AT91_ADC_SLEEP		(1 << 5)	/* Sleep Mode */
+#define		AT91_ADC_PENDET		(1 << 6)	/* Pen contact detection enable */
 #define		AT91_ADC_PRESCAL_9260	(0x3f << 8)	/* Prescalar Rate Selection */
 #define		AT91_ADC_PRESCAL_9G45	(0xff << 8)
 #define			AT91_ADC_PRESCAL_(x)	((x) << 8)
@@ -37,6 +41,12 @@
 #define			AT91_ADC_STARTUP_(x)	((x) << 16)
 #define		AT91_ADC_SHTIM		(0xf  << 24)	/* Sample & Hold Time */
 #define			AT91_ADC_SHTIM_(x)	((x) << 24)
+#define		AT91_ADC_PENDBC		(0x0f << 28)	/* Pen Debounce time */
+#define			AT91_ADC_PENDBC_(x)	((x) << 28)
+
+#define AT91_ADC_TSR		0x0C
+#define		AT91_ADC_TSR_SHTIM	(0xf  << 24)	/* Sample & Hold Time */
+#define			AT91_ADC_TSR_SHTIM_(x)	((x) << 24)
 
 #define AT91_ADC_CHER		0x10		/* Channel Enable Register */
 #define AT91_ADC_CHDR		0x14		/* Channel Disable Register */
@@ -60,6 +70,8 @@
 #define AT91_ADC_IER		0x24		/* Interrupt Enable Register */
 #define AT91_ADC_IDR		0x28		/* Interrupt Disable Register */
 #define AT91_ADC_IMR		0x2C		/* Interrupt Mask Register */
+#define		AT91RL_ADC_IER_PEN	(1 << 20)
+#define		AT91RL_ADC_IER_NOPEN	(1 << 21)
 #define		AT91_ADC_IER_PEN	(1 << 29)
 #define		AT91_ADC_IER_NOPEN	(1 << 30)
 #define		AT91_ADC_IER_XRDY	(1 << 20)
@@ -102,6 +114,7 @@
 #define		AT91_ADC_TRGR_TRGPER	(0xffff << 16)
 #define			AT91_ADC_TRGR_TRGPER_(x)	((x) << 16)
 #define		AT91_ADC_TRGR_TRGMOD	(0x7 << 0)
+#define			AT91_ADC_TRGR_NONE		(0 << 0)
 #define			AT91_ADC_TRGR_MOD_PERIOD_TRIG	(5 << 0)
 
 #endif
diff --git a/drivers/iio/adc/at91_adc.c b/drivers/iio/adc/at91_adc.c
index 1beae65aef2c..c0e4206e34e5 100644
--- a/drivers/iio/adc/at91_adc.c
+++ b/drivers/iio/adc/at91_adc.c
@@ -46,6 +46,10 @@
 #define TOUCH_SAMPLE_PERIOD_US		2000	/* 2ms */
 #define TOUCH_PEN_DETECT_DEBOUNCE_US	200
 
+#define MAX_RLPOS_BITS         10
+#define TOUCH_SAMPLE_PERIOD_US_RL      10000   /* 10ms, the SoC can't keep up with 2ms */
+#define TOUCH_SHTIM                    0xa
+
 /**
  * struct at91_adc_reg_desc - Various informations relative to registers
  * @channel_base:	Base offset for the channel data registers
@@ -83,12 +87,6 @@ struct at91_adc_caps {
 	struct at91_adc_reg_desc registers;
 };
 
-enum atmel_adc_ts_type {
-	ATMEL_ADC_TOUCHSCREEN_NONE = 0,
-	ATMEL_ADC_TOUCHSCREEN_4WIRE = 4,
-	ATMEL_ADC_TOUCHSCREEN_5WIRE = 5,
-};
-
 struct at91_adc_state {
 	struct clk		*adc_clk;
 	u16			*buffer;
@@ -133,6 +131,11 @@ struct at91_adc_state {
 
 	u16			ts_sample_period_val;
 	u32			ts_pressure_threshold;
+	u16			ts_pendbc;
+
+	bool			ts_bufferedmeasure;
+	u32			ts_prev_absx;
+	u32			ts_prev_absy;
 };
 
 static irqreturn_t at91_adc_trigger_handler(int irq, void *p)
@@ -239,7 +242,72 @@ static int at91_ts_sample(struct at91_adc_state *st)
 	return 0;
 }
 
-static irqreturn_t at91_adc_interrupt(int irq, void *private)
+static irqreturn_t at91_adc_rl_interrupt(int irq, void *private)
+{
+	struct iio_dev *idev = private;
+	struct at91_adc_state *st = iio_priv(idev);
+	u32 status = at91_adc_readl(st, st->registers->status_register);
+	unsigned int reg;
+
+	status &= at91_adc_readl(st, AT91_ADC_IMR);
+	if (status & st->registers->drdy_mask)
+		handle_adc_eoc_trigger(irq, idev);
+
+	if (status & AT91RL_ADC_IER_PEN) {
+		/* Disabling pen debounce is required to get a NOPEN irq */
+		reg = at91_adc_readl(st, AT91_ADC_MR);
+		reg &= ~AT91_ADC_PENDBC;
+		at91_adc_writel(st, AT91_ADC_MR, reg);
+
+		at91_adc_writel(st, AT91_ADC_IDR, AT91RL_ADC_IER_PEN);
+		at91_adc_writel(st, AT91_ADC_IER, AT91RL_ADC_IER_NOPEN
+				| AT91_ADC_EOC(3));
+		/* Set up period trigger for sampling */
+		at91_adc_writel(st, st->registers->trigger_register,
+			AT91_ADC_TRGR_MOD_PERIOD_TRIG |
+			AT91_ADC_TRGR_TRGPER_(st->ts_sample_period_val));
+	} else if (status & AT91RL_ADC_IER_NOPEN) {
+		reg = at91_adc_readl(st, AT91_ADC_MR);
+		reg |= AT91_ADC_PENDBC_(st->ts_pendbc) & AT91_ADC_PENDBC;
+		at91_adc_writel(st, AT91_ADC_MR, reg);
+		at91_adc_writel(st, st->registers->trigger_register,
+			AT91_ADC_TRGR_NONE);
+
+		at91_adc_writel(st, AT91_ADC_IDR, AT91RL_ADC_IER_NOPEN
+				| AT91_ADC_EOC(3));
+		at91_adc_writel(st, AT91_ADC_IER, AT91RL_ADC_IER_PEN);
+		st->ts_bufferedmeasure = false;
+		input_report_key(st->ts_input, BTN_TOUCH, 0);
+		input_sync(st->ts_input);
+	} else if (status & AT91_ADC_EOC(3)) {
+		/* Conversion finished */
+		if (st->ts_bufferedmeasure) {
+			/*
+			 * Last measurement is always discarded, since it can
+			 * be erroneous.
+			 * Always report previous measurement
+			 */
+			input_report_abs(st->ts_input, ABS_X, st->ts_prev_absx);
+			input_report_abs(st->ts_input, ABS_Y, st->ts_prev_absy);
+			input_report_key(st->ts_input, BTN_TOUCH, 1);
+			input_sync(st->ts_input);
+		} else
+			st->ts_bufferedmeasure = true;
+
+		/* Now make new measurement */
+		st->ts_prev_absx = at91_adc_readl(st, AT91_ADC_CHAN(st, 3))
+				   << MAX_RLPOS_BITS;
+		st->ts_prev_absx /= at91_adc_readl(st, AT91_ADC_CHAN(st, 2));
+
+		st->ts_prev_absy = at91_adc_readl(st, AT91_ADC_CHAN(st, 1))
+				   << MAX_RLPOS_BITS;
+		st->ts_prev_absy /= at91_adc_readl(st, AT91_ADC_CHAN(st, 0));
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t at91_adc_9x5_interrupt(int irq, void *private)
 {
 	struct iio_dev *idev = private;
 	struct at91_adc_state *st = iio_priv(idev);
@@ -672,6 +740,8 @@ static int at91_adc_probe_dt_ts(struct device_node *node,
 		return -EINVAL;
 	}
 
+	if (!st->caps->has_tsmr)
+		return 0;
 	prop = 0;
 	of_property_read_u32(node, "atmel,adc-ts-pressure-threshold", &prop);
 	st->ts_pressure_threshold = prop;
@@ -795,6 +865,7 @@ static int at91_adc_probe_pdata(struct at91_adc_state *st,
 	st->trigger_number = pdata->trigger_number;
 	st->trigger_list = pdata->trigger_list;
 	st->registers = &st->caps->registers;
+	st->touchscreen_type = pdata->touchscreen_type;
 
 	return 0;
 }
@@ -809,7 +880,10 @@ static int atmel_ts_open(struct input_dev *dev)
 {
 	struct at91_adc_state *st = input_get_drvdata(dev);
 
-	at91_adc_writel(st, AT91_ADC_IER, AT91_ADC_IER_PEN);
+	if (st->caps->has_tsmr)
+		at91_adc_writel(st, AT91_ADC_IER, AT91_ADC_IER_PEN);
+	else
+		at91_adc_writel(st, AT91_ADC_IER, AT91RL_ADC_IER_PEN);
 	return 0;
 }
 
@@ -817,45 +891,61 @@ static void atmel_ts_close(struct input_dev *dev)
 {
 	struct at91_adc_state *st = input_get_drvdata(dev);
 
-	at91_adc_writel(st, AT91_ADC_IDR, AT91_ADC_IER_PEN);
+	if (st->caps->has_tsmr)
+		at91_adc_writel(st, AT91_ADC_IDR, AT91_ADC_IER_PEN);
+	else
+		at91_adc_writel(st, AT91_ADC_IDR, AT91RL_ADC_IER_PEN);
 }
 
 static int at91_ts_hw_init(struct at91_adc_state *st, u32 adc_clk_khz)
 {
-	u32 reg = 0, pendbc;
+	u32 reg = 0;
 	int i = 0;
 
-	if (st->touchscreen_type == ATMEL_ADC_TOUCHSCREEN_4WIRE)
-		reg = AT91_ADC_TSMR_TSMODE_4WIRE_PRESS;
-	else
-		reg = AT91_ADC_TSMR_TSMODE_5WIRE;
-
 	/* a Pen Detect Debounce Time is necessary for the ADC Touch to avoid
 	 * pen detect noise.
 	 * The formula is : Pen Detect Debounce Time = (2 ^ pendbc) / ADCClock
 	 */
-	pendbc = round_up(TOUCH_PEN_DETECT_DEBOUNCE_US * adc_clk_khz / 1000, 1);
+	st->ts_pendbc = round_up(TOUCH_PEN_DETECT_DEBOUNCE_US * adc_clk_khz /
+				 1000, 1);
 
-	while (pendbc >> ++i)
+	while (st->ts_pendbc >> ++i)
 		;	/* Empty! Find the shift offset */
-	if (abs(pendbc - (1 << i)) < abs(pendbc - (1 << (i - 1))))
-		pendbc = i;
+	if (abs(st->ts_pendbc - (1 << i)) < abs(st->ts_pendbc - (1 << (i - 1))))
+		st->ts_pendbc = i;
 	else
-		pendbc = i - 1;
+		st->ts_pendbc = i - 1;
 
-	if (st->caps->has_tsmr) {
-		reg |= AT91_ADC_TSMR_TSAV_(st->caps->ts_filter_average)
-				& AT91_ADC_TSMR_TSAV;
-		reg |= AT91_ADC_TSMR_PENDBC_(pendbc) & AT91_ADC_TSMR_PENDBC;
-		reg |= AT91_ADC_TSMR_NOTSDMA;
-		reg |= AT91_ADC_TSMR_PENDET_ENA;
-		reg |= 0x03 << 8;	/* TSFREQ, need bigger than TSAV */
-
-		at91_adc_writel(st, AT91_ADC_TSMR, reg);
-	} else {
-		/* TODO: for 9g45 which has no TSMR */
+	if (!st->caps->has_tsmr) {
+		reg = at91_adc_readl(st, AT91_ADC_MR);
+		reg |= AT91_ADC_TSAMOD_TS_ONLY_MODE | AT91_ADC_PENDET;
+
+		reg |= AT91_ADC_PENDBC_(st->ts_pendbc) & AT91_ADC_PENDBC;
+		at91_adc_writel(st, AT91_ADC_MR, reg);
+
+		reg = AT91_ADC_TSR_SHTIM_(TOUCH_SHTIM) & AT91_ADC_TSR_SHTIM;
+		at91_adc_writel(st, AT91_ADC_TSR, reg);
+
+		st->ts_sample_period_val = round_up((TOUCH_SAMPLE_PERIOD_US_RL *
+						    adc_clk_khz / 1000) - 1, 1);
+
+		return 0;
 	}
 
+	if (st->touchscreen_type == ATMEL_ADC_TOUCHSCREEN_4WIRE)
+		reg = AT91_ADC_TSMR_TSMODE_4WIRE_PRESS;
+	else
+		reg = AT91_ADC_TSMR_TSMODE_5WIRE;
+
+	reg |= AT91_ADC_TSMR_TSAV_(st->caps->ts_filter_average)
+	       & AT91_ADC_TSMR_TSAV;
+	reg |= AT91_ADC_TSMR_PENDBC_(st->ts_pendbc) & AT91_ADC_TSMR_PENDBC;
+	reg |= AT91_ADC_TSMR_NOTSDMA;
+	reg |= AT91_ADC_TSMR_PENDET_ENA;
+	reg |= 0x03 << 8;	/* TSFREQ, needs to be bigger than TSAV */
+
+	at91_adc_writel(st, AT91_ADC_TSMR, reg);
+
 	/* Change adc internal resistor value for better pen detection,
 	 * default value is 100 kOhm.
 	 * 0 = 200 kOhm, 1 = 150 kOhm, 2 = 100 kOhm, 3 = 50 kOhm
@@ -864,7 +954,7 @@ static int at91_ts_hw_init(struct at91_adc_state *st, u32 adc_clk_khz)
 	at91_adc_writel(st, AT91_ADC_ACR, st->caps->ts_pen_detect_sensitivity
 			& AT91_ADC_ACR_PENDETSENS);
 
-	/* Sample Peroid Time = (TRGPER + 1) / ADCClock */
+	/* Sample Period Time = (TRGPER + 1) / ADCClock */
 	st->ts_sample_period_val = round_up((TOUCH_SAMPLE_PERIOD_US *
 			adc_clk_khz / 1000) - 1, 1);
 
@@ -893,17 +983,37 @@ static int at91_ts_register(struct at91_adc_state *st,
 	__set_bit(EV_ABS, input->evbit);
 	__set_bit(EV_KEY, input->evbit);
 	__set_bit(BTN_TOUCH, input->keybit);
-	input_set_abs_params(input, ABS_X, 0, (1 << MAX_POS_BITS) - 1, 0, 0);
-	input_set_abs_params(input, ABS_Y, 0, (1 << MAX_POS_BITS) - 1, 0, 0);
-	input_set_abs_params(input, ABS_PRESSURE, 0, 0xffffff, 0, 0);
+	if (st->caps->has_tsmr) {
+		input_set_abs_params(input, ABS_X, 0, (1 << MAX_POS_BITS) - 1,
+				     0, 0);
+		input_set_abs_params(input, ABS_Y, 0, (1 << MAX_POS_BITS) - 1,
+				     0, 0);
+		input_set_abs_params(input, ABS_PRESSURE, 0, 0xffffff, 0, 0);
+	} else {
+		if (st->touchscreen_type != ATMEL_ADC_TOUCHSCREEN_4WIRE) {
+			dev_err(&pdev->dev,
+				"This touchscreen controller only support 4 wires\n");
+			ret = -EINVAL;
+			goto err;
+		}
+
+		input_set_abs_params(input, ABS_X, 0, (1 << MAX_RLPOS_BITS) - 1,
+				     0, 0);
+		input_set_abs_params(input, ABS_Y, 0, (1 << MAX_RLPOS_BITS) - 1,
+				     0, 0);
+	}
 
 	st->ts_input = input;
 	input_set_drvdata(input, st);
 
 	ret = input_register_device(input);
 	if (ret)
-		input_free_device(st->ts_input);
+		goto err;
+
+	return ret;
 
+err:
+	input_free_device(st->ts_input);
 	return ret;
 }
 
@@ -962,11 +1072,13 @@ static int at91_adc_probe(struct platform_device *pdev)
 	 */
 	at91_adc_writel(st, AT91_ADC_CR, AT91_ADC_SWRST);
 	at91_adc_writel(st, AT91_ADC_IDR, 0xFFFFFFFF);
-	ret = request_irq(st->irq,
-			  at91_adc_interrupt,
-			  0,
-			  pdev->dev.driver->name,
-			  idev);
+
+	if (st->caps->has_tsmr)
+		ret = request_irq(st->irq, at91_adc_9x5_interrupt, 0,
+				  pdev->dev.driver->name, idev);
+	else
+		ret = request_irq(st->irq, at91_adc_rl_interrupt, 0,
+				  pdev->dev.driver->name, idev);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to allocate IRQ.\n");
 		return ret;
@@ -1070,12 +1182,6 @@ static int at91_adc_probe(struct platform_device *pdev)
 			goto error_disable_adc_clk;
 		}
 	} else {
-		if (!st->caps->has_tsmr) {
-			dev_err(&pdev->dev, "We don't support non-TSMR adc\n");
-			ret = -ENODEV;
-			goto error_disable_adc_clk;
-		}
-
 		ret = at91_ts_register(st, pdev);
 		if (ret)
 			goto error_disable_adc_clk;
diff --git a/include/linux/platform_data/at91_adc.h b/include/linux/platform_data/at91_adc.h
index fcf73879dbfe..7819fc787731 100644
--- a/include/linux/platform_data/at91_adc.h
+++ b/include/linux/platform_data/at91_adc.h
@@ -7,6 +7,12 @@
 #ifndef _AT91_ADC_H_
 #define _AT91_ADC_H_
 
+enum atmel_adc_ts_type {
+	ATMEL_ADC_TOUCHSCREEN_NONE = 0,
+	ATMEL_ADC_TOUCHSCREEN_4WIRE = 4,
+	ATMEL_ADC_TOUCHSCREEN_5WIRE = 5,
+};
+
 /**
  * struct at91_adc_trigger - description of triggers
  * @name:		name of the trigger advertised to the user
@@ -28,6 +34,7 @@ struct at91_adc_trigger {
  * @trigger_number:		Number of triggers available in the ADC
  * @use_external_triggers:	does the board has external triggers availables
  * @vref:			Reference voltage for the ADC in millivolts
+ * @touchscreen_type:		If a touchscreen is connected, its type (4 or 5 wires)
  */
 struct at91_adc_data {
 	unsigned long			channels_used;
@@ -36,6 +43,7 @@ struct at91_adc_data {
 	u8				trigger_number;
 	bool				use_external_triggers;
 	u16				vref;
+	enum atmel_adc_ts_type		touchscreen_type;
 };
 
 extern void __init at91_add_device_adc(struct at91_adc_data *data);
-- 
cgit 


From 03a3f53b965aaf1eb4f9423c1a55b41b3b4895b2 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Tue, 15 Apr 2014 12:28:09 +0200
Subject: ARM: at91: remove atmel_tsadcc platform_data

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 arch/arm/mach-at91/board.h          | 3 ---
 include/linux/platform_data/atmel.h | 7 -------
 2 files changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-at91/board.h b/arch/arm/mach-at91/board.h
index 6c08b341167d..4e773b55bc2d 100644
--- a/arch/arm/mach-at91/board.h
+++ b/arch/arm/mach-at91/board.h
@@ -118,9 +118,6 @@ struct isi_platform_data;
 extern void __init at91_add_device_isi(struct isi_platform_data *data,
 		bool use_pck_as_mck);
 
- /* Touchscreen Controller */
-extern void __init at91_add_device_tsadcc(struct at91_tsadcc_data *data);
-
 /* CAN */
 extern void __init at91_add_device_can(struct at91_can_data *data);
 
diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h
index e26b0c14edea..1466443797d7 100644
--- a/include/linux/platform_data/atmel.h
+++ b/include/linux/platform_data/atmel.h
@@ -87,13 +87,6 @@ struct atmel_uart_data {
 	int			rts_gpio;	/* optional RTS GPIO */
 };
 
- /* Touchscreen Controller */
-struct at91_tsadcc_data {
-	unsigned int    adc_clock;
-	u8		pendet_debounce;
-	u8		ts_sample_hold_time;
-};
-
 /* CAN */
 struct at91_can_data {
 	void (*transceiver_switch)(int on);
-- 
cgit 


From c1e756bfcbcac838a86a23f3e4501b556a961e3c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 5 May 2014 15:00:44 +0200
Subject: Revert "net: core: introduce netif_skb_dev_features"

This reverts commit d206940319c41df4299db75ed56142177bb2e5f6,
there are no more callers.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 +------
 net/core/dev.c            | 22 ++++++++++------------
 2 files changed, 11 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7ed3a3aa6604..20e99efb1ca6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3180,12 +3180,7 @@ void netdev_change_features(struct net_device *dev);
 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 					struct net_device *dev);
 
-netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
-					 const struct net_device *dev);
-static inline netdev_features_t netif_skb_features(struct sk_buff *skb)
-{
-	return netif_skb_dev_features(skb, skb->dev);
-}
+netdev_features_t netif_skb_features(struct sk_buff *skb);
 
 static inline bool net_gso_ok(netdev_features_t features, int gso_type)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index d2c8a06b3a98..c619b8641337 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2418,7 +2418,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
  * 2. No high memory really exists on this machine.
  */
 
-static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_HIGHMEM
 	int i;
@@ -2493,38 +2493,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
 }
 
 static netdev_features_t harmonize_features(struct sk_buff *skb,
-					    const struct net_device *dev,
-					    netdev_features_t features)
+	netdev_features_t features)
 {
 	int tmp;
 
 	if (skb->ip_summed != CHECKSUM_NONE &&
 	    !can_checksum_protocol(features, skb_network_protocol(skb, &tmp))) {
 		features &= ~NETIF_F_ALL_CSUM;
-	} else if (illegal_highdma(dev, skb)) {
+	} else if (illegal_highdma(skb->dev, skb)) {
 		features &= ~NETIF_F_SG;
 	}
 
 	return features;
 }
 
-netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
-					 const struct net_device *dev)
+netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
-	netdev_features_t features = dev->features;
+	netdev_features_t features = skb->dev->features;
 
-	if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
+	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
 	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
 	} else if (!vlan_tx_tag_present(skb)) {
-		return harmonize_features(skb, dev, features);
+		return harmonize_features(skb, features);
 	}
 
-	features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
 					       NETIF_F_HW_VLAN_STAG_TX);
 
 	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
@@ -2532,9 +2530,9 @@ netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
 				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_STAG_TX;
 
-	return harmonize_features(skb, dev, features);
+	return harmonize_features(skb, features);
 }
-EXPORT_SYMBOL(netif_skb_dev_features);
+EXPORT_SYMBOL(netif_skb_features);
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
-- 
cgit 


From 552a515707a5caf9fa9f3620d68fc28146c6b1b9 Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Wed, 7 May 2014 09:28:31 +0200
Subject: ath9k: Allow platform override without EEPROM override

Add a new platform data flag "use_eeprom" that indicates that the eeprom
found on the card itself should be used instead of the one present in
the platform data.

This allows overriding the MAC address of a PCI card while preserving
the EEPROM data from the card itself.

The default behavior is preserved.

Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath9k/init.c | 2 +-
 include/linux/ath9k_platform.h        | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index a6e273a2c44b..bcc7cfb1866d 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -508,7 +508,7 @@ static int ath9k_init_softc(u16 devid, struct ath_softc *sc,
 	sc->tx99_power = MAX_RATE_POWER + 1;
 	init_waitqueue_head(&sc->tx_wait);
 
-	if (!pdata) {
+	if (!pdata || pdata->use_eeprom) {
 		ah->ah_flags |= AH_USE_EEPROM;
 		sc->sc_ah->led_pin = -1;
 	} else {
diff --git a/include/linux/ath9k_platform.h b/include/linux/ath9k_platform.h
index 8598f8eacb20..a495a959e8a7 100644
--- a/include/linux/ath9k_platform.h
+++ b/include/linux/ath9k_platform.h
@@ -36,6 +36,8 @@ struct ath9k_platform_data {
 
 	int (*get_mac_revision)(void);
 	int (*external_reset)(void);
+
+	bool use_eeprom;
 };
 
 #endif /* _LINUX_ATH9K_PLATFORM_H */
-- 
cgit 


From 23a456f05353035d1a2b3f1b9a92707acdc036e0 Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Tue, 6 May 2014 18:52:16 +0200
Subject: net: mdio: of_mdiobus_register(): fall back to mdiobus_register() for
 !CONFIG_OF

If CONFIG_OF is not set, make of_mdiobus_register() call
mdiobus_register() instead of returning -ENOSYS.

This way, we can just call of_mdiobus_register() from all DT-enabled
drivers to handle the compat cases.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Mugunthan V N <mugunthanvnm@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of_mdio.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 6fe8464ed767..881a7c3571f4 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -31,7 +31,12 @@ extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np);
 #else /* CONFIG_OF */
 static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 {
-	return -ENOSYS;
+	/*
+	 * Fall back to the non-DT function to register a bus.
+	 * This way, we don't have to keep compat bits around in drivers.
+	 */
+
+	return mdiobus_register(mdio);
 }
 
 static inline struct phy_device *of_phy_find_device(struct device_node *phy_np)
-- 
cgit 


From db6d8cc00773d8ef5a8b421b42a5ded235307b10 Mon Sep 17 00:00:00 2001
From: Alex Hung <alex.hung@canonical.com>
Date: Wed, 26 Mar 2014 02:27:02 -0700
Subject: dell-led: add mic mute led interface

This patch provides LED functionality similar to that of

  420f973 thinkpad-acpi: Add mute and mic-mute LED functionality

Signed-off-by: Alex Hung <alex.hung@canonical.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/dell-led.c  | 171 +++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/dell-led.h |  10 +++
 2 files changed, 174 insertions(+), 7 deletions(-)
 create mode 100644 include/linux/dell-led.h

(limited to 'include/linux')

diff --git a/drivers/leds/dell-led.c b/drivers/leds/dell-led.c
index e5c57389efd6..c36acaf566a6 100644
--- a/drivers/leds/dell-led.c
+++ b/drivers/leds/dell-led.c
@@ -15,12 +15,15 @@
 #include <linux/leds.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/dmi.h>
+#include <linux/dell-led.h>
 
 MODULE_AUTHOR("Louis Davis/Jim Dailey");
 MODULE_DESCRIPTION("Dell LED Control Driver");
 MODULE_LICENSE("GPL");
 
 #define DELL_LED_BIOS_GUID "F6E4FE6E-909D-47cb-8BAB-C9F6F2F8D396"
+#define DELL_APP_GUID "A80593CE-A997-11DA-B012-B622A1EF5492"
 MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID);
 
 /* Error Result Codes: */
@@ -39,6 +42,149 @@ MODULE_ALIAS("wmi:" DELL_LED_BIOS_GUID);
 #define CMD_LED_OFF	17
 #define CMD_LED_BLINK	18
 
+struct app_wmi_args {
+	u16 class;
+	u16 selector;
+	u32 arg1;
+	u32 arg2;
+	u32 arg3;
+	u32 arg4;
+	u32 res1;
+	u32 res2;
+	u32 res3;
+	u32 res4;
+	char dummy[92];
+};
+
+#define GLOBAL_MIC_MUTE_ENABLE	0x364
+#define GLOBAL_MIC_MUTE_DISABLE	0x365
+
+struct dell_bios_data_token {
+	u16 tokenid;
+	u16 location;
+	u16 value;
+};
+
+struct __attribute__ ((__packed__)) dell_bios_calling_interface {
+	struct	dmi_header header;
+	u16	cmd_io_addr;
+	u8	cmd_io_code;
+	u32	supported_cmds;
+	struct	dell_bios_data_token damap[];
+};
+
+static struct dell_bios_data_token dell_mic_tokens[2];
+
+static int dell_wmi_perform_query(struct app_wmi_args *args)
+{
+	struct app_wmi_args *bios_return;
+	union acpi_object *obj;
+	struct acpi_buffer input;
+	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+	acpi_status status;
+	u32 rc = -EINVAL;
+
+	input.length = 128;
+	input.pointer = args;
+
+	status = wmi_evaluate_method(DELL_APP_GUID, 0, 1, &input, &output);
+	if (!ACPI_SUCCESS(status))
+		goto err_out0;
+
+	obj = output.pointer;
+	if (!obj)
+		goto err_out0;
+
+	if (obj->type != ACPI_TYPE_BUFFER)
+		goto err_out1;
+
+	bios_return = (struct app_wmi_args *)obj->buffer.pointer;
+	rc = bios_return->res1;
+	if (rc)
+		goto err_out1;
+
+	memcpy(args, bios_return, sizeof(struct app_wmi_args));
+	rc = 0;
+
+ err_out1:
+	kfree(obj);
+ err_out0:
+	return rc;
+}
+
+static void __init find_micmute_tokens(const struct dmi_header *dm, void *dummy)
+{
+	struct dell_bios_calling_interface *calling_interface;
+	struct dell_bios_data_token *token;
+	int token_size = sizeof(struct dell_bios_data_token);
+	int i = 0;
+
+	if (dm->type == 0xda && dm->length > 17) {
+		calling_interface = container_of(dm,
+				struct dell_bios_calling_interface, header);
+
+		token = &calling_interface->damap[i];
+		while (token->tokenid != 0xffff) {
+			if (token->tokenid == GLOBAL_MIC_MUTE_DISABLE)
+				memcpy(&dell_mic_tokens[0], token, token_size);
+			else if (token->tokenid == GLOBAL_MIC_MUTE_ENABLE)
+				memcpy(&dell_mic_tokens[1], token, token_size);
+
+			i++;
+			token = &calling_interface->damap[i];
+		}
+	}
+}
+
+static int dell_micmute_led_set(int state)
+{
+	struct app_wmi_args args;
+	struct dell_bios_data_token *token;
+
+	if (!wmi_has_guid(DELL_APP_GUID))
+		return -ENODEV;
+
+	if (state == 0 || state == 1)
+		token = &dell_mic_tokens[state];
+	else
+		return -EINVAL;
+
+	memset(&args, 0, sizeof(struct app_wmi_args));
+
+	args.class = 1;
+	args.arg1 = token->location;
+	args.arg2 = token->value;
+
+	dell_wmi_perform_query(&args);
+
+	return state;
+}
+
+int dell_app_wmi_led_set(int whichled, int on)
+{
+	int state = 0;
+
+	switch (whichled) {
+	case DELL_LED_MICMUTE:
+		state = dell_micmute_led_set(on);
+		break;
+	default:
+		pr_warn("led type %x is not supported\n", whichled);
+		break;
+	}
+
+	return state;
+}
+EXPORT_SYMBOL_GPL(dell_app_wmi_led_set);
+
+static int __init dell_micmute_led_init(void)
+{
+	memset(dell_mic_tokens, 0, sizeof(struct dell_bios_data_token) * 2);
+	dmi_walk(find_micmute_tokens, NULL);
+
+	return 0;
+}
+
 struct bios_args {
 	unsigned char length;
 	unsigned char result_code;
@@ -181,21 +327,32 @@ static int __init dell_led_init(void)
 {
 	int error = 0;
 
-	if (!wmi_has_guid(DELL_LED_BIOS_GUID))
+	if (!wmi_has_guid(DELL_LED_BIOS_GUID) && !wmi_has_guid(DELL_APP_GUID))
 		return -ENODEV;
 
-	error = led_off();
-	if (error != 0)
-		return -ENODEV;
+	if (wmi_has_guid(DELL_APP_GUID))
+		error = dell_micmute_led_init();
 
-	return led_classdev_register(NULL, &dell_led);
+	if (wmi_has_guid(DELL_LED_BIOS_GUID)) {
+		error = led_off();
+		if (error != 0)
+			return -ENODEV;
+
+		error = led_classdev_register(NULL, &dell_led);
+	}
+
+	return error;
 }
 
 static void __exit dell_led_exit(void)
 {
-	led_classdev_unregister(&dell_led);
+	int error = 0;
 
-	led_off();
+	if (wmi_has_guid(DELL_LED_BIOS_GUID)) {
+		error = led_off();
+		if (error == 0)
+			led_classdev_unregister(&dell_led);
+	}
 }
 
 module_init(dell_led_init);
diff --git a/include/linux/dell-led.h b/include/linux/dell-led.h
new file mode 100644
index 000000000000..7009b8bec77b
--- /dev/null
+++ b/include/linux/dell-led.h
@@ -0,0 +1,10 @@
+#ifndef __DELL_LED_H__
+#define __DELL_LED_H__
+
+enum {
+	DELL_LED_MICMUTE,
+};
+
+int dell_app_wmi_led_set(int whichled, int on);
+
+#endif
-- 
cgit 


From 2ce112f1e788a695630e2c275f47d57a801673d3 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Sat, 5 Apr 2014 20:16:34 -0700
Subject: leds: pca9685: Remove leds-pca9685 driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver has been replaced by the pwm-pca9685 driver and there are no
users of this driver in the current tree, so remove it.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Steffen Trumtrar <s.trumtrar@pengutronix.de>
Acked-by: Maximilian Güntner <maximilian.guentner@gmail.com>
Signed-off-by: Bryan Wu <cooloney@gmail.com>
---
 drivers/leds/Kconfig                       |  10 --
 drivers/leds/Makefile                      |   1 -
 drivers/leds/leds-pca9685.c                | 213 -----------------------------
 include/linux/platform_data/leds-pca9685.h |  35 -----
 4 files changed, 259 deletions(-)
 delete mode 100644 drivers/leds/leds-pca9685.c
 delete mode 100644 include/linux/platform_data/leds-pca9685.h

(limited to 'include/linux')

diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 6de9dfbf61c1..6713dbb6bfda 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -300,16 +300,6 @@ config LEDS_PCA963X
 	  LED driver chip accessed via the I2C bus. Supported
 	  devices include PCA9633 and PCA9634
 
-config LEDS_PCA9685
-	tristate "LED support for PCA9685 I2C chip"
-	depends on LEDS_CLASS
-	depends on I2C
-	help
-	  This option enables support for LEDs connected to the PCA9685
-	  LED driver chip accessed via the I2C bus.
-	  The PCA9685 offers 12-bit PWM (4095 levels of brightness) on
-	  16 individual channels.
-
 config LEDS_WM831X_STATUS
 	tristate "LED support for status LEDs on WM831x PMICs"
 	depends on LEDS_CLASS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 3cd76dbd9be2..8979b0b2c85e 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -36,7 +36,6 @@ obj-$(CONFIG_LEDS_OT200)		+= leds-ot200.o
 obj-$(CONFIG_LEDS_FSG)			+= leds-fsg.o
 obj-$(CONFIG_LEDS_PCA955X)		+= leds-pca955x.o
 obj-$(CONFIG_LEDS_PCA963X)		+= leds-pca963x.o
-obj-$(CONFIG_LEDS_PCA9685)		+= leds-pca9685.o
 obj-$(CONFIG_LEDS_DA903X)		+= leds-da903x.o
 obj-$(CONFIG_LEDS_DA9052)		+= leds-da9052.o
 obj-$(CONFIG_LEDS_WM831X_STATUS)	+= leds-wm831x-status.o
diff --git a/drivers/leds/leds-pca9685.c b/drivers/leds/leds-pca9685.c
deleted file mode 100644
index 6e1ef3a9d6ef..000000000000
--- a/drivers/leds/leds-pca9685.c
+++ /dev/null
@@ -1,213 +0,0 @@
-/*
- * Copyright 2013 Maximilian Güntner <maximilian.guentner@gmail.com>
- *
- * This file is subject to the terms and conditions of version 2 of
- * the GNU General Public License.  See the file COPYING in the main
- * directory of this archive for more details.
- *
- * Based on leds-pca963x.c driver by
- * Peter Meerwald <p.meerwald@bct-electronic.com>
- *
- * Driver for the NXP PCA9685 12-Bit PWM LED driver chip.
- *
- */
-
-#include <linux/ctype.h>
-#include <linux/delay.h>
-#include <linux/err.h>
-#include <linux/i2c.h>
-#include <linux/leds.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/workqueue.h>
-
-#include <linux/platform_data/leds-pca9685.h>
-
-/* Register Addresses */
-#define PCA9685_MODE1 0x00
-#define PCA9685_MODE2 0x01
-#define PCA9685_LED0_ON_L 0x06
-#define PCA9685_ALL_LED_ON_L 0xFA
-
-/* MODE1 Register */
-#define PCA9685_ALLCALL 0x00
-#define PCA9685_SLEEP   0x04
-#define PCA9685_AI      0x05
-
-/* MODE2 Register */
-#define PCA9685_INVRT   0x04
-#define PCA9685_OUTDRV  0x02
-
-static const struct i2c_device_id pca9685_id[] = {
-	{ "pca9685", 0 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, pca9685_id);
-
-struct pca9685_led {
-	struct i2c_client *client;
-	struct work_struct work;
-	u16 brightness;
-	struct led_classdev led_cdev;
-	int led_num; /* 0-15 */
-	char name[32];
-};
-
-static void pca9685_write_msg(struct i2c_client *client, u8 *buf, u8 len)
-{
-	struct i2c_msg msg = {
-		.addr = client->addr,
-		.flags = 0x00,
-		.len = len,
-		.buf = buf
-	};
-	i2c_transfer(client->adapter, &msg, 1);
-}
-
-static void pca9685_all_off(struct i2c_client *client)
-{
-	u8 i2c_buffer[5] = {PCA9685_ALL_LED_ON_L, 0x00, 0x00, 0x00, 0x10};
-	pca9685_write_msg(client, i2c_buffer, 5);
-}
-
-static void pca9685_led_work(struct work_struct *work)
-{
-	struct pca9685_led *pca9685;
-	u8 i2c_buffer[5];
-
-	pca9685 = container_of(work, struct pca9685_led, work);
-	i2c_buffer[0] = PCA9685_LED0_ON_L + 4 * pca9685->led_num;
-	/*
-	 * 4095 is the maximum brightness, so we set the ON time to 0x1000
-	 * which disables the PWM generator for that LED
-	 */
-	if (pca9685->brightness == 4095)
-		*((__le16 *)(i2c_buffer+1)) = cpu_to_le16(0x1000);
-	else
-		*((__le16 *)(i2c_buffer+1)) = 0x0000;
-
-	if (pca9685->brightness == 0)
-		*((__le16 *)(i2c_buffer+3)) = cpu_to_le16(0x1000);
-	else if (pca9685->brightness == 4095)
-		*((__le16 *)(i2c_buffer+3)) = 0x0000;
-	else
-		*((__le16 *)(i2c_buffer+3)) = cpu_to_le16(pca9685->brightness);
-
-	pca9685_write_msg(pca9685->client, i2c_buffer, 5);
-}
-
-static void pca9685_led_set(struct led_classdev *led_cdev,
-		enum led_brightness value)
-{
-	struct pca9685_led *pca9685;
-	pca9685 = container_of(led_cdev, struct pca9685_led, led_cdev);
-	pca9685->brightness = value;
-
-	schedule_work(&pca9685->work);
-}
-
-static int pca9685_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
-{
-	struct pca9685_led *pca9685;
-	struct pca9685_platform_data *pdata;
-	int err;
-	u8 i;
-
-	pdata = dev_get_platdata(&client->dev);
-	if (pdata) {
-		if (pdata->leds.num_leds < 1 || pdata->leds.num_leds > 15) {
-			dev_err(&client->dev, "board info must claim 1-16 LEDs");
-			return -EINVAL;
-		}
-	}
-
-	pca9685 = devm_kzalloc(&client->dev, 16 * sizeof(*pca9685), GFP_KERNEL);
-	if (!pca9685)
-		return -ENOMEM;
-
-	i2c_set_clientdata(client, pca9685);
-	pca9685_all_off(client);
-
-	for (i = 0; i < 16; i++) {
-		pca9685[i].client = client;
-		pca9685[i].led_num = i;
-		pca9685[i].name[0] = '\0';
-		if (pdata && i < pdata->leds.num_leds) {
-			if (pdata->leds.leds[i].name)
-				strncpy(pca9685[i].name,
-					pdata->leds.leds[i].name,
-					sizeof(pca9685[i].name)-1);
-			if (pdata->leds.leds[i].default_trigger)
-				pca9685[i].led_cdev.default_trigger =
-					pdata->leds.leds[i].default_trigger;
-		}
-		if (strlen(pca9685[i].name) == 0) {
-			/*
-			 * Write adapter and address to the name as well.
-			 * Otherwise multiple chips attached to one host would
-			 * not work.
-			 */
-			snprintf(pca9685[i].name, sizeof(pca9685[i].name),
-					"pca9685:%d:x%.2x:%d",
-					client->adapter->nr, client->addr, i);
-		}
-		pca9685[i].led_cdev.name = pca9685[i].name;
-		pca9685[i].led_cdev.max_brightness = 0xfff;
-		pca9685[i].led_cdev.brightness_set = pca9685_led_set;
-
-		INIT_WORK(&pca9685[i].work, pca9685_led_work);
-		err = led_classdev_register(&client->dev, &pca9685[i].led_cdev);
-		if (err < 0)
-			goto exit;
-	}
-
-	if (pdata)
-		i2c_smbus_write_byte_data(client, PCA9685_MODE2,
-			pdata->outdrv << PCA9685_OUTDRV |
-			pdata->inverted << PCA9685_INVRT);
-	else
-		i2c_smbus_write_byte_data(client, PCA9685_MODE2,
-			PCA9685_TOTEM_POLE << PCA9685_OUTDRV);
-	/* Enable Auto-Increment, enable oscillator, ALLCALL/SUBADDR disabled */
-	i2c_smbus_write_byte_data(client, PCA9685_MODE1, BIT(PCA9685_AI));
-
-	return 0;
-
-exit:
-	while (i--) {
-		led_classdev_unregister(&pca9685[i].led_cdev);
-		cancel_work_sync(&pca9685[i].work);
-	}
-	return err;
-}
-
-static int pca9685_remove(struct i2c_client *client)
-{
-	struct pca9685_led *pca9685 = i2c_get_clientdata(client);
-	u8 i;
-
-	for (i = 0; i < 16; i++) {
-		led_classdev_unregister(&pca9685[i].led_cdev);
-		cancel_work_sync(&pca9685[i].work);
-	}
-	pca9685_all_off(client);
-	return 0;
-}
-
-static struct i2c_driver pca9685_driver = {
-	.driver = {
-		.name = "leds-pca9685",
-		.owner = THIS_MODULE,
-	},
-	.probe = pca9685_probe,
-	.remove = pca9685_remove,
-	.id_table = pca9685_id,
-};
-
-module_i2c_driver(pca9685_driver);
-
-MODULE_AUTHOR("Maximilian Güntner <maximilian.guentner@gmail.com>");
-MODULE_DESCRIPTION("PCA9685 LED Driver");
-MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/leds-pca9685.h b/include/linux/platform_data/leds-pca9685.h
deleted file mode 100644
index 778e9e4249cc..000000000000
--- a/include/linux/platform_data/leds-pca9685.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright 2013 Maximilian Güntner <maximilian.guentner@gmail.com>
- *
- * This file is subject to the terms and conditions of version 2 of
- * the GNU General Public License.  See the file COPYING in the main
- * directory of this archive for more details.
- *
- * Based on leds-pca963x.h by Peter Meerwald <p.meerwald@bct-electronic.com>
- *
- * LED driver for the NXP PCA9685 PWM chip
- *
- */
-
-#ifndef __LINUX_PCA9685_H
-#define __LINUX_PCA9685_H
-
-#include <linux/leds.h>
-
-enum pca9685_outdrv {
-	PCA9685_OPEN_DRAIN,
-	PCA9685_TOTEM_POLE,
-};
-
-enum pca9685_inverted {
-	PCA9685_NOT_INVERTED,
-	PCA9685_INVERTED,
-};
-
-struct pca9685_platform_data {
-	struct led_platform_data leds;
-	enum pca9685_outdrv outdrv;
-	enum pca9685_inverted inverted;
-};
-
-#endif /* __LINUX_PCA9685_H */
-- 
cgit 


From 69dd0f848879328ae6c6f54c2ec80e49eef042d8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 9 Apr 2014 14:30:10 +0200
Subject: sched/idle: Remove TS_POLLING support

Now that there are no architectures left using it, kill the support
for TS_POLLING.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/n/tip-6yurip2tfix2f4bfc5agu2s0@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 46 ++--------------------------------------------
 1 file changed, 2 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index accb66bfd722..725eef121c9f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2775,51 +2775,9 @@ static inline int spin_needbreak(spinlock_t *lock)
 
 /*
  * Idle thread specific functions to determine the need_resched
- * polling state. We have two versions, one based on TS_POLLING in
- * thread_info.status and one based on TIF_POLLING_NRFLAG in
- * thread_info.flags
+ * polling state.
  */
-#ifdef TS_POLLING
-static inline int tsk_is_polling(struct task_struct *p)
-{
-	return task_thread_info(p)->status & TS_POLLING;
-}
-static inline void __current_set_polling(void)
-{
-	current_thread_info()->status |= TS_POLLING;
-}
-
-static inline bool __must_check current_set_polling_and_test(void)
-{
-	__current_set_polling();
-
-	/*
-	 * Polling state must be visible before we test NEED_RESCHED,
-	 * paired by resched_task()
-	 */
-	smp_mb();
-
-	return unlikely(tif_need_resched());
-}
-
-static inline void __current_clr_polling(void)
-{
-	current_thread_info()->status &= ~TS_POLLING;
-}
-
-static inline bool __must_check current_clr_polling_and_test(void)
-{
-	__current_clr_polling();
-
-	/*
-	 * Polling state must be visible before we test NEED_RESCHED,
-	 * paired by resched_task()
-	 */
-	smp_mb();
-
-	return unlikely(tif_need_resched());
-}
-#elif defined(TIF_POLLING_NRFLAG)
+#ifdef TIF_POLLING_NRFLAG
 static inline int tsk_is_polling(struct task_struct *p)
 {
 	return test_tsk_thread_flag(p, TIF_POLLING_NRFLAG);
-- 
cgit 


From 5eeaf1f1897372590105f155c6a7110b3fa36aef Mon Sep 17 00:00:00 2001
From: Stratos Karafotis <stratosk@semaphore.gr>
Date: Wed, 7 May 2014 19:33:33 +0300
Subject: cpufreq: Fix build error on some platforms that use
 cpufreq_for_each_*

On platforms that use cpufreq_for_each_* macros, build fails if
CONFIG_CPU_FREQ=n, e.g. ARM/shmobile/koelsch/non-multiplatform:

drivers/built-in.o: In function `clk_round_parent':
clkdev.c:(.text+0xcf168): undefined reference to `cpufreq_next_valid'
drivers/built-in.o: In function `clk_rate_table_find':
clkdev.c:(.text+0xcf820): undefined reference to `cpufreq_next_valid'
make[3]: *** [vmlinux] Error 1

Fix this by making the cpufreq_next_valid function inline and moving it to
cpufreq.h.

Fixes: 27e289dce297 (cpufreq: Introduce macros for cpufreq_frequency_table iteration)
Reported-and-tested-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Stratos Karafotis <stratosk@semaphore.gr>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 11 -----------
 include/linux/cpufreq.h   | 11 +++++++++--
 2 files changed, 9 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index bfe82b63875f..a05c92198b9f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -237,17 +237,6 @@ void cpufreq_cpu_put(struct cpufreq_policy *policy)
 }
 EXPORT_SYMBOL_GPL(cpufreq_cpu_put);
 
-bool cpufreq_next_valid(struct cpufreq_frequency_table **pos)
-{
-	while ((*pos)->frequency != CPUFREQ_TABLE_END)
-		if ((*pos)->frequency != CPUFREQ_ENTRY_INVALID)
-			return true;
-		else
-			(*pos)++;
-	return false;
-}
-EXPORT_SYMBOL_GPL(cpufreq_next_valid);
-
 /*********************************************************************
  *            EXTERNALLY AFFECTING FREQUENCY CHANGES                 *
  *********************************************************************/
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 9d803b529ac2..3f458896d45c 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -489,8 +489,15 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev,
 }
 #endif
 
-
-bool cpufreq_next_valid(struct cpufreq_frequency_table **pos);
+static inline bool cpufreq_next_valid(struct cpufreq_frequency_table **pos)
+{
+	while ((*pos)->frequency != CPUFREQ_TABLE_END)
+		if ((*pos)->frequency != CPUFREQ_ENTRY_INVALID)
+			return true;
+		else
+			(*pos)++;
+	return false;
+}
 
 /*
  * cpufreq_for_each_entry -	iterate over a cpufreq_frequency_table
-- 
cgit 


From 9c9e321455fb806108f9dbb1872bacfd42c6002b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 8 May 2014 23:16:35 +0200
Subject: mfd: stmpe: add optional regulators

The STMPE has VCC and VIO supply lines, and sometimes (as on
Ux500) this comes from a software-controlled regulator. Make
it possible to supply the STMPE with power from these
regulators.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/mfd/stmpe.c       | 18 ++++++++++++++++++
 include/linux/mfd/stmpe.h |  5 +++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 9fa2dd6d38bd..294731be1a15 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -20,6 +20,7 @@
 #include <linux/slab.h>
 #include <linux/mfd/core.h>
 #include <linux/delay.h>
+#include <linux/regulator/consumer.h>
 #include "stmpe.h"
 
 static int __stmpe_enable(struct stmpe *stmpe, unsigned int blocks)
@@ -1186,6 +1187,18 @@ int stmpe_probe(struct stmpe_client_info *ci, int partnum)
 	stmpe->variant = stmpe_variant_info[partnum];
 	stmpe->regs = stmpe->variant->regs;
 	stmpe->num_gpios = stmpe->variant->num_gpios;
+	stmpe->vcc = devm_regulator_get_optional(ci->dev, "vcc");
+	if (!IS_ERR(stmpe->vcc)) {
+		ret = regulator_enable(stmpe->vcc);
+		if (ret)
+			dev_warn(ci->dev, "failed to enable VCC supply\n");
+	}
+	stmpe->vio = devm_regulator_get_optional(ci->dev, "vio");
+	if (!IS_ERR(stmpe->vio)) {
+		ret = regulator_enable(stmpe->vio);
+		if (ret)
+			dev_warn(ci->dev, "failed to enable VIO supply\n");
+	}
 	dev_set_drvdata(stmpe->dev, stmpe);
 
 	if (ci->init)
@@ -1252,6 +1265,11 @@ int stmpe_probe(struct stmpe_client_info *ci, int partnum)
 
 int stmpe_remove(struct stmpe *stmpe)
 {
+	if (!IS_ERR(stmpe->vio))
+		regulator_disable(stmpe->vio);
+	if (!IS_ERR(stmpe->vcc))
+		regulator_disable(stmpe->vcc);
+
 	mfd_remove_devices(stmpe->dev);
 
 	return 0;
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 48395a69a7e9..980898620e57 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -11,6 +11,7 @@
 #include <linux/mutex.h>
 
 struct device;
+struct regulator;
 
 enum stmpe_block {
 	STMPE_BLOCK_GPIO	= 1 << 0,
@@ -62,6 +63,8 @@ struct stmpe_client_info;
 
 /**
  * struct stmpe - STMPE MFD structure
+ * @vcc: optional VCC regulator
+ * @vio: optional VIO regulator
  * @lock: lock protecting I/O operations
  * @irq_lock: IRQ bus lock
  * @dev: device, mostly for dev_dbg()
@@ -80,6 +83,8 @@ struct stmpe_client_info;
  * @pdata: platform data
  */
 struct stmpe {
+	struct regulator *vcc;
+	struct regulator *vio;
 	struct mutex lock;
 	struct mutex irq_lock;
 	struct device *dev;
-- 
cgit 


From 81c44c2b2ce358b1c5fe0065dc5d2e2010f39f1b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 24 Apr 2014 13:28:20 +0100
Subject: video/omap: fix modular build

The framebuffer layer can be a loadable module, which forces
omapfb to be a module as well. However, this breaks the lcd
drivers, which are linked into the omapfb driver but each
have their own module_init() function. To solve this,
we split out the lcd drivers into separate modules and
export omapfb_register_panel, which is the only interface
required between the main omapfb driver and the lcd panel
drivers.

We also have to introduce a new Kconfig symbol for H3, since
that lcd driver has a dependency on TPS65010, which we can
express better in Kconfig than Makefile syntax.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Peter Griffin <peter.griffin@linaro.org>
Cc: Jean-Christophe Plagniol-Villard <plagnioj@jcrosoft.com>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Cc: linux-fbdev@vger.kernel.org
Cc: linux-omap@vger.kernel.org
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 drivers/video/fbdev/omap/Kconfig       |  9 +++++++++
 drivers/video/fbdev/omap/Makefile      | 23 ++++++++++++-----------
 drivers/video/fbdev/omap/omapfb_main.c |  1 +
 include/linux/omap-dma.h               |  2 +-
 4 files changed, 23 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/omap/Kconfig b/drivers/video/fbdev/omap/Kconfig
index 0bc3a936ce2b..18c4cb0d5690 100644
--- a/drivers/video/fbdev/omap/Kconfig
+++ b/drivers/video/fbdev/omap/Kconfig
@@ -39,6 +39,15 @@ config FB_OMAP_LCD_MIPID
 	  the Mobile Industry Processor Interface DBI-C/DCS
 	  specification. (Supported LCDs: Philips LPH8923, Sharp LS041Y3)
 
+config FB_OMAP_LCD_H3
+	bool "TPS65010 LCD controller on OMAP-H3"
+	depends on MACH_OMAP_H3
+	depends on TPS65010
+	default y
+	help
+	  Say Y here if you want to have support for the LCD on the
+	  H3 board.
+
 config FB_OMAP_DMA_TUNE
         bool "Set DMA SDRAM access priority high"
         depends on FB_OMAP
diff --git a/drivers/video/fbdev/omap/Makefile b/drivers/video/fbdev/omap/Makefile
index 1927faffb5bc..732e0718be53 100644
--- a/drivers/video/fbdev/omap/Makefile
+++ b/drivers/video/fbdev/omap/Makefile
@@ -10,17 +10,18 @@ objs-y$(CONFIG_FB_OMAP_LCDC_EXTERNAL) += sossi.o
 
 objs-y$(CONFIG_FB_OMAP_LCDC_HWA742) += hwa742.o
 
-objs-y$(CONFIG_MACH_AMS_DELTA) += lcd_ams_delta.o
-objs-y$(CONFIG_MACH_OMAP_H3) += lcd_h3.o
-objs-y$(CONFIG_MACH_OMAP_PALMTE) += lcd_palmte.o
-objs-y$(CONFIG_MACH_OMAP_PALMTT) += lcd_palmtt.o
-objs-y$(CONFIG_MACH_OMAP_PALMZ71) += lcd_palmz71.o
-objs-$(CONFIG_ARCH_OMAP16XX)$(CONFIG_MACH_OMAP_INNOVATOR) += lcd_inn1610.o
-objs-$(CONFIG_ARCH_OMAP15XX)$(CONFIG_MACH_OMAP_INNOVATOR) += lcd_inn1510.o
-objs-y$(CONFIG_MACH_OMAP_OSK) += lcd_osk.o
-
-objs-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o
-objs-y$(CONFIG_MACH_HERALD) += lcd_htcherald.o
+lcds-y$(CONFIG_MACH_AMS_DELTA) += lcd_ams_delta.o
+lcds-y$(CONFIG_FB_OMAP_LCD_H3) += lcd_h3.o
+lcds-y$(CONFIG_MACH_OMAP_PALMTE) += lcd_palmte.o
+lcds-y$(CONFIG_MACH_OMAP_PALMTT) += lcd_palmtt.o
+lcds-y$(CONFIG_MACH_OMAP_PALMZ71) += lcd_palmz71.o
+lcds-$(CONFIG_ARCH_OMAP16XX)$(CONFIG_MACH_OMAP_INNOVATOR) += lcd_inn1610.o
+lcds-$(CONFIG_ARCH_OMAP15XX)$(CONFIG_MACH_OMAP_INNOVATOR) += lcd_inn1510.o
+lcds-y$(CONFIG_MACH_OMAP_OSK) += lcd_osk.o
+
+lcds-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o
+lcds-y$(CONFIG_MACH_HERALD) += lcd_htcherald.o
 
 omapfb-objs := $(objs-yy)
 
+obj-$(CONFIG_FB_OMAP) += $(lcds-yy)
diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c
index e4fc6d9b5371..d8d028d98711 100644
--- a/drivers/video/fbdev/omap/omapfb_main.c
+++ b/drivers/video/fbdev/omap/omapfb_main.c
@@ -1823,6 +1823,7 @@ void omapfb_register_panel(struct lcd_panel *panel)
 	if (fbdev_pdev != NULL)
 		omapfb_do_probe(fbdev_pdev, fbdev_panel);
 }
+EXPORT_SYMBOL_GPL(omapfb_register_panel);
 
 /* Called when the device is being detached from the driver */
 static int omapfb_remove(struct platform_device *pdev)
diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index 41a13e70f41f..0a1a2e2d5c21 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h
@@ -393,7 +393,7 @@ extern int omap_modify_dma_chain_params(int chain_id,
 extern int omap_dma_chain_status(int chain_id);
 #endif
 
-#if defined(CONFIG_ARCH_OMAP1) && defined(CONFIG_FB_OMAP)
+#if defined(CONFIG_ARCH_OMAP1) && IS_ENABLED(CONFIG_FB_OMAP)
 #include <mach/lcd_dma.h>
 #else
 static inline int omap_lcd_dma_running(void)
-- 
cgit 


From 29a1f2333e07bbbecb920cc78fd035fe8f53207a Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 25 Apr 2014 17:10:06 +0200
Subject: gpio: Add helpers for optional GPIOs

Introduce gpiod_get_optional() and gpiod_get_index_optional() helpers
that make it easier for drivers to handle optional GPIOs.

Currently in order to handle optional GPIOs, a driver needs to special
case error handling for -ENOENT, such as this:

	gpio = gpiod_get(dev, "foo");
	if (IS_ERR(gpio)) {
		if (PTR_ERR(gpio) != -ENOENT)
			return PTR_ERR(gpio);

		gpio = NULL;
	}

	if (gpio) {
		/* set up GPIO */
	}

With these new helpers the above is reduced to:

	gpio = gpiod_get_optional(dev, "foo");
	if (IS_ERR(gpio))
		return PTR_ERR(gpio);

	if (gpio) {
		/* set up GPIO */
	}

While at it, device-managed variants of these functions are also
provided.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/driver-model/devres.txt |  2 ++
 drivers/gpio/devres.c                 | 43 +++++++++++++++++++++++++++++++++++
 drivers/gpio/gpiolib.c                | 43 +++++++++++++++++++++++++++++++++++
 include/linux/gpio/consumer.h         | 40 ++++++++++++++++++++++++++++++++
 4 files changed, 128 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 8ff1167cfedf..10b8c5d2c797 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -312,4 +312,6 @@ SPI
 GPIO
   devm_gpiod_get()
   devm_gpiod_get_index()
+  devm_gpiod_get_optional()
+  devm_gpiod_get_index_optional()
   devm_gpiod_put()
diff --git a/drivers/gpio/devres.c b/drivers/gpio/devres.c
index 307464fd015f..65978cf85f79 100644
--- a/drivers/gpio/devres.c
+++ b/drivers/gpio/devres.c
@@ -51,6 +51,22 @@ struct gpio_desc *__must_check devm_gpiod_get(struct device *dev,
 }
 EXPORT_SYMBOL(devm_gpiod_get);
 
+/**
+ * devm_gpiod_get_optional - Resource-managed gpiod_get_optional()
+ * @dev: GPIO consumer
+ * @con_id: function within the GPIO consumer
+ *
+ * Managed gpiod_get_optional(). GPIO descriptors returned from this function
+ * are automatically disposed on driver detach. See gpiod_get_optional() for
+ * detailed information about behavior and return values.
+ */
+struct gpio_desc *__must_check devm_gpiod_get_optional(struct device *dev,
+						       const char *con_id)
+{
+	return devm_gpiod_get_index_optional(dev, con_id, 0);
+}
+EXPORT_SYMBOL(devm_gpiod_get_optional);
+
 /**
  * devm_gpiod_get_index - Resource-managed gpiod_get_index()
  * @dev:	GPIO consumer
@@ -86,6 +102,33 @@ struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
 }
 EXPORT_SYMBOL(devm_gpiod_get_index);
 
+/**
+ * devm_gpiod_get_index_optional - Resource-managed gpiod_get_index_optional()
+ * @dev: GPIO consumer
+ * @con_id: function within the GPIO consumer
+ * @index: index of the GPIO to obtain in the consumer
+ *
+ * Managed gpiod_get_index_optional(). GPIO descriptors returned from this
+ * function are automatically disposed on driver detach. See
+ * gpiod_get_index_optional() for detailed information about behavior and
+ * return values.
+ */
+struct gpio_desc *__must_check devm_gpiod_get_index_optional(struct device *dev,
+							     const char *con_id,
+							     unsigned int index)
+{
+	struct gpio_desc *desc;
+
+	desc = devm_gpiod_get_index(dev, con_id, index);
+	if (IS_ERR(desc)) {
+		if (PTR_ERR(desc) == -ENOENT)
+			return NULL;
+	}
+
+	return desc;
+}
+EXPORT_SYMBOL(devm_gpiod_get_index_optional);
+
 /**
  * devm_gpiod_put - Resource-managed gpiod_put()
  * @desc:	GPIO descriptor to dispose of
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 4ad110e793c5..d9c9cb4665db 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2737,6 +2737,22 @@ struct gpio_desc *__must_check gpiod_get(struct device *dev, const char *con_id)
 }
 EXPORT_SYMBOL_GPL(gpiod_get);
 
+/**
+ * gpiod_get_optional - obtain an optional GPIO for a given GPIO function
+ * @dev: GPIO consumer, can be NULL for system-global GPIOs
+ * @con_id: function within the GPIO consumer
+ *
+ * This is equivalent to gpiod_get(), except that when no GPIO was assigned to
+ * the requested function it will return NULL. This is convenient for drivers
+ * that need to handle optional GPIOs.
+ */
+struct gpio_desc *__must_check gpiod_get_optional(struct device *dev,
+						  const char *con_id)
+{
+	return gpiod_get_index_optional(dev, con_id, 0);
+}
+EXPORT_SYMBOL_GPL(gpiod_get_optional);
+
 /**
  * gpiod_get_index - obtain a GPIO from a multi-index GPIO function
  * @dev:	GPIO consumer, can be NULL for system-global GPIOs
@@ -2799,6 +2815,33 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(gpiod_get_index);
 
+/**
+ * gpiod_get_index_optional - obtain an optional GPIO from a multi-index GPIO
+ *                            function
+ * @dev: GPIO consumer, can be NULL for system-global GPIOs
+ * @con_id: function within the GPIO consumer
+ * @index: index of the GPIO to obtain in the consumer
+ *
+ * This is equivalent to gpiod_get_index(), except that when no GPIO with the
+ * specified index was assigned to the requested function it will return NULL.
+ * This is convenient for drivers that need to handle optional GPIOs.
+ */
+struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev,
+							const char *con_id,
+							unsigned int index)
+{
+	struct gpio_desc *desc;
+
+	desc = gpiod_get_index(dev, con_id, index);
+	if (IS_ERR(desc)) {
+		if (PTR_ERR(desc) == -ENOENT)
+			return NULL;
+	}
+
+	return desc;
+}
+EXPORT_SYMBOL_GPL(gpiod_get_index_optional);
+
 /**
  * gpiod_put - dispose of a GPIO descriptor
  * @desc:	GPIO descriptor to dispose of
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index bed128e8f4b1..6a37ef0dc59c 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -23,6 +23,12 @@ struct gpio_desc *__must_check gpiod_get(struct device *dev,
 struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 					       const char *con_id,
 					       unsigned int idx);
+struct gpio_desc *__must_check gpiod_get_optional(struct device *dev,
+						  const char *con_id);
+struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev,
+							const char *con_id,
+							unsigned int index);
+
 void gpiod_put(struct gpio_desc *desc);
 
 struct gpio_desc *__must_check devm_gpiod_get(struct device *dev,
@@ -30,6 +36,12 @@ struct gpio_desc *__must_check devm_gpiod_get(struct device *dev,
 struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
 						    const char *con_id,
 						    unsigned int idx);
+struct gpio_desc *__must_check devm_gpiod_get_optional(struct device *dev,
+						       const char *con_id);
+struct gpio_desc *__must_check
+devm_gpiod_get_index_optional(struct device *dev, const char *con_id,
+			      unsigned int index);
+
 void devm_gpiod_put(struct device *dev, struct gpio_desc *desc);
 
 int gpiod_get_direction(const struct gpio_desc *desc);
@@ -73,6 +85,20 @@ static inline struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 {
 	return ERR_PTR(-ENOSYS);
 }
+
+static inline struct gpio_desc *__must_check
+gpiod_get_optional(struct device *dev, const char *con_id)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline struct gpio_desc *__must_check
+gpiod_get_index_optional(struct device *dev, const char *con_id,
+			 unsigned int index)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
 static inline void gpiod_put(struct gpio_desc *desc)
 {
 	might_sleep();
@@ -93,6 +119,20 @@ struct gpio_desc *__must_check devm_gpiod_get_index(struct device *dev,
 {
 	return ERR_PTR(-ENOSYS);
 }
+
+static inline struct gpio_desc *__must_check
+devm_gpiod_get_optional(struct device *dev, const char *con_id)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline struct gpio_desc *__must_check
+devm_gpiod_get_index_optional(struct device *dev, const char *con_id,
+			      unsigned int index)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
 static inline void devm_gpiod_put(struct device *dev, struct gpio_desc *desc)
 {
 	might_sleep();
-- 
cgit 


From af76e555e5e29e08eb8ac1f7878e23dbf0d6741f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 May 2014 12:12:45 +0200
Subject: blk-mq: initialize struct request fields individually

This allows us to avoid a non-atomic memset over ->atomic_flags as well
as killing lots of duplicate initializations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/blkdev.h |  7 ++++---
 2 files changed, 49 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3fdb097ebe5e..492f49f96459 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -82,9 +82,7 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
 	tag = blk_mq_get_tag(hctx->tags, gfp, reserved);
 	if (tag != BLK_MQ_TAG_FAIL) {
 		rq = hctx->tags->rqs[tag];
-		blk_rq_init(hctx->queue, rq);
 		rq->tag = tag;
-
 		return rq;
 	}
 
@@ -187,10 +185,54 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
 
+	INIT_LIST_HEAD(&rq->queuelist);
+	/* csd/requeue_work/fifo_time is initialized before use */
+	rq->q = q;
 	rq->mq_ctx = ctx;
 	rq->cmd_flags = rw_flags;
+	rq->cmd_type = 0;
+	/* do not touch atomic flags, it needs atomic ops against the timer */
+	rq->cpu = -1;
+	rq->__data_len = 0;
+	rq->__sector = (sector_t) -1;
+	rq->bio = NULL;
+	rq->biotail = NULL;
+	INIT_HLIST_NODE(&rq->hash);
+	RB_CLEAR_NODE(&rq->rb_node);
+	memset(&rq->flush, 0, max(sizeof(rq->flush), sizeof(rq->elv)));
+	rq->rq_disk = NULL;
+	rq->part = NULL;
 	rq->start_time = jiffies;
+#ifdef CONFIG_BLK_CGROUP
+	rq->rl = NULL;
 	set_start_time_ns(rq);
+	rq->io_start_time_ns = 0;
+#endif
+	rq->nr_phys_segments = 0;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+	rq->nr_integrity_segments = 0;
+#endif
+	rq->ioprio = 0;
+	rq->special = NULL;
+	/* tag was already set */
+	rq->errors = 0;
+	memset(rq->__cmd, 0, sizeof(rq->__cmd));
+	rq->cmd = rq->__cmd;
+	rq->cmd_len = BLK_MAX_CDB;
+
+	rq->extra_len = 0;
+	rq->sense_len = 0;
+	rq->resid_len = 0;
+	rq->sense = NULL;
+
+	rq->deadline = 0;
+	INIT_LIST_HEAD(&rq->timeout_list);
+	rq->timeout = 0;
+	rq->retries = 0;
+	rq->end_io = NULL;
+	rq->end_io_data = NULL;
+	rq->next_rq = NULL;
+
 	ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
 }
 
@@ -258,6 +300,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 	const int tag = rq->tag;
 	struct request_queue *q = rq->q;
 
+	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
 	blk_mq_put_tag(hctx->tags, tag);
 	blk_mq_queue_exit(q);
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 20b26d4e53a2..94b27210641b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -90,9 +90,10 @@ enum rq_cmd_type_bits {
 #define BLK_MAX_CDB	16
 
 /*
- * try to put the fields that are referenced together in the same cacheline.
- * if you modify this structure, be sure to check block/blk-core.c:blk_rq_init()
- * as well!
+ * Try to put the fields that are referenced together in the same cacheline.
+ *
+ * If you modify this structure, make sure to update blk_rq_init() and
+ * especially blk_mq_rq_ctx_init() to take care of the added fields.
  */
 struct request {
 	struct list_head queuelist;
-- 
cgit 


From 4bb659b156996f2993dc16fad71fec9ee070153c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 9 May 2014 09:36:49 -0600
Subject: blk-mq: implement new and more efficient tagging scheme

blk-mq currently uses percpu_ida for tag allocation. But that only
works well if the ratio between tag space and number of CPUs is
sufficiently high. For most devices and systems, that is not the
case. The end result is that we either only utilize the tag space
partially, or we end up attempting to fully exhaust it and run
into lots of lock contention with stealing between CPUs. This is
not optimal.

This new tagging scheme is a hybrid bitmap allocator. It uses
two tricks to both be SMP friendly and allow full exhaustion
of the space:

1) We cache the last allocated (or freed) tag on a per blk-mq
   software context basis. This allows us to limit the space
   we have to search. The key element here is not caching it
   in the shared tag structure, otherwise we end up dirtying
   more shared cache lines on each allocate/free operation.

2) The tag space is split into cache line sized groups, and
   each context will start off randomly in that space. Even up
   to full utilization of the space, this divides the tag users
   efficiently into cache line groups, avoiding dirtying the same
   one both between allocators and between allocator and freer.

This scheme shows drastically better behaviour, not only on small
tag spaces but on large ones as well. It has been tested extensively
to show better performance for all the cases blk-mq cares about.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-tag.c     | 415 ++++++++++++++++++++++++++++++++++++++++---------
 block/blk-mq-tag.h     |  42 ++++-
 block/blk-mq.c         |  23 ++-
 block/blk-mq.h         |   4 +-
 include/linux/blk-mq.h |   6 +-
 5 files changed, 391 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 1f43d6ee956f..467f3a20b355 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -1,64 +1,257 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/random.h>
 
 #include <linux/blk-mq.h>
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
-void blk_mq_wait_for_tags(struct blk_mq_tags *tags, bool reserved)
+void blk_mq_wait_for_tags(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx,
+			  bool reserved)
 {
-	int tag = blk_mq_get_tag(tags, __GFP_WAIT, reserved);
-	blk_mq_put_tag(tags, tag);
+	int tag, zero = 0;
+
+	tag = blk_mq_get_tag(tags, hctx, &zero, __GFP_WAIT, reserved);
+	blk_mq_put_tag(tags, tag, &zero);
+}
+
+static bool bt_has_free_tags(struct blk_mq_bitmap_tags *bt)
+{
+	int i;
+
+	for (i = 0; i < bt->map_nr; i++) {
+		struct blk_mq_bitmap *bm = &bt->map[i];
+		int ret;
+
+		ret = find_first_zero_bit(&bm->word, bm->depth);
+		if (ret < bm->depth)
+			return true;
+	}
+
+	return false;
 }
 
 bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
 {
-	return !tags ||
-		percpu_ida_free_tags(&tags->free_tags, nr_cpu_ids) != 0;
+	if (!tags)
+		return true;
+
+	return bt_has_free_tags(&tags->bitmap_tags);
+}
+
+static int __bt_get_word(struct blk_mq_bitmap *bm, unsigned int last_tag)
+{
+	int tag, org_last_tag, end;
+
+	org_last_tag = last_tag = TAG_TO_BIT(last_tag);
+	end = bm->depth;
+	do {
+restart:
+		tag = find_next_zero_bit(&bm->word, end, last_tag);
+		if (unlikely(tag >= end)) {
+			/*
+			 * We started with an offset, start from 0 to
+			 * exhaust the map.
+			 */
+			if (org_last_tag && last_tag) {
+				end = last_tag;
+				last_tag = 0;
+				goto restart;
+			}
+			return -1;
+		}
+		last_tag = tag + 1;
+	} while (test_and_set_bit_lock(tag, &bm->word));
+
+	return tag;
+}
+
+/*
+ * Straight forward bitmap tag implementation, where each bit is a tag
+ * (cleared == free, and set == busy). The small twist is using per-cpu
+ * last_tag caches, which blk-mq stores in the blk_mq_ctx software queue
+ * contexts. This enables us to drastically limit the space searched,
+ * without dirtying an extra shared cacheline like we would if we stored
+ * the cache value inside the shared blk_mq_bitmap_tags structure. On top
+ * of that, each word of tags is in a separate cacheline. This means that
+ * multiple users will tend to stick to different cachelines, at least
+ * until the map is exhausted.
+ */
+static int __bt_get(struct blk_mq_bitmap_tags *bt, unsigned int *tag_cache)
+{
+	unsigned int last_tag, org_last_tag;
+	int index, i, tag;
+
+	last_tag = org_last_tag = *tag_cache;
+	index = TAG_TO_INDEX(last_tag);
+
+	for (i = 0; i < bt->map_nr; i++) {
+		tag = __bt_get_word(&bt->map[index], last_tag);
+		if (tag != -1) {
+			tag += index * BITS_PER_LONG;
+			goto done;
+		}
+
+		last_tag = 0;
+		if (++index >= bt->map_nr)
+			index = 0;
+	}
+
+	*tag_cache = 0;
+	return -1;
+
+	/*
+	 * Only update the cache from the allocation path, if we ended
+	 * up using the specific cached tag.
+	 */
+done:
+	if (tag == org_last_tag) {
+		last_tag = tag + 1;
+		if (last_tag >= bt->depth - 1)
+			last_tag = 0;
+
+		*tag_cache = last_tag;
+	}
+
+	return tag;
+}
+
+static inline void bt_index_inc(unsigned int *index)
+{
+	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
+}
+
+static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
+					 struct blk_mq_hw_ctx *hctx)
+{
+	struct bt_wait_state *bs;
+
+	if (!hctx)
+		return &bt->bs[0];
+
+	bs = &bt->bs[hctx->wait_index];
+	bt_index_inc(&hctx->wait_index);
+	return bs;
 }
 
-static unsigned int __blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp)
+static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
+		  unsigned int *last_tag, gfp_t gfp)
 {
+	struct bt_wait_state *bs;
+	DEFINE_WAIT(wait);
 	int tag;
 
-	tag = percpu_ida_alloc(&tags->free_tags, (gfp & __GFP_WAIT) ?
-			       TASK_UNINTERRUPTIBLE : TASK_RUNNING);
-	if (tag < 0)
-		return BLK_MQ_TAG_FAIL;
-	return tag + tags->nr_reserved_tags;
+	tag = __bt_get(bt, last_tag);
+	if (tag != -1)
+		return tag;
+
+	if (!(gfp & __GFP_WAIT))
+		return -1;
+
+	bs = bt_wait_ptr(bt, hctx);
+	do {
+		bool was_empty;
+
+		was_empty = list_empty(&wait.task_list);
+		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
+
+		tag = __bt_get(bt, last_tag);
+		if (tag != -1)
+			break;
+
+		if (was_empty)
+			atomic_set(&bs->wait_cnt, bt->wake_cnt);
+
+		io_schedule();
+	} while (1);
+
+	finish_wait(&bs->wait, &wait);
+	return tag;
+}
+
+static unsigned int __blk_mq_get_tag(struct blk_mq_tags *tags,
+				     struct blk_mq_hw_ctx *hctx,
+				     unsigned int *last_tag, gfp_t gfp)
+{
+	int tag;
+
+	tag = bt_get(&tags->bitmap_tags, hctx, last_tag, gfp);
+	if (tag >= 0)
+		return tag + tags->nr_reserved_tags;
+
+	return BLK_MQ_TAG_FAIL;
 }
 
 static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags,
 					      gfp_t gfp)
 {
-	int tag;
+	int tag, zero = 0;
 
 	if (unlikely(!tags->nr_reserved_tags)) {
 		WARN_ON_ONCE(1);
 		return BLK_MQ_TAG_FAIL;
 	}
 
-	tag = percpu_ida_alloc(&tags->reserved_tags, (gfp & __GFP_WAIT) ?
-			       TASK_UNINTERRUPTIBLE : TASK_RUNNING);
+	tag = bt_get(&tags->breserved_tags, NULL, &zero, gfp);
 	if (tag < 0)
 		return BLK_MQ_TAG_FAIL;
+
 	return tag;
 }
 
-unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp, bool reserved)
+unsigned int blk_mq_get_tag(struct blk_mq_tags *tags,
+			    struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
+			    gfp_t gfp, bool reserved)
 {
 	if (!reserved)
-		return __blk_mq_get_tag(tags, gfp);
+		return __blk_mq_get_tag(tags, hctx, last_tag, gfp);
 
 	return __blk_mq_get_reserved_tag(tags, gfp);
 }
 
+static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
+{
+	int i, wake_index;
+
+	wake_index = bt->wake_index;
+	for (i = 0; i < BT_WAIT_QUEUES; i++) {
+		struct bt_wait_state *bs = &bt->bs[wake_index];
+
+		if (waitqueue_active(&bs->wait)) {
+			if (wake_index != bt->wake_index)
+				bt->wake_index = wake_index;
+
+			return bs;
+		}
+
+		bt_index_inc(&wake_index);
+	}
+
+	return NULL;
+}
+
+static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
+{
+	const int index = TAG_TO_INDEX(tag);
+	struct bt_wait_state *bs;
+
+	clear_bit(TAG_TO_BIT(tag), &bt->map[index].word);
+
+	bs = bt_wake_ptr(bt);
+	if (bs && atomic_dec_and_test(&bs->wait_cnt)) {
+		smp_mb__after_clear_bit();
+		atomic_set(&bs->wait_cnt, bt->wake_cnt);
+		bt_index_inc(&bt->wake_index);
+		wake_up(&bs->wait);
+	}
+}
+
 static void __blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag)
 {
 	BUG_ON(tag >= tags->nr_tags);
 
-	percpu_ida_free(&tags->free_tags, tag - tags->nr_reserved_tags);
+	bt_clear_tag(&tags->bitmap_tags, tag);
 }
 
 static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
@@ -66,22 +259,41 @@ static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
 {
 	BUG_ON(tag >= tags->nr_reserved_tags);
 
-	percpu_ida_free(&tags->reserved_tags, tag);
+	bt_clear_tag(&tags->breserved_tags, tag);
 }
 
-void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag)
+void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag,
+		    unsigned int *last_tag)
 {
-	if (tag >= tags->nr_reserved_tags)
-		__blk_mq_put_tag(tags, tag);
-	else
+	if (tag >= tags->nr_reserved_tags) {
+		const int real_tag = tag - tags->nr_reserved_tags;
+
+		__blk_mq_put_tag(tags, real_tag);
+		*last_tag = real_tag;
+	} else
 		__blk_mq_put_reserved_tag(tags, tag);
 }
 
-static int __blk_mq_tag_iter(unsigned id, void *data)
+static void bt_for_each_free(struct blk_mq_bitmap_tags *bt,
+			     unsigned long *free_map, unsigned int off)
 {
-	unsigned long *tag_map = data;
-	__set_bit(id, tag_map);
-	return 0;
+	int i;
+
+	for (i = 0; i < bt->map_nr; i++) {
+		struct blk_mq_bitmap *bm = &bt->map[i];
+		int bit = 0;
+
+		do {
+			bit = find_next_zero_bit(&bm->word, bm->depth, bit);
+			if (bit >= bm->depth)
+				break;
+
+			__set_bit(bit + off, free_map);
+			bit++;
+		} while (1);
+
+		off += BITS_PER_LONG;
+	}
 }
 
 void blk_mq_tag_busy_iter(struct blk_mq_tags *tags,
@@ -95,21 +307,98 @@ void blk_mq_tag_busy_iter(struct blk_mq_tags *tags,
 	if (!tag_map)
 		return;
 
-	percpu_ida_for_each_free(&tags->free_tags, __blk_mq_tag_iter, tag_map);
+	bt_for_each_free(&tags->bitmap_tags, tag_map, tags->nr_reserved_tags);
 	if (tags->nr_reserved_tags)
-		percpu_ida_for_each_free(&tags->reserved_tags, __blk_mq_tag_iter,
-			tag_map);
+		bt_for_each_free(&tags->breserved_tags, tag_map, 0);
 
 	fn(data, tag_map);
 	kfree(tag_map);
 }
 
+static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
+{
+	unsigned int i, used;
+
+	for (i = 0, used = 0; i < bt->map_nr; i++) {
+		struct blk_mq_bitmap *bm = &bt->map[i];
+
+		used += bitmap_weight(&bm->word, bm->depth);
+	}
+
+	return bt->depth - used;
+}
+
+static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
+			int node, bool reserved)
+{
+	int i;
+
+	/*
+	 * Depth can be zero for reserved tags, that's not a failure
+	 * condition.
+	 */
+	if (depth) {
+		int nr, i, map_depth;
+
+		nr = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
+		bt->map = kzalloc_node(nr * sizeof(struct blk_mq_bitmap),
+						GFP_KERNEL, node);
+		if (!bt->map)
+			return -ENOMEM;
+
+		bt->map_nr = nr;
+		map_depth = depth;
+		for (i = 0; i < nr; i++) {
+			bt->map[i].depth = min(map_depth, BITS_PER_LONG);
+			map_depth -= BITS_PER_LONG;
+		}
+	}
+
+	bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
+	if (!bt->bs) {
+		kfree(bt->map);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < BT_WAIT_QUEUES; i++)
+		init_waitqueue_head(&bt->bs[i].wait);
+
+	bt->wake_cnt = BT_WAIT_BATCH;
+	if (bt->wake_cnt > depth / 4)
+		bt->wake_cnt = max(1U, depth / 4);
+
+	bt->depth = depth;
+	return 0;
+}
+
+static void bt_free(struct blk_mq_bitmap_tags *bt)
+{
+	kfree(bt->map);
+	kfree(bt->bs);
+}
+
+static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
+						   int node)
+{
+	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
+
+	if (bt_alloc(&tags->bitmap_tags, depth, node, false))
+		goto enomem;
+	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true))
+		goto enomem;
+
+	return tags;
+enomem:
+	bt_free(&tags->bitmap_tags);
+	kfree(tags);
+	return NULL;
+}
+
 struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 				     unsigned int reserved_tags, int node)
 {
 	unsigned int nr_tags, nr_cache;
 	struct blk_mq_tags *tags;
-	int ret;
 
 	if (total_tags > BLK_MQ_TAG_MAX) {
 		pr_err("blk-mq: tag depth too large\n");
@@ -121,72 +410,46 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 		return NULL;
 
 	nr_tags = total_tags - reserved_tags;
-	nr_cache = nr_tags / num_possible_cpus();
-
-	if (nr_cache < BLK_MQ_TAG_CACHE_MIN)
-		nr_cache = BLK_MQ_TAG_CACHE_MIN;
-	else if (nr_cache > BLK_MQ_TAG_CACHE_MAX)
-		nr_cache = BLK_MQ_TAG_CACHE_MAX;
+	nr_cache = nr_tags / num_online_cpus();
 
 	tags->nr_tags = total_tags;
 	tags->nr_reserved_tags = reserved_tags;
-	tags->nr_max_cache = nr_cache;
-	tags->nr_batch_move = max(1u, nr_cache / 2);
-
-	ret = __percpu_ida_init(&tags->free_tags, tags->nr_tags -
-				tags->nr_reserved_tags,
-				tags->nr_max_cache,
-				tags->nr_batch_move);
-	if (ret)
-		goto err_free_tags;
-
-	if (reserved_tags) {
-		/*
-		 * With max_cahe and batch set to 1, the allocator fallbacks to
-		 * no cached. It's fine reserved tags allocation is slow.
-		 */
-		ret = __percpu_ida_init(&tags->reserved_tags, reserved_tags,
-				1, 1);
-		if (ret)
-			goto err_reserved_tags;
-	}
 
-	return tags;
-
-err_reserved_tags:
-	percpu_ida_destroy(&tags->free_tags);
-err_free_tags:
-	kfree(tags);
-	return NULL;
+	return blk_mq_init_bitmap_tags(tags, node);
 }
 
 void blk_mq_free_tags(struct blk_mq_tags *tags)
 {
-	percpu_ida_destroy(&tags->free_tags);
-	percpu_ida_destroy(&tags->reserved_tags);
+	bt_free(&tags->bitmap_tags);
+	bt_free(&tags->breserved_tags);
 	kfree(tags);
 }
 
+void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag)
+{
+	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
+
+	if (depth > 1)
+		*tag = prandom_u32() % (depth - 1);
+	else
+		*tag = 0;
+}
+
 ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
 {
 	char *orig_page = page;
-	unsigned int cpu;
+	unsigned int free, res;
 
 	if (!tags)
 		return 0;
 
-	page += sprintf(page, "nr_tags=%u, reserved_tags=%u, batch_move=%u,"
-			" max_cache=%u\n", tags->nr_tags, tags->nr_reserved_tags,
-			tags->nr_batch_move, tags->nr_max_cache);
+	page += sprintf(page, "nr_tags=%u, reserved_tags=%u\n",
+			tags->nr_tags, tags->nr_reserved_tags);
 
-	page += sprintf(page, "nr_free=%u, nr_reserved=%u\n",
-			percpu_ida_free_tags(&tags->free_tags, nr_cpu_ids),
-			percpu_ida_free_tags(&tags->reserved_tags, nr_cpu_ids));
+	free = bt_unused_tags(&tags->bitmap_tags);
+	res = bt_unused_tags(&tags->breserved_tags);
 
-	for_each_possible_cpu(cpu) {
-		page += sprintf(page, "  cpu%02u: nr_free=%u\n", cpu,
-				percpu_ida_free_tags(&tags->free_tags, cpu));
-	}
+	page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
 
 	return page - orig_page;
 }
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index c8e0645ea331..06d4a2f0f7a0 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -1,7 +1,34 @@
 #ifndef INT_BLK_MQ_TAG_H
 #define INT_BLK_MQ_TAG_H
 
-#include <linux/percpu_ida.h>
+enum {
+	BT_WAIT_QUEUES	= 8,
+	BT_WAIT_BATCH	= 8,
+};
+
+struct bt_wait_state {
+	atomic_t wait_cnt;
+	wait_queue_head_t wait;
+} ____cacheline_aligned_in_smp;
+
+#define TAG_TO_INDEX(tag)	((tag) / BITS_PER_LONG)
+#define TAG_TO_BIT(tag)		((tag) & (BITS_PER_LONG - 1))
+
+struct blk_mq_bitmap {
+	unsigned long word;
+	unsigned long depth;
+} ____cacheline_aligned_in_smp;
+
+struct blk_mq_bitmap_tags {
+	unsigned int depth;
+	unsigned int wake_cnt;
+
+	struct blk_mq_bitmap *map;
+	unsigned int map_nr;
+
+	unsigned int wake_index;
+	struct bt_wait_state *bs;
+};
 
 /*
  * Tag address space map.
@@ -9,11 +36,9 @@
 struct blk_mq_tags {
 	unsigned int nr_tags;
 	unsigned int nr_reserved_tags;
-	unsigned int nr_batch_move;
-	unsigned int nr_max_cache;
 
-	struct percpu_ida free_tags;
-	struct percpu_ida reserved_tags;
+	struct blk_mq_bitmap_tags bitmap_tags;
+	struct blk_mq_bitmap_tags breserved_tags;
 
 	struct request **rqs;
 	struct list_head page_list;
@@ -23,12 +48,13 @@ struct blk_mq_tags {
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 
-extern unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp, bool reserved);
-extern void blk_mq_wait_for_tags(struct blk_mq_tags *tags, bool reserved);
-extern void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag);
+extern unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
+extern void blk_mq_wait_for_tags(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx, bool reserved);
+extern void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag, unsigned int *last_tag);
 extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data);
 extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
+extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
 
 enum {
 	BLK_MQ_TAG_CACHE_MIN	= 1,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 492f49f96459..9f07a266f7ab 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -74,12 +74,13 @@ static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
 }
 
 static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
+					      struct blk_mq_ctx *ctx,
 					      gfp_t gfp, bool reserved)
 {
 	struct request *rq;
 	unsigned int tag;
 
-	tag = blk_mq_get_tag(hctx->tags, gfp, reserved);
+	tag = blk_mq_get_tag(hctx->tags, hctx, &ctx->last_tag, gfp, reserved);
 	if (tag != BLK_MQ_TAG_FAIL) {
 		rq = hctx->tags->rqs[tag];
 		rq->tag = tag;
@@ -246,7 +247,8 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 		struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
 		struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-		rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved);
+		rq = __blk_mq_alloc_request(hctx, ctx, gfp & ~__GFP_WAIT,
+						reserved);
 		if (rq) {
 			blk_mq_rq_ctx_init(q, ctx, rq, rw);
 			break;
@@ -260,7 +262,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 			break;
 		}
 
-		blk_mq_wait_for_tags(hctx->tags, reserved);
+		blk_mq_wait_for_tags(hctx->tags, hctx, reserved);
 	} while (1);
 
 	return rq;
@@ -278,6 +280,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp)
 		blk_mq_put_ctx(rq->mq_ctx);
 	return rq;
 }
+EXPORT_SYMBOL(blk_mq_alloc_request);
 
 struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw,
 					      gfp_t gfp)
@@ -301,7 +304,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 	struct request_queue *q = rq->q;
 
 	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
-	blk_mq_put_tag(hctx->tags, tag);
+	blk_mq_put_tag(hctx->tags, tag, &ctx->last_tag);
 	blk_mq_queue_exit(q);
 }
 
@@ -677,11 +680,6 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 			queued++;
 			continue;
 		case BLK_MQ_RQ_QUEUE_BUSY:
-			/*
-			 * FIXME: we should have a mechanism to stop the queue
-			 * like blk_stop_queue, otherwise we will waste cpu
-			 * time
-			 */
 			list_add(&rq->queuelist, &rq_list);
 			__blk_mq_requeue_request(rq);
 			break;
@@ -873,6 +871,7 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
 		list_add(&rq->queuelist, &ctx->rq_list);
 	else
 		list_add_tail(&rq->queuelist, &ctx->rq_list);
+
 	blk_mq_hctx_mark_pending(hctx, ctx);
 
 	/*
@@ -1046,7 +1045,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	if (is_sync)
 		rw |= REQ_SYNC;
 	trace_block_getrq(q, bio, rw);
-	rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
+	rq = __blk_mq_alloc_request(hctx, ctx, GFP_ATOMIC, false);
 	if (likely(rq))
 		blk_mq_rq_ctx_init(q, ctx, rq, rw);
 	else {
@@ -1130,8 +1129,8 @@ EXPORT_SYMBOL(blk_mq_map_queue);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *set,
 						   unsigned int hctx_index)
 {
-	return kmalloc_node(sizeof(struct blk_mq_hw_ctx),
-				GFP_KERNEL | __GFP_ZERO, set->numa_node);
+	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL,
+				set->numa_node);
 }
 EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue);
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 1ae364ceaf8b..97cfab9c092f 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -12,6 +12,8 @@ struct blk_mq_ctx {
 	unsigned int		cpu;
 	unsigned int		index_hw;
 
+	unsigned int		last_tag ____cacheline_aligned_in_smp;
+
 	/* incremented at dispatch time */
 	unsigned long		rq_dispatched[2];
 	unsigned long		rq_merged;
@@ -21,7 +23,7 @@ struct blk_mq_ctx {
 
 	struct request_queue	*queue;
 	struct kobject		kobj;
-};
+} ____cacheline_aligned_in_smp;
 
 void __blk_mq_complete_request(struct request *rq);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 5bd677e2dcb7..f83d15f6e1c1 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -31,10 +31,12 @@ struct blk_mq_hw_ctx {
 
 	void			*driver_data;
 
-	unsigned int		nr_ctx;
-	struct blk_mq_ctx	**ctxs;
 	unsigned int 		nr_ctx_map;
 	unsigned long		*ctx_map;
+	unsigned int		nr_ctx;
+	struct blk_mq_ctx	**ctxs;
+
+	unsigned int		wait_index;
 
 	struct blk_mq_tags	*tags;
 
-- 
cgit 


From 2070d50e1cbe3d7f157cbf8e63279c893f375d7f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 9 May 2014 15:11:53 -0400
Subject: percpu-refcount: rename percpu_ref_tryget() to
 percpu_ref_tryget_live()

percpu_ref_tryget() is different from the usual tryget semantics in
that it fails if the refcnt is in its dying stage even if the refcnt
hasn't reached zero yet.  We're about to introduce the more
conventional tryget and the current one has only one user.  Let's
rename it to percpu_ref_tryget_live() so that it explicitly signifies
the peculiarities of its semantics.

This is a pure rename.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Kent Overstreet <kmo@daterainc.com>
---
 include/linux/cgroup.h          | 2 +-
 include/linux/percpu-refcount.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c2515851c1aa..549aed8de32b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -101,7 +101,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
 {
 	if (css->flags & CSS_ROOT)
 		return true;
-	return percpu_ref_tryget(&css->refcnt);
+	return percpu_ref_tryget_live(&css->refcnt);
 }
 
 /**
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 95961f0bf62d..e22d15597cc3 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -118,7 +118,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
 }
 
 /**
- * percpu_ref_tryget - try to increment a percpu refcount
+ * percpu_ref_tryget_live - try to increment a live percpu refcount
  * @ref: percpu_ref to try-get
  *
  * Increment a percpu refcount unless it has already been killed.  Returns
@@ -129,7 +129,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
  * used.  After the confirm_kill callback is invoked, it's guaranteed that
  * no new reference will be given out by percpu_ref_tryget().
  */
-static inline bool percpu_ref_tryget(struct percpu_ref *ref)
+static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 {
 	unsigned __percpu *pcpu_count;
 	int ret = false;
-- 
cgit 


From 4fb6e25049cb6fa0accc7f1b7c192b952fad7ac8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 9 May 2014 15:11:53 -0400
Subject: percpu-refcount: implement percpu_ref_tryget()

Implement percpu_ref_tryget() which fails if the refcnt already
reached zero.  Note that this is different from the recently renamed
percpu_ref_tryget_live() which fails if the refcnt has been killed and
is draining the remaining references.  percpu_ref_tryget() succeeds on
a killed refcnt as long as its current refcnt is above zero.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Kent Overstreet <kmo@daterainc.com>
---
 include/linux/percpu-refcount.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index e22d15597cc3..dba35c411e8c 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -117,6 +117,36 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
 	rcu_read_unlock_sched();
 }
 
+/**
+ * percpu_ref_tryget - try to increment a percpu refcount
+ * @ref: percpu_ref to try-get
+ *
+ * Increment a percpu refcount unless its count already reached zero.
+ * Returns %true on success; %false on failure.
+ *
+ * The caller is responsible for ensuring that @ref stays accessible.
+ */
+static inline bool percpu_ref_tryget(struct percpu_ref *ref)
+{
+	unsigned __percpu *pcpu_count;
+	int ret = false;
+
+	rcu_read_lock_sched();
+
+	pcpu_count = ACCESS_ONCE(ref->pcpu_count);
+
+	if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) {
+		__this_cpu_inc(*pcpu_count);
+		ret = true;
+	} else {
+		ret = atomic_inc_not_zero(&ref->count);
+	}
+
+	rcu_read_unlock_sched();
+
+	return ret;
+}
+
 /**
  * percpu_ref_tryget_live - try to increment a live percpu refcount
  * @ref: percpu_ref to try-get
@@ -128,6 +158,8 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
  * will fail.  For such guarantee, percpu_ref_kill_and_confirm() should be
  * used.  After the confirm_kill callback is invoked, it's guaranteed that
  * no new reference will be given out by percpu_ref_tryget().
+ *
+ * The caller is responsible for ensuring that @ref stays accessible.
  */
 static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 {
-- 
cgit 


From 49c50b97b5522a987b80fbbf9d9869deee8d23b0 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Tue, 6 May 2014 16:02:19 -0700
Subject: mtd: nand: refactor erase_cmd() to return chip status

The nand_chip::erase_cmd callback previously served a dual purpose; for
one, it allowed a per-flash-chip override, so that AG-AND devices could
use a different erase command than other NAND. These AND devices were
dropped in commit 14c6578683367b1e7af0c3c09e872b45a45183a7 (mtd: nand:
remove AG-AND support). On the other hand, some drivers (denali and
doc-g4) need to use this sort of callback to implement
controller-specific erase operations.

To make the latter operation easier for some drivers (e.g., ST's new BCH
NAND driver), it helps if the command dispatch and wait functions can be
lumped together, rather than called separately.

This patch does two things:
 1. Pull the call to chip->waitfunc() into chip->erase_cmd(), and return
    the status from this callback
 2. Rename erase_cmd() to just erase(), since this callback does a
    little more than just send a command

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Tested-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mtd/nand/denali.c    |  7 +++----
 drivers/mtd/nand/docg4.c     |  6 ++++--
 drivers/mtd/nand/nand_base.c | 14 +++++++-------
 include/linux/mtd/nand.h     |  5 ++---
 4 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index c07cd573ad3a..9f2012a3e764 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1233,7 +1233,7 @@ static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip)
 	return status;
 }
 
-static void denali_erase(struct mtd_info *mtd, int page)
+static int denali_erase(struct mtd_info *mtd, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
@@ -1250,8 +1250,7 @@ static void denali_erase(struct mtd_info *mtd, int page)
 	irq_status = wait_for_irq(denali, INTR_STATUS__ERASE_COMP |
 					INTR_STATUS__ERASE_FAIL);
 
-	denali->status = (irq_status & INTR_STATUS__ERASE_FAIL) ?
-						NAND_STATUS_FAIL : PASS;
+	return (irq_status & INTR_STATUS__ERASE_FAIL) ? NAND_STATUS_FAIL : PASS;
 }
 
 static void denali_cmdfunc(struct mtd_info *mtd, unsigned int cmd, int col,
@@ -1584,7 +1583,7 @@ int denali_init(struct denali_nand_info *denali)
 	denali->nand.ecc.write_page_raw = denali_write_page_raw;
 	denali->nand.ecc.read_oob = denali_read_oob;
 	denali->nand.ecc.write_oob = denali_write_oob;
-	denali->nand.erase_cmd = denali_erase;
+	denali->nand.erase = denali_erase;
 
 	if (nand_scan_tail(&denali->mtd)) {
 		ret = -ENXIO;
diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index 1b0265e85a06..ce24637e14f1 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c
@@ -872,7 +872,7 @@ static int docg4_read_oob(struct mtd_info *mtd, struct nand_chip *nand,
 	return 0;
 }
 
-static void docg4_erase_block(struct mtd_info *mtd, int page)
+static int docg4_erase_block(struct mtd_info *mtd, int page)
 {
 	struct nand_chip *nand = mtd->priv;
 	struct docg4_priv *doc = nand->priv;
@@ -916,6 +916,8 @@ static void docg4_erase_block(struct mtd_info *mtd, int page)
 	write_nop(docptr);
 	poll_status(doc);
 	write_nop(docptr);
+
+	return nand->waitfunc(mtd, nand);
 }
 
 static int write_page(struct mtd_info *mtd, struct nand_chip *nand,
@@ -1236,7 +1238,7 @@ static void __init init_mtd_structs(struct mtd_info *mtd)
 	nand->block_markbad = docg4_block_markbad;
 	nand->read_buf = docg4_read_buf;
 	nand->write_buf = docg4_write_buf16;
-	nand->erase_cmd = docg4_erase_block;
+	nand->erase = docg4_erase_block;
 	nand->ecc.read_page = docg4_read_page;
 	nand->ecc.write_page = docg4_write_page;
 	nand->ecc.read_page_raw = docg4_read_page_raw;
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index f6c5685b79a6..7853b9b0a05e 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2617,18 +2617,20 @@ out:
 }
 
 /**
- * single_erase_cmd - [GENERIC] NAND standard block erase command function
+ * single_erase - [GENERIC] NAND standard block erase command function
  * @mtd: MTD device structure
  * @page: the page address of the block which will be erased
  *
- * Standard erase command for NAND chips.
+ * Standard erase command for NAND chips. Returns NAND status.
  */
-static void single_erase_cmd(struct mtd_info *mtd, int page)
+static int single_erase(struct mtd_info *mtd, int page)
 {
 	struct nand_chip *chip = mtd->priv;
 	/* Send commands to erase a block */
 	chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page);
 	chip->cmdfunc(mtd, NAND_CMD_ERASE2, -1, -1);
+
+	return chip->waitfunc(mtd, chip);
 }
 
 /**
@@ -2709,9 +2711,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 		    (page + pages_per_block))
 			chip->pagebuf = -1;
 
-		chip->erase_cmd(mtd, page & chip->pagemask);
-
-		status = chip->waitfunc(mtd, chip);
+		status = chip->erase(mtd, page & chip->pagemask);
 
 		/*
 		 * See if operation failed and additional status checks are
@@ -3684,7 +3684,7 @@ ident_done:
 	}
 
 	chip->badblockbits = 8;
-	chip->erase_cmd = single_erase_cmd;
+	chip->erase = single_erase;
 
 	/* Do not replace user supplied command function! */
 	if (mtd->writesize > 512 && chip->cmdfunc == nand_command)
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 450d61ec7f06..7a922e6c4e4b 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -552,8 +552,7 @@ struct nand_buffers {
  * @ecc:		[BOARDSPECIFIC] ECC control structure
  * @buffers:		buffer structure for read/write
  * @hwcontrol:		platform-specific hardware control structure
- * @erase_cmd:		[INTERN] erase command write function, selectable due
- *			to AND support.
+ * @erase:		[REPLACEABLE] erase function
  * @scan_bbt:		[REPLACEABLE] function to scan bad block table
  * @chip_delay:		[BOARDSPECIFIC] chip dependent delay for transferring
  *			data from array to read regs (tR).
@@ -637,7 +636,7 @@ struct nand_chip {
 	void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column,
 			int page_addr);
 	int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this);
-	void (*erase_cmd)(struct mtd_info *mtd, int page);
+	int (*erase)(struct mtd_info *mtd, int page);
 	int (*scan_bbt)(struct mtd_info *mtd);
 	int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state,
 			int status, int page);
-- 
cgit 


From 9739eef13c926645fbf88bcb77e66442fa75d688 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 8 May 2014 14:10:51 -0700
Subject: net: filter: make BPF conversion more readable

Introduce BPF helper macros to define instructions
(similar to old BPF_STMT/BPF_JUMP macros)

Use them while converting classic BPF to internal
and in BPF testsuite later.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  51 ++++++++++++++++++
 net/core/filter.c      | 142 +++++++++++++++++--------------------------------
 2 files changed, 101 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index ed1efab10b8f..4457b383961c 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -79,6 +79,57 @@ enum {
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
+/* bpf_add|sub|...: a += x, bpf_mov: a = x */
+#define BPF_ALU64_REG(op, a, x) \
+	((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_X, a, x, 0, 0})
+#define BPF_ALU32_REG(op, a, x) \
+	((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_X, a, x, 0, 0})
+
+/* bpf_add|sub|...: a += imm, bpf_mov: a = imm */
+#define BPF_ALU64_IMM(op, a, imm) \
+	((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_K, a, 0, 0, imm})
+#define BPF_ALU32_IMM(op, a, imm) \
+	((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_K, a, 0, 0, imm})
+
+/* R0 = *(uint *) (skb->data + off) */
+#define BPF_LD_ABS(size, off) \
+	((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_ABS, 0, 0, 0, off})
+
+/* R0 = *(uint *) (skb->data + x + off) */
+#define BPF_LD_IND(size, x, off) \
+	((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_IND, 0, x, 0, off})
+
+/* a = *(uint *) (x + off) */
+#define BPF_LDX_MEM(sz, a, x, off) \
+	((struct sock_filter_int) {BPF_LDX|BPF_SIZE(sz)|BPF_MEM, a, x, off, 0})
+
+/* if (a 'op' x) goto pc+off */
+#define BPF_JMP_REG(op, a, x, off) \
+	((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_X, a, x, off, 0})
+
+/* if (a 'op' imm) goto pc+off */
+#define BPF_JMP_IMM(op, a, imm, off) \
+	((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_K, a, 0, off, imm})
+
+#define BPF_EXIT_INSN() \
+	((struct sock_filter_int) {BPF_JMP|BPF_EXIT, 0, 0, 0, 0})
+
+static inline int size_to_bpf(int size)
+{
+	switch (size) {
+	case 1:
+		return BPF_B;
+	case 2:
+		return BPF_H;
+	case 4:
+		return BPF_W;
+	case 8:
+		return BPF_DW;
+	default:
+		return -EINVAL;
+	}
+}
+
 /* Macro to invoke filter function. */
 #define SK_RUN_FILTER(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index eb020a7d6f55..9aaa05ad8fe3 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -668,10 +668,9 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_PROTOCOL:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = offsetof(struct sk_buff, protocol);
+		/* A = *(u16 *) (ctx + offsetof(protocol)) */
+		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, protocol));
 		insn++;
 
 		/* A = ntohs(A) [emitting a nop or swap16] */
@@ -681,37 +680,27 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
-		insn->code = BPF_LDX | BPF_MEM | BPF_B;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = pkt_type_offset();
+		*insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
+				    pkt_type_offset());
 		if (insn->off < 0)
 			return false;
 		insn++;
 
-		insn->code = BPF_ALU | BPF_AND | BPF_K;
-		insn->a_reg = BPF_REG_A;
-		insn->imm = PKT_TYPE_MAX;
+		*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
 		break;
 
 	case SKF_AD_OFF + SKF_AD_IFINDEX:
 	case SKF_AD_OFF + SKF_AD_HATYPE:
-		if (FIELD_SIZEOF(struct sk_buff, dev) == 8)
-			insn->code = BPF_LDX | BPF_MEM | BPF_DW;
-		else
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = BPF_REG_TMP;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = offsetof(struct sk_buff, dev);
+		*insn = BPF_LDX_MEM(size_to_bpf(FIELD_SIZEOF(struct sk_buff, dev)),
+				    BPF_REG_TMP, BPF_REG_CTX,
+				    offsetof(struct sk_buff, dev));
 		insn++;
 
-		insn->code = BPF_JMP | BPF_JNE | BPF_K;
-		insn->a_reg = BPF_REG_TMP;
-		insn->imm = 0;
-		insn->off = 1;
+		/* if (tmp != 0) goto pc+1 */
+		*insn = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
 		insn++;
 
-		insn->code = BPF_JMP | BPF_EXIT;
+		*insn = BPF_EXIT_INSN();
 		insn++;
 
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
@@ -732,55 +721,45 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_MARK:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = offsetof(struct sk_buff, mark);
+		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, mark));
 		break;
 
 	case SKF_AD_OFF + SKF_AD_RXHASH:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_W;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = offsetof(struct sk_buff, hash);
+		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, hash));
 		break;
 
 	case SKF_AD_OFF + SKF_AD_QUEUE:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = offsetof(struct sk_buff, queue_mapping);
+		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, queue_mapping));
 		break;
 
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 
-		insn->code = BPF_LDX | BPF_MEM | BPF_H;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_CTX;
-		insn->off = offsetof(struct sk_buff, vlan_tci);
+		/* A = *(u16 *) (ctx + offsetof(vlan_tci)) */
+		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				    offsetof(struct sk_buff, vlan_tci));
 		insn++;
 
 		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
 
 		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
-			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = BPF_REG_A;
-			insn->imm = ~VLAN_TAG_PRESENT;
+			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
+					      ~VLAN_TAG_PRESENT);
 		} else {
-			insn->code = BPF_ALU | BPF_RSH | BPF_K;
-			insn->a_reg = BPF_REG_A;
-			insn->imm = 12;
+			/* A >>= 12 */
+			*insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
 			insn++;
 
-			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = BPF_REG_A;
-			insn->imm = 1;
+			/* A &= 1 */
+			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
 		}
 		break;
 
@@ -790,21 +769,15 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_CPU:
 	case SKF_AD_OFF + SKF_AD_RANDOM:
 		/* arg1 = ctx */
-		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = BPF_REG_ARG1;
-		insn->x_reg = BPF_REG_CTX;
+		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG1, BPF_REG_CTX);
 		insn++;
 
 		/* arg2 = A */
-		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = BPF_REG_ARG2;
-		insn->x_reg = BPF_REG_A;
+		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG2, BPF_REG_A);
 		insn++;
 
 		/* arg3 = X */
-		insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		insn->a_reg = BPF_REG_ARG3;
-		insn->x_reg = BPF_REG_X;
+		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG3, BPF_REG_X);
 		insn++;
 
 		/* Emit call(ctx, arg2=A, arg3=X) */
@@ -829,9 +802,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
-		insn->code = BPF_ALU | BPF_XOR | BPF_X;
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_X;
+		/* A ^= X */
+		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
 		break;
 
 	default:
@@ -897,9 +869,7 @@ do_pass:
 	fp = prog;
 
 	if (new_insn) {
-		new_insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-		new_insn->a_reg = BPF_REG_CTX;
-		new_insn->x_reg = BPF_REG_ARG1;
+		*new_insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_CTX, BPF_REG_ARG1);
 	}
 	new_insn++;
 
@@ -1027,34 +997,28 @@ do_pass:
 
 		/* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
 		case BPF_LDX | BPF_MSH | BPF_B:
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = BPF_REG_TMP;
-			insn->x_reg = BPF_REG_A;
+			/* tmp = A */
+			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_TMP, BPF_REG_A);
 			insn++;
 
-			insn->code = BPF_LD | BPF_ABS | BPF_B;
-			insn->a_reg = BPF_REG_A;
-			insn->imm = fp->k;
+			/* A = R0 = *(u8 *) (skb->data + K) */
+			*insn = BPF_LD_ABS(BPF_B, fp->k);
 			insn++;
 
-			insn->code = BPF_ALU | BPF_AND | BPF_K;
-			insn->a_reg = BPF_REG_A;
-			insn->imm = 0xf;
+			/* A &= 0xf */
+			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
 			insn++;
 
-			insn->code = BPF_ALU | BPF_LSH | BPF_K;
-			insn->a_reg = BPF_REG_A;
-			insn->imm = 2;
+			/* A <<= 2 */
+			*insn = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
 			insn++;
 
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = BPF_REG_X;
-			insn->x_reg = BPF_REG_A;
+			/* X = A */
+			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A);
 			insn++;
 
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = BPF_REG_A;
-			insn->x_reg = BPF_REG_TMP;
+			/* A = tmp */
+			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_TMP);
 			break;
 
 		/* RET_K, RET_A are remaped into 2 insns. */
@@ -1068,7 +1032,7 @@ do_pass:
 			insn->imm = fp->k;
 			insn++;
 
-			insn->code = BPF_JMP | BPF_EXIT;
+			*insn = BPF_EXIT_INSN();
 			break;
 
 		/* Store to stack. */
@@ -1102,16 +1066,12 @@ do_pass:
 
 		/* X = A */
 		case BPF_MISC | BPF_TAX:
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = BPF_REG_X;
-			insn->x_reg = BPF_REG_A;
+			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A);
 			break;
 
 		/* A = X */
 		case BPF_MISC | BPF_TXA:
-			insn->code = BPF_ALU64 | BPF_MOV | BPF_X;
-			insn->a_reg = BPF_REG_A;
-			insn->x_reg = BPF_REG_X;
+			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_X);
 			break;
 
 		/* A = skb->len or X = skb->len */
@@ -1126,10 +1086,8 @@ do_pass:
 
 		/* access seccomp_data fields */
 		case BPF_LDX | BPF_ABS | BPF_W:
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->a_reg = BPF_REG_A;
-			insn->x_reg = BPF_REG_CTX;
-			insn->off = fp->k;
+			/* A = *(u32 *) (ctx + K) */
+			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
 			break;
 
 		default:
-- 
cgit 


From 4593df29b94b31de931dc20d7da2e6c468c8d473 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 21 Mar 2014 10:13:05 +0100
Subject: mmc: mmci: Enforce DT for signal direction and feedback clock

Remove the option to provide signal direction configuration and
feedback clock as platform data, enforce it through DT.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mmci.c   | 34 ++++++++++++++--------------------
 drivers/mmc/host/mmci.h   | 11 +++++++++++
 include/linux/amba/mmci.h | 16 ----------------
 3 files changed, 25 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 0d3ee08662a9..c0353f84d5be 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -1287,7 +1287,7 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 		 * indicating signal direction for the signals in
 		 * the SD/MMC bus and feedback-clock usage.
 		 */
-		pwr |= host->plat->sigdir;
+		pwr |= host->pwr_reg_add;
 
 		if (ios->bus_width == MMC_BUS_WIDTH_4)
 			pwr &= ~MCI_ST_DATA74DIREN;
@@ -1386,29 +1386,26 @@ static struct mmc_host_ops mmci_ops = {
 	.start_signal_voltage_switch = mmci_sig_volt_switch,
 };
 
-static void mmci_dt_populate_generic_pdata(struct device_node *np,
-					struct mmci_platform_data *pdata)
+static int mmci_of_parse(struct device_node *np, struct mmc_host *mmc)
 {
+	struct mmci_host *host = mmc_priv(mmc);
+	int ret = mmc_of_parse(mmc);
+
+	if (ret)
+		return ret;
+
 	if (of_get_property(np, "st,sig-dir-dat0", NULL))
-		pdata->sigdir |= MCI_ST_DATA0DIREN;
+		host->pwr_reg_add |= MCI_ST_DATA0DIREN;
 	if (of_get_property(np, "st,sig-dir-dat2", NULL))
-		pdata->sigdir |= MCI_ST_DATA2DIREN;
+		host->pwr_reg_add |= MCI_ST_DATA2DIREN;
 	if (of_get_property(np, "st,sig-dir-dat31", NULL))
-		pdata->sigdir |= MCI_ST_DATA31DIREN;
+		host->pwr_reg_add |= MCI_ST_DATA31DIREN;
 	if (of_get_property(np, "st,sig-dir-dat74", NULL))
-		pdata->sigdir |= MCI_ST_DATA74DIREN;
+		host->pwr_reg_add |= MCI_ST_DATA74DIREN;
 	if (of_get_property(np, "st,sig-dir-cmd", NULL))
-		pdata->sigdir |= MCI_ST_CMDDIREN;
+		host->pwr_reg_add |= MCI_ST_CMDDIREN;
 	if (of_get_property(np, "st,sig-pin-fbclk", NULL))
-		pdata->sigdir |= MCI_ST_FBCLKEN;
-}
-
-static int mmci_of_parse(struct device_node *np, struct mmc_host *mmc)
-{
-	int ret = mmc_of_parse(mmc);
-
-	if (ret)
-		return ret;
+		host->pwr_reg_add |= MCI_ST_FBCLKEN;
 
 	if (of_get_property(np, "mmc-cap-mmc-highspeed", NULL))
 		mmc->caps |= MMC_CAP_MMC_HIGHSPEED;
@@ -1440,9 +1437,6 @@ static int mmci_probe(struct amba_device *dev,
 			return -ENOMEM;
 	}
 
-	if (np)
-		mmci_dt_populate_generic_pdata(np, plat);
-
 	mmc = mmc_alloc_host(sizeof(struct mmci_host), &dev->dev);
 	if (!mmc)
 		return -ENOMEM;
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 8fc5814f938a..347d942d740b 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -13,6 +13,16 @@
 #define MCI_PWR_ON		0x03
 #define MCI_OD			(1 << 6)
 #define MCI_ROD			(1 << 7)
+/*
+ * The ST Micro version does not have ROD and reuse the voltage registers for
+ * direction settings.
+ */
+#define MCI_ST_DATA2DIREN	(1 << 2)
+#define MCI_ST_CMDDIREN		(1 << 3)
+#define MCI_ST_DATA0DIREN	(1 << 4)
+#define MCI_ST_DATA31DIREN	(1 << 5)
+#define MCI_ST_FBCLKEN		(1 << 7)
+#define MCI_ST_DATA74DIREN	(1 << 8)
 
 #define MMCICLOCK		0x004
 #define MCI_CLK_ENABLE		(1 << 8)
@@ -183,6 +193,7 @@ struct mmci_host {
 	unsigned int		mclk;
 	unsigned int		cclk;
 	u32			pwr_reg;
+	u32			pwr_reg_add;
 	u32			clk_reg;
 	u32			datactrl_reg;
 	u32			busy_status;
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index 32a89cf5ec45..0d3ff95b3b4c 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -6,19 +6,6 @@
 
 #include <linux/mmc/host.h>
 
-
-/*
- * These defines is places here due to access is needed from machine
- * configuration files. The ST Micro version does not have ROD and
- * reuse the voltage registers for direction settings.
- */
-#define MCI_ST_DATA2DIREN	(1 << 2)
-#define MCI_ST_CMDDIREN		(1 << 3)
-#define MCI_ST_DATA0DIREN	(1 << 4)
-#define MCI_ST_DATA31DIREN	(1 << 5)
-#define MCI_ST_FBCLKEN		(1 << 7)
-#define MCI_ST_DATA74DIREN	(1 << 8)
-
 /* Just some dummy forwarding */
 struct dma_chan;
 
@@ -45,8 +32,6 @@ struct dma_chan;
  * @capabilities: the capabilities of the block as implemented in
  * this platform, signify anything MMC_CAP_* from mmc/host.h
  * @capabilities2: more capabilities, MMC_CAP2_* from mmc/host.h
- * @sigdir: a bit field indicating for what bits in the MMC bus the host
- * should enable signal direction indication.
  * @dma_filter: function used to select an appropriate RX and TX
  * DMA channel to be used for DMA, if and only if you're deploying the
  * generic DMA engine
@@ -69,7 +54,6 @@ struct mmci_platform_data {
 	bool	cd_invert;
 	unsigned long capabilities;
 	unsigned long capabilities2;
-	u32 sigdir;
 	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
 	void *dma_rx_param;
 	void *dma_tx_param;
-- 
cgit 


From 3faf80dfa342e98b5780e0b78b7a670c7b61a9be Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 21 Mar 2014 10:29:10 +0100
Subject: mmc: mmci: Enforce mmc capabilities through DT

Remove the option to provide the flags for mmc capabilities as platform
data, enforce it through DT.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mmci.c   | 2 --
 include/linux/amba/mmci.h | 5 -----
 2 files changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index c0353f84d5be..9c60325f1a30 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -1519,8 +1519,6 @@ static int mmci_probe(struct amba_device *dev,
 		dev_warn(mmc_dev(mmc), "Platform OCR mask is ignored\n");
 
 	/* DT takes precedence over platform data. */
-	mmc->caps = np ? mmc->caps : plat->capabilities;
-	mmc->caps2 = np ? mmc->caps2 : plat->capabilities2;
 	if (!np) {
 		if (!plat->cd_invert)
 			mmc->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH;
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index 0d3ff95b3b4c..b992fc931295 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -29,9 +29,6 @@ struct dma_chan;
  * @gpio_wp: read this GPIO pin to see if the card is write protected
  * @gpio_cd: read this GPIO pin to detect card insertion
  * @cd_invert: true if the gpio_cd pin value is active low
- * @capabilities: the capabilities of the block as implemented in
- * this platform, signify anything MMC_CAP_* from mmc/host.h
- * @capabilities2: more capabilities, MMC_CAP2_* from mmc/host.h
  * @dma_filter: function used to select an appropriate RX and TX
  * DMA channel to be used for DMA, if and only if you're deploying the
  * generic DMA engine
@@ -52,8 +49,6 @@ struct mmci_platform_data {
 	int	gpio_wp;
 	int	gpio_cd;
 	bool	cd_invert;
-	unsigned long capabilities;
-	unsigned long capabilities2;
 	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
 	void *dma_rx_param;
 	void *dma_tx_param;
-- 
cgit 


From 1c8349a17137b93f0a83f276c764a6df1b9a116e Mon Sep 17 00:00:00 2001
From: Namjae Jeon <namjae.jeon@samsung.com>
Date: Mon, 12 May 2014 08:12:25 -0400
Subject: ext4: fix data integrity sync in ordered mode

When we perform a data integrity sync we tag all the dirty pages with
PAGECACHE_TAG_TOWRITE at the start of ext4_da_writepages.  Later we check
for this tag in write_cache_pages_da, create a struct
mpage_da_data containing contiguously indexed pages tagged with this
tag, and sync these pages with a call to mpage_da_map_and_submit.  This
process is done in a while loop until all the PAGECACHE_TAG_TOWRITE
pages are synced. We also do journal start and stop in each iteration.
journal_stop could initiate journal commit which would call
ext4_writepage which in turn will call ext4_bio_write_page even for
delayed OR unwritten buffers. When ext4_bio_write_page is called for
such buffers, it does not sync them, yet it still clears the
PAGECACHE_TAG_TOWRITE tag of the corresponding page, and hence these
pages are not synced by the currently running data integrity sync
either. We end up with dirty pages even though the sync has completed.

This could cause a potential data loss when the sync call is followed
by a truncate_pagecache call, which is exactly the case in
collapse_range.  (It will cause generic/127 failure in xfstests)

To avoid this issue, we can use set_page_writeback_keepwrite instead of
set_page_writeback, which doesn't clear TOWRITE tag.

Cc: stable@vger.kernel.org
Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/ext4.h             |  3 ++-
 fs/ext4/inode.c            |  6 ++++--
 fs/ext4/page-io.c          |  8 ++++++--
 include/linux/page-flags.h | 12 +++++++++++-
 mm/page-writeback.c        | 11 ++++++-----
 5 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1d08a1b51bdd..aeda5061a59a 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2772,7 +2772,8 @@ extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       struct page *page,
 			       int len,
-			       struct writeback_control *wbc);
+			       struct writeback_control *wbc,
+			       bool keep_towrite);
 
 /* mmp.c */
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9b3c52fbe86d..04dd2de10796 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1846,6 +1846,7 @@ static int ext4_writepage(struct page *page,
 	struct buffer_head *page_bufs = NULL;
 	struct inode *inode = page->mapping->host;
 	struct ext4_io_submit io_submit;
+	bool keep_towrite = false;
 
 	trace_ext4_writepage(page);
 	size = i_size_read(inode);
@@ -1876,6 +1877,7 @@ static int ext4_writepage(struct page *page,
 			unlock_page(page);
 			return 0;
 		}
+		keep_towrite = true;
 	}
 
 	if (PageChecked(page) && ext4_should_journal_data(inode))
@@ -1892,7 +1894,7 @@ static int ext4_writepage(struct page *page,
 		unlock_page(page);
 		return -ENOMEM;
 	}
-	ret = ext4_bio_write_page(&io_submit, page, len, wbc);
+	ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
 	ext4_io_submit(&io_submit);
 	/* Drop io_end reference we got from init */
 	ext4_put_io_end_defer(io_submit.io_end);
@@ -1911,7 +1913,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
 	else
 		len = PAGE_CACHE_SIZE;
 	clear_page_dirty_for_io(page);
-	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc);
+	err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
 	if (!err)
 		mpd->wbc->nr_to_write--;
 	mpd->first_page++;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index c18d95b50540..4cb2743cb2e3 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -401,7 +401,8 @@ submit_and_retry:
 int ext4_bio_write_page(struct ext4_io_submit *io,
 			struct page *page,
 			int len,
-			struct writeback_control *wbc)
+			struct writeback_control *wbc,
+			bool keep_towrite)
 {
 	struct inode *inode = page->mapping->host;
 	unsigned block_start, blocksize;
@@ -414,7 +415,10 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageWriteback(page));
 
-	set_page_writeback(page);
+	if (keep_towrite)
+		set_page_writeback_keepwrite(page);
+	else
+		set_page_writeback(page);
 	ClearPageError(page);
 
 	/*
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d1fe1a761047..ca71a1d347a0 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -317,13 +317,23 @@ CLEARPAGEFLAG(Uptodate, uptodate)
 extern void cancel_dirty_page(struct page *page, unsigned int account_size);
 
 int test_clear_page_writeback(struct page *page);
-int test_set_page_writeback(struct page *page);
+int __test_set_page_writeback(struct page *page, bool keep_write);
+
+#define test_set_page_writeback(page)			\
+	__test_set_page_writeback(page, false)
+#define test_set_page_writeback_keepwrite(page)	\
+	__test_set_page_writeback(page, true)
 
 static inline void set_page_writeback(struct page *page)
 {
 	test_set_page_writeback(page);
 }
 
+static inline void set_page_writeback_keepwrite(struct page *page)
+{
+	test_set_page_writeback_keepwrite(page);
+}
+
 #ifdef CONFIG_PAGEFLAGS_EXTENDED
 /*
  * System with lots of page flags available. This allows separate
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ef413492a149..d8691d9de3c4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2398,7 +2398,7 @@ int test_clear_page_writeback(struct page *page)
 	return ret;
 }
 
-int test_set_page_writeback(struct page *page)
+int __test_set_page_writeback(struct page *page, bool keep_write)
 {
 	struct address_space *mapping = page_mapping(page);
 	int ret;
@@ -2423,9 +2423,10 @@ int test_set_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
-		radix_tree_tag_clear(&mapping->page_tree,
-				     page_index(page),
-				     PAGECACHE_TAG_TOWRITE);
+		if (!keep_write)
+			radix_tree_tag_clear(&mapping->page_tree,
+						page_index(page),
+						PAGECACHE_TAG_TOWRITE);
 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
@@ -2436,7 +2437,7 @@ int test_set_page_writeback(struct page *page)
 	return ret;
 
 }
-EXPORT_SYMBOL(test_set_page_writeback);
+EXPORT_SYMBOL(__test_set_page_writeback);
 
 /*
  * Return true if any of the pages in the mapping are marked with the
-- 
cgit 


From 60ff746739bf805a912484643c720b6124826140 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Sun, 4 May 2014 16:39:18 -0700
Subject: net: rename local_df to ignore_df
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As suggested by several people, rename local_df to ignore_df,
since it means "ignore df bit if it is set".

Cc: Maciej Żenczykowski <maze@google.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h                  |  4 ++--
 include/net/ip.h                        |  6 +++---
 include/net/ip6_route.h                 |  2 +-
 net/core/skbuff.c                       |  4 ++--
 net/ipv4/ip_forward.c                   |  2 +-
 net/ipv4/ip_output.c                    | 12 ++++++------
 net/ipv4/netfilter/nf_defrag_ipv4.c     |  2 +-
 net/ipv4/xfrm4_output.c                 |  2 +-
 net/ipv6/ip6_output.c                   | 12 ++++++------
 net/ipv6/netfilter/nf_conntrack_reasm.c |  2 +-
 net/ipv6/xfrm6_output.c                 |  6 +++---
 net/l2tp/l2tp_core.c                    |  2 +-
 net/netfilter/ipvs/ip_vs_xmit.c         | 20 ++++++++++----------
 net/openvswitch/vport-gre.c             |  2 +-
 net/openvswitch/vport-vxlan.c           |  2 +-
 net/sctp/ipv6.c                         |  2 +-
 net/sctp/output.c                       |  2 +-
 17 files changed, 42 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3ca0dda5a42e..7a9beeb1c458 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -426,7 +426,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
  *	@csum_start: Offset from skb->head where checksumming should start
  *	@csum_offset: Offset from csum_start where checksum should be stored
  *	@priority: Packet queueing priority
- *	@local_df: allow local fragmentation
+ *	@ignore_df: allow local fragmentation
  *	@cloned: Head may be cloned (check refcnt to be sure)
  *	@ip_summed: Driver fed us an IP checksum
  *	@nohdr: Payload reference only, must not modify header
@@ -514,7 +514,7 @@ struct sk_buff {
 	};
 	__u32			priority;
 	kmemcheck_bitfield_begin(flags1);
-	__u8			local_df:1,
+	__u8			ignore_df:1,
 				cloned:1,
 				ip_summed:2,
 				nohdr:1,
diff --git a/include/net/ip.h b/include/net/ip.h
index 16146b667ddb..55752985c144 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -269,7 +269,7 @@ static inline bool ip_sk_use_pmtu(const struct sock *sk)
 	return inet_sk(sk)->pmtudisc < IP_PMTUDISC_PROBE;
 }
 
-static inline bool ip_sk_local_df(const struct sock *sk)
+static inline bool ip_sk_ignore_df(const struct sock *sk)
 {
 	return inet_sk(sk)->pmtudisc < IP_PMTUDISC_DO ||
 	       inet_sk(sk)->pmtudisc == IP_PMTUDISC_OMIT;
@@ -304,7 +304,7 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s
 {
 	struct iphdr *iph = ip_hdr(skb);
 
-	if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) {
+	if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
 		/* This is only to work around buggy Windows95/2000
 		 * VJ compression implementations.  If the ID field
 		 * does not change, they drop every other packet in
@@ -320,7 +320,7 @@ static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *d
 {
 	struct iphdr *iph = ip_hdr(skb);
 
-	if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) {
+	if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
 		if (sk && inet_sk(sk)->inet_daddr) {
 			iph->id = htons(inet_sk(sk)->inet_id);
 			inet_sk(sk)->inet_id += 1 + more;
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 6c4f5eac98e7..38e41e4d0998 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -185,7 +185,7 @@ static inline bool ip6_sk_accept_pmtu(const struct sock *sk)
 	       inet6_sk(sk)->pmtudisc != IPV6_PMTUDISC_OMIT;
 }
 
-static inline bool ip6_sk_local_df(const struct sock *sk)
+static inline bool ip6_sk_ignore_df(const struct sock *sk)
 {
 	return inet6_sk(sk)->pmtudisc < IPV6_PMTUDISC_DO ||
 	       inet6_sk(sk)->pmtudisc == IPV6_PMTUDISC_OMIT;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1b62343f5837..3d74530ae82b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -694,7 +694,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #endif
 	memcpy(new->cb, old->cb, sizeof(old->cb));
 	new->csum		= old->csum;
-	new->local_df		= old->local_df;
+	new->ignore_df		= old->ignore_df;
 	new->pkt_type		= old->pkt_type;
 	new->ip_summed		= old->ip_summed;
 	skb_copy_queue_mapping(new, old);
@@ -3913,7 +3913,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	skb->tstamp.tv64 = 0;
 	skb->pkt_type = PACKET_HOST;
 	skb->skb_iif = 0;
-	skb->local_df = 0;
+	skb->ignore_df = 0;
 	skb_dst_drop(skb);
 	skb->mark = 0;
 	secpath_reset(skb);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 6f111e48e11c..3a83ce5efa80 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -42,7 +42,7 @@
 static bool ip_may_fragment(const struct sk_buff *skb)
 {
 	return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
-		skb->local_df;
+		skb->ignore_df;
 }
 
 static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a52f50187b54..6aa4380fde1a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -415,7 +415,7 @@ packet_routed:
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
 	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
-	if (ip_dont_fragment(sk, &rt->dst) && !skb->local_df)
+	if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
@@ -501,7 +501,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	iph = ip_hdr(skb);
 
 	mtu = ip_skb_dst_mtu(skb);
-	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->local_df) ||
+	if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
 		     (IPCB(skb)->frag_max_size &&
 		      IPCB(skb)->frag_max_size > mtu))) {
 		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
@@ -866,7 +866,7 @@ static int __ip_append_data(struct sock *sk,
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-	maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
+	maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
 
 	if (cork->length + length > maxnonfragsize - fragheaderlen) {
 		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1189,7 +1189,7 @@ ssize_t	ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
 
 	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
 	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
-	maxnonfragsize = ip_sk_local_df(sk) ? 0xFFFF : mtu;
+	maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
 
 	if (cork->length + size > maxnonfragsize - fragheaderlen) {
 		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -1350,10 +1350,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 	 * to fragment the frame generated here. No matter, what transforms
 	 * how transforms change size of the packet, it will come out.
 	 */
-	skb->local_df = ip_sk_local_df(sk);
+	skb->ignore_df = ip_sk_ignore_df(sk);
 
 	/* DF bit is set when we want to see DF on outgoing frames.
-	 * If local_df is set too, we still allow to fragment this frame
+	 * If ignore_df is set too, we still allow to fragment this frame
 	 * locally. */
 	if (inet->pmtudisc == IP_PMTUDISC_DO ||
 	    inet->pmtudisc == IP_PMTUDISC_PROBE ||
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index f40f321b41fc..b8f6381c7d0b 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -34,7 +34,7 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
 
 	if (!err) {
 		ip_send_check(ip_hdr(skb));
-		skb->local_df = 1;
+		skb->ignore_df = 1;
 	}
 
 	return err;
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 40e701f2e1e0..8e8c018d9d2d 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -25,7 +25,7 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb)
 	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
 		goto out;
 
-	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->local_df)
+	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
 		goto out;
 
 	mtu = dst_mtu(skb_dst(skb));
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 31a38bde69ef..ab0cc57f779c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -219,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	skb->mark = sk->sk_mark;
 
 	mtu = dst_mtu(dst);
-	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
+	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			      IPSTATS_MIB_OUT, skb->len);
 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
@@ -347,11 +347,11 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 	if (skb->len <= mtu)
 		return false;
 
-	/* ipv6 conntrack defrag sets max_frag_size + local_df */
+	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
 	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
 		return true;
 
-	if (skb->local_df)
+	if (skb->ignore_df)
 		return false;
 
 	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
@@ -559,7 +559,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	/* We must not fragment if the socket is set to force MTU discovery
 	 * or if the skb it not generated by a local socket.
 	 */
-	if (unlikely(!skb->local_df && skb->len > mtu) ||
+	if (unlikely(!skb->ignore_df && skb->len > mtu) ||
 		     (IP6CB(skb)->frag_max_size &&
 		      IP6CB(skb)->frag_max_size > mtu)) {
 		if (skb->sk && dst_allfrag(skb_dst(skb)))
@@ -1234,7 +1234,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 			      sizeof(struct frag_hdr) : 0) +
 			     rt->rt6i_nfheader_len;
 
-		if (ip6_sk_local_df(sk))
+		if (ip6_sk_ignore_df(sk))
 			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
 		else
 			maxnonfragsize = mtu;
@@ -1544,7 +1544,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	}
 
 	/* Allow local fragmentation. */
-	skb->local_df = ip6_sk_local_df(sk);
+	skb->ignore_df = ip6_sk_ignore_df(sk);
 
 	*final_dst = fl6->daddr;
 	__skb_pull(skb, skb_network_header_len(skb));
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 767ab8da8218..0d5279fd852a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -451,7 +451,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
 	}
 	sub_frag_mem_limit(&fq->q, head->truesize);
 
-	head->local_df = 1;
+	head->ignore_df = 1;
 	head->next = NULL;
 	head->dev = dev;
 	head->tstamp = fq->q.stamp;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 19ef329bdbf8..f47c8b153dd3 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -78,7 +78,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 	if (mtu < IPV6_MIN_MTU)
 		mtu = IPV6_MIN_MTU;
 
-	if (!skb->local_df && skb->len > mtu) {
+	if (!skb->ignore_df && skb->len > mtu) {
 		skb->dev = dst->dev;
 
 		if (xfrm6_local_dontfrag(skb))
@@ -120,7 +120,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
 #endif
 
 	skb->protocol = htons(ETH_P_IPV6);
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	return x->outer_mode->output2(x, skb);
 }
@@ -150,7 +150,7 @@ static int __xfrm6_output(struct sk_buff *skb)
 	if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {
 		xfrm6_local_rxpmtu(skb, mtu);
 		return -EMSGSIZE;
-	} else if (!skb->local_df && skb->len > mtu && skb->sk) {
+	} else if (!skb->ignore_df && skb->len > mtu && skb->sk) {
 		xfrm_local_error(skb, mtu);
 		return -EMSGSIZE;
 	}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index aa1a9d44c107..ed0716a075ba 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1073,7 +1073,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 	}
 
 	/* Queue the packet to IP for output */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 #if IS_ENABLED(CONFIG_IPV6)
 	if (tunnel->sock->sk_family == PF_INET6 && !tunnel->v4mapped)
 		error = inet6_csk_xmit(tunnel->sock, skb, NULL);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index c47444e4cf8c..487b55e04337 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -562,7 +562,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_send_check(iph);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
 	rcu_read_unlock();
@@ -590,7 +590,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		goto tx_error;
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
 	rcu_read_unlock();
@@ -684,7 +684,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	   MTU problem. */
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
 	rcu_read_unlock();
@@ -774,7 +774,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	   MTU problem. */
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
 	rcu_read_unlock();
@@ -886,7 +886,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_select_ident(skb, &rt->dst, NULL);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
 	if (ret == NF_ACCEPT)
@@ -974,7 +974,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	iph->hop_limit		=	old_iph->hop_limit;
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
 	if (ret == NF_ACCEPT)
@@ -1023,7 +1023,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_send_check(ip_hdr(skb));
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0);
 	rcu_read_unlock();
@@ -1060,7 +1060,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	}
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0);
 	rcu_read_unlock();
@@ -1157,7 +1157,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_vs_nat_icmp(skb, pp, cp, 0);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local);
 	rcu_read_unlock();
@@ -1249,7 +1249,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	ip_vs_nat_icmp_v6(skb, pp, cp, 0);
 
 	/* Another hack: avoid icmp_send in ip_fragment */
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local);
 	rcu_read_unlock();
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index ebb6e2442554..0856f014d7a9 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -172,7 +172,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
 	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
 		htons(IP_DF) : 0;
 
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
 			     OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 21cceb3bdf78..a93efa3f64c3 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -170,7 +170,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
 	df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
 		htons(IP_DF) : 0;
 
-	skb->local_df = 1;
+	skb->ignore_df = 1;
 
 	inet_get_local_port_range(net, &port_min, &port_max);
 	src_port = vxlan_src_port(port_min, port_max, skb);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 2b1738ef9394..4dc5d9e08311 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -216,7 +216,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 	IP6_ECN_flow_xmit(sk, fl6->flowlabel);
 
 	if (!(transport->param_flags & SPP_PMTUD_ENABLE))
-		skb->local_df = 1;
+		skb->ignore_df = 1;
 
 	SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
 
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 0f4d15fc2627..01ab8e0723f0 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -591,7 +591,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 
 	pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len);
 
-	nskb->local_df = packet->ipfragok;
+	nskb->ignore_df = packet->ipfragok;
 	tp->af_specific->sctp_xmit(nskb, tp);
 
 out:
-- 
cgit 


From f06c7f9f92295faf701a9628b383156c4efb6119 Mon Sep 17 00:00:00 2001
From: dingtianhong <dingtianhong@huawei.com>
Date: Fri, 9 May 2014 14:58:05 +0800
Subject: vlan: rename __vlan_find_dev_deep() to __vlan_find_dev_deep_rcu()

__vlan_find_dev_deep() must always be called under the RCU read lock.
Following David's suggestion, rename it to __vlan_find_dev_deep_rcu()
so that the name makes this requirement clear.

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c |  2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |  2 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c   |  2 +-
 drivers/net/usb/cdc_mbim.c                         |  4 ++--
 drivers/s390/net/qeth_l3_main.c                    | 10 +++++-----
 include/linux/if_vlan.h                            |  4 ++--
 net/8021q/vlan_core.c                              |  6 +++---
 net/bridge/br_netfilter.c                          |  2 +-
 8 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
index c0a9dd55f4e5..b0cbb2b7fd48 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
@@ -185,7 +185,7 @@ static struct net_device *get_iff_from_mac(struct adapter *adapter,
 		if (ether_addr_equal(dev->dev_addr, mac)) {
 			rcu_read_lock();
 			if (vlan && vlan != VLAN_VID_MASK) {
-				dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), vlan);
+				dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), vlan);
 			} else if (netif_is_bond_slave(dev)) {
 				struct net_device *upper_dev;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 8efeed3325b5..0f1e886d89e3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4068,7 +4068,7 @@ static int update_root_dev_clip(struct net_device *dev)
 
 	/* Parse all bond and vlan devices layered on top of the physical dev */
 	for (i = 0; i < VLAN_N_VID; i++) {
-		root_dev = __vlan_find_dev_deep(dev, htons(ETH_P_8021Q), i);
+		root_dev = __vlan_find_dev_deep_rcu(dev, htons(ETH_P_8021Q), i);
 		if (!root_dev)
 			continue;
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index d2e18b52caba..8a2aeb85e320 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -4146,7 +4146,7 @@ void qlcnic_restore_indev_addr(struct net_device *netdev, unsigned long event)
 
 	rcu_read_lock();
 	for_each_set_bit(vid, adapter->vlans, VLAN_N_VID) {
-		dev = __vlan_find_dev_deep(netdev, htons(ETH_P_8021Q), vid);
+		dev = __vlan_find_dev_deep_rcu(netdev, htons(ETH_P_8021Q), vid);
 		if (!dev)
 			continue;
 		qlcnic_config_indev_addr(adapter, dev, event);
diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 2e025ddcef21..0ab79fca822c 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -223,8 +223,8 @@ static void do_neigh_solicit(struct usbnet *dev, u8 *buf, u16 tci)
 	/* need to send the NA on the VLAN dev, if any */
 	rcu_read_lock();
 	if (tci) {
-		netdev = __vlan_find_dev_deep(dev->net, htons(ETH_P_8021Q),
-					      tci);
+		netdev = __vlan_find_dev_deep_rcu(dev->net, htons(ETH_P_8021Q),
+						  tci);
 		if (!netdev) {
 			rcu_read_unlock();
 			return;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index c8d91d797ec8..bc2499a24884 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1659,7 +1659,7 @@ static void qeth_l3_add_vlan_mc(struct qeth_card *card)
 	for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) {
 		struct net_device *netdev;
 
-		netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q),
+		netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
 					      vid);
 		if (netdev == NULL ||
 		    !(netdev->flags & IFF_UP))
@@ -1721,7 +1721,7 @@ static void qeth_l3_add_vlan_mc6(struct qeth_card *card)
 	for_each_set_bit(vid, card->active_vlans, VLAN_N_VID) {
 		struct net_device *netdev;
 
-		netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q),
+		netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
 					      vid);
 		if (netdev == NULL ||
 		    !(netdev->flags & IFF_UP))
@@ -1766,7 +1766,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card,
 
 	QETH_CARD_TEXT(card, 4, "frvaddr4");
 
-	netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), vid);
+	netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid);
 	if (!netdev)
 		return;
 	in_dev = in_dev_get(netdev);
@@ -1796,7 +1796,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card,
 
 	QETH_CARD_TEXT(card, 4, "frvaddr6");
 
-	netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q), vid);
+	netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q), vid);
 	if (!netdev)
 		return;
 	in6_dev = in6_dev_get(netdev);
@@ -2089,7 +2089,7 @@ static int qeth_l3_verify_vlan_dev(struct net_device *dev,
 		struct net_device *netdev;
 
 		rcu_read_lock();
-		netdev = __vlan_find_dev_deep(card->dev, htons(ETH_P_8021Q),
+		netdev = __vlan_find_dev_deep_rcu(card->dev, htons(ETH_P_8021Q),
 					      vid);
 		rcu_read_unlock();
 		if (netdev == dev) {
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 13bbbde00e68..8c0fb7f3a9a5 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -106,7 +106,7 @@ struct vlan_pcpu_stats {
 
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 
-extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
+extern struct net_device *__vlan_find_dev_deep_rcu(struct net_device *real_dev,
 					       __be16 vlan_proto, u16 vlan_id);
 extern struct net_device *vlan_dev_real_dev(const struct net_device *dev);
 extern u16 vlan_dev_vlan_id(const struct net_device *dev);
@@ -199,7 +199,7 @@ extern void vlan_vids_del_by_dev(struct net_device *dev,
 extern bool vlan_uses_dev(const struct net_device *dev);
 #else
 static inline struct net_device *
-__vlan_find_dev_deep(struct net_device *real_dev,
+__vlan_find_dev_deep_rcu(struct net_device *real_dev,
 		     __be16 vlan_proto, u16 vlan_id)
 {
 	return NULL;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 3c32bd257b73..9012b1c922b6 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -63,7 +63,7 @@ bool vlan_do_receive(struct sk_buff **skbp)
 }
 
 /* Must be invoked with rcu_read_lock. */
-struct net_device *__vlan_find_dev_deep(struct net_device *dev,
+struct net_device *__vlan_find_dev_deep_rcu(struct net_device *dev,
 					__be16 vlan_proto, u16 vlan_id)
 {
 	struct vlan_info *vlan_info = rcu_dereference(dev->vlan_info);
@@ -81,13 +81,13 @@ struct net_device *__vlan_find_dev_deep(struct net_device *dev,
 
 		upper_dev = netdev_master_upper_dev_get_rcu(dev);
 		if (upper_dev)
-			return __vlan_find_dev_deep(upper_dev,
+			return __vlan_find_dev_deep_rcu(upper_dev,
 						    vlan_proto, vlan_id);
 	}
 
 	return NULL;
 }
-EXPORT_SYMBOL(__vlan_find_dev_deep);
+EXPORT_SYMBOL(__vlan_find_dev_deep_rcu);
 
 struct net_device *vlan_dev_real_dev(const struct net_device *dev)
 {
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 2acf7fa1fec6..a615264cf01a 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -535,7 +535,7 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct
 	if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb))
 		return br;
 
-	vlan = __vlan_find_dev_deep(br, skb->vlan_proto,
+	vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto,
 				    vlan_tx_tag_get(skb) & VLAN_VID_MASK);
 
 	return vlan ? vlan : br;
-- 
cgit 


From 907abd51012b9b0b0b687e97d5ebadbddbc682fe Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Mon, 3 Mar 2014 11:36:43 +0900
Subject: mmc: dw_mmc: remove unused member variable.

Since the driver switched to device tree, the platform-data callback
pointers are no longer used, so remove them.
If a set_power callback is needed in the future, it can be added back.

Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/dw_mmc.c  | 33 +++------------------------------
 include/linux/mmc/dw_mmc.h | 14 --------------
 2 files changed, 3 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 55c34cb702d4..81991eca5671 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -850,8 +850,6 @@ static void __dw_mci_start_request(struct dw_mci *host,
 	u32 cmdflags;
 
 	mrq = slot->mrq;
-	if (host->pdata->select_slot)
-		host->pdata->select_slot(slot->id);
 
 	host->cur_slot = slot;
 	host->mrq = mrq;
@@ -985,17 +983,11 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	switch (ios->power_mode) {
 	case MMC_POWER_UP:
 		set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags);
-		/* Power up slot */
-		if (slot->host->pdata->setpower)
-			slot->host->pdata->setpower(slot->id, mmc->ocr_avail);
 		regs = mci_readl(slot->host, PWREN);
 		regs |= (1 << slot->id);
 		mci_writel(slot->host, PWREN, regs);
 		break;
 	case MMC_POWER_OFF:
-		/* Power down slot */
-		if (slot->host->pdata->setpower)
-			slot->host->pdata->setpower(slot->id, 0);
 		regs = mci_readl(slot->host, PWREN);
 		regs &= ~(1 << slot->id);
 		mci_writel(slot->host, PWREN, regs);
@@ -1009,13 +1001,10 @@ static int dw_mci_get_ro(struct mmc_host *mmc)
 {
 	int read_only;
 	struct dw_mci_slot *slot = mmc_priv(mmc);
-	struct dw_mci_board *brd = slot->host->pdata;
 
 	/* Use platform get_ro function, else try on board write protect */
 	if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT)
 		read_only = 0;
-	else if (brd->get_ro)
-		read_only = brd->get_ro(slot->id);
 	else if (gpio_is_valid(slot->wp_gpio))
 		read_only = gpio_get_value(slot->wp_gpio);
 	else
@@ -1039,8 +1028,6 @@ static int dw_mci_get_cd(struct mmc_host *mmc)
 	/* Use platform get_cd function, else try onboard card detect */
 	if (brd->quirks & DW_MCI_QUIRK_BROKEN_CARD_DETECTION)
 		present = 1;
-	else if (brd->get_cd)
-		present = !brd->get_cd(slot->id);
 	else if (!IS_ERR_VALUE(gpio_cd))
 		present = gpio_cd;
 	else
@@ -2138,17 +2125,7 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id)
 		mmc->f_max = freq[1];
 	}
 
-	if (host->pdata->get_ocr)
-		mmc->ocr_avail = host->pdata->get_ocr(id);
-	else
-		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-
-	/*
-	 * Start with slot power disabled, it will be enabled when a card
-	 * is detected.
-	 */
-	if (host->pdata->setpower)
-		host->pdata->setpower(id, 0);
+	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
 
 	if (host->pdata->caps)
 		mmc->caps = host->pdata->caps;
@@ -2217,10 +2194,6 @@ err_setup_bus:
 
 static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id)
 {
-	/* Shutdown detect IRQ */
-	if (slot->host->pdata->exit)
-		slot->host->pdata->exit(id);
-
 	/* Debugfs stuff is cleaned up by mmc core */
 	mmc_remove_host(slot->mmc);
 	slot->host->slot[id] = NULL;
@@ -2395,9 +2368,9 @@ int dw_mci_probe(struct dw_mci *host)
 		}
 	}
 
-	if (!host->pdata->select_slot && host->pdata->num_slots > 1) {
+	if (host->pdata->num_slots > 1) {
 		dev_err(host->dev,
-			"Platform data must supply select_slot function\n");
+			"Platform data must supply num_slots.\n");
 		return -ENODEV;
 	}
 
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 6ce7d2cd3c7a..babaea93bca6 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -248,20 +248,6 @@ struct dw_mci_board {
 	/* delay in mS before detecting cards after interrupt */
 	u32 detect_delay_ms;
 
-	int (*init)(u32 slot_id, irq_handler_t , void *);
-	int (*get_ro)(u32 slot_id);
-	int (*get_cd)(u32 slot_id);
-	int (*get_ocr)(u32 slot_id);
-	int (*get_bus_wd)(u32 slot_id);
-	/*
-	 * Enable power to selected slot and set voltage to desired level.
-	 * Voltage levels are specified using MMC_VDD_xxx defines defined
-	 * in linux/mmc/host.h file.
-	 */
-	void (*setpower)(u32 slot_id, u32 volt);
-	void (*exit)(u32 slot_id);
-	void (*select_slot)(u32 slot_id);
-
 	struct dw_mci_dma_ops *dma_ops;
 	struct dma_pdata *data;
 	struct block_settings *blk_settings;
-- 
cgit 


From cdc991790c51c693d0c347a5286af017826a5d01 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Wed, 23 Apr 2014 17:07:35 +0900
Subject: mmc: drop the speed mode of card's state

The timing mode identifier serves the same role and can take the place
of the speed mode. This change removes all speed-mode-related code.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Tested-by: Jaehoon Chung <jh80.chung@samsung.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/bus.c                  |  8 ++++----
 drivers/mmc/core/core.c                 |  3 +--
 drivers/mmc/core/mmc.c                  | 11 +++--------
 drivers/mmc/core/sd.c                   | 16 +++-------------
 drivers/mmc/core/sd.h                   |  1 -
 drivers/mmc/core/sdio.c                 |  8 ++------
 drivers/net/wireless/rsi/rsi_91x_sdio.c |  4 +---
 include/linux/mmc/card.h                | 23 ++++++-----------------
 include/linux/mmc/host.h                | 23 +++++++++++++++++++++++
 9 files changed, 43 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 824644875d41..f37e9d6af84a 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -341,16 +341,16 @@ int mmc_add_card(struct mmc_card *card)
 	if (mmc_host_is_spi(card->host)) {
 		pr_info("%s: new %s%s%s card on SPI\n",
 			mmc_hostname(card->host),
-			mmc_card_highspeed(card) ? "high speed " : "",
-			mmc_card_ddr_mode(card) ? "DDR " : "",
+			mmc_card_hs(card) ? "high speed " : "",
+			mmc_card_ddr52(card) ? "DDR " : "",
 			type);
 	} else {
 		pr_info("%s: new %s%s%s%s%s card at address %04x\n",
 			mmc_hostname(card->host),
 			mmc_card_uhs(card) ? "ultra high speed " :
-			(mmc_card_highspeed(card) ? "high speed " : ""),
+			(mmc_card_hs(card) ? "high speed " : ""),
 			(mmc_card_hs200(card) ? "HS200 " : ""),
-			mmc_card_ddr_mode(card) ? "DDR " : "",
+			mmc_card_ddr52(card) ? "DDR " : "",
 			uhs_bus_speed_mode, type, card->rca);
 	}
 
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index d97dff5fab62..02baa30653fa 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2192,7 +2192,7 @@ int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen)
 {
 	struct mmc_command cmd = {0};
 
-	if (mmc_card_blockaddr(card) || mmc_card_ddr_mode(card))
+	if (mmc_card_blockaddr(card) || mmc_card_ddr52(card))
 		return 0;
 
 	cmd.opcode = MMC_SET_BLOCKLEN;
@@ -2272,7 +2272,6 @@ static int mmc_do_hw_reset(struct mmc_host *host, int check)
 		}
 	}
 
-	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_DDR);
 	if (mmc_host_is_spi(host)) {
 		host->ios.chip_select = MMC_CS_HIGH;
 		host->ios.bus_mode = MMC_BUSMODE_PUSHPULL;
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index fbcf93d81858..31220529e171 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1083,11 +1083,9 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 		} else {
 			if (card->ext_csd.hs_max_dtr > 52000000 &&
 			    host->caps2 & MMC_CAP2_HS200) {
-				mmc_card_set_hs200(card);
 				mmc_set_timing(card->host,
 					       MMC_TIMING_MMC_HS200);
 			} else {
-				mmc_card_set_highspeed(card);
 				mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
 			}
 		}
@@ -1098,10 +1096,10 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	 */
 	max_dtr = (unsigned int)-1;
 
-	if (mmc_card_highspeed(card) || mmc_card_hs200(card)) {
+	if (mmc_card_hs(card) || mmc_card_hs200(card)) {
 		if (max_dtr > card->ext_csd.hs_max_dtr)
 			max_dtr = card->ext_csd.hs_max_dtr;
-		if (mmc_card_highspeed(card) && (max_dtr > 52000000))
+		if (mmc_card_hs(card) && (max_dtr > 52000000))
 			max_dtr = 52000000;
 	} else if (max_dtr > card->csd.max_dtr) {
 		max_dtr = card->csd.max_dtr;
@@ -1112,7 +1110,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	/*
 	 * Indicate DDR mode (if supported).
 	 */
-	if (mmc_card_highspeed(card)) {
+	if (mmc_card_hs(card)) {
 		if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_8V)
 			&& (host->caps & MMC_CAP_1_8V_DDR))
 				ddr = MMC_1_8V_DDR_MODE;
@@ -1255,7 +1253,6 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 				if (err)
 					goto err;
 			}
-			mmc_card_set_ddr_mode(card);
 			mmc_set_timing(card->host, MMC_TIMING_MMC_DDR52);
 			mmc_set_bus_width(card->host, bus_width);
 		}
@@ -1499,7 +1496,6 @@ static int _mmc_suspend(struct mmc_host *host, bool is_suspend)
 		err = mmc_sleep(host);
 	else if (!mmc_host_is_spi(host))
 		err = mmc_deselect_cards(host);
-	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
 
 	if (!err) {
 		mmc_power_off(host);
@@ -1629,7 +1625,6 @@ static int mmc_power_restore(struct mmc_host *host)
 {
 	int ret;
 
-	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
 	mmc_claim_host(host);
 	ret = mmc_init_card(host, host->card->ocr, host->card);
 	mmc_release_host(host);
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index aef515755e5b..0c44510bf717 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -887,7 +887,7 @@ unsigned mmc_sd_get_max_clock(struct mmc_card *card)
 {
 	unsigned max_dtr = (unsigned int)-1;
 
-	if (mmc_card_highspeed(card)) {
+	if (mmc_card_hs(card)) {
 		if (max_dtr > card->sw_caps.hs_max_dtr)
 			max_dtr = card->sw_caps.hs_max_dtr;
 	} else if (max_dtr > card->csd.max_dtr) {
@@ -897,12 +897,6 @@ unsigned mmc_sd_get_max_clock(struct mmc_card *card)
 	return max_dtr;
 }
 
-void mmc_sd_go_highspeed(struct mmc_card *card)
-{
-	mmc_card_set_highspeed(card);
-	mmc_set_timing(card->host, MMC_TIMING_SD_HS);
-}
-
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -977,16 +971,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
 		err = mmc_sd_init_uhs_card(card);
 		if (err)
 			goto free_card;
-
-		/* Card is an ultra-high-speed card */
-		mmc_card_set_uhs(card);
 	} else {
 		/*
 		 * Attempt to change to high-speed (if supported)
 		 */
 		err = mmc_sd_switch_hs(card);
 		if (err > 0)
-			mmc_sd_go_highspeed(card);
+			mmc_set_timing(card->host, MMC_TIMING_SD_HS);
 		else if (err)
 			goto free_card;
 
@@ -1081,7 +1072,7 @@ static int _mmc_sd_suspend(struct mmc_host *host)
 
 	if (!mmc_host_is_spi(host))
 		err = mmc_deselect_cards(host);
-	host->card->state &= ~MMC_STATE_HIGHSPEED;
+
 	if (!err) {
 		mmc_power_off(host);
 		mmc_card_set_suspended(host->card);
@@ -1190,7 +1181,6 @@ static int mmc_sd_power_restore(struct mmc_host *host)
 {
 	int ret;
 
-	host->card->state &= ~MMC_STATE_HIGHSPEED;
 	mmc_claim_host(host);
 	ret = mmc_sd_init_card(host, host->card->ocr, host->card);
 	mmc_release_host(host);
diff --git a/drivers/mmc/core/sd.h b/drivers/mmc/core/sd.h
index 4b34b24f3f76..aab824a9a7f3 100644
--- a/drivers/mmc/core/sd.h
+++ b/drivers/mmc/core/sd.h
@@ -12,6 +12,5 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card,
 	bool reinit);
 unsigned mmc_sd_get_max_clock(struct mmc_card *card);
 int mmc_sd_switch_hs(struct mmc_card *card);
-void mmc_sd_go_highspeed(struct mmc_card *card);
 
 #endif
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 9933e426bc36..e636d9e99e4a 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -363,7 +363,7 @@ static unsigned mmc_sdio_get_max_clock(struct mmc_card *card)
 {
 	unsigned max_dtr;
 
-	if (mmc_card_highspeed(card)) {
+	if (mmc_card_hs(card)) {
 		/*
 		 * The SDIO specification doesn't mention how
 		 * the CIS transfer speed register relates to
@@ -733,7 +733,6 @@ try_again:
 		mmc_set_clock(host, card->cis.max_dtr);
 
 		if (card->cccr.high_speed) {
-			mmc_card_set_highspeed(card);
 			mmc_set_timing(card->host, MMC_TIMING_SD_HS);
 		}
 
@@ -792,16 +791,13 @@ try_again:
 		err = mmc_sdio_init_uhs_card(card);
 		if (err)
 			goto remove;
-
-		/* Card is an ultra-high-speed card */
-		mmc_card_set_uhs(card);
 	} else {
 		/*
 		 * Switch to high-speed (if supported).
 		 */
 		err = sdio_enable_hs(card);
 		if (err > 0)
-			mmc_sd_go_highspeed(card);
+			mmc_set_timing(card->host, MMC_TIMING_SD_HS);
 		else if (err)
 			goto remove;
 
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index 2e39d38d6a9e..46e7af446f01 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -285,7 +285,6 @@ static void rsi_reset_card(struct sdio_func *pfunction)
 		if (err) {
 			rsi_dbg(ERR_ZONE, "%s: CCCR speed reg read failed: %d\n",
 				__func__, err);
-			card->state &= ~MMC_STATE_HIGHSPEED;
 		} else {
 			err = rsi_cmd52writebyte(card,
 						 SDIO_CCCR_SPEED,
@@ -296,14 +295,13 @@ static void rsi_reset_card(struct sdio_func *pfunction)
 					__func__, err);
 				return;
 			}
-			mmc_card_set_highspeed(card);
 			host->ios.timing = MMC_TIMING_SD_HS;
 			host->ops->set_ios(host, &host->ios);
 		}
 	}
 
 	/* Set clock */
-	if (mmc_card_highspeed(card))
+	if (mmc_card_hs(card))
 		clock = 50000000;
 	else
 		clock = card->cis.max_dtr;
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index aa7e57f60fb2..aadeaf155d0e 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -194,6 +194,7 @@ struct sdio_cis {
 };
 
 struct mmc_host;
+struct mmc_ios;
 struct sdio_func;
 struct sdio_func_tuple;
 
@@ -250,15 +251,11 @@ struct mmc_card {
 	unsigned int		state;		/* (our) card state */
 #define MMC_STATE_PRESENT	(1<<0)		/* present in sysfs */
 #define MMC_STATE_READONLY	(1<<1)		/* card is read-only */
-#define MMC_STATE_HIGHSPEED	(1<<2)		/* card is in high speed mode */
-#define MMC_STATE_BLOCKADDR	(1<<3)		/* card uses block-addressing */
-#define MMC_STATE_HIGHSPEED_DDR (1<<4)		/* card is in high speed mode */
-#define MMC_STATE_ULTRAHIGHSPEED (1<<5)		/* card is in ultra high speed mode */
-#define MMC_CARD_SDXC		(1<<6)		/* card is SDXC */
-#define MMC_CARD_REMOVED	(1<<7)		/* card has been removed */
-#define MMC_STATE_HIGHSPEED_200	(1<<8)		/* card is in HS200 mode */
-#define MMC_STATE_DOING_BKOPS	(1<<10)		/* card is doing BKOPS */
-#define MMC_STATE_SUSPENDED	(1<<11)		/* card is suspended */
+#define MMC_STATE_BLOCKADDR	(1<<2)		/* card uses block-addressing */
+#define MMC_CARD_SDXC		(1<<3)		/* card is SDXC */
+#define MMC_CARD_REMOVED	(1<<4)		/* card has been removed */
+#define MMC_STATE_DOING_BKOPS	(1<<5)		/* card is doing BKOPS */
+#define MMC_STATE_SUSPENDED	(1<<6)		/* card is suspended */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -418,11 +415,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 
 #define mmc_card_present(c)	((c)->state & MMC_STATE_PRESENT)
 #define mmc_card_readonly(c)	((c)->state & MMC_STATE_READONLY)
-#define mmc_card_highspeed(c)	((c)->state & MMC_STATE_HIGHSPEED)
-#define mmc_card_hs200(c)	((c)->state & MMC_STATE_HIGHSPEED_200)
 #define mmc_card_blockaddr(c)	((c)->state & MMC_STATE_BLOCKADDR)
-#define mmc_card_ddr_mode(c)	((c)->state & MMC_STATE_HIGHSPEED_DDR)
-#define mmc_card_uhs(c)		((c)->state & MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
 #define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
 #define mmc_card_doing_bkops(c)	((c)->state & MMC_STATE_DOING_BKOPS)
@@ -430,11 +423,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
-#define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
-#define mmc_card_set_hs200(c)	((c)->state |= MMC_STATE_HIGHSPEED_200)
 #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
-#define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR)
-#define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
 #define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
 #define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
 #define mmc_card_set_doing_bkops(c)	((c)->state |= MMC_STATE_DOING_BKOPS)
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0cf705c83998..a43853779799 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -17,6 +17,7 @@
 #include <linux/fault-inject.h>
 
 #include <linux/mmc/core.h>
+#include <linux/mmc/card.h>
 #include <linux/mmc/pm.h>
 
 struct mmc_ios {
@@ -478,4 +479,26 @@ static inline unsigned int mmc_host_clk_rate(struct mmc_host *host)
 	return host->ios.clock;
 }
 #endif
+
+static inline int mmc_card_hs(struct mmc_card *card)
+{
+	return card->host->ios.timing == MMC_TIMING_SD_HS ||
+		card->host->ios.timing == MMC_TIMING_MMC_HS;
+}
+
+static inline int mmc_card_uhs(struct mmc_card *card)
+{
+	return card->host->ios.timing >= MMC_TIMING_UHS_SDR12 &&
+		card->host->ios.timing <= MMC_TIMING_UHS_DDR50;
+}
+
+static inline bool mmc_card_hs200(struct mmc_card *card)
+{
+	return card->host->ios.timing == MMC_TIMING_MMC_HS200;
+}
+
+static inline bool mmc_card_ddr52(struct mmc_card *card)
+{
+	return card->host->ios.timing == MMC_TIMING_MMC_DDR52;
+}
 #endif /* LINUX_MMC_HOST_H */
-- 
cgit 


From 2415c0ef618b3cd95581c7f633cbab78b29b7ab0 Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Wed, 23 Apr 2014 17:07:58 +0900
Subject: mmc: identify available device type to select

Device types which are supported by both the host and the device can be
identified when EXT_CSD is read. There is no need to check the host's
capabilities anymore.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Tested-by: Jaehoon Chung <jh80.chung@samsung.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/mmc.c   | 72 ++++++++++++++++++++++++++----------------------
 include/linux/mmc/card.h |  2 +-
 include/linux/mmc/host.h |  6 ----
 include/linux/mmc/mmc.h  | 12 +++++---
 4 files changed, 48 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 31220529e171..b5691fee9629 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -243,28 +243,46 @@ static void mmc_select_card_type(struct mmc_card *card)
 	u8 card_type = card->ext_csd.raw_card_type & EXT_CSD_CARD_TYPE_MASK;
 	u32 caps = host->caps, caps2 = host->caps2;
 	unsigned int hs_max_dtr = 0;
+	unsigned int avail_type = 0;
 
-	if (card_type & EXT_CSD_CARD_TYPE_26)
+	if (caps & MMC_CAP_MMC_HIGHSPEED &&
+	    card_type & EXT_CSD_CARD_TYPE_HS_26) {
 		hs_max_dtr = MMC_HIGH_26_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS_26;
+	}
 
 	if (caps & MMC_CAP_MMC_HIGHSPEED &&
-			card_type & EXT_CSD_CARD_TYPE_52)
+	    card_type & EXT_CSD_CARD_TYPE_HS_52) {
 		hs_max_dtr = MMC_HIGH_52_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS_52;
+	}
 
-	if ((caps & MMC_CAP_1_8V_DDR &&
-			card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) ||
-	    (caps & MMC_CAP_1_2V_DDR &&
-			card_type & EXT_CSD_CARD_TYPE_DDR_1_2V))
+	if (caps & MMC_CAP_1_8V_DDR &&
+	    card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) {
 		hs_max_dtr = MMC_HIGH_DDR_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_DDR_1_8V;
+	}
+
+	if (caps & MMC_CAP_1_2V_DDR &&
+	    card_type & EXT_CSD_CARD_TYPE_DDR_1_2V) {
+		hs_max_dtr = MMC_HIGH_DDR_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_DDR_1_2V;
+	}
 
-	if ((caps2 & MMC_CAP2_HS200_1_8V_SDR &&
-			card_type & EXT_CSD_CARD_TYPE_SDR_1_8V) ||
-	    (caps2 & MMC_CAP2_HS200_1_2V_SDR &&
-			card_type & EXT_CSD_CARD_TYPE_SDR_1_2V))
+	if (caps2 & MMC_CAP2_HS200_1_8V_SDR &&
+	    card_type & EXT_CSD_CARD_TYPE_HS200_1_8V) {
 		hs_max_dtr = MMC_HS200_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS200_1_8V;
+	}
+
+	if (caps2 & MMC_CAP2_HS200_1_2V_SDR &&
+	    card_type & EXT_CSD_CARD_TYPE_HS200_1_2V) {
+		hs_max_dtr = MMC_HS200_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS200_1_2V;
+	}
 
 	card->ext_csd.hs_max_dtr = hs_max_dtr;
-	card->ext_csd.card_type = card_type;
+	card->mmc_avail_type = avail_type;
 }
 
 /*
@@ -800,12 +818,10 @@ static int mmc_select_hs200(struct mmc_card *card)
 
 	host = card->host;
 
-	if (card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_2V &&
-			host->caps2 & MMC_CAP2_HS200_1_2V_SDR)
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V)
 		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
 
-	if (err && card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_8V &&
-			host->caps2 & MMC_CAP2_HS200_1_8V_SDR)
+	if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_8V)
 		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
 
 	/* If fails try again during next card power cycle */
@@ -1064,10 +1080,9 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	 */
 	if (card->ext_csd.hs_max_dtr != 0) {
 		err = 0;
-		if (card->ext_csd.hs_max_dtr > 52000000 &&
-		    host->caps2 & MMC_CAP2_HS200)
+		if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
 			err = mmc_select_hs200(card);
-		else if	(host->caps & MMC_CAP_MMC_HIGHSPEED)
+		else if	(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS)
 			err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 					EXT_CSD_HS_TIMING, 1,
 					card->ext_csd.generic_cmd6_time,
@@ -1081,13 +1096,11 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 			       mmc_hostname(card->host));
 			err = 0;
 		} else {
-			if (card->ext_csd.hs_max_dtr > 52000000 &&
-			    host->caps2 & MMC_CAP2_HS200) {
+			if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
 				mmc_set_timing(card->host,
 					       MMC_TIMING_MMC_HS200);
-			} else {
+			else
 				mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
-			}
 		}
 	}
 
@@ -1110,14 +1123,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	/*
 	 * Indicate DDR mode (if supported).
 	 */
-	if (mmc_card_hs(card)) {
-		if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_8V)
-			&& (host->caps & MMC_CAP_1_8V_DDR))
-				ddr = MMC_1_8V_DDR_MODE;
-		else if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_2V)
-			&& (host->caps & MMC_CAP_1_2V_DDR))
-				ddr = MMC_1_2V_DDR_MODE;
-	}
+	if (mmc_card_hs(card))
+		ddr = card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52;
 
 	/*
 	 * Indicate HS200 SDR mode (if supported).
@@ -1137,8 +1144,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 		 * 3. set the clock to > 52Mhz <=200MHz and
 		 * 4. execute tuning for HS200
 		 */
-		if ((host->caps2 & MMC_CAP2_HS200) &&
-		    card->host->ops->execute_tuning) {
+		if (card->host->ops->execute_tuning) {
 			mmc_host_clk_hold(card->host);
 			err = card->host->ops->execute_tuning(card->host,
 				MMC_SEND_TUNING_BLOCK_HS200);
@@ -1247,7 +1253,7 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 			 *
 			 * WARNING: eMMC rules are NOT the same as SD DDR
 			 */
-			if (ddr == MMC_1_2V_DDR_MODE) {
+			if (ddr & EXT_CSD_CARD_TYPE_DDR_1_2V) {
 				err = __mmc_set_signal_voltage(host,
 					MMC_SIGNAL_VOLTAGE_120);
 				if (err)
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index aadeaf155d0e..fe31f8d89a03 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -68,7 +68,6 @@ struct mmc_ext_csd {
 #define MMC_HIGH_DDR_MAX_DTR	52000000
 #define MMC_HS200_MAX_DTR	200000000
 	unsigned int		sectors;
-	unsigned int		card_type;
 	unsigned int		hc_erase_size;		/* In sectors */
 	unsigned int		hc_erase_timeout;	/* In milliseconds */
 	unsigned int		sec_trim_mult;	/* Secure trim multiplier  */
@@ -298,6 +297,7 @@ struct mmc_card {
 	struct sdio_func_tuple	*tuples;	/* unknown common tuples */
 
 	unsigned int		sd_bus_speed;	/* Bus Speed Mode set for the card */
+	unsigned int		mmc_avail_type;	/* supported device type by both host and card */
 
 	struct dentry		*debugfs_root;
 	struct mmc_part	part[MMC_NUM_PHY_PARTITION]; /* physical partitions */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index a43853779799..6b1e9ee6ca10 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -62,12 +62,6 @@ struct mmc_ios {
 #define MMC_TIMING_MMC_DDR52	8
 #define MMC_TIMING_MMC_HS200	9
 
-#define MMC_SDR_MODE		0
-#define MMC_1_2V_DDR_MODE	1
-#define MMC_1_8V_DDR_MODE	2
-#define MMC_1_2V_SDR_MODE	3
-#define MMC_1_8V_SDR_MODE	4
-
 	unsigned char	signal_voltage;		/* signalling voltage (1.8V or 3.3V) */
 
 #define MMC_SIGNAL_VOLTAGE_330	0
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 50bcde3677ca..f734c0c64575 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -354,18 +354,22 @@ struct _mmc_csd {
 #define EXT_CSD_CMD_SET_SECURE		(1<<1)
 #define EXT_CSD_CMD_SET_CPSECURE	(1<<2)
 
-#define EXT_CSD_CARD_TYPE_26	(1<<0)	/* Card can run at 26MHz */
-#define EXT_CSD_CARD_TYPE_52	(1<<1)	/* Card can run at 52MHz */
 #define EXT_CSD_CARD_TYPE_MASK	0x3F	/* Mask out reserved bits */
+#define EXT_CSD_CARD_TYPE_HS_26	(1<<0)	/* Card can run at 26MHz */
+#define EXT_CSD_CARD_TYPE_HS_52	(1<<1)	/* Card can run at 52MHz */
+#define EXT_CSD_CARD_TYPE_HS	(EXT_CSD_CARD_TYPE_HS_26 | \
+				 EXT_CSD_CARD_TYPE_HS_52)
 #define EXT_CSD_CARD_TYPE_DDR_1_8V  (1<<2)   /* Card can run at 52MHz */
 					     /* DDR mode @1.8V or 3V I/O */
 #define EXT_CSD_CARD_TYPE_DDR_1_2V  (1<<3)   /* Card can run at 52MHz */
 					     /* DDR mode @1.2V I/O */
 #define EXT_CSD_CARD_TYPE_DDR_52       (EXT_CSD_CARD_TYPE_DDR_1_8V  \
 					| EXT_CSD_CARD_TYPE_DDR_1_2V)
-#define EXT_CSD_CARD_TYPE_SDR_1_8V	(1<<4)	/* Card can run at 200MHz */
-#define EXT_CSD_CARD_TYPE_SDR_1_2V	(1<<5)	/* Card can run at 200MHz */
+#define EXT_CSD_CARD_TYPE_HS200_1_8V	(1<<4)	/* Card can run at 200MHz */
+#define EXT_CSD_CARD_TYPE_HS200_1_2V	(1<<5)	/* Card can run at 200MHz */
 						/* SDR mode @1.2V I/O */
+#define EXT_CSD_CARD_TYPE_HS200		(EXT_CSD_CARD_TYPE_HS200_1_8V | \
+					 EXT_CSD_CARD_TYPE_HS200_1_2V)
 
 #define EXT_CSD_BUS_WIDTH_1	0	/* Card is in 1 bit mode */
 #define EXT_CSD_BUS_WIDTH_4	1	/* Card is in 4 bit mode */
-- 
cgit 


From 577fb13199b11d8cd75609183649be4b5561243f Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Wed, 23 Apr 2014 17:08:44 +0900
Subject: mmc: rework selection of bus speed mode

The current implementation of bus speed mode selection is too
complicated. This patch simplifies the code and removes
some duplicated parts.

The following changes are included:
* Add a function for each mode selection (HS, HS-DDR, HS200, etc.)
* Rearrange the mode selection sequence based on the supported device types
* Add the maximum speed for HS200 mode (hs200_max_dtr)
* Add field definitions for HS_TIMING of EXT_CSD

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Tested-by: Jaehoon Chung <jh80.chung@samsung.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/debugfs.c |   2 +-
 drivers/mmc/core/mmc.c     | 431 ++++++++++++++++++++++++---------------------
 include/linux/mmc/card.h   |   1 +
 include/linux/mmc/mmc.h    |   4 +
 4 files changed, 238 insertions(+), 200 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 509229b48b55..1f730dbfaeea 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -139,7 +139,7 @@ static int mmc_ios_show(struct seq_file *s, void *data)
 		str = "mmc DDR52";
 		break;
 	case MMC_TIMING_MMC_HS200:
-		str = "mmc high-speed SDR200";
+		str = "mmc HS200";
 		break;
 	default:
 		str = "invalid";
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 4538541ac5ab..bec6786efd19 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -242,7 +242,7 @@ static void mmc_select_card_type(struct mmc_card *card)
 	struct mmc_host *host = card->host;
 	u8 card_type = card->ext_csd.raw_card_type & EXT_CSD_CARD_TYPE_MASK;
 	u32 caps = host->caps, caps2 = host->caps2;
-	unsigned int hs_max_dtr = 0;
+	unsigned int hs_max_dtr = 0, hs200_max_dtr = 0;
 	unsigned int avail_type = 0;
 
 	if (caps & MMC_CAP_MMC_HIGHSPEED &&
@@ -271,17 +271,18 @@ static void mmc_select_card_type(struct mmc_card *card)
 
 	if (caps2 & MMC_CAP2_HS200_1_8V_SDR &&
 	    card_type & EXT_CSD_CARD_TYPE_HS200_1_8V) {
-		hs_max_dtr = MMC_HS200_MAX_DTR;
+		hs200_max_dtr = MMC_HS200_MAX_DTR;
 		avail_type |= EXT_CSD_CARD_TYPE_HS200_1_8V;
 	}
 
 	if (caps2 & MMC_CAP2_HS200_1_2V_SDR &&
 	    card_type & EXT_CSD_CARD_TYPE_HS200_1_2V) {
-		hs_max_dtr = MMC_HS200_MAX_DTR;
+		hs200_max_dtr = MMC_HS200_MAX_DTR;
 		avail_type |= EXT_CSD_CARD_TYPE_HS200_1_2V;
 	}
 
 	card->ext_csd.hs_max_dtr = hs_max_dtr;
+	card->ext_csd.hs200_max_dtr = hs200_max_dtr;
 	card->mmc_avail_type = avail_type;
 }
 
@@ -825,37 +826,46 @@ static int mmc_select_powerclass(struct mmc_card *card)
 }
 
 /*
- * Selects the desired buswidth and switch to the HS200 mode
- * if bus width set without error
+ * Set the bus speed for the selected speed mode.
  */
-static int mmc_select_hs200(struct mmc_card *card)
+static void mmc_set_bus_speed(struct mmc_card *card)
+{
+	unsigned int max_dtr = (unsigned int)-1;
+
+	if (mmc_card_hs200(card) && max_dtr > card->ext_csd.hs200_max_dtr)
+		max_dtr = card->ext_csd.hs200_max_dtr;
+	else if (mmc_card_hs(card) && max_dtr > card->ext_csd.hs_max_dtr)
+		max_dtr = card->ext_csd.hs_max_dtr;
+	else if (max_dtr > card->csd.max_dtr)
+		max_dtr = card->csd.max_dtr;
+
+	mmc_set_clock(card->host, max_dtr);
+}
+
+/*
+ * Select the bus width amoung 4-bit and 8-bit(SDR).
+ * If the bus width is changed successfully, return the selected width value.
+ * Zero is returned instead of error value if the wide width is not supported.
+ */
+static int mmc_select_bus_width(struct mmc_card *card)
 {
-	int idx, err = -EINVAL;
-	struct mmc_host *host;
 	static unsigned ext_csd_bits[] = {
-		EXT_CSD_BUS_WIDTH_4,
 		EXT_CSD_BUS_WIDTH_8,
+		EXT_CSD_BUS_WIDTH_4,
 	};
 	static unsigned bus_widths[] = {
-		MMC_BUS_WIDTH_4,
 		MMC_BUS_WIDTH_8,
+		MMC_BUS_WIDTH_4,
 	};
+	struct mmc_host *host = card->host;
+	unsigned idx, bus_width = 0;
+	int err = 0;
 
-	BUG_ON(!card);
-
-	host = card->host;
-
-	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V)
-		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
-
-	if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_8V)
-		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
-
-	/* If fails try again during next card power cycle */
-	if (err)
-		goto err;
+	if ((card->csd.mmca_vsn < CSD_SPEC_VER_4) &&
+	    !(host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA)))
+		return 0;
 
-	idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 1 : 0;
+	idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 0 : 1;
 
 	/*
 	 * Unlike SD, MMC cards dont have a configuration register to notify
@@ -863,8 +873,7 @@ static int mmc_select_hs200(struct mmc_card *card)
 	 * the supported bus width or compare the ext csd values of current
 	 * bus width and ext csd values of 1 bit mode read earlier.
 	 */
-	for (; idx >= 0; idx--) {
-
+	for (; idx < ARRAY_SIZE(bus_widths); idx++) {
 		/*
 		 * Host is capable of 8bit transfer, then switch
 		 * the device to work in 8bit transfer mode. If the
@@ -879,26 +888,201 @@ static int mmc_select_hs200(struct mmc_card *card)
 		if (err)
 			continue;
 
-		mmc_set_bus_width(card->host, bus_widths[idx]);
+		bus_width = bus_widths[idx];
+		mmc_set_bus_width(host, bus_width);
 
+		/*
+		 * If controller can't handle bus width test,
+		 * compare ext_csd previously read in 1 bit mode
+		 * against ext_csd at new bus width
+		 */
 		if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
-			err = mmc_compare_ext_csds(card, bus_widths[idx]);
+			err = mmc_compare_ext_csds(card, bus_width);
 		else
-			err = mmc_bus_test(card, bus_widths[idx]);
-		if (!err)
+			err = mmc_bus_test(card, bus_width);
+
+		if (!err) {
+			err = bus_width;
 			break;
+		} else {
+			pr_warn("%s: switch to bus width %d failed\n",
+				mmc_hostname(host), ext_csd_bits[idx]);
+		}
 	}
 
-	/* switch to HS200 mode if bus width set successfully */
+	return err;
+}
+
+/*
+ * Switch to the high-speed mode
+ */
+static int mmc_select_hs(struct mmc_card *card)
+{
+	int err;
+
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			   EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS,
+			   card->ext_csd.generic_cmd6_time,
+			   true, true, true);
 	if (!err)
+		mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+
+	return err;
+}
+
+/*
+ * Activate wide bus and DDR if supported.
+ */
+static int mmc_select_hs_ddr(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	u32 bus_width, ext_csd_bits;
+	int err = 0;
+
+	if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52))
+		return 0;
+
+	bus_width = host->ios.bus_width;
+	if (bus_width == MMC_BUS_WIDTH_1)
+		return 0;
+
+	ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
+		EXT_CSD_DDR_BUS_WIDTH_8 : EXT_CSD_DDR_BUS_WIDTH_4;
+
+	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			EXT_CSD_BUS_WIDTH,
+			ext_csd_bits,
+			card->ext_csd.generic_cmd6_time);
+	if (err) {
+		pr_warn("%s: switch to bus width %d ddr failed\n",
+			mmc_hostname(host), 1 << bus_width);
+		return err;
+	}
+
+	/*
+	 * eMMC cards can support 3.3V to 1.2V i/o (vccq)
+	 * signaling.
+	 *
+	 * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq.
+	 *
+	 * 1.8V vccq at 3.3V core voltage (vcc) is not required
+	 * in the JEDEC spec for DDR.
+	 *
+	 * Do not force change in vccq since we are obviously
+	 * working and no change to vccq is needed.
+	 *
+	 * WARNING: eMMC rules are NOT the same as SD DDR
+	 */
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_1_2V) {
+		err = __mmc_set_signal_voltage(host,
+				MMC_SIGNAL_VOLTAGE_120);
+		if (err)
+			return err;
+	}
+
+	mmc_set_timing(host, MMC_TIMING_MMC_DDR52);
+
+	return err;
+}
+
+/*
+ * For device supporting HS200 mode, the following sequence
+ * should be done before executing the tuning process.
+ * 1. set the desired bus width(4-bit or 8-bit, 1-bit is not supported)
+ * 2. switch to HS200 mode
+ * 3. set the clock to > 52Mhz and <=200MHz
+ */
+static int mmc_select_hs200(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	int err = -EINVAL;
+
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V)
+		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
+
+	if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_8V)
+		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
+
+	/* If fails try again during next card power cycle */
+	if (err)
+		goto err;
+
+	/*
+	 * Set the bus width(4 or 8) with host's support and
+	 * switch to HS200 mode if bus width is set successfully.
+	 */
+	err = mmc_select_bus_width(card);
+	if (!IS_ERR_VALUE(err)) {
 		err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-				EXT_CSD_HS_TIMING, 2,
-				card->ext_csd.generic_cmd6_time,
-				true, true, true);
+				   EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS200,
+				   card->ext_csd.generic_cmd6_time,
+				   true, true, true);
+		if (!err)
+			mmc_set_timing(host, MMC_TIMING_MMC_HS200);
+	}
 err:
 	return err;
 }
 
+/*
+ * Activate High Speed or HS200 mode if supported.
+ */
+static int mmc_select_timing(struct mmc_card *card)
+{
+	int err = 0;
+
+	if ((card->csd.mmca_vsn < CSD_SPEC_VER_4 &&
+	     card->ext_csd.hs_max_dtr == 0))
+		goto bus_speed;
+
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
+		err = mmc_select_hs200(card);
+	else if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS)
+		err = mmc_select_hs(card);
+
+	if (err && err != -EBADMSG)
+		return err;
+
+	if (err) {
+		pr_warn("%s: switch to %s failed\n",
+			mmc_card_hs(card) ? "high-speed" :
+			(mmc_card_hs200(card) ? "hs200" : ""),
+			mmc_hostname(card->host));
+		err = 0;
+	}
+
+bus_speed:
+	/*
+	 * Set the bus speed to the selected bus timing.
+	 * If timing is not selected, backward compatible is the default.
+	 */
+	mmc_set_bus_speed(card);
+	return err;
+}
+
+/*
+ * Execute tuning sequence to seek the proper bus operating
+ * conditions for HS200, which sends CMD21 to the device.
+ */
+static int mmc_hs200_tuning(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	int err = 0;
+
+	if (host->ops->execute_tuning) {
+		mmc_host_clk_hold(host);
+		err = host->ops->execute_tuning(host,
+				MMC_SEND_TUNING_BLOCK_HS200);
+		mmc_host_clk_release(host);
+
+		if (err)
+			pr_warn("%s: tuning execution failed\n",
+				mmc_hostname(host));
+	}
+
+	return err;
+}
+
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -909,9 +1093,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	struct mmc_card *oldcard)
 {
 	struct mmc_card *card;
-	int err, ddr = 0;
+	int err;
 	u32 cid[4];
-	unsigned int max_dtr;
 	u32 rocr;
 	u8 *ext_csd = NULL;
 
@@ -1103,173 +1286,23 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 	}
 
 	/*
-	 * Activate high speed (if supported)
-	 */
-	if (card->ext_csd.hs_max_dtr != 0) {
-		err = 0;
-		if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
-			err = mmc_select_hs200(card);
-		else if	(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS)
-			err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-					EXT_CSD_HS_TIMING, 1,
-					card->ext_csd.generic_cmd6_time,
-					true, true, true);
-
-		if (err && err != -EBADMSG)
-			goto free_card;
-
-		if (err) {
-			pr_warning("%s: switch to highspeed failed\n",
-			       mmc_hostname(card->host));
-			err = 0;
-		} else {
-			if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
-				mmc_set_timing(card->host,
-					       MMC_TIMING_MMC_HS200);
-			else
-				mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
-		}
-	}
-
-	/*
-	 * Compute bus speed.
-	 */
-	max_dtr = (unsigned int)-1;
-
-	if (mmc_card_hs(card) || mmc_card_hs200(card)) {
-		if (max_dtr > card->ext_csd.hs_max_dtr)
-			max_dtr = card->ext_csd.hs_max_dtr;
-		if (mmc_card_hs(card) && (max_dtr > 52000000))
-			max_dtr = 52000000;
-	} else if (max_dtr > card->csd.max_dtr) {
-		max_dtr = card->csd.max_dtr;
-	}
-
-	mmc_set_clock(host, max_dtr);
-
-	/*
-	 * Indicate DDR mode (if supported).
+	 * Select timing interface
 	 */
-	if (mmc_card_hs(card))
-		ddr = card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52;
+	err = mmc_select_timing(card);
+	if (err)
+		goto free_card;
 
-	/*
-	 * Indicate HS200 SDR mode (if supported).
-	 */
 	if (mmc_card_hs200(card)) {
-		u32 ext_csd_bits;
-		u32 bus_width = card->host->ios.bus_width;
-
-		/*
-		 * For devices supporting HS200 mode, the bus width has
-		 * to be set before executing the tuning function. If
-		 * set before tuning, then device will respond with CRC
-		 * errors for responses on CMD line. So for HS200 the
-		 * sequence will be
-		 * 1. set bus width 4bit / 8 bit (1 bit not supported)
-		 * 2. switch to HS200 mode
-		 * 3. set the clock to > 52Mhz <=200MHz and
-		 * 4. execute tuning for HS200
-		 */
-		if (card->host->ops->execute_tuning) {
-			mmc_host_clk_hold(card->host);
-			err = card->host->ops->execute_tuning(card->host,
-				MMC_SEND_TUNING_BLOCK_HS200);
-			mmc_host_clk_release(card->host);
-		}
-		if (err) {
-			pr_warning("%s: tuning execution failed\n",
-				   mmc_hostname(card->host));
+		err = mmc_hs200_tuning(card);
+		if (err)
 			goto err;
-		}
-
-		ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
-				EXT_CSD_BUS_WIDTH_8 : EXT_CSD_BUS_WIDTH_4;
-	}
-
-	/*
-	 * Activate wide bus and DDR (if supported).
-	 */
-	if (!mmc_card_hs200(card) &&
-	    (card->csd.mmca_vsn >= CSD_SPEC_VER_4) &&
-	    (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) {
-		static unsigned ext_csd_bits[][2] = {
-			{ EXT_CSD_BUS_WIDTH_8, EXT_CSD_DDR_BUS_WIDTH_8 },
-			{ EXT_CSD_BUS_WIDTH_4, EXT_CSD_DDR_BUS_WIDTH_4 },
-			{ EXT_CSD_BUS_WIDTH_1, EXT_CSD_BUS_WIDTH_1 },
-		};
-		static unsigned bus_widths[] = {
-			MMC_BUS_WIDTH_8,
-			MMC_BUS_WIDTH_4,
-			MMC_BUS_WIDTH_1
-		};
-		unsigned idx, bus_width = 0;
-
-		if (host->caps & MMC_CAP_8_BIT_DATA)
-			idx = 0;
-		else
-			idx = 1;
-		for (; idx < ARRAY_SIZE(bus_widths); idx++) {
-			bus_width = bus_widths[idx];
-			if (bus_width == MMC_BUS_WIDTH_1)
-				ddr = 0; /* no DDR for 1-bit width */
-
-			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-					 EXT_CSD_BUS_WIDTH,
-					 ext_csd_bits[idx][0],
-					 card->ext_csd.generic_cmd6_time);
-			if (!err) {
-				mmc_set_bus_width(card->host, bus_width);
-
-				/*
-				 * If controller can't handle bus width test,
-				 * compare ext_csd previously read in 1 bit mode
-				 * against ext_csd at new bus width
-				 */
-				if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
-					err = mmc_compare_ext_csds(card,
-						bus_width);
-				else
-					err = mmc_bus_test(card, bus_width);
-				if (!err)
-					break;
-			}
-		}
-
-		if (!err && ddr) {
-			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-					 EXT_CSD_BUS_WIDTH,
-					 ext_csd_bits[idx][1],
-					 card->ext_csd.generic_cmd6_time);
-		}
-		if (err) {
-			pr_warning("%s: switch to bus width %d ddr %d "
-				"failed\n", mmc_hostname(card->host),
-				1 << bus_width, ddr);
-			goto free_card;
-		} else if (ddr) {
-			/*
-			 * eMMC cards can support 3.3V to 1.2V i/o (vccq)
-			 * signaling.
-			 *
-			 * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq.
-			 *
-			 * 1.8V vccq at 3.3V core voltage (vcc) is not required
-			 * in the JEDEC spec for DDR.
-			 *
-			 * Do not force change in vccq since we are obviously
-			 * working and no change to vccq is needed.
-			 *
-			 * WARNING: eMMC rules are NOT the same as SD DDR
-			 */
-			if (ddr & EXT_CSD_CARD_TYPE_DDR_1_2V) {
-				err = __mmc_set_signal_voltage(host,
-					MMC_SIGNAL_VOLTAGE_120);
-				if (err)
-					goto err;
-			}
-			mmc_set_timing(card->host, MMC_TIMING_MMC_DDR52);
-			mmc_set_bus_width(card->host, bus_width);
+	} else if (mmc_card_hs(card)) {
+		/* Select the desired bus width optionally */
+		err = mmc_select_bus_width(card);
+		if (!IS_ERR_VALUE(err)) {
+			err = mmc_select_hs_ddr(card);
+			if (err)
+				goto err;
 		}
 	}
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index fe31f8d89a03..176073692872 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -63,6 +63,7 @@ struct mmc_ext_csd {
 	unsigned int            power_off_longtime;     /* Units: ms */
 	u8			power_off_notification;	/* state */
 	unsigned int		hs_max_dtr;
+	unsigned int		hs200_max_dtr;
 #define MMC_HIGH_26_MAX_DTR	26000000
 #define MMC_HIGH_52_MAX_DTR	52000000
 #define MMC_HIGH_DDR_MAX_DTR	52000000
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index f734c0c64575..f429f13be433 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -377,6 +377,10 @@ struct _mmc_csd {
 #define EXT_CSD_DDR_BUS_WIDTH_4	5	/* Card is in 4 bit DDR mode */
 #define EXT_CSD_DDR_BUS_WIDTH_8	6	/* Card is in 8 bit DDR mode */
 
+#define EXT_CSD_TIMING_BC	0	/* Backwards compatility */
+#define EXT_CSD_TIMING_HS	1	/* High speed */
+#define EXT_CSD_TIMING_HS200	2	/* HS200 */
+
 #define EXT_CSD_SEC_ER_EN	BIT(0)
 #define EXT_CSD_SEC_BD_BLK_EN	BIT(2)
 #define EXT_CSD_SEC_GB_CL_EN	BIT(4)
-- 
cgit 


From 0a5b6438ee482696360bb013e67b8488f63d3e9e Mon Sep 17 00:00:00 2001
From: Seungwon Jeon <tgih.jun@samsung.com>
Date: Wed, 23 Apr 2014 17:14:58 +0900
Subject: mmc: add support for HS400 mode of eMMC5.0

This patch adds HS400 mode support for eMMC5.0 devices.  HS400 mode is a
high-speed DDR interface timing based on HS200.  The clock frequency is up
to 200MHz and only 8-bit bus width is supported.  In addition, the HS200
tuning process is required to synchronize the command response on the CMD
line, because the CMD input timing for HS400 mode is the same as for HS200
mode.

Signed-off-by: Seungwon Jeon <tgih.jun@samsung.com>
Reviewed-by: Jackey Shen <jackey.shen@amd.com>
Tested-by: Jaehoon Chung <jh80.chung@samsung.com>
Acked-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/bus.c     |  1 +
 drivers/mmc/core/debugfs.c |  3 ++
 drivers/mmc/core/mmc.c     | 98 +++++++++++++++++++++++++++++++++++++++++++---
 include/linux/mmc/card.h   |  1 +
 include/linux/mmc/host.h   | 14 +++++++
 include/linux/mmc/mmc.h    |  7 +++-
 6 files changed, 118 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index f37e9d6af84a..d2dbf02022bd 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -349,6 +349,7 @@ int mmc_add_card(struct mmc_card *card)
 			mmc_hostname(card->host),
 			mmc_card_uhs(card) ? "ultra high speed " :
 			(mmc_card_hs(card) ? "high speed " : ""),
+			mmc_card_hs400(card) ? "HS400 " :
 			(mmc_card_hs200(card) ? "HS200 " : ""),
 			mmc_card_ddr52(card) ? "DDR " : "",
 			uhs_bus_speed_mode, type, card->rca);
diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 1f730dbfaeea..91eb16223246 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -141,6 +141,9 @@ static int mmc_ios_show(struct seq_file *s, void *data)
 	case MMC_TIMING_MMC_HS200:
 		str = "mmc HS200";
 		break;
+	case MMC_TIMING_MMC_HS400:
+		str = "mmc HS400";
+		break;
 	default:
 		str = "invalid";
 		break;
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index bec6786efd19..793c6f7ddb04 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -240,7 +240,7 @@ static int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd)
 static void mmc_select_card_type(struct mmc_card *card)
 {
 	struct mmc_host *host = card->host;
-	u8 card_type = card->ext_csd.raw_card_type & EXT_CSD_CARD_TYPE_MASK;
+	u8 card_type = card->ext_csd.raw_card_type;
 	u32 caps = host->caps, caps2 = host->caps2;
 	unsigned int hs_max_dtr = 0, hs200_max_dtr = 0;
 	unsigned int avail_type = 0;
@@ -281,6 +281,18 @@ static void mmc_select_card_type(struct mmc_card *card)
 		avail_type |= EXT_CSD_CARD_TYPE_HS200_1_2V;
 	}
 
+	if (caps2 & MMC_CAP2_HS400_1_8V &&
+	    card_type & EXT_CSD_CARD_TYPE_HS400_1_8V) {
+		hs200_max_dtr = MMC_HS200_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS400_1_8V;
+	}
+
+	if (caps2 & MMC_CAP2_HS400_1_2V &&
+	    card_type & EXT_CSD_CARD_TYPE_HS400_1_2V) {
+		hs200_max_dtr = MMC_HS200_MAX_DTR;
+		avail_type |= EXT_CSD_CARD_TYPE_HS400_1_2V;
+	}
+
 	card->ext_csd.hs_max_dtr = hs_max_dtr;
 	card->ext_csd.hs200_max_dtr = hs200_max_dtr;
 	card->mmc_avail_type = avail_type;
@@ -499,6 +511,8 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
 			ext_csd[EXT_CSD_PWR_CL_DDR_52_195];
 		card->ext_csd.raw_pwr_cl_ddr_52_360 =
 			ext_csd[EXT_CSD_PWR_CL_DDR_52_360];
+		card->ext_csd.raw_pwr_cl_ddr_200_360 =
+			ext_csd[EXT_CSD_PWR_CL_DDR_200_360];
 	}
 
 	if (card->ext_csd.rev >= 5) {
@@ -665,7 +679,10 @@ static int mmc_compare_ext_csds(struct mmc_card *card, unsigned bus_width)
 		(card->ext_csd.raw_pwr_cl_ddr_52_195 ==
 			bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_195]) &&
 		(card->ext_csd.raw_pwr_cl_ddr_52_360 ==
-			bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_360]));
+			bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_360]) &&
+		(card->ext_csd.raw_pwr_cl_ddr_200_360 ==
+			bw_ext_csd[EXT_CSD_PWR_CL_DDR_200_360]));
+
 	if (err)
 		err = -EINVAL;
 
@@ -768,7 +785,9 @@ static int __mmc_select_powerclass(struct mmc_card *card,
 				ext_csd->raw_pwr_cl_52_360 :
 				ext_csd->raw_pwr_cl_ddr_52_360;
 		else if (host->ios.clock <= MMC_HS200_MAX_DTR)
-			pwrclass_val = ext_csd->raw_pwr_cl_200_360;
+			pwrclass_val = (bus_width == EXT_CSD_DDR_BUS_WIDTH_8) ?
+				ext_csd->raw_pwr_cl_ddr_200_360 :
+				ext_csd->raw_pwr_cl_200_360;
 		break;
 	default:
 		pr_warning("%s: Voltage range not supported "
@@ -832,7 +851,8 @@ static void mmc_set_bus_speed(struct mmc_card *card)
 {
 	unsigned int max_dtr = (unsigned int)-1;
 
-	if (mmc_card_hs200(card) && max_dtr > card->ext_csd.hs200_max_dtr)
+	if ((mmc_card_hs200(card) || mmc_card_hs400(card)) &&
+	     max_dtr > card->ext_csd.hs200_max_dtr)
 		max_dtr = card->ext_csd.hs200_max_dtr;
 	else if (mmc_card_hs(card) && max_dtr > card->ext_csd.hs_max_dtr)
 		max_dtr = card->ext_csd.hs_max_dtr;
@@ -985,6 +1005,61 @@ static int mmc_select_hs_ddr(struct mmc_card *card)
 	return err;
 }
 
+static int mmc_select_hs400(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	int err = 0;
+
+	/*
+	 * HS400 mode requires 8-bit bus width
+	 */
+	if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
+	      host->ios.bus_width == MMC_BUS_WIDTH_8))
+		return 0;
+
+	/*
+	 * Before switching to dual data rate operation for HS400,
+	 * it is required to convert from HS200 mode to HS mode.
+	 */
+	mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+	mmc_set_bus_speed(card);
+
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			   EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS,
+			   card->ext_csd.generic_cmd6_time,
+			   true, true, true);
+	if (err) {
+		pr_warn("%s: switch to high-speed from hs200 failed, err:%d\n",
+			mmc_hostname(host), err);
+		return err;
+	}
+
+	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			 EXT_CSD_BUS_WIDTH,
+			 EXT_CSD_DDR_BUS_WIDTH_8,
+			 card->ext_csd.generic_cmd6_time);
+	if (err) {
+		pr_warn("%s: switch to bus width for hs400 failed, err:%d\n",
+			mmc_hostname(host), err);
+		return err;
+	}
+
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			   EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS400,
+			   card->ext_csd.generic_cmd6_time,
+			   true, true, true);
+	if (err) {
+		pr_warn("%s: switch to hs400 failed, err:%d\n",
+			 mmc_hostname(host), err);
+		return err;
+	}
+
+	mmc_set_timing(host, MMC_TIMING_MMC_HS400);
+	mmc_set_bus_speed(card);
+
+	return 0;
+}
+
 /*
  * For device supporting HS200 mode, the following sequence
  * should be done before executing the tuning process.
@@ -1062,13 +1137,22 @@ bus_speed:
 
 /*
  * Execute tuning sequence to seek the proper bus operating
- * conditions for HS200, which sends CMD21 to the device.
+ * conditions for HS200 and HS400, which sends CMD21 to the device.
  */
 static int mmc_hs200_tuning(struct mmc_card *card)
 {
 	struct mmc_host *host = card->host;
 	int err = 0;
 
+	/*
+	 * Timing should be adjusted to the HS400 target
+	 * operation frequency for tuning process
+	 */
+	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
+	    host->ios.bus_width == MMC_BUS_WIDTH_8)
+		if (host->ops->prepare_hs400_tuning)
+			host->ops->prepare_hs400_tuning(host, &host->ios);
+
 	if (host->ops->execute_tuning) {
 		mmc_host_clk_hold(host);
 		err = host->ops->execute_tuning(host,
@@ -1296,6 +1380,10 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
 		err = mmc_hs200_tuning(card);
 		if (err)
 			goto err;
+
+		err = mmc_select_hs400(card);
+		if (err)
+			goto err;
 	} else if (mmc_card_hs(card)) {
 		/* Select the desired bus width optionally */
 		err = mmc_select_bus_width(card);
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 176073692872..d424b9de3aff 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -110,6 +110,7 @@ struct mmc_ext_csd {
 	u8			raw_pwr_cl_200_360;	/* 237 */
 	u8			raw_pwr_cl_ddr_52_195;	/* 238 */
 	u8			raw_pwr_cl_ddr_52_360;	/* 239 */
+	u8			raw_pwr_cl_ddr_200_360;	/* 253 */
 	u8			raw_bkops_status;	/* 246 */
 	u8			raw_sectors[4];		/* 212 - 4 bytes */
 
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 6b1e9ee6ca10..183087374215 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -61,6 +61,7 @@ struct mmc_ios {
 #define MMC_TIMING_UHS_DDR50	7
 #define MMC_TIMING_MMC_DDR52	8
 #define MMC_TIMING_MMC_HS200	9
+#define MMC_TIMING_MMC_HS400	10
 
 	unsigned char	signal_voltage;		/* signalling voltage (1.8V or 3.3V) */
 
@@ -132,6 +133,9 @@ struct mmc_host_ops {
 
 	/* The tuning command opcode value is different for SD and eMMC cards */
 	int	(*execute_tuning)(struct mmc_host *host, u32 opcode);
+
+	/* Prepare HS400 target operating frequency depending host driver */
+	int	(*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios);
 	int	(*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv);
 	void	(*hw_reset)(struct mmc_host *host);
 	void	(*card_event)(struct mmc_host *host);
@@ -274,6 +278,10 @@ struct mmc_host {
 #define MMC_CAP2_PACKED_CMD	(MMC_CAP2_PACKED_RD | \
 				 MMC_CAP2_PACKED_WR)
 #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14)	/* Don't power up before scan */
+#define MMC_CAP2_HS400_1_8V	(1 << 15)	/* Can support HS400 1.8V */
+#define MMC_CAP2_HS400_1_2V	(1 << 16)	/* Can support HS400 1.2V */
+#define MMC_CAP2_HS400		(MMC_CAP2_HS400_1_8V | \
+				 MMC_CAP2_HS400_1_2V)
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
@@ -495,4 +503,10 @@ static inline bool mmc_card_ddr52(struct mmc_card *card)
 {
 	return card->host->ios.timing == MMC_TIMING_MMC_DDR52;
 }
+
+static inline bool mmc_card_hs400(struct mmc_card *card)
+{
+	return card->host->ios.timing == MMC_TIMING_MMC_HS400;
+}
+
 #endif /* LINUX_MMC_HOST_H */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index f429f13be433..64ec963ed347 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -325,6 +325,7 @@ struct _mmc_csd {
 #define EXT_CSD_POWER_OFF_LONG_TIME	247	/* RO */
 #define EXT_CSD_GENERIC_CMD6_TIME	248	/* RO */
 #define EXT_CSD_CACHE_SIZE		249	/* RO, 4 bytes */
+#define EXT_CSD_PWR_CL_DDR_200_360	253	/* RO */
 #define EXT_CSD_TAG_UNIT_SIZE		498	/* RO */
 #define EXT_CSD_DATA_TAG_SUPPORT	499	/* RO */
 #define EXT_CSD_MAX_PACKED_WRITES	500	/* RO */
@@ -354,7 +355,6 @@ struct _mmc_csd {
 #define EXT_CSD_CMD_SET_SECURE		(1<<1)
 #define EXT_CSD_CMD_SET_CPSECURE	(1<<2)
 
-#define EXT_CSD_CARD_TYPE_MASK	0x3F	/* Mask out reserved bits */
 #define EXT_CSD_CARD_TYPE_HS_26	(1<<0)	/* Card can run at 26MHz */
 #define EXT_CSD_CARD_TYPE_HS_52	(1<<1)	/* Card can run at 52MHz */
 #define EXT_CSD_CARD_TYPE_HS	(EXT_CSD_CARD_TYPE_HS_26 | \
@@ -370,6 +370,10 @@ struct _mmc_csd {
 						/* SDR mode @1.2V I/O */
 #define EXT_CSD_CARD_TYPE_HS200		(EXT_CSD_CARD_TYPE_HS200_1_8V | \
 					 EXT_CSD_CARD_TYPE_HS200_1_2V)
+#define EXT_CSD_CARD_TYPE_HS400_1_8V	(1<<6)	/* Card can run at 200MHz DDR, 1.8V */
+#define EXT_CSD_CARD_TYPE_HS400_1_2V	(1<<7)	/* Card can run at 200MHz DDR, 1.2V */
+#define EXT_CSD_CARD_TYPE_HS400		(EXT_CSD_CARD_TYPE_HS400_1_8V | \
+					 EXT_CSD_CARD_TYPE_HS400_1_2V)
 
 #define EXT_CSD_BUS_WIDTH_1	0	/* Card is in 1 bit mode */
 #define EXT_CSD_BUS_WIDTH_4	1	/* Card is in 4 bit mode */
@@ -380,6 +384,7 @@ struct _mmc_csd {
 #define EXT_CSD_TIMING_BC	0	/* Backwards compatility */
 #define EXT_CSD_TIMING_HS	1	/* High speed */
 #define EXT_CSD_TIMING_HS200	2	/* HS200 */
+#define EXT_CSD_TIMING_HS400	3	/* HS400 */
 
 #define EXT_CSD_SEC_ER_EN	BIT(0)
 #define EXT_CSD_SEC_BD_BLK_EN	BIT(2)
-- 
cgit 


From 4d1f52f9a9f9a63371dba589093b3ae90fc80c3d Mon Sep 17 00:00:00 2001
From: Tim Kryger <tim.kryger@linaro.org>
Date: Tue, 6 May 2014 15:57:01 -0700
Subject: mmc: core: Improve support for deferred regulators

Callers of mmc_regulator_get_supply could benefit from knowing if either
of the regulators is present but not yet available.  Since callers do
not currently examine the return value, modify this function to return
-EPROBE_DEFER if either regulator lookup returns -EPROBE_DEFER, and zero
otherwise.

Furthermore, since callers check vmmc/vqmmc using IS_ERR and can deal
with absent regulators, switch to devm_regulator_get_optional. This has
the added benefit of allowing this function to behave correctly even in
the !CONFIG_REGULATOR case such that the stub can be removed.

Signed-off-by: Tim Kryger <tim.kryger@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/core.c  | 31 +++++++++++++++++++------------
 include/linux/mmc/host.h |  8 ++------
 2 files changed, 21 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 02baa30653fa..7dc0c85fdb60 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -1314,31 +1314,38 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc,
 }
 EXPORT_SYMBOL_GPL(mmc_regulator_set_ocr);
 
+#endif /* CONFIG_REGULATOR */
+
 int mmc_regulator_get_supply(struct mmc_host *mmc)
 {
 	struct device *dev = mmc_dev(mmc);
-	struct regulator *supply;
 	int ret;
 
-	supply = devm_regulator_get(dev, "vmmc");
-	mmc->supply.vmmc = supply;
+	mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc");
 	mmc->supply.vqmmc = devm_regulator_get_optional(dev, "vqmmc");
 
-	if (IS_ERR(supply))
-		return PTR_ERR(supply);
+	if (IS_ERR(mmc->supply.vmmc)) {
+		if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_info(dev, "No vmmc regulator found\n");
+	} else {
+		ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc);
+		if (ret > 0)
+			mmc->ocr_avail = ret;
+		else
+			dev_warn(dev, "Failed getting OCR mask: %d\n", ret);
+	}
 
-	ret = mmc_regulator_get_ocrmask(supply);
-	if (ret > 0)
-		mmc->ocr_avail = ret;
-	else
-		dev_warn(mmc_dev(mmc), "Failed getting OCR mask: %d\n", ret);
+	if (IS_ERR(mmc->supply.vqmmc)) {
+		if (PTR_ERR(mmc->supply.vqmmc) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_info(dev, "No vqmmc regulator found\n");
+	}
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mmc_regulator_get_supply);
 
-#endif /* CONFIG_REGULATOR */
-
 /*
  * Mask off any voltages we don't support and select
  * the lowest voltage
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 183087374215..cd595275e118 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -402,7 +402,6 @@ int mmc_regulator_get_ocrmask(struct regulator *supply);
 int mmc_regulator_set_ocr(struct mmc_host *mmc,
 			struct regulator *supply,
 			unsigned short vdd_bit);
-int mmc_regulator_get_supply(struct mmc_host *mmc);
 #else
 static inline int mmc_regulator_get_ocrmask(struct regulator *supply)
 {
@@ -415,13 +414,10 @@ static inline int mmc_regulator_set_ocr(struct mmc_host *mmc,
 {
 	return 0;
 }
-
-static inline int mmc_regulator_get_supply(struct mmc_host *mmc)
-{
-	return 0;
-}
 #endif
 
+int mmc_regulator_get_supply(struct mmc_host *mmc);
+
 int mmc_pm_notify(struct notifier_block *notify_block, unsigned long, void *);
 
 static inline int mmc_card_is_removable(struct mmc_host *host)
-- 
cgit 


From 725b418b43d2ddcb94b413cd25c74c1175d1c5f0 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 22 Apr 2014 15:11:41 +0200
Subject: clk: Fixup spacing in comments

- Remove spaces in front of TABs,
- Correct indentation for some CLK_* flag descriptions.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 include/linux/clk-provider.h | 88 ++++++++++++++++++++++----------------------
 1 file changed, 44 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 59e2eb58f555..397f98505bd4 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -40,14 +40,14 @@ struct dentry;
  * through the clk_* api.
  *
  * @prepare:	Prepare the clock for enabling. This must not return until
- * 		the clock is fully prepared, and it's safe to call clk_enable.
- * 		This callback is intended to allow clock implementations to
- * 		do any initialisation that may sleep. Called with
- * 		prepare_lock held.
+ *		the clock is fully prepared, and it's safe to call clk_enable.
+ *		This callback is intended to allow clock implementations to
+ *		do any initialisation that may sleep. Called with
+ *		prepare_lock held.
  *
  * @unprepare:	Release the clock from its prepared state. This will typically
- * 		undo any work done in the @prepare callback. Called with
- * 		prepare_lock held.
+ *		undo any work done in the @prepare callback. Called with
+ *		prepare_lock held.
  *
  * @is_prepared: Queries the hardware to determine if the clock is prepared.
  *		This function is allowed to sleep. Optional, if this op is not
@@ -58,16 +58,16 @@ struct dentry;
  *		Called with prepare mutex held. This function may sleep.
  *
  * @enable:	Enable the clock atomically. This must not return until the
- * 		clock is generating a valid clock signal, usable by consumer
- * 		devices. Called with enable_lock held. This function must not
- * 		sleep.
+ *		clock is generating a valid clock signal, usable by consumer
+ *		devices. Called with enable_lock held. This function must not
+ *		sleep.
  *
  * @disable:	Disable the clock atomically. Called with enable_lock held.
- * 		This function must not sleep.
+ *		This function must not sleep.
  *
  * @is_enabled:	Queries the hardware to determine if the clock is enabled.
- * 		This function must not sleep. Optional, if this op is not
- * 		set then the enable count will be used.
+ *		This function must not sleep. Optional, if this op is not
+ *		set then the enable count will be used.
  *
  * @disable_unused: Disable the clock atomically.  Only called from
  *		clk_disable_unused for gate clocks with special needs.
@@ -75,34 +75,34 @@ struct dentry;
  *		sleep.
  *
  * @recalc_rate	Recalculate the rate of this clock, by querying hardware. The
- * 		parent rate is an input parameter.  It is up to the caller to
- * 		ensure that the prepare_mutex is held across this call.
- * 		Returns the calculated rate.  Optional, but recommended - if
- * 		this op is not set then clock rate will be initialized to 0.
+ *		parent rate is an input parameter.  It is up to the caller to
+ *		ensure that the prepare_mutex is held across this call.
+ *		Returns the calculated rate.  Optional, but recommended - if
+ *		this op is not set then clock rate will be initialized to 0.
  *
  * @round_rate:	Given a target rate as input, returns the closest rate actually
- * 		supported by the clock.
+ *		supported by the clock.
  *
  * @determine_rate: Given a target rate as input, returns the closest rate
  *		actually supported by the clock, and optionally the parent clock
  *		that should be used to provide the clock rate.
  *
  * @get_parent:	Queries the hardware to determine the parent of a clock.  The
- * 		return value is a u8 which specifies the index corresponding to
- * 		the parent clock.  This index can be applied to either the
- * 		.parent_names or .parents arrays.  In short, this function
- * 		translates the parent value read from hardware into an array
- * 		index.  Currently only called when the clock is initialized by
- * 		__clk_init.  This callback is mandatory for clocks with
- * 		multiple parents.  It is optional (and unnecessary) for clocks
- * 		with 0 or 1 parents.
+ *		return value is a u8 which specifies the index corresponding to
+ *		the parent clock.  This index can be applied to either the
+ *		.parent_names or .parents arrays.  In short, this function
+ *		translates the parent value read from hardware into an array
+ *		index.  Currently only called when the clock is initialized by
+ *		__clk_init.  This callback is mandatory for clocks with
+ *		multiple parents.  It is optional (and unnecessary) for clocks
+ *		with 0 or 1 parents.
  *
  * @set_parent:	Change the input source of this clock; for clocks with multiple
- * 		possible parents specify a new parent by passing in the index
- * 		as a u8 corresponding to the parent in either the .parent_names
- * 		or .parents arrays.  This function in affect translates an
- * 		array index into the value programmed into the hardware.
- * 		Returns 0 on success, -EERROR otherwise.
+ *		possible parents specify a new parent by passing in the index
+ *		as a u8 corresponding to the parent in either the .parent_names
+ *		or .parents arrays.  This function in affect translates an
+ *		array index into the value programmed into the hardware.
+ *		Returns 0 on success, -EERROR otherwise.
  *
  * @set_rate:	Change the rate of this clock. The requested rate is specified
  *		by the second argument, which should typically be the return
@@ -254,12 +254,12 @@ void of_fixed_clk_setup(struct device_node *np);
  *
  * Flags:
  * CLK_GATE_SET_TO_DISABLE - by default this clock sets the bit at bit_idx to
- * 	enable the clock.  Setting this flag does the opposite: setting the bit
- * 	disable the clock and clearing it enables the clock
+ *	enable the clock.  Setting this flag does the opposite: setting the bit
+ *	disable the clock and clearing it enables the clock
  * CLK_GATE_HIWORD_MASK - The gate settings are only in lower 16-bit
- *   of this register, and mask of gate bits are in higher 16-bit of this
- *   register.  While setting the gate bits, higher 16-bit should also be
- *   updated to indicate changing gate bits.
+ *	of this register, and mask of gate bits are in higher 16-bit of this
+ *	register.  While setting the gate bits, higher 16-bit should also be
+ *	updated to indicate changing gate bits.
  */
 struct clk_gate {
 	struct clk_hw hw;
@@ -298,20 +298,20 @@ struct clk_div_table {
  *
  * Flags:
  * CLK_DIVIDER_ONE_BASED - by default the divisor is the value read from the
- * 	register plus one.  If CLK_DIVIDER_ONE_BASED is set then the divider is
- * 	the raw value read from the register, with the value of zero considered
+ *	register plus one.  If CLK_DIVIDER_ONE_BASED is set then the divider is
+ *	the raw value read from the register, with the value of zero considered
  *	invalid, unless CLK_DIVIDER_ALLOW_ZERO is set.
  * CLK_DIVIDER_POWER_OF_TWO - clock divisor is 2 raised to the value read from
- * 	the hardware register
+ *	the hardware register
  * CLK_DIVIDER_ALLOW_ZERO - Allow zero divisors.  For dividers which have
  *	CLK_DIVIDER_ONE_BASED set, it is possible to end up with a zero divisor.
  *	Some hardware implementations gracefully handle this case and allow a
  *	zero divisor by not modifying their input clock
  *	(divide by one / bypass).
  * CLK_DIVIDER_HIWORD_MASK - The divider settings are only in lower 16-bit
- *   of this register, and mask of divider bits are in higher 16-bit of this
- *   register.  While setting the divider bits, higher 16-bit should also be
- *   updated to indicate changing divider bits.
+ *	of this register, and mask of divider bits are in higher 16-bit of this
+ *	register.  While setting the divider bits, higher 16-bit should also be
+ *	updated to indicate changing divider bits.
  * CLK_DIVIDER_ROUND_CLOSEST - Makes the best calculated divider to be rounded
  *	to the closest integer instead of the up one.
  */
@@ -359,9 +359,9 @@ struct clk *clk_register_divider_table(struct device *dev, const char *name,
  * CLK_MUX_INDEX_ONE - register index starts at 1, not 0
  * CLK_MUX_INDEX_BIT - register index is a single bit (power of two)
  * CLK_MUX_HIWORD_MASK - The mux settings are only in lower 16-bit of this
- *   register, and mask of mux bits are in higher 16-bit of this register.
- *   While setting the mux bits, higher 16-bit should also be updated to
- *   indicate changing mux bits.
+ *	register, and mask of mux bits are in higher 16-bit of this register.
+ *	While setting the mux bits, higher 16-bit should also be updated to
+ *	indicate changing mux bits.
  */
 struct clk_mux {
 	struct clk_hw	hw;
-- 
cgit 


From 54e73016dd217be915ed83353d296f2a133d1ad5 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 22 Apr 2014 15:11:42 +0200
Subject: clk: Improve clk_ops documentation

General:
  - Add parameter names to .round_rate() and .set_rate().

Documentation/clk.txt:
  - Add missing parameter for .set_rate(),
  - Add missing .debug_init().

include/linux/clk-provider.h:
  - Add parent rate documentation for .round_rate(),
  - Reorder documentation to match implementation order,
  - Add missing documentation for .init().

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 Documentation/clk.txt        | 16 +++++++++++-----
 include/linux/clk-provider.h | 44 +++++++++++++++++++++++++-------------------
 2 files changed, 36 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/clk.txt b/Documentation/clk.txt
index c9c399af7c08..1fee72f4d331 100644
--- a/Documentation/clk.txt
+++ b/Documentation/clk.txt
@@ -68,21 +68,27 @@ the operations defined in clk.h:
 		int		(*is_enabled)(struct clk_hw *hw);
 		unsigned long	(*recalc_rate)(struct clk_hw *hw,
 						unsigned long parent_rate);
-		long		(*round_rate)(struct clk_hw *hw, unsigned long,
-						unsigned long *);
+		long		(*round_rate)(struct clk_hw *hw,
+						unsigned long rate,
+						unsigned long *parent_rate);
 		long		(*determine_rate)(struct clk_hw *hw,
 						unsigned long rate,
 						unsigned long *best_parent_rate,
 						struct clk **best_parent_clk);
 		int		(*set_parent)(struct clk_hw *hw, u8 index);
 		u8		(*get_parent)(struct clk_hw *hw);
-		int		(*set_rate)(struct clk_hw *hw, unsigned long);
+		int		(*set_rate)(struct clk_hw *hw,
+					    unsigned long rate,
+					    unsigned long parent_rate);
 		int		(*set_rate_and_parent)(struct clk_hw *hw,
 					    unsigned long rate,
-					    unsigned long parent_rate, u8 index);
+					    unsigned long parent_rate,
+					    u8 index);
 		unsigned long	(*recalc_accuracy)(struct clk_hw *hw,
-						   unsigned long parent_accuracy);
+						unsigned long parent_accuracy);
 		void		(*init)(struct clk_hw *hw);
+		int		(*debug_init)(struct clk_hw *hw,
+					      struct dentry *dentry);
 	};
 
 	Part 3 - hardware clk implementations
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 397f98505bd4..40809431641e 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -81,12 +81,20 @@ struct dentry;
  *		this op is not set then clock rate will be initialized to 0.
  *
  * @round_rate:	Given a target rate as input, returns the closest rate actually
- *		supported by the clock.
+ *		supported by the clock. The parent rate is an input/output
+ *		parameter.
  *
  * @determine_rate: Given a target rate as input, returns the closest rate
  *		actually supported by the clock, and optionally the parent clock
  *		that should be used to provide the clock rate.
  *
+ * @set_parent:	Change the input source of this clock; for clocks with multiple
+ *		possible parents specify a new parent by passing in the index
+ *		as a u8 corresponding to the parent in either the .parent_names
+ *		or .parents arrays.  This function in effect translates an
+ *		array index into the value programmed into the hardware.
+ *		Returns 0 on success, -EERROR otherwise.
+ *
  * @get_parent:	Queries the hardware to determine the parent of a clock.  The
  *		return value is a u8 which specifies the index corresponding to
  *		the parent clock.  This index can be applied to either the
@@ -97,26 +105,12 @@ struct dentry;
  *		multiple parents.  It is optional (and unnecessary) for clocks
  *		with 0 or 1 parents.
  *
- * @set_parent:	Change the input source of this clock; for clocks with multiple
- *		possible parents specify a new parent by passing in the index
- *		as a u8 corresponding to the parent in either the .parent_names
- *		or .parents arrays.  This function in affect translates an
- *		array index into the value programmed into the hardware.
- *		Returns 0 on success, -EERROR otherwise.
- *
  * @set_rate:	Change the rate of this clock. The requested rate is specified
  *		by the second argument, which should typically be the return
  *		of .round_rate call.  The third argument gives the parent rate
  *		which is likely helpful for most .set_rate implementation.
  *		Returns 0 on success, -EERROR otherwise.
  *
- * @recalc_accuracy: Recalculate the accuracy of this clock. The clock accuracy
- *		is expressed in ppb (parts per billion). The parent accuracy is
- *		an input parameter.
- *		Returns the calculated accuracy.  Optional - if	this op is not
- *		set then clock accuracy will be initialized to parent accuracy
- *		or 0 (perfect clock) if clock has no parent.
- *
  * @set_rate_and_parent: Change the rate and the parent of this clock. The
  *		requested rate is specified by the second argument, which
  *		should typically be the return of .round_rate call.  The
@@ -128,6 +122,18 @@ struct dentry;
  *		separately via calls to .set_parent and .set_rate.
  *		Returns 0 on success, -EERROR otherwise.
  *
+ * @recalc_accuracy: Recalculate the accuracy of this clock. The clock accuracy
+ *		is expressed in ppb (parts per billion). The parent accuracy is
+ *		an input parameter.
+ *		Returns the calculated accuracy.  Optional - if	this op is not
+ *		set then clock accuracy will be initialized to parent accuracy
+ *		or 0 (perfect clock) if clock has no parent.
+ *
+ * @init:	Perform platform-specific initialization magic.
+ *		This is not used by any of the basic clock types.
+ *		Please consider other ways of solving initialization problems
+ *		before using this callback, as its use is discouraged.
+ *
  * @debug_init:	Set up type-specific debugfs entries for this clock.  This
  *		is called once, after the debugfs directory entry for this
  *		clock has been created.  The dentry pointer representing that
@@ -157,15 +163,15 @@ struct clk_ops {
 	void		(*disable_unused)(struct clk_hw *hw);
 	unsigned long	(*recalc_rate)(struct clk_hw *hw,
 					unsigned long parent_rate);
-	long		(*round_rate)(struct clk_hw *hw, unsigned long,
-					unsigned long *);
+	long		(*round_rate)(struct clk_hw *hw, unsigned long rate,
+					unsigned long *parent_rate);
 	long		(*determine_rate)(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
 					struct clk **best_parent_clk);
 	int		(*set_parent)(struct clk_hw *hw, u8 index);
 	u8		(*get_parent)(struct clk_hw *hw);
-	int		(*set_rate)(struct clk_hw *hw, unsigned long,
-				    unsigned long);
+	int		(*set_rate)(struct clk_hw *hw, unsigned long rate,
+				    unsigned long parent_rate);
 	int		(*set_rate_and_parent)(struct clk_hw *hw,
 				    unsigned long rate,
 				    unsigned long parent_rate, u8 index);
-- 
cgit 


From 7b42a997bfb93c6ae0709f34ec8e2860757804b5 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms+renesas@verge.net.au>
Date: Fri, 18 Apr 2014 08:05:50 +0900
Subject: clk: shmobile: r8a7779: Add clocks support

The R8A7779 SoC has several clocks that are too custom to be supported in a
generic driver. Those clocks are all fixed rate clocks with multiplier and
divisor set according to boot mode configuration.

Based on work for R-Car Gen2 SoCs by Laurent Pinchart.

Cc: devicetree@vger.kernel.org
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 .../bindings/clock/renesas,r8a7779-cpg-clocks.txt  |  27 ++++
 drivers/clk/shmobile/Makefile                      |   1 +
 drivers/clk/shmobile/clk-r8a7779.c                 | 180 +++++++++++++++++++++
 include/linux/clk/shmobile.h                       |   3 +
 4 files changed, 211 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt
 create mode 100644 drivers/clk/shmobile/clk-r8a7779.c

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt
new file mode 100644
index 000000000000..ed3c8cb12f4e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt
@@ -0,0 +1,27 @@
+* Renesas R8A7779 Clock Pulse Generator (CPG)
+
+The CPG generates core clocks for the R8A7779. It includes one PLL and
+several fixed ratio dividers.
+
+Required Properties:
+
+  - compatible: Must be "renesas,r8a7779-cpg-clocks"
+  - reg: Base address and length of the memory resource used by the CPG
+
+  - clocks: Reference to the parent clock
+  - #clock-cells: Must be 1
+  - clock-output-names: The names of the clocks. Supported clocks are "plla",
+    "z", "zs", "s", "s1", "p", "b", "out".
+
+
+Example
+-------
+
+	cpg_clocks: cpg_clocks@ffc80000 {
+		compatible = "renesas,r8a7779-cpg-clocks";
+		reg = <0 0xffc80000 0 0x30>;
+		clocks = <&extal_clk>;
+		#clock-cells = <1>;
+		clock-output-names = "plla", "z", "zs", "s", "s1", "p",
+		                     "b", "out";
+	};
diff --git a/drivers/clk/shmobile/Makefile b/drivers/clk/shmobile/Makefile
index 5404cb931ebf..bdf342daefa5 100644
--- a/drivers/clk/shmobile/Makefile
+++ b/drivers/clk/shmobile/Makefile
@@ -1,5 +1,6 @@
 obj-$(CONFIG_ARCH_EMEV2)		+= clk-emev2.o
 obj-$(CONFIG_ARCH_R7S72100)		+= clk-rz.o
+obj-$(CONFIG_ARCH_R8A7779)		+= clk-r8a7779.o
 obj-$(CONFIG_ARCH_R8A7790)		+= clk-rcar-gen2.o
 obj-$(CONFIG_ARCH_R8A7791)		+= clk-rcar-gen2.o
 obj-$(CONFIG_ARCH_SHMOBILE_MULTI)	+= clk-div6.o
diff --git a/drivers/clk/shmobile/clk-r8a7779.c b/drivers/clk/shmobile/clk-r8a7779.c
new file mode 100644
index 000000000000..652ecacb6daf
--- /dev/null
+++ b/drivers/clk/shmobile/clk-r8a7779.c
@@ -0,0 +1,180 @@
+/*
+ * r8a7779 Core CPG Clocks
+ *
+ * Copyright (C) 2013, 2014 Horms Solutions Ltd.
+ *
+ * Contact: Simon Horman <horms@verge.net.au>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/clk/shmobile.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/spinlock.h>
+
+#include <dt-bindings/clock/r8a7779-clock.h>
+
+#define CPG_NUM_CLOCKS			(R8A7779_CLK_OUT + 1)
+
+struct r8a7779_cpg {
+	struct clk_onecell_data data;
+	spinlock_t lock;
+	void __iomem *reg;
+};
+
+/* -----------------------------------------------------------------------------
+ * CPG Clock Data
+ */
+
+/*
+ *		MD1 = 1			MD1 = 0
+ *		(PLLA = 1500)		(PLLA = 1600)
+ *		(MHz)			(MHz)
+ *------------------------------------------------+--------------------
+ * clkz		1000   (2/3)		800   (1/2)
+ * clkzs	 250   (1/6)		200   (1/8)
+ * clki		 750   (1/2)		800   (1/2)
+ * clks		 250   (1/6)		200   (1/8)
+ * clks1	 125   (1/12)		100   (1/16)
+ * clks3	 187.5 (1/8)		200   (1/8)
+ * clks4	  93.7 (1/16)		100   (1/16)
+ * clkp		  62.5 (1/24)		 50   (1/32)
+ * clkg		  62.5 (1/24)		 66.6 (1/24)
+ * clkb, CLKOUT
+ * (MD2 = 0)	  62.5 (1/24)		 66.6 (1/24)
+ * (MD2 = 1)	  41.6 (1/36)		 50   (1/32)
+ */
+
+#define CPG_CLK_CONFIG_INDEX(md)	(((md) & (BIT(2)|BIT(1))) >> 1)
+
+struct cpg_clk_config {
+	unsigned int z_mult;
+	unsigned int z_div;
+	unsigned int zs_and_s_div;
+	unsigned int s1_div;
+	unsigned int p_div;
+	unsigned int b_and_out_div;
+};
+
+static const struct cpg_clk_config cpg_clk_configs[4] __initconst = {
+	{ 1, 2, 8, 16, 32, 24 },
+	{ 2, 3, 6, 12, 24, 24 },
+	{ 1, 2, 8, 16, 32, 32 },
+	{ 2, 3, 6, 12, 24, 36 },
+};
+
+/*
+ *   MD		PLLA Ratio
+ * 12 11
+ *------------------------
+ * 0  0		x42
+ * 0  1		x48
+ * 1  0		x56
+ * 1  1		x64
+ */
+
+#define CPG_PLLA_MULT_INDEX(md)	(((md) & (BIT(12)|BIT(11))) >> 11)
+
+static const unsigned int cpg_plla_mult[4] __initconst = { 42, 48, 56, 64 };
+
+/* -----------------------------------------------------------------------------
+ * Initialization
+ */
+
+static u32 cpg_mode __initdata;
+
+static struct clk * __init
+r8a7779_cpg_register_clock(struct device_node *np, struct r8a7779_cpg *cpg,
+			   const struct cpg_clk_config *config,
+			   unsigned int plla_mult, const char *name)
+{
+	const char *parent_name = "plla";
+	unsigned int mult = 1;
+	unsigned int div = 1;
+
+	if (!strcmp(name, "plla")) {
+		parent_name = of_clk_get_parent_name(np, 0);
+		mult = plla_mult;
+	} else if (!strcmp(name, "z")) {
+		div = config->z_div;
+		mult = config->z_mult;
+	} else if (!strcmp(name, "zs") || !strcmp(name, "s")) {
+		div = config->zs_and_s_div;
+	} else if (!strcmp(name, "s1")) {
+		div = config->s1_div;
+	} else if (!strcmp(name, "p")) {
+		div = config->p_div;
+	} else if (!strcmp(name, "b") || !strcmp(name, "out")) {
+		div = config->b_and_out_div;
+	} else {
+		return ERR_PTR(-EINVAL);
+	}
+
+	return clk_register_fixed_factor(NULL, name, parent_name, 0, mult, div);
+}
+
+static void __init r8a7779_cpg_clocks_init(struct device_node *np)
+{
+	const struct cpg_clk_config *config;
+	struct r8a7779_cpg *cpg;
+	struct clk **clks;
+	unsigned int i, plla_mult;
+	int num_clks;
+
+	num_clks = of_property_count_strings(np, "clock-output-names");
+	if (num_clks < 0) {
+		pr_err("%s: failed to count clocks\n", __func__);
+		return;
+	}
+
+	cpg = kzalloc(sizeof(*cpg), GFP_KERNEL);
+	clks = kzalloc(CPG_NUM_CLOCKS * sizeof(*clks), GFP_KERNEL);
+	if (cpg == NULL || clks == NULL) {
+		/* We're leaking memory on purpose, there's no point in cleaning
+		 * up as the system won't boot anyway.
+		 */
+		return;
+	}
+
+	spin_lock_init(&cpg->lock);
+
+	cpg->data.clks = clks;
+	cpg->data.clk_num = num_clks;
+
+	config = &cpg_clk_configs[CPG_CLK_CONFIG_INDEX(cpg_mode)];
+	plla_mult = cpg_plla_mult[CPG_PLLA_MULT_INDEX(cpg_mode)];
+
+	for (i = 0; i < num_clks; ++i) {
+		const char *name;
+		struct clk *clk;
+
+		of_property_read_string_index(np, "clock-output-names", i,
+					      &name);
+
+		clk = r8a7779_cpg_register_clock(np, cpg, config,
+						 plla_mult, name);
+		if (IS_ERR(clk))
+			pr_err("%s: failed to register %s %s clock (%ld)\n",
+			       __func__, np->name, name, PTR_ERR(clk));
+		else
+			cpg->data.clks[i] = clk;
+	}
+
+	of_clk_add_provider(np, of_clk_src_onecell_get, &cpg->data);
+}
+CLK_OF_DECLARE(r8a7779_cpg_clks, "renesas,r8a7779-cpg-clocks",
+	       r8a7779_cpg_clocks_init);
+
+void __init r8a7779_clocks_init(u32 mode)
+{
+	cpg_mode = mode;
+
+	of_clk_init(NULL);
+}
diff --git a/include/linux/clk/shmobile.h b/include/linux/clk/shmobile.h
index f9bf080a1123..9f8a14041dd5 100644
--- a/include/linux/clk/shmobile.h
+++ b/include/linux/clk/shmobile.h
@@ -1,7 +1,9 @@
 /*
  * Copyright 2013 Ideas On Board SPRL
+ * Copyright 2013, 2014 Horms Solutions Ltd.
  *
  * Contact: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ * Contact: Simon Horman <horms@verge.net.au>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -14,6 +16,7 @@
 
 #include <linux/types.h>
 
+void r8a7779_clocks_init(u32 mode);
 void rcar_gen2_clocks_init(u32 mode);
 
 #endif
-- 
cgit 


From 59025887fb08a8b913605fb20f8a62eb0bb69b36 Mon Sep 17 00:00:00 2001
From: Vivek Gautam <gautam.vivek@samsung.com>
Date: Tue, 13 May 2014 15:30:16 +0530
Subject: phy: Add new Exynos5 USB 3.0 PHY driver

Add a new driver for the USB 3.0 PHY on Exynos5 series of SoCs.
The new driver uses the generic PHY framework and will interact
with DWC3 controller present on Exynos5 series of SoCs.

Also, created a new header file in linux/mfd/syscon/ for
Exynos5 SoCs and put the required PMU offset definitions
for the basic available PHYs.

Signed-off-by: Vivek Gautam <gautam.vivek@samsung.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/phy/Kconfig                    |  11 +
 drivers/phy/Makefile                   |   1 +
 drivers/phy/phy-exynos5-usbdrd.c       | 644 +++++++++++++++++++++++++++++++++
 include/linux/mfd/syscon/exynos5-pmu.h |  44 +++
 4 files changed, 700 insertions(+)
 create mode 100644 drivers/phy/phy-exynos5-usbdrd.c
 create mode 100644 include/linux/mfd/syscon/exynos5-pmu.h

(limited to 'include/linux')

diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index 071b7633bf03..16a2f067c242 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -160,6 +160,17 @@ config PHY_EXYNOS5250_USB2
 	  particular SoC is compiled in the driver. In case of Exynos 5250 four
 	  phys are available - device, host, HSIC0 and HSIC.
 
+config PHY_EXYNOS5_USBDRD
+	tristate "Exynos5 SoC series USB DRD PHY driver"
+	depends on ARCH_EXYNOS5 && OF
+	depends on HAS_IOMEM
+	select GENERIC_PHY
+	select MFD_SYSCON
+	help
+	  Enable USB DRD PHY support for Exynos 5 SoC series.
+	  This driver provides PHY interface for USB 3.0 DRD controller
+	  present on Exynos5 SoC series.
+
 config PHY_XGENE
 	tristate "APM X-Gene 15Gbps PHY support"
 	depends on HAS_IOMEM && OF && (ARM64 || COMPILE_TEST)
diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile
index 7728518572a4..b4f1d5770601 100644
--- a/drivers/phy/Makefile
+++ b/drivers/phy/Makefile
@@ -18,4 +18,5 @@ phy-exynos-usb2-y			+= phy-samsung-usb2.o
 phy-exynos-usb2-$(CONFIG_PHY_EXYNOS4210_USB2)	+= phy-exynos4210-usb2.o
 phy-exynos-usb2-$(CONFIG_PHY_EXYNOS4X12_USB2)	+= phy-exynos4x12-usb2.o
 phy-exynos-usb2-$(CONFIG_PHY_EXYNOS5250_USB2)	+= phy-exynos5250-usb2.o
+obj-$(CONFIG_PHY_EXYNOS5_USBDRD)	+= phy-exynos5-usbdrd.o
 obj-$(CONFIG_PHY_XGENE)			+= phy-xgene.o
diff --git a/drivers/phy/phy-exynos5-usbdrd.c b/drivers/phy/phy-exynos5-usbdrd.c
new file mode 100644
index 000000000000..8fcdd9434346
--- /dev/null
+++ b/drivers/phy/phy-exynos5-usbdrd.c
@@ -0,0 +1,644 @@
+/*
+ * Samsung EXYNOS5 SoC series USB DRD PHY driver
+ *
+ * Phy provider for USB 3.0 DRD controller on Exynos5 SoC series
+ *
+ * Copyright (C) 2014 Samsung Electronics Co., Ltd.
+ * Author: Vivek Gautam <gautam.vivek@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mfd/syscon/exynos5-pmu.h>
+#include <linux/regmap.h>
+
+/* Exynos USB PHY registers */
+#define EXYNOS5_FSEL_9MHZ6		0x0
+#define EXYNOS5_FSEL_10MHZ		0x1
+#define EXYNOS5_FSEL_12MHZ		0x2
+#define EXYNOS5_FSEL_19MHZ2		0x3
+#define EXYNOS5_FSEL_20MHZ		0x4
+#define EXYNOS5_FSEL_24MHZ		0x5
+#define EXYNOS5_FSEL_50MHZ		0x7
+
+/* EXYNOS5: USB 3.0 DRD PHY registers */
+#define EXYNOS5_DRD_LINKSYSTEM			0x04
+
+#define LINKSYSTEM_FLADJ_MASK			(0x3f << 1)
+#define LINKSYSTEM_FLADJ(_x)			((_x) << 1)
+#define LINKSYSTEM_XHCI_VERSION_CONTROL		BIT(27)
+
+#define EXYNOS5_DRD_PHYUTMI			0x08
+
+#define PHYUTMI_OTGDISABLE			BIT(6)
+#define PHYUTMI_FORCESUSPEND			BIT(1)
+#define PHYUTMI_FORCESLEEP			BIT(0)
+
+#define EXYNOS5_DRD_PHYPIPE			0x0c
+
+#define EXYNOS5_DRD_PHYCLKRST			0x10
+
+#define PHYCLKRST_EN_UTMISUSPEND		BIT(31)
+
+#define PHYCLKRST_SSC_REFCLKSEL_MASK		(0xff << 23)
+#define PHYCLKRST_SSC_REFCLKSEL(_x)		((_x) << 23)
+
+#define PHYCLKRST_SSC_RANGE_MASK		(0x03 << 21)
+#define PHYCLKRST_SSC_RANGE(_x)			((_x) << 21)
+
+#define PHYCLKRST_SSC_EN			BIT(20)
+#define PHYCLKRST_REF_SSP_EN			BIT(19)
+#define PHYCLKRST_REF_CLKDIV2			BIT(18)
+
+#define PHYCLKRST_MPLL_MULTIPLIER_MASK		(0x7f << 11)
+#define PHYCLKRST_MPLL_MULTIPLIER_100MHZ_REF	(0x19 << 11)
+#define PHYCLKRST_MPLL_MULTIPLIER_50M_REF	(0x32 << 11)
+#define PHYCLKRST_MPLL_MULTIPLIER_24MHZ_REF	(0x68 << 11)
+#define PHYCLKRST_MPLL_MULTIPLIER_20MHZ_REF	(0x7d << 11)
+#define PHYCLKRST_MPLL_MULTIPLIER_19200KHZ_REF	(0x02 << 11)
+
+#define PHYCLKRST_FSEL_UTMI_MASK		(0x7 << 5)
+#define PHYCLKRST_FSEL_PIPE_MASK		(0x7 << 8)
+#define PHYCLKRST_FSEL(_x)			((_x) << 5)
+#define PHYCLKRST_FSEL_PAD_100MHZ		(0x27 << 5)
+#define PHYCLKRST_FSEL_PAD_24MHZ		(0x2a << 5)
+#define PHYCLKRST_FSEL_PAD_20MHZ		(0x31 << 5)
+#define PHYCLKRST_FSEL_PAD_19_2MHZ		(0x38 << 5)
+
+#define PHYCLKRST_RETENABLEN			BIT(4)
+
+#define PHYCLKRST_REFCLKSEL_MASK		(0x03 << 2)
+#define PHYCLKRST_REFCLKSEL_PAD_REFCLK		(0x2 << 2)
+#define PHYCLKRST_REFCLKSEL_EXT_REFCLK		(0x3 << 2)
+
+#define PHYCLKRST_PORTRESET			BIT(1)
+#define PHYCLKRST_COMMONONN			BIT(0)
+
+#define EXYNOS5_DRD_PHYREG0			0x14
+#define EXYNOS5_DRD_PHYREG1			0x18
+
+#define EXYNOS5_DRD_PHYPARAM0			0x1c
+
+#define PHYPARAM0_REF_USE_PAD			BIT(31)
+#define PHYPARAM0_REF_LOSLEVEL_MASK		(0x1f << 26)
+#define PHYPARAM0_REF_LOSLEVEL			(0x9 << 26)
+
+#define EXYNOS5_DRD_PHYPARAM1			0x20
+
+#define PHYPARAM1_PCS_TXDEEMPH_MASK		(0x1f << 0)
+#define PHYPARAM1_PCS_TXDEEMPH			(0x1c)
+
+#define EXYNOS5_DRD_PHYTERM			0x24
+
+#define EXYNOS5_DRD_PHYTEST			0x28
+
+#define PHYTEST_POWERDOWN_SSP			BIT(3)
+#define PHYTEST_POWERDOWN_HSP			BIT(2)
+
+#define EXYNOS5_DRD_PHYADP			0x2c
+
+#define EXYNOS5_DRD_PHYUTMICLKSEL		0x30
+
+#define PHYUTMICLKSEL_UTMI_CLKSEL		BIT(2)
+
+#define EXYNOS5_DRD_PHYRESUME			0x34
+#define EXYNOS5_DRD_LINKPORT			0x44
+
+#define KHZ	1000
+#define MHZ	(KHZ * KHZ)
+
+enum exynos5_usbdrd_phy_id {
+	EXYNOS5_DRDPHY_UTMI,
+	EXYNOS5_DRDPHY_PIPE3,
+	EXYNOS5_DRDPHYS_NUM,
+};
+
+struct phy_usb_instance;
+struct exynos5_usbdrd_phy;
+
+struct exynos5_usbdrd_phy_config {
+	u32 id;
+	void (*phy_isol)(struct phy_usb_instance *inst, u32 on);
+	void (*phy_init)(struct exynos5_usbdrd_phy *phy_drd);
+	unsigned int (*set_refclk)(struct phy_usb_instance *inst);
+};
+
+struct exynos5_usbdrd_phy_drvdata {
+	const struct exynos5_usbdrd_phy_config *phy_cfg;
+	u32 pmu_offset_usbdrd0_phy;
+	u32 pmu_offset_usbdrd1_phy;
+};
+
+/**
+ * struct exynos5_usbdrd_phy - driver data for USB 3.0 PHY
+ * @dev: pointer to device instance of this platform device
+ * @reg_phy: usb phy controller register memory base
+ * @clk: phy clock for register access
+ * @drv_data: pointer to SoC level driver data structure
+ * @phys[]: array for 'EXYNOS5_DRDPHYS_NUM' number of PHY
+ *	    instances each with its 'phy' and 'phy_cfg'.
+ * @extrefclk: frequency select settings when using 'separate
+ *	       reference clocks' for SS and HS operations
+ * @ref_clk: reference clock to PHY block from which PHY's
+ *	     operational clocks are derived
+ * @ref_rate: rate of above reference clock
+ */
+struct exynos5_usbdrd_phy {
+	struct device *dev;
+	void __iomem *reg_phy;
+	struct clk *clk;
+	const struct exynos5_usbdrd_phy_drvdata *drv_data;
+	struct phy_usb_instance {
+		struct phy *phy;
+		u32 index;
+		struct regmap *reg_pmu;
+		u32 pmu_offset;
+		const struct exynos5_usbdrd_phy_config *phy_cfg;
+	} phys[EXYNOS5_DRDPHYS_NUM];
+	u32 extrefclk;
+	struct clk *ref_clk;
+};
+
+static inline
+struct exynos5_usbdrd_phy *to_usbdrd_phy(struct phy_usb_instance *inst)
+{
+	return container_of((inst), struct exynos5_usbdrd_phy,
+			    phys[(inst)->index]);
+}
+
+/*
+ * exynos5_rate_to_clk() converts the supplied clock rate to the value that
+ * can be written to the phy register.
+ */
+static unsigned int exynos5_rate_to_clk(unsigned long rate, u32 *reg)
+{
+	/* EXYNOS5_FSEL_MASK */
+
+	switch (rate) {
+	case 9600 * KHZ:
+		*reg = EXYNOS5_FSEL_9MHZ6;
+		break;
+	case 10 * MHZ:
+		*reg = EXYNOS5_FSEL_10MHZ;
+		break;
+	case 12 * MHZ:
+		*reg = EXYNOS5_FSEL_12MHZ;
+		break;
+	case 19200 * KHZ:
+		*reg = EXYNOS5_FSEL_19MHZ2;
+		break;
+	case 20 * MHZ:
+		*reg = EXYNOS5_FSEL_20MHZ;
+		break;
+	case 24 * MHZ:
+		*reg = EXYNOS5_FSEL_24MHZ;
+		break;
+	case 50 * MHZ:
+		*reg = EXYNOS5_FSEL_50MHZ;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void exynos5_usbdrd_phy_isol(struct phy_usb_instance *inst,
+						unsigned int on)
+{
+	unsigned int val;
+
+	if (!inst->reg_pmu)
+		return;
+
+	val = on ? 0 : EXYNOS5_PHY_ENABLE;
+
+	regmap_update_bits(inst->reg_pmu, inst->pmu_offset,
+			   EXYNOS5_PHY_ENABLE, val);
+}
+
+/*
+ * Sets the pipe3 phy's clk as EXTREFCLK (XXTI) which is internal clock
+ * from clock core. Further sets multiplier values and spread spectrum
+ * clock settings for SuperSpeed operations.
+ */
+static unsigned int
+exynos5_usbdrd_pipe3_set_refclk(struct phy_usb_instance *inst)
+{
+	static u32 reg;
+	struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
+
+	/* restore any previous reference clock settings */
+	reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYCLKRST);
+
+	/* Use EXTREFCLK as ref clock */
+	reg &= ~PHYCLKRST_REFCLKSEL_MASK;
+	reg |=	PHYCLKRST_REFCLKSEL_EXT_REFCLK;
+
+	/* FSEL settings corresponding to reference clock */
+	reg &= ~PHYCLKRST_FSEL_PIPE_MASK |
+		PHYCLKRST_MPLL_MULTIPLIER_MASK |
+		PHYCLKRST_SSC_REFCLKSEL_MASK;
+	switch (phy_drd->extrefclk) {
+	case EXYNOS5_FSEL_50MHZ:
+		reg |= (PHYCLKRST_MPLL_MULTIPLIER_50M_REF |
+			PHYCLKRST_SSC_REFCLKSEL(0x00));
+		break;
+	case EXYNOS5_FSEL_24MHZ:
+		reg |= (PHYCLKRST_MPLL_MULTIPLIER_24MHZ_REF |
+			PHYCLKRST_SSC_REFCLKSEL(0x88));
+		break;
+	case EXYNOS5_FSEL_20MHZ:
+		reg |= (PHYCLKRST_MPLL_MULTIPLIER_20MHZ_REF |
+			PHYCLKRST_SSC_REFCLKSEL(0x00));
+		break;
+	case EXYNOS5_FSEL_19MHZ2:
+		reg |= (PHYCLKRST_MPLL_MULTIPLIER_19200KHZ_REF |
+			PHYCLKRST_SSC_REFCLKSEL(0x88));
+		break;
+	default:
+		dev_dbg(phy_drd->dev, "unsupported ref clk\n");
+		break;
+	}
+
+	return reg;
+}
+
+/*
+ * Sets the utmi phy's clk as EXTREFCLK (XXTI) which is internal clock
+ * from clock core. Further sets the FSEL values for HighSpeed operations.
+ */
+static unsigned int
+exynos5_usbdrd_utmi_set_refclk(struct phy_usb_instance *inst)
+{
+	static u32 reg;
+	struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
+
+	/* restore any previous reference clock settings */
+	reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYCLKRST);
+
+	reg &= ~PHYCLKRST_REFCLKSEL_MASK;
+	reg |=	PHYCLKRST_REFCLKSEL_EXT_REFCLK;
+
+	reg &= ~PHYCLKRST_FSEL_UTMI_MASK |
+		PHYCLKRST_MPLL_MULTIPLIER_MASK |
+		PHYCLKRST_SSC_REFCLKSEL_MASK;
+	reg |= PHYCLKRST_FSEL(phy_drd->extrefclk);
+
+	return reg;
+}
+
+static void exynos5_usbdrd_pipe3_init(struct exynos5_usbdrd_phy *phy_drd)
+{
+	u32 reg;
+
+	reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM1);
+	/* Set Tx De-Emphasis level */
+	reg &= ~PHYPARAM1_PCS_TXDEEMPH_MASK;
+	reg |=	PHYPARAM1_PCS_TXDEEMPH;
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM1);
+
+	reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYTEST);
+	reg &= ~PHYTEST_POWERDOWN_SSP;
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYTEST);
+}
+
+static void exynos5_usbdrd_utmi_init(struct exynos5_usbdrd_phy *phy_drd)
+{
+	u32 reg;
+
+	reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM0);
+	/* Set Loss-of-Signal Detector sensitivity */
+	reg &= ~PHYPARAM0_REF_LOSLEVEL_MASK;
+	reg |=	PHYPARAM0_REF_LOSLEVEL;
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM0);
+
+	reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM1);
+	/* Set Tx De-Emphasis level */
+	reg &= ~PHYPARAM1_PCS_TXDEEMPH_MASK;
+	reg |=	PHYPARAM1_PCS_TXDEEMPH;
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM1);
+
+	/* UTMI Power Control */
+	writel(PHYUTMI_OTGDISABLE, phy_drd->reg_phy + EXYNOS5_DRD_PHYUTMI);
+
+	reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYTEST);
+	reg &= ~PHYTEST_POWERDOWN_HSP;
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYTEST);
+}
+
+static int exynos5_usbdrd_phy_init(struct phy *phy)
+{
+	int ret;
+	u32 reg;
+	struct phy_usb_instance *inst = phy_get_drvdata(phy);
+	struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
+
+	ret = clk_prepare_enable(phy_drd->clk);
+	if (ret)
+		return ret;
+
+	/* Reset USB 3.0 PHY */
+	writel(0x0, phy_drd->reg_phy + EXYNOS5_DRD_PHYREG0);
+	writel(0x0, phy_drd->reg_phy + EXYNOS5_DRD_PHYRESUME);
+
+	/*
+	 * Setting the Frame length Adj value[6:1] to default 0x20
+	 * See xHCI 1.0 spec, 5.2.4
+	 */
+	reg =	LINKSYSTEM_XHCI_VERSION_CONTROL |
+		LINKSYSTEM_FLADJ(0x20);
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_LINKSYSTEM);
+
+	reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM0);
+	/* Select PHY CLK source */
+	reg &= ~PHYPARAM0_REF_USE_PAD;
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYPARAM0);
+
+	/* This bit must be set for both HS and SS operations */
+	reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYUTMICLKSEL);
+	reg |= PHYUTMICLKSEL_UTMI_CLKSEL;
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYUTMICLKSEL);
+
+	/* UTMI or PIPE3 specific init */
+	inst->phy_cfg->phy_init(phy_drd);
+
+	/* reference clock settings */
+	reg = inst->phy_cfg->set_refclk(inst);
+
+		/* Digital power supply in normal operating mode */
+	reg |=	PHYCLKRST_RETENABLEN |
+		/* Enable ref clock for SS function */
+		PHYCLKRST_REF_SSP_EN |
+		/* Enable spread spectrum */
+		PHYCLKRST_SSC_EN |
+		/* Power down HS Bias and PLL blocks in suspend mode */
+		PHYCLKRST_COMMONONN |
+		/* Reset the port */
+		PHYCLKRST_PORTRESET;
+
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYCLKRST);
+
+	udelay(10);
+
+	reg &= ~PHYCLKRST_PORTRESET;
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYCLKRST);
+
+	clk_disable_unprepare(phy_drd->clk);
+
+	return 0;
+}
+
+static int exynos5_usbdrd_phy_exit(struct phy *phy)
+{
+	int ret;
+	u32 reg;
+	struct phy_usb_instance *inst = phy_get_drvdata(phy);
+	struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
+
+	ret = clk_prepare_enable(phy_drd->clk);
+	if (ret)
+		return ret;
+
+	reg =	PHYUTMI_OTGDISABLE |
+		PHYUTMI_FORCESUSPEND |
+		PHYUTMI_FORCESLEEP;
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYUTMI);
+
+	/* Resetting the PHYCLKRST enable bits to reduce leakage current */
+	reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYCLKRST);
+	reg &= ~(PHYCLKRST_REF_SSP_EN |
+		 PHYCLKRST_SSC_EN |
+		 PHYCLKRST_COMMONONN);
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYCLKRST);
+
+	/* Control PHYTEST to remove leakage current */
+	reg = readl(phy_drd->reg_phy + EXYNOS5_DRD_PHYTEST);
+	reg |=	PHYTEST_POWERDOWN_SSP |
+		PHYTEST_POWERDOWN_HSP;
+	writel(reg, phy_drd->reg_phy + EXYNOS5_DRD_PHYTEST);
+
+	clk_disable_unprepare(phy_drd->clk);
+
+	return 0;
+}
+
+static int exynos5_usbdrd_phy_power_on(struct phy *phy)
+{
+	struct phy_usb_instance *inst = phy_get_drvdata(phy);
+	struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
+
+	dev_dbg(phy_drd->dev, "Request to power_on usbdrd_phy phy\n");
+
+	clk_prepare_enable(phy_drd->ref_clk);
+
+	/* Power-on PHY*/
+	inst->phy_cfg->phy_isol(inst, 0);
+
+	return 0;
+}
+
+static int exynos5_usbdrd_phy_power_off(struct phy *phy)
+{
+	struct phy_usb_instance *inst = phy_get_drvdata(phy);
+	struct exynos5_usbdrd_phy *phy_drd = to_usbdrd_phy(inst);
+
+	dev_dbg(phy_drd->dev, "Request to power_off usbdrd_phy phy\n");
+
+	/* Power-off the PHY */
+	inst->phy_cfg->phy_isol(inst, 1);
+
+	clk_disable_unprepare(phy_drd->ref_clk);
+
+	return 0;
+}
+
+static struct phy *exynos5_usbdrd_phy_xlate(struct device *dev,
+					struct of_phandle_args *args)
+{
+	struct exynos5_usbdrd_phy *phy_drd = dev_get_drvdata(dev);
+
+	if (WARN_ON(args->args[0] > EXYNOS5_DRDPHYS_NUM))
+		return ERR_PTR(-ENODEV);
+
+	return phy_drd->phys[args->args[0]].phy;
+}
+
+static struct phy_ops exynos5_usbdrd_phy_ops = {
+	.init		= exynos5_usbdrd_phy_init,
+	.exit		= exynos5_usbdrd_phy_exit,
+	.power_on	= exynos5_usbdrd_phy_power_on,
+	.power_off	= exynos5_usbdrd_phy_power_off,
+	.owner		= THIS_MODULE,
+};
+
+const struct exynos5_usbdrd_phy_config phy_cfg_exynos5[] = {
+	{
+		.id		= EXYNOS5_DRDPHY_UTMI,
+		.phy_isol	= exynos5_usbdrd_phy_isol,
+		.phy_init	= exynos5_usbdrd_utmi_init,
+		.set_refclk	= exynos5_usbdrd_utmi_set_refclk,
+	},
+	{
+		.id		= EXYNOS5_DRDPHY_PIPE3,
+		.phy_isol	= exynos5_usbdrd_phy_isol,
+		.phy_init	= exynos5_usbdrd_pipe3_init,
+		.set_refclk	= exynos5_usbdrd_pipe3_set_refclk,
+	},
+};
+
+const struct exynos5_usbdrd_phy_drvdata exynos5420_usbdrd_phy = {
+	.phy_cfg		= phy_cfg_exynos5,
+	.pmu_offset_usbdrd0_phy	= EXYNOS5_USBDRD_PHY_CONTROL,
+	.pmu_offset_usbdrd1_phy	= EXYNOS5420_USBDRD1_PHY_CONTROL,
+};
+
+const struct exynos5_usbdrd_phy_drvdata exynos5250_usbdrd_phy = {
+	.phy_cfg		= phy_cfg_exynos5,
+	.pmu_offset_usbdrd0_phy	= EXYNOS5_USBDRD_PHY_CONTROL,
+};
+
+static const struct of_device_id exynos5_usbdrd_phy_of_match[] = {
+	{
+		.compatible = "samsung,exynos5250-usbdrd-phy",
+		.data = &exynos5250_usbdrd_phy
+	}, {
+		.compatible = "samsung,exynos5420-usbdrd-phy",
+		.data = &exynos5420_usbdrd_phy
+	},
+	{ },
+};
+
+static int exynos5_usbdrd_phy_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *node = dev->of_node;
+	struct exynos5_usbdrd_phy *phy_drd;
+	struct phy_provider *phy_provider;
+	struct resource *res;
+	const struct of_device_id *match;
+	const struct exynos5_usbdrd_phy_drvdata *drv_data;
+	struct regmap *reg_pmu;
+	u32 pmu_offset;
+	unsigned long ref_rate;
+	int i, ret;
+	int channel;
+
+	phy_drd = devm_kzalloc(dev, sizeof(*phy_drd), GFP_KERNEL);
+	if (!phy_drd)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, phy_drd);
+	phy_drd->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	phy_drd->reg_phy = devm_ioremap_resource(dev, res);
+	if (IS_ERR(phy_drd->reg_phy))
+		return PTR_ERR(phy_drd->reg_phy);
+
+	match = of_match_node(exynos5_usbdrd_phy_of_match, pdev->dev.of_node);
+
+	drv_data = match->data;
+	phy_drd->drv_data = drv_data;
+
+	phy_drd->clk = devm_clk_get(dev, "phy");
+	if (IS_ERR(phy_drd->clk)) {
+		dev_err(dev, "Failed to get clock of phy controller\n");
+		return PTR_ERR(phy_drd->clk);
+	}
+
+	phy_drd->ref_clk = devm_clk_get(dev, "ref");
+	if (IS_ERR(phy_drd->ref_clk)) {
+		dev_err(dev, "Failed to get reference clock of usbdrd phy\n");
+		return PTR_ERR(phy_drd->ref_clk);
+	}
+	ref_rate = clk_get_rate(phy_drd->ref_clk);
+
+	ret = exynos5_rate_to_clk(ref_rate, &phy_drd->extrefclk);
+	if (ret) {
+		dev_err(phy_drd->dev, "Clock rate (%ld) not supported\n",
+			ref_rate);
+		return ret;
+	}
+
+	reg_pmu = syscon_regmap_lookup_by_phandle(dev->of_node,
+						   "samsung,pmu-syscon");
+	if (IS_ERR(reg_pmu)) {
+		dev_err(dev, "Failed to lookup PMU regmap\n");
+		return PTR_ERR(reg_pmu);
+	}
+
+	/*
+	 * Exynos5420 SoC has multiple channels for USB 3.0 PHY, with
+	 * each having separate power control registers.
+	 * 'channel' facilitates to set such registers.
+	 */
+	channel = of_alias_get_id(node, "usbdrdphy");
+	if (channel < 0)
+		dev_dbg(dev, "Not a multi-controller usbdrd phy\n");
+
+	switch (channel) {
+	case 1:
+		pmu_offset = phy_drd->drv_data->pmu_offset_usbdrd1_phy;
+		break;
+	case 0:
+	default:
+		pmu_offset = phy_drd->drv_data->pmu_offset_usbdrd0_phy;
+		break;
+	}
+
+	dev_vdbg(dev, "Creating usbdrd_phy phy\n");
+
+	for (i = 0; i < EXYNOS5_DRDPHYS_NUM; i++) {
+		struct phy *phy = devm_phy_create(dev, &exynos5_usbdrd_phy_ops,
+						  NULL);
+		if (IS_ERR(phy)) {
+			dev_err(dev, "Failed to create usbdrd_phy phy\n");
+			return PTR_ERR(phy);
+		}
+
+		phy_drd->phys[i].phy = phy;
+		phy_drd->phys[i].index = i;
+		phy_drd->phys[i].reg_pmu = reg_pmu;
+		phy_drd->phys[i].pmu_offset = pmu_offset;
+		phy_drd->phys[i].phy_cfg = &drv_data->phy_cfg[i];
+		phy_set_drvdata(phy, &phy_drd->phys[i]);
+	}
+
+	phy_provider = devm_of_phy_provider_register(dev,
+						     exynos5_usbdrd_phy_xlate);
+	if (IS_ERR(phy_provider)) {
+		dev_err(phy_drd->dev, "Failed to register phy provider\n");
+		return PTR_ERR(phy_provider);
+	}
+
+	return 0;
+}
+
+static struct platform_driver exynos5_usb3drd_phy = {
+	.probe	= exynos5_usbdrd_phy_probe,
+	.driver = {
+		.of_match_table	= exynos5_usbdrd_phy_of_match,
+		.name		= "exynos5_usb3drd_phy",
+		.owner		= THIS_MODULE,
+	}
+};
+
+module_platform_driver(exynos5_usb3drd_phy);
+MODULE_DESCRIPTION("Samsung EXYNOS5 SoCs USB 3.0 DRD controller PHY driver");
+MODULE_AUTHOR("Vivek Gautam <gautam.vivek@samsung.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:exynos5_usb3drd_phy");
diff --git a/include/linux/mfd/syscon/exynos5-pmu.h b/include/linux/mfd/syscon/exynos5-pmu.h
new file mode 100644
index 000000000000..00ef24bf6ede
--- /dev/null
+++ b/include/linux/mfd/syscon/exynos5-pmu.h
@@ -0,0 +1,44 @@
+/*
+ * Exynos5 SoC series Power Management Unit (PMU) register offsets
+ * and bit definitions.
+ *
+ * Copyright (C) 2014 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _LINUX_MFD_SYSCON_PMU_EXYNOS5_H_
+#define _LINUX_MFD_SYSCON_PMU_EXYNOS5_H_
+
+/* Exynos5 PMU register definitions */
+#define EXYNOS5_HDMI_PHY_CONTROL		(0x700)
+#define EXYNOS5_USBDRD_PHY_CONTROL		(0x704)
+
+/* Exynos5250 specific register definitions */
+#define EXYNOS5_USBHOST_PHY_CONTROL		(0x708)
+#define EXYNOS5_EFNAND_PHY_CONTROL		(0x70c)
+#define EXYNOS5_MIPI_PHY0_CONTROL		(0x710)
+#define EXYNOS5_MIPI_PHY1_CONTROL		(0x714)
+#define EXYNOS5_ADC_PHY_CONTROL			(0x718)
+#define EXYNOS5_MTCADC_PHY_CONTROL		(0x71c)
+#define EXYNOS5_DPTX_PHY_CONTROL		(0x720)
+#define EXYNOS5_SATA_PHY_CONTROL		(0x724)
+
+/* Exynos5420 specific register definitions */
+#define EXYNOS5420_USBDRD1_PHY_CONTROL		(0x708)
+#define EXYNOS5420_USBHOST_PHY_CONTROL		(0x70c)
+#define EXYNOS5420_MIPI_PHY0_CONTROL		(0x714)
+#define EXYNOS5420_MIPI_PHY1_CONTROL		(0x718)
+#define EXYNOS5420_MIPI_PHY2_CONTROL		(0x71c)
+#define EXYNOS5420_ADC_PHY_CONTROL		(0x720)
+#define EXYNOS5420_MTCADC_PHY_CONTROL		(0x724)
+#define EXYNOS5420_DPTX_PHY_CONTROL		(0x728)
+
+#define EXYNOS5_PHY_ENABLE			BIT(0)
+
+#define EXYNOS5_MIPI_PHY_S_RESETN		BIT(1)
+#define EXYNOS5_MIPI_PHY_M_RESETN		BIT(2)
+
+#endif /* _LINUX_MFD_SYSCON_PMU_EXYNOS5_H_ */
-- 
cgit 


From ec903c0c858e4963a9e0724bdcadfa837253341c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 May 2014 12:11:01 -0400
Subject: cgroup: rename css_tryget*() to css_tryget_online*()

Unlike the more usual refcnting, what css_tryget() provides is the
distinction between online and offline csses instead of protection
against upping a refcnt which already reached zero.  cgroup is
planning to provide actual tryget which fails if the refcnt already
reached zero.  Let's rename the existing trygets so that they clearly
indicate that they're about onlineness.

I thought about keeping the existing names as-are and introducing new
names for the planned actual tryget; however, given that each
controller participates in the synchronization of the online state, it
seems worthwhile to make it explicit that these functions are about
on/offline state.

Rename css_tryget() to css_tryget_online() and css_tryget_from_dir()
to css_tryget_online_from_dir().  This is a pure rename.

v2: cgroup_freezer grew new usages of css_tryget().  Update
    accordingly.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
---
 block/blk-cgroup.c      |  2 +-
 fs/bio.c                |  2 +-
 include/linux/cgroup.h  | 14 +++++++-------
 kernel/cgroup.c         | 40 ++++++++++++++++++++--------------------
 kernel/cgroup_freezer.c |  4 ++--
 kernel/cpuset.c         |  6 +++---
 kernel/events/core.c    |  3 ++-
 mm/hugetlb_cgroup.c     |  2 +-
 mm/memcontrol.c         | 46 ++++++++++++++++++++++++----------------------
 9 files changed, 61 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 1039fb9ff5f5..9f5bce33e6fe 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -185,7 +185,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 	lockdep_assert_held(q->queue_lock);
 
 	/* blkg holds a reference to blkcg */
-	if (!css_tryget(&blkcg->css)) {
+	if (!css_tryget_online(&blkcg->css)) {
 		ret = -EINVAL;
 		goto err_free_blkg;
 	}
diff --git a/fs/bio.c b/fs/bio.c
index 6f0362b77806..0608ef9422bd 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1970,7 +1970,7 @@ int bio_associate_current(struct bio *bio)
 	/* associate blkcg if exists */
 	rcu_read_lock();
 	css = task_css(current, blkio_cgrp_id);
-	if (css && css_tryget(css))
+	if (css && css_tryget_online(css))
 		bio->bi_css = css;
 	rcu_read_unlock();
 
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index bde44618d8c2..c5f3684ef557 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -95,16 +95,16 @@ static inline void css_get(struct cgroup_subsys_state *css)
 }
 
 /**
- * css_tryget - try to obtain a reference on the specified css
+ * css_tryget_online - try to obtain a reference on the specified css if online
  * @css: target css
  *
- * Obtain a reference on @css if it's alive.  The caller naturally needs to
- * ensure that @css is accessible but doesn't have to be holding a
+ * Obtain a reference on @css if it's online.  The caller naturally needs
+ * to ensure that @css is accessible but doesn't have to be holding a
  * reference on it - IOW, RCU protected access is good enough for this
  * function.  Returns %true if a reference count was successfully obtained;
  * %false otherwise.
  */
-static inline bool css_tryget(struct cgroup_subsys_state *css)
+static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 {
 	if (css->flags & CSS_ROOT)
 		return true;
@@ -115,7 +115,7 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
  * css_put - put a css reference
  * @css: target css
  *
- * Put a reference obtained via css_get() and css_tryget().
+ * Put a reference obtained via css_get() and css_tryget_online().
  */
 static inline void css_put(struct cgroup_subsys_state *css)
 {
@@ -905,8 +905,8 @@ void css_task_iter_end(struct css_task_iter *it);
 int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
 int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);
 
-struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
-						struct cgroup_subsys *ss);
+struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
+						       struct cgroup_subsys *ss);
 
 #else /* !CONFIG_CGROUPS */
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7633703e9baf..671d8a6dae37 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3771,7 +3771,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
 
 	/*
 	 * We aren't being called from kernfs and there's no guarantee on
-	 * @kn->priv's validity.  For this and css_tryget_from_dir(),
+	 * @kn->priv's validity.  For this and css_tryget_online_from_dir(),
 	 * @kn->priv is RCU safe.  Let's do the RCU dancing.
 	 */
 	rcu_read_lock();
@@ -4060,9 +4060,9 @@ err:
  *    Implemented in kill_css().
  *
  * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
- *    and thus css_tryget() is guaranteed to fail, the css can be offlined
- *    by invoking offline_css().  After offlining, the base ref is put.
- *    Implemented in css_killed_work_fn().
+ *    and thus css_tryget_online() is guaranteed to fail, the css can be
+ *    offlined by invoking offline_css().  After offlining, the base ref is
+ *    put.  Implemented in css_killed_work_fn().
  *
  * 3. When the percpu_ref reaches zero, the only possible remaining
  *    accessors are inside RCU read sections.  css_release() schedules the
@@ -4386,7 +4386,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 
 /*
  * This is called when the refcnt of a css is confirmed to be killed.
- * css_tryget() is now guaranteed to fail.
+ * css_tryget_online() is now guaranteed to fail.
  */
 static void css_killed_work_fn(struct work_struct *work)
 {
@@ -4398,8 +4398,8 @@ static void css_killed_work_fn(struct work_struct *work)
 	mutex_lock(&cgroup_mutex);
 
 	/*
-	 * css_tryget() is guaranteed to fail now.  Tell subsystems to
-	 * initate destruction.
+	 * css_tryget_online() is guaranteed to fail now.  Tell subsystems
+	 * to initate destruction.
 	 */
 	offline_css(css);
 
@@ -4440,8 +4440,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
  *
  * This function initiates destruction of @css by removing cgroup interface
  * files and putting its base reference.  ->css_offline() will be invoked
- * asynchronously once css_tryget() is guaranteed to fail and when the
- * reference count reaches zero, @css will be released.
+ * asynchronously once css_tryget_online() is guaranteed to fail and when
+ * the reference count reaches zero, @css will be released.
  */
 static void kill_css(struct cgroup_subsys_state *css)
 {
@@ -4462,7 +4462,7 @@ static void kill_css(struct cgroup_subsys_state *css)
 	/*
 	 * cgroup core guarantees that, by the time ->css_offline() is
 	 * invoked, no new css reference will be given out via
-	 * css_tryget().  We can't simply call percpu_ref_kill() and
+	 * css_tryget_online().  We can't simply call percpu_ref_kill() and
 	 * proceed to offlining css's because percpu_ref_kill() doesn't
 	 * guarantee that the ref is seen as killed on all CPUs on return.
 	 *
@@ -4478,9 +4478,9 @@ static void kill_css(struct cgroup_subsys_state *css)
  *
  * css's make use of percpu refcnts whose killing latency shouldn't be
  * exposed to userland and are RCU protected.  Also, cgroup core needs to
- * guarantee that css_tryget() won't succeed by the time ->css_offline() is
- * invoked.  To satisfy all the requirements, destruction is implemented in
- * the following two steps.
+ * guarantee that css_tryget_online() won't succeed by the time
+ * ->css_offline() is invoked.  To satisfy all the requirements,
+ * destruction is implemented in the following two steps.
  *
  * s1. Verify @cgrp can be destroyed and mark it dying.  Remove all
  *     userland visible parts and start killing the percpu refcnts of
@@ -4574,9 +4574,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	/*
 	 * There are two control paths which try to determine cgroup from
 	 * dentry without going through kernfs - cgroupstats_build() and
-	 * css_tryget_from_dir().  Those are supported by RCU protecting
-	 * clearing of cgrp->kn->priv backpointer, which should happen
-	 * after all files under it have been removed.
+	 * css_tryget_online_from_dir().  Those are supported by RCU
+	 * protecting clearing of cgrp->kn->priv backpointer, which should
+	 * happen after all files under it have been removed.
 	 */
 	kernfs_remove(cgrp->kn);	/* @cgrp has an extra ref on its kn */
 	RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL);
@@ -5173,7 +5173,7 @@ static int __init cgroup_disable(char *str)
 __setup("cgroup_disable=", cgroup_disable);
 
 /**
- * css_tryget_from_dir - get corresponding css from the dentry of a cgroup dir
+ * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
  * @dentry: directory dentry of interest
  * @ss: subsystem of interest
  *
@@ -5181,8 +5181,8 @@ __setup("cgroup_disable=", cgroup_disable);
  * to get the corresponding css and return it.  If such css doesn't exist
  * or can't be pinned, an ERR_PTR value is returned.
  */
-struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
-						struct cgroup_subsys *ss)
+struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
+						       struct cgroup_subsys *ss)
 {
 	struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
 	struct cgroup_subsys_state *css = NULL;
@@ -5204,7 +5204,7 @@ struct cgroup_subsys_state *css_tryget_from_dir(struct dentry *dentry,
 	if (cgrp)
 		css = cgroup_css(cgrp, ss);
 
-	if (!css || !css_tryget(css))
+	if (!css || !css_tryget_online(css))
 		css = ERR_PTR(-ENOENT);
 
 	rcu_read_unlock();
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 345628c78b5b..0398f7e9ac81 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -304,7 +304,7 @@ static int freezer_read(struct seq_file *m, void *v)
 
 	/* update states bottom-up */
 	css_for_each_descendant_post(pos, css) {
-		if (!css_tryget(pos))
+		if (!css_tryget_online(pos))
 			continue;
 		rcu_read_unlock();
 
@@ -404,7 +404,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
 		struct freezer *pos_f = css_freezer(pos);
 		struct freezer *parent = parent_freezer(pos_f);
 
-		if (!css_tryget(pos))
+		if (!css_tryget_online(pos))
 			continue;
 		rcu_read_unlock();
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 7c0e8da59e26..37ca0a5c226d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -872,7 +872,7 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs, bool update_root)
 				continue;
 			}
 		}
-		if (!css_tryget(&cp->css))
+		if (!css_tryget_online(&cp->css))
 			continue;
 		rcu_read_unlock();
 
@@ -1108,7 +1108,7 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs, bool update_root)
 				continue;
 			}
 		}
-		if (!css_tryget(&cp->css))
+		if (!css_tryget_online(&cp->css))
 			continue;
 		rcu_read_unlock();
 
@@ -2153,7 +2153,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 
 		rcu_read_lock();
 		cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
-			if (cs == &top_cpuset || !css_tryget(&cs->css))
+			if (cs == &top_cpuset || !css_tryget_online(&cs->css))
 				continue;
 			rcu_read_unlock();
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f83a71a3e46d..077968d19b8a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -607,7 +607,8 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
 	if (!f.file)
 		return -EBADF;
 
-	css = css_tryget_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys);
+	css = css_tryget_online_from_dir(f.file->f_dentry,
+					 &perf_event_cgrp_subsys);
 	if (IS_ERR(css)) {
 		ret = PTR_ERR(css);
 		goto out;
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 595d7fd795e1..372f1adca491 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -181,7 +181,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 again:
 	rcu_read_lock();
 	h_cg = hugetlb_cgroup_from_task(current);
-	if (!css_tryget(&h_cg->css)) {
+	if (!css_tryget_online(&h_cg->css)) {
 		rcu_read_unlock();
 		goto again;
 	}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c3f82f69ef58..5cf3246314a2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -567,7 +567,8 @@ void sock_update_memcg(struct sock *sk)
 		memcg = mem_cgroup_from_task(current);
 		cg_proto = sk->sk_prot->proto_cgroup(memcg);
 		if (!mem_cgroup_is_root(memcg) &&
-		    memcg_proto_active(cg_proto) && css_tryget(&memcg->css)) {
+		    memcg_proto_active(cg_proto) &&
+		    css_tryget_online(&memcg->css)) {
 			sk->sk_cgrp = cg_proto;
 		}
 		rcu_read_unlock();
@@ -834,7 +835,7 @@ retry:
 	 */
 	__mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
 	if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
-		!css_tryget(&mz->memcg->css))
+	    !css_tryget_online(&mz->memcg->css))
 		goto retry;
 done:
 	return mz;
@@ -1076,7 +1077,7 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 		memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
 		if (unlikely(!memcg))
 			memcg = root_mem_cgroup;
-	} while (!css_tryget(&memcg->css));
+	} while (!css_tryget_online(&memcg->css));
 	rcu_read_unlock();
 	return memcg;
 }
@@ -1113,7 +1114,8 @@ skip_node:
 	 */
 	if (next_css) {
 		if ((next_css == &root->css) ||
-		    ((next_css->flags & CSS_ONLINE) && css_tryget(next_css)))
+		    ((next_css->flags & CSS_ONLINE) &&
+		     css_tryget_online(next_css)))
 			return mem_cgroup_from_css(next_css);
 
 		prev_css = next_css;
@@ -1159,7 +1161,7 @@ mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter,
 		 * would be returned all the time.
 		 */
 		if (position && position != root &&
-				!css_tryget(&position->css))
+		    !css_tryget_online(&position->css))
 			position = NULL;
 	}
 	return position;
@@ -2785,9 +2787,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
 
 /*
  * A helper function to get mem_cgroup from ID. must be called under
- * rcu_read_lock().  The caller is responsible for calling css_tryget if
- * the mem_cgroup is used for charging. (dropping refcnt from swap can be
- * called against removed memcg.)
+ * rcu_read_lock().  The caller is responsible for calling
+ * css_tryget_online() if the mem_cgroup is used for charging. (dropping
+ * refcnt from swap can be called against removed memcg.)
  */
 static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
 {
@@ -2810,14 +2812,14 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc)) {
 		memcg = pc->mem_cgroup;
-		if (memcg && !css_tryget(&memcg->css))
+		if (memcg && !css_tryget_online(&memcg->css))
 			memcg = NULL;
 	} else if (PageSwapCache(page)) {
 		ent.val = page_private(page);
 		id = lookup_swap_cgroup_id(ent);
 		rcu_read_lock();
 		memcg = mem_cgroup_lookup(id);
-		if (memcg && !css_tryget(&memcg->css))
+		if (memcg && !css_tryget_online(&memcg->css))
 			memcg = NULL;
 		rcu_read_unlock();
 	}
@@ -3473,7 +3475,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	}
 
 	/* The corresponding put will be done in the workqueue. */
-	if (!css_tryget(&memcg->css))
+	if (!css_tryget_online(&memcg->css))
 		goto out;
 	rcu_read_unlock();
 
@@ -4246,8 +4248,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
 	memcg = mem_cgroup_lookup(id);
 	if (memcg) {
 		/*
-		 * We uncharge this because swap is freed.
-		 * This memcg can be obsolete one. We avoid calling css_tryget
+		 * We uncharge this because swap is freed.  This memcg can
+		 * be obsolete one. We avoid calling css_tryget_online().
 		 */
 		if (!mem_cgroup_is_root(memcg))
 			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
@@ -5840,10 +5842,10 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg)
 	 * which is then paired with css_put during uncharge resp. here.
 	 *
 	 * Although this might sound strange as this path is called from
-	 * css_offline() when the referencemight have dropped down to 0
-	 * and shouldn't be incremented anymore (css_tryget would fail)
-	 * we do not have other options because of the kmem allocations
-	 * lifetime.
+	 * css_offline() when the referencemight have dropped down to 0 and
+	 * shouldn't be incremented anymore (css_tryget_online() would
+	 * fail) we do not have other options because of the kmem
+	 * allocations lifetime.
 	 */
 	css_get(&memcg->css);
 
@@ -6051,8 +6053,8 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
 	 * automatically removed on cgroup destruction but the removal is
 	 * asynchronous, so take an extra ref on @css.
 	 */
-	cfile_css = css_tryget_from_dir(cfile.file->f_dentry->d_parent,
-					&memory_cgrp_subsys);
+	cfile_css = css_tryget_online_from_dir(cfile.file->f_dentry->d_parent,
+					       &memory_cgrp_subsys);
 	ret = -EINVAL;
 	if (IS_ERR(cfile_css))
 		goto out_put_cfile;
@@ -6496,7 +6498,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	/*
 	 * XXX: css_offline() would be where we should reparent all
 	 * memory to prepare the cgroup for destruction.  However,
-	 * memcg does not do css_tryget() and res_counter charging
+	 * memcg does not do css_tryget_online() and res_counter charging
 	 * under the same RCU lock region, which means that charging
 	 * could race with offlining.  Offlining only happens to
 	 * cgroups with no tasks in them but charges can show up
@@ -6510,9 +6512,9 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 	 *                           lookup_swap_cgroup_id()
 	 *                           rcu_read_lock()
 	 *                           mem_cgroup_lookup()
-	 *                           css_tryget()
+	 *                           css_tryget_online()
 	 *                           rcu_read_unlock()
-	 * disable css_tryget()
+	 * disable css_tryget_online()
 	 * call_rcu()
 	 *   offline_css()
 	 *     reparent_charges()
-- 
cgit 


From b41686401e501430ffe93b575ef7959d2ecc6f2e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 May 2014 12:16:21 -0400
Subject: cgroup: implement cftype->write()

During the recent conversion to kernfs, cftype's seq_file operations
are updated so that they are directly mapped to kernfs operations and
thus can fully access the associated kernfs and cgroup contexts;
however, the write path hasn't seen similar updates and none of the
existing write operations has access to, for example, the associated
kernfs_open_file.

Let's introduce a new operation cftype->write() which maps directly to
the kernfs write operation and has access to all the arguments and
contexts.  This will replace ->write_string() and ->trigger() and ease
manipulation of kernfs active protection from cgroup file operations.

Two accessors - of_cft() and of_css() - are introduced to enable
accessing the associated cgroup context from cftype->write() which
only takes kernfs_open_file for the context information.  The
accessors for seq_file operations - seq_cft() and seq_css() - are
rewritten to wrap the of_ accessors.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 27 +++++++++++++++++++++++----
 kernel/cgroup.c        | 14 ++++++++------
 2 files changed, 31 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c5f3684ef557..c5a170ca4a48 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -515,6 +515,15 @@ struct cftype {
 	 */
 	int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
 
+	/*
+	 * write() is the generic write callback which maps directly to
+	 * kernfs write operation and overrides all other operations.
+	 * Maximum write size is determined by ->max_write_len.  Use
+	 * of_css/cft() to access the associated css and cft.
+	 */
+	ssize_t (*write)(struct kernfs_open_file *of,
+			 char *buf, size_t nbytes, loff_t off);
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lock_class_key	lockdep_key;
 #endif
@@ -552,14 +561,24 @@ static inline ino_t cgroup_ino(struct cgroup *cgrp)
 		return 0;
 }
 
-static inline struct cftype *seq_cft(struct seq_file *seq)
+/* cft/css accessors for cftype->write() operation */
+static inline struct cftype *of_cft(struct kernfs_open_file *of)
 {
-	struct kernfs_open_file *of = seq->private;
-
 	return of->kn->priv;
 }
 
-struct cgroup_subsys_state *seq_css(struct seq_file *seq);
+struct cgroup_subsys_state *of_css(struct kernfs_open_file *of);
+
+/* cft/css accessors for cftype->seq_*() operations */
+static inline struct cftype *seq_cft(struct seq_file *seq)
+{
+	return of_cft(seq->private);
+}
+
+static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq)
+{
+	return of_css(seq->private);
+}
 
 /*
  * Name / path handling functions.  All are thin wrappers around the kernfs
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 671d8a6dae37..a16f91d12f4e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -283,11 +283,10 @@ static inline bool cgroup_is_dead(const struct cgroup *cgrp)
 	return test_bit(CGRP_DEAD, &cgrp->flags);
 }
 
-struct cgroup_subsys_state *seq_css(struct seq_file *seq)
+struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
 {
-	struct kernfs_open_file *of = seq->private;
 	struct cgroup *cgrp = of->kn->parent->priv;
-	struct cftype *cft = seq_cft(seq);
+	struct cftype *cft = of_cft(of);
 
 	/*
 	 * This is open and unprotected implementation of cgroup_css().
@@ -302,7 +301,7 @@ struct cgroup_subsys_state *seq_css(struct seq_file *seq)
 	else
 		return &cgrp->dummy_css;
 }
-EXPORT_SYMBOL_GPL(seq_css);
+EXPORT_SYMBOL_GPL(of_css);
 
 /**
  * cgroup_is_descendant - test ancestry
@@ -1035,8 +1034,8 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
 	if (cft->read_u64 || cft->read_s64 || cft->seq_show)
 		mode |= S_IRUGO;
 
-	if (cft->write_u64 || cft->write_s64 || cft->write_string ||
-	    cft->trigger)
+	if (cft->write_u64 || cft->write_s64 || cft->write ||
+	    cft->write_string || cft->trigger)
 		mode |= S_IWUSR;
 
 	return mode;
@@ -2726,6 +2725,9 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 	struct cgroup_subsys_state *css;
 	int ret;
 
+	if (cft->write)
+		return cft->write(of, buf, nbytes, off);
+
 	/*
 	 * kernfs guarantees that a file isn't deleted with operations in
 	 * flight, which means that the matching css is and stays alive and
-- 
cgit 


From 451af504df0c62f695a69b83c250486e77c66378 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 May 2014 12:16:21 -0400
Subject: cgroup: replace cftype->write_string() with cftype->write()

Convert all cftype->write_string() users to the new cftype->write()
which maps directly to kernfs write operation and has full access to
kernfs and cgroup contexts.  The conversions are mostly mechanical.

* @css and @cft are accessed using of_css() and of_cft() accessors
  respectively instead of being specified as arguments.

* Should return @nbytes on success instead of 0.

* @buf is not trimmed automatically.  Trim if necessary.  Note that
  blkcg and netprio don't need this as the parsers already handle
  whitespaces.

cftype->write_string() has no users left after the conversions and is
removed.

While at it, remove unnecessary local variable @p in
cgroup_subtree_control_write() and stale comment about
CGROUP_LOCAL_BUFFER_SIZE in cgroup_freezer.c.

This patch doesn't introduce any visible behavior changes.

v2: netprio was missing from conversion.  Converted.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Aristeu Rozanski <arozansk@redhat.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: "David S. Miller" <davem@davemloft.net>
---
 block/blk-throttle.c      | 32 ++++++++++++++++----------------
 block/cfq-iosched.c       | 28 ++++++++++++++--------------
 include/linux/cgroup.h    | 10 +---------
 kernel/cgroup.c           | 38 +++++++++++++++++++-------------------
 kernel/cgroup_freezer.c   | 20 +++++++++-----------
 kernel/cpuset.c           | 16 +++++++++-------
 mm/hugetlb_cgroup.c       | 17 +++++++++--------
 mm/memcontrol.c           | 46 +++++++++++++++++++++++++---------------------
 net/core/netprio_cgroup.c | 12 ++++++------
 net/ipv4/tcp_memcontrol.c | 16 +++++++++-------
 security/device_cgroup.c  | 14 +++++++-------
 11 files changed, 124 insertions(+), 125 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 033745cd7fba..5e8fd1bace98 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -1346,10 +1346,10 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v)
 	return 0;
 }
 
-static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
-		       const char *buf, bool is_u64)
+static ssize_t tg_set_conf(struct kernfs_open_file *of,
+			   char *buf, size_t nbytes, loff_t off, bool is_u64)
 {
-	struct blkcg *blkcg = css_to_blkcg(css);
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
 	struct blkg_conf_ctx ctx;
 	struct throtl_grp *tg;
 	struct throtl_service_queue *sq;
@@ -1368,9 +1368,9 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
 		ctx.v = -1;
 
 	if (is_u64)
-		*(u64 *)((void *)tg + cft->private) = ctx.v;
+		*(u64 *)((void *)tg + of_cft(of)->private) = ctx.v;
 	else
-		*(unsigned int *)((void *)tg + cft->private) = ctx.v;
+		*(unsigned int *)((void *)tg + of_cft(of)->private) = ctx.v;
 
 	throtl_log(&tg->service_queue,
 		   "limit change rbps=%llu wbps=%llu riops=%u wiops=%u",
@@ -1404,19 +1404,19 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
 	}
 
 	blkg_conf_finish(&ctx);
-	return 0;
+	return nbytes;
 }
 
-static int tg_set_conf_u64(struct cgroup_subsys_state *css, struct cftype *cft,
-			   char *buf)
+static ssize_t tg_set_conf_u64(struct kernfs_open_file *of,
+			       char *buf, size_t nbytes, loff_t off)
 {
-	return tg_set_conf(css, cft, buf, true);
+	return tg_set_conf(of, buf, nbytes, off, true);
 }
 
-static int tg_set_conf_uint(struct cgroup_subsys_state *css, struct cftype *cft,
-			    char *buf)
+static ssize_t tg_set_conf_uint(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
 {
-	return tg_set_conf(css, cft, buf, false);
+	return tg_set_conf(of, buf, nbytes, off, false);
 }
 
 static struct cftype throtl_files[] = {
@@ -1424,25 +1424,25 @@ static struct cftype throtl_files[] = {
 		.name = "throttle.read_bps_device",
 		.private = offsetof(struct throtl_grp, bps[READ]),
 		.seq_show = tg_print_conf_u64,
-		.write_string = tg_set_conf_u64,
+		.write = tg_set_conf_u64,
 	},
 	{
 		.name = "throttle.write_bps_device",
 		.private = offsetof(struct throtl_grp, bps[WRITE]),
 		.seq_show = tg_print_conf_u64,
-		.write_string = tg_set_conf_u64,
+		.write = tg_set_conf_u64,
 	},
 	{
 		.name = "throttle.read_iops_device",
 		.private = offsetof(struct throtl_grp, iops[READ]),
 		.seq_show = tg_print_conf_uint,
-		.write_string = tg_set_conf_uint,
+		.write = tg_set_conf_uint,
 	},
 	{
 		.name = "throttle.write_iops_device",
 		.private = offsetof(struct throtl_grp, iops[WRITE]),
 		.seq_show = tg_print_conf_uint,
-		.write_string = tg_set_conf_uint,
+		.write = tg_set_conf_uint,
 	},
 	{
 		.name = "throttle.io_service_bytes",
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e0985f1955e7..a73020b8c9af 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1670,11 +1670,11 @@ static int cfq_print_leaf_weight(struct seq_file *sf, void *v)
 	return 0;
 }
 
-static int __cfqg_set_weight_device(struct cgroup_subsys_state *css,
-				    struct cftype *cft, const char *buf,
-				    bool is_leaf_weight)
+static ssize_t __cfqg_set_weight_device(struct kernfs_open_file *of,
+					char *buf, size_t nbytes, loff_t off,
+					bool is_leaf_weight)
 {
-	struct blkcg *blkcg = css_to_blkcg(css);
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
 	struct blkg_conf_ctx ctx;
 	struct cfq_group *cfqg;
 	int ret;
@@ -1697,19 +1697,19 @@ static int __cfqg_set_weight_device(struct cgroup_subsys_state *css,
 	}
 
 	blkg_conf_finish(&ctx);
-	return ret;
+	return ret ?: nbytes;
 }
 
-static int cfqg_set_weight_device(struct cgroup_subsys_state *css,
-				  struct cftype *cft, char *buf)
+static ssize_t cfqg_set_weight_device(struct kernfs_open_file *of,
+				      char *buf, size_t nbytes, loff_t off)
 {
-	return __cfqg_set_weight_device(css, cft, buf, false);
+	return __cfqg_set_weight_device(of, buf, nbytes, off, false);
 }
 
-static int cfqg_set_leaf_weight_device(struct cgroup_subsys_state *css,
-				       struct cftype *cft, char *buf)
+static ssize_t cfqg_set_leaf_weight_device(struct kernfs_open_file *of,
+					   char *buf, size_t nbytes, loff_t off)
 {
-	return __cfqg_set_weight_device(css, cft, buf, true);
+	return __cfqg_set_weight_device(of, buf, nbytes, off, true);
 }
 
 static int __cfq_set_weight(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -1837,7 +1837,7 @@ static struct cftype cfq_blkcg_files[] = {
 		.name = "weight_device",
 		.flags = CFTYPE_ONLY_ON_ROOT,
 		.seq_show = cfqg_print_leaf_weight_device,
-		.write_string = cfqg_set_leaf_weight_device,
+		.write = cfqg_set_leaf_weight_device,
 	},
 	{
 		.name = "weight",
@@ -1851,7 +1851,7 @@ static struct cftype cfq_blkcg_files[] = {
 		.name = "weight_device",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = cfqg_print_weight_device,
-		.write_string = cfqg_set_weight_device,
+		.write = cfqg_set_weight_device,
 	},
 	{
 		.name = "weight",
@@ -1863,7 +1863,7 @@ static struct cftype cfq_blkcg_files[] = {
 	{
 		.name = "leaf_weight_device",
 		.seq_show = cfqg_print_leaf_weight_device,
-		.write_string = cfqg_set_leaf_weight_device,
+		.write = cfqg_set_leaf_weight_device,
 	},
 	{
 		.name = "leaf_weight",
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index c5a170ca4a48..aecdc84fe128 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -453,8 +453,7 @@ struct cftype {
 
 	/*
 	 * The maximum length of string, excluding trailing nul, that can
-	 * be passed to write_string.  If < PAGE_SIZE-1, PAGE_SIZE-1 is
-	 * assumed.
+	 * be passed to write.  If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed.
 	 */
 	size_t max_write_len;
 
@@ -500,13 +499,6 @@ struct cftype {
 	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
 			 s64 val);
 
-	/*
-	 * write_string() is passed a nul-terminated kernelspace
-	 * buffer of maximum length determined by max_write_len.
-	 * Returns 0 or -ve error code.
-	 */
-	int (*write_string)(struct cgroup_subsys_state *css, struct cftype *cft,
-			    char *buffer);
 	/*
 	 * trigger() callback can be used to get some kick from the
 	 * userspace, when the actual string written is not important
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a16f91d12f4e..2a88ce7b24b6 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1035,7 +1035,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
 		mode |= S_IRUGO;
 
 	if (cft->write_u64 || cft->write_s64 || cft->write ||
-	    cft->write_string || cft->trigger)
+	    cft->trigger)
 		mode |= S_IWUSR;
 
 	return mode;
@@ -2352,20 +2352,21 @@ static int cgroup_procs_write(struct cgroup_subsys_state *css,
 	return attach_task_by_pid(css->cgroup, tgid, true);
 }
 
-static int cgroup_release_agent_write(struct cgroup_subsys_state *css,
-				      struct cftype *cft, char *buffer)
+static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
+					  char *buf, size_t nbytes, loff_t off)
 {
-	struct cgroup_root *root = css->cgroup->root;
+	struct cgroup *cgrp = of_css(of)->cgroup;
+	struct cgroup_root *root = cgrp->root;
 
 	BUILD_BUG_ON(sizeof(root->release_agent_path) < PATH_MAX);
-	if (!cgroup_lock_live_group(css->cgroup))
+	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
 	spin_lock(&release_agent_path_lock);
-	strlcpy(root->release_agent_path, buffer,
+	strlcpy(root->release_agent_path, strstrip(buf),
 		sizeof(root->release_agent_path));
 	spin_unlock(&release_agent_path_lock);
 	mutex_unlock(&cgroup_mutex);
-	return 0;
+	return nbytes;
 }
 
 static int cgroup_release_agent_show(struct seq_file *seq, void *v)
@@ -2530,21 +2531,22 @@ out_finish:
 }
 
 /* change the enabled child controllers for a cgroup in the default hierarchy */
-static int cgroup_subtree_control_write(struct cgroup_subsys_state *dummy_css,
-					struct cftype *cft, char *buffer)
+static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
+					    char *buf, size_t nbytes,
+					    loff_t off)
 {
 	unsigned int enable = 0, disable = 0;
-	struct cgroup *cgrp = dummy_css->cgroup, *child;
+	struct cgroup *cgrp = of_css(of)->cgroup, *child;
 	struct cgroup_subsys *ss;
-	char *tok, *p;
+	char *tok;
 	int ssid, ret;
 
 	/*
 	 * Parse input - space separated list of subsystem names prefixed
 	 * with either + or -.
 	 */
-	p = buffer;
-	while ((tok = strsep(&p, " "))) {
+	buf = strstrip(buf);
+	while ((tok = strsep(&buf, " "))) {
 		if (tok[0] == '\0')
 			continue;
 		for_each_subsys(ss, ssid) {
@@ -2692,7 +2694,7 @@ out_unlock_tree:
 out_unbreak:
 	kernfs_unbreak_active_protection(cgrp->control_kn);
 	cgroup_put(cgrp);
-	return ret;
+	return ret ?: nbytes;
 
 err_undo_css:
 	cgrp->child_subsys_mask &= ~enable;
@@ -2738,9 +2740,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 	css = cgroup_css(cgrp, cft->ss);
 	rcu_read_unlock();
 
-	if (cft->write_string) {
-		ret = cft->write_string(css, cft, strstrip(buf));
-	} else if (cft->write_u64) {
+	if (cft->write_u64) {
 		unsigned long long v;
 		ret = kstrtoull(buf, 0, &v);
 		if (!ret)
@@ -3984,7 +3984,7 @@ static struct cftype cgroup_base_files[] = {
 		.name = "cgroup.subtree_control",
 		.flags = CFTYPE_ONLY_ON_DFL,
 		.seq_show = cgroup_subtree_control_show,
-		.write_string = cgroup_subtree_control_write,
+		.write = cgroup_subtree_control_write,
 	},
 	{
 		.name = "cgroup.populated",
@@ -4018,7 +4018,7 @@ static struct cftype cgroup_base_files[] = {
 		.name = "release_agent",
 		.flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT,
 		.seq_show = cgroup_release_agent_show,
-		.write_string = cgroup_release_agent_write,
+		.write = cgroup_release_agent_write,
 		.max_write_len = PATH_MAX - 1,
 	},
 	{ }	/* terminate */
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 0398f7e9ac81..6b4e60e33a9a 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -73,10 +73,6 @@ bool cgroup_freezing(struct task_struct *task)
 	return ret;
 }
 
-/*
- * cgroups_write_string() limits the size of freezer state strings to
- * CGROUP_LOCAL_BUFFER_SIZE
- */
 static const char *freezer_state_strs(unsigned int state)
 {
 	if (state & CGROUP_FROZEN)
@@ -423,20 +419,22 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
 	mutex_unlock(&freezer_mutex);
 }
 
-static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft,
-			 char *buffer)
+static ssize_t freezer_write(struct kernfs_open_file *of,
+			     char *buf, size_t nbytes, loff_t off)
 {
 	bool freeze;
 
-	if (strcmp(buffer, freezer_state_strs(0)) == 0)
+	buf = strstrip(buf);
+
+	if (strcmp(buf, freezer_state_strs(0)) == 0)
 		freeze = false;
-	else if (strcmp(buffer, freezer_state_strs(CGROUP_FROZEN)) == 0)
+	else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
 		freeze = true;
 	else
 		return -EINVAL;
 
-	freezer_change_state(css_freezer(css), freeze);
-	return 0;
+	freezer_change_state(css_freezer(of_css(of)), freeze);
+	return nbytes;
 }
 
 static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
@@ -460,7 +458,7 @@ static struct cftype files[] = {
 		.name = "state",
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = freezer_read,
-		.write_string = freezer_write,
+		.write = freezer_write,
 	},
 	{
 		.name = "self_freezing",
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 37ca0a5c226d..2f4b08b8db24 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1603,13 +1603,15 @@ out_unlock:
 /*
  * Common handling for a write to a "cpus" or "mems" file.
  */
-static int cpuset_write_resmask(struct cgroup_subsys_state *css,
-				struct cftype *cft, char *buf)
+static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
+				    char *buf, size_t nbytes, loff_t off)
 {
-	struct cpuset *cs = css_cs(css);
+	struct cpuset *cs = css_cs(of_css(of));
 	struct cpuset *trialcs;
 	int retval = -ENODEV;
 
+	buf = strstrip(buf);
+
 	/*
 	 * CPU or memory hotunplug may leave @cs w/o any execution
 	 * resources, in which case the hotplug code asynchronously updates
@@ -1633,7 +1635,7 @@ static int cpuset_write_resmask(struct cgroup_subsys_state *css,
 		goto out_unlock;
 	}
 
-	switch (cft->private) {
+	switch (of_cft(of)->private) {
 	case FILE_CPULIST:
 		retval = update_cpumask(cs, trialcs, buf);
 		break;
@@ -1648,7 +1650,7 @@ static int cpuset_write_resmask(struct cgroup_subsys_state *css,
 	free_trial_cpuset(trialcs);
 out_unlock:
 	mutex_unlock(&cpuset_mutex);
-	return retval;
+	return retval ?: nbytes;
 }
 
 /*
@@ -1750,7 +1752,7 @@ static struct cftype files[] = {
 	{
 		.name = "cpus",
 		.seq_show = cpuset_common_seq_show,
-		.write_string = cpuset_write_resmask,
+		.write = cpuset_write_resmask,
 		.max_write_len = (100U + 6 * NR_CPUS),
 		.private = FILE_CPULIST,
 	},
@@ -1758,7 +1760,7 @@ static struct cftype files[] = {
 	{
 		.name = "mems",
 		.seq_show = cpuset_common_seq_show,
-		.write_string = cpuset_write_resmask,
+		.write = cpuset_write_resmask,
 		.max_write_len = (100U + 6 * MAX_NUMNODES),
 		.private = FILE_MEMLIST,
 	},
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 372f1adca491..191de26b0148 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -253,15 +253,16 @@ static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
 	return res_counter_read_u64(&h_cg->hugepage[idx], name);
 }
 
-static int hugetlb_cgroup_write(struct cgroup_subsys_state *css,
-				struct cftype *cft, char *buffer)
+static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
+				    char *buf, size_t nbytes, loff_t off)
 {
 	int idx, name, ret;
 	unsigned long long val;
-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 
-	idx = MEMFILE_IDX(cft->private);
-	name = MEMFILE_ATTR(cft->private);
+	buf = strstrip(buf);
+	idx = MEMFILE_IDX(of_cft(of)->private);
+	name = MEMFILE_ATTR(of_cft(of)->private);
 
 	switch (name) {
 	case RES_LIMIT:
@@ -271,7 +272,7 @@ static int hugetlb_cgroup_write(struct cgroup_subsys_state *css,
 			break;
 		}
 		/* This function does all necessary parse...reuse it */
-		ret = res_counter_memparse_write_strategy(buffer, &val);
+		ret = res_counter_memparse_write_strategy(buf, &val);
 		if (ret)
 			break;
 		ret = res_counter_set_limit(&h_cg->hugepage[idx], val);
@@ -280,7 +281,7 @@ static int hugetlb_cgroup_write(struct cgroup_subsys_state *css,
 		ret = -EINVAL;
 		break;
 	}
-	return ret;
+	return ret ?: nbytes;
 }
 
 static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css,
@@ -331,7 +332,7 @@ static void __init __hugetlb_cgroup_file_init(int idx)
 	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
 	cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
 	cft->read_u64 = hugetlb_cgroup_read_u64;
-	cft->write_string = hugetlb_cgroup_write;
+	cft->write = hugetlb_cgroup_write;
 
 	/* Add the usage file */
 	cft = &h->cgroup_files[1];
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5cf3246314a2..7098a43f7447 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5143,17 +5143,18 @@ static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
  * The user of this function is...
  * RES_LIMIT.
  */
-static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
-			    char *buffer)
+static ssize_t mem_cgroup_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 	enum res_type type;
 	int name;
 	unsigned long long val;
 	int ret;
 
-	type = MEMFILE_TYPE(cft->private);
-	name = MEMFILE_ATTR(cft->private);
+	buf = strstrip(buf);
+	type = MEMFILE_TYPE(of_cft(of)->private);
+	name = MEMFILE_ATTR(of_cft(of)->private);
 
 	switch (name) {
 	case RES_LIMIT:
@@ -5162,7 +5163,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 			break;
 		}
 		/* This function does all necessary parse...reuse it */
-		ret = res_counter_memparse_write_strategy(buffer, &val);
+		ret = res_counter_memparse_write_strategy(buf, &val);
 		if (ret)
 			break;
 		if (type == _MEM)
@@ -5175,7 +5176,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 			return -EINVAL;
 		break;
 	case RES_SOFT_LIMIT:
-		ret = res_counter_memparse_write_strategy(buffer, &val);
+		ret = res_counter_memparse_write_strategy(buf, &val);
 		if (ret)
 			break;
 		/*
@@ -5192,7 +5193,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 		ret = -EINVAL; /* should be BUG() ? */
 		break;
 	}
-	return ret;
+	return ret ?: nbytes;
 }
 
 static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
@@ -5964,9 +5965,10 @@ static void memcg_event_ptable_queue_proc(struct file *file,
  * Input must be in format '<event_fd> <control_fd> <args>'.
  * Interpretation of args is defined by control file implementation.
  */
-static int memcg_write_event_control(struct cgroup_subsys_state *css,
-				     struct cftype *cft, char *buffer)
+static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
+					 char *buf, size_t nbytes, loff_t off)
 {
+	struct cgroup_subsys_state *css = of_css(of);
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup_event *event;
 	struct cgroup_subsys_state *cfile_css;
@@ -5977,15 +5979,17 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
 	char *endp;
 	int ret;
 
-	efd = simple_strtoul(buffer, &endp, 10);
+	buf = strstrip(buf);
+
+	efd = simple_strtoul(buf, &endp, 10);
 	if (*endp != ' ')
 		return -EINVAL;
-	buffer = endp + 1;
+	buf = endp + 1;
 
-	cfd = simple_strtoul(buffer, &endp, 10);
+	cfd = simple_strtoul(buf, &endp, 10);
 	if ((*endp != ' ') && (*endp != '\0'))
 		return -EINVAL;
-	buffer = endp + 1;
+	buf = endp + 1;
 
 	event = kzalloc(sizeof(*event), GFP_KERNEL);
 	if (!event)
@@ -6063,7 +6067,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
 		goto out_put_cfile;
 	}
 
-	ret = event->register_event(memcg, event->eventfd, buffer);
+	ret = event->register_event(memcg, event->eventfd, buf);
 	if (ret)
 		goto out_put_css;
 
@@ -6076,7 +6080,7 @@ static int memcg_write_event_control(struct cgroup_subsys_state *css,
 	fdput(cfile);
 	fdput(efile);
 
-	return 0;
+	return nbytes;
 
 out_put_css:
 	css_put(css);
@@ -6107,13 +6111,13 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),
-		.write_string = mem_cgroup_write,
+		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
 		.name = "soft_limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
-		.write_string = mem_cgroup_write,
+		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
@@ -6138,7 +6142,7 @@ static struct cftype mem_cgroup_files[] = {
 	},
 	{
 		.name = "cgroup.event_control",		/* XXX: for compat */
-		.write_string = memcg_write_event_control,
+		.write = memcg_write_event_control,
 		.flags = CFTYPE_NO_PREFIX,
 		.mode = S_IWUGO,
 	},
@@ -6171,7 +6175,7 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "kmem.limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_KMEM, RES_LIMIT),
-		.write_string = mem_cgroup_write,
+		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
@@ -6217,7 +6221,7 @@ static struct cftype memsw_cgroup_files[] = {
 	{
 		.name = "memsw.limit_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
-		.write_string = mem_cgroup_write,
+		.write = mem_cgroup_write,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 3825f669147b..b990cefd906b 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -185,15 +185,15 @@ static int read_priomap(struct seq_file *sf, void *v)
 	return 0;
 }
 
-static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
-			 char *buffer)
+static ssize_t write_priomap(struct kernfs_open_file *of,
+			     char *buf, size_t nbytes, loff_t off)
 {
 	char devname[IFNAMSIZ + 1];
 	struct net_device *dev;
 	u32 prio;
 	int ret;
 
-	if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
+	if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
 		return -EINVAL;
 
 	dev = dev_get_by_name(&init_net, devname);
@@ -202,11 +202,11 @@ static int write_priomap(struct cgroup_subsys_state *css, struct cftype *cft,
 
 	rtnl_lock();
 
-	ret = netprio_set_prio(css, dev, prio);
+	ret = netprio_set_prio(of_css(of), dev, prio);
 
 	rtnl_unlock();
 	dev_put(dev);
-	return ret;
+	return ret ?: nbytes;
 }
 
 static int update_netprio(const void *v, struct file *file, unsigned n)
@@ -239,7 +239,7 @@ static struct cftype ss_files[] = {
 	{
 		.name = "ifpriomap",
 		.seq_show = read_priomap,
-		.write_string = write_priomap,
+		.write = write_priomap,
 	},
 	{ }	/* terminate */
 };
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index d4f015ad6c84..841fd3fa937a 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -102,17 +102,19 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
 	return 0;
 }
 
-static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
-			    char *buffer)
+static ssize_t tcp_cgroup_write(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 	unsigned long long val;
 	int ret = 0;
 
-	switch (cft->private) {
+	buf = strstrip(buf);
+
+	switch (of_cft(of)->private) {
 	case RES_LIMIT:
 		/* see memcontrol.c */
-		ret = res_counter_memparse_write_strategy(buffer, &val);
+		ret = res_counter_memparse_write_strategy(buf, &val);
 		if (ret)
 			break;
 		ret = tcp_update_limit(memcg, val);
@@ -121,7 +123,7 @@ static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 		ret = -EINVAL;
 		break;
 	}
-	return ret;
+	return ret ?: nbytes;
 }
 
 static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
@@ -193,7 +195,7 @@ static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
 static struct cftype tcp_files[] = {
 	{
 		.name = "kmem.tcp.limit_in_bytes",
-		.write_string = tcp_cgroup_write,
+		.write = tcp_cgroup_write,
 		.read_u64 = tcp_cgroup_read,
 		.private = RES_LIMIT,
 	},
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 9134dbf70d3e..7dbac4061b1c 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -767,27 +767,27 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 	return rc;
 }
 
-static int devcgroup_access_write(struct cgroup_subsys_state *css,
-				  struct cftype *cft, char *buffer)
+static ssize_t devcgroup_access_write(struct kernfs_open_file *of,
+				      char *buf, size_t nbytes, loff_t off)
 {
 	int retval;
 
 	mutex_lock(&devcgroup_mutex);
-	retval = devcgroup_update_access(css_to_devcgroup(css),
-					 cft->private, buffer);
+	retval = devcgroup_update_access(css_to_devcgroup(of_css(of)),
+					 of_cft(of)->private, strstrip(buf));
 	mutex_unlock(&devcgroup_mutex);
-	return retval;
+	return retval ?: nbytes;
 }
 
 static struct cftype dev_cgroup_files[] = {
 	{
 		.name = "allow",
-		.write_string  = devcgroup_access_write,
+		.write = devcgroup_access_write,
 		.private = DEVCG_ALLOW,
 	},
 	{
 		.name = "deny",
-		.write_string = devcgroup_access_write,
+		.write = devcgroup_access_write,
 		.private = DEVCG_DENY,
 	},
 	{
-- 
cgit 


From 6770c64e5c8da4705d1f0973bdeb5c2bf4f3a404 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 May 2014 12:16:21 -0400
Subject: cgroup: replace cftype->trigger() with cftype->write()

cftype->trigger() is pointless.  It's trivial to ignore the input
buffer from a regular ->write() operation.  Convert all ->trigger()
users to ->write() and remove ->trigger().

This patch doesn't introduce any visible behavior changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
---
 include/linux/cgroup.h    |  8 --------
 kernel/cgroup.c           |  5 +----
 mm/hugetlb_cgroup.c       | 16 ++++++++--------
 mm/memcontrol.c           | 34 ++++++++++++++++++----------------
 net/ipv4/tcp_memcontrol.c | 15 ++++++++-------
 5 files changed, 35 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index aecdc84fe128..08eb71ee600b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -499,14 +499,6 @@ struct cftype {
 	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
 			 s64 val);
 
-	/*
-	 * trigger() callback can be used to get some kick from the
-	 * userspace, when the actual string written is not important
-	 * at all. The private field can be used to determine the
-	 * kick type for multiplexing.
-	 */
-	int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
-
 	/*
 	 * write() is the generic write callback which maps directly to
 	 * kernfs write operation and overrides all other operations.
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2a88ce7b24b6..2f16aab03493 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1034,8 +1034,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
 	if (cft->read_u64 || cft->read_s64 || cft->seq_show)
 		mode |= S_IRUGO;
 
-	if (cft->write_u64 || cft->write_s64 || cft->write ||
-	    cft->trigger)
+	if (cft->write_u64 || cft->write_s64 || cft->write)
 		mode |= S_IWUSR;
 
 	return mode;
@@ -2750,8 +2749,6 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 		ret = kstrtoll(buf, 0, &v);
 		if (!ret)
 			ret = cft->write_s64(css, cft, v);
-	} else if (cft->trigger) {
-		ret = cft->trigger(css, (unsigned int)cft->private);
 	} else {
 		ret = -EINVAL;
 	}
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 191de26b0148..a380681ab3cf 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -284,14 +284,14 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
 	return ret ?: nbytes;
 }
 
-static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css,
-				unsigned int event)
+static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
+				    char *buf, size_t nbytes, loff_t off)
 {
 	int idx, name, ret = 0;
-	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
+	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
 
-	idx = MEMFILE_IDX(event);
-	name = MEMFILE_ATTR(event);
+	idx = MEMFILE_IDX(of_cft(of)->private);
+	name = MEMFILE_ATTR(of_cft(of)->private);
 
 	switch (name) {
 	case RES_MAX_USAGE:
@@ -304,7 +304,7 @@ static int hugetlb_cgroup_reset(struct cgroup_subsys_state *css,
 		ret = -EINVAL;
 		break;
 	}
-	return ret;
+	return ret ?: nbytes;
 }
 
 static char *mem_fmt(char *buf, int size, unsigned long hsize)
@@ -344,14 +344,14 @@ static void __init __hugetlb_cgroup_file_init(int idx)
 	cft = &h->cgroup_files[2];
 	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
 	cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
-	cft->trigger = hugetlb_cgroup_reset;
+	cft->write = hugetlb_cgroup_reset;
 	cft->read_u64 = hugetlb_cgroup_read_u64;
 
 	/* Add the failcntfile */
 	cft = &h->cgroup_files[3];
 	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
 	cft->private  = MEMFILE_PRIVATE(idx, RES_FAILCNT);
-	cft->trigger  = hugetlb_cgroup_reset;
+	cft->write = hugetlb_cgroup_reset;
 	cft->read_u64 = hugetlb_cgroup_read_u64;
 
 	/* NULL terminate the last cft */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7098a43f7447..b638a79209ee 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4887,14 +4887,15 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
 	return 0;
 }
 
-static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css,
-					unsigned int event)
+static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of,
+					    char *buf, size_t nbytes,
+					    loff_t off)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 
 	if (mem_cgroup_is_root(memcg))
 		return -EINVAL;
-	return mem_cgroup_force_empty(memcg);
+	return mem_cgroup_force_empty(memcg) ?: nbytes;
 }
 
 static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css,
@@ -5220,14 +5221,15 @@ out:
 	*memsw_limit = min_memsw_limit;
 }
 
-static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
+static ssize_t mem_cgroup_reset(struct kernfs_open_file *of, char *buf,
+				size_t nbytes, loff_t off)
 {
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 	int name;
 	enum res_type type;
 
-	type = MEMFILE_TYPE(event);
-	name = MEMFILE_ATTR(event);
+	type = MEMFILE_TYPE(of_cft(of)->private);
+	name = MEMFILE_ATTR(of_cft(of)->private);
 
 	switch (name) {
 	case RES_MAX_USAGE:
@@ -5252,7 +5254,7 @@ static int mem_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
 		break;
 	}
 
-	return 0;
+	return nbytes;
 }
 
 static u64 mem_cgroup_move_charge_read(struct cgroup_subsys_state *css,
@@ -6105,7 +6107,7 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "max_usage_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
@@ -6123,7 +6125,7 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "failcnt",
 		.private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
@@ -6132,7 +6134,7 @@ static struct cftype mem_cgroup_files[] = {
 	},
 	{
 		.name = "force_empty",
-		.trigger = mem_cgroup_force_empty_write,
+		.write = mem_cgroup_force_empty_write,
 	},
 	{
 		.name = "use_hierarchy",
@@ -6186,13 +6188,13 @@ static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "kmem.failcnt",
 		.private = MEMFILE_PRIVATE(_KMEM, RES_FAILCNT),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
 		.name = "kmem.max_usage_in_bytes",
 		.private = MEMFILE_PRIVATE(_KMEM, RES_MAX_USAGE),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 #ifdef CONFIG_SLABINFO
@@ -6215,7 +6217,7 @@ static struct cftype memsw_cgroup_files[] = {
 	{
 		.name = "memsw.max_usage_in_bytes",
 		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{
@@ -6227,7 +6229,7 @@ static struct cftype memsw_cgroup_files[] = {
 	{
 		.name = "memsw.failcnt",
 		.private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
-		.trigger = mem_cgroup_reset,
+		.write = mem_cgroup_reset,
 		.read_u64 = mem_cgroup_read_u64,
 	},
 	{ },	/* terminate */
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 841fd3fa937a..f7a2ec3ac584 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -170,17 +170,18 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
 	return val;
 }
 
-static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
+static ssize_t tcp_cgroup_reset(struct kernfs_open_file *of,
+				char *buf, size_t nbytes, loff_t off)
 {
 	struct mem_cgroup *memcg;
 	struct cg_proto *cg_proto;
 
-	memcg = mem_cgroup_from_css(css);
+	memcg = mem_cgroup_from_css(of_css(of));
 	cg_proto = tcp_prot.proto_cgroup(memcg);
 	if (!cg_proto)
-		return 0;
+		return nbytes;
 
-	switch (event) {
+	switch (of_cft(of)->private) {
 	case RES_MAX_USAGE:
 		res_counter_reset_max(&cg_proto->memory_allocated);
 		break;
@@ -189,7 +190,7 @@ static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
 		break;
 	}
 
-	return 0;
+	return nbytes;
 }
 
 static struct cftype tcp_files[] = {
@@ -207,13 +208,13 @@ static struct cftype tcp_files[] = {
 	{
 		.name = "kmem.tcp.failcnt",
 		.private = RES_FAILCNT,
-		.trigger = tcp_cgroup_reset,
+		.write = tcp_cgroup_reset,
 		.read_u64 = tcp_cgroup_read,
 	},
 	{
 		.name = "kmem.tcp.max_usage_in_bytes",
 		.private = RES_MAX_USAGE,
-		.trigger = tcp_cgroup_reset,
+		.write = tcp_cgroup_reset,
 		.read_u64 = tcp_cgroup_read,
 	},
 	{ }	/* terminate */
-- 
cgit 


From b7fc5ad235936379fae67a9f7b50bb53487a1a3a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 13 May 2014 12:16:22 -0400
Subject: cgroup: remove cgroup->control_kn

Now that cgroup_subtree_control_write() has access to the associated
kernfs_open_file and thus the kernfs_node, there's no need to cache it
in cgroup->control_kn on creation.  Remove cgroup->control_kn and use
@of->kn directly.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 1 -
 kernel/cgroup.c        | 8 +++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 08eb71ee600b..aa7353deaaf3 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -177,7 +177,6 @@ struct cgroup {
 
 	struct cgroup *parent;		/* my parent */
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
-	struct kernfs_node *control_kn;	/* kn for "cgroup.subtree_control" */
 	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
 	/*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9a48c117ebf1..94d259bcd2b9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2580,7 +2580,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 	 * active_ref protection.
 	 */
 	cgroup_get(cgrp);
-	kernfs_break_active_protection(cgrp->control_kn);
+	kernfs_break_active_protection(of->kn);
 
 	mutex_lock(&cgroup_tree_mutex);
 
@@ -2697,7 +2697,7 @@ out_unlock:
 out_unlock_tree:
 	mutex_unlock(&cgroup_tree_mutex);
 out_unbreak:
-	kernfs_unbreak_active_protection(cgrp->control_kn);
+	kernfs_unbreak_active_protection(of->kn);
 	cgroup_put(cgrp);
 	return ret ?: nbytes;
 
@@ -2887,9 +2887,7 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft)
 		return ret;
 	}
 
-	if (cft->seq_show == cgroup_subtree_control_show)
-		cgrp->control_kn = kn;
-	else if (cft->seq_show == cgroup_populated_show)
+	if (cft->seq_show == cgroup_populated_show)
 		cgrp->populated_kn = kn;
 	return 0;
 }
-- 
cgit 


From ad0dc7f94dbf417b1c7d42e1f0b250f045b27f8f Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 19 Feb 2014 10:51:42 -0800
Subject: rcutorture: Add forward-progress checking for writer

The rcutorture output currently does not distinguish between stalls in
the RCU implementation and stalls in the rcu_torture_writer() kthreads.
This commit therefore adds some diagnostics to help distinguish between
these two conditions, at least for the non-SRCU implementations.  (SRCU
does not provide evidence of update-side forward progress by design.)

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 19 +++++++++++++++++++
 kernel/rcu/rcutorture.c  | 37 +++++++++++++++++++++++++++++++++++++
 kernel/rcu/tree.c        | 33 +++++++++++++++++++++++++++++++++
 3 files changed, 89 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 00a7fd61b3c6..82973738125b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -51,7 +51,17 @@ extern int rcu_expedited; /* for sysctl */
 extern int rcutorture_runnable; /* for sysctl */
 #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
 
+enum rcutorture_type {
+	RCU_FLAVOR,
+	RCU_BH_FLAVOR,
+	RCU_SCHED_FLAVOR,
+	SRCU_FLAVOR,
+	INVALID_RCU_FLAVOR
+};
+
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
+void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
+			    unsigned long *gpnum, unsigned long *completed);
 void rcutorture_record_test_transition(void);
 void rcutorture_record_progress(unsigned long vernum);
 void do_trace_rcu_torture_read(const char *rcutorturename,
@@ -60,6 +70,15 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
 			       unsigned long c_old,
 			       unsigned long c);
 #else
+static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
+					  int *flags,
+					  unsigned long *gpnum,
+					  unsigned long *completed)
+{
+	*flags = 0;
+	*gpnum = 0;
+	*completed = 0;
+}
 static inline void rcutorture_record_test_transition(void)
 {
 }
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index bd30bc61bc05..0d739e3797e3 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -138,6 +138,15 @@ static long n_barrier_attempts;
 static long n_barrier_successes;
 static struct list_head rcu_torture_removed;
 
+static int rcu_torture_writer_state;
+#define RTWS_FIXED_DELAY	0
+#define RTWS_DELAY		1
+#define RTWS_REPLACE		2
+#define RTWS_DEF_FREE		3
+#define RTWS_EXP_SYNC		4
+#define RTWS_STUTTER		5
+#define RTWS_STOPPING		6
+
 #if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
 #define RCUTORTURE_RUNNABLE_INIT 1
 #else
@@ -214,6 +223,7 @@ rcu_torture_free(struct rcu_torture *p)
  */
 
 struct rcu_torture_ops {
+	int ttype;
 	void (*init)(void);
 	int (*readlock)(void);
 	void (*read_delay)(struct torture_random_state *rrsp);
@@ -312,6 +322,7 @@ static void rcu_sync_torture_init(void)
 }
 
 static struct rcu_torture_ops rcu_ops = {
+	.ttype		= RCU_FLAVOR,
 	.init		= rcu_sync_torture_init,
 	.readlock	= rcu_torture_read_lock,
 	.read_delay	= rcu_read_delay,
@@ -355,6 +366,7 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
 }
 
 static struct rcu_torture_ops rcu_bh_ops = {
+	.ttype		= RCU_BH_FLAVOR,
 	.init		= rcu_sync_torture_init,
 	.readlock	= rcu_bh_torture_read_lock,
 	.read_delay	= rcu_read_delay,  /* just reuse rcu's version. */
@@ -397,6 +409,7 @@ call_rcu_busted(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 }
 
 static struct rcu_torture_ops rcu_busted_ops = {
+	.ttype		= INVALID_RCU_FLAVOR,
 	.init		= rcu_sync_torture_init,
 	.readlock	= rcu_torture_read_lock,
 	.read_delay	= rcu_read_delay,  /* just reuse rcu's version. */
@@ -492,6 +505,7 @@ static void srcu_torture_synchronize_expedited(void)
 }
 
 static struct rcu_torture_ops srcu_ops = {
+	.ttype		= SRCU_FLAVOR,
 	.init		= rcu_sync_torture_init,
 	.readlock	= srcu_torture_read_lock,
 	.read_delay	= srcu_read_delay,
@@ -527,6 +541,7 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
 }
 
 static struct rcu_torture_ops sched_ops = {
+	.ttype		= RCU_SCHED_FLAVOR,
 	.init		= rcu_sync_torture_init,
 	.readlock	= sched_torture_read_lock,
 	.read_delay	= rcu_read_delay,  /* just reuse rcu's version. */
@@ -699,12 +714,15 @@ rcu_torture_writer(void *arg)
 	set_user_nice(current, MAX_NICE);
 
 	do {
+		rcu_torture_writer_state = RTWS_FIXED_DELAY;
 		schedule_timeout_uninterruptible(1);
 		rp = rcu_torture_alloc();
 		if (rp == NULL)
 			continue;
 		rp->rtort_pipe_count = 0;
+		rcu_torture_writer_state = RTWS_DELAY;
 		udelay(torture_random(&rand) & 0x3ff);
+		rcu_torture_writer_state = RTWS_REPLACE;
 		old_rp = rcu_dereference_check(rcu_torture_current,
 					       current == writer_task);
 		rp->rtort_mbtest = 1;
@@ -721,8 +739,10 @@ rcu_torture_writer(void *arg)
 			else
 				exp = gp_exp;
 			if (!exp) {
+				rcu_torture_writer_state = RTWS_DEF_FREE;
 				cur_ops->deferred_free(old_rp);
 			} else {
+				rcu_torture_writer_state = RTWS_EXP_SYNC;
 				cur_ops->exp_sync();
 				list_add(&old_rp->rtort_free,
 					 &rcu_torture_removed);
@@ -743,8 +763,10 @@ rcu_torture_writer(void *arg)
 			}
 		}
 		rcutorture_record_progress(++rcu_torture_current_version);
+		rcu_torture_writer_state = RTWS_STUTTER;
 		stutter_wait("rcu_torture_writer");
 	} while (!torture_must_stop());
+	rcu_torture_writer_state = RTWS_STOPPING;
 	torture_kthread_stopping("rcu_torture_writer");
 	return 0;
 }
@@ -937,6 +959,7 @@ rcu_torture_printk(char *page)
 	int i;
 	long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
 	long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
+	static unsigned long rtcv_snap = ULONG_MAX;
 
 	for_each_possible_cpu(cpu) {
 		for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
@@ -997,6 +1020,20 @@ rcu_torture_printk(char *page)
 	page += sprintf(page, "\n");
 	if (cur_ops->stats)
 		cur_ops->stats(page);
+	if (rtcv_snap == rcu_torture_current_version &&
+	    rcu_torture_current != NULL) {
+		int __maybe_unused flags;
+		unsigned long __maybe_unused gpnum;
+		unsigned long __maybe_unused completed;
+
+		rcutorture_get_gp_data(cur_ops->ttype,
+				       &flags, &gpnum, &completed);
+		page += sprintf(page,
+				"??? Writer stall state %d g%lu c%lu f%#x\n",
+				rcu_torture_writer_state,
+				gpnum, completed, flags);
+	}
+	rtcv_snap = rcu_torture_current_version;
 }
 
 /*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0c47e300210a..3d15b5a82ae8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -293,6 +293,39 @@ void rcutorture_record_test_transition(void)
 }
 EXPORT_SYMBOL_GPL(rcutorture_record_test_transition);
 
+/*
+ * Send along grace-period-related data for rcutorture diagnostics.
+ */
+void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
+			    unsigned long *gpnum, unsigned long *completed)
+{
+	struct rcu_state *rsp = NULL;
+
+	switch (test_type) {
+	case RCU_FLAVOR:
+		rsp = rcu_state;
+		break;
+	case RCU_BH_FLAVOR:
+		rsp = &rcu_bh_state;
+		break;
+	case RCU_SCHED_FLAVOR:
+		rsp = &rcu_sched_state;
+		break;
+	default:
+		break;
+	}
+	if (rsp != NULL) {
+		*flags = ACCESS_ONCE(rsp->gp_flags);
+		*gpnum = ACCESS_ONCE(rsp->gpnum);
+		*completed = ACCESS_ONCE(rsp->completed);
+		return;
+	}
+	*flags = 0;
+	*gpnum = 0;
+	*completed = 0;
+}
+EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
+
 /*
  * Record the number of writer passes through the current rcutorture test.
  * This is also used to correlate debugfs tracing stats with the rcutorture
-- 
cgit 


From d9c6866be8a145e32da616d8dcbae806032d75b5 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 7 May 2014 15:23:56 -0500
Subject: of: kill off of_can_translate_address

of_can_translate_address only checks some conditions for address
translation, but does not check other conditions like having range
properties. The checks it does do are redundant with
__of_translate_address. The only difference is printing a message or
not. Since we only have a single caller that does the full translation
anyway, just remove of_can_translate_address and quiet the error
message.

Cc: Grant Likely <grant.likely@linaro.org>
Signed-off-by: Rob Herring <robh@kernel.org>
Tested-by: Frank Rowand <frank.rowand@sonymobile.com>
Reviewed-by: Frank Rowand <frank.rowand@sonymobile.com>
---
 drivers/of/address.c       | 22 +---------------------
 drivers/of/platform.c      |  5 ++---
 include/linux/of_address.h |  1 -
 3 files changed, 3 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/address.c b/drivers/of/address.c
index cb4242a69cd5..95351b2a112c 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -498,8 +498,7 @@ static u64 __of_translate_address(struct device_node *dev,
 	/* Count address cells & copy address locally */
 	bus->count_cells(dev, &na, &ns);
 	if (!OF_CHECK_COUNTS(na, ns)) {
-		printk(KERN_ERR "prom_parse: Bad cell count for %s\n",
-		       of_node_full_name(dev));
+		pr_debug("OF: Bad cell count for %s\n", of_node_full_name(dev));
 		goto bail;
 	}
 	memcpy(addr, in_addr, na * 4);
@@ -564,25 +563,6 @@ u64 of_translate_dma_address(struct device_node *dev, const __be32 *in_addr)
 }
 EXPORT_SYMBOL(of_translate_dma_address);
 
-bool of_can_translate_address(struct device_node *dev)
-{
-	struct device_node *parent;
-	struct of_bus *bus;
-	int na, ns;
-
-	parent = of_get_parent(dev);
-	if (parent == NULL)
-		return false;
-
-	bus = of_match_bus(parent);
-	bus->count_cells(dev, &na, &ns);
-
-	of_node_put(parent);
-
-	return OF_CHECK_COUNTS(na, ns);
-}
-EXPORT_SYMBOL(of_can_translate_address);
-
 const __be32 *of_get_address(struct device_node *dev, int index, u64 *size,
 		    unsigned int *flags)
 {
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 0602eb5b1be2..d0009b3614af 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -140,9 +140,8 @@ struct platform_device *of_device_alloc(struct device_node *np,
 		return NULL;
 
 	/* count the io and irq resources */
-	if (of_can_translate_address(np))
-		while (of_address_to_resource(np, num_reg, &temp_res) == 0)
-			num_reg++;
+	while (of_address_to_resource(np, num_reg, &temp_res) == 0)
+		num_reg++;
 	num_irq = of_irq_count(np);
 
 	/* Populate the resource table */
diff --git a/include/linux/of_address.h b/include/linux/of_address.h
index 5f6ed6b182b8..906ca7681756 100644
--- a/include/linux/of_address.h
+++ b/include/linux/of_address.h
@@ -40,7 +40,6 @@ extern u64 of_translate_dma_address(struct device_node *dev,
 
 #ifdef CONFIG_OF_ADDRESS
 extern u64 of_translate_address(struct device_node *np, const __be32 *addr);
-extern bool of_can_translate_address(struct device_node *dev);
 extern int of_address_to_resource(struct device_node *dev, int index,
 				  struct resource *r);
 extern struct device_node *of_find_matching_node_by_address(
-- 
cgit 


From 0d2602ca30e410e84e8bdf05c84ed5688e0a5a44 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 13 May 2014 15:10:52 -0600
Subject: blk-mq: improve support for shared tags maps

This adds support for active queue tracking, meaning that the
blk-mq tagging maintains a count of active users of a tag set.
This allows us to maintain a notion of fairness between users,
so that we can distribute the tag depth evenly without starving
some users while allowing others to run unfairly deep queues.

If sharing of a tag set is detected, each hardware queue will
track the depth of its own queue. And if this exceeds the total
depth divided by the number of active queues, the user is actively
throttled down.

The active queue count is done lazily to avoid bouncing that data
between submitter and completer. Each hardware queue gets marked
active when it allocates its first tag, and gets marked inactive
when 1) the last tag is cleared, and 2) the queue timeout grace
period has passed.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sysfs.c      |  10 +++++
 block/blk-mq-tag.c        | 112 +++++++++++++++++++++++++++++++++++++++-------
 block/blk-mq-tag.h        |  27 +++++++++--
 block/blk-mq.c            |  85 ++++++++++++++++++++++++++++++++---
 block/blk-timeout.c       |  13 +++++-
 block/blk.h               |   4 ++
 include/linux/blk-mq.h    |   7 +++
 include/linux/blk_types.h |   2 +
 include/linux/blkdev.h    |   3 ++
 9 files changed, 236 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 8145b5b25b4b..99a60a829e69 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -208,6 +208,11 @@ static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
 	return blk_mq_tag_sysfs_show(hctx->tags, page);
 }
 
+static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page)
+{
+	return sprintf(page, "%u\n", atomic_read(&hctx->nr_active));
+}
+
 static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
 {
 	unsigned int i, first = 1;
@@ -267,6 +272,10 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
 	.attr = {.name = "dispatched", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_dispatched_show,
 };
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = {
+	.attr = {.name = "active", .mode = S_IRUGO },
+	.show = blk_mq_hw_sysfs_active_show,
+};
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
 	.attr = {.name = "pending", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_rq_list_show,
@@ -287,6 +296,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
 	&blk_mq_hw_sysfs_pending.attr,
 	&blk_mq_hw_sysfs_tags.attr,
 	&blk_mq_hw_sysfs_cpus.attr,
+	&blk_mq_hw_sysfs_active.attr,
 	NULL,
 };
 
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 8d526a3e02f6..c80086c9c064 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -7,13 +7,12 @@
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
-void blk_mq_wait_for_tags(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx,
-			  bool reserved)
+void blk_mq_wait_for_tags(struct blk_mq_hw_ctx *hctx, bool reserved)
 {
 	int tag, zero = 0;
 
-	tag = blk_mq_get_tag(tags, hctx, &zero, __GFP_WAIT, reserved);
-	blk_mq_put_tag(tags, tag, &zero);
+	tag = blk_mq_get_tag(hctx, &zero, __GFP_WAIT, reserved);
+	blk_mq_put_tag(hctx, tag, &zero);
 }
 
 static bool bt_has_free_tags(struct blk_mq_bitmap_tags *bt)
@@ -40,6 +39,84 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
 	return bt_has_free_tags(&tags->bitmap_tags);
 }
 
+static inline void bt_index_inc(unsigned int *index)
+{
+	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
+}
+
+/*
+ * If a previously inactive queue goes active, bump the active user count.
+ */
+bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
+	    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		atomic_inc(&hctx->tags->active_queues);
+
+	return true;
+}
+
+/*
+ * If a previously busy queue goes inactive, potential waiters could now
+ * be allowed to queue. Wake them up and check.
+ */
+void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_tags *tags = hctx->tags;
+	struct blk_mq_bitmap_tags *bt;
+	int i, wake_index;
+
+	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		return;
+
+	atomic_dec(&tags->active_queues);
+
+	/*
+	 * Will only throttle depth on non-reserved tags
+	 */
+	bt = &tags->bitmap_tags;
+	wake_index = bt->wake_index;
+	for (i = 0; i < BT_WAIT_QUEUES; i++) {
+		struct bt_wait_state *bs = &bt->bs[wake_index];
+
+		if (waitqueue_active(&bs->wait))
+			wake_up(&bs->wait);
+
+		bt_index_inc(&wake_index);
+	}
+}
+
+/*
+ * For shared tag users, we track the number of currently active users
+ * and attempt to provide a fair share of the tag depth for each of them.
+ */
+static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
+				  struct blk_mq_bitmap_tags *bt)
+{
+	unsigned int depth, users;
+
+	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
+		return true;
+	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		return true;
+
+	/*
+	 * Don't try dividing an ant
+	 */
+	if (bt->depth == 1)
+		return true;
+
+	users = atomic_read(&hctx->tags->active_queues);
+	if (!users)
+		return true;
+
+	/*
+	 * Allow at least some tags
+	 */
+	depth = max((bt->depth + users - 1) / users, 4U);
+	return atomic_read(&hctx->nr_active) < depth;
+}
+
 static int __bt_get_word(struct blk_mq_bitmap *bm, unsigned int last_tag)
 {
 	int tag, org_last_tag, end;
@@ -78,11 +155,15 @@ restart:
  * multiple users will tend to stick to different cachelines, at least
  * until the map is exhausted.
  */
-static int __bt_get(struct blk_mq_bitmap_tags *bt, unsigned int *tag_cache)
+static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
+		    unsigned int *tag_cache)
 {
 	unsigned int last_tag, org_last_tag;
 	int index, i, tag;
 
+	if (!hctx_may_queue(hctx, bt))
+		return -1;
+
 	last_tag = org_last_tag = *tag_cache;
 	index = TAG_TO_INDEX(bt, last_tag);
 
@@ -117,11 +198,6 @@ done:
 	return tag;
 }
 
-static inline void bt_index_inc(unsigned int *index)
-{
-	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
-}
-
 static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
 					 struct blk_mq_hw_ctx *hctx)
 {
@@ -142,7 +218,7 @@ static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
 	DEFINE_WAIT(wait);
 	int tag;
 
-	tag = __bt_get(bt, last_tag);
+	tag = __bt_get(hctx, bt, last_tag);
 	if (tag != -1)
 		return tag;
 
@@ -156,7 +232,7 @@ static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
 		was_empty = list_empty(&wait.task_list);
 		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
 
-		tag = __bt_get(bt, last_tag);
+		tag = __bt_get(hctx, bt, last_tag);
 		if (tag != -1)
 			break;
 
@@ -200,14 +276,13 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags,
 	return tag;
 }
 
-unsigned int blk_mq_get_tag(struct blk_mq_tags *tags,
-			    struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
+unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
 			    gfp_t gfp, bool reserved)
 {
 	if (!reserved)
-		return __blk_mq_get_tag(tags, hctx, last_tag, gfp);
+		return __blk_mq_get_tag(hctx->tags, hctx, last_tag, gfp);
 
-	return __blk_mq_get_reserved_tag(tags, gfp);
+	return __blk_mq_get_reserved_tag(hctx->tags, gfp);
 }
 
 static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
@@ -265,9 +340,11 @@ static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
 	bt_clear_tag(&tags->breserved_tags, tag);
 }
 
-void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag,
+void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
 		    unsigned int *last_tag)
 {
+	struct blk_mq_tags *tags = hctx->tags;
+
 	if (tag >= tags->nr_reserved_tags) {
 		const int real_tag = tag - tags->nr_reserved_tags;
 
@@ -465,6 +542,7 @@ ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
 	res = bt_unused_tags(&tags->breserved_tags);
 
 	page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res);
+	page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues));
 
 	return page - orig_page;
 }
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 7aa9f0665489..0f5ec8b50ef3 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -38,6 +38,8 @@ struct blk_mq_tags {
 	unsigned int nr_tags;
 	unsigned int nr_reserved_tags;
 
+	atomic_t active_queues;
+
 	struct blk_mq_bitmap_tags bitmap_tags;
 	struct blk_mq_bitmap_tags breserved_tags;
 
@@ -49,9 +51,9 @@ struct blk_mq_tags {
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
 extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 
-extern unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
-extern void blk_mq_wait_for_tags(struct blk_mq_tags *tags, struct blk_mq_hw_ctx *hctx, bool reserved);
-extern void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag, unsigned int *last_tag);
+extern unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
+extern void blk_mq_wait_for_tags(struct blk_mq_hw_ctx *hctx, bool reserved);
+extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, unsigned int *last_tag);
 extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data);
 extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
@@ -68,4 +70,23 @@ enum {
 	BLK_MQ_TAG_MAX		= BLK_MQ_TAG_FAIL - 1,
 };
 
+extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
+extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
+
+static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
+		return false;
+
+	return __blk_mq_tag_busy(hctx);
+}
+
+static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
+		return;
+
+	__blk_mq_tag_idle(hctx);
+}
+
 #endif
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9f07a266f7ab..3c4f1fceef8e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -80,9 +80,16 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
 	struct request *rq;
 	unsigned int tag;
 
-	tag = blk_mq_get_tag(hctx->tags, hctx, &ctx->last_tag, gfp, reserved);
+	tag = blk_mq_get_tag(hctx, &ctx->last_tag, gfp, reserved);
 	if (tag != BLK_MQ_TAG_FAIL) {
 		rq = hctx->tags->rqs[tag];
+
+		rq->cmd_flags = 0;
+		if (blk_mq_tag_busy(hctx)) {
+			rq->cmd_flags = REQ_MQ_INFLIGHT;
+			atomic_inc(&hctx->nr_active);
+		}
+
 		rq->tag = tag;
 		return rq;
 	}
@@ -190,7 +197,7 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 	/* csd/requeue_work/fifo_time is initialized before use */
 	rq->q = q;
 	rq->mq_ctx = ctx;
-	rq->cmd_flags = rw_flags;
+	rq->cmd_flags |= rw_flags;
 	rq->cmd_type = 0;
 	/* do not touch atomic flags, it needs atomic ops against the timer */
 	rq->cpu = -1;
@@ -262,7 +269,7 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 			break;
 		}
 
-		blk_mq_wait_for_tags(hctx->tags, hctx, reserved);
+		blk_mq_wait_for_tags(hctx, reserved);
 	} while (1);
 
 	return rq;
@@ -303,8 +310,11 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 	const int tag = rq->tag;
 	struct request_queue *q = rq->q;
 
+	if (rq->cmd_flags & REQ_MQ_INFLIGHT)
+		atomic_dec(&hctx->nr_active);
+
 	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
-	blk_mq_put_tag(hctx->tags, tag, &ctx->last_tag);
+	blk_mq_put_tag(hctx, tag, &ctx->last_tag);
 	blk_mq_queue_exit(q);
 }
 
@@ -571,8 +581,13 @@ static void blk_mq_rq_timer(unsigned long data)
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set);
 
-	if (next_set)
-		mod_timer(&q->timeout, round_jiffies_up(next));
+	if (next_set) {
+		next = blk_rq_timeout(round_jiffies_up(next));
+		mod_timer(&q->timeout, next);
+	} else {
+		queue_for_each_hw_ctx(q, hctx, i)
+			blk_mq_tag_idle(hctx);
+	}
 }
 
 /*
@@ -1439,6 +1454,56 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	}
 }
 
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set)
+{
+	struct blk_mq_hw_ctx *hctx;
+	struct request_queue *q;
+	bool shared;
+	int i;
+
+	if (set->tag_list.next == set->tag_list.prev)
+		shared = false;
+	else
+		shared = true;
+
+	list_for_each_entry(q, &set->tag_list, tag_set_list) {
+		blk_mq_freeze_queue(q);
+
+		queue_for_each_hw_ctx(q, hctx, i) {
+			if (shared)
+				hctx->flags |= BLK_MQ_F_TAG_SHARED;
+			else
+				hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+		}
+		blk_mq_unfreeze_queue(q);
+	}
+}
+
+static void blk_mq_del_queue_tag_set(struct request_queue *q)
+{
+	struct blk_mq_tag_set *set = q->tag_set;
+
+	blk_mq_freeze_queue(q);
+
+	mutex_lock(&set->tag_list_lock);
+	list_del_init(&q->tag_set_list);
+	blk_mq_update_tag_set_depth(set);
+	mutex_unlock(&set->tag_list_lock);
+
+	blk_mq_unfreeze_queue(q);
+}
+
+static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
+				     struct request_queue *q)
+{
+	q->tag_set = set;
+
+	mutex_lock(&set->tag_list_lock);
+	list_add_tail(&q->tag_set_list, &set->tag_list);
+	blk_mq_update_tag_set_depth(set);
+	mutex_unlock(&set->tag_list_lock);
+}
+
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
 	struct blk_mq_hw_ctx **hctxs;
@@ -1464,6 +1529,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 		if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL))
 			goto err_hctxs;
 
+		atomic_set(&hctxs[i]->nr_active, 0);
 		hctxs[i]->numa_node = NUMA_NO_NODE;
 		hctxs[i]->queue_num = i;
 	}
@@ -1516,6 +1582,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	list_add_tail(&q->all_q_node, &all_q_list);
 	mutex_unlock(&all_q_mutex);
 
+	blk_mq_add_queue_tag_set(set, q);
+
 	return q;
 
 err_flush_rq:
@@ -1543,6 +1611,8 @@ void blk_mq_free_queue(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
+	blk_mq_del_queue_tag_set(q);
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		kfree(hctx->ctx_map);
 		kfree(hctx->ctxs);
@@ -1635,6 +1705,9 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 			goto out_unwind;
 	}
 
+	mutex_init(&set->tag_list_lock);
+	INIT_LIST_HEAD(&set->tag_list);
+
 	return 0;
 
 out_unwind:
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 448745683d28..43e8b515806f 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -166,6 +166,17 @@ void blk_abort_request(struct request *req)
 }
 EXPORT_SYMBOL_GPL(blk_abort_request);
 
+unsigned long blk_rq_timeout(unsigned long timeout)
+{
+	unsigned long maxt;
+
+	maxt = round_jiffies_up(jiffies + BLK_MAX_TIMEOUT);
+	if (time_after(timeout, maxt))
+		timeout = maxt;
+
+	return timeout;
+}
+
 /**
  * blk_add_timer - Start timeout timer for a single request
  * @req:	request that is about to start running.
@@ -200,7 +211,7 @@ void blk_add_timer(struct request *req)
 	 * than an existing one, modify the timer. Round up to next nearest
 	 * second.
 	 */
-	expiry = round_jiffies_up(req->deadline);
+	expiry = blk_rq_timeout(round_jiffies_up(req->deadline));
 
 	if (!timer_pending(&q->timeout) ||
 	    time_before(expiry, q->timeout.expires)) {
diff --git a/block/blk.h b/block/blk.h
index 79be2cbce7fd..95cab70000e3 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -9,6 +9,9 @@
 /* Number of requests a "batching" process may submit */
 #define BLK_BATCH_REQ	32
 
+/* Max future timer expiry for timeouts */
+#define BLK_MAX_TIMEOUT		(5 * HZ)
+
 extern struct kmem_cache *blk_requestq_cachep;
 extern struct kmem_cache *request_cachep;
 extern struct kobj_type blk_queue_ktype;
@@ -37,6 +40,7 @@ bool __blk_end_bidi_request(struct request *rq, int error,
 void blk_rq_timed_out_timer(unsigned long data);
 void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout,
 			  unsigned int *next_set);
+unsigned long blk_rq_timeout(unsigned long timeout);
 void blk_add_timer(struct request *req);
 void blk_delete_timer(struct request *);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index f83d15f6e1c1..379f88d5c44d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -48,6 +48,8 @@ struct blk_mq_hw_ctx {
 	unsigned int		numa_node;
 	unsigned int		cmd_size;	/* per-request extra data */
 
+	atomic_t		nr_active;
+
 	struct blk_mq_cpu_notifier	cpu_notifier;
 	struct kobject		kobj;
 };
@@ -64,6 +66,9 @@ struct blk_mq_tag_set {
 	void			*driver_data;
 
 	struct blk_mq_tags	**tags;
+
+	struct mutex		tag_list_lock;
+	struct list_head	tag_list;
 };
 
 typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *);
@@ -126,8 +131,10 @@ enum {
 
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
 	BLK_MQ_F_SHOULD_SORT	= 1 << 1,
+	BLK_MQ_F_TAG_SHARED	= 1 << 2,
 
 	BLK_MQ_S_STOPPED	= 0,
+	BLK_MQ_S_TAG_ACTIVE	= 1,
 
 	BLK_MQ_MAX_DEPTH	= 2048,
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index aa0eaa2d0bd8..d8e4cea23a25 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -190,6 +190,7 @@ enum rq_flag_bits {
 	__REQ_PM,		/* runtime pm request */
 	__REQ_END,		/* last of chain of requests */
 	__REQ_HASHED,		/* on IO scheduler merge hash */
+	__REQ_MQ_INFLIGHT,	/* track inflight for MQ */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -243,5 +244,6 @@ enum rq_flag_bits {
 #define REQ_PM			(1ULL << __REQ_PM)
 #define REQ_END			(1ULL << __REQ_END)
 #define REQ_HASHED		(1ULL << __REQ_HASHED)
+#define REQ_MQ_INFLIGHT		(1ULL << __REQ_MQ_INFLIGHT)
 
 #endif /* __LINUX_BLK_TYPES_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 94b27210641b..6bc011a09e82 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -481,6 +481,9 @@ struct request_queue {
 	wait_queue_head_t	mq_freeze_wq;
 	struct percpu_counter	mq_usage_counter;
 	struct list_head	all_q_node;
+
+	struct blk_mq_tag_set	*tag_set;
+	struct list_head	tag_set_list;
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
-- 
cgit 


From 7ad24ea4bf620a32631d7b3069c3e30c078b0c3e Mon Sep 17 00:00:00 2001
From: Wilfried Klaebe <w-lkml@lebenslange-mailadresse.de>
Date: Sun, 11 May 2014 00:12:32 +0000
Subject: net: get rid of SET_ETHTOOL_OPS

net: get rid of SET_ETHTOOL_OPS

Dave Miller mentioned he'd like to see SET_ETHTOOL_OPS gone.
This does that.

Mostly done via coccinelle script:
@@
struct ethtool_ops *ops;
struct net_device *dev;
@@
-       SET_ETHTOOL_OPS(dev, ops);
+       dev->ethtool_ops = ops;

Compile tested only, but I'd seriously wonder if this broke anything.

Suggested-by: Dave Miller <davem@davemloft.net>
Signed-off-by: Wilfried Klaebe <w-lkml@lebenslange-mailadresse.de>
Acked-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_ethtool.c            | 2 +-
 drivers/net/ethernet/3com/3c509.c                       | 2 +-
 drivers/net/ethernet/3com/3c589_cs.c                    | 2 +-
 drivers/net/ethernet/3com/typhoon.c                     | 2 +-
 drivers/net/ethernet/adaptec/starfire.c                 | 2 +-
 drivers/net/ethernet/alteon/acenic.c                    | 2 +-
 drivers/net/ethernet/altera/altera_tse_ethtool.c        | 2 +-
 drivers/net/ethernet/amd/amd8111e.c                     | 2 +-
 drivers/net/ethernet/amd/au1000_eth.c                   | 2 +-
 drivers/net/ethernet/amd/nmclan_cs.c                    | 2 +-
 drivers/net/ethernet/atheros/alx/main.c                 | 2 +-
 drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c      | 2 +-
 drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c      | 2 +-
 drivers/net/ethernet/atheros/atlx/atl2.c                | 2 +-
 drivers/net/ethernet/broadcom/b44.c                     | 2 +-
 drivers/net/ethernet/broadcom/bcm63xx_enet.c            | 4 ++--
 drivers/net/ethernet/broadcom/bcmsysport.c              | 2 +-
 drivers/net/ethernet/broadcom/bgmac.c                   | 2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c     | 6 ++----
 drivers/net/ethernet/broadcom/genet/bcmgenet.c          | 2 +-
 drivers/net/ethernet/brocade/bna/bnad_ethtool.c         | 2 +-
 drivers/net/ethernet/calxeda/xgmac.c                    | 2 +-
 drivers/net/ethernet/chelsio/cxgb/cxgb2.c               | 2 +-
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c         | 2 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c         | 2 +-
 drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c     | 2 +-
 drivers/net/ethernet/cisco/enic/enic_ethtool.c          | 2 +-
 drivers/net/ethernet/dec/tulip/tulip_core.c             | 2 +-
 drivers/net/ethernet/dlink/dl2k.c                       | 2 +-
 drivers/net/ethernet/dlink/sundance.c                   | 2 +-
 drivers/net/ethernet/emulex/benet/be_main.c             | 2 +-
 drivers/net/ethernet/faraday/ftgmac100.c                | 2 +-
 drivers/net/ethernet/faraday/ftmac100.c                 | 2 +-
 drivers/net/ethernet/freescale/ucc_geth_ethtool.c       | 2 +-
 drivers/net/ethernet/fujitsu/fmvj18x_cs.c               | 2 +-
 drivers/net/ethernet/ibm/ehea/ehea_ethtool.c            | 2 +-
 drivers/net/ethernet/ibm/emac/core.c                    | 2 +-
 drivers/net/ethernet/icplus/ipg.c                       | 2 +-
 drivers/net/ethernet/intel/e100.c                       | 2 +-
 drivers/net/ethernet/intel/e1000/e1000_ethtool.c        | 2 +-
 drivers/net/ethernet/intel/e1000e/ethtool.c             | 2 +-
 drivers/net/ethernet/intel/i40e/i40e_ethtool.c          | 2 +-
 drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c      | 2 +-
 drivers/net/ethernet/intel/igb/igb_ethtool.c            | 2 +-
 drivers/net/ethernet/intel/igbvf/ethtool.c              | 2 +-
 drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c          | 2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c        | 2 +-
 drivers/net/ethernet/intel/ixgbevf/ethtool.c            | 2 +-
 drivers/net/ethernet/marvell/mv643xx_eth.c              | 2 +-
 drivers/net/ethernet/marvell/mvneta.c                   | 2 +-
 drivers/net/ethernet/marvell/pxa168_eth.c               | 2 +-
 drivers/net/ethernet/marvell/sky2.c                     | 2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c          | 2 +-
 drivers/net/ethernet/micrel/ks8695net.c                 | 6 +++---
 drivers/net/ethernet/micrel/ks8851.c                    | 2 +-
 drivers/net/ethernet/micrel/ksz884x.c                   | 2 +-
 drivers/net/ethernet/microchip/enc28j60.c               | 2 +-
 drivers/net/ethernet/myricom/myri10ge/myri10ge.c        | 2 +-
 drivers/net/ethernet/natsemi/natsemi.c                  | 2 +-
 drivers/net/ethernet/natsemi/ns83820.c                  | 2 +-
 drivers/net/ethernet/neterion/s2io.c                    | 2 +-
 drivers/net/ethernet/neterion/vxge/vxge-ethtool.c       | 2 +-
 drivers/net/ethernet/nvidia/forcedeth.c                 | 2 +-
 drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c | 2 +-
 drivers/net/ethernet/packetengines/hamachi.c            | 6 ++----
 drivers/net/ethernet/packetengines/yellowfin.c          | 2 +-
 drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c    | 2 +-
 drivers/net/ethernet/qlogic/qla3xxx.c                   | 2 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c        | 8 +++-----
 drivers/net/ethernet/qlogic/qlge/qlge_main.c            | 2 +-
 drivers/net/ethernet/realtek/r8169.c                    | 2 +-
 drivers/net/ethernet/renesas/sh_eth.c                   | 2 +-
 drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c      | 2 +-
 drivers/net/ethernet/sfc/efx.c                          | 2 +-
 drivers/net/ethernet/sis/sis190.c                       | 2 +-
 drivers/net/ethernet/smsc/smc91c92_cs.c                 | 2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c    | 2 +-
 drivers/net/ethernet/tehuti/tehuti.c                    | 2 +-
 drivers/net/ethernet/ti/cpsw.c                          | 4 ++--
 drivers/net/ethernet/ti/davinci_emac.c                  | 2 +-
 drivers/net/hyperv/netvsc_drv.c                         | 2 +-
 drivers/net/ntb_netdev.c                                | 2 +-
 drivers/net/rionet.c                                    | 2 +-
 drivers/net/usb/catc.c                                  | 2 +-
 drivers/net/usb/hso.c                                   | 2 +-
 drivers/net/usb/ipheth.c                                | 2 +-
 drivers/net/usb/kaweth.c                                | 2 +-
 drivers/net/usb/pegasus.c                               | 2 +-
 drivers/net/usb/r8152.c                                 | 2 +-
 drivers/net/usb/rtl8150.c                               | 2 +-
 drivers/net/virtio_net.c                                | 2 +-
 drivers/net/vmxnet3/vmxnet3_ethtool.c                   | 2 +-
 drivers/net/vxlan.c                                     | 2 +-
 drivers/net/wireless/hostap/hostap_main.c               | 2 +-
 drivers/net/xen-netback/interface.c                     | 2 +-
 drivers/net/xen-netfront.c                              | 2 +-
 drivers/s390/net/qeth_l2_main.c                         | 7 +++----
 drivers/s390/net/qeth_l3_main.c                         | 2 +-
 drivers/staging/et131x/et131x.c                         | 2 +-
 drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c        | 2 +-
 drivers/staging/netlogic/xlr_net.c                      | 2 +-
 drivers/staging/octeon/ethernet.c                       | 2 +-
 drivers/usb/gadget/u_ether.c                            | 4 ++--
 include/linux/netdevice.h                               | 3 ---
 net/batman-adv/soft-interface.c                         | 2 +-
 net/bridge/br_device.c                                  | 2 +-
 net/dsa/slave.c                                         | 2 +-
 net/openvswitch/vport-internal_dev.c                    | 2 +-
 108 files changed, 118 insertions(+), 128 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index c4b3940845e6..078cadd6c797 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -105,5 +105,5 @@ static const struct ethtool_ops ipoib_ethtool_ops = {
 
 void ipoib_set_ethtool_ops(struct net_device *dev)
 {
-	SET_ETHTOOL_OPS(dev, &ipoib_ethtool_ops);
+	dev->ethtool_ops = &ipoib_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index 35df0b9e6848..a968654b631d 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -534,7 +534,7 @@ static int el3_common_init(struct net_device *dev)
 	/* The EL3-specific entries in the device structure. */
 	dev->netdev_ops = &netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 
 	err = register_netdev(dev);
 	if (err) {
diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c
index 063557e037f2..f18647c23559 100644
--- a/drivers/net/ethernet/3com/3c589_cs.c
+++ b/drivers/net/ethernet/3com/3c589_cs.c
@@ -218,7 +218,7 @@ static int tc589_probe(struct pcmcia_device *link)
 	dev->netdev_ops = &el3_netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
-	SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+	dev->ethtool_ops = &netdev_ethtool_ops;
 
 	return tc589_config(link);
 }
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c
index 465cc7108d8a..e13b04624ded 100644
--- a/drivers/net/ethernet/3com/typhoon.c
+++ b/drivers/net/ethernet/3com/typhoon.c
@@ -2435,7 +2435,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netif_napi_add(dev, &tp->napi, typhoon_poll, 16);
 	dev->watchdog_timeo	= TX_TIMEOUT;
 
-	SET_ETHTOOL_OPS(dev, &typhoon_ethtool_ops);
+	dev->ethtool_ops = &typhoon_ethtool_ops;
 
 	/* We can handle scatter gather, up to 16 entries, and
 	 * we can do IP checksumming (only version 4, doh...)
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index 171d73c1d3c2..40dbbf740331 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -784,7 +784,7 @@ static int starfire_init_one(struct pci_dev *pdev,
 
 	dev->netdev_ops = &netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 
 	netif_napi_add(dev, &np->napi, netdev_poll, max_interrupt_work);
 
diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c
index 1517e9df5ba1..9a6991be9749 100644
--- a/drivers/net/ethernet/alteon/acenic.c
+++ b/drivers/net/ethernet/alteon/acenic.c
@@ -476,7 +476,7 @@ static int acenic_probe_one(struct pci_dev *pdev,
 	dev->watchdog_timeo = 5*HZ;
 
 	dev->netdev_ops = &ace_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ace_ethtool_ops);
+	dev->ethtool_ops = &ace_ethtool_ops;
 
 	/* we only display this string ONCE */
 	if (!boards_found)
diff --git a/drivers/net/ethernet/altera/altera_tse_ethtool.c b/drivers/net/ethernet/altera/altera_tse_ethtool.c
index 76133caffa78..d817e285b266 100644
--- a/drivers/net/ethernet/altera/altera_tse_ethtool.c
+++ b/drivers/net/ethernet/altera/altera_tse_ethtool.c
@@ -237,5 +237,5 @@ static const struct ethtool_ops tse_ethtool_ops = {
 
 void altera_tse_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &tse_ethtool_ops);
+	netdev->ethtool_ops = &tse_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 26efaaa5e73f..068dc7cad5fa 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -1900,7 +1900,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev,
 
 	/* Initialize driver entry points */
 	dev->netdev_ops = &amd8111e_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ops);
+	dev->ethtool_ops = &ops;
 	dev->irq =pdev->irq;
 	dev->watchdog_timeo = AMD8111E_TX_TIMEOUT;
 	netif_napi_add(dev, &lp->napi, amd8111e_rx_poll, 32);
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index a2bd91e3d302..a78e4c136959 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -1229,7 +1229,7 @@ static int au1000_probe(struct platform_device *pdev)
 	dev->base_addr = base->start;
 	dev->irq = irq;
 	dev->netdev_ops = &au1000_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &au1000_ethtool_ops);
+	dev->ethtool_ops = &au1000_ethtool_ops;
 	dev->watchdog_timeo = ETH_TX_TIMEOUT;
 
 	/*
diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c
index 08569fe2b182..abf3b1581c82 100644
--- a/drivers/net/ethernet/amd/nmclan_cs.c
+++ b/drivers/net/ethernet/amd/nmclan_cs.c
@@ -457,7 +457,7 @@ static int nmclan_probe(struct pcmcia_device *link)
     lp->tx_free_frames=AM2150_MAX_TX_FRAMES;
 
     dev->netdev_ops = &mace_netdev_ops;
-    SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+    dev->ethtool_ops = &netdev_ethtool_ops;
     dev->watchdog_timeo = TX_TIMEOUT;
 
     return nmclan_config(link);
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 17bb9ce96260..49faa97a30c3 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1302,7 +1302,7 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	netdev->netdev_ops = &alx_netdev_ops;
-	SET_ETHTOOL_OPS(netdev, &alx_ethtool_ops);
+	netdev->ethtool_ops = &alx_ethtool_ops;
 	netdev->irq = pdev->irq;
 	netdev->watchdog_timeo = ALX_WATCHDOG_TIME;
 
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
index 859ea844ba0f..ecacaaeb2b92 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
@@ -305,5 +305,5 @@ static const struct ethtool_ops atl1c_ethtool_ops = {
 
 void atl1c_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &atl1c_ethtool_ops);
+	netdev->ethtool_ops = &atl1c_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
index 82b23861bf55..206e9b7be431 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
@@ -388,5 +388,5 @@ static const struct ethtool_ops atl1e_ethtool_ops = {
 
 void atl1e_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &atl1e_ethtool_ops);
+	netdev->ethtool_ops = &atl1e_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index 78befb522a52..2587fed7b02c 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -1396,7 +1396,7 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	atl2_setup_pcicmd(pdev);
 
 	netdev->netdev_ops = &atl2_netdev_ops;
-	SET_ETHTOOL_OPS(netdev, &atl2_ethtool_ops);
+	netdev->ethtool_ops = &atl2_ethtool_ops;
 	netdev->watchdog_timeo = 5 * HZ;
 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
 
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 05ba62589017..ca5a20a48b14 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -2380,7 +2380,7 @@ static int b44_init_one(struct ssb_device *sdev,
 	netif_napi_add(dev, &bp->napi, b44_poll, 64);
 	dev->watchdog_timeo = B44_TX_TIMEOUT;
 	dev->irq = sdev->irq;
-	SET_ETHTOOL_OPS(dev, &b44_ethtool_ops);
+	dev->ethtool_ops = &b44_ethtool_ops;
 
 	err = ssb_bus_powerup(sdev->bus, 0);
 	if (err) {
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 8db34d389675..3e8d1a88ed3d 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -1897,7 +1897,7 @@ static int bcm_enet_probe(struct platform_device *pdev)
 	dev->netdev_ops = &bcm_enet_ops;
 	netif_napi_add(dev, &priv->napi, bcm_enet_poll, 16);
 
-	SET_ETHTOOL_OPS(dev, &bcm_enet_ethtool_ops);
+	dev->ethtool_ops = &bcm_enet_ethtool_ops;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	ret = register_netdev(dev);
@@ -2783,7 +2783,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
 	/* register netdevice */
 	dev->netdev_ops = &bcm_enetsw_ops;
 	netif_napi_add(dev, &priv->napi, bcm_enet_poll, 16);
-	SET_ETHTOOL_OPS(dev, &bcm_enetsw_ethtool_ops);
+	dev->ethtool_ops = &bcm_enetsw_ethtool_ops;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	spin_lock_init(&priv->enetsw_mdio_lock);
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 4dc8d1e9829b..56b74a495181 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1540,7 +1540,7 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	dev_set_drvdata(&pdev->dev, dev);
-	SET_ETHTOOL_OPS(dev, &bcm_sysport_ethtool_ops);
+	dev->ethtool_ops = &bcm_sysport_ethtool_ops;
 	dev->netdev_ops = &bcm_sysport_netdev_ops;
 	netif_napi_add(dev, &priv->napi, bcm_sysport_poll, 64);
 
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 0297a79a38e1..05c6af6c418f 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1436,7 +1436,7 @@ static int bgmac_probe(struct bcma_device *core)
 		return -ENOMEM;
 	net_dev->netdev_ops = &bgmac_netdev_ops;
 	net_dev->irq = core->irq;
-	SET_ETHTOOL_OPS(net_dev, &bgmac_ethtool_ops);
+	net_dev->ethtool_ops = &bgmac_ethtool_ops;
 	bgmac = netdev_priv(net_dev);
 	bgmac->net_dev = net_dev;
 	bgmac->core = core;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index b6de05e3149b..03224090ecf9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -3506,8 +3506,6 @@ static const struct ethtool_ops bnx2x_vf_ethtool_ops = {
 
 void bnx2x_set_ethtool_ops(struct bnx2x *bp, struct net_device *netdev)
 {
-	if (IS_PF(bp))
-		SET_ETHTOOL_OPS(netdev, &bnx2x_ethtool_ops);
-	else /* vf */
-		SET_ETHTOOL_OPS(netdev, &bnx2x_vf_ethtool_ops);
+	netdev->ethtool_ops = (IS_PF(bp)) ?
+		&bnx2x_ethtool_ops : &bnx2x_vf_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 0966bd04375f..5ba1cfbd60da 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2481,7 +2481,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	dev_set_drvdata(&pdev->dev, dev);
 	ether_addr_copy(dev->dev_addr, macaddr);
 	dev->watchdog_timeo = 2 * HZ;
-	SET_ETHTOOL_OPS(dev, &bcmgenet_ethtool_ops);
+	dev->ethtool_ops = &bcmgenet_ethtool_ops;
 	dev->netdev_ops = &bcmgenet_netdev_ops;
 	netif_napi_add(dev, &priv->napi, bcmgenet_poll, 64);
 
diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
index f9e150825bb5..adca62b72837 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
@@ -1137,5 +1137,5 @@ static const struct ethtool_ops bnad_ethtool_ops = {
 void
 bnad_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &bnad_ethtool_ops);
+	netdev->ethtool_ops = &bnad_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index 521dfea44b83..25d6b2a10e4e 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -1737,7 +1737,7 @@ static int xgmac_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, ndev);
 	ether_setup(ndev);
 	ndev->netdev_ops = &xgmac_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &xgmac_ethtool_ops);
+	ndev->ethtool_ops = &xgmac_ethtool_ops;
 	spin_lock_init(&priv->stats_lock);
 	INIT_WORK(&priv->tx_timeout_work, xgmac_tx_timeout_work);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 0fe7ff750d77..c1b2c1dbf015 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -1100,7 +1100,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 		netif_napi_add(netdev, &adapter->napi, t1_poll, 64);
 
-		SET_ETHTOOL_OPS(netdev, &t1_ethtool_ops);
+		netdev->ethtool_ops = &t1_ethtool_ops;
 	}
 
 	if (t1_init_sw_modules(adapter, bi) < 0) {
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 07bbb711b7e5..3ed507947248 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -3291,7 +3291,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			netdev->features |= NETIF_F_HIGHDMA;
 
 		netdev->netdev_ops = &cxgb_netdev_ops;
-		SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
+		netdev->ethtool_ops = &cxgb_ethtool_ops;
 	}
 
 	pci_set_drvdata(pdev, adapter);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 0f1e886d89e3..266a5bc6aedf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -6083,7 +6083,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		netdev->priv_flags |= IFF_UNICAST_FLT;
 
 		netdev->netdev_ops = &cxgb4_netdev_ops;
-		SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
+		netdev->ethtool_ops = &cxgb_ethtool_ops;
 	}
 
 	pci_set_drvdata(pdev, adapter);
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 52859288de7b..ff1cdd1788b5 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2664,7 +2664,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
 		netdev->priv_flags |= IFF_UNICAST_FLT;
 
 		netdev->netdev_ops = &cxgb4vf_netdev_ops;
-		SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops);
+		netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
 
 		/*
 		 * Initialize the hardware/software state for the port.
diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
index 47e3562f4866..58a8c67638e3 100644
--- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c
+++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c
@@ -253,5 +253,5 @@ static const struct ethtool_ops enic_ethtool_ops = {
 
 void enic_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &enic_ethtool_ops);
+	netdev->ethtool_ops = &enic_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index 1642de78aac8..861660841ce2 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -1703,7 +1703,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 #ifdef CONFIG_TULIP_NAPI
 	netif_napi_add(dev, &tp->napi, tulip_poll, 16);
 #endif
-	SET_ETHTOOL_OPS(dev, &ops);
+	dev->ethtool_ops = &ops;
 
 	if (register_netdev(dev))
 		goto err_out_free_ring;
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index 4fb756d219f7..2324f2ddfd48 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -227,7 +227,7 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 	dev->netdev_ops = &netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 #if 0
 	dev->features = NETIF_F_IP_CSUM;
 #endif
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index d9e5ca0d48c1..433c1e185442 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -577,7 +577,7 @@ static int sundance_probe1(struct pci_dev *pdev,
 
 	/* The chip-specific entries in the device structure. */
 	dev->netdev_ops = &netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
 	pci_set_drvdata(pdev, dev);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 3f04356afa82..dcc5e5c69743 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4304,7 +4304,7 @@ static void be_netdev_init(struct net_device *netdev)
 
 	netdev->netdev_ops = &be_netdev_ops;
 
-	SET_ETHTOOL_OPS(netdev, &be_ethtool_ops);
+	netdev->ethtool_ops = &be_ethtool_ops;
 }
 
 static void be_unmap_pci_bars(struct be_adapter *adapter)
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 68069eabc4f8..c77fa4a69844 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1210,7 +1210,7 @@ static int ftgmac100_probe(struct platform_device *pdev)
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 
-	SET_ETHTOOL_OPS(netdev, &ftgmac100_ethtool_ops);
+	netdev->ethtool_ops = &ftgmac100_ethtool_ops;
 	netdev->netdev_ops = &ftgmac100_netdev_ops;
 	netdev->features = NETIF_F_IP_CSUM | NETIF_F_GRO;
 
diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 8be5b40c0a12..4ff1adc6bfca 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1085,7 +1085,7 @@ static int ftmac100_probe(struct platform_device *pdev)
 	}
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
-	SET_ETHTOOL_OPS(netdev, &ftmac100_ethtool_ops);
+	netdev->ethtool_ops = &ftmac100_ethtool_ops;
 	netdev->netdev_ops = &ftmac100_netdev_ops;
 
 	platform_set_drvdata(pdev, netdev);
diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
index 413329eff2ff..cc83350d56ba 100644
--- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
+++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
@@ -417,5 +417,5 @@ static const struct ethtool_ops uec_ethtool_ops = {
 
 void uec_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &uec_ethtool_ops);
+	netdev->ethtool_ops = &uec_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index 7becab1aa3e4..cfe7a7431730 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -256,7 +256,7 @@ static int fmvj18x_probe(struct pcmcia_device *link)
     dev->netdev_ops = &fjn_netdev_ops;
     dev->watchdog_timeo = TX_TIMEOUT;
 
-    SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+    dev->ethtool_ops = &netdev_ethtool_ops;
 
     return fmvj18x_config(link);
 } /* fmvj18x_attach */
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
index 95837b99a464..6055e3eaf49c 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_ethtool.c
@@ -278,5 +278,5 @@ static const struct ethtool_ops ehea_ethtool_ops = {
 
 void ehea_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &ehea_ethtool_ops);
+	netdev->ethtool_ops = &ehea_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index ae342fdb42c8..87bd953cc2ee 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -2879,7 +2879,7 @@ static int emac_probe(struct platform_device *ofdev)
 		dev->commac.ops = &emac_commac_sg_ops;
 	} else
 		ndev->netdev_ops = &emac_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &emac_ethtool_ops);
+	ndev->ethtool_ops = &emac_ethtool_ops;
 
 	netif_carrier_off(ndev);
 
diff --git a/drivers/net/ethernet/icplus/ipg.c b/drivers/net/ethernet/icplus/ipg.c
index 25045ae07171..5727779a7df2 100644
--- a/drivers/net/ethernet/icplus/ipg.c
+++ b/drivers/net/ethernet/icplus/ipg.c
@@ -2245,7 +2245,7 @@ static int ipg_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	 */
 	dev->netdev_ops = &ipg_netdev_ops;
 	SET_NETDEV_DEV(dev, &pdev->dev);
-	SET_ETHTOOL_OPS(dev, &ipg_ethtool_ops);
+	dev->ethtool_ops = &ipg_ethtool_ops;
 
 	rc = pci_request_regions(pdev, DRV_NAME);
 	if (rc)
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index b56461ce674c..9d979d7debef 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -2854,7 +2854,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netdev->hw_features |= NETIF_F_RXALL;
 
 	netdev->netdev_ops = &e100_netdev_ops;
-	SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops);
+	netdev->ethtool_ops = &e100_ethtool_ops;
 	netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;
 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
 
diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index 73a8aeefb92a..341889a4ef7f 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -1905,5 +1905,5 @@ static const struct ethtool_ops e1000_ethtool_ops = {
 
 void e1000_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops);
+	netdev->ethtool_ops = &e1000_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 4e5ad7ebe1f2..e9a48bb5caac 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -2318,5 +2318,5 @@ static const struct ethtool_ops e1000_ethtool_ops = {
 
 void e1000e_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &e1000_ethtool_ops);
+	netdev->ethtool_ops = &e1000_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 0cf47c958081..f62929419a09 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -1700,5 +1700,5 @@ static const struct ethtool_ops i40e_ethtool_ops = {
 
 void i40e_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &i40e_ethtool_ops);
+	netdev->ethtool_ops = &i40e_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index a46be016039e..77e786d2d0e0 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -705,5 +705,5 @@ static struct ethtool_ops i40evf_ethtool_ops = {
  **/
 void i40evf_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &i40evf_ethtool_ops);
+	netdev->ethtool_ops = &i40evf_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 333a2b0bbada..a84297c85fb1 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -3035,5 +3035,5 @@ static const struct ethtool_ops igb_ethtool_ops = {
 
 void igb_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &igb_ethtool_ops);
+	netdev->ethtool_ops = &igb_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c
index 90eef07943f4..f58170bae18b 100644
--- a/drivers/net/ethernet/intel/igbvf/ethtool.c
+++ b/drivers/net/ethernet/intel/igbvf/ethtool.c
@@ -476,5 +476,5 @@ static const struct ethtool_ops igbvf_ethtool_ops = {
 
 void igbvf_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &igbvf_ethtool_ops);
+	netdev->ethtool_ops = &igbvf_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
index dbb7dd2f8e36..1da2d987d370 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
@@ -656,5 +656,5 @@ static const struct ethtool_ops ixgb_ethtool_ops = {
 
 void ixgb_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &ixgb_ethtool_ops);
+	netdev->ethtool_ops = &ixgb_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index 6c55c14d082a..31d7268401e7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -3099,5 +3099,5 @@ static const struct ethtool_ops ixgbe_ethtool_ops = {
 
 void ixgbe_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &ixgbe_ethtool_ops);
+	netdev->ethtool_ops = &ixgbe_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 1baecb60f065..a757f0734719 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -813,5 +813,5 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = {
 
 void ixgbevf_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &ixgbevf_ethtool_ops);
+	netdev->ethtool_ops = &ixgbevf_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index b7b8d74c22d9..df1d1b97187f 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -2889,7 +2889,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 	if (err)
 		goto out;
 
-	SET_ETHTOOL_OPS(dev, &mv643xx_eth_ethtool_ops);
+	dev->ethtool_ops = &mv643xx_eth_ethtool_ops;
 
 	init_pscr(mp, pd->speed, pd->duplex);
 
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 14786c8bf99e..72bc47f2d2e9 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2813,7 +2813,7 @@ static int mvneta_probe(struct platform_device *pdev)
 	dev->watchdog_timeo = 5 * HZ;
 	dev->netdev_ops = &mvneta_netdev_ops;
 
-	SET_ETHTOOL_OPS(dev, &mvneta_eth_tool_ops);
+	dev->ethtool_ops = &mvneta_eth_tool_ops;
 
 	pp = netdev_priv(dev);
 
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index b358c2f6f4bd..8f5aa7c62b18 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1488,7 +1488,7 @@ static int pxa168_eth_probe(struct platform_device *pdev)
 	dev->netdev_ops = &pxa168_eth_netdev_ops;
 	dev->watchdog_timeo = 2 * HZ;
 	dev->base_addr = 0;
-	SET_ETHTOOL_OPS(dev, &pxa168_ethtool_ops);
+	dev->ethtool_ops = &pxa168_ethtool_ops;
 
 	INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task);
 
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index b81106451a0a..69693384b58c 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -4760,7 +4760,7 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port,
 
 	SET_NETDEV_DEV(dev, &hw->pdev->dev);
 	dev->irq = hw->pdev->irq;
-	SET_ETHTOOL_OPS(dev, &sky2_ethtool_ops);
+	dev->ethtool_ops = &sky2_ethtool_ops;
 	dev->watchdog_timeo = TX_WATCHDOG;
 	dev->netdev_ops = &sky2_netdev_ops[port];
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index fba3c8e77626..79c6f467d17e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -2539,7 +2539,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	netif_set_real_num_tx_queues(dev, priv->tx_ring_num);
 	netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
 
-	SET_ETHTOOL_OPS(dev, &mlx4_en_ethtool_ops);
+	dev->ethtool_ops = &mlx4_en_ethtool_ops;
 
 	/*
 	 * Set driver features
diff --git a/drivers/net/ethernet/micrel/ks8695net.c b/drivers/net/ethernet/micrel/ks8695net.c
index 16435b3cfa9f..6c7c78baedca 100644
--- a/drivers/net/ethernet/micrel/ks8695net.c
+++ b/drivers/net/ethernet/micrel/ks8695net.c
@@ -1504,15 +1504,15 @@ ks8695_probe(struct platform_device *pdev)
 	if (ksp->phyiface_regs && ksp->link_irq == -1) {
 		ks8695_init_switch(ksp);
 		ksp->dtype = KS8695_DTYPE_LAN;
-		SET_ETHTOOL_OPS(ndev, &ks8695_ethtool_ops);
+		ndev->ethtool_ops = &ks8695_ethtool_ops;
 	} else if (ksp->phyiface_regs && ksp->link_irq != -1) {
 		ks8695_init_wan_phy(ksp);
 		ksp->dtype = KS8695_DTYPE_WAN;
-		SET_ETHTOOL_OPS(ndev, &ks8695_wan_ethtool_ops);
+		ndev->ethtool_ops = &ks8695_wan_ethtool_ops;
 	} else {
 		/* No initialisation since HPNA does not have a PHY */
 		ksp->dtype = KS8695_DTYPE_HPNA;
-		SET_ETHTOOL_OPS(ndev, &ks8695_ethtool_ops);
+		ndev->ethtool_ops = &ks8695_ethtool_ops;
 	}
 
 	/* And bring up the net_device with the net core */
diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index e0c92e0e5e1d..13767eb36a48 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -1471,7 +1471,7 @@ static int ks8851_probe(struct spi_device *spi)
 
 	skb_queue_head_init(&ks->txq);
 
-	SET_ETHTOOL_OPS(ndev, &ks8851_ethtool_ops);
+	ndev->ethtool_ops = &ks8851_ethtool_ops;
 	SET_NETDEV_DEV(ndev, &spi->dev);
 
 	spi_set_drvdata(spi, ks);
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 14ac0e2bc09f..4b9592c1fb40 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -7106,7 +7106,7 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id)
 		}
 
 		dev->netdev_ops = &netdev_ops;
-		SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+		dev->ethtool_ops = &netdev_ethtool_ops;
 		if (register_netdev(dev))
 			goto pcidev_init_reg_err;
 		port_set_power_saving(port, true);
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index c7b40aa21f22..b1b5f66b8b69 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1593,7 +1593,7 @@ static int enc28j60_probe(struct spi_device *spi)
 	dev->irq = spi->irq;
 	dev->netdev_ops = &enc28j60_netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(dev, &enc28j60_ethtool_ops);
+	dev->ethtool_ops = &enc28j60_ethtool_ops;
 
 	enc28j60_lowpower(priv, true);
 
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 130f6b204efa..f3d5d79f1cd1 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -4112,7 +4112,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	setup_timer(&mgp->watchdog_timer, myri10ge_watchdog_timer,
 		    (unsigned long)mgp);
 
-	SET_ETHTOOL_OPS(netdev, &myri10ge_ethtool_ops);
+	netdev->ethtool_ops = &myri10ge_ethtool_ops;
 	INIT_WORK(&mgp->watchdog_work, myri10ge_watchdog);
 	status = register_netdev(netdev);
 	if (status != 0) {
diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index 64ec2a437f46..291fba8b9f07 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -927,7 +927,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->netdev_ops = &natsemi_netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 
 	if (mtu)
 		dev->mtu = mtu;
diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index dbccf1de49ec..19bb8244b9e3 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -2030,7 +2030,7 @@ static int ns83820_init_one(struct pci_dev *pci_dev,
 		pci_dev->subsystem_vendor, pci_dev->subsystem_device);
 
 	ndev->netdev_ops = &netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &ops);
+	ndev->ethtool_ops = &ops;
 	ndev->watchdog_timeo = 5 * HZ;
 	pci_set_drvdata(pci_dev, ndev);
 
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index e900c1abdef7..e3cf38e6ce3c 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -7910,7 +7910,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 
 	/*  Driver entry points */
 	dev->netdev_ops = &s2io_netdev_ops;
-	SET_ETHTOOL_OPS(dev, &netdev_ethtool_ops);
+	dev->ethtool_ops = &netdev_ethtool_ops;
 	dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM |
 		NETIF_F_TSO | NETIF_F_TSO6 |
 		NETIF_F_RXCSUM | NETIF_F_LRO;
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c
index f8f073880f84..ddcc81ad1ae1 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-ethtool.c
@@ -1128,5 +1128,5 @@ static const struct ethtool_ops vxge_ethtool_ops = {
 
 void vxge_initialize_ethtool_ops(struct net_device *ndev)
 {
-	SET_ETHTOOL_OPS(ndev, &vxge_ethtool_ops);
+	ndev->ethtool_ops = &vxge_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index fddb464aeab3..e8235c5c5e69 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -5766,7 +5766,7 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 		dev->netdev_ops = &nv_netdev_ops_optimized;
 
 	netif_napi_add(dev, &np->napi, nv_napi_poll, RX_WORK_PER_LOOP);
-	SET_ETHTOOL_OPS(dev, &ops);
+	dev->ethtool_ops = &ops;
 	dev->watchdog_timeo = NV_WATCHDOG_TIMEO;
 
 	pci_set_drvdata(pci_dev, dev);
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
index 826f0ccdc23c..114d2fe52cc2 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
@@ -508,5 +508,5 @@ static const struct ethtool_ops pch_gbe_ethtool_ops = {
 
 void pch_gbe_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &pch_gbe_ethtool_ops);
+	netdev->ethtool_ops = &pch_gbe_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c
index b6bdeb3c1971..9a997e4c3e08 100644
--- a/drivers/net/ethernet/packetengines/hamachi.c
+++ b/drivers/net/ethernet/packetengines/hamachi.c
@@ -724,10 +724,8 @@ static int hamachi_init_one(struct pci_dev *pdev,
 
 	/* The Hamachi-specific entries in the device structure. */
 	dev->netdev_ops = &hamachi_netdev_ops;
-	if (chip_tbl[hmp->chip_id].flags & CanHaveMII)
-		SET_ETHTOOL_OPS(dev, &ethtool_ops);
-	else
-		SET_ETHTOOL_OPS(dev, &ethtool_ops_no_mii);
+	dev->ethtool_ops = (chip_tbl[hmp->chip_id].flags & CanHaveMII) ?
+		&ethtool_ops : &ethtool_ops_no_mii;
 	dev->watchdog_timeo = TX_TIMEOUT;
 	if (mtu)
 		dev->mtu = mtu;
diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c
index 9a6cb482dcd0..69a8dc095072 100644
--- a/drivers/net/ethernet/packetengines/yellowfin.c
+++ b/drivers/net/ethernet/packetengines/yellowfin.c
@@ -472,7 +472,7 @@ static int yellowfin_init_one(struct pci_dev *pdev,
 
 	/* The Yellowfin-specific entries in the device structure. */
 	dev->netdev_ops = &netdev_ops;
-	SET_ETHTOOL_OPS(dev, &ethtool_ops);
+	dev->ethtool_ops = &ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
 	if (mtu)
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index f09c35d669b3..5bf05818a12c 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -1373,7 +1373,7 @@ netxen_setup_netdev(struct netxen_adapter *adapter,
 
 	netxen_nic_change_mtu(netdev, netdev->mtu);
 
-	SET_ETHTOOL_OPS(netdev, &netxen_nic_ethtool_ops);
+	netdev->ethtool_ops = &netxen_nic_ethtool_ops;
 
 	netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO |
 	                      NETIF_F_RXCSUM;
diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index 2eabd44f8914..b5d6bc1a8b00 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -3838,7 +3838,7 @@ static int ql3xxx_probe(struct pci_dev *pdev,
 
 	/* Set driver entry points */
 	ndev->netdev_ops = &ql3xxx_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &ql3xxx_ethtool_ops);
+	ndev->ethtool_ops = &ql3xxx_ethtool_ops;
 	ndev->watchdog_timeo = 5 * HZ;
 
 	netif_napi_add(ndev, &qdev->napi, ql_poll, 64);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 8a2aeb85e320..f0a285359e66 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -2265,10 +2265,8 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev,
 
 	qlcnic_change_mtu(netdev, netdev->mtu);
 
-	if (qlcnic_sriov_vf_check(adapter))
-		SET_ETHTOOL_OPS(netdev, &qlcnic_sriov_vf_ethtool_ops);
-	else
-		SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_ops);
+	netdev->ethtool_ops = (qlcnic_sriov_vf_check(adapter)) ?
+		&qlcnic_sriov_vf_ethtool_ops : &qlcnic_ethtool_ops;
 
 	netdev->features |= (NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 			     NETIF_F_IPV6_CSUM | NETIF_F_GRO |
@@ -2682,7 +2680,7 @@ err_out_disable_pdev:
 err_out_maintenance_mode:
 	set_bit(__QLCNIC_MAINTENANCE_MODE, &adapter->state);
 	netdev->netdev_ops = &qlcnic_netdev_failed_ops;
-	SET_ETHTOOL_OPS(netdev, &qlcnic_ethtool_failed_ops);
+	netdev->ethtool_ops = &qlcnic_ethtool_failed_ops;
 	ahw->port_type = QLCNIC_XGBE;
 
 	if (qlcnic_83xx_check(adapter))
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 6e36fe14b848..b40050e03a56 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -4770,7 +4770,7 @@ static int qlge_probe(struct pci_dev *pdev,
 	ndev->irq = pdev->irq;
 
 	ndev->netdev_ops = &qlge_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &qlge_ethtool_ops);
+	ndev->ethtool_ops = &qlge_ethtool_ops;
 	ndev->watchdog_timeo = 10 * HZ;
 
 	err = register_netdev(ndev);
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index aa1c079f231d..be425ad5e824 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7125,7 +7125,7 @@ rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	for (i = 0; i < ETH_ALEN; i++)
 		dev->dev_addr[i] = RTL_R8(MAC0 + i);
 
-	SET_ETHTOOL_OPS(dev, &rtl8169_ethtool_ops);
+	dev->ethtool_ops = &rtl8169_ethtool_ops;
 	dev->watchdog_timeo = RTL8169_TX_TIMEOUT;
 
 	netif_napi_add(dev, &tp->napi, rtl8169_poll, R8169_NAPI_WEIGHT);
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 6a9509ccd33b..967314cade95 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2843,7 +2843,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev)
 		ndev->netdev_ops = &sh_eth_netdev_ops_tsu;
 	else
 		ndev->netdev_ops = &sh_eth_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &sh_eth_ethtool_ops);
+	ndev->ethtool_ops = &sh_eth_ethtool_ops;
 	ndev->watchdog_timeo = TX_TIMEOUT;
 
 	/* debug message level */
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
index 0415fa50eeb7..c0981ae45874 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
@@ -520,5 +520,5 @@ static const struct ethtool_ops sxgbe_ethtool_ops = {
 
 void sxgbe_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &sxgbe_ethtool_ops);
+	netdev->ethtool_ops = &sxgbe_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 63d595fd3cc5..1e274045970f 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -2248,7 +2248,7 @@ static int efx_register_netdev(struct efx_nic *efx)
 	} else {
 		net_dev->netdev_ops = &efx_farch_netdev_ops;
 	}
-	SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops);
+	net_dev->ethtool_ops = &efx_ethtool_ops;
 	net_dev->gso_max_segs = EFX_TSO_MAX_SEGS;
 
 	rtnl_lock();
diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c
index acbbe48a519c..a86339903b9b 100644
--- a/drivers/net/ethernet/sis/sis190.c
+++ b/drivers/net/ethernet/sis/sis190.c
@@ -1877,7 +1877,7 @@ static int sis190_init_one(struct pci_dev *pdev,
 
 	dev->netdev_ops = &sis190_netdev_ops;
 
-	SET_ETHTOOL_OPS(dev, &sis190_ethtool_ops);
+	dev->ethtool_ops = &sis190_ethtool_ops;
 	dev->watchdog_timeo = SIS190_TX_TIMEOUT;
 
 	spin_lock_init(&tp->lock);
diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c
index c7a4868571f9..6b33127ab352 100644
--- a/drivers/net/ethernet/smsc/smc91c92_cs.c
+++ b/drivers/net/ethernet/smsc/smc91c92_cs.c
@@ -318,7 +318,7 @@ static int smc91c92_probe(struct pcmcia_device *link)
 
     /* The SMC91c92-specific entries in the device structure. */
     dev->netdev_ops = &smc_netdev_ops;
-    SET_ETHTOOL_OPS(dev, &ethtool_ops);
+    dev->ethtool_ops = &ethtool_ops;
     dev->watchdog_timeo = TX_TIMEOUT;
 
     smc->mii_if.dev = dev;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index c5f9cb85c8ef..c963394ded6c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -784,5 +784,5 @@ static const struct ethtool_ops stmmac_ethtool_ops = {
 
 void stmmac_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &stmmac_ethtool_ops);
+	netdev->ethtool_ops = &stmmac_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index 2ead87759ab4..38da73a2a886 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -2413,7 +2413,7 @@ static void bdx_set_ethtool_ops(struct net_device *netdev)
 		.get_ethtool_stats = bdx_get_ethtool_stats,
 	};
 
-	SET_ETHTOOL_OPS(netdev, &bdx_ethtool_ops);
+	netdev->ethtool_ops = &bdx_ethtool_ops;
 }
 
 /**
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 91499be03c6f..e3d871055d63 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1998,7 +1998,7 @@ static int cpsw_probe_dual_emac(struct platform_device *pdev,
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops);
+	ndev->ethtool_ops = &cpsw_ethtool_ops;
 	netif_napi_add(ndev, &priv_sl2->napi, cpsw_poll, CPSW_POLL_WEIGHT);
 
 	/* register the network device */
@@ -2227,7 +2227,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 
 	ndev->netdev_ops = &cpsw_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &cpsw_ethtool_ops);
+	ndev->ethtool_ops = &cpsw_ethtool_ops;
 	netif_napi_add(ndev, &priv->napi, cpsw_poll, CPSW_POLL_WEIGHT);
 
 	/* register the network device */
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 8f0e69ce07ca..e76eae541151 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1980,7 +1980,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
 	}
 
 	ndev->netdev_ops = &emac_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &ethtool_ops);
+	ndev->ethtool_ops = &ethtool_ops;
 	netif_napi_add(ndev, &priv->napi, emac_poll, EMAC_POLL_WEIGHT);
 
 	/* register the network device */
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1de3ef5dd5d2..2e967a7bdb33 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -810,7 +810,7 @@ static int netvsc_probe(struct hv_device *dev,
 	net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM |
 			NETIF_F_IP_CSUM | NETIF_F_TSO;
 
-	SET_ETHTOOL_OPS(net, &ethtool_ops);
+	net->ethtool_ops = &ethtool_ops;
 	SET_NETDEV_DEV(net, &dev->device);
 
 	/* Notify the netvsc driver of the new device */
diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c
index 63aa9d9e34c5..27536aa89199 100644
--- a/drivers/net/ntb_netdev.c
+++ b/drivers/net/ntb_netdev.c
@@ -348,7 +348,7 @@ static int ntb_netdev_probe(struct pci_dev *pdev)
 	memcpy(ndev->dev_addr, ndev->perm_addr, ndev->addr_len);
 
 	ndev->netdev_ops = &ntb_netdev_ops;
-	SET_ETHTOOL_OPS(ndev, &ntb_ethtool_ops);
+	ndev->ethtool_ops = &ntb_ethtool_ops;
 
 	dev->qp = ntb_transport_create_queue(ndev, pdev, &ntb_netdev_handlers);
 	if (!dev->qp) {
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index a8497183ff8b..dac7a0d9bb46 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -494,7 +494,7 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
 	ndev->mtu = RIO_MAX_MSG_SIZE - 14;
 	ndev->features = NETIF_F_LLTX;
 	SET_NETDEV_DEV(ndev, &mport->dev);
-	SET_ETHTOOL_OPS(ndev, &rionet_ethtool_ops);
+	ndev->ethtool_ops = &rionet_ethtool_ops;
 
 	spin_lock_init(&rnet->lock);
 	spin_lock_init(&rnet->tx_lock);
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 630caf48f63a..8cfc3bb0c6a6 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -793,7 +793,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
 
 	netdev->netdev_ops = &catc_netdev_ops;
 	netdev->watchdog_timeo = TX_TIMEOUT;
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 
 	catc->usbdev = usbdev;
 	catc->netdev = netdev;
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index 660bd5ea9fc0..a3a05869309d 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2425,7 +2425,7 @@ static void hso_net_init(struct net_device *net)
 	net->type = ARPHRD_NONE;
 	net->mtu = DEFAULT_MTU - 14;
 	net->tx_queue_len = 10;
-	SET_ETHTOOL_OPS(net, &ops);
+	net->ethtool_ops = &ops;
 
 	/* and initialize the semaphore */
 	spin_lock_init(&hso_net->net_lock);
diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index 421934c83f1c..f72570708edb 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -524,7 +524,7 @@ static int ipheth_probe(struct usb_interface *intf,
 	usb_set_intfdata(intf, dev);
 
 	SET_NETDEV_DEV(netdev, &intf->dev);
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 
 	retval = register_netdev(netdev);
 	if (retval) {
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index a359d3bb7c5b..dcb6d33141e0 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -1171,7 +1171,7 @@ err_fw:
 	netdev->netdev_ops = &kaweth_netdev_ops;
 	netdev->watchdog_timeo = KAWETH_TX_TIMEOUT;
 	netdev->mtu = le16_to_cpu(kaweth->configuration.segment_size);
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 
 	/* kaweth is zeroed as part of alloc_netdev */
 	INIT_DELAYED_WORK(&kaweth->lowmem_work, kaweth_resubmit_tl);
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index 03e8a15d7deb..f84080215915 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -1159,7 +1159,7 @@ static int pegasus_probe(struct usb_interface *intf,
 
 	net->watchdog_timeo = PEGASUS_TX_TIMEOUT;
 	net->netdev_ops = &pegasus_netdev_ops;
-	SET_ETHTOOL_OPS(net, &ops);
+	net->ethtool_ops = &ops;
 	pegasus->mii.dev = net;
 	pegasus->mii.mdio_read = mdio_read;
 	pegasus->mii.mdio_write = mdio_write;
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 3fbfb0869030..9f91c7aba4b0 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -3452,7 +3452,7 @@ static int rtl8152_probe(struct usb_interface *intf,
 			      NETIF_F_TSO | NETIF_F_FRAGLIST |
 			      NETIF_F_IPV6_CSUM | NETIF_F_TSO6;
 
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 	netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE);
 
 	tp->mii.dev = netdev;
diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index da2c4583bd2d..6e87e5710048 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -878,7 +878,7 @@ static int rtl8150_probe(struct usb_interface *intf,
 	dev->netdev = netdev;
 	netdev->netdev_ops = &rtl8150_netdev_ops;
 	netdev->watchdog_timeo = RTL8150_TX_TIMEOUT;
-	SET_ETHTOOL_OPS(netdev, &ops);
+	netdev->ethtool_ops = &ops;
 	dev->intr_interval = 100;	/* 100ms */
 
 	if (!alloc_all_urbs(dev)) {
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b852bb368acc..7d9f84a91f37 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1646,7 +1646,7 @@ static int virtnet_probe(struct virtio_device *vdev)
 	dev->netdev_ops = &virtnet_netdev;
 	dev->features = NETIF_F_HIGHDMA;
 
-	SET_ETHTOOL_OPS(dev, &virtnet_ethtool_ops);
+	dev->ethtool_ops = &virtnet_ethtool_ops;
 	SET_NETDEV_DEV(dev, &vdev->dev);
 
 	/* Do we support "hardware" checksums? */
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 600ab56c0008..00e120296e92 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -635,5 +635,5 @@ static const struct ethtool_ops vmxnet3_ethtool_ops = {
 
 void vmxnet3_set_ethtool_ops(struct net_device *netdev)
 {
-	SET_ETHTOOL_OPS(netdev, &vmxnet3_ethtool_ops);
+	netdev->ethtool_ops = &vmxnet3_ethtool_ops;
 }
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 1dfee9a7fbf7..e68c8eb4ea8e 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2716,7 +2716,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 		return -EEXIST;
 	}
 
-	SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops);
+	dev->ethtool_ops = &vxlan_ethtool_ops;
 
 	/* create an fdb entry for a valid default destination */
 	if (!vxlan_addr_any(&vxlan->default_dst.remote_ip)) {
diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c
index 67db34e56d7e..52919ad42726 100644
--- a/drivers/net/wireless/hostap/hostap_main.c
+++ b/drivers/net/wireless/hostap/hostap_main.c
@@ -882,7 +882,7 @@ void hostap_setup_dev(struct net_device *dev, local_info_t *local,
 	dev->mtu = local->mtu;
 
 
-	SET_ETHTOOL_OPS(dev, &prism2_ethtool_ops);
+	dev->ethtool_ops = &prism2_ethtool_ops;
 
 }
 
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index ef05c5c49d41..a7557331699f 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -386,7 +386,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 		NETIF_F_TSO | NETIF_F_TSO6;
 	dev->features = dev->hw_features | NETIF_F_RXCSUM;
-	SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
+	dev->ethtool_ops = &xenvif_ethtool_ops;
 
 	dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
 
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 158b5e639fc7..895355de8ac4 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1332,7 +1332,7 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
          */
 	netdev->features |= netdev->hw_features;
 
-	SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
+	netdev->ethtool_ops = &xennet_ethtool_ops;
 	SET_NETDEV_DEV(netdev, &dev->dev);
 
 	netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER);
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index e232ceca38fe..5ef5b4f45758 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -969,10 +969,9 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 	card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
 	card->dev->mtu = card->info.initial_mtu;
 	card->dev->netdev_ops = &qeth_l2_netdev_ops;
-	if (card->info.type != QETH_CARD_TYPE_OSN)
-		SET_ETHTOOL_OPS(card->dev, &qeth_l2_ethtool_ops);
-	else
-		SET_ETHTOOL_OPS(card->dev, &qeth_l2_osn_ops);
+	card->dev->ethtool_ops =
+		(card->info.type != QETH_CARD_TYPE_OSN) ?
+		&qeth_l2_ethtool_ops : &qeth_l2_osn_ops;
 	card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
 	card->info.broadcast_capable = 1;
 	qeth_l2_request_initial_mac(card);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index bc2499a24884..c58f82af3658 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3301,7 +3301,7 @@ static int qeth_l3_setup_netdev(struct qeth_card *card)
 	card->dev->ml_priv = card;
 	card->dev->watchdog_timeo = QETH_TX_TIMEOUT;
 	card->dev->mtu = card->info.initial_mtu;
-	SET_ETHTOOL_OPS(card->dev, &qeth_l3_ethtool_ops);
+	card->dev->ethtool_ops = &qeth_l3_ethtool_ops;
 	card->dev->features |=	NETIF_F_HW_VLAN_CTAG_TX |
 				NETIF_F_HW_VLAN_CTAG_RX |
 				NETIF_F_HW_VLAN_CTAG_FILTER;
diff --git a/drivers/staging/et131x/et131x.c b/drivers/staging/et131x/et131x.c
index d329cf314360..15e0f4da3ce0 100644
--- a/drivers/staging/et131x/et131x.c
+++ b/drivers/staging/et131x/et131x.c
@@ -4604,7 +4604,7 @@ static int et131x_pci_setup(struct pci_dev *pdev,
 	netdev->netdev_ops     = &et131x_netdev_ops;
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
-	SET_ETHTOOL_OPS(netdev, &et131x_ethtool_ops);
+	netdev->ethtool_ops = &et131x_ethtool_ops;
 
 	adapter = et131x_adapter_init(netdev, pdev);
 
diff --git a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
index d6421b9b5981..a6158bef58e5 100644
--- a/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
+++ b/drivers/staging/ft1000/ft1000-pcmcia/ft1000_hw.c
@@ -2249,7 +2249,7 @@ struct net_device *init_ft1000_card(struct pcmcia_device *link,
 
 	ft1000InitProc(dev);
 	ft1000_card_present = 1;
-	SET_ETHTOOL_OPS(dev, &ops);
+	dev->ethtool_ops = &ops;
 	printk(KERN_INFO "ft1000: %s: addr 0x%04lx irq %d, MAC addr %pM\n",
 			dev->name, dev->base_addr, dev->irq, dev->dev_addr);
 	return dev;
diff --git a/drivers/staging/netlogic/xlr_net.c b/drivers/staging/netlogic/xlr_net.c
index c83e3375104b..9d957615e32a 100644
--- a/drivers/staging/netlogic/xlr_net.c
+++ b/drivers/staging/netlogic/xlr_net.c
@@ -1066,7 +1066,7 @@ static int xlr_net_probe(struct platform_device *pdev)
 	xlr_set_rx_mode(ndev);
 
 	priv->num_rx_desc += MAX_NUM_DESC_SPILL;
-	SET_ETHTOOL_OPS(ndev, &xlr_ethtool_ops);
+	ndev->ethtool_ops = &xlr_ethtool_ops;
 	SET_NETDEV_DEV(ndev, &pdev->dev);
 
 	/* Common registers, do one time initialization */
diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c
index ff7214aac9dd..da9dd6bc5660 100644
--- a/drivers/staging/octeon/ethernet.c
+++ b/drivers/staging/octeon/ethernet.c
@@ -469,7 +469,7 @@ int cvm_oct_common_init(struct net_device *dev)
 
 	/* We do our own locking, Linux doesn't need to */
 	dev->features |= NETIF_F_LLTX;
-	SET_ETHTOOL_OPS(dev, &cvm_oct_ethtool_ops);
+	dev->ethtool_ops = &cvm_oct_ethtool_ops;
 
 	cvm_oct_phy_setup_device(dev);
 	cvm_oct_set_mac_filter(dev);
diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c
index b7d4f82872b7..ce8e28146162 100644
--- a/drivers/usb/gadget/u_ether.c
+++ b/drivers/usb/gadget/u_ether.c
@@ -793,7 +793,7 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
 
 	net->netdev_ops = &eth_netdev_ops;
 
-	SET_ETHTOOL_OPS(net, &ops);
+	net->ethtool_ops = &ops;
 
 	dev->gadget = g;
 	SET_NETDEV_DEV(net, &g->dev);
@@ -850,7 +850,7 @@ struct net_device *gether_setup_name_default(const char *netname)
 
 	net->netdev_ops = &eth_netdev_ops;
 
-	SET_ETHTOOL_OPS(net, &ops);
+	net->ethtool_ops = &ops;
 	SET_NETDEV_DEVTYPE(net, &gadget_type);
 
 	return net;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index adc4658e9873..2dea98cbbdba 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -56,9 +56,6 @@ struct device;
 struct phy_device;
 /* 802.11 specific */
 struct wireless_dev;
-					/* source back-compat hooks */
-#define SET_ETHTOOL_OPS(netdev,ops) \
-	( (netdev)->ethtool_ops = (ops) )
 
 void netdev_set_default_ethtool_ops(struct net_device *dev,
 				    const struct ethtool_ops *ops);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 744a59b85e15..e7ee65dc20bf 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -884,7 +884,7 @@ static void batadv_softif_init_early(struct net_device *dev)
 	/* generate random address */
 	eth_hw_addr_random(dev);
 
-	SET_ETHTOOL_OPS(dev, &batadv_ethtool_ops);
+	dev->ethtool_ops = &batadv_ethtool_ops;
 
 	memset(priv, 0, sizeof(*priv));
 }
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 3e2da2cb72db..9212015abc8f 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -348,7 +348,7 @@ void br_dev_setup(struct net_device *dev)
 
 	dev->netdev_ops = &br_netdev_ops;
 	dev->destructor = br_dev_free;
-	SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
+	dev->ethtool_ops = &br_ethtool_ops;
 	SET_NETDEV_DEVTYPE(dev, &br_type);
 	dev->tx_queue_len = 0;
 	dev->priv_flags = IFF_EBRIDGE;
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 02c0e1716f64..64c5af0a10dd 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -346,7 +346,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
 		return slave_dev;
 
 	slave_dev->features = master->vlan_features;
-	SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops);
+	slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
 	eth_hw_addr_inherit(slave_dev, master);
 	slave_dev->tx_queue_len = 0;
 
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 729c68763fe7..789af9280e77 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -130,7 +130,7 @@ static void do_setup(struct net_device *netdev)
 	netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 	netdev->destructor = internal_dev_destructor;
-	SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
+	netdev->ethtool_ops = &internal_dev_ethtool_ops;
 	netdev->tx_queue_len = 0;
 
 	netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
-- 
cgit 


From 50a0ffaf75e9d2d97200b523f2c600f40e9756b1 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Sun, 11 May 2014 10:47:15 +0200
Subject: net: cdc_ncm/cdc_mbim: rework probing of NCM/MBIM functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The NCM class match in the cdc_mbim driver is confusing and
causes unexpected behaviour. The USB core guarantees that a
USB interface is in altsetting 0 when probing starts. This
means that devices implementing a NCM 1.0 backwards
compatible MBIM function (a "NCM/MBIM function") always hit
the NCM entry in the cdc_mbim driver match table. Such
functions will never match any of the MBIM entries.

This causes unexpected behaviour for cases where the NCM and
MBIM entries are different, which is currently the case for
all except Ericsson devices.

Improve the probing of NCM/MBIM functions by looking up the
device again in the cdc_mbim match table after switching to
the MBIM identity.

The shared altsetting selection is updated to better
accommodate the new probing logic, returning the preferred
altsetting for the control interface instead of the data
interface. The control interface altsetting update is moved
to the cdc_mbim driver. It is never necessary to change the
control interface altsetting for NCM.

Cc: Greg Suarez <gsuarez@smithmicro.com>
Reported-by: Yu-an Shih <yshih@nvidia.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_mbim.c  | 43 +++++++++++++++++++++++++++++++++++++++++--
 drivers/net/usb/cdc_ncm.c   | 27 +++++++++++++--------------
 include/linux/usb/cdc_ncm.h |  2 +-
 3 files changed, 55 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index 80d27719ba38..bc23273d0455 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -107,15 +107,54 @@ static const struct net_device_ops cdc_mbim_netdev_ops = {
 	.ndo_vlan_rx_kill_vid = cdc_mbim_rx_kill_vid,
 };
 
+/* Change the control interface altsetting and update the .driver_info
+ * pointer if the matching entry after changing class codes points to
+ * a different struct
+ */
+static int cdc_mbim_set_ctrlalt(struct usbnet *dev, struct usb_interface *intf, u8 alt)
+{
+	struct usb_driver *driver = to_usb_driver(intf->dev.driver);
+	const struct usb_device_id *id;
+	struct driver_info *info;
+	int ret;
+
+	ret = usb_set_interface(dev->udev,
+				intf->cur_altsetting->desc.bInterfaceNumber,
+				alt);
+	if (ret)
+		return ret;
+
+	id = usb_match_id(intf, driver->id_table);
+	if (!id)
+		return -ENODEV;
+
+	info = (struct driver_info *)id->driver_info;
+	if (info != dev->driver_info) {
+		dev_dbg(&intf->dev, "driver_info updated to '%s'\n",
+			info->description);
+		dev->driver_info = info;
+	}
+	return 0;
+}
+
 static int cdc_mbim_bind(struct usbnet *dev, struct usb_interface *intf)
 {
 	struct cdc_ncm_ctx *ctx;
 	struct usb_driver *subdriver = ERR_PTR(-ENODEV);
 	int ret = -ENODEV;
-	u8 data_altsetting = cdc_ncm_select_altsetting(dev, intf);
+	u8 data_altsetting = 1;
 	struct cdc_mbim_state *info = (void *)&dev->data;
 
-	/* Probably NCM, defer for cdc_ncm_bind */
+	/* should we change control altsetting on a NCM/MBIM function? */
+	if (cdc_ncm_select_altsetting(intf) == CDC_NCM_COMM_ALTSETTING_MBIM) {
+		data_altsetting = CDC_NCM_DATA_ALTSETTING_MBIM;
+		ret = cdc_mbim_set_ctrlalt(dev, intf, CDC_NCM_COMM_ALTSETTING_MBIM);
+		if (ret)
+			goto err;
+		ret = -ENODEV;
+	}
+
+	/* we will hit this for NCM/MBIM functions if prefer_mbim is false */
 	if (!cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting))
 		goto err;
 
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 9a2bd11943eb..d23bca57a23f 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -541,10 +541,10 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf)
 }
 EXPORT_SYMBOL_GPL(cdc_ncm_unbind);
 
-/* Select the MBIM altsetting iff it is preferred and available,
- * returning the number of the corresponding data interface altsetting
+/* Return the number of the MBIM control interface altsetting iff it
+ * is preferred and available,
  */
-u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf)
+u8 cdc_ncm_select_altsetting(struct usb_interface *intf)
 {
 	struct usb_host_interface *alt;
 
@@ -563,15 +563,15 @@ u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf)
 	 *   the rules given in section 6 (USB Device Model) of this
 	 *   specification."
 	 */
-	if (prefer_mbim && intf->num_altsetting == 2) {
+	if (intf->num_altsetting < 2)
+		return intf->cur_altsetting->desc.bAlternateSetting;
+
+	if (prefer_mbim) {
 		alt = usb_altnum_to_altsetting(intf, CDC_NCM_COMM_ALTSETTING_MBIM);
-		if (alt && cdc_ncm_comm_intf_is_mbim(alt) &&
-		    !usb_set_interface(dev->udev,
-				       intf->cur_altsetting->desc.bInterfaceNumber,
-				       CDC_NCM_COMM_ALTSETTING_MBIM))
-			return CDC_NCM_DATA_ALTSETTING_MBIM;
+		if (alt && cdc_ncm_comm_intf_is_mbim(alt))
+			return CDC_NCM_COMM_ALTSETTING_MBIM;
 	}
-	return CDC_NCM_DATA_ALTSETTING_NCM;
+	return CDC_NCM_COMM_ALTSETTING_NCM;
 }
 EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting);
 
@@ -580,12 +580,11 @@ static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf)
 	int ret;
 
 	/* MBIM backwards compatible function? */
-	cdc_ncm_select_altsetting(dev, intf);
-	if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting))
+	if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM)
 		return -ENODEV;
 
-	/* NCM data altsetting is always 1 */
-	ret = cdc_ncm_bind_common(dev, intf, 1);
+	/* The NCM data altsetting is fixed */
+	ret = cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM);
 
 	/*
 	 * We should get an event when network connection is "connected" or
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 44b38b92236a..55b6feead93b 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -121,7 +121,7 @@ struct cdc_ncm_ctx {
 	u16 connected;
 };
 
-u8 cdc_ncm_select_altsetting(struct usbnet *dev, struct usb_interface *intf);
+u8 cdc_ncm_select_altsetting(struct usb_interface *intf);
 int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting);
 void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf);
 struct sk_buff *cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign);
-- 
cgit 


From 89278c9dc922272df921042aafa18311f3398c6c Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Sun, 11 May 2014 20:22:10 -0700
Subject: tcp: simplify fast open cookie processing

Consolidate various cookie checking and generation code to simplify
the fast open processing. The main goal is to reduce code duplication
in tcp_v4_conn_request() for IPv6 support.

Removes two experimental sysctl flags, TFO_SERVER_ALWAYS and
TFO_SERVER_COOKIE_NOT_CHKED, used primarily for developmental debugging
purposes.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Daniel Lee <longinus00@gmail.com>
Signed-off-by: Jerry Chu <hkchu@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h     |  5 ----
 include/net/tcp.h       |  9 +------
 net/ipv4/tcp_fastopen.c | 71 +++++++++++++++++++------------------------------
 net/ipv4/tcp_ipv4.c     | 10 +++----
 net/ipv4/tcp_output.c   |  2 +-
 5 files changed, 33 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4e37c71ecd74..bc35e4709e8e 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -366,11 +366,6 @@ static inline bool tcp_passive_fastopen(const struct sock *sk)
 		tcp_sk(sk)->fastopen_rsk != NULL);
 }
 
-static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc)
-{
-	return foc->len != -1;
-}
-
 extern void tcp_sock_destruct(struct sock *sk);
 
 static inline int fastopen_init_queue(struct sock *sk, int backlog)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 012236838583..17d7c6a3d037 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -220,8 +220,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define	TFO_SERVER_ENABLE	2
 #define	TFO_CLIENT_NO_COOKIE	4	/* Data in SYN w/o cookie option */
 
-/* Process SYN data but skip cookie validation */
-#define	TFO_SERVER_COOKIE_NOT_CHKED	0x100
 /* Accept SYN data w/o any cookie option */
 #define	TFO_SERVER_COOKIE_NOT_REQD	0x200
 
@@ -230,10 +228,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
  */
 #define	TFO_SERVER_WO_SOCKOPT1	0x400
 #define	TFO_SERVER_WO_SOCKOPT2	0x800
-/* Always create TFO child sockets on a TFO listener even when
- * cookie/data not present. (For testing purpose!)
- */
-#define	TFO_SERVER_ALWAYS	0x1000
 
 extern struct inet_timewait_death_row tcp_death_row;
 
@@ -1335,8 +1329,7 @@ int tcp_fastopen_create_child(struct sock *sk,
 			      struct request_sock *req);
 bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
 			struct request_sock *req,
-			struct tcp_fastopen_cookie *foc,
-			struct tcp_fastopen_cookie *valid_foc);
+			struct tcp_fastopen_cookie *foc);
 void tcp_fastopen_init_key_once(bool publish);
 #define TCP_FASTOPEN_KEY_LENGTH 16
 
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 0606c91d9d0b..5a98277b9a82 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -228,59 +228,44 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
 	return true;
 }
 
+/* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
+ * may be updated and return the client in the SYN-ACK later. E.g., Fast Open
+ * cookie request (foc->len == 0).
+ */
 bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
 			struct request_sock *req,
-			struct tcp_fastopen_cookie *foc,
-			struct tcp_fastopen_cookie *valid_foc)
+			struct tcp_fastopen_cookie *foc)
 {
-	bool skip_cookie = false;
-
-	if (likely(!fastopen_cookie_present(foc))) {
-		/* See include/net/tcp.h for the meaning of these knobs */
-		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
-		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
-		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
-			skip_cookie = true; /* no cookie to validate */
-		else
-			return false;
-	}
-	/* A FO option is present; bump the counter. */
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
+	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
+	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
 
-	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
-	    !tcp_fastopen_queue_check(sk))
+	if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+	      (syn_data || foc->len >= 0) &&
+	      tcp_fastopen_queue_check(sk))) {
+		foc->len = -1;
 		return false;
-
-	if (skip_cookie) {
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
 	}
 
-	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
-		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
-			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-						ip_hdr(skb)->daddr, valid_foc);
-			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
-			    memcmp(&foc->val[0], &valid_foc->val[0],
-			    TCP_FASTOPEN_COOKIE_SIZE) != 0)
-				return false;
-			valid_foc->len = -1;
-		}
-		/* Acknowledge the data received from the peer. */
+	if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+		goto fastopen;
+
+	tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
+				ip_hdr(skb)->daddr, &valid_foc);
+
+	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
+	    foc->len == valid_foc.len &&
+	    !memcmp(foc->val, valid_foc.val, foc->len)) {
+fastopen:
 		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		foc->len = -1;
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
 		return true;
-	} else if (foc->len == 0) { /* Client requesting a cookie */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-		NET_INC_STATS_BH(sock_net(sk),
-		    LINUX_MIB_TCPFASTOPENCOOKIEREQD);
-	} else {
-		/* Client sent a cookie with wrong size. Treat it
-		 * the same as invalid and return a valid one.
-		 */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
 	}
+
+	NET_INC_STATS_BH(sock_net(sk), foc->len ?
+			 LINUX_MIB_TCPFASTOPENPASSIVEFAIL :
+			 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
+	*foc = valid_foc;
 	return false;
 }
 EXPORT_SYMBOL(tcp_fastopen_check);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 032fcaee164a..5ea0949dadfd 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1273,7 +1273,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	bool want_cookie = false;
 	struct flowi4 fl4;
 	struct tcp_fastopen_cookie foc = { .len = -1 };
-	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
 	struct sk_buff *skb_synack;
 	int do_fastopen;
 
@@ -1381,7 +1380,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		if (dst == NULL)
 			goto drop_and_free;
 	}
-	do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
+	do_fastopen = !want_cookie &&
+		      tcp_fastopen_check(sk, skb, req, &foc);
 
 	/* We don't call tcp_v4_send_synack() directly because we need
 	 * to make sure a child socket can be created successfully before
@@ -1394,8 +1394,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * latter to remove its dependency on the current implementation
 	 * of tcp_v4_send_synack()->tcp_select_initial_window().
 	 */
-	skb_synack = tcp_make_synack(sk, dst, req,
-	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
+	skb_synack = tcp_make_synack(sk, dst, req, &foc);
 
 	if (skb_synack) {
 		__tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
@@ -1415,9 +1414,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		tcp_rsk(req)->listener = NULL;
 		/* Add the request_sock to the SYN table */
 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-		if (fastopen_cookie_present(&foc) && foc.len != 0)
-			NET_INC_STATS_BH(sock_net(sk),
-			    LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
 	} else if (tcp_fastopen_create_child(sk, skb, skb_synack, req))
 		goto drop_and_release;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 694711a140d4..b20fc02920f9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -627,7 +627,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
 		if (unlikely(!ireq->tstamp_ok))
 			remaining -= TCPOLEN_SACKPERM_ALIGNED;
 	}
-	if (foc != NULL) {
+	if (foc != NULL && foc->len >= 0) {
 		u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
 		need = (need + 3) & ~3U;  /* Align to 32 bits */
 		if (remaining >= need) {
-- 
cgit 


From cea092c9488cbb22c8b70336ab1413e0daf350f0 Mon Sep 17 00:00:00 2001
From: Brian W Hart <hartb@linux.vnet.ibm.com>
Date: Wed, 14 May 2014 10:33:45 +0930
Subject: cpumask.h: silence warning with -Wsign-compare
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Silence the warning when building with -Wsign-compare when cpumask.h
is included:

include/linux/cpumask.h: In function ‘cpumask_parse’:
include/linux/cpumask.h:603:26: warning: signed and unsigned type in conditional expression [-Wsign-compare]
  int len = nl ? nl - buf : strlen(buf);
                          ^

V2: Rusty pointed out that unsigned should be used instead.

Signed-off-by: Brian W Hart <hartb@linux.vnet.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/linux/cpumask.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d08e4d2a9b92..3557ea7b2049 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -600,7 +600,7 @@ static inline int cpulist_scnprintf(char *buf, int len,
 static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
 {
 	char *nl = strchr(buf, '\n');
-	int len = nl ? nl - buf : strlen(buf);
+	unsigned int len = nl ? (unsigned int)(nl - buf) : strlen(buf);
 
 	return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
 }
-- 
cgit 


From 08e53fbdb85c0f6f45c0f7c1ea3defc1752a95ce Mon Sep 17 00:00:00 2001
From: Amos Kong <akong@redhat.com>
Date: Wed, 14 May 2014 10:33:46 +0930
Subject: virtio-rng: support multiple virtio-rng devices

The current hwrng core supports registering multiple hwrng devices,
but only one device is actually in use at any given time.
QEMU also supports having multiple virtio-rng backends.

This patch changes the virtio-rng driver to support multiple
virtio-rng devices.

]# cat /sys/class/misc/hw_random/rng_available
virtio_rng.0 virtio_rng.1
]# cat /sys/class/misc/hw_random/rng_current
virtio_rng.0
]# echo -n virtio_rng.1 > /sys/class/misc/hw_random/rng_current
]# dd if=/dev/hwrng of=/dev/null

Signed-off-by: Amos Kong <akong@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/hw_random/virtio-rng.c | 102 ++++++++++++++++++++++--------------
 include/linux/hw_random.h           |   2 +-
 2 files changed, 64 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 2ce0e225e58c..12e242bbb0f5 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -25,88 +25,108 @@
 #include <linux/virtio_rng.h>
 #include <linux/module.h>
 
-static struct virtqueue *vq;
-static unsigned int data_avail;
-static DECLARE_COMPLETION(have_data);
-static bool busy;
+
+struct virtrng_info {
+	struct virtio_device *vdev;
+	struct hwrng hwrng;
+	struct virtqueue *vq;
+	unsigned int data_avail;
+	struct completion have_data;
+	bool busy;
+};
 
 static void random_recv_done(struct virtqueue *vq)
 {
+	struct virtrng_info *vi = vq->vdev->priv;
+
 	/* We can get spurious callbacks, e.g. shared IRQs + virtio_pci. */
-	if (!virtqueue_get_buf(vq, &data_avail))
+	if (!virtqueue_get_buf(vi->vq, &vi->data_avail))
 		return;
 
-	complete(&have_data);
+	complete(&vi->have_data);
 }
 
 /* The host will fill any buffer we give it with sweet, sweet randomness. */
-static void register_buffer(u8 *buf, size_t size)
+static void register_buffer(struct virtrng_info *vi, u8 *buf, size_t size)
 {
 	struct scatterlist sg;
 
 	sg_init_one(&sg, buf, size);
 
 	/* There should always be room for one buffer. */
-	virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL);
+	virtqueue_add_inbuf(vi->vq, &sg, 1, buf, GFP_KERNEL);
 
-	virtqueue_kick(vq);
+	virtqueue_kick(vi->vq);
 }
 
 static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait)
 {
 	int ret;
+	struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
 
-	if (!busy) {
-		busy = true;
-		init_completion(&have_data);
-		register_buffer(buf, size);
+	if (!vi->busy) {
+		vi->busy = true;
+		init_completion(&vi->have_data);
+		register_buffer(vi, buf, size);
 	}
 
 	if (!wait)
 		return 0;
 
-	ret = wait_for_completion_killable(&have_data);
+	ret = wait_for_completion_killable(&vi->have_data);
 	if (ret < 0)
 		return ret;
 
-	busy = false;
+	vi->busy = false;
 
-	return data_avail;
+	return vi->data_avail;
 }
 
 static void virtio_cleanup(struct hwrng *rng)
 {
-	if (busy)
-		wait_for_completion(&have_data);
-}
-
+	struct virtrng_info *vi = (struct virtrng_info *)rng->priv;
 
-static struct hwrng virtio_hwrng = {
-	.name		= "virtio",
-	.cleanup	= virtio_cleanup,
-	.read		= virtio_read,
-};
+	if (vi->busy)
+		wait_for_completion(&vi->have_data);
+}
 
 static int probe_common(struct virtio_device *vdev)
 {
-	int err;
+	int err, i;
+	struct virtrng_info *vi = NULL;
+
+	vi = kmalloc(sizeof(struct virtrng_info), GFP_KERNEL);
+	vi->hwrng.name = kmalloc(40, GFP_KERNEL);
+	init_completion(&vi->have_data);
+
+	vi->hwrng.read = virtio_read;
+	vi->hwrng.cleanup = virtio_cleanup;
+	vi->hwrng.priv = (unsigned long)vi;
+	vdev->priv = vi;
 
-	if (vq) {
-		/* We only support one device for now */
-		return -EBUSY;
-	}
 	/* We expect a single virtqueue. */
-	vq = virtio_find_single_vq(vdev, random_recv_done, "input");
-	if (IS_ERR(vq)) {
-		err = PTR_ERR(vq);
-		vq = NULL;
+	vi->vq = virtio_find_single_vq(vdev, random_recv_done, "input");
+	if (IS_ERR(vi->vq)) {
+		err = PTR_ERR(vi->vq);
+		kfree(vi->hwrng.name);
+		vi->vq = NULL;
+		kfree(vi);
+		vi = NULL;
 		return err;
 	}
 
-	err = hwrng_register(&virtio_hwrng);
+	i = 0;
+	do {
+		sprintf(vi->hwrng.name, "virtio_rng.%d", i++);
+		err = hwrng_register(&vi->hwrng);
+	} while (err == -EEXIST);
+
 	if (err) {
 		vdev->config->del_vqs(vdev);
-		vq = NULL;
+		kfree(vi->hwrng.name);
+		vi->vq = NULL;
+		kfree(vi);
+		vi = NULL;
 		return err;
 	}
 
@@ -115,11 +135,15 @@ static int probe_common(struct virtio_device *vdev)
 
 static void remove_common(struct virtio_device *vdev)
 {
+	struct virtrng_info *vi = vdev->priv;
 	vdev->config->reset(vdev);
-	busy = false;
-	hwrng_unregister(&virtio_hwrng);
+	vi->busy = false;
+	hwrng_unregister(&vi->hwrng);
 	vdev->config->del_vqs(vdev);
-	vq = NULL;
+	kfree(vi->hwrng.name);
+	vi->vq = NULL;
+	kfree(vi);
+	vi = NULL;
 }
 
 static int virtrng_probe(struct virtio_device *vdev)
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index b4b0eef5fddf..02d9c87be54c 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -31,7 +31,7 @@
  * @priv:		Private data, for use by the RNG driver.
  */
 struct hwrng {
-	const char *name;
+	char *name;
 	int (*init)(struct hwrng *rng);
 	void (*cleanup)(struct hwrng *rng);
 	int (*data_present)(struct hwrng *rng, int wait);
-- 
cgit 


From 3d4405226d27b3a215e4d03cfa51f536244e5de7 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Sun, 11 May 2014 22:59:30 +0200
Subject: net: avoid dependency of net_get_random_once on nop patching
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net_get_random_once depends on the static keys infrastructure to patch up
the branch to the slow path during boot. This was realized by abusing the
static keys api and defining a new initializer to not enable the call
site while still indicating that the branch point should get patched
up. This was needed to have the fast path considered likely by gcc.

The static key initialization during boot up normally walks through all
the registered keys and either patches in ideal nops or enables the jump
site, but omitted that step on x86 if ideal nops were already placed at
static_key branch points. Thus net_get_random_once branches did not
always become active.

This patch switches net_get_random_once to the ordinary static_key
api and thus places the kernel fast path in the - by gcc considered -
unlikely path.  Microbenchmarks on Intel and AMD x86-64 showed that
the unlikely path actually beats the likely path in terms of cycle cost
and that different nop patterns did not make much difference, thus this
switch should not be noticeable.

Fixes: a48e42920ff38b ("net: introduce new macro net_get_random_once")
Reported-by: Tuomas Räsänen <tuomasjjrasanen@tjjr.fi>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 15 ++++-----------
 net/core/utils.c    |  8 ++++----
 2 files changed, 8 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 94734a6259a4..17d83393afcc 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -248,24 +248,17 @@ do {								\
 bool __net_get_random_once(void *buf, int nbytes, bool *done,
 			   struct static_key *done_key);
 
-#ifdef HAVE_JUMP_LABEL
-#define ___NET_RANDOM_STATIC_KEY_INIT ((struct static_key) \
-		{ .enabled = ATOMIC_INIT(0), .entries = (void *)1 })
-#else /* !HAVE_JUMP_LABEL */
-#define ___NET_RANDOM_STATIC_KEY_INIT STATIC_KEY_INIT_FALSE
-#endif /* HAVE_JUMP_LABEL */
-
 #define net_get_random_once(buf, nbytes)				\
 	({								\
 		bool ___ret = false;					\
 		static bool ___done = false;				\
-		static struct static_key ___done_key =			\
-			___NET_RANDOM_STATIC_KEY_INIT;			\
-		if (!static_key_true(&___done_key))			\
+		static struct static_key ___once_key =			\
+			STATIC_KEY_INIT_TRUE;				\
+		if (static_key_true(&___once_key))			\
 			___ret = __net_get_random_once(buf,		\
 						       nbytes,		\
 						       &___done,	\
-						       &___done_key);	\
+						       &___once_key);	\
 		___ret;							\
 	})
 
diff --git a/net/core/utils.c b/net/core/utils.c
index 2f737bf90b3f..eed34338736c 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -348,8 +348,8 @@ static void __net_random_once_deferred(struct work_struct *w)
 {
 	struct __net_random_once_work *work =
 		container_of(w, struct __net_random_once_work, work);
-	if (!static_key_enabled(work->key))
-		static_key_slow_inc(work->key);
+	BUG_ON(!static_key_enabled(work->key));
+	static_key_slow_dec(work->key);
 	kfree(work);
 }
 
@@ -367,7 +367,7 @@ static void __net_random_once_disable_jump(struct static_key *key)
 }
 
 bool __net_get_random_once(void *buf, int nbytes, bool *done,
-			   struct static_key *done_key)
+			   struct static_key *once_key)
 {
 	static DEFINE_SPINLOCK(lock);
 	unsigned long flags;
@@ -382,7 +382,7 @@ bool __net_get_random_once(void *buf, int nbytes, bool *done,
 	*done = true;
 	spin_unlock_irqrestore(&lock, flags);
 
-	__net_random_once_disable_jump(done_key);
+	__net_random_once_disable_jump(once_key);
 
 	return true;
 }
-- 
cgit 


From b02ef20a9fba08948e643d3eec0efadf1da01a44 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 12 May 2014 18:24:45 +0200
Subject: uprobes/x86: Fix the wrong ->si_addr when xol triggers a trap

If the probed insn triggers a trap, ->si_addr = regs->ip is technically
correct, but this is not what the signal handler wants; we need to pass
the address of the probed insn, not the address of xol slot.

Add the new arch-agnostic helper, uprobe_get_trap_addr(), and change
fill_trap_info() and math_error() to use it. !CONFIG_UPROBES case in
uprobes.h uses a macro to avoid include hell and ensure that it can be
compiled even if an architecture doesn't define instruction_pointer().

Test-case:

	#include <signal.h>
	#include <stdio.h>
	#include <unistd.h>

	extern void probe_div(void);

	void sigh(int sig, siginfo_t *info, void *c)
	{
		int passed = (info->si_addr == probe_div);
		printf(passed ? "PASS\n" : "FAIL\n");
		_exit(!passed);
	}

	int main(void)
	{
		struct sigaction sa = {
			.sa_sigaction	= sigh,
			.sa_flags	= SA_SIGINFO,
		};

		sigaction(SIGFPE, &sa, NULL);

		asm (
			"xor %ecx,%ecx\n"
			".globl probe_div; probe_div:\n"
			"idiv %ecx\n"
		);

		return 0;
	}

it fails if probe_div() is probed.

Note: show_unhandled_signals users should probably use this helper too,
but we need to clean them up first.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
---
 arch/x86/kernel/traps.c |  7 ++++---
 include/linux/uprobes.h |  4 ++++
 kernel/events/uprobes.c | 10 ++++++++++
 3 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 73b3ea32245a..3fdb20548c4b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
+#include <linux/uprobes.h>
 #include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
@@ -148,11 +149,11 @@ static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr,
 
 	case X86_TRAP_DE:
 		sicode = FPE_INTDIV;
-		siaddr = regs->ip;
+		siaddr = uprobe_get_trap_addr(regs);
 		break;
 	case X86_TRAP_UD:
 		sicode = ILL_ILLOPN;
-		siaddr = regs->ip;
+		siaddr = uprobe_get_trap_addr(regs);
 		break;
 	case X86_TRAP_AC:
 		sicode = BUS_ADRALN;
@@ -531,7 +532,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
 	task->thread.error_code = error_code;
 	info.si_signo = SIGFPE;
 	info.si_errno = 0;
-	info.si_addr = (void __user *)regs->ip;
+	info.si_addr = (void __user *)uprobe_get_trap_addr(regs);
 	if (trapnr == X86_TRAP_MF) {
 		unsigned short cwd, swd;
 		/*
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index edff2b97b864..88c3b7e8b384 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -102,6 +102,7 @@ extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, u
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern bool __weak is_trap_insn(uprobe_opcode_t *insn);
 extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs);
+extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
 extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
@@ -130,6 +131,9 @@ extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *r
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
+
+#define uprobe_get_trap_addr(regs)	instruction_pointer(regs)
+
 static inline int
 uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index a13251e8bfa4..3b02c72938a8 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1351,6 +1351,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
 	return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
 }
 
+unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
+{
+	struct uprobe_task *utask = current->utask;
+
+	if (unlikely(utask && utask->active_uprobe))
+		return utask->vaddr;
+
+	return instruction_pointer(regs);
+}
+
 /*
  * Called with no locks held.
  * Called in context of a exiting or a exec-ing thread.
-- 
cgit 


From 9d800df12d31734a6853915e9d2deb5d6747985f Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 May 2014 09:15:00 -0400
Subject: cgroup: rename cgroup->dummy_css to ->self and move it to the top

cgroup->dummy_css is used as the placeholder css when performing
css-oriented operations on the cgroup.  We're gonna shift more cgroup
management to this css.  Let's rename it to ->self and move it to the
top.

This is pure rename and field relocation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  6 +++---
 kernel/cgroup.c        | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index aa7353deaaf3..164851e388e7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -143,6 +143,9 @@ enum {
 };
 
 struct cgroup {
+	/* self css with NULL ->ss, points back to this cgroup */
+	struct cgroup_subsys_state self;
+
 	unsigned long flags;		/* "unsigned long" so bitops work */
 
 	/*
@@ -224,9 +227,6 @@ struct cgroup {
 	struct list_head pidlists;
 	struct mutex pidlist_mutex;
 
-	/* dummy css with NULL ->ss, points back to this cgroup */
-	struct cgroup_subsys_state dummy_css;
-
 	/* For css percpu_ref killing and RCU-protected deletion */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index f36fd9c15b3a..b57a949ae4bc 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -220,7 +220,7 @@ static void cgroup_idr_remove(struct idr *idr, int id)
 /**
  * cgroup_css - obtain a cgroup's css for the specified subsystem
  * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest (%NULL returns the dummy_css)
+ * @ss: the subsystem of interest (%NULL returns @cgrp->self)
  *
  * Return @cgrp's css (cgroup_subsys_state) associated with @ss.  This
  * function must be called either under cgroup_mutex or rcu_read_lock() and
@@ -235,13 +235,13 @@ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
 		return rcu_dereference_check(cgrp->subsys[ss->id],
 					lockdep_is_held(&cgroup_mutex));
 	else
-		return &cgrp->dummy_css;
+		return &cgrp->self;
 }
 
 /**
  * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
  * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest (%NULL returns the dummy_css)
+ * @ss: the subsystem of interest (%NULL returns @cgrp->self)
  *
  * Similar to cgroup_css() but returns the effctive css, which is defined
  * as the matching css of the nearest ancestor including self which has @ss
@@ -254,7 +254,7 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 	lockdep_assert_held(&cgroup_mutex);
 
 	if (!ss)
-		return &cgrp->dummy_css;
+		return &cgrp->self;
 
 	if (!(cgrp->root->subsys_mask & (1 << ss->id)))
 		return NULL;
@@ -288,7 +288,7 @@ struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
 	if (cft->ss)
 		return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
 	else
-		return &cgrp->dummy_css;
+		return &cgrp->self;
 }
 EXPORT_SYMBOL_GPL(of_css);
 
@@ -1551,7 +1551,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
-	cgrp->dummy_css.cgroup = cgrp;
+	cgrp->self.cgroup = cgrp;
 
 	for_each_subsys(ss, ssid)
 		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
@@ -3454,7 +3454,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
 	 * ->can_attach() fails.
 	 */
 	do {
-		css_task_iter_start(&from->dummy_css, &it);
+		css_task_iter_start(&from->self, &it);
 		task = css_task_iter_next(&it);
 		if (task)
 			get_task_struct(task);
@@ -3719,7 +3719,7 @@ static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
 	if (!array)
 		return -ENOMEM;
 	/* now, populate the array */
-	css_task_iter_start(&cgrp->dummy_css, &it);
+	css_task_iter_start(&cgrp->self, &it);
 	while ((tsk = css_task_iter_next(&it))) {
 		if (unlikely(n == length))
 			break;
@@ -3793,7 +3793,7 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
 	}
 	rcu_read_unlock();
 
-	css_task_iter_start(&cgrp->dummy_css, &it);
+	css_task_iter_start(&cgrp->self, &it);
 	while ((tsk = css_task_iter_next(&it))) {
 		switch (tsk->state) {
 		case TASK_RUNNING:
@@ -4274,7 +4274,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	init_cgroup_housekeeping(cgrp);
 
 	cgrp->parent = parent;
-	cgrp->dummy_css.parent = &parent->dummy_css;
+	cgrp->self.parent = &parent->self;
 	cgrp->root = root;
 
 	if (notify_on_release(parent))
-- 
cgit 


From 249f3468a282dcbad53484c821bebb447f14ee03 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 May 2014 09:15:01 -0400
Subject: cgroup: remove cgroup_destroy_css_killed()

cgroup_destroy_css_killed() is the cgroup destruction stage which happens
after all csses are offlined.  After the recent updates, it no longer
does anything other than putting the base reference.  This patch
removes the function and makes cgroup_destroy_locked() put the base
ref at the end instead.

This also makes cgroup->nr_css unnecessary.  Removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  3 ---
 kernel/cgroup.c        | 62 +++++---------------------------------------------
 2 files changed, 6 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 164851e388e7..160fcc69149e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -158,9 +158,6 @@ struct cgroup {
 	 */
 	int id;
 
-	/* the number of attached css's */
-	int nr_css;
-
 	/*
 	 * If this cgroup contains any tasks, it contributes one to
 	 * populated_cnt.  All children with non-zero popuplated_cnt of
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e9aa2a51ca68..4a94b0be598d 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -178,7 +178,6 @@ static struct cftype cgroup_base_files[];
 static void cgroup_put(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
 			     unsigned int ss_mask);
-static void cgroup_destroy_css_killed(struct cgroup *cgrp);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
 static void kill_css(struct cgroup_subsys_state *css);
@@ -4169,7 +4168,6 @@ static int online_css(struct cgroup_subsys_state *css)
 		ret = ss->css_online(css);
 	if (!ret) {
 		css->flags |= CSS_ONLINE;
-		css->cgroup->nr_css++;
 		rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
 	}
 	return ret;
@@ -4189,7 +4187,6 @@ static void offline_css(struct cgroup_subsys_state *css)
 		ss->css_offline(css);
 
 	css->flags &= ~CSS_ONLINE;
-	css->cgroup->nr_css--;
 	RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
 
 	wake_up_all(&css->cgroup->offline_waitq);
@@ -4374,39 +4371,18 @@ out_destroy:
 
 /*
  * This is called when the refcnt of a css is confirmed to be killed.
- * css_tryget_online() is now guaranteed to fail.
+ * css_tryget_online() is now guaranteed to fail.  Tell the subsystem to
+ * initate destruction and put the css ref from kill_css().
  */
 static void css_killed_work_fn(struct work_struct *work)
 {
 	struct cgroup_subsys_state *css =
 		container_of(work, struct cgroup_subsys_state, destroy_work);
-	struct cgroup *cgrp = css->cgroup;
 
 	mutex_lock(&cgroup_mutex);
-
-	/*
-	 * css_tryget_online() is guaranteed to fail now.  Tell subsystems
-	 * to initate destruction.
-	 */
 	offline_css(css);
-
-	/*
-	 * If @cgrp is marked dead, it's waiting for refs of all css's to
-	 * be disabled before proceeding to the second phase of cgroup
-	 * destruction.  If we are the last one, kick it off.
-	 */
-	if (!cgrp->nr_css && cgroup_is_dead(cgrp))
-		cgroup_destroy_css_killed(cgrp);
-
 	mutex_unlock(&cgroup_mutex);
 
-	/*
-	 * Put the css refs from kill_css().  Each css holds an extra
-	 * reference to the cgroup's dentry and cgroup removal proceeds
-	 * regardless of css refs.  On the last put of each css, whenever
-	 * that may be, the extra dentry ref is put so that dentry
-	 * destruction happens only after all css's are released.
-	 */
 	css_put(css);
 }
 
@@ -4518,11 +4494,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 */
 	set_bit(CGRP_DEAD, &cgrp->flags);
 
-	/*
-	 * Initiate massacre of all css's.  cgroup_destroy_css_killed()
-	 * will be invoked to perform the rest of destruction once the
-	 * percpu refs of all css's are confirmed to be killed.
-	 */
+	/* initiate massacre of all css's */
 	for_each_css(css, ssid, cgrp)
 		kill_css(css);
 
@@ -4532,15 +4504,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 		list_del_init(&cgrp->release_list);
 	raw_spin_unlock(&release_list_lock);
 
-	/*
-	 * If @cgrp has css's attached, the second stage of cgroup
-	 * destruction is kicked off from css_killed_work_fn() after the
-	 * refs of all attached css's are killed.  If @cgrp doesn't have
-	 * any css, we kick it off here.
-	 */
-	if (!cgrp->nr_css)
-		cgroup_destroy_css_killed(cgrp);
-
 	/*
 	 * Remove @cgrp directory along with the base files.  @cgrp has an
 	 * extra ref on its kn.
@@ -4550,25 +4513,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	set_bit(CGRP_RELEASABLE, &cgrp->parent->flags);
 	check_for_release(cgrp->parent);
 
+	/* put the base reference */
+	cgroup_put(cgrp);
+
 	return 0;
 };
 
-/**
- * cgroup_destroy_css_killed - the second step of cgroup destruction
- * @cgrp: the cgroup whose csses have just finished offlining
- *
- * This function is invoked from a work item for a cgroup which is being
- * destroyed after all css's are offlined and performs the rest of
- * destruction.  This is the second step of destruction described in the
- * comment above cgroup_destroy_locked().
- */
-static void cgroup_destroy_css_killed(struct cgroup *cgrp)
-{
-	lockdep_assert_held(&cgroup_mutex);
-
-	cgroup_put(cgrp);
-}
-
 static int cgroup_rmdir(struct kernfs_node *kn)
 {
 	struct cgroup *cgrp;
-- 
cgit 


From 9395a4500404e05173eda9a2d198b6fa500e90c5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 May 2014 09:15:02 -0400
Subject: cgroup: enable refcnting for root csses

Currently, css_get(), css_tryget() and css_tryget_online() are noops
for root csses as an optimization; however, we're planning to use css
refcnts to keep track of cgroup lifetime too and root cgroups also need to
be reference counted.  Since css has been converted to percpu_refcnt,
the overhead of refcnting is minuscule and this optimization isn't too
meaningful anymore.  Furthermore, controllers which optimize the root
cgroup often never even invoke these functions in their hot paths.

This patch enables refcnting for root csses too.  This makes CSS_ROOT
flag unused and removes it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 10 ++--------
 kernel/cgroup.c        |  6 +++---
 2 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 160fcc69149e..286e39e4e9bf 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -77,7 +77,6 @@ struct cgroup_subsys_state {
 
 /* bits in struct cgroup_subsys_state flags field */
 enum {
-	CSS_ROOT	= (1 << 0), /* this CSS is the root of the subsystem */
 	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
 };
 
@@ -89,9 +88,7 @@ enum {
  */
 static inline void css_get(struct cgroup_subsys_state *css)
 {
-	/* We don't need to reference count the root state */
-	if (!(css->flags & CSS_ROOT))
-		percpu_ref_get(&css->refcnt);
+	percpu_ref_get(&css->refcnt);
 }
 
 /**
@@ -106,8 +103,6 @@ static inline void css_get(struct cgroup_subsys_state *css)
  */
 static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 {
-	if (css->flags & CSS_ROOT)
-		return true;
 	return percpu_ref_tryget_live(&css->refcnt);
 }
 
@@ -119,8 +114,7 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css)
  */
 static inline void css_put(struct cgroup_subsys_state *css)
 {
-	if (!(css->flags & CSS_ROOT))
-		percpu_ref_put(&css->refcnt);
+	percpu_ref_put(&css->refcnt);
 }
 
 /* bits in struct cgroup flags field */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e694f4153edb..cb5864e36f99 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4158,8 +4158,6 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	if (cgrp->parent) {
 		css->parent = cgroup_css(cgrp->parent, ss);
 		css_get(css->parent);
-	} else {
-		css->flags |= CSS_ROOT;
 	}
 
 	BUG_ON(cgroup_css(cgrp, ss));
@@ -4582,9 +4580,10 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
 	BUG_ON(IS_ERR(css));
 	init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
 	if (early) {
-		/* idr_alloc() can't be called safely during early init */
+		/* allocation can't be done safely during early init */
 		css->id = 1;
 	} else {
+		BUG_ON(percpu_ref_init(&css->refcnt, css_release));
 		css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
 		BUG_ON(css->id < 0);
 	}
@@ -4671,6 +4670,7 @@ int __init cgroup_init(void)
 			struct cgroup_subsys_state *css =
 				init_css_set.subsys[ss->id];
 
+			BUG_ON(percpu_ref_init(&css->refcnt, css_release));
 			css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
 						   GFP_KERNEL);
 			BUG_ON(css->id < 0);
-- 
cgit 


From 9d755d33f0db8c9b49438f71b38a56e375b34360 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 May 2014 09:15:02 -0400
Subject: cgroup: use cgroup->self.refcnt for cgroup refcnting

Currently cgroup implements refcnting separately using atomic_t
cgroup->refcnt.  The destruction paths of cgroup and css are rather
complex and bear a lot of similarities including the use of RCU and
bouncing to a work item.

This patch makes cgroup use the refcnt of self css for refcnting
instead of using its own.  This makes cgroup refcnting use css's
percpu refcnt and share the destruction mechanism.

* css_release_work_fn() and css_free_work_fn() are updated to handle
  both csses and cgroups.  This is a bit messy but should do until we
  can make cgroup->self a full css, which currently can't be done
  thanks to multiple hierarchies.

* cgroup_destroy_locked() now performs
  percpu_ref_kill(&cgrp->self.refcnt) instead of cgroup_put(cgrp).

* Negative refcnt sanity check in cgroup_get() is no longer necessary
  as percpu_ref already handles it.

* Similarly, as a cgroup which hasn't been killed will never be
  released regardless of its refcnt value and percpu_ref has sanity
  check on kill, cgroup_is_dead() sanity check in cgroup_put() is no
  longer necessary.

* As whether a refcnt reached zero or not can only be decided after
  the reference count is killed, cgroup_root->cgrp's refcnting can no
  longer be used to decide whether to kill the root or not.  Let's
  make cgroup_kill_sb() explicitly initiate destruction if the root
  doesn't have any children.  This makes sense anyway as unmounted
  cgroup hierarchy without any children should be destroyed.

While this is a bit messy, this will allow pushing more bookkeeping
towards cgroup->self and thus handling cgroups and csses in a more
uniform way.  In the very long term, it should be possible to
introduce a base subsystem and convert the self css to a proper one
making things a whole lot simpler and unified.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |   6 --
 kernel/cgroup.c        | 146 +++++++++++++++++++++++++++----------------------
 2 files changed, 80 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 286e39e4e9bf..76dadd77a120 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -160,8 +160,6 @@ struct cgroup {
 	 */
 	int populated_cnt;
 
-	atomic_t refcnt;
-
 	/*
 	 * We link our 'sibling' struct into our parent's 'children'.
 	 * Our children link their 'sibling' into our 'children'.
@@ -218,10 +216,6 @@ struct cgroup {
 	struct list_head pidlists;
 	struct mutex pidlist_mutex;
 
-	/* For css percpu_ref killing and RCU-protected deletion */
-	struct rcu_head rcu_head;
-	struct work_struct destroy_work;
-
 	/* used to wait for offlining of csses */
 	wait_queue_head_t offline_waitq;
 };
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index cb5864e36f99..c01e8e8dfad0 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -176,10 +176,12 @@ static int need_forkexit_callback __read_mostly;
 static struct cftype cgroup_base_files[];
 
 static void cgroup_put(struct cgroup *cgrp);
+static bool cgroup_has_live_children(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
 			     unsigned int ss_mask);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
+static void css_release(struct percpu_ref *ref);
 static void kill_css(struct cgroup_subsys_state *css);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 			      bool is_add);
@@ -1008,62 +1010,15 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
 	return mode;
 }
 
-static void cgroup_free_fn(struct work_struct *work)
-{
-	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
-
-	atomic_dec(&cgrp->root->nr_cgrps);
-	cgroup_pidlist_destroy_all(cgrp);
-
-	if (cgrp->parent) {
-		/*
-		 * We get a ref to the parent, and put the ref when this
-		 * cgroup is being freed, so it's guaranteed that the
-		 * parent won't be destroyed before its children.
-		 */
-		cgroup_put(cgrp->parent);
-		kernfs_put(cgrp->kn);
-		kfree(cgrp);
-	} else {
-		/*
-		 * This is root cgroup's refcnt reaching zero, which
-		 * indicates that the root should be released.
-		 */
-		cgroup_destroy_root(cgrp->root);
-	}
-}
-
-static void cgroup_free_rcu(struct rcu_head *head)
-{
-	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
-
-	INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
-	queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
-}
-
 static void cgroup_get(struct cgroup *cgrp)
 {
 	WARN_ON_ONCE(cgroup_is_dead(cgrp));
-	WARN_ON_ONCE(atomic_read(&cgrp->refcnt) <= 0);
-	atomic_inc(&cgrp->refcnt);
+	css_get(&cgrp->self);
 }
 
 static void cgroup_put(struct cgroup *cgrp)
 {
-	if (!atomic_dec_and_test(&cgrp->refcnt))
-		return;
-	if (WARN_ON_ONCE(cgrp->parent && !cgroup_is_dead(cgrp)))
-		return;
-
-	/* delete this cgroup from parent->children */
-	mutex_lock(&cgroup_mutex);
-	list_del_rcu(&cgrp->sibling);
-	mutex_unlock(&cgroup_mutex);
-
-	cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
-	cgrp->id = -1;
-
-	call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
+	css_put(&cgrp->self);
 }
 
 /**
@@ -1548,7 +1503,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	struct cgroup_subsys *ss;
 	int ssid;
 
-	atomic_set(&cgrp->refcnt, 1);
 	INIT_LIST_HEAD(&cgrp->sibling);
 	INIT_LIST_HEAD(&cgrp->children);
 	INIT_LIST_HEAD(&cgrp->cset_links);
@@ -1597,6 +1551,10 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 		goto out;
 	root_cgrp->id = ret;
 
+	ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release);
+	if (ret)
+		goto out;
+
 	/*
 	 * We're accessing css_set_count without locking css_set_rwsem here,
 	 * but that's OK - it can only be increased by someone holding
@@ -1605,11 +1563,11 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 	 */
 	ret = allocate_cgrp_cset_links(css_set_count, &tmp_links);
 	if (ret)
-		goto out;
+		goto cancel_ref;
 
 	ret = cgroup_init_root_id(root);
 	if (ret)
-		goto out;
+		goto cancel_ref;
 
 	root->kf_root = kernfs_create_root(&cgroup_kf_syscall_ops,
 					   KERNFS_ROOT_CREATE_DEACTIVATED,
@@ -1657,6 +1615,8 @@ destroy_root:
 	root->kf_root = NULL;
 exit_root_id:
 	cgroup_exit_root_id(root);
+cancel_ref:
+	percpu_ref_cancel_init(&root_cgrp->self.refcnt);
 out:
 	free_cgrp_cset_links(&tmp_links);
 	return ret;
@@ -1735,13 +1695,14 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
 		}
 
 		/*
-		 * A root's lifetime is governed by its root cgroup.  Zero
-		 * ref indicate that the root is being destroyed.  Wait for
-		 * destruction to complete so that the subsystems are free.
-		 * We can use wait_queue for the wait but this path is
-		 * super cold.  Let's just sleep for a bit and retry.
+		 * A root's lifetime is governed by its root cgroup.
+		 * tryget_live failure indicate that the root is being
+		 * destroyed.  Wait for destruction to complete so that the
+		 * subsystems are free.  We can use wait_queue for the wait
+		 * but this path is super cold.  Let's just sleep for a bit
+		 * and retry.
 		 */
-		if (!atomic_inc_not_zero(&root->cgrp.refcnt)) {
+		if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
 			mutex_unlock(&cgroup_mutex);
 			msleep(10);
 			ret = restart_syscall();
@@ -1794,7 +1755,16 @@ static void cgroup_kill_sb(struct super_block *sb)
 	struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
 	struct cgroup_root *root = cgroup_root_from_kf(kf_root);
 
-	cgroup_put(&root->cgrp);
+	/*
+	 * If @root doesn't have any mounts or children, start killing it.
+	 * This prevents new mounts by disabling percpu_ref_tryget_live().
+	 * cgroup_mount() may wait for @root's release.
+	 */
+	if (cgroup_has_live_children(&root->cgrp))
+		cgroup_put(&root->cgrp);
+	else
+		percpu_ref_kill(&root->cgrp.self.refcnt);
+
 	kernfs_kill_sb(sb);
 }
 
@@ -4110,11 +4080,37 @@ static void css_free_work_fn(struct work_struct *work)
 		container_of(work, struct cgroup_subsys_state, destroy_work);
 	struct cgroup *cgrp = css->cgroup;
 
-	if (css->parent)
-		css_put(css->parent);
+	if (css->ss) {
+		/* css free path */
+		if (css->parent)
+			css_put(css->parent);
 
-	css->ss->css_free(css);
-	cgroup_put(cgrp);
+		css->ss->css_free(css);
+		cgroup_put(cgrp);
+	} else {
+		/* cgroup free path */
+		atomic_dec(&cgrp->root->nr_cgrps);
+		cgroup_pidlist_destroy_all(cgrp);
+
+		if (cgrp->parent) {
+			/*
+			 * We get a ref to the parent, and put the ref when
+			 * this cgroup is being freed, so it's guaranteed
+			 * that the parent won't be destroyed before its
+			 * children.
+			 */
+			cgroup_put(cgrp->parent);
+			kernfs_put(cgrp->kn);
+			kfree(cgrp);
+		} else {
+			/*
+			 * This is root cgroup's refcnt reaching zero,
+			 * which indicates that the root should be
+			 * released.
+			 */
+			cgroup_destroy_root(cgrp->root);
+		}
+	}
 }
 
 static void css_free_rcu_fn(struct rcu_head *rcu_head)
@@ -4131,8 +4127,20 @@ static void css_release_work_fn(struct work_struct *work)
 	struct cgroup_subsys_state *css =
 		container_of(work, struct cgroup_subsys_state, destroy_work);
 	struct cgroup_subsys *ss = css->ss;
+	struct cgroup *cgrp = css->cgroup;
 
-	cgroup_idr_remove(&ss->css_idr, css->id);
+	if (ss) {
+		/* css release path */
+		cgroup_idr_remove(&ss->css_idr, css->id);
+	} else {
+		/* cgroup release path */
+		mutex_lock(&cgroup_mutex);
+		list_del_rcu(&cgrp->sibling);
+		mutex_unlock(&cgroup_mutex);
+
+		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
+		cgrp->id = -1;
+	}
 
 	call_rcu(&css->rcu_head, css_free_rcu_fn);
 }
@@ -4285,6 +4293,10 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 		goto out_unlock;
 	}
 
+	ret = percpu_ref_init(&cgrp->self.refcnt, css_release);
+	if (ret)
+		goto out_free_cgrp;
+
 	/*
 	 * Temporarily set the pointer to NULL, so idr_find() won't return
 	 * a half-baked cgroup.
@@ -4292,7 +4304,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	cgrp->id = cgroup_idr_alloc(&root->cgroup_idr, NULL, 2, 0, GFP_NOWAIT);
 	if (cgrp->id < 0) {
 		ret = -ENOMEM;
-		goto out_free_cgrp;
+		goto out_cancel_ref;
 	}
 
 	init_cgroup_housekeeping(cgrp);
@@ -4365,6 +4377,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 
 out_free_id:
 	cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
+out_cancel_ref:
+	percpu_ref_cancel_init(&cgrp->self.refcnt);
 out_free_cgrp:
 	kfree(cgrp);
 out_unlock:
@@ -4521,7 +4535,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	check_for_release(cgrp->parent);
 
 	/* put the base reference */
-	cgroup_put(cgrp);
+	percpu_ref_kill(&cgrp->self.refcnt);
 
 	return 0;
 };
-- 
cgit 


From 19824d5eeecedfb46639961da1b7a21ba3179930 Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Date: Thu, 8 May 2014 14:06:22 +0200
Subject: usb: gadget: OS String support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a custom (non-USB IF) extension to the USB standard:

http://msdn.microsoft.com/library/windows/hardware/gg463182

They grant permission to use the specification - there is
"Microsoft OS Descriptor Specification License Agreement"
under the link mentioned above, and its Section 2 "Grant
of License", letter (b) reads:

"Patent license. Microsoft hereby grants to You a nonexclusive,
royalty-free, nontransferable, worldwide license under Microsoft’s
patents embodied solely within the Specification and that are owned
or licensable by Microsoft to make, use, import, offer to sell,
sell and distribute directly or indirectly to Your Licensees Your
Implementation. You may sublicense this patent license to Your
Licensees under the same terms and conditions."

The said extension is maintained by Microsoft for Microsoft.

Yet it is fairly common for various devices to use it, and a
popular proprietary operating system expects devices to provide
"OS descriptors", so Linux-based USB gadgets wishing to be able
to talk to a variety of operating systems should be able to provide
the "OS descriptors".

This patch adds optional support for gadgets wishing to expose
the so-called "OS String" under index 0xEE of language 0.
The contents of the string are generated based on the qw_sign
array and b_vendor_code.

Interested gadgets need to set the cdev->use_os_string flag,
fill cdev->qw_sign with appropriate values and fill cdev->b_vendor_code
with a value of their choice.

This patch does not however implement responding to any vendor-specific
USB requests.

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/composite.c | 29 +++++++++++++++++++++++++++++
 include/linux/usb/composite.h  | 11 +++++++++++
 2 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 8060de6562cd..2f87b1697bf5 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -21,6 +21,22 @@
 #include <linux/usb/composite.h>
 #include <asm/unaligned.h>
 
+/**
+ * struct usb_os_string - represents OS String to be reported by a gadget
+ * @bLength: total length of the entire descritor, always 0x12
+ * @bDescriptorType: USB_DT_STRING
+ * @qwSignature: the OS String proper
+ * @bMS_VendorCode: code used by the host for subsequent requests
+ * @bPad: not used, must be zero
+ */
+struct usb_os_string {
+	__u8	bLength;
+	__u8	bDescriptorType;
+	__u8	qwSignature[OS_STRING_QW_SIGN_LEN];
+	__u8	bMS_VendorCode;
+	__u8	bPad;
+} __packed;
+
 /*
  * The code in this file is utility code, used to build a gadget driver
  * from one or more "function" drivers, one or more "configuration"
@@ -961,6 +977,19 @@ static int get_string(struct usb_composite_dev *cdev,
 		return s->bLength;
 	}
 
+	if (cdev->use_os_string && language == 0 && id == OS_STRING_IDX) {
+		struct usb_os_string *b = buf;
+		b->bLength = sizeof(*b);
+		b->bDescriptorType = USB_DT_STRING;
+		compiletime_assert(
+			sizeof(b->qwSignature) == sizeof(cdev->qw_sign),
+			"qwSignature size must be equal to qw_sign");
+		memcpy(&b->qwSignature, cdev->qw_sign, sizeof(b->qwSignature));
+		b->bMS_VendorCode = cdev->b_vendor_code;
+		b->bPad = 0;
+		return sizeof(*b);
+	}
+
 	list_for_each_entry(uc, &cdev->gstrings, list) {
 		struct usb_gadget_strings **sp;
 
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index d3ca3b53837c..7d29ee9363e8 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -335,11 +335,17 @@ static inline struct usb_composite_driver *to_cdriver(
 	return container_of(gdrv, struct usb_composite_driver, gadget_driver);
 }
 
+#define OS_STRING_QW_SIGN_LEN		14
+#define OS_STRING_IDX			0xEE
+
 /**
  * struct usb_composite_device - represents one composite usb gadget
  * @gadget: read-only, abstracts the gadget's usb peripheral controller
  * @req: used for control responses; buffer is pre-allocated
  * @config: the currently active configuration
+ * @qw_sign: qwSignature part of the OS string
+ * @b_vendor_code: bMS_VendorCode part of the OS string
+ * @use_os_string: false by default, interested gadgets set it
  *
  * One of these devices is allocated and initialized before the
  * associated device driver's bind() is called.
@@ -372,6 +378,11 @@ struct usb_composite_dev {
 
 	struct usb_configuration	*config;
 
+	/* OS String is a custom (yet popular) extension to the USB standard. */
+	u8				qw_sign[OS_STRING_QW_SIGN_LEN];
+	u8				b_vendor_code;
+	unsigned int			use_os_string:1;
+
 	/* private: */
 	/* internals */
 	unsigned int			suspended:1;
-- 
cgit 


From 37a3a533429ef9b3cc9f15a656c19623f0e88df7 Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Date: Thu, 8 May 2014 14:06:23 +0200
Subject: usb: gadget: OS Feature Descriptors support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a custom (non-USB IF) extension to the USB standard:

http://msdn.microsoft.com/library/windows/hardware/gg463182

They grant permission to use the specification - there is
"Microsoft OS Descriptor Specification License Agreement"
under the link mentioned above, and its Section 2 "Grant
of License", letter (b) reads:

"Patent license. Microsoft hereby grants to You a nonexclusive,
royalty-free, nontransferable, worldwide license under Microsoft’s
patents embodied solely within the Specification and that are owned
or licensable by Microsoft to make, use, import, offer to sell,
sell and distribute directly or indirectly to Your Licensees Your
Implementation. You may sublicense this patent license to Your
Licensees under the same terms and conditions."

The said extension is maintained by Microsoft for Microsoft.

Yet it is fairly common for various devices to use it, and a
popular proprietary operating system expects devices to provide
"OS descriptors", so Linux-based USB gadgets wishing to be able
to talk to a variety of operating systems should be able to provide
the "OS descriptors".

This patch adds optional support for gadgets wishing to expose
the so-called "OS Feature Descriptors", that is "Extended Compatibility ID"
and "Extended Properties".

Hosts which do request "OS descriptors" from gadgets do so during
the enumeration phase and before the configuration is set with
SET_CONFIGURATION. What is more, those hosts never ask for configurations
at indices other than 0. Therefore, gadgets wishing to provide
"OS descriptors" must designate one configuration to be used with
this kind of hosts - this is what os_desc_config is added for in
struct usb_composite_dev. There is an additional advantage to it:
if a gadget provides "OS descriptors" and designates one configuration
to be used with such non-USB-compliant hosts it can invoke
"usb_add_config" in any order because the designated configuration
will be reported to be at index 0 anyway.

This patch also adds handling vendor-specific requests addressed
at device or interface and related to handling "OS descriptors".

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Acked-by: Michal Nazarewicz <mina86@mina86.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 drivers/usb/gadget/composite.c | 288 ++++++++++++++++++++++++++++++++++++++++-
 drivers/usb/gadget/u_os_desc.h |  90 +++++++++++++
 include/linux/usb/composite.h  |  58 +++++++++
 3 files changed, 435 insertions(+), 1 deletion(-)
 create mode 100644 drivers/usb/gadget/u_os_desc.h

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 2f87b1697bf5..042c66b71df8 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -21,6 +21,8 @@
 #include <linux/usb/composite.h>
 #include <asm/unaligned.h>
 
+#include "u_os_desc.h"
+
 /**
  * struct usb_os_string - represents OS String to be reported by a gadget
  * @bLength: total length of the entire descritor, always 0x12
@@ -438,6 +440,7 @@ static int config_desc(struct usb_composite_dev *cdev, unsigned w_value)
 {
 	struct usb_gadget		*gadget = cdev->gadget;
 	struct usb_configuration	*c;
+	struct list_head		*pos;
 	u8				type = w_value >> 8;
 	enum usb_device_speed		speed = USB_SPEED_UNKNOWN;
 
@@ -456,7 +459,20 @@ static int config_desc(struct usb_composite_dev *cdev, unsigned w_value)
 
 	/* This is a lookup by config *INDEX* */
 	w_value &= 0xff;
-	list_for_each_entry(c, &cdev->configs, list) {
+
+	pos = &cdev->configs;
+	c = cdev->os_desc_config;
+	if (c)
+		goto check_config;
+
+	while ((pos = pos->next) !=  &cdev->configs) {
+		c = list_entry(pos, typeof(*c), list);
+
+		/* skip OS Descriptors config which is handled separately */
+		if (c == cdev->os_desc_config)
+			continue;
+
+check_config:
 		/* ignore configs that won't work at this speed */
 		switch (speed) {
 		case USB_SPEED_SUPER:
@@ -1236,6 +1252,158 @@ static void composite_setup_complete(struct usb_ep *ep, struct usb_request *req)
 				req->status, req->actual, req->length);
 }
 
+static int count_ext_compat(struct usb_configuration *c)
+{
+	int i, res;
+
+	res = 0;
+	for (i = 0; i < c->next_interface_id; ++i) {
+		struct usb_function *f;
+		int j;
+
+		f = c->interface[i];
+		for (j = 0; j < f->os_desc_n; ++j) {
+			struct usb_os_desc *d;
+
+			if (i != f->os_desc_table[j].if_id)
+				continue;
+			d = f->os_desc_table[j].os_desc;
+			if (d && d->ext_compat_id)
+				++res;
+		}
+	}
+	BUG_ON(res > 255);
+	return res;
+}
+
+static void fill_ext_compat(struct usb_configuration *c, u8 *buf)
+{
+	int i, count;
+
+	count = 16;
+	for (i = 0; i < c->next_interface_id; ++i) {
+		struct usb_function *f;
+		int j;
+
+		f = c->interface[i];
+		for (j = 0; j < f->os_desc_n; ++j) {
+			struct usb_os_desc *d;
+
+			if (i != f->os_desc_table[j].if_id)
+				continue;
+			d = f->os_desc_table[j].os_desc;
+			if (d && d->ext_compat_id) {
+				*buf++ = i;
+				*buf++ = 0x01;
+				memcpy(buf, d->ext_compat_id, 16);
+				buf += 22;
+			} else {
+				++buf;
+				*buf = 0x01;
+				buf += 23;
+			}
+			count += 24;
+			if (count >= 4096)
+				return;
+		}
+	}
+}
+
+static int count_ext_prop(struct usb_configuration *c, int interface)
+{
+	struct usb_function *f;
+	int j, res;
+
+	res = 0;
+
+	f = c->interface[interface];
+	for (j = 0; j < f->os_desc_n; ++j) {
+		struct usb_os_desc *d;
+
+		if (interface != f->os_desc_table[j].if_id)
+			continue;
+		d = f->os_desc_table[j].os_desc;
+		if (d && d->ext_compat_id)
+			return d->ext_prop_count;
+	}
+	return res;
+}
+
+static int len_ext_prop(struct usb_configuration *c, int interface)
+{
+	struct usb_function *f;
+	struct usb_os_desc *d;
+	int j, res;
+
+	res = 10; /* header length */
+	f = c->interface[interface];
+	for (j = 0; j < f->os_desc_n; ++j) {
+		if (interface != f->os_desc_table[j].if_id)
+			continue;
+		d = f->os_desc_table[j].os_desc;
+		if (d)
+			return min(res + d->ext_prop_len, 4096);
+	}
+	return res;
+}
+
+static int fill_ext_prop(struct usb_configuration *c, int interface, u8 *buf)
+{
+	struct usb_function *f;
+	struct usb_os_desc *d;
+	struct usb_os_desc_ext_prop *ext_prop;
+	int j, count, n, ret;
+	u8 *start = buf;
+
+	f = c->interface[interface];
+	for (j = 0; j < f->os_desc_n; ++j) {
+		if (interface != f->os_desc_table[j].if_id)
+			continue;
+		d = f->os_desc_table[j].os_desc;
+		if (d)
+			list_for_each_entry(ext_prop, &d->ext_prop, entry) {
+				/* 4kB minus header length */
+				n = buf - start;
+				if (n >= 4086)
+					return 0;
+
+				count = ext_prop->data_len +
+					ext_prop->name_len + 14;
+				if (count > 4086 - n)
+					return -EINVAL;
+				usb_ext_prop_put_size(buf, count);
+				usb_ext_prop_put_type(buf, ext_prop->type);
+				ret = usb_ext_prop_put_name(buf, ext_prop->name,
+							    ext_prop->name_len);
+				if (ret < 0)
+					return ret;
+				switch (ext_prop->type) {
+				case USB_EXT_PROP_UNICODE:
+				case USB_EXT_PROP_UNICODE_ENV:
+				case USB_EXT_PROP_UNICODE_LINK:
+					usb_ext_prop_put_unicode(buf, ret,
+							 ext_prop->data,
+							 ext_prop->data_len);
+					break;
+				case USB_EXT_PROP_BINARY:
+					usb_ext_prop_put_binary(buf, ret,
+							ext_prop->data,
+							ext_prop->data_len);
+					break;
+				case USB_EXT_PROP_LE32:
+					/* not implemented */
+				case USB_EXT_PROP_BE32:
+					/* not implemented */
+				default:
+					return -EINVAL;
+				}
+				buf += count;
+			}
+	}
+
+	return 0;
+}
+
 /*
  * The setup() callback implements all the ep0 functionality that's
  * not handled lower down, in hardware or the hardware driver(like
@@ -1445,6 +1613,91 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl)
 		break;
 	default:
 unknown:
+		/*
+		 * OS descriptors handling
+		 */
+		if (cdev->use_os_string && cdev->os_desc_config &&
+		    (ctrl->bRequest & USB_TYPE_VENDOR) &&
+		    ctrl->bRequest == cdev->b_vendor_code) {
+			struct usb_request		*req;
+			struct usb_configuration	*os_desc_cfg;
+			u8				*buf;
+			int				interface;
+			int				count = 0;
+
+			req = cdev->os_desc_req;
+			req->complete = composite_setup_complete;
+			buf = req->buf;
+			os_desc_cfg = cdev->os_desc_config;
+			memset(buf, 0, w_length);
+			buf[5] = 0x01;
+			switch (ctrl->bRequestType & USB_RECIP_MASK) {
+			case USB_RECIP_DEVICE:
+				if (w_index != 0x4 || (w_value >> 8))
+					break;
+				buf[6] = w_index;
+				if (w_length == 0x10) {
+					/* Number of ext compat interfaces */
+					count = count_ext_compat(os_desc_cfg);
+					buf[8] = count;
+					count *= 24; /* 24 B/ext compat desc */
+					count += 16; /* header */
+					put_unaligned_le32(count, buf);
+					value = w_length;
+				} else {
+					/* "extended compatibility ID"s */
+					count = count_ext_compat(os_desc_cfg);
+					buf[8] = count;
+					count *= 24; /* 24 B/ext compat desc */
+					count += 16; /* header */
+					put_unaligned_le32(count, buf);
+					buf += 16;
+					fill_ext_compat(os_desc_cfg, buf);
+					value = w_length;
+				}
+				break;
+			case USB_RECIP_INTERFACE:
+				if (w_index != 0x5 || (w_value >> 8))
+					break;
+				interface = w_value & 0xFF;
+				buf[6] = w_index;
+				if (w_length == 0x0A) {
+					count = count_ext_prop(os_desc_cfg,
+						interface);
+					put_unaligned_le16(count, buf + 8);
+					count = len_ext_prop(os_desc_cfg,
+						interface);
+					put_unaligned_le32(count, buf);
+
+					value = w_length;
+				} else {
+					count = count_ext_prop(os_desc_cfg,
+						interface);
+					put_unaligned_le16(count, buf + 8);
+					count = len_ext_prop(os_desc_cfg,
+						interface);
+					put_unaligned_le32(count, buf);
+					buf += 10;
+					value = fill_ext_prop(os_desc_cfg,
+							      interface, buf);
+					if (value < 0)
+						return value;
+
+					value = w_length;
+				}
+				break;
+			}
+			req->length = value;
+			req->zero = value < w_length;
+			value = usb_ep_queue(gadget->ep0, req, GFP_ATOMIC);
+			if (value < 0) {
+				DBG(cdev, "ep_queue --> %d\n", value);
+				req->status = 0;
+				composite_setup_complete(gadget->ep0, req);
+			}
+			return value;
+		}
+
 		VDBG(cdev,
 			"non-core control req%02x.%02x v%04x i%04x l%d\n",
 			ctrl->bRequestType, ctrl->bRequest,
@@ -1668,6 +1921,29 @@ fail:
 	return ret;
 }
 
+/**
+ * composite_os_desc_req_prepare() - allocate the OS descriptor ep0 request
+ * @cdev: composite device that will own the request
+ * @ep0: control endpoint the request is allocated from
+ *
+ * Allocates cdev->os_desc_req together with a 4 kB response buffer and
+ * installs composite_setup_complete() as its completion handler.
+ *
+ * Return: 0 on success, -ENOMEM if either allocation fails.  On failure
+ * cdev->os_desc_req is NULL, so composite_dev_cleanup() skips it.
+ */
+int composite_os_desc_req_prepare(struct usb_composite_dev *cdev,
+				  struct usb_ep *ep0)
+{
+	int ret = 0;
+
+	/* Failure is reported as NULL here, for which PTR_ERR() yields 0 */
+	cdev->os_desc_req = usb_ep_alloc_request(ep0, GFP_KERNEL);
+	if (!cdev->os_desc_req) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	/* OS feature descriptor length <= 4kB */
+	cdev->os_desc_req->buf = kmalloc(4096, GFP_KERNEL);
+	if (!cdev->os_desc_req->buf) {
+		ret = -ENOMEM;
+		/* hand the request back to the endpoint that allocated it */
+		usb_ep_free_request(ep0, cdev->os_desc_req);
+		/* do not leave a stale pointer for composite_dev_cleanup() */
+		cdev->os_desc_req = NULL;
+		goto end;
+	}
+	cdev->os_desc_req->complete = composite_setup_complete;
+end:
+	return ret;
+}
+
 void composite_dev_cleanup(struct usb_composite_dev *cdev)
 {
 	struct usb_gadget_string_container *uc, *tmp;
@@ -1676,6 +1952,10 @@ void composite_dev_cleanup(struct usb_composite_dev *cdev)
 		list_del(&uc->list);
 		kfree(uc);
 	}
+	if (cdev->os_desc_req) {
+		kfree(cdev->os_desc_req->buf);
+		usb_ep_free_request(cdev->gadget->ep0, cdev->os_desc_req);
+	}
 	if (cdev->req) {
 		kfree(cdev->req->buf);
 		usb_ep_free_request(cdev->gadget->ep0, cdev->req);
@@ -1713,6 +1993,12 @@ static int composite_bind(struct usb_gadget *gadget,
 	if (status < 0)
 		goto fail;
 
+	if (cdev->use_os_string) {
+		status = composite_os_desc_req_prepare(cdev, gadget->ep0);
+		if (status)
+			goto fail;
+	}
+
 	update_unchanged_dev_desc(&cdev->desc, composite->dev);
 
 	/* has userspace failed to provide a serial number? */
diff --git a/drivers/usb/gadget/u_os_desc.h b/drivers/usb/gadget/u_os_desc.h
new file mode 100644
index 000000000000..ea5cf8c2da28
--- /dev/null
+++ b/drivers/usb/gadget/u_os_desc.h
@@ -0,0 +1,90 @@
+/*
+ * u_os_desc.h
+ *
+ * Utility definitions for "OS Descriptors" support
+ *
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ *		http://www.samsung.com
+ *
+ * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __U_OS_DESC_H__
+#define __U_OS_DESC_H__
+
+#include <asm/unaligned.h>
+#include <linux/nls.h>
+
+/*
+ * Byte offsets of the fields of one extended property section.  The last
+ * two offsets are relative: the helpers in this header add the property
+ * name length to them, because those fields follow the variable-length name.
+ */
+#define USB_EXT_PROP_DW_SIZE			0
+#define USB_EXT_PROP_DW_PROPERTY_DATA_TYPE	4
+#define USB_EXT_PROP_W_PROPERTY_NAME_LENGTH	8
+#define USB_EXT_PROP_B_PROPERTY_NAME		10
+#define USB_EXT_PROP_DW_PROPERTY_DATA_LENGTH	10
+#define USB_EXT_PROP_B_PROPERTY_DATA		14
+
+/* Values stored in the property data type field */
+#define USB_EXT_PROP_RESERVED			0
+#define USB_EXT_PROP_UNICODE			1
+#define USB_EXT_PROP_UNICODE_ENV		2
+#define USB_EXT_PROP_BINARY			3
+#define USB_EXT_PROP_LE32			4
+#define USB_EXT_PROP_BE32			5
+#define USB_EXT_PROP_UNICODE_LINK		6
+#define USB_EXT_PROP_UNICODE_MULTI		7
+
+/* Store @dw_size as a little-endian 32-bit value in the size field of @buf. */
+static inline void usb_ext_prop_put_size(u8 *buf, int dw_size)
+{
+	u8 *size_field = buf + USB_EXT_PROP_DW_SIZE;
+
+	put_unaligned_le32(dw_size, size_field);
+}
+
+/* Store @type as a little-endian 32-bit value in the data type field of @buf. */
+static inline void usb_ext_prop_put_type(u8 *buf, int type)
+{
+	u8 *type_field = buf + USB_EXT_PROP_DW_PROPERTY_DATA_TYPE;
+
+	put_unaligned_le32(type, type_field);
+}
+
+/**
+ * usb_ext_prop_put_name() - write the name length and the UTF-16LE name
+ * @buf: start of the extended property section
+ * @name: NUL-terminated UTF-8 property name
+ * @pnl: length in bytes of the UTF-16 name field, including the two bytes
+ *	of its terminator
+ *
+ * Return: @pnl on success, or the negative value returned by
+ * utf8s_to_utf16s().
+ */
+static inline int usb_ext_prop_put_name(u8 *buf, const char *name, int pnl)
+{
+	int result;
+
+	put_unaligned_le16(pnl, &buf[USB_EXT_PROP_W_PROPERTY_NAME_LENGTH]);
+	result = utf8s_to_utf16s(name, strlen(name), UTF16_LITTLE_ENDIAN,
+		(wchar_t *) &buf[USB_EXT_PROP_B_PROPERTY_NAME], pnl - 2);
+	if (result < 0)
+		return result;
+
+	/*
+	 * The terminator is the last two bytes of the name field.  Offset
+	 * "+ pnl" is already the data length field that follows the name.
+	 */
+	put_unaligned_le16(0, &buf[USB_EXT_PROP_B_PROPERTY_NAME + pnl - 2]);
+
+	return pnl;
+}
+
+/*
+ * Write the data length field and copy @data_len raw bytes of @data behind
+ * it.  Both fields are located @pnl bytes after their base offsets.
+ */
+static inline void usb_ext_prop_put_binary(u8 *buf, int pnl, const u8 *data,
+					   int data_len)
+{
+	u8 *len_field = buf + USB_EXT_PROP_DW_PROPERTY_DATA_LENGTH + pnl;
+	u8 *payload = buf + USB_EXT_PROP_B_PROPERTY_DATA + pnl;
+
+	put_unaligned_le32(data_len, len_field);
+	memcpy(payload, data, data_len);
+}
+
+/**
+ * usb_ext_prop_put_unicode() - write the data length and UTF-16LE data
+ * @buf: start of the extended property section
+ * @pnl: length in bytes of the property name field that precedes the data
+ * @string: UTF-8 source text
+ * @data_len: length in bytes of the UTF-16 data field, including the two
+ *	bytes of its terminator
+ *
+ * Return: @data_len on success, or the negative value returned by
+ * utf8s_to_utf16s().
+ */
+static inline int usb_ext_prop_put_unicode(u8 *buf, int pnl, const char *string,
+					   int data_len)
+{
+	int result;
+	put_unaligned_le32(data_len,
+			&buf[USB_EXT_PROP_DW_PROPERTY_DATA_LENGTH + pnl]);
+
+	result = utf8s_to_utf16s(string, data_len >> 1, UTF16_LITTLE_ENDIAN,
+			(wchar_t *) &buf[USB_EXT_PROP_B_PROPERTY_DATA + pnl],
+			data_len - 2);
+	if (result < 0)
+		return result;
+
+	/*
+	 * The terminator is the last two bytes of the data field.  Offset
+	 * "+ data_len" would be the first byte after this property.
+	 */
+	put_unaligned_le16(0,
+		&buf[USB_EXT_PROP_B_PROPERTY_DATA + pnl + data_len - 2]);
+
+	return data_len;
+}
+
+#endif /* __U_OS_DESC_H__ */
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 7d29ee9363e8..549f5382b01a 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -56,6 +56,53 @@
 #define USB_MS_TO_HS_INTERVAL(x)	(ilog2((x * 1000 / 125)) + 1)
 struct usb_configuration;
 
+/**
+ * struct usb_os_desc_ext_prop - describes one "Extended Property"
+ * @entry: used to keep a list of extended properties
+ * @type: Extended Property type
+ * @name_len: Extended Property unicode name length, including terminating '\0'
+ * @name: Extended Property name
+ * @data_len: Length of Extended Property blob (for unicode store double len)
+ * @data: Extended Property blob
+ */
+struct usb_os_desc_ext_prop {
+	struct list_head	entry;
+	u8			type;
+	int			name_len;
+	char			*name;
+	int			data_len;
+	char			*data;
+};
+
+/**
+ * struct usb_os_desc - describes OS descriptors associated with one interface
+ * @ext_compat_id: 16 bytes of "Compatible ID" and "Subcompatible ID"
+ * @ext_prop: Extended Properties list
+ * @ext_prop_len: Total length of Extended Properties blobs
+ * @ext_prop_count: Number of Extended Properties
+ */
+struct usb_os_desc {
+	char			*ext_compat_id;
+	struct list_head	ext_prop;
+	int			ext_prop_len;
+	int			ext_prop_count;
+};
+
+/**
+ * struct usb_os_desc_table - describes OS descriptors associated with one
+ * interface of a usb_function
+ * @if_id: Interface id
+ * @os_desc: "Extended Compatibility ID" and "Extended Properties" of the
+ *	interface
+ *
+ * Each interface can have at most one "Extended Compatibility ID" and a
+ * number of "Extended Properties".
+ */
+struct usb_os_desc_table {
+	int			if_id;
+	struct usb_os_desc	*os_desc;
+};
+
 /**
  * struct usb_function - describes one function of a configuration
  * @name: For diagnostics, identifies the function.
@@ -73,6 +120,10 @@ struct usb_configuration;
  *	be available at super speed.
  * @config: assigned when @usb_add_function() is called; this is the
  *	configuration with which this function is associated.
+ * @os_desc_table: Table of (interface id, os descriptors) pairs. The function
+ *	can expose more than one interface. If an interface is a member of
+ *	an IAD, only the first interface of IAD has its entry in the table.
+ * @os_desc_n: Number of entries in os_desc_table
  * @bind: Before the gadget can register, all of its functions bind() to the
  *	available resources including string and interface identifiers used
  *	in interface or class descriptors; endpoints; I/O buffers; and so on.
@@ -129,6 +180,9 @@ struct usb_function {
 
 	struct usb_configuration	*config;
 
+	struct usb_os_desc_table	*os_desc_table;
+	unsigned			os_desc_n;
+
 	/* REVISIT:  bind() functions can be marked __init, which
 	 * makes trouble for section mismatch analysis.  See if
 	 * we can't restructure things to avoid mismatching.
@@ -342,10 +396,12 @@ static inline struct usb_composite_driver *to_cdriver(
  * struct usb_composite_device - represents one composite usb gadget
  * @gadget: read-only, abstracts the gadget's usb peripheral controller
  * @req: used for control responses; buffer is pre-allocated
+ * @os_desc_req: used for OS descriptors responses; buffer is pre-allocated
  * @config: the currently active configuration
  * @qw_sign: qwSignature part of the OS string
  * @b_vendor_code: bMS_VendorCode part of the OS string
  * @use_os_string: false by default, interested gadgets set it
+ * @os_desc_config: the configuration to be used with OS descriptors
  *
  * One of these devices is allocated and initialized before the
  * associated device driver's bind() is called.
@@ -375,12 +431,14 @@ static inline struct usb_composite_driver *to_cdriver(
 struct usb_composite_dev {
 	struct usb_gadget		*gadget;
 	struct usb_request		*req;
+	struct usb_request		*os_desc_req;
 
 	struct usb_configuration	*config;
 
 	/* OS String is a custom (yet popular) extension to the USB standard. */
 	u8				qw_sign[OS_STRING_QW_SIGN_LEN];
 	u8				b_vendor_code;
+	struct usb_configuration	*os_desc_config;
 	unsigned int			use_os_string:1;
 
 	/* private: */
-- 
cgit 


From da4243145fb197622425d4c2feff5d6422f2391e Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Date: Thu, 8 May 2014 14:06:26 +0200
Subject: usb: gadget: configfs: OS Extended Compatibility descriptors support

Add handling of OS Extended Compatibility descriptors from configfs interface.
Hosts which expect the "OS Descriptors" ask only for configurations @ index 0,
but linux-based USB devices can provide more than one configuration.
This patch adds marking one of gadget's configurations the configuration
to be reported at index 0, regardless of the actual sequence of usb_add_config
invocations used for adding the configurations. The configuration is selected
by creating a symbolic link pointing to it from the "os_desc" directory
located at the top of a gadget's directory hierarchy.

One kind of "OS Descriptors" are "Extended Compatibility Descriptors",
which need to be specified per interface. This patch adds interface.<n>
directory in function's configfs directory to represent each interface
defined by the function. Each interface's directory contains two attributes:
"compatible_id" and "sub_compatible_id", which represent 8-byte
strings to be reported to the host as the "Compatible ID" and "Sub Compatible
ID".

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 Documentation/ABI/testing/configfs-usb-gadget |  13 ++
 drivers/usb/gadget/configfs.c                 | 190 ++++++++++++++++++++++++++
 drivers/usb/gadget/configfs.h                 |  12 ++
 include/linux/usb/composite.h                 |   6 +
 4 files changed, 221 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/configfs-usb-gadget b/Documentation/ABI/testing/configfs-usb-gadget
index 0e7b786f24ac..5c0b3e6eb981 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget
+++ b/Documentation/ABI/testing/configfs-usb-gadget
@@ -62,6 +62,19 @@ KernelVersion:	3.11
 Description:
 		This group contains functions available to this USB gadget.
 
+What:		/config/usb-gadget/gadget/functions/<func>.<inst>/interface.<n>
+Date:		May 2014
+KernelVersion:	3.16
+Description:
+		This group contains "Feature Descriptors" specific for one
+		gadget's USB interface or one interface group described
+		by an IAD.
+
+		The attributes:
+
+		compatible_id		- 8-byte string for "Compatible ID"
+		sub_compatible_id	- 8-byte string for "Sub Compatible ID"
+
 What:		/config/usb-gadget/gadget/strings
 Date:		Jun 2013
 KernelVersion:	3.11
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 8b9e038ac22b..fa6cb06cca09 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -6,6 +6,7 @@
 #include <linux/usb/composite.h>
 #include <linux/usb/gadget_configfs.h>
 #include "configfs.h"
+#include "u_f.h"
 
 int check_user_usb_string(const char *name,
 		struct usb_gadget_strings *stringtab_dev)
@@ -872,10 +873,63 @@ static void os_desc_attr_release(struct config_item *item)
 	kfree(os_desc);
 }
 
+/*
+ * allow_link handler of the gadget's os_desc group.
+ *
+ * Accepts a link to @usb_cfg_ci only if it is one of the gadget's
+ * configurations and no OS descriptor configuration is selected yet.
+ * Returns 0 on success, -EINVAL for a foreign configuration and -EBUSY
+ * when a configuration is already selected.
+ */
+static int os_desc_link(struct config_item *os_desc_ci,
+			struct config_item *usb_cfg_ci)
+{
+	struct config_group *os_grp = to_config_group(os_desc_ci);
+	struct config_group *cfg_grp = to_config_group(usb_cfg_ci);
+	struct gadget_info *gi = container_of(os_grp, struct gadget_info,
+					      os_desc_group);
+	struct config_usb_cfg *wanted = container_of(cfg_grp,
+						     struct config_usb_cfg,
+						     group);
+	struct usb_composite_dev *cdev = &gi->cdev;
+	struct usb_configuration *pos;
+	bool found = false;
+	int ret = -EINVAL;
+
+	mutex_lock(&gi->lock);
+
+	/* the target must already be on this gadget's configuration list */
+	list_for_each_entry(pos, &cdev->configs, list) {
+		if (pos == &wanted->c) {
+			found = true;
+			break;
+		}
+	}
+
+	if (found) {
+		if (cdev->os_desc_config) {
+			ret = -EBUSY;
+		} else {
+			cdev->os_desc_config = &wanted->c;
+			ret = 0;
+		}
+	}
+
+	mutex_unlock(&gi->lock);
+	return ret;
+}
+
+/*
+ * drop_link handler of the gadget's os_desc group.
+ *
+ * Under gi->lock: unregisters the gadget if it currently has a UDC name,
+ * then clears the configuration selected for OS descriptors.  Always
+ * returns 0.
+ */
+static int os_desc_unlink(struct config_item *os_desc_ci,
+			  struct config_item *usb_cfg_ci)
+{
+	struct gadget_info *gi = container_of(to_config_group(os_desc_ci),
+					struct gadget_info, os_desc_group);
+	struct usb_composite_dev *cdev = &gi->cdev;
+
+	mutex_lock(&gi->lock);
+	if (gi->udc_name)
+		unregister_gadget(gi);
+	cdev->os_desc_config = NULL;
+	/* presumably unregister_gadget() clears udc_name; warn if it did not */
+	WARN_ON(gi->udc_name);
+	mutex_unlock(&gi->lock);
+	return 0;
+}
+
 static struct configfs_item_operations os_desc_ops = {
 	.release                = os_desc_attr_release,
 	.show_attribute         = os_desc_attr_show,
 	.store_attribute        = os_desc_attr_store,
+	.allow_link		= os_desc_link,
+	.drop_link		= os_desc_unlink,
 };
 
 static struct config_item_type os_desc_type = {
@@ -884,6 +938,133 @@ static struct config_item_type os_desc_type = {
 	.ct_owner	= THIS_MODULE,
 };
 
+CONFIGFS_ATTR_STRUCT(usb_os_desc);
+CONFIGFS_ATTR_OPS(usb_os_desc);
+
+static struct configfs_item_operations interf_item_ops = {
+	.show_attribute		= usb_os_desc_attr_show,
+	.store_attribute	= usb_os_desc_attr_store,
+};
+
+/* Copy the first 8 bytes of the ID buffer (the compatible ID) to @page. */
+static ssize_t rndis_grp_compatible_id_show(struct usb_os_desc *desc,
+					    char *page)
+{
+	const size_t id_len = 8;
+
+	memcpy(page, desc->ext_compat_id, id_len);
+
+	return id_len;
+}
+
+/*
+ * Store up to 8 bytes of @page as the compatible ID, i.e. bytes 0..7 of
+ * desc->ext_compat_id.  A trailing newline within those 8 bytes is dropped
+ * and the rest of the field is zero-filled.  Returns @len.
+ */
+static ssize_t rndis_grp_compatible_id_store(struct usb_os_desc *desc,
+					     const char *page, size_t len)
+{
+	int l;
+
+	l = min_t(int, 8, len);
+	/* an empty write has no last character to inspect */
+	if (l > 0 && page[l - 1] == '\n')
+		--l;
+	if (desc->opts_mutex)
+		mutex_lock(desc->opts_mutex);
+	/*
+	 * Clear the whole field first instead of appending a NUL at [l]:
+	 * with l == 8 that byte is the first one of the sub compatible ID.
+	 */
+	memset(desc->ext_compat_id, 0, 8);
+	memcpy(desc->ext_compat_id, page, l);
+
+	if (desc->opts_mutex)
+		mutex_unlock(desc->opts_mutex);
+
+	return len;
+}
+
+/* "compatible_id" attribute: world-readable, writable by the owner */
+static struct usb_os_desc_attribute rndis_grp_attr_compatible_id =
+	__CONFIGFS_ATTR(compatible_id, S_IRUGO | S_IWUSR,
+			rndis_grp_compatible_id_show,
+			rndis_grp_compatible_id_store);
+
+/* Copy bytes 8..15 of the ID buffer (the sub compatible ID) to @page. */
+static ssize_t rndis_grp_sub_compatible_id_show(struct usb_os_desc *desc,
+						char *page)
+{
+	const size_t id_len = 8;
+	const char *sub_id = desc->ext_compat_id + 8;
+
+	memcpy(page, sub_id, id_len);
+
+	return id_len;
+}
+
+/*
+ * Store up to 8 bytes of @page as the sub compatible ID, i.e. bytes 8..15
+ * of desc->ext_compat_id.  A trailing newline within those 8 bytes is
+ * dropped and the rest of the field is zero-filled.  Returns @len.
+ */
+static ssize_t rndis_grp_sub_compatible_id_store(struct usb_os_desc *desc,
+						 const char *page, size_t len)
+{
+	int l;
+
+	l = min_t(int, 8, len);
+	/* an empty write has no last character to inspect */
+	if (l > 0 && page[l - 1] == '\n')
+		--l;
+	if (desc->opts_mutex)
+		mutex_lock(desc->opts_mutex);
+	/*
+	 * Clear the whole field first instead of appending a NUL at [l + 8]:
+	 * with l == 8 that is index 16, one past the 16-byte ID buffer.
+	 */
+	memset(desc->ext_compat_id + 8, 0, 8);
+	memcpy(desc->ext_compat_id + 8, page, l);
+
+	if (desc->opts_mutex)
+		mutex_unlock(desc->opts_mutex);
+
+	return len;
+}
+
+/* "sub_compatible_id" attribute: world-readable, writable by the owner */
+static struct usb_os_desc_attribute rndis_grp_attr_sub_compatible_id =
+	__CONFIGFS_ATTR(sub_compatible_id, S_IRUGO | S_IWUSR,
+			rndis_grp_sub_compatible_id_show,
+			rndis_grp_sub_compatible_id_store);
+
+/* NULL-terminated attribute list of every interface.<n> directory */
+static struct configfs_attribute *interf_grp_attrs[] = {
+	&rndis_grp_attr_compatible_id.attr,
+	&rndis_grp_attr_sub_compatible_id.attr,
+	NULL
+};
+
+/**
+ * usb_os_desc_prepare_interf_dir() - set up the "os_desc" configfs subtree
+ * @parent: group that receives "os_desc" as its default group
+ * @n_interf: number of entries in @desc
+ * @desc: per-interface OS descriptors; the group embedded in entry n is
+ *	initialized and named "interface.<n>"
+ * @owner: module recorded as owner of the item types created here
+ *
+ * Return: 0 on success, -ENOMEM if the backing allocation fails.
+ *
+ * NOTE(review): nothing in this function frees the allocation; confirm
+ * which caller releases it.
+ */
+int usb_os_desc_prepare_interf_dir(struct config_group *parent,
+				   int n_interf,
+				   struct usb_os_desc **desc,
+				   struct module *owner)
+{
+	struct config_group **f_default_groups, *os_desc_group,
+				**interface_groups;
+	struct config_item_type *os_desc_type, *interface_type;
+
+	/*
+	 * Describe every object needed below as one item of a single chunk.
+	 * Each pointer array gets one slot more than is filled in later.
+	 */
+	vla_group(data_chunk);
+	vla_item(data_chunk, struct config_group *, f_default_groups, 2);
+	vla_item(data_chunk, struct config_group, os_desc_group, 1);
+	vla_item(data_chunk, struct config_group *, interface_groups,
+		 n_interf + 1);
+	vla_item(data_chunk, struct config_item_type, os_desc_type, 1);
+	vla_item(data_chunk, struct config_item_type, interface_type, 1);
+
+	/* zeroed, so the unfilled trailing array slots stay NULL */
+	char *vlabuf = kzalloc(vla_group_size(data_chunk), GFP_KERNEL);
+	if (!vlabuf)
+		return -ENOMEM;
+
+	f_default_groups = vla_ptr(vlabuf, data_chunk, f_default_groups);
+	os_desc_group = vla_ptr(vlabuf, data_chunk, os_desc_group);
+	os_desc_type = vla_ptr(vlabuf, data_chunk, os_desc_type);
+	interface_groups = vla_ptr(vlabuf, data_chunk, interface_groups);
+	interface_type = vla_ptr(vlabuf, data_chunk, interface_type);
+
+	/* parent -> "os_desc" */
+	parent->default_groups = f_default_groups;
+	os_desc_type->ct_owner = owner;
+	config_group_init_type_name(os_desc_group, "os_desc", os_desc_type);
+	f_default_groups[0] = os_desc_group;
+
+	os_desc_group->default_groups = interface_groups;
+	interface_type->ct_item_ops = &interf_item_ops;
+	interface_type->ct_attrs = interf_grp_attrs;
+	interface_type->ct_owner = owner;
+
+	while (n_interf--) {
+		struct usb_os_desc *d;
+
+		d = desc[n_interf];
+		config_group_init_type_name(&d->group, "", interface_type);
+		config_item_set_name(&d->group.cg_item, "interface.%d",
+				     n_interf);
+		interface_groups[n_interf] = &d->group;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(usb_os_desc_prepare_interf_dir);
+
 static int configfs_do_nothing(struct usb_composite_dev *cdev)
 {
 	WARN_ON(1);
@@ -893,6 +1074,9 @@ static int configfs_do_nothing(struct usb_composite_dev *cdev)
 int composite_dev_prepare(struct usb_composite_driver *composite,
 		struct usb_composite_dev *dev);
 
+int composite_os_desc_req_prepare(struct usb_composite_dev *cdev,
+				  struct usb_ep *ep0);
+
 static void purge_configs_funcs(struct gadget_info *gi)
 {
 	struct usb_configuration	*c;
@@ -1028,6 +1212,12 @@ static int configfs_composite_bind(struct usb_gadget *gadget,
 		}
 		usb_ep_autoconfig_reset(cdev->gadget);
 	}
+	if (cdev->use_os_string) {
+		ret = composite_os_desc_req_prepare(cdev, gadget->ep0);
+		if (ret)
+			goto err_purge_funcs;
+	}
+
 	usb_ep_autoconfig_reset(cdev->gadget);
 	return 0;
 
diff --git a/drivers/usb/gadget/configfs.h b/drivers/usb/gadget/configfs.h
index a7b564a913d1..a14ac792c698 100644
--- a/drivers/usb/gadget/configfs.h
+++ b/drivers/usb/gadget/configfs.h
@@ -1,6 +1,18 @@
 #ifndef USB__GADGET__CONFIGFS__H
 #define USB__GADGET__CONFIGFS__H
 
+#include <linux/configfs.h>
+
 void unregister_gadget_item(struct config_item *item);
 
+int usb_os_desc_prepare_interf_dir(struct config_group *parent,
+				   int n_interf,
+				   struct usb_os_desc **desc,
+				   struct module *owner);
+
+/* Map the config_item of an interface group back to its usb_os_desc. */
+static inline struct usb_os_desc *to_usb_os_desc(struct config_item *item)
+{
+	struct config_group *grp = to_config_group(item);
+
+	return container_of(grp, struct usb_os_desc, group);
+}
+
 #endif /*  USB__GADGET__CONFIGFS__H */
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 549f5382b01a..9c3903d76781 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -80,12 +80,16 @@ struct usb_os_desc_ext_prop {
  * @ext_prop: Extended Properties list
  * @ext_prop_len: Total length of Extended Properties blobs
  * @ext_prop_count: Number of Extended Properties
+ * @opts_mutex: Optional mutex protecting config data of a usb_function_instance
+ * @group: Represents OS descriptors associated with an interface in configfs
  */
 struct usb_os_desc {
 	char			*ext_compat_id;
 	struct list_head	ext_prop;
 	int			ext_prop_len;
 	int			ext_prop_count;
+	struct mutex		*opts_mutex;
+	struct config_group	group;
 };
 
 /**
@@ -381,6 +385,8 @@ extern void usb_composite_unregister(struct usb_composite_driver *driver);
 extern void usb_composite_setup_continue(struct usb_composite_dev *cdev);
 extern int composite_dev_prepare(struct usb_composite_driver *composite,
 		struct usb_composite_dev *cdev);
+extern int composite_os_desc_req_prepare(struct usb_composite_dev *cdev,
+					 struct usb_ep *ep0);
 void composite_dev_cleanup(struct usb_composite_dev *cdev);
 
 static inline struct usb_composite_driver *to_cdriver(
-- 
cgit 


From 7419485f197c436d41535df78ddea1085042d271 Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Date: Thu, 8 May 2014 14:06:28 +0200
Subject: usb: gadget: configfs: OS Extended Properties descriptors support

Add handling of OS Extended Properties descriptors from configfs interface.
One kind of "OS Descriptors" are "Extended Properties" descriptors, which
need to be specified per interface or per group of interfaces described
by an IAD. This patch adds support for creating subdirectories
in interface.<n> directory located in the function's directory.
Names of subdirectories created become names of properties.
Each property contains two attributes: "type" and "data".
The type can be a numeric value 1..7 while data is a blob interpreted
depending on the type specified.
The types are:
1 - unicode string
2 - unicode string with environment variables
3 - binary
4 - little-endian 32-bit
5 - big-endian 32-bit
6 - unicode string with a symbolic link
7 - multiple unicode strings

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
---
 Documentation/ABI/testing/configfs-usb-gadget |  21 +++
 drivers/usb/gadget/configfs.c                 | 201 ++++++++++++++++++++++++++
 include/linux/usb/composite.h                 |   4 +
 3 files changed, 226 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/configfs-usb-gadget b/Documentation/ABI/testing/configfs-usb-gadget
index 5c0b3e6eb981..95a36589a66b 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget
+++ b/Documentation/ABI/testing/configfs-usb-gadget
@@ -75,6 +75,27 @@ Description:
 		compatible_id		- 8-byte string for "Compatible ID"
 		sub_compatible_id	- 8-byte string for "Sub Compatible ID"
 
+What:		/config/usb-gadget/gadget/functions/<func>.<inst>/interface.<n>/<property>
+Date:		May 2014
+KernelVersion:	3.16
+Description:
+		This group contains "Extended Property Descriptors" specific for one
+		gadget's USB interface or one interface group described
+		by an IAD.
+
+		The attributes:
+
+		type		- value 1..7 for interpreting the data
+				1: unicode string
+				2: unicode string with environment variable
+				3: binary
+				4: little-endian 32-bit
+				5: big-endian 32-bit
+				6: unicode string with a symbolic link
+				7: multiple unicode strings
+		data		- blob of data to be interpreted depending on
+				type
+
 What:		/config/usb-gadget/gadget/strings
 Date:		Jun 2013
 KernelVersion:	3.11
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index fa6cb06cca09..2ddcd635ca2a 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -7,6 +7,7 @@
 #include <linux/usb/gadget_configfs.h>
 #include "configfs.h"
 #include "u_f.h"
+#include "u_os_desc.h"
 
 int check_user_usb_string(const char *name,
 		struct usb_gadget_strings *stringtab_dev)
@@ -941,6 +942,204 @@ static struct config_item_type os_desc_type = {
 CONFIGFS_ATTR_STRUCT(usb_os_desc);
 CONFIGFS_ATTR_OPS(usb_os_desc);
 
+
+/* Map an embedded config_item back to its usb_os_desc_ext_prop. */
+static inline struct usb_os_desc_ext_prop
+*to_usb_os_desc_ext_prop(struct config_item *item)
+{
+	struct usb_os_desc_ext_prop *prop;
+
+	prop = container_of(item, struct usb_os_desc_ext_prop, item);
+
+	return prop;
+}
+
+CONFIGFS_ATTR_STRUCT(usb_os_desc_ext_prop);
+CONFIGFS_ATTR_OPS(usb_os_desc_ext_prop);
+
+/* Print the property type as a decimal number, without a trailing newline. */
+static ssize_t ext_prop_type_show(struct usb_os_desc_ext_prop *ext_prop,
+				  char *page)
+{
+	const int type = ext_prop->type;
+
+	return sprintf(page, "%d", type);
+}
+
+/* One of the three unicode types that take part in data_len rescaling. */
+static bool ext_prop_type_is_unicode_str(u8 type)
+{
+	return type == USB_EXT_PROP_UNICODE ||
+	       type == USB_EXT_PROP_UNICODE_ENV ||
+	       type == USB_EXT_PROP_UNICODE_LINK;
+}
+
+/* One of the binary, LE32 or BE32 types. */
+static bool ext_prop_type_is_raw(u8 type)
+{
+	return type == USB_EXT_PROP_BINARY ||
+	       type == USB_EXT_PROP_LE32 ||
+	       type == USB_EXT_PROP_BE32;
+}
+
+/*
+ * Parse a new property type from @page and apply it.
+ *
+ * Values outside USB_EXT_PROP_UNICODE..USB_EXT_PROP_UNICODE_MULTI give
+ * -EINVAL; a parse failure returns the kstrtou8() error.  Switching
+ * between a raw type and a unicode string type doubles or halves data_len.
+ * Returns @len on success.
+ */
+static ssize_t ext_prop_type_store(struct usb_os_desc_ext_prop *ext_prop,
+				   const char *page, size_t len)
+{
+	struct usb_os_desc *desc = to_usb_os_desc(ext_prop->item.ci_parent);
+	u8 old_type, new_type;
+	int ret;
+
+	if (desc->opts_mutex)
+		mutex_lock(desc->opts_mutex);
+
+	ret = kstrtou8(page, 0, &new_type);
+	if (!ret && (new_type < USB_EXT_PROP_UNICODE ||
+		     new_type > USB_EXT_PROP_UNICODE_MULTI))
+		ret = -EINVAL;
+
+	if (!ret) {
+		old_type = ext_prop->type;
+
+		if (ext_prop_type_is_raw(old_type) &&
+		    ext_prop_type_is_unicode_str(new_type))
+			ext_prop->data_len <<= 1;
+		else if (ext_prop_type_is_unicode_str(old_type) &&
+			 ext_prop_type_is_raw(new_type))
+			ext_prop->data_len >>= 1;
+
+		ext_prop->type = new_type;
+		ret = len;
+	}
+
+	if (desc->opts_mutex)
+		mutex_unlock(desc->opts_mutex);
+	return ret;
+}
+
+/*
+ * Copy the stored property data to @page and return the number of bytes
+ * copied: half of data_len for the three unicode string types, data_len
+ * for every other type.
+ */
+static ssize_t ext_prop_data_show(struct usb_os_desc_ext_prop *ext_prop,
+				  char *page)
+{
+	int nbytes;
+
+	switch (ext_prop->type) {
+	case USB_EXT_PROP_UNICODE:
+	case USB_EXT_PROP_UNICODE_ENV:
+	case USB_EXT_PROP_UNICODE_LINK:
+		nbytes = ext_prop->data_len >> 1;
+		break;
+	default:
+		nbytes = ext_prop->data_len;
+		break;
+	}
+
+	memcpy(page, ext_prop->data, nbytes);
+
+	return nbytes;
+}
+
+/*
+ * Replace the property data with the bytes written to the attribute.
+ *
+ * One trailing '\n' or '\0' is stripped.  data_len becomes the number of
+ * stored bytes, or twice that plus 2 for the three unicode string types,
+ * and desc->ext_prop_len is adjusted by the difference.
+ *
+ * Returns the original @len, or -ENOMEM if the copy cannot be allocated.
+ */
+static ssize_t ext_prop_data_store(struct usb_os_desc_ext_prop *ext_prop,
+				   const char *page, size_t len)
+{
+	struct usb_os_desc *desc = to_usb_os_desc(ext_prop->item.ci_parent);
+	char *new_data;
+	size_t ret_len = len;
+
+	/* an empty write has no last character to inspect */
+	if (len && (page[len - 1] == '\n' || page[len - 1] == '\0'))
+		--len;
+	/*
+	 * Keep one spare zero byte: for the unicode string types data_len is
+	 * 2 * len + 2, and ext_prop_data_show() copies data_len / 2, i.e.
+	 * len + 1, bytes out of this buffer.
+	 */
+	new_data = kzalloc(len + 1, GFP_KERNEL);
+	if (!new_data)
+		return -ENOMEM;
+
+	memcpy(new_data, page, len);
+
+	if (desc->opts_mutex)
+		mutex_lock(desc->opts_mutex);
+	kfree(ext_prop->data);
+	ext_prop->data = new_data;
+	desc->ext_prop_len -= ext_prop->data_len;
+	ext_prop->data_len = len;
+	desc->ext_prop_len += ext_prop->data_len;
+	if (ext_prop->type == USB_EXT_PROP_UNICODE ||
+	    ext_prop->type == USB_EXT_PROP_UNICODE_ENV ||
+	    ext_prop->type == USB_EXT_PROP_UNICODE_LINK) {
+		/* two bytes per character plus a two-byte terminator */
+		desc->ext_prop_len -= ext_prop->data_len;
+		ext_prop->data_len <<= 1;
+		ext_prop->data_len += 2;
+		desc->ext_prop_len += ext_prop->data_len;
+	}
+	if (desc->opts_mutex)
+		mutex_unlock(desc->opts_mutex);
+	return ret_len;
+}
+
+/* "type" attribute of a property: world-readable, writable by the owner */
+static struct usb_os_desc_ext_prop_attribute ext_prop_type =
+	__CONFIGFS_ATTR(type, S_IRUGO | S_IWUSR,
+			ext_prop_type_show, ext_prop_type_store);
+
+/* "data" attribute of a property: world-readable, writable by the owner */
+static struct usb_os_desc_ext_prop_attribute ext_prop_data =
+	__CONFIGFS_ATTR(data, S_IRUGO | S_IWUSR,
+			ext_prop_data_show, ext_prop_data_store);
+
+/* NULL-terminated attribute list of every property directory */
+static struct configfs_attribute *ext_prop_attrs[] = {
+	&ext_prop_type.attr,
+	&ext_prop_data.attr,
+	NULL,
+};
+
+/*
+ * Release callback of a property item: frees the data blob and then the
+ * chunk that holds the property.
+ */
+static void usb_os_desc_ext_prop_release(struct config_item *item)
+{
+	struct usb_os_desc_ext_prop *ext_prop = to_usb_os_desc_ext_prop(item);
+
+	/*
+	 * The blob is allocated by ext_prop_data_store() and is not released
+	 * by ext_prop_drop(), so it has to go here.  kfree(NULL) is a no-op
+	 * for a property that never had data written.
+	 */
+	kfree(ext_prop->data);
+	kfree(ext_prop); /* frees a whole chunk */
+}
+
+/* Item operations of a property directory */
+static struct configfs_item_operations ext_prop_ops = {
+	.release		= usb_os_desc_ext_prop_release,
+	.show_attribute		= usb_os_desc_ext_prop_attr_show,
+	.store_attribute	= usb_os_desc_ext_prop_attr_store,
+};
+
+/*
+ * make_item handler of an interface group: creates the extended property
+ * called @name.
+ *
+ * The property and its item type share one allocation.  The new property
+ * is appended to the descriptor's list and the descriptor's property count
+ * and total length are updated.
+ *
+ * Returns the new item, or ERR_PTR(-ENOMEM) if an allocation fails.
+ */
+static struct config_item *ext_prop_make(
+		struct config_group *group,
+		const char *name)
+{
+	struct usb_os_desc_ext_prop *ext_prop;
+	struct config_item_type *ext_prop_type;
+	struct usb_os_desc *desc;
+	char *vlabuf;
+
+	vla_group(data_chunk);
+	vla_item(data_chunk, struct usb_os_desc_ext_prop, ext_prop, 1);
+	vla_item(data_chunk, struct config_item_type, ext_prop_type, 1);
+
+	vlabuf = kzalloc(vla_group_size(data_chunk), GFP_KERNEL);
+	if (!vlabuf)
+		return ERR_PTR(-ENOMEM);
+
+	ext_prop = vla_ptr(vlabuf, data_chunk, ext_prop);
+	ext_prop_type = vla_ptr(vlabuf, data_chunk, ext_prop_type);
+
+	desc = container_of(group, struct usb_os_desc, group);
+	ext_prop_type->ct_item_ops = &ext_prop_ops;
+	ext_prop_type->ct_attrs = ext_prop_attrs;
+	ext_prop_type->ct_owner = desc->owner;
+
+	config_item_init_type_name(&ext_prop->item, name, ext_prop_type);
+
+	ext_prop->name = kstrdup(name, GFP_KERNEL);
+	if (!ext_prop->name) {
+		kfree(vlabuf);
+		return ERR_PTR(-ENOMEM);
+	}
+	/* two bytes per character plus a two-byte terminator */
+	ext_prop->name_len = 2 * strlen(ext_prop->name) + 2;
+
+	/* every change to the descriptor's totals is made under opts_mutex */
+	if (desc->opts_mutex)
+		mutex_lock(desc->opts_mutex);
+	/* 14 bytes of fixed-size fields per property; ext_prop_drop() undoes it */
+	desc->ext_prop_len += 14;
+	desc->ext_prop_len += ext_prop->name_len;
+	list_add_tail(&ext_prop->entry, &desc->ext_prop);
+	++desc->ext_prop_count;
+	if (desc->opts_mutex)
+		mutex_unlock(desc->opts_mutex);
+
+	return &ext_prop->item;
+}
+
+/*
+ * drop_item handler of an interface group: unlinks the property from the
+ * descriptor, takes its name, data and 14 fixed bytes out of the
+ * descriptor's total length, frees the name and drops the item reference.
+ */
+static void ext_prop_drop(struct config_group *group, struct config_item *item)
+{
+	struct usb_os_desc_ext_prop *ext_prop = to_usb_os_desc_ext_prop(item);
+	struct usb_os_desc *desc = to_usb_os_desc(&group->cg_item);
+
+	if (desc->opts_mutex)
+		mutex_lock(desc->opts_mutex);
+	list_del(&ext_prop->entry);
+	--desc->ext_prop_count;
+	kfree(ext_prop->name);
+	/* mirrors what ext_prop_make() and ext_prop_data_store() added */
+	desc->ext_prop_len -= (ext_prop->name_len + ext_prop->data_len + 14);
+	if (desc->opts_mutex)
+		mutex_unlock(desc->opts_mutex);
+	config_item_put(item);
+}
+
+/* Group operations of an interface.<n> directory: create/drop a property */
+static struct configfs_group_operations interf_grp_ops = {
+	.make_item	= &ext_prop_make,
+	.drop_item	= &ext_prop_drop,
+};
+
 static struct configfs_item_operations interf_item_ops = {
 	.show_attribute		= usb_os_desc_attr_show,
 	.store_attribute	= usb_os_desc_attr_store,
@@ -1048,6 +1247,7 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent,
 
 	os_desc_group->default_groups = interface_groups;
 	interface_type->ct_item_ops = &interf_item_ops;
+	interface_type->ct_group_ops = &interf_grp_ops;
 	interface_type->ct_attrs = interf_grp_attrs;
 	interface_type->ct_owner = owner;
 
@@ -1055,6 +1255,7 @@ int usb_os_desc_prepare_interf_dir(struct config_group *parent,
 		struct usb_os_desc *d;
 
 		d = desc[n_interf];
+		d->owner = owner;
 		config_group_init_type_name(&d->group, "", interface_type);
 		config_item_set_name(&d->group.cg_item, "interface.%d",
 				     n_interf);
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 9c3903d76781..7373203140e7 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -64,6 +64,7 @@ struct usb_configuration;
  * @name: Extended Property name
  * @data_len: Length of Extended Property blob (for unicode store double len)
  * @data: Extended Property blob
+ * @item: Represents this Extended Property in configfs
  */
 struct usb_os_desc_ext_prop {
 	struct list_head	entry;
@@ -72,6 +73,7 @@ struct usb_os_desc_ext_prop {
 	char			*name;
 	int			data_len;
 	char			*data;
+	struct config_item	item;
 };
 
 /**
@@ -82,6 +84,7 @@ struct usb_os_desc_ext_prop {
  * @ext_prop_count: Number of Extended Properties
  * @opts_mutex: Optional mutex protecting config data of a usb_function_instance
  * @group: Represents OS descriptors associated with an interface in configfs
+ * @owner: Module associated with this OS descriptor
  */
 struct usb_os_desc {
 	char			*ext_compat_id;
@@ -90,6 +93,7 @@ struct usb_os_desc {
 	int			ext_prop_count;
 	struct mutex		*opts_mutex;
 	struct config_group	group;
+	struct module		*owner;
 };
 
 /**
-- 
cgit 


From 7413af1fb70e7efa6dbc7f27663e7a5126b3aa33 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 6 May 2014 21:34:14 -0400
Subject: ftrace: Make get_ftrace_addr() and get_ftrace_addr_old() global

Move and rename get_ftrace_addr() and get_ftrace_addr_old() to
ftrace_get_addr_new() and ftrace_get_addr_curr() respectively.

This moves these two helper functions in the generic code out from
the arch specific code, and renames them to have a better generic
name. This will allow other archs to use them as well as makes it
a bit easier to work on getting separate trampolines for different
functions.

ftrace_get_addr_new() returns the trampoline address that the mcount
call address will be converted to.

ftrace_get_addr_curr() returns the trampoline address of what the
mcount call address currently jumps to.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/ftrace.c | 36 +++++-------------------------------
 include/linux/ftrace.h   |  2 ++
 kernel/trace/ftrace.c    | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 4b3c195d4133..5ef43ce8492f 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -349,38 +349,12 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
 	return add_break(rec->ip, old);
 }
 
-/*
- * If the record has the FTRACE_FL_REGS set, that means that it
- * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
- * is not not set, then it wants to convert to the normal callback.
- */
-static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
-{
-	if (rec->flags & FTRACE_FL_REGS)
-		return (unsigned long)FTRACE_REGS_ADDR;
-	else
-		return (unsigned long)FTRACE_ADDR;
-}
-
-/*
- * The FTRACE_FL_REGS_EN is set when the record already points to
- * a function that saves all the regs. Basically the '_EN' version
- * represents the current state of the function.
- */
-static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
-{
-	if (rec->flags & FTRACE_FL_REGS_EN)
-		return (unsigned long)FTRACE_REGS_ADDR;
-	else
-		return (unsigned long)FTRACE_ADDR;
-}
-
 static int add_breakpoints(struct dyn_ftrace *rec, int enable)
 {
 	unsigned long ftrace_addr;
 	int ret;
 
-	ftrace_addr = get_ftrace_old_addr(rec);
+	ftrace_addr = ftrace_get_addr_curr(rec);
 
 	ret = ftrace_test_record(rec, enable);
 
@@ -438,14 +412,14 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
 		 * If not, don't touch the breakpoint, we make just create
 		 * a disaster.
 		 */
-		ftrace_addr = get_ftrace_addr(rec);
+		ftrace_addr = ftrace_get_addr_new(rec);
 		nop = ftrace_call_replace(ip, ftrace_addr);
 
 		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
 			goto update;
 
 		/* Check both ftrace_addr and ftrace_old_addr */
-		ftrace_addr = get_ftrace_old_addr(rec);
+		ftrace_addr = ftrace_get_addr_curr(rec);
 		nop = ftrace_call_replace(ip, ftrace_addr);
 
 		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
@@ -489,7 +463,7 @@ static int add_update(struct dyn_ftrace *rec, int enable)
 
 	ret = ftrace_test_record(rec, enable);
 
-	ftrace_addr  = get_ftrace_addr(rec);
+	ftrace_addr  = ftrace_get_addr_new(rec);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
@@ -536,7 +510,7 @@ static int finish_update(struct dyn_ftrace *rec, int enable)
 
 	ret = ftrace_update_record(rec, enable);
 
-	ftrace_addr = get_ftrace_addr(rec);
+	ftrace_addr = ftrace_get_addr_new(rec);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f0ff2c2453e7..2f8cbffecd3d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -400,6 +400,8 @@ int ftrace_update_record(struct dyn_ftrace *rec, int enable);
 int ftrace_test_record(struct dyn_ftrace *rec, int enable);
 void ftrace_run_stop_machine(int command);
 unsigned long ftrace_location(unsigned long ip);
+unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec);
+unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec);
 
 extern ftrace_func_t ftrace_trace_function;
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 98fa931b6864..e825fded435d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1755,6 +1755,42 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable)
 	return ftrace_check_record(rec, enable, 0);
 }
 
+/**
+ * ftrace_get_addr_new - Get the call address to set to
+ * @rec:  The ftrace record descriptor
+ *
+ * If the record has the FTRACE_FL_REGS set, that means that it
+ * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
+ * is not set, then it wants to convert to the normal callback.
+ *
+ * Returns the address of the trampoline to set to
+ */
+unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec)
+{
+	if (rec->flags & FTRACE_FL_REGS)
+		return (unsigned long)FTRACE_REGS_ADDR;
+	else
+		return (unsigned long)FTRACE_ADDR;
+}
+
+/**
+ * ftrace_get_addr_curr - Get the call address that is already there
+ * @rec:  The ftrace record descriptor
+ *
+ * The FTRACE_FL_REGS_EN is set when the record already points to
+ * a function that saves all the regs. Basically the '_EN' version
+ * represents the current state of the function.
+ *
+ * Returns the address of the trampoline that is currently being called
+ */
+unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
+{
+	if (rec->flags & FTRACE_FL_REGS_EN)
+		return (unsigned long)FTRACE_REGS_ADDR;
+	else
+		return (unsigned long)FTRACE_ADDR;
+}
+
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 {
-- 
cgit 


From f1b2f2bd5821c6ab7feed2e133343dd54b212ed9 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Wed, 7 May 2014 16:09:49 -0400
Subject: ftrace: Remove FTRACE_UPDATE_MODIFY_CALL_REGS flag

As the decision to what needs to be done (converting a call to the
ftrace_caller to ftrace_caller_regs or to convert from ftrace_caller_regs
to ftrace_caller) can easily be determined from the rec->flags of
FTRACE_FL_REGS and FTRACE_FL_REGS_EN, there's no need to have the
ftrace_check_record() return either a UPDATE_MODIFY_CALL_REGS or a
UPDATE_MODIFY_CALL. Just the latter is enough. This added flag causes
more complexity than is required. Remove it.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/ftrace.c |  3 ---
 include/linux/ftrace.h   |  2 --
 kernel/trace/ftrace.c    | 13 ++++---------
 3 files changed, 4 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 5ef43ce8492f..89de3eaf8772 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -366,7 +366,6 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
 		/* converting nop to call */
 		return add_brk_on_nop(rec);
 
-	case FTRACE_UPDATE_MODIFY_CALL_REGS:
 	case FTRACE_UPDATE_MODIFY_CALL:
 	case FTRACE_UPDATE_MAKE_NOP:
 		/* converting a call to a nop */
@@ -469,7 +468,6 @@ static int add_update(struct dyn_ftrace *rec, int enable)
 	case FTRACE_UPDATE_IGNORE:
 		return 0;
 
-	case FTRACE_UPDATE_MODIFY_CALL_REGS:
 	case FTRACE_UPDATE_MODIFY_CALL:
 	case FTRACE_UPDATE_MAKE_CALL:
 		/* converting nop to call */
@@ -516,7 +514,6 @@ static int finish_update(struct dyn_ftrace *rec, int enable)
 	case FTRACE_UPDATE_IGNORE:
 		return 0;
 
-	case FTRACE_UPDATE_MODIFY_CALL_REGS:
 	case FTRACE_UPDATE_MODIFY_CALL:
 	case FTRACE_UPDATE_MAKE_CALL:
 		/* converting nop to call */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 2f8cbffecd3d..3e6dfb31f8e6 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -362,14 +362,12 @@ enum {
  *  IGNORE           - The function is already what we want it to be
  *  MAKE_CALL        - Start tracing the function
  *  MODIFY_CALL      - Stop saving regs for the function
- *  MODIFY_CALL_REGS - Start saving regs for the function
  *  MAKE_NOP         - Stop tracing the function
  */
 enum {
 	FTRACE_UPDATE_IGNORE,
 	FTRACE_UPDATE_MAKE_CALL,
 	FTRACE_UPDATE_MODIFY_CALL,
-	FTRACE_UPDATE_MODIFY_CALL_REGS,
 	FTRACE_UPDATE_MAKE_NOP,
 };
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 52c2b53b7953..cc07b7fc4372 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1701,19 +1701,15 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
 		/*
 		 * If this record is being updated from a nop, then
 		 *   return UPDATE_MAKE_CALL.
-		 * Otherwise, if the EN flag is set, then return
-		 *   UPDATE_MODIFY_CALL_REGS to tell the caller to convert
-		 *   from the non-save regs, to a save regs function.
 		 * Otherwise,
 		 *   return UPDATE_MODIFY_CALL to tell the caller to convert
-		 *   from the save regs, to a non-save regs function.
+		 *   from the save regs, to a non-save regs function or
+		 *   vice versa.
 		 */
 		if (flag & FTRACE_FL_ENABLED)
 			return FTRACE_UPDATE_MAKE_CALL;
-		else if (rec->flags & FTRACE_FL_REGS_EN)
-			return FTRACE_UPDATE_MODIFY_CALL_REGS;
-		else
-			return FTRACE_UPDATE_MODIFY_CALL;
+
+		return FTRACE_UPDATE_MODIFY_CALL;
 	}
 
 	if (update) {
@@ -1815,7 +1811,6 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 	case FTRACE_UPDATE_MAKE_NOP:
 		return ftrace_make_nop(NULL, rec, ftrace_addr);
 
-	case FTRACE_UPDATE_MODIFY_CALL_REGS:
 	case FTRACE_UPDATE_MODIFY_CALL:
 		return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
 	}
-- 
cgit 


From afea227fd4acf4f097a9e77bbc2f07d4856ebd01 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 12 Mar 2014 07:10:41 -0700
Subject: rcutorture: Export RCU grace-period kthread wait state to rcutorture

This commit allows rcutorture to print additional state for the
RCU grace-period kthreads in cases where RCU seems reluctant to
start a new grace period.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcutiny.h |  4 ++++
 include/linux/rcutree.h |  1 +
 kernel/rcu/rcutorture.c |  1 +
 kernel/rcu/tree.c       | 17 +++++++++++++++++
 kernel/rcu/tree.h       |  8 +++++++-
 5 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 425c659d54e5..d40a6a451330 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -119,6 +119,10 @@ static inline void rcu_sched_force_quiescent_state(void)
 {
 }
 
+static inline void show_rcu_gp_kthreads(void)
+{
+}
+
 static inline void rcu_cpu_stall_reset(void)
 {
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index a59ca05fd4e3..3e2f5d432743 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -84,6 +84,7 @@ extern unsigned long rcutorture_vernum;
 long rcu_batches_completed(void);
 long rcu_batches_completed_bh(void);
 long rcu_batches_completed_sched(void);
+void show_rcu_gp_kthreads(void);
 
 void rcu_force_quiescent_state(void);
 void rcu_bh_force_quiescent_state(void);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 9decce0f110c..37ae5e1d4a1d 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1034,6 +1034,7 @@ rcu_torture_printk(char *page)
 				"??? Writer stall state %d g%lu c%lu f%#x\n",
 				rcu_torture_writer_state,
 				gpnum, completed, flags);
+		show_rcu_gp_kthreads();
 		rcutorture_trace_dump();
 	}
 	rtcv_snap = rcu_torture_current_version;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3d15b5a82ae8..93e64381aa2a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -279,6 +279,21 @@ void rcu_bh_force_quiescent_state(void)
 }
 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
 
+/*
+ * Show the state of the grace-period kthreads.
+ */
+void show_rcu_gp_kthreads(void)
+{
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp) {
+		pr_info("%s: wait state: %d ->state: %#lx\n",
+			rsp->name, rsp->gp_state, rsp->gp_kthread->state);
+		/* sched_show_task(rsp->gp_kthread); */
+	}
+}
+EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
+
 /*
  * Record the number of times rcutorture tests have been initiated and
  * terminated.  This information allows the debugfs tracing stats to be
@@ -1626,6 +1641,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 			trace_rcu_grace_period(rsp->name,
 					       ACCESS_ONCE(rsp->gpnum),
 					       TPS("reqwait"));
+			rsp->gp_state = RCU_GP_WAIT_GPS;
 			wait_event_interruptible(rsp->gp_wq,
 						 ACCESS_ONCE(rsp->gp_flags) &
 						 RCU_GP_FLAG_INIT);
@@ -1653,6 +1669,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 			trace_rcu_grace_period(rsp->name,
 					       ACCESS_ONCE(rsp->gpnum),
 					       TPS("fqswait"));
+			rsp->gp_state = RCU_GP_WAIT_FQS;
 			ret = wait_event_interruptible_timeout(rsp->gp_wq,
 					((gf = ACCESS_ONCE(rsp->gp_flags)) &
 					 RCU_GP_FLAG_FQS) ||
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 75dc3c39a02a..c2fd1e722879 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -406,7 +406,8 @@ struct rcu_state {
 	unsigned long completed;		/* # of last completed gp. */
 	struct task_struct *gp_kthread;		/* Task for grace periods. */
 	wait_queue_head_t gp_wq;		/* Where GP task waits. */
-	int gp_flags;				/* Commands for GP task. */
+	short gp_flags;				/* Commands for GP task. */
+	short gp_state;				/* GP kthread sleep state. */
 
 	/* End of fields guarded by root rcu_node's lock. */
 
@@ -469,6 +470,11 @@ struct rcu_state {
 #define RCU_GP_FLAG_INIT 0x1	/* Need grace-period initialization. */
 #define RCU_GP_FLAG_FQS  0x2	/* Need grace-period quiescent-state forcing. */
 
+/* Values for rcu_state structure's gp_state field. */
+#define RCU_GP_WAIT_INIT 0	/* Initial state. */
+#define RCU_GP_WAIT_GPS  1	/* Wait for grace-period start. */
+#define RCU_GP_WAIT_FQS  2	/* Wait for force-quiescent-state time. */
+
 extern struct list_head rcu_struct_flavors;
 
 /* Sequence through rcu_state structures for each RCU flavor. */
-- 
cgit 


From 0e980234c97f98be6619b9281d83777f725b94ff Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 16 Apr 2014 10:07:09 -0700
Subject: percpu: Fix raw_cpu_inc_return()

The definition for raw_cpu_add_return() uses the operation prefix
"raw_add_return_", but the definitions in the various percpu.h files
expect "raw_cpu_add_return_".  This commit therefore appropriately
adjusts the definition of raw_cpu_add_return().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Christoph Lameter <cl@linux.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/percpu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index e7a0b95ed527..495c6543a8f2 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -639,7 +639,7 @@ do {									\
 #  define raw_cpu_add_return_8(pcp, val)	raw_cpu_generic_add_return(pcp, val)
 # endif
 # define raw_cpu_add_return(pcp, val)	\
-	__pcpu_size_call_return2(raw_add_return_, pcp, val)
+	__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)
 #endif
 
 #define raw_cpu_sub_return(pcp, val)	raw_cpu_add_return(pcp, -(typeof(pcp))(val))
-- 
cgit 


From ac1bea85781e9004da9b3e8a4b097c18492d857c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 16 Mar 2014 21:36:25 -0700
Subject: sched,rcu: Make cond_resched() report RCU quiescent states

Given a CPU running a loop containing cond_resched(), with no
other tasks runnable on that CPU, RCU will eventually report RCU
CPU stall warnings due to lack of quiescent states.  Fortunately,
every call to cond_resched() is a perfectly good quiescent state.
Unfortunately, invoking rcu_note_context_switch() is a bit heavyweight
for cond_resched(), especially given the need to disable preemption,
and, for RCU-preempt, interrupts as well.

This commit therefore maintains a per-CPU counter that causes
cond_resched(), cond_resched_lock(), and cond_resched_softirq() to call
rcu_note_context_switch(), but only about once per 256 invocations.
This ratio was chosen in keeping with the relative time constants of
RCU grace periods.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/rcupdate.h | 36 ++++++++++++++++++++++++++++++++++++
 kernel/rcu/update.c      | 18 ++++++++++++++++++
 kernel/sched/core.c      |  7 ++++++-
 3 files changed, 60 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 82973738125b..97cc8d6679b4 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -44,6 +44,7 @@
 #include <linux/debugobjects.h>
 #include <linux/bug.h>
 #include <linux/compiler.h>
+#include <linux/percpu.h>
 #include <asm/barrier.h>
 
 extern int rcu_expedited; /* for sysctl */
@@ -286,6 +287,41 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
 bool __rcu_is_watching(void);
 #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
 
+/*
+ * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings.
+ */
+
+#define RCU_COND_RESCHED_LIM 256	/* ms vs. 100s of ms. */
+DECLARE_PER_CPU(int, rcu_cond_resched_count);
+void rcu_resched(void);
+
+/*
+ * Is it time to report RCU quiescent states?
+ *
+ * Note unsynchronized access to rcu_cond_resched_count.  Yes, we might
+ * increment some random CPU's count, and possibly also load the result from
+ * yet another CPU's count.  We might even clobber some other CPU's attempt
+ * to zero its counter.  This is all OK because the goal is not precision,
+ * but rather reasonable amortization of rcu_note_context_switch() overhead
+ * and extremely high probability of avoiding RCU CPU stall warnings.
+ * Note that this function has to be preempted in just the wrong place,
+ * many thousands of times in a row, for anything bad to happen.
+ */
+static inline bool rcu_should_resched(void)
+{
+	return raw_cpu_inc_return(rcu_cond_resched_count) >=
+	       RCU_COND_RESCHED_LIM;
+}
+
+/*
+ * Report quiescent states to RCU if it is time to do so.
+ */
+static inline void rcu_cond_resched(void)
+{
+	if (unlikely(rcu_should_resched()))
+		rcu_resched();
+}
+
 /*
  * Infrastructure to implement the synchronize_() primitives in
  * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4c0a9b0af469..ed7a0d72562c 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -338,3 +338,21 @@ static int __init check_cpu_stall_init(void)
 early_initcall(check_cpu_stall_init);
 
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+
+/*
+ * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings.
+ */
+
+DEFINE_PER_CPU(int, rcu_cond_resched_count);
+
+/*
+ * Report a set of RCU quiescent states, for use by cond_resched()
+ * and friends.  Out of line due to being called infrequently.
+ */
+void rcu_resched(void)
+{
+	preempt_disable();
+	__this_cpu_write(rcu_cond_resched_count, 0);
+	rcu_note_context_switch(smp_processor_id());
+	preempt_enable();
+}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 268a45ea238c..9f530c9ed911 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4051,6 +4051,7 @@ static void __cond_resched(void)
 
 int __sched _cond_resched(void)
 {
+	rcu_cond_resched();
 	if (should_resched()) {
 		__cond_resched();
 		return 1;
@@ -4069,15 +4070,18 @@ EXPORT_SYMBOL(_cond_resched);
  */
 int __cond_resched_lock(spinlock_t *lock)
 {
+	bool need_rcu_resched = rcu_should_resched();
 	int resched = should_resched();
 	int ret = 0;
 
 	lockdep_assert_held(lock);
 
-	if (spin_needbreak(lock) || resched) {
+	if (spin_needbreak(lock) || resched || need_rcu_resched) {
 		spin_unlock(lock);
 		if (resched)
 			__cond_resched();
+		else if (unlikely(need_rcu_resched))
+			rcu_resched();
 		else
 			cpu_relax();
 		ret = 1;
@@ -4091,6 +4095,7 @@ int __sched __cond_resched_softirq(void)
 {
 	BUG_ON(!in_softirq());
 
+	rcu_cond_resched();  /* BH disabled OK, just recording QSes. */
 	if (should_resched()) {
 		local_bh_enable();
 		__cond_resched();
-- 
cgit 


From 5228084eed8d54c426c7abde3be66daf8e1b0e57 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 7 Apr 2014 09:14:11 -0700
Subject: torture: Check for multiple concurrent torture tests

The torture tests are designed to run in isolation, but do not enforce
this isolation.  This commit therefore checks for concurrent torture
tests, and refuses to start new tests while old tests are running.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/torture.h      |  2 +-
 kernel/locking/locktorture.c |  3 ++-
 kernel/rcu/rcutorture.c      |  3 ++-
 kernel/torture.c             | 13 +++++++++++--
 4 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index b2e2b468e511..f998574247fd 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -81,7 +81,7 @@ void stutter_wait(const char *title);
 int torture_stutter_init(int s);
 
 /* Initialization and cleanup. */
-void torture_init_begin(char *ttype, bool v, int *runnable);
+bool torture_init_begin(char *ttype, bool v, int *runnable);
 void torture_init_end(void);
 bool torture_cleanup(void);
 bool torture_must_stop(void);
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 1952466c7db5..dbafeac18e4d 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -355,7 +355,8 @@ static int __init lock_torture_init(void)
 		&lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops,
 	};
 
-	torture_init_begin(torture_type, verbose, &locktorture_runnable);
+	if (!torture_init_begin(torture_type, verbose, &locktorture_runnable))
+		return -EBUSY;
 
 	/* Process args and tell the world that the torturer is on the job. */
 	for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 4b7b97ff1195..7fa34f86e5ba 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1536,7 +1536,8 @@ rcu_torture_init(void)
 		&rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
 	};
 
-	torture_init_begin(torture_type, verbose, &rcutorture_runnable);
+	if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable))
+		return -EBUSY;
 
 	/* Process args and tell the world that the torturer is on the job. */
 	for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
diff --git a/kernel/torture.c b/kernel/torture.c
index ae1723a4c751..0ed0b49d2ce1 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -599,14 +599,20 @@ static void torture_stutter_cleanup(void)
  * The runnable parameter points to a flag that controls whether or not
  * the test is currently runnable.  If there is no such flag, pass in NULL.
  */
-void __init torture_init_begin(char *ttype, bool v, int *runnable)
+bool __init torture_init_begin(char *ttype, bool v, int *runnable)
 {
 	mutex_lock(&fullstop_mutex);
+	if (torture_type != NULL) {
+		pr_alert("torture_init_begin: refusing %s init: %s running",
+			 ttype, torture_type);
+		mutex_unlock(&fullstop_mutex);
+		return false;
+	}
 	torture_type = ttype;
 	verbose = v;
 	torture_runnable = runnable;
 	fullstop = FULLSTOP_DONTSTOP;
-
+	return true;
 }
 EXPORT_SYMBOL_GPL(torture_init_begin);
 
@@ -645,6 +651,9 @@ bool torture_cleanup(void)
 	torture_shuffle_cleanup();
 	torture_stutter_cleanup();
 	torture_onoff_cleanup();
+	mutex_lock(&fullstop_mutex);
+	torture_type = NULL;
+	mutex_unlock(&fullstop_mutex);
 	return false;
 }
 EXPORT_SYMBOL_GPL(torture_cleanup);
-- 
cgit 


From 6348675c4e3612e001860354fea78258e041d9a1 Mon Sep 17 00:00:00 2001
From: Pranith Kumar <pranith@gatech.edu>
Date: Wed, 16 Apr 2014 16:46:01 -0400
Subject: torture: Remove unused definition

The torture_parm() macro is the same as torture_param(), and torture_parm()
is not used.  This commit therefore removes torture_parm().

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 include/linux/torture.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index f998574247fd..5ca58fcbaf1b 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -49,12 +49,6 @@
 #define VERBOSE_TOROUT_ERRSTRING(s) \
 	do { if (verbose) pr_alert("%s" TORTURE_FLAG "!!! %s\n", torture_type, s); } while (0)
 
-/* Definitions for a non-string torture-test module parameter. */
-#define torture_parm(type, name, init, msg) \
-	static type name = init; \
-	module_param(name, type, 0444); \
-	MODULE_PARM_DESC(name, msg);
-
 /* Definitions for online/offline exerciser. */
 int torture_onoff_init(long ooholdoff, long oointerval);
 char *torture_onoff_stats(char *page);
-- 
cgit 


From f9f36917903b57c571b1ddcfc6bc794ca4dd8232 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <kefeng.wang@linaro.org>
Date: Wed, 14 May 2014 14:13:41 +0800
Subject: libahci_platform: add host_flags parameter in
 ahci_platform_init_host()

Add a dynamic host_flags argument to make ahci_platform_init_host more flexible,
then remove the AHCI_HFLAGS(...) argument from some driver's ata_port_info,
and pass that in as the new argument.

Cc: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Kefeng Wang <kefeng.wang@linaro.org>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/ahci_da850.c       | 3 ++-
 drivers/ata/ahci_imx.c         | 3 ++-
 drivers/ata/ahci_mvebu.c       | 3 ++-
 drivers/ata/ahci_platform.c    | 2 +-
 drivers/ata/ahci_st.c          | 2 +-
 drivers/ata/ahci_sunxi.c       | 9 ++++++---
 drivers/ata/ahci_xgene.c       | 7 +++++--
 drivers/ata/libahci_platform.c | 5 ++++-
 include/linux/ahci_platform.h  | 1 +
 9 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/ahci_da850.c b/drivers/ata/ahci_da850.c
index 2c83613ce2db..2b77d53bccf8 100644
--- a/drivers/ata/ahci_da850.c
+++ b/drivers/ata/ahci_da850.c
@@ -85,7 +85,8 @@ static int ahci_da850_probe(struct platform_device *pdev)
 
 	da850_sata_init(dev, pwrdn_reg, hpriv->mmio);
 
-	rc = ahci_platform_init_host(pdev, hpriv, &ahci_da850_port_info, 0, 0);
+	rc = ahci_platform_init_host(pdev, hpriv, &ahci_da850_port_info,
+				     0, 0, 0);
 	if (rc)
 		goto disable_resources;
 
diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c
index 497c7abe1c7d..e7e44a73e4fe 100644
--- a/drivers/ata/ahci_imx.c
+++ b/drivers/ata/ahci_imx.c
@@ -267,7 +267,8 @@ static int imx_ahci_probe(struct platform_device *pdev)
 	reg_val = clk_get_rate(imxpriv->ahb_clk) / 1000;
 	writel(reg_val, hpriv->mmio + HOST_TIMER1MS);
 
-	ret = ahci_platform_init_host(pdev, hpriv, &ahci_imx_port_info, 0, 0);
+	ret = ahci_platform_init_host(pdev, hpriv, &ahci_imx_port_info,
+				      0, 0, 0);
 	if (ret)
 		imx_sata_disable(hpriv);
 
diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c
index 1df8630c6b65..fd3dfd733b84 100644
--- a/drivers/ata/ahci_mvebu.c
+++ b/drivers/ata/ahci_mvebu.c
@@ -88,7 +88,8 @@ static int ahci_mvebu_probe(struct platform_device *pdev)
 	ahci_mvebu_mbus_config(hpriv, dram);
 	ahci_mvebu_regret_option(hpriv);
 
-	rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info, 0, 0);
+	rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info,
+				     0, 0, 0);
 	if (rc)
 		goto disable_resources;
 
diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
index ef67e79944f9..a476a1fd3f8f 100644
--- a/drivers/ata/ahci_platform.c
+++ b/drivers/ata/ahci_platform.c
@@ -55,7 +55,7 @@ static int ahci_probe(struct platform_device *pdev)
 			goto disable_resources;
 	}
 
-	rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info, 0, 0);
+	rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info, 0, 0, 0);
 	if (rc)
 		goto pdata_exit;
 
diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c
index 633222226c19..2595598df9ce 100644
--- a/drivers/ata/ahci_st.c
+++ b/drivers/ata/ahci_st.c
@@ -166,7 +166,7 @@ static int st_ahci_probe(struct platform_device *pdev)
 	if (err)
 		return err;
 
-	err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info, 0, 0);
+	err = ahci_platform_init_host(pdev, hpriv, &st_ahci_port_info, 0, 0, 0);
 	if (err) {
 		ahci_platform_disable_resources(hpriv);
 		return err;
diff --git a/drivers/ata/ahci_sunxi.c b/drivers/ata/ahci_sunxi.c
index 42d3f64e74b3..02002f125bd4 100644
--- a/drivers/ata/ahci_sunxi.c
+++ b/drivers/ata/ahci_sunxi.c
@@ -157,8 +157,6 @@ static void ahci_sunxi_start_engine(struct ata_port *ap)
 }
 
 static const struct ata_port_info ahci_sunxi_port_info = {
-	AHCI_HFLAGS(AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI |
-			  AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ),
 	.flags		= AHCI_FLAG_COMMON | ATA_FLAG_NCQ,
 	.pio_mask	= ATA_PIO4,
 	.udma_mask	= ATA_UDMA6,
@@ -169,6 +167,7 @@ static int ahci_sunxi_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct ahci_host_priv *hpriv;
+	unsigned long hflags;
 	int rc;
 
 	hpriv = ahci_platform_get_resources(pdev);
@@ -185,7 +184,11 @@ static int ahci_sunxi_probe(struct platform_device *pdev)
 	if (rc)
 		goto disable_resources;
 
-	rc = ahci_platform_init_host(pdev, hpriv, &ahci_sunxi_port_info, 0, 0);
+	hflags = AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI |
+		 AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ;
+
+	rc = ahci_platform_init_host(pdev, hpriv, &ahci_sunxi_port_info,
+				     hflags, 0, 0);
 	if (rc)
 		goto disable_resources;
 
diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c
index 77c89bf171f1..042a9bb45c86 100644
--- a/drivers/ata/ahci_xgene.c
+++ b/drivers/ata/ahci_xgene.c
@@ -303,7 +303,6 @@ static struct ata_port_operations xgene_ahci_ops = {
 };
 
 static const struct ata_port_info xgene_ahci_port_info = {
-	AHCI_HFLAGS(AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ),
 	.flags = AHCI_FLAG_COMMON | ATA_FLAG_NCQ,
 	.pio_mask = ATA_PIO4,
 	.udma_mask = ATA_UDMA6,
@@ -382,6 +381,7 @@ static int xgene_ahci_probe(struct platform_device *pdev)
 	struct ahci_host_priv *hpriv;
 	struct xgene_ahci_context *ctx;
 	struct resource *res;
+	unsigned long hflags;
 	int rc;
 
 	hpriv = ahci_platform_get_resources(pdev);
@@ -450,7 +450,10 @@ static int xgene_ahci_probe(struct platform_device *pdev)
 		goto disable_resources;
 	}
 
-	rc = ahci_platform_init_host(pdev, hpriv, &xgene_ahci_port_info, 0, 0);
+	hflags = AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ;
+
+	rc = ahci_platform_init_host(pdev, hpriv, &xgene_ahci_port_info,
+				     hflags, 0, 0);
 	if (rc)
 		goto disable_resources;
 
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 7cb3a85719c0..3a5b4ed25a4f 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -283,6 +283,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_get_resources);
  * @pdev: platform device pointer for the host
  * @hpriv: ahci-host private data for the host
  * @pi_template: template for the ata_port_info to use
+ * @host_flags: ahci host flags used in ahci_host_priv
  * @force_port_map: param passed to ahci_save_initial_config
  * @mask_port_map: param passed to ahci_save_initial_config
  *
@@ -296,6 +297,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_get_resources);
 int ahci_platform_init_host(struct platform_device *pdev,
 			    struct ahci_host_priv *hpriv,
 			    const struct ata_port_info *pi_template,
+			    unsigned long host_flags,
 			    unsigned int force_port_map,
 			    unsigned int mask_port_map)
 {
@@ -312,7 +314,8 @@ int ahci_platform_init_host(struct platform_device *pdev,
 	}
 
 	/* prepare host */
-	hpriv->flags |= (unsigned long)pi.private_data;
+	pi.private_data = (void *)host_flags;
+	hpriv->flags |= host_flags;
 
 	ahci_save_initial_config(dev, hpriv, force_port_map, mask_port_map);
 
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 1f16d502600c..6dfd51a04d77 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -44,6 +44,7 @@ struct ahci_host_priv *ahci_platform_get_resources(
 int ahci_platform_init_host(struct platform_device *pdev,
 			    struct ahci_host_priv *hpriv,
 			    const struct ata_port_info *pi_template,
+			    unsigned long host_flags,
 			    unsigned int force_port_map,
 			    unsigned int mask_port_map);
 
-- 
cgit 


From 1a56f2aa4752293e5a9c0c3a2331620aa1fdb808 Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Wed, 14 May 2014 13:43:37 +0900
Subject: workqueue: Remove deprecated flush[_delayed]_work_sync()

flush[_delayed]_work_sync() were deprecated by 4382973 ("workqueue:
deprecate flush[_delayed]_work_sync()") and have been deprecated
for a long time. In addition, these are not used anymore. So,
let's remove these functions.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 1b22c42e9c2d..aa92d0295e28 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -587,18 +587,6 @@ static inline bool keventd_up(void)
 	return system_wq != NULL;
 }
 
-/* used to be different but now identical to flush_work(), deprecated */
-static inline bool __deprecated flush_work_sync(struct work_struct *work)
-{
-	return flush_work(work);
-}
-
-/* used to be different but now identical to flush_delayed_work(), deprecated */
-static inline bool __deprecated flush_delayed_work_sync(struct delayed_work *dwork)
-{
-	return flush_delayed_work(dwork);
-}
-
 #ifndef CONFIG_SMP
 static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
 {
-- 
cgit 


From cf416171e7e1d966111f53bdae82f51af05e7bf8 Mon Sep 17 00:00:00 2001
From: Jingoo Han <jg1.han@samsung.com>
Date: Wed, 14 May 2014 13:58:06 +0900
Subject: workqueue: Remove deprecated system_nrt[_freezable]_wq

system_nrt[_freezable]_wq were deprecated by 3b07e9c ("workqueue:
deprecate system_nrt[_freezable]_wq") and have been deprecated
for a long time. In addition, these are not used anymore. So,
let's remove these functions.

Signed-off-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index aa92d0295e28..d93d28b2ec73 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -364,20 +364,6 @@ extern struct workqueue_struct *system_freezable_wq;
 extern struct workqueue_struct *system_power_efficient_wq;
 extern struct workqueue_struct *system_freezable_power_efficient_wq;
 
-static inline struct workqueue_struct * __deprecated __system_nrt_wq(void)
-{
-	return system_wq;
-}
-
-static inline struct workqueue_struct * __deprecated __system_nrt_freezable_wq(void)
-{
-	return system_freezable_wq;
-}
-
-/* equivlalent to system_wq and system_freezable_wq, deprecated */
-#define system_nrt_wq			__system_nrt_wq()
-#define system_nrt_freezable_wq		__system_nrt_freezable_wq()
-
 extern struct workqueue_struct *
 __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
 	struct lock_class_key *key, const char *lock_name, ...) __printf(1, 6);
-- 
cgit 


From 2eacc23c422b4553030168f315cb49522fa1b1f6 Mon Sep 17 00:00:00 2001
From: Yuval Atias <yuvala@mellanox.com>
Date: Wed, 14 May 2014 12:15:10 +0300
Subject: net/mlx4_core: Enforce irq affinity changes immediately

During heavy traffic, napi is constantly polling the completion queue
and no interrupt is fired. Because of that, changes to irq affinity are
ignored until traffic is stopped and resumed.

By registering with the irq notifier mechanism, and forcing an interrupt when
affinity is changed, irq affinity changes will be immediately enforced.

Signed-off-by: Yuval Atias <yuvala@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cq.c    |  3 ++
 drivers/net/ethernet/mellanox/mlx4/en_rx.c | 11 +++++-
 drivers/net/ethernet/mellanox/mlx4/en_tx.c |  6 +++
 drivers/net/ethernet/mellanox/mlx4/eq.c    | 62 ++++++++++++++++++++++++++++++
 include/linux/mlx4/device.h                |  3 ++
 5 files changed, 83 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 0487121e4a0f..8542030b89cf 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -293,6 +293,9 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
 	atomic_set(&cq->refcount, 1);
 	init_completion(&cq->free);
 
+	cq->irq = priv->eq_table.eq[cq->vector].irq;
+	cq->irq_affinity_change = false;
+
 	return 0;
 
 err_radix:
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index a1512450816d..e8c0d2b832b7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -895,10 +895,17 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 	mlx4_en_cq_unlock_napi(cq);
 
 	/* If we used up all the quota - we're probably not done yet... */
-	if (done == budget)
+	if (done == budget) {
 		INC_PERF_COUNTER(priv->pstats.napi_quota);
-	else {
+		if (unlikely(cq->mcq.irq_affinity_change)) {
+			cq->mcq.irq_affinity_change = false;
+			napi_complete(napi);
+			mlx4_en_arm_cq(priv, cq);
+			return 0;
+		}
+	} else {
 		/* Done for now */
+		cq->mcq.irq_affinity_change = false;
 		napi_complete(napi);
 		mlx4_en_arm_cq(priv, cq);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 89585c6311c3..cb964056d710 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -474,9 +474,15 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
 	/* If we used up all the quota - we're probably not done yet... */
 	if (done < budget) {
 		/* Done for now */
+		cq->mcq.irq_affinity_change = false;
 		napi_complete(napi);
 		mlx4_en_arm_cq(priv, cq);
 		return done;
+	} else if (unlikely(cq->mcq.irq_affinity_change)) {
+		cq->mcq.irq_affinity_change = false;
+		napi_complete(napi);
+		mlx4_en_arm_cq(priv, cq);
+		return 0;
 	}
 	return budget;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 6c088bc1845b..d954ec1eac17 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -53,6 +53,11 @@ enum {
 	MLX4_EQ_ENTRY_SIZE	= 0x20
 };
 
+struct mlx4_irq_notify {
+	void *arg;
+	struct irq_affinity_notify notify;
+};
+
 #define MLX4_EQ_STATUS_OK	   ( 0 << 28)
 #define MLX4_EQ_STATUS_WRITE_FAIL  (10 << 28)
 #define MLX4_EQ_OWNER_SW	   ( 0 << 24)
@@ -1083,6 +1088,57 @@ static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
 	iounmap(priv->clr_base);
 }
 
+static void mlx4_irq_notifier_notify(struct irq_affinity_notify *notify,
+				     const cpumask_t *mask)
+{
+	struct mlx4_irq_notify *n = container_of(notify,
+						 struct mlx4_irq_notify,
+						 notify);
+	struct mlx4_priv *priv = (struct mlx4_priv *)n->arg;
+	struct radix_tree_iter iter;
+	void **slot;
+
+	radix_tree_for_each_slot(slot, &priv->cq_table.tree, &iter, 0) {
+		struct mlx4_cq *cq = (struct mlx4_cq *)(*slot);
+
+		if (cq->irq == notify->irq)
+			cq->irq_affinity_change = true;
+	}
+}
+
+static void mlx4_release_irq_notifier(struct kref *ref)
+{
+	struct mlx4_irq_notify *n = container_of(ref, struct mlx4_irq_notify,
+						 notify.kref);
+	kfree(n);
+}
+
+static void mlx4_assign_irq_notifier(struct mlx4_priv *priv,
+				     struct mlx4_dev *dev, int irq)
+{
+	struct mlx4_irq_notify *irq_notifier = NULL;
+	int err = 0;
+
+	irq_notifier = kzalloc(sizeof(*irq_notifier), GFP_KERNEL);
+	if (!irq_notifier) {
+		mlx4_warn(dev, "Failed to allocate irq notifier. irq %d\n",
+			  irq);
+		return;
+	}
+
+	irq_notifier->notify.irq = irq;
+	irq_notifier->notify.notify = mlx4_irq_notifier_notify;
+	irq_notifier->notify.release = mlx4_release_irq_notifier;
+	irq_notifier->arg = priv;
+	err = irq_set_affinity_notifier(irq, &irq_notifier->notify);
+	if (err) {
+		kfree(irq_notifier);
+		irq_notifier = NULL;
+		mlx4_warn(dev, "Failed to set irq notifier. irq %d\n", irq);
+	}
+}
+
+
 int mlx4_alloc_eq_table(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1353,6 +1409,9 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
 				continue;
 				/*we dont want to break here*/
 			}
+			mlx4_assign_irq_notifier(priv, dev,
+						 priv->eq_table.eq[vec].irq);
+
 			eq_set_ci(&priv->eq_table.eq[vec], 1);
 		}
 	}
@@ -1379,6 +1438,9 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec)
 		  Belonging to a legacy EQ*/
 		mutex_lock(&priv->msix_ctl.pool_lock);
 		if (priv->msix_ctl.pool_bm & 1ULL << i) {
+			irq_set_affinity_notifier(
+				priv->eq_table.eq[vec].irq,
+				NULL);
 			free_irq(priv->eq_table.eq[vec].irq,
 				 &priv->eq_table.eq[vec]);
 			priv->msix_ctl.pool_bm &= ~(1ULL << i);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ba87bd21295a..c0468e6f0442 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -577,6 +577,9 @@ struct mlx4_cq {
 
 	u32			cons_index;
 
+	u16                     irq;
+	bool                    irq_affinity_change;
+
 	__be32		       *set_ci_db;
 	__be32		       *arm_db;
 	int			arm_sn;
-- 
cgit 


From 542ad4f8886b376dac9a4334bdb38f9c22a4d8da Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Apr 2014 15:08:29 -0700
Subject: Input: gpio_keys - convert struct descriptions to kernel-doc

This patch converts descriptions of the structures defined in
linux/gpio_keys.h to follow kernel-doc format.

There is no functional change.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/gpio_keys.h | 48 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h
index a7e977ff4abf..8b622468952c 100644
--- a/include/linux/gpio_keys.h
+++ b/include/linux/gpio_keys.h
@@ -3,29 +3,53 @@
 
 struct device;
 
+/**
+ * struct gpio_keys_button - configuration parameters
+ * @code:		input event code (KEY_*, SW_*)
+ * @gpio:		%-1 if this key does not support gpio
+ * @active_low:		%true indicates that button is considered
+ *			depressed when gpio is low
+ * @desc:		label that will be attached to button's gpio
+ * @type:		input event type (%EV_KEY, %EV_SW, %EV_ABS)
+ * @wakeup:		configure the button as a wake-up source
+ * @debounce_interval:	debounce ticks interval in msecs
+ * @can_disable:	%true indicates that userspace is allowed to
+ *			disable button via sysfs
+ * @value:		axis value for %EV_ABS
+ * @irq:		Irq number in case of interrupt keys
+ */
 struct gpio_keys_button {
-	/* Configuration parameters */
-	unsigned int code;	/* input event code (KEY_*, SW_*) */
-	int gpio;		/* -1 if this key does not support gpio */
+	unsigned int code;
+	int gpio;
 	int active_low;
 	const char *desc;
-	unsigned int type;	/* input event type (EV_KEY, EV_SW, EV_ABS) */
-	int wakeup;		/* configure the button as a wake-up source */
-	int debounce_interval;	/* debounce ticks interval in msecs */
+	unsigned int type;
+	int wakeup;
+	int debounce_interval;
 	bool can_disable;
-	int value;		/* axis value for EV_ABS */
-	unsigned int irq;	/* Irq number in case of interrupt keys */
+	int value;
+	unsigned int irq;
 };
 
+/**
+ * struct gpio_keys_platform_data - platform data for gpio_keys driver
+ * @buttons:		pointer to array of &gpio_keys_button structures
+ *			describing buttons attached to the device
+ * @nbuttons:		number of elements in @buttons array
+ * @poll_interval:	polling interval in msecs - for polling driver only
+ * @rep:		enable input subsystem auto repeat
+ * @enable:		platform hook for enabling the device
+ * @disable:		platform hook for disabling the device
+ * @name:		input device name
+ */
 struct gpio_keys_platform_data {
 	struct gpio_keys_button *buttons;
 	int nbuttons;
-	unsigned int poll_interval;	/* polling interval in msecs -
-					   for polling driver only */
-	unsigned int rep:1;		/* enable input subsystem auto repeat */
+	unsigned int poll_interval;
+	unsigned int rep:1;
 	int (*enable)(struct device *dev);
 	void (*disable)(struct device *dev);
-	const char *name;		/* input device name */
+	const char *name;
 };
 
 #endif
-- 
cgit 


From bf1de9761c21f56d5b0c6a0acd3b792d801c61e6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 28 Apr 2014 10:49:51 -0700
Subject: Input: implement managed polled input devices

Managed resources are becoming more and more popular in drivers. Let's
implement managed polled input devices, to complement managed regular input
devices.

Similarly to managed regular input devices, only one new call,
devm_input_allocate_polled_device(), is added; the rest of the APIs are
modified to work with both managed and non-managed devices.

Reviewed-by: David Herrmann <dh.herrmann@gmail.com>
Tested-by: Alexander Shiyan <shc_work@mail.ru>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/input-polldev.c | 118 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/input-polldev.h |   3 ++
 2 files changed, 119 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/input-polldev.c b/drivers/input/input-polldev.c
index 4b191908d5de..3664f81655ca 100644
--- a/drivers/input/input-polldev.c
+++ b/drivers/input/input-polldev.c
@@ -176,6 +176,91 @@ struct input_polled_dev *input_allocate_polled_device(void)
 }
 EXPORT_SYMBOL(input_allocate_polled_device);
 
+struct input_polled_devres {
+	struct input_polled_dev *polldev;
+};
+
+static int devm_input_polldev_match(struct device *dev, void *res, void *data)
+{
+	struct input_polled_devres *devres = res;
+
+	return devres->polldev == data;
+}
+
+static void devm_input_polldev_release(struct device *dev, void *res)
+{
+	struct input_polled_devres *devres = res;
+	struct input_polled_dev *polldev = devres->polldev;
+
+	dev_dbg(dev, "%s: dropping reference/freeing %s\n",
+		__func__, dev_name(&polldev->input->dev));
+
+	input_put_device(polldev->input);
+	kfree(polldev);
+}
+
+static void devm_input_polldev_unregister(struct device *dev, void *res)
+{
+	struct input_polled_devres *devres = res;
+	struct input_polled_dev *polldev = devres->polldev;
+
+	dev_dbg(dev, "%s: unregistering device %s\n",
+		__func__, dev_name(&polldev->input->dev));
+	input_unregister_device(polldev->input);
+
+	/*
+	 * Note that we are still holding extra reference to the input
+	 * device so it will stick around until devm_input_polldev_release()
+	 * is called.
+	 */
+}
+
+/**
+ * devm_input_allocate_polled_device - allocate managed polled device
+ * @dev: device owning the polled device being created
+ *
+ * Returns prepared &struct input_polled_dev or %NULL.
+ *
+ * Managed polled input devices do not need to be explicitly unregistered
+ * or freed as it will be done automatically when owner device unbinds
+ * from its driver (or binding fails). Once such managed polled device
+ * is allocated, it is ready to be set up and registered in the same
+ * fashion as regular polled input devices (using
+ * input_register_polled_device() function).
+ *
+ * If you want to manually unregister and free such managed polled devices,
+ * it can be still done by calling input_unregister_polled_device() and
+ * input_free_polled_device(), although it is rarely needed.
+ *
+ * NOTE: the owner device is set up as parent of input device and users
+ * should not override it.
+ */
+struct input_polled_dev *devm_input_allocate_polled_device(struct device *dev)
+{
+	struct input_polled_dev *polldev;
+	struct input_polled_devres *devres;
+
+	devres = devres_alloc(devm_input_polldev_release, sizeof(*devres),
+			      GFP_KERNEL);
+	if (!devres)
+		return NULL;
+
+	polldev = input_allocate_polled_device();
+	if (!polldev) {
+		devres_free(devres);
+		return NULL;
+	}
+
+	polldev->input->dev.parent = dev;
+	polldev->devres_managed = true;
+
+	devres->polldev = polldev;
+	devres_add(dev, devres);
+
+	return polldev;
+}
+EXPORT_SYMBOL(devm_input_allocate_polled_device);
+
 /**
  * input_free_polled_device - free memory allocated for polled device
  * @dev: device to free
@@ -186,7 +271,12 @@ EXPORT_SYMBOL(input_allocate_polled_device);
 void input_free_polled_device(struct input_polled_dev *dev)
 {
 	if (dev) {
-		input_free_device(dev->input);
+		if (dev->devres_managed)
+			WARN_ON(devres_destroy(dev->input->dev.parent,
+						devm_input_polldev_release,
+						devm_input_polldev_match,
+						dev));
+		input_put_device(dev->input);
 		kfree(dev);
 	}
 }
@@ -204,9 +294,19 @@ EXPORT_SYMBOL(input_free_polled_device);
  */
 int input_register_polled_device(struct input_polled_dev *dev)
 {
+	struct input_polled_devres *devres = NULL;
 	struct input_dev *input = dev->input;
 	int error;
 
+	if (dev->devres_managed) {
+		devres = devres_alloc(devm_input_polldev_unregister,
+				      sizeof(*devres), GFP_KERNEL);
+		if (!devres)
+			return -ENOMEM;
+
+		devres->polldev = dev;
+	}
+
 	input_set_drvdata(input, dev);
 	INIT_DELAYED_WORK(&dev->work, input_polled_device_work);
 
@@ -221,8 +321,10 @@ int input_register_polled_device(struct input_polled_dev *dev)
 	input->dev.groups = input_polldev_attribute_groups;
 
 	error = input_register_device(input);
-	if (error)
+	if (error) {
+		devres_free(devres);
 		return error;
+	}
 
 	/*
 	 * Take extra reference to the underlying input device so
@@ -233,6 +335,12 @@ int input_register_polled_device(struct input_polled_dev *dev)
 	 */
 	input_get_device(input);
 
+	if (dev->devres_managed) {
+		dev_dbg(input->dev.parent, "%s: registering %s with devres.\n",
+			__func__, dev_name(&input->dev));
+		devres_add(input->dev.parent, devres);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(input_register_polled_device);
@@ -247,6 +355,12 @@ EXPORT_SYMBOL(input_register_polled_device);
  */
 void input_unregister_polled_device(struct input_polled_dev *dev)
 {
+	if (dev->devres_managed)
+		WARN_ON(devres_destroy(dev->input->dev.parent,
+					devm_input_polldev_unregister,
+					devm_input_polldev_match,
+					dev));
+
 	input_unregister_device(dev->input);
 }
 EXPORT_SYMBOL(input_unregister_polled_device);
diff --git a/include/linux/input-polldev.h b/include/linux/input-polldev.h
index ce0b72464eb8..2465182670db 100644
--- a/include/linux/input-polldev.h
+++ b/include/linux/input-polldev.h
@@ -48,9 +48,12 @@ struct input_polled_dev {
 
 /* private: */
 	struct delayed_work work;
+
+	bool devres_managed;
 };
 
 struct input_polled_dev *input_allocate_polled_device(void);
+struct input_polled_dev *devm_input_allocate_polled_device(struct device *dev);
 void input_free_polled_device(struct input_polled_dev *dev);
 int input_register_polled_device(struct input_polled_dev *dev);
 void input_unregister_polled_device(struct input_polled_dev *dev);
-- 
cgit 


From a97181adf1502128e2945b4fef2591249c565467 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 12 May 2014 14:04:47 +0200
Subject: clk: sunxi: Fixup clk_sunxi_mmc_phase_control to take a clk rather
 than a clk_hw

__clk_get_hw is supposed to be used by clk providers, not clk consumers.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/sunxi/clk-sunxi.c | 3 ++-
 include/linux/clk/sunxi.h     | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c
index 59f90401b900..4cc2b2a5aa75 100644
--- a/drivers/clk/sunxi/clk-sunxi.c
+++ b/drivers/clk/sunxi/clk-sunxi.c
@@ -510,11 +510,12 @@ CLK_OF_DECLARE(sun7i_a20_gmac, "allwinner,sun7i-a20-gmac-clk",
  * clk_sunxi_mmc_phase_control() - configures MMC clock phase control
  */
 
-void clk_sunxi_mmc_phase_control(struct clk_hw *hw, u8 sample, u8 output)
+void clk_sunxi_mmc_phase_control(struct clk *clk, u8 sample, u8 output)
 {
 	#define to_clk_composite(_hw) container_of(_hw, struct clk_composite, hw)
 	#define to_clk_factors(_hw) container_of(_hw, struct clk_factors, hw)
 
+	struct clk_hw *hw = __clk_get_hw(clk);
 	struct clk_composite *composite = to_clk_composite(hw);
 	struct clk_hw *rate_hw = composite->rate_hw;
 	struct clk_factors *factors = to_clk_factors(rate_hw);
diff --git a/include/linux/clk/sunxi.h b/include/linux/clk/sunxi.h
index 1ef5c899e458..aed28c4451d9 100644
--- a/include/linux/clk/sunxi.h
+++ b/include/linux/clk/sunxi.h
@@ -17,6 +17,6 @@
 
 #include <linux/clk.h>
 
-void clk_sunxi_mmc_phase_control(struct clk_hw *hw, u8 sample, u8 output);
+void clk_sunxi_mmc_phase_control(struct clk *clk, u8 sample, u8 output);
 
 #endif
-- 
cgit 


From 4449bf927b61bdb4389393c6fea6837214d1ace7 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 6 May 2014 13:10:24 -0400
Subject: tracing: Add __bitmask() macro to trace events for cpumasks and other
 bitmasks

Being able to show a cpumask of events can be useful as some events
may affect only some CPUs. There is no standard way to record the
cpumask and converting it to a string is rather expensive during
the trace as traces happen in hotpaths. It would be better to record
the raw event mask and be able to parse it at print time.

The following macros were added for use with the TRACE_EVENT() macro:

  __bitmask()
  __assign_bitmask()
  __get_bitmask()

To test this, I added this to the sched_migrate_task event, which
looked like this:

TRACE_EVENT(sched_migrate_task,

	TP_PROTO(struct task_struct *p, int dest_cpu, const struct cpumask *cpus),

	TP_ARGS(p, dest_cpu, cpus),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	prio			)
		__field(	int,	orig_cpu		)
		__field(	int,	dest_cpu		)
		__bitmask(	cpumask, num_possible_cpus()	)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid		= p->pid;
		__entry->prio		= p->prio;
		__entry->orig_cpu	= task_cpu(p);
		__entry->dest_cpu	= dest_cpu;
		__assign_bitmask(cpumask, cpumask_bits(cpus), num_possible_cpus());
	),

	TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d cpumask=%s",
		  __entry->comm, __entry->pid, __entry->prio,
		  __entry->orig_cpu, __entry->dest_cpu,
		  __get_bitmask(cpumask))
);

With the output of:

        ksmtuned-3613  [003] d..2   485.220508: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=3 dest_cpu=2 cpumask=00000000,0000000f
     migration/1-13    [001] d..5   485.221202: sched_migrate_task: comm=ksmtuned pid=3614 prio=120 orig_cpu=1 dest_cpu=0 cpumask=00000000,0000000f
             awk-3615  [002] d.H5   485.221747: sched_migrate_task: comm=rcu_preempt pid=7 prio=120 orig_cpu=0 dest_cpu=1 cpumask=00000000,000000ff
     migration/2-18    [002] d..5   485.222062: sched_migrate_task: comm=ksmtuned pid=3615 prio=120 orig_cpu=2 dest_cpu=3 cpumask=00000000,0000000f

Link: http://lkml.kernel.org/r/1399377998-14870-6-git-send-email-javi.merino@arm.com
Link: http://lkml.kernel.org/r/20140506132238.22e136d1@gandalf.local.home

Suggested-by: Javi Merino <javi.merino@arm.com>
Tested-by: Javi Merino <javi.merino@arm.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |  3 +++
 include/linux/trace_seq.h    | 10 ++++++++
 include/trace/ftrace.h       | 57 +++++++++++++++++++++++++++++++++++++++++++-
 kernel/trace/trace_output.c  | 41 +++++++++++++++++++++++++++++++
 4 files changed, 110 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index d16da3e53bc7..cff3106ffe2c 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -38,6 +38,9 @@ const char *ftrace_print_symbols_seq_u64(struct trace_seq *p,
 								 *symbol_array);
 #endif
 
+const char *ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
+				     unsigned int bitmask_size);
+
 const char *ftrace_print_hex_seq(struct trace_seq *p,
 				 const unsigned char *buf, int len);
 
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index a32d86ec8bf2..136116924d8d 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -46,6 +46,9 @@ extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
 extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
 extern int trace_seq_path(struct trace_seq *s, const struct path *path);
 
+extern int trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+			     int nmaskbits);
+
 #else /* CONFIG_TRACING */
 static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 {
@@ -57,6 +60,13 @@ trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 	return 0;
 }
 
+static inline int
+trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+		  int nmaskbits)
+{
+	return 0;
+}
+
 static inline int trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
 	return 0;
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 0a1a4f7caf09..9b7a989dcbcc 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -53,6 +53,9 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
+
 #undef TP_STRUCT__entry
 #define TP_STRUCT__entry(args...) args
 
@@ -128,6 +131,9 @@
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 	struct ftrace_data_offsets_##call {				\
@@ -200,6 +206,15 @@
 #undef __get_str
 #define __get_str(field) (char *)__get_dynamic_array(field)
 
+#undef __get_bitmask
+#define __get_bitmask(field)						\
+	({								\
+		void *__bitmask = __get_dynamic_array(field);		\
+		unsigned int __bitmask_size;				\
+		__bitmask_size = (__entry->__data_loc_##field >> 16) & 0xffff; \
+		ftrace_print_bitmask_seq(p, __bitmask, __bitmask_size);	\
+	})
+
 #undef __print_flags
 #define __print_flags(flag, delim, flag_array...)			\
 	({								\
@@ -322,6 +337,9 @@ static struct trace_event_functions ftrace_event_type_funcs_##call = {	\
 #undef __string
 #define __string(item, src) __dynamic_array(char, item, -1)
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print)	\
 static int notrace __init						\
@@ -372,6 +390,29 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call)	\
 #define __string(item, src) __dynamic_array(char, item,			\
 		    strlen((src) ? (const char *)(src) : "(null)") + 1)
 
+/*
+ * __bitmask_size_in_bytes_raw is the number of bytes needed to hold
+ * num_possible_cpus().
+ */
+#define __bitmask_size_in_bytes_raw(nr_bits)	\
+	(((nr_bits) + 7) / 8)
+
+#define __bitmask_size_in_longs(nr_bits)			\
+	((__bitmask_size_in_bytes_raw(nr_bits) +		\
+	  ((BITS_PER_LONG / 8) - 1)) / (BITS_PER_LONG / 8))
+
+/*
+ * __bitmask_size_in_bytes is the number of bytes needed to hold
+ * num_possible_cpus() padded out to the nearest long. This is what
+ * is saved in the buffer, just to be consistent.
+ */
+#define __bitmask_size_in_bytes(nr_bits)				\
+	(__bitmask_size_in_longs(nr_bits) * (BITS_PER_LONG / 8))
+
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item,	\
+					 __bitmask_size_in_longs(nr_bits))
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static inline notrace int ftrace_get_offsets_##call(			\
@@ -513,12 +554,22 @@ static inline notrace int ftrace_get_offsets_##call(			\
 	__entry->__data_loc_##item = __data_offsets.item;
 
 #undef __string
-#define __string(item, src) __dynamic_array(char, item, -1)       	\
+#define __string(item, src) __dynamic_array(char, item, -1)
 
 #undef __assign_str
 #define __assign_str(dst, src)						\
 	strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)");
 
+#undef __bitmask
+#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+
+#undef __get_bitmask
+#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
+#undef __assign_bitmask
+#define __assign_bitmask(dst, src, nr_bits)					\
+	memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
+
 #undef TP_fast_assign
 #define TP_fast_assign(args...) args
 
@@ -586,6 +637,7 @@ static inline void ftrace_test_probe_##call(void)			\
 #undef __print_hex
 #undef __get_dynamic_array
 #undef __get_str
+#undef __get_bitmask
 
 #undef TP_printk
 #define TP_printk(fmt, args...) "\"" fmt "\", "  __stringify(args)
@@ -651,6 +703,9 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call
 #undef __get_str
 #define __get_str(field) (char *)__get_dynamic_array(field)
 
+#undef __get_bitmask
+#define __get_bitmask(field) (char *)__get_dynamic_array(field)
+
 #undef __perf_addr
 #define __perf_addr(a)	(__addr = (a))
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index a436de18aa99..f3dad80c20b2 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -125,6 +125,34 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 }
 EXPORT_SYMBOL_GPL(trace_seq_printf);
 
+/**
+ * trace_seq_bitmask - put a list of longs as a bitmask print output
+ * @s:		trace sequence descriptor
+ * @maskp:	points to an array of unsigned longs that represent a bitmask
+ * @nmaskbits:	The number of bits that are valid in @maskp
+ *
+ * It returns 0 if the trace oversizes the buffer's free
+ * space, 1 otherwise.
+ *
+ * Writes an ASCII representation of a bitmask string into @s.
+ */
+int
+trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
+		  int nmaskbits)
+{
+	int len = (PAGE_SIZE - 1) - s->len;
+	int ret;
+
+	if (s->full || !len)
+		return 0;
+
+	ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits);
+	s->len += ret;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(trace_seq_bitmask);
+
 /**
  * trace_seq_vprintf - sequence printing of trace information
  * @s: trace sequence descriptor
@@ -398,6 +426,19 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
 EXPORT_SYMBOL(ftrace_print_symbols_seq_u64);
 #endif
 
+const char *
+ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
+			 unsigned int bitmask_size)
+{
+	const char *ret = p->buffer + p->len;
+
+	trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8);
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ftrace_print_bitmask_seq);
+
 const char *
 ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
 {
-- 
cgit 


From c6e126de43e7d4abfd6cf796b40589db3a046167 Mon Sep 17 00:00:00 2001
From: Pawel Moll <pawel.moll@arm.com>
Date: Thu, 15 May 2014 16:55:24 +0100
Subject: of: Keep track of populated platform devices

In "Device Tree powered" systems, platform devices are usually massively
populated with of_platform_populate() call, executed at some level of
initcalls, either by generic architecture or by platform-specific code.

There are situations though where certain devices must be created (and
bound with drivers) before all the others. This presents a challenge,
as devices created explicitly would be created again by
of_platform_populate().

This patch tries to solve that issue in a generic way, adding a
"populated" flag to the DT node description. Subsequent
of_platform_populate() calls will skip such nodes (and their children)
in a similar way to the non-available ones.

This patch also adds of_platform_depopulate() as an operation
complementary to the _populate() one. It removes a platform or an amba
device populated from the Device Tree, together with all its children
(leaving, however, devices without an associated of_node untouched),
clearing the "populated" flag on the way.

Signed-off-by: Pawel Moll <pawel.moll@arm.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/of/platform.c       | 74 ++++++++++++++++++++++++++++++++++++++++++---
 include/linux/of.h          |  7 +++++
 include/linux/of_platform.h |  5 +++
 3 files changed, 81 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index bd47fbc53dc9..e8376d646d98 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -206,12 +206,13 @@ static struct platform_device *of_platform_device_create_pdata(
 {
 	struct platform_device *dev;
 
-	if (!of_device_is_available(np))
+	if (!of_device_is_available(np) ||
+	    of_node_test_and_set_flag(np, OF_POPULATED))
 		return NULL;
 
 	dev = of_device_alloc(np, bus_id, parent);
 	if (!dev)
-		return NULL;
+		goto err_clear_flag;
 
 #if defined(CONFIG_MICROBLAZE)
 	dev->archdata.dma_mask = 0xffffffffUL;
@@ -229,10 +230,14 @@ static struct platform_device *of_platform_device_create_pdata(
 
 	if (of_device_add(dev) != 0) {
 		platform_device_put(dev);
-		return NULL;
+		goto err_clear_flag;
 	}
 
 	return dev;
+
+err_clear_flag:
+	of_node_clear_flag(np, OF_POPULATED);
+	return NULL;
 }
 
 /**
@@ -264,14 +269,15 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
 
 	pr_debug("Creating amba device %s\n", node->full_name);
 
-	if (!of_device_is_available(node))
+	if (!of_device_is_available(node) ||
+	    of_node_test_and_set_flag(node, OF_POPULATED))
 		return NULL;
 
 	dev = amba_device_alloc(NULL, 0, 0);
 	if (!dev) {
 		pr_err("%s(): amba_device_alloc() failed for %s\n",
 		       __func__, node->full_name);
-		return NULL;
+		goto err_clear_flag;
 	}
 
 	/* setup generic device info */
@@ -311,6 +317,8 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
 
 err_free:
 	amba_device_put(dev);
+err_clear_flag:
+	of_node_clear_flag(node, OF_POPULATED);
 	return NULL;
 }
 #else /* CONFIG_ARM_AMBA */
@@ -487,4 +495,60 @@ int of_platform_populate(struct device_node *root,
 	return rc;
 }
 EXPORT_SYMBOL_GPL(of_platform_populate);
+
+static int of_platform_device_destroy(struct device *dev, void *data)
+{
+	bool *children_left = data;
+
+	/* Do not touch devices not populated from the device tree */
+	if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) {
+		*children_left = true;
+		return 0;
+	}
+
+	/* Recurse, but don't touch this device if it has any children left */
+	if (of_platform_depopulate(dev) != 0) {
+		*children_left = true;
+		return 0;
+	}
+
+	if (dev->bus == &platform_bus_type)
+		platform_device_unregister(to_platform_device(dev));
+#ifdef CONFIG_ARM_AMBA
+	else if (dev->bus == &amba_bustype)
+		amba_device_unregister(to_amba_device(dev));
+#endif
+	else {
+		*children_left = true;
+		return 0;
+	}
+
+	of_node_clear_flag(dev->of_node, OF_POPULATED);
+
+	return 0;
+}
+
+/**
+ * of_platform_depopulate() - Remove devices populated from device tree
+ * @parent: device whose children will be removed
+ *
+ * Complementary to of_platform_populate(), this function removes children
+ * of the given device (and, recursively, their children) that have been
+ * created from their respective device tree nodes (and only those,
+ * leaving others - eg. manually created - unharmed).
+ *
+ * Returns 0 when all children devices have been removed or
+ * -EBUSY when some children remained.
+ */
+int of_platform_depopulate(struct device *parent)
+{
+	bool children_left = false;
+
+	device_for_each_child(parent, &children_left,
+			      of_platform_device_destroy);
+
+	return children_left ? -EBUSY : 0;
+}
+EXPORT_SYMBOL_GPL(of_platform_depopulate);
+
 #endif /* CONFIG_OF_ADDRESS */
diff --git a/include/linux/of.h b/include/linux/of.h
index 3bad8d106e0e..4c50d0b78b89 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -130,6 +130,12 @@ static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
 	return test_bit(flag, &n->_flags);
 }
 
+static inline int of_node_test_and_set_flag(struct device_node *n,
+					    unsigned long flag)
+{
+	return test_and_set_bit(flag, &n->_flags);
+}
+
 static inline void of_node_set_flag(struct device_node *n, unsigned long flag)
 {
 	set_bit(flag, &n->_flags);
@@ -197,6 +203,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 /* flag descriptions */
 #define OF_DYNAMIC	1 /* node and properties were allocated via kmalloc */
 #define OF_DETACHED	2 /* node has been detached from the device tree */
+#define OF_POPULATED	3 /* device already created for the node */
 
 #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
 #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index 05cb4a928252..b1010eeaac0d 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -72,6 +72,7 @@ extern int of_platform_populate(struct device_node *root,
 				const struct of_device_id *matches,
 				const struct of_dev_auxdata *lookup,
 				struct device *parent);
+extern int of_platform_depopulate(struct device *parent);
 #else
 static inline int of_platform_populate(struct device_node *root,
 					const struct of_device_id *matches,
@@ -80,6 +81,10 @@ static inline int of_platform_populate(struct device_node *root,
 {
 	return -ENODEV;
 }
+static inline int of_platform_depopulate(struct device *parent)
+{
+	return -ENODEV;
+}
 #endif
 
 #endif	/* _LINUX_OF_PLATFORM_H */
-- 
cgit 


From 3b9334ac835bb431e2186645230c9f1eb94b5d49 Mon Sep 17 00:00:00 2001
From: Pawel Moll <pawel.moll@arm.com>
Date: Wed, 30 Apr 2014 16:46:29 +0100
Subject: mfd: vexpress: Convert custom func API to regmap

Components of the Versatile Express platform (configuration
microcontrollers on motherboard and daughterboards in particular)
talk to each other over a custom configuration bus. They
provide miscellaneous functions (from clock generator control
to energy sensors) which are represented as platform devices
(and Device Tree nodes). The transactions on the bus can
be generated by different "bridges" in the system, some
of which are universal for the whole platform (for the price
of high transfer latencies), others restricted to a subsystem
(but much faster).

Until now drivers for such functions were using custom "func"
API, which is being replaced in this patch by regmap calls.
This required:

* a rework (and move to drivers/bus directory, as suggested
  by Samuel and Arnd) of the config bus core, which is much
  simpler now and uses device model infrastructure (class)
  to keep track of the bridges; non-DT case (soon to be
  retired anyway) is simply covered by a special device
  registration function

* the new config-bus driver also takes over device population,
  so there is no need for special matching table for
  of_platform_populate nor "simple-bus" hack in the arm64
  model dtsi file (relevant bindings documentation has
  been updated); this allows all the vexpress devices
  to fit into the normal device model, making it possible
  to remove plenty of early inits and other hacks in
  the near future

* adaptation of the syscfg bridge implementation in the
  sysreg driver, again making it much simpler; there is
  a special case of the "energy" function spanning two
  registers, where they should be both defined in the tree
  now, but backward compatibility is maintained in the code

* modification of the relevant drivers:

  * hwmon - just a straight-forward API change
  * power/reset driver - API change
  * regulator - API change plus error handling
    simplification
  * osc clock driver - this one required a larger rework
    in order to turn it into a standard platform driver

Signed-off-by: Pawel Moll <pawel.moll@arm.com>
Acked-by: Mark Brown <broonie@linaro.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Acked-by: Mike Turquette <mturquette@linaro.org>
---
 .../devicetree/bindings/arm/vexpress-sysreg.txt    |  43 ++-
 Documentation/devicetree/bindings/arm/vexpress.txt |  15 +-
 arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts         |   5 +-
 arch/arm/mach-vexpress/ct-ca9x4.c                  |  10 +-
 arch/arm/mach-vexpress/v2m.c                       |  18 +-
 arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi       |   2 +-
 drivers/bus/Kconfig                                |   9 +
 drivers/bus/Makefile                               |   2 +
 drivers/bus/vexpress-config.c                      | 202 +++++++++++
 drivers/clk/versatile/clk-vexpress-osc.c           |  96 +++--
 drivers/hwmon/vexpress.c                           |  17 +-
 drivers/mfd/Makefile                               |   2 +-
 drivers/mfd/vexpress-config.c                      | 287 ---------------
 drivers/mfd/vexpress-sysreg.c                      | 395 +++++++++++----------
 drivers/power/reset/vexpress-poweroff.c            |  16 +-
 drivers/regulator/vexpress.c                       |  50 +--
 include/linux/vexpress.h                           |  79 +----
 17 files changed, 568 insertions(+), 680 deletions(-)
 create mode 100644 drivers/bus/vexpress-config.c
 delete mode 100644 drivers/mfd/vexpress-config.c

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt b/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
index 5580e9c4bd85..57b423f78995 100644
--- a/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
+++ b/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
@@ -27,24 +27,45 @@ Example:
 This block also can also act a bridge to the platform's configuration
 bus via "system control" interface, addressing devices with site number,
 position in the board stack, config controller, function and device
-numbers - see motherboard's TRM for more details.
-
-The node describing a config device must refer to the sysreg node via
-"arm,vexpress,config-bridge" phandle (can be also defined in the node's
-parent) and relies on the board topology properties - see main vexpress
-node documentation for more details. It must also define the following
-property:
-- arm,vexpress-sysreg,func : must contain two cells:
-  - first cell defines function number (eg. 1 for clock generator,
-    2 for voltage regulators etc.)
-  - device number (eg. osc 0, osc 1 etc.)
+numbers - see motherboard's TRM for more details. All configuration
+controllers accessible via this interface must reference the sysreg
+node via "arm,vexpress,config-bridge" phandle and define appropriate
+topology properties - see main vexpress node documentation for more
+details. Each child of such a node describes one function and must
+define the following properties:
+- compatible value : must be one of (corresponding to the TRM):
+	"arm,vexpress-amp"
+	"arm,vexpress-dvimode"
+	"arm,vexpress-energy"
+	"arm,vexpress-muxfpga"
+	"arm,vexpress-osc"
+	"arm,vexpress-power"
+	"arm,vexpress-reboot"
+	"arm,vexpress-reset"
+	"arm,vexpress-scc"
+	"arm,vexpress-shutdown"
+	"arm,vexpress-temp"
+	"arm,vexpress-volt"
+- arm,vexpress-sysreg,func : must contain one or more two-cell groups:
+  - first cell of each group defines the function number
+    (eg. 1 for clock generator, 2 for voltage regulators etc.)
+  - second cell of each group defines device number (eg. osc 0,
+    osc 1 etc.)
+  - some functions (eg. energy meter, with its 64-bit counter)
+    use more than one function/device number pair
 
 Example:
 	mcc {
+		compatible = "arm,vexpress,config-bus";
 		arm,vexpress,config-bridge = <&v2m_sysreg>;
 
 		osc@0 {
 			compatible = "arm,vexpress-osc";
 			arm,vexpress-sysreg,func = <1 0>;
 		};
+
+		energy@0 {
+			compatible = "arm,vexpress-energy";
+			arm,vexpress-sysreg,func = <13 0>, <13 1>;
+		};
 	};
diff --git a/Documentation/devicetree/bindings/arm/vexpress.txt b/Documentation/devicetree/bindings/arm/vexpress.txt
index ae49161e478a..39844cd0bcce 100644
--- a/Documentation/devicetree/bindings/arm/vexpress.txt
+++ b/Documentation/devicetree/bindings/arm/vexpress.txt
@@ -80,12 +80,17 @@ but also control clock generators, voltage regulators, gather
 environmental data like temperature, power consumption etc. Even
 the video output switch (FPGA) is controlled that way.
 
-Nodes describing devices controlled by this infrastructure should
-point at the bridge device node:
+The controllers are not mapped into normal memory address space
+and must be accessed through bridges - other devices capable
+of generating transactions on the configuration bus.
+
+The nodes describing configuration controllers must define
+the following properties:
+- compatible value:
+	compatible = "arm,vexpress,config-bus";
 - bridge phandle:
 	arm,vexpress,config-bridge = <phandle>;
-This property can be also defined in a parent node (eg. for a DCC)
-and is effective for all children.
+and children describing available functions.
 
 
 Platform topology
@@ -197,7 +202,7 @@ Example of a VE tile description (simplified)
 	};
 
 	dcc {
-		compatible = "simple-bus";
+		compatible = "arm,vexpress,config-bus";
 		arm,vexpress,config-bridge = <&v2m_sysreg>;
 
 		osc@0 {
diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
index 15f98cbcb75a..a25c262326dc 100644
--- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
+++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts
@@ -312,6 +312,7 @@
 			arm,vexpress-sysreg,func = <12 0>;
 			label = "A15 Pcore";
 		};
+
 		power@1 {
 			/* Total power for the three A7 cores */
 			compatible = "arm,vexpress-power";
@@ -322,14 +323,14 @@
 		energy@0 {
 			/* Total energy for the two A15 cores */
 			compatible = "arm,vexpress-energy";
-			arm,vexpress-sysreg,func = <13 0>;
+			arm,vexpress-sysreg,func = <13 0>, <13 1>;
 			label = "A15 Jcore";
 		};
 
 		energy@2 {
 			/* Total energy for the three A7 cores */
 			compatible = "arm,vexpress-energy";
-			arm,vexpress-sysreg,func = <13 2>;
+			arm,vexpress-sysreg,func = <13 2>, <13 3>;
 			label = "A7 Jcore";
 		};
 	};
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index 6f34497a4245..35e394aa00e5 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -128,6 +128,10 @@ static struct platform_device pmu_device = {
 	.resource	= pmu_resources,
 };
 
+static struct clk_lookup osc1_lookup = {
+	.dev_id		= "ct:clcd",
+};
+
 static struct platform_device osc1_device = {
 	.name		= "vexpress-osc",
 	.id		= 1,
@@ -135,6 +139,7 @@ static struct platform_device osc1_device = {
 	.resource	= (struct resource []) {
 		VEXPRESS_RES_FUNC(0xf, 1),
 	},
+	.dev.platform_data = &osc1_lookup,
 };
 
 static void __init ct_ca9x4_init(void)
@@ -155,10 +160,7 @@ static void __init ct_ca9x4_init(void)
 		amba_device_register(ct_ca9x4_amba_devs[i], &iomem_resource);
 
 	platform_device_register(&pmu_device);
-	platform_device_register(&osc1_device);
-
-	WARN_ON(clk_register_clkdev(vexpress_osc_setup(&osc1_device.dev),
-			NULL, "ct:clcd"));
+	vexpress_sysreg_config_device_register(&osc1_device);
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index 4f8b8cb17ff5..ac95220a5019 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -340,11 +340,6 @@ static void __init v2m_init(void)
 	regulator_register_fixed(0, v2m_eth_supplies,
 			ARRAY_SIZE(v2m_eth_supplies));
 
-	platform_device_register(&v2m_muxfpga_device);
-	platform_device_register(&v2m_shutdown_device);
-	platform_device_register(&v2m_reboot_device);
-	platform_device_register(&v2m_dvimode_device);
-
 	platform_device_register(&v2m_sysreg_device);
 	platform_device_register(&v2m_pcie_i2c_device);
 	platform_device_register(&v2m_ddc_i2c_device);
@@ -356,6 +351,11 @@ static void __init v2m_init(void)
 	for (i = 0; i < ARRAY_SIZE(v2m_amba_devs); i++)
 		amba_device_register(v2m_amba_devs[i], &iomem_resource);
 
+	vexpress_sysreg_config_device_register(&v2m_muxfpga_device);
+	vexpress_sysreg_config_device_register(&v2m_shutdown_device);
+	vexpress_sysreg_config_device_register(&v2m_reboot_device);
+	vexpress_sysreg_config_device_register(&v2m_dvimode_device);
+
 	ct_desc->init_tile();
 }
 
@@ -423,17 +423,11 @@ void __init v2m_dt_init_early(void)
 	versatile_sched_clock_init(vexpress_get_24mhz_clock_base(), 24000000);
 }
 
-static const struct of_device_id v2m_dt_bus_match[] __initconst = {
-	{ .compatible = "simple-bus", },
-	{ .compatible = "arm,amba-bus", },
-	{ .compatible = "arm,vexpress,config-bus", },
-	{}
-};
 
 static void __init v2m_dt_init(void)
 {
 	l2x0_of_init(0x00400000, 0xfe0fffff);
-	of_platform_populate(NULL, v2m_dt_bus_match, NULL, NULL);
+	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
 static const char * const v2m_dt_match[] __initconst = {
diff --git a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
index 2f2ecd217363..ac2cb2418025 100644
--- a/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
+++ b/arch/arm64/boot/dts/rtsm_ve-motherboard.dtsi
@@ -200,7 +200,7 @@
 		};
 
 		mcc {
-			compatible = "arm,vexpress,config-bus", "simple-bus";
+			compatible = "arm,vexpress,config-bus";
 			arm,vexpress,config-bridge = <&v2m_sysreg>;
 
 			v2m_oscclk1: osc@1 {
diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig
index 552373c4e362..f24e79dd51bf 100644
--- a/drivers/bus/Kconfig
+++ b/drivers/bus/Kconfig
@@ -41,4 +41,13 @@ config ARM_CCI
 	help
 	  Driver supporting the CCI cache coherent interconnect for ARM
 	  platforms.
+
+config VEXPRESS_CONFIG
+	bool "Versatile Express configuration bus"
+	default y if ARCH_VEXPRESS
+	depends on ARM || ARM64
+	select REGMAP
+	help
+	  Platform configuration infrastructure for the ARM Ltd.
+	  Versatile Express.
 endmenu
diff --git a/drivers/bus/Makefile b/drivers/bus/Makefile
index 8947bdd0de8b..f095aa771de9 100644
--- a/drivers/bus/Makefile
+++ b/drivers/bus/Makefile
@@ -10,3 +10,5 @@ obj-$(CONFIG_OMAP_OCP2SCP)	+= omap-ocp2scp.o
 obj-$(CONFIG_OMAP_INTERCONNECT)	+= omap_l3_smx.o omap_l3_noc.o
 # CCI cache coherent interconnect for ARM platforms
 obj-$(CONFIG_ARM_CCI)		+= arm-cci.o
+
+obj-$(CONFIG_VEXPRESS_CONFIG)	+= vexpress-config.o
diff --git a/drivers/bus/vexpress-config.c b/drivers/bus/vexpress-config.c
new file mode 100644
index 000000000000..27a07dfcd626
--- /dev/null
+++ b/drivers/bus/vexpress-config.c
@@ -0,0 +1,202 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2014 ARM Limited
+ */
+
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/vexpress.h>
+
+
+struct vexpress_config_bridge {
+	struct vexpress_config_bridge_ops *ops;
+	void *context;
+};
+
+
+static DEFINE_MUTEX(vexpress_config_mutex);
+static struct class *vexpress_config_class;
+static u32 vexpress_config_site_master = VEXPRESS_SITE_MASTER;
+
+
+void vexpress_config_set_master(u32 site)
+{
+	vexpress_config_site_master = site;
+}
+
+u32 vexpress_config_get_master(void)
+{
+	return vexpress_config_site_master;
+}
+
+void vexpress_config_lock(void *arg)
+{
+	mutex_lock(&vexpress_config_mutex);
+}
+
+void vexpress_config_unlock(void *arg)
+{
+	mutex_unlock(&vexpress_config_mutex);
+}
+
+
+static void vexpress_config_find_prop(struct device_node *node,
+		const char *name, u32 *val)
+{
+	/* Default value */
+	*val = 0;
+
+	of_node_get(node);
+	while (node) {
+		if (of_property_read_u32(node, name, val) == 0) {
+			of_node_put(node);
+			return;
+		}
+		node = of_get_next_parent(node);
+	}
+}
+
+int vexpress_config_get_topo(struct device_node *node, u32 *site,
+		u32 *position, u32 *dcc)
+{
+	vexpress_config_find_prop(node, "arm,vexpress,site", site);
+	if (*site == VEXPRESS_SITE_MASTER)
+		*site = vexpress_config_site_master;
+	if (WARN_ON(vexpress_config_site_master == VEXPRESS_SITE_MASTER))
+		return -EINVAL;
+	vexpress_config_find_prop(node, "arm,vexpress,position", position);
+	vexpress_config_find_prop(node, "arm,vexpress,dcc", dcc);
+
+	return 0;
+}
+
+
+static void vexpress_config_devres_release(struct device *dev, void *res)
+{
+	struct vexpress_config_bridge *bridge = dev_get_drvdata(dev->parent);
+	struct regmap *regmap = res;
+
+	bridge->ops->regmap_exit(regmap, bridge->context);
+}
+
+struct regmap *devm_regmap_init_vexpress_config(struct device *dev)
+{
+	struct vexpress_config_bridge *bridge;
+	struct regmap *regmap;
+	struct regmap **res;
+
+	if (WARN_ON(dev->parent->class != vexpress_config_class))
+		return ERR_PTR(-ENODEV);
+
+	bridge = dev_get_drvdata(dev->parent);
+	if (WARN_ON(!bridge))
+		return ERR_PTR(-EINVAL);
+
+	res = devres_alloc(vexpress_config_devres_release, sizeof(*res),
+			GFP_KERNEL);
+	if (!res)
+		return ERR_PTR(-ENOMEM);
+
+	regmap = bridge->ops->regmap_init(dev, bridge->context);
+	if (IS_ERR(regmap)) {
+		devres_free(res);
+		return regmap;
+	}
+
+	*res = regmap;
+	devres_add(dev, res);
+
+	return regmap;
+}
+
+
+struct device *vexpress_config_bridge_register(struct device *parent,
+		struct vexpress_config_bridge_ops *ops, void *context)
+{
+	struct device *dev;
+	struct vexpress_config_bridge *bridge;
+
+	if (!vexpress_config_class) {
+		vexpress_config_class = class_create(THIS_MODULE,
+				"vexpress-config");
+		if (IS_ERR(vexpress_config_class))
+			return (void *)vexpress_config_class;
+	}
+
+	dev = device_create(vexpress_config_class, parent, 0,
+			NULL, "%s.bridge", dev_name(parent));
+
+	if (IS_ERR(dev))
+		return dev;
+
+	bridge = devm_kmalloc(dev, sizeof(*bridge), GFP_KERNEL);
+	if (!bridge) {
+		put_device(dev);
+		device_unregister(dev);
+		return ERR_PTR(-ENOMEM);
+	}
+	bridge->ops = ops;
+	bridge->context = context;
+
+	dev_set_drvdata(dev, bridge);
+
+	dev_dbg(parent, "Registered bridge '%s', parent node %p\n",
+			dev_name(dev), parent->of_node);
+
+	return dev;
+}
+
+
+static int vexpress_config_node_match(struct device *dev, const void *data)
+{
+	const struct device_node *node = data;
+
+	dev_dbg(dev, "Parent node %p, looking for %p\n",
+			dev->parent->of_node, node);
+
+	return dev->parent->of_node == node;
+}
+
+static int vexpress_config_populate(struct device_node *node)
+{
+	struct device_node *bridge;
+	struct device *parent;
+
+	bridge = of_parse_phandle(node, "arm,vexpress,config-bridge", 0);
+	if (!bridge)
+		return -EINVAL;
+
+	parent = class_find_device(vexpress_config_class, NULL, bridge,
+			vexpress_config_node_match);
+	if (WARN_ON(!parent))
+		return -ENODEV;
+
+	return of_platform_populate(node, NULL, NULL, parent);
+}
+
+static int __init vexpress_config_init(void)
+{
+	int err = 0;
+	struct device_node *node;
+
+	/* Need the config devices early, before the "normal" devices... */
+	for_each_compatible_node(node, NULL, "arm,vexpress,config-bus") {
+		err = vexpress_config_populate(node);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+postcore_initcall(vexpress_config_init);
+
diff --git a/drivers/clk/versatile/clk-vexpress-osc.c b/drivers/clk/versatile/clk-vexpress-osc.c
index 422391242b39..529a59c0fbfa 100644
--- a/drivers/clk/versatile/clk-vexpress-osc.c
+++ b/drivers/clk/versatile/clk-vexpress-osc.c
@@ -11,8 +11,6 @@
  * Copyright (C) 2012 ARM Limited
  */
 
-#define pr_fmt(fmt) "vexpress-osc: " fmt
-
 #include <linux/clkdev.h>
 #include <linux/clk-provider.h>
 #include <linux/err.h>
@@ -22,7 +20,7 @@
 #include <linux/vexpress.h>
 
 struct vexpress_osc {
-	struct vexpress_config_func *func;
+	struct regmap *reg;
 	struct clk_hw hw;
 	unsigned long rate_min;
 	unsigned long rate_max;
@@ -36,7 +34,7 @@ static unsigned long vexpress_osc_recalc_rate(struct clk_hw *hw,
 	struct vexpress_osc *osc = to_vexpress_osc(hw);
 	u32 rate;
 
-	vexpress_config_read(osc->func, 0, &rate);
+	regmap_read(osc->reg, 0, &rate);
 
 	return rate;
 }
@@ -60,7 +58,7 @@ static int vexpress_osc_set_rate(struct clk_hw *hw, unsigned long rate,
 {
 	struct vexpress_osc *osc = to_vexpress_osc(hw);
 
-	return vexpress_config_write(osc->func, 0, rate);
+	return regmap_write(osc->reg, 0, rate);
 }
 
 static struct clk_ops vexpress_osc_ops = {
@@ -70,58 +68,31 @@ static struct clk_ops vexpress_osc_ops = {
 };
 
 
-struct clk * __init vexpress_osc_setup(struct device *dev)
-{
-	struct clk_init_data init;
-	struct vexpress_osc *osc = kzalloc(sizeof(*osc), GFP_KERNEL);
-
-	if (!osc)
-		return NULL;
-
-	osc->func = vexpress_config_func_get_by_dev(dev);
-	if (!osc->func) {
-		kfree(osc);
-		return NULL;
-	}
-
-	init.name = dev_name(dev);
-	init.ops = &vexpress_osc_ops;
-	init.flags = CLK_IS_ROOT;
-	init.num_parents = 0;
-	osc->hw.init = &init;
-
-	return clk_register(NULL, &osc->hw);
-}
-
-void __init vexpress_osc_of_setup(struct device_node *node)
+static int vexpress_osc_probe(struct platform_device *pdev)
 {
+	struct clk_lookup *cl = pdev->dev.platform_data; /* Non-DT lookup */
 	struct clk_init_data init;
 	struct vexpress_osc *osc;
 	struct clk *clk;
 	u32 range[2];
 
-	vexpress_sysreg_of_early_init();
-
-	osc = kzalloc(sizeof(*osc), GFP_KERNEL);
+	osc = devm_kzalloc(&pdev->dev, sizeof(*osc), GFP_KERNEL);
 	if (!osc)
-		return;
+		return -ENOMEM;
 
-	osc->func = vexpress_config_func_get_by_node(node);
-	if (!osc->func) {
-		pr_err("Failed to obtain config func for node '%s'!\n",
-				node->full_name);
-		goto error;
-	}
+	osc->reg = devm_regmap_init_vexpress_config(&pdev->dev);
+	if (IS_ERR(osc->reg))
+		return PTR_ERR(osc->reg);
 
-	if (of_property_read_u32_array(node, "freq-range", range,
+	if (of_property_read_u32_array(pdev->dev.of_node, "freq-range", range,
 			ARRAY_SIZE(range)) == 0) {
 		osc->rate_min = range[0];
 		osc->rate_max = range[1];
 	}
 
-	of_property_read_string(node, "clock-output-names", &init.name);
-	if (!init.name)
-		init.name = node->full_name;
+	if (of_property_read_string(pdev->dev.of_node, "clock-output-names",
+			&init.name) != 0)
+		init.name = dev_name(&pdev->dev);
 
 	init.ops = &vexpress_osc_ops;
 	init.flags = CLK_IS_ROOT;
@@ -130,20 +101,37 @@ void __init vexpress_osc_of_setup(struct device_node *node)
 	osc->hw.init = &init;
 
 	clk = clk_register(NULL, &osc->hw);
-	if (IS_ERR(clk)) {
-		pr_err("Failed to register clock '%s'!\n", init.name);
-		goto error;
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	of_clk_add_provider(pdev->dev.of_node, of_clk_src_simple_get, clk);
+
+	/* Only happens for non-DT cases */
+	if (cl) {
+		cl->clk = clk;
+		clkdev_add(cl);
 	}
 
-	of_clk_add_provider(node, of_clk_src_simple_get, clk);
+	dev_dbg(&pdev->dev, "Registered clock '%s'\n", init.name);
+
+	return 0;
+}
 
-	pr_debug("Registered clock '%s'\n", init.name);
+static struct of_device_id vexpress_osc_of_match[] = {
+	{ .compatible = "arm,vexpress-osc", },
+	{}
+};
 
-	return;
+static struct platform_driver vexpress_osc_driver = {
+	.driver	= {
+		.name = "vexpress-osc",
+		.of_match_table = vexpress_osc_of_match,
+	},
+	.probe = vexpress_osc_probe,
+};
 
-error:
-	if (osc->func)
-		vexpress_config_func_put(osc->func);
-	kfree(osc);
+static int __init vexpress_osc_init(void)
+{
+	return platform_driver_register(&vexpress_osc_driver);
 }
-CLK_OF_DECLARE(vexpress_soc, "arm,vexpress-osc", vexpress_osc_of_setup);
+core_initcall(vexpress_osc_init);
diff --git a/drivers/hwmon/vexpress.c b/drivers/hwmon/vexpress.c
index 8242b75d96c8..611f34c7333d 100644
--- a/drivers/hwmon/vexpress.c
+++ b/drivers/hwmon/vexpress.c
@@ -26,7 +26,7 @@
 
 struct vexpress_hwmon_data {
 	struct device *hwmon_dev;
-	struct vexpress_config_func *func;
+	struct regmap *reg;
 	const char *name;
 };
 
@@ -53,7 +53,7 @@ static ssize_t vexpress_hwmon_u32_show(struct device *dev,
 	int err;
 	u32 value;
 
-	err = vexpress_config_read(data->func, 0, &value);
+	err = regmap_read(data->reg, 0, &value);
 	if (err)
 		return err;
 
@@ -68,11 +68,11 @@ static ssize_t vexpress_hwmon_u64_show(struct device *dev,
 	int err;
 	u32 value_hi, value_lo;
 
-	err = vexpress_config_read(data->func, 0, &value_lo);
+	err = regmap_read(data->reg, 0, &value_lo);
 	if (err)
 		return err;
 
-	err = vexpress_config_read(data->func, 1, &value_hi);
+	err = regmap_read(data->reg, 1, &value_hi);
 	if (err)
 		return err;
 
@@ -234,9 +234,9 @@ static int vexpress_hwmon_probe(struct platform_device *pdev)
 	type = match->data;
 	data->name = type->name;
 
-	data->func = vexpress_config_func_get_by_dev(&pdev->dev);
-	if (!data->func)
-		return -ENODEV;
+	data->reg = devm_regmap_init_vexpress_config(&pdev->dev);
+	if (IS_ERR(data->reg))
+		return PTR_ERR(data->reg);
 
 	err = sysfs_create_groups(&pdev->dev.kobj, type->attr_groups);
 	if (err)
@@ -252,7 +252,6 @@ static int vexpress_hwmon_probe(struct platform_device *pdev)
 
 error:
 	sysfs_remove_group(&pdev->dev.kobj, match->data);
-	vexpress_config_func_put(data->func);
 	return err;
 }
 
@@ -266,8 +265,6 @@ static int vexpress_hwmon_remove(struct platform_device *pdev)
 	match = of_match_device(vexpress_hwmon_of_match, &pdev->dev);
 	sysfs_remove_group(&pdev->dev.kobj, match->data);
 
-	vexpress_config_func_put(data->func);
-
 	return 0;
 }
 
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 2851275e2656..9ba838eb5131 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -161,7 +161,7 @@ obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
 obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
 obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
-obj-$(CONFIG_VEXPRESS_CONFIG)	+= vexpress-config.o vexpress-sysreg.o
+obj-$(CONFIG_VEXPRESS_CONFIG)	+= vexpress-sysreg.o
 obj-$(CONFIG_MFD_RETU)		+= retu-mfd.o
 obj-$(CONFIG_MFD_AS3711)	+= as3711.o
 obj-$(CONFIG_MFD_AS3722)	+= as3722.o
diff --git a/drivers/mfd/vexpress-config.c b/drivers/mfd/vexpress-config.c
deleted file mode 100644
index d0db89d13e01..000000000000
--- a/drivers/mfd/vexpress-config.c
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * Copyright (C) 2012 ARM Limited
- */
-
-#define pr_fmt(fmt) "vexpress-config: " fmt
-
-#include <linux/bitops.h>
-#include <linux/completion.h>
-#include <linux/export.h>
-#include <linux/list.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/vexpress.h>
-
-
-#define VEXPRESS_CONFIG_MAX_BRIDGES 2
-
-static struct vexpress_config_bridge {
-	struct device_node *node;
-	struct vexpress_config_bridge_info *info;
-	struct list_head transactions;
-	spinlock_t transactions_lock;
-} vexpress_config_bridges[VEXPRESS_CONFIG_MAX_BRIDGES];
-
-static DECLARE_BITMAP(vexpress_config_bridges_map,
-		ARRAY_SIZE(vexpress_config_bridges));
-static DEFINE_MUTEX(vexpress_config_bridges_mutex);
-
-struct vexpress_config_bridge *vexpress_config_bridge_register(
-		struct device_node *node,
-		struct vexpress_config_bridge_info *info)
-{
-	struct vexpress_config_bridge *bridge;
-	int i;
-
-	pr_debug("Registering bridge '%s'\n", info->name);
-
-	mutex_lock(&vexpress_config_bridges_mutex);
-	i = find_first_zero_bit(vexpress_config_bridges_map,
-			ARRAY_SIZE(vexpress_config_bridges));
-	if (i >= ARRAY_SIZE(vexpress_config_bridges)) {
-		pr_err("Can't register more bridges!\n");
-		mutex_unlock(&vexpress_config_bridges_mutex);
-		return NULL;
-	}
-	__set_bit(i, vexpress_config_bridges_map);
-	bridge = &vexpress_config_bridges[i];
-
-	bridge->node = node;
-	bridge->info = info;
-	INIT_LIST_HEAD(&bridge->transactions);
-	spin_lock_init(&bridge->transactions_lock);
-
-	mutex_unlock(&vexpress_config_bridges_mutex);
-
-	return bridge;
-}
-EXPORT_SYMBOL(vexpress_config_bridge_register);
-
-void vexpress_config_bridge_unregister(struct vexpress_config_bridge *bridge)
-{
-	struct vexpress_config_bridge __bridge = *bridge;
-	int i;
-
-	mutex_lock(&vexpress_config_bridges_mutex);
-	for (i = 0; i < ARRAY_SIZE(vexpress_config_bridges); i++)
-		if (&vexpress_config_bridges[i] == bridge)
-			__clear_bit(i, vexpress_config_bridges_map);
-	mutex_unlock(&vexpress_config_bridges_mutex);
-
-	WARN_ON(!list_empty(&__bridge.transactions));
-	while (!list_empty(&__bridge.transactions))
-		cpu_relax();
-}
-EXPORT_SYMBOL(vexpress_config_bridge_unregister);
-
-
-struct vexpress_config_func {
-	struct vexpress_config_bridge *bridge;
-	void *func;
-};
-
-struct vexpress_config_func *__vexpress_config_func_get(struct device *dev,
-		struct device_node *node)
-{
-	struct device_node *bridge_node;
-	struct vexpress_config_func *func;
-	int i;
-
-	if (WARN_ON(dev && node && dev->of_node != node))
-		return NULL;
-	if (dev && !node)
-		node = dev->of_node;
-
-	func = kzalloc(sizeof(*func), GFP_KERNEL);
-	if (!func)
-		return NULL;
-
-	bridge_node = of_node_get(node);
-	while (bridge_node) {
-		const __be32 *prop = of_get_property(bridge_node,
-				"arm,vexpress,config-bridge", NULL);
-
-		if (prop) {
-			bridge_node = of_find_node_by_phandle(
-					be32_to_cpup(prop));
-			break;
-		}
-
-		bridge_node = of_get_next_parent(bridge_node);
-	}
-
-	mutex_lock(&vexpress_config_bridges_mutex);
-	for (i = 0; i < ARRAY_SIZE(vexpress_config_bridges); i++) {
-		struct vexpress_config_bridge *bridge =
-				&vexpress_config_bridges[i];
-
-		if (test_bit(i, vexpress_config_bridges_map) &&
-				bridge->node == bridge_node) {
-			func->bridge = bridge;
-			func->func = bridge->info->func_get(dev, node);
-			break;
-		}
-	}
-	mutex_unlock(&vexpress_config_bridges_mutex);
-
-	if (!func->func) {
-		of_node_put(node);
-		kfree(func);
-		return NULL;
-	}
-
-	return func;
-}
-EXPORT_SYMBOL(__vexpress_config_func_get);
-
-void vexpress_config_func_put(struct vexpress_config_func *func)
-{
-	func->bridge->info->func_put(func->func);
-	of_node_put(func->bridge->node);
-	kfree(func);
-}
-EXPORT_SYMBOL(vexpress_config_func_put);
-
-struct vexpress_config_trans {
-	struct vexpress_config_func *func;
-	int offset;
-	bool write;
-	u32 *data;
-	int status;
-	struct completion completion;
-	struct list_head list;
-};
-
-static void vexpress_config_dump_trans(const char *what,
-		struct vexpress_config_trans *trans)
-{
-	pr_debug("%s %s trans %p func 0x%p offset %d data 0x%x status %d\n",
-			what, trans->write ? "write" : "read", trans,
-			trans->func->func, trans->offset,
-			trans->data ? *trans->data : 0, trans->status);
-}
-
-static int vexpress_config_schedule(struct vexpress_config_trans *trans)
-{
-	int status;
-	struct vexpress_config_bridge *bridge = trans->func->bridge;
-	unsigned long flags;
-
-	init_completion(&trans->completion);
-	trans->status = -EFAULT;
-
-	spin_lock_irqsave(&bridge->transactions_lock, flags);
-
-	if (list_empty(&bridge->transactions)) {
-		vexpress_config_dump_trans("Executing", trans);
-		status = bridge->info->func_exec(trans->func->func,
-				trans->offset, trans->write, trans->data);
-	} else {
-		vexpress_config_dump_trans("Queuing", trans);
-		status = VEXPRESS_CONFIG_STATUS_WAIT;
-	}
-
-	switch (status) {
-	case VEXPRESS_CONFIG_STATUS_DONE:
-		vexpress_config_dump_trans("Finished", trans);
-		trans->status = status;
-		break;
-	case VEXPRESS_CONFIG_STATUS_WAIT:
-		list_add_tail(&trans->list, &bridge->transactions);
-		break;
-	}
-
-	spin_unlock_irqrestore(&bridge->transactions_lock, flags);
-
-	return status;
-}
-
-void vexpress_config_complete(struct vexpress_config_bridge *bridge,
-		int status)
-{
-	struct vexpress_config_trans *trans;
-	unsigned long flags;
-	const char *message = "Completed";
-
-	spin_lock_irqsave(&bridge->transactions_lock, flags);
-
-	trans = list_first_entry(&bridge->transactions,
-			struct vexpress_config_trans, list);
-	trans->status = status;
-
-	do {
-		vexpress_config_dump_trans(message, trans);
-		list_del(&trans->list);
-		complete(&trans->completion);
-
-		if (list_empty(&bridge->transactions))
-			break;
-
-		trans = list_first_entry(&bridge->transactions,
-				struct vexpress_config_trans, list);
-		vexpress_config_dump_trans("Executing pending", trans);
-		trans->status = bridge->info->func_exec(trans->func->func,
-				trans->offset, trans->write, trans->data);
-		message = "Finished pending";
-	} while (trans->status == VEXPRESS_CONFIG_STATUS_DONE);
-
-	spin_unlock_irqrestore(&bridge->transactions_lock, flags);
-}
-EXPORT_SYMBOL(vexpress_config_complete);
-
-int vexpress_config_wait(struct vexpress_config_trans *trans)
-{
-	wait_for_completion(&trans->completion);
-
-	return trans->status;
-}
-EXPORT_SYMBOL(vexpress_config_wait);
-
-int vexpress_config_read(struct vexpress_config_func *func, int offset,
-		u32 *data)
-{
-	struct vexpress_config_trans trans = {
-		.func = func,
-		.offset = offset,
-		.write = false,
-		.data = data,
-		.status = 0,
-	};
-	int status = vexpress_config_schedule(&trans);
-
-	if (status == VEXPRESS_CONFIG_STATUS_WAIT)
-		status = vexpress_config_wait(&trans);
-
-	return status;
-}
-EXPORT_SYMBOL(vexpress_config_read);
-
-int vexpress_config_write(struct vexpress_config_func *func, int offset,
-		u32 data)
-{
-	struct vexpress_config_trans trans = {
-		.func = func,
-		.offset = offset,
-		.write = true,
-		.data = &data,
-		.status = 0,
-	};
-	int status = vexpress_config_schedule(&trans);
-
-	if (status == VEXPRESS_CONFIG_STATUS_WAIT)
-		status = vexpress_config_wait(&trans);
-
-	return status;
-}
-EXPORT_SYMBOL(vexpress_config_write);
diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c
index 35281e804e7e..b4138a7168db 100644
--- a/drivers/mfd/vexpress-sysreg.c
+++ b/drivers/mfd/vexpress-sysreg.c
@@ -16,8 +16,10 @@
 #include <linux/io.h>
 #include <linux/leds.h>
 #include <linux/of_address.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/timer.h>
@@ -72,9 +74,18 @@
 
 static void __iomem *vexpress_sysreg_base;
 static struct device *vexpress_sysreg_dev;
-static int vexpress_master_site;
+static LIST_HEAD(vexpress_sysreg_config_funcs);
+static struct device *vexpress_sysreg_config_bridge;
 
 
+static int vexpress_sysreg_get_master(void)
+{
+	if (readl(vexpress_sysreg_base + SYS_MISC) & SYS_MISC_MASTERSITE)
+		return VEXPRESS_SITE_DB2;
+
+	return VEXPRESS_SITE_DB1;
+}
+
 void vexpress_flags_set(u32 data)
 {
 	writel(~0, vexpress_sysreg_base + SYS_FLAGSCLR);
@@ -84,7 +95,7 @@ void vexpress_flags_set(u32 data)
 u32 vexpress_get_procid(int site)
 {
 	if (site == VEXPRESS_SITE_MASTER)
-		site = vexpress_master_site;
+		site = vexpress_sysreg_get_master();
 
 	return readl(vexpress_sysreg_base + (site == VEXPRESS_SITE_DB1 ?
 			SYS_PROCID0 : SYS_PROCID1));
@@ -114,130 +125,33 @@ void __iomem *vexpress_get_24mhz_clock_base(void)
 }
 
 
-static void vexpress_sysreg_find_prop(struct device_node *node,
-		const char *name, u32 *val)
-{
-	of_node_get(node);
-	while (node) {
-		if (of_property_read_u32(node, name, val) == 0) {
-			of_node_put(node);
-			return;
-		}
-		node = of_get_next_parent(node);
-	}
-}
-
-unsigned __vexpress_get_site(struct device *dev, struct device_node *node)
-{
-	u32 site = 0;
-
-	WARN_ON(dev && node && dev->of_node != node);
-	if (dev && !node)
-		node = dev->of_node;
-
-	if (node) {
-		vexpress_sysreg_find_prop(node, "arm,vexpress,site", &site);
-	} else if (dev && dev->bus == &platform_bus_type) {
-		struct platform_device *pdev = to_platform_device(dev);
-
-		if (pdev->num_resources == 1 &&
-				pdev->resource[0].flags == IORESOURCE_BUS)
-			site = pdev->resource[0].start;
-	} else if (dev && strncmp(dev_name(dev), "ct:", 3) == 0) {
-		site = VEXPRESS_SITE_MASTER;
-	}
-
-	if (site == VEXPRESS_SITE_MASTER)
-		site = vexpress_master_site;
-
-	return site;
-}
-
-
 struct vexpress_sysreg_config_func {
-	u32 template;
-	u32 device;
+	struct list_head list;
+	struct regmap *regmap;
+	int num_templates;
+	u32 template[0]; /* Keep this last */
 };
 
-static struct vexpress_config_bridge *vexpress_sysreg_config_bridge;
-static struct timer_list vexpress_sysreg_config_timer;
-static u32 *vexpress_sysreg_config_data;
-static int vexpress_sysreg_config_tries;
-
-static void *vexpress_sysreg_config_func_get(struct device *dev,
-		struct device_node *node)
+static int vexpress_sysreg_config_exec(struct vexpress_sysreg_config_func *func,
+		int index, bool write, u32 *data)
 {
-	struct vexpress_sysreg_config_func *config_func;
-	u32 site = 0;
-	u32 position = 0;
-	u32 dcc = 0;
-	u32 func_device[2];
-	int err = -EFAULT;
-
-	if (node) {
-		of_node_get(node);
-		vexpress_sysreg_find_prop(node, "arm,vexpress,site", &site);
-		vexpress_sysreg_find_prop(node, "arm,vexpress,position",
-				&position);
-		vexpress_sysreg_find_prop(node, "arm,vexpress,dcc", &dcc);
-		err = of_property_read_u32_array(node,
-				"arm,vexpress-sysreg,func", func_device,
-				ARRAY_SIZE(func_device));
-		of_node_put(node);
-	} else if (dev && dev->bus == &platform_bus_type) {
-		struct platform_device *pdev = to_platform_device(dev);
-
-		if (pdev->num_resources == 1 &&
-				pdev->resource[0].flags == IORESOURCE_BUS) {
-			site = pdev->resource[0].start;
-			func_device[0] = pdev->resource[0].end;
-			func_device[1] = pdev->id;
-			err = 0;
-		}
-	}
-	if (err)
-		return NULL;
-
-	config_func = kzalloc(sizeof(*config_func), GFP_KERNEL);
-	if (!config_func)
-		return NULL;
-
-	config_func->template = SYS_CFGCTRL_DCC(dcc);
-	config_func->template |= SYS_CFGCTRL_FUNC(func_device[0]);
-	config_func->template |= SYS_CFGCTRL_SITE(site == VEXPRESS_SITE_MASTER ?
-			vexpress_master_site : site);
-	config_func->template |= SYS_CFGCTRL_POSITION(position);
-	config_func->device |= func_device[1];
-
-	dev_dbg(vexpress_sysreg_dev, "func 0x%p = 0x%x, %d\n", config_func,
-			config_func->template, config_func->device);
-
-	return config_func;
-}
-
-static void vexpress_sysreg_config_func_put(void *func)
-{
-	kfree(func);
-}
-
-static int vexpress_sysreg_config_func_exec(void *func, int offset,
-		bool write, u32 *data)
-{
-	int status;
-	struct vexpress_sysreg_config_func *config_func = func;
-	u32 command;
+	u32 command, status;
+	int tries;
+	long timeout;
 
 	if (WARN_ON(!vexpress_sysreg_base))
 		return -ENOENT;
 
+	if (WARN_ON(index > func->num_templates))
+		return -EINVAL;
+
 	command = readl(vexpress_sysreg_base + SYS_CFGCTRL);
 	if (WARN_ON(command & SYS_CFGCTRL_START))
 		return -EBUSY;
 
-	command = SYS_CFGCTRL_START;
+	command = func->template[index];
+	command |= SYS_CFGCTRL_START;
 	command |= write ? SYS_CFGCTRL_WRITE : 0;
-	command |= config_func->template;
-	command |= SYS_CFGCTRL_DEVICE(config_func->device + offset);
 
 	/* Use a canary for reads */
 	if (!write)
@@ -250,90 +164,190 @@ static int vexpress_sysreg_config_func_exec(void *func, int offset,
 	writel(command, vexpress_sysreg_base + SYS_CFGCTRL);
 	mb();
 
-	if (vexpress_sysreg_dev) {
-		/* Schedule completion check */
-		if (!write)
-			vexpress_sysreg_config_data = data;
-		vexpress_sysreg_config_tries = 100;
-		mod_timer(&vexpress_sysreg_config_timer,
-				jiffies + usecs_to_jiffies(100));
-		status = VEXPRESS_CONFIG_STATUS_WAIT;
-	} else {
-		/* Early execution, no timer available, have to spin */
-		u32 cfgstat;
+	/* The operation can take ages... Go to sleep, 100us initially */
+	tries = 100;
+	timeout = 100;
+	do {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(usecs_to_jiffies(timeout));
+		if (signal_pending(current))
+			return -EINTR;
+
+		status = readl(vexpress_sysreg_base + SYS_CFGSTAT);
+		if (status & SYS_CFGSTAT_ERR)
+			return -EFAULT;
+
+		if (timeout > 20)
+			timeout -= 20;
+	} while (--tries && !(status & SYS_CFGSTAT_COMPLETE));
+	if (WARN_ON_ONCE(!tries))
+		return -ETIMEDOUT;
+
+	if (!write) {
+		*data = readl(vexpress_sysreg_base + SYS_CFGDATA);
+		dev_dbg(vexpress_sysreg_dev, "func %p, read data %x\n",
+				func, *data);
+	}
 
-		do {
-			cpu_relax();
-			cfgstat = readl(vexpress_sysreg_base + SYS_CFGSTAT);
-		} while (!cfgstat);
+	return 0;
+}
 
-		if (!write && (cfgstat & SYS_CFGSTAT_COMPLETE))
-			*data = readl(vexpress_sysreg_base + SYS_CFGDATA);
-		status = VEXPRESS_CONFIG_STATUS_DONE;
+static int vexpress_sysreg_config_read(void *context, unsigned int index,
+		unsigned int *val)
+{
+	struct vexpress_sysreg_config_func *func = context;
 
-		if (cfgstat & SYS_CFGSTAT_ERR)
-			status = -EINVAL;
-	}
+	return vexpress_sysreg_config_exec(func, index, false, val);
+}
 
-	return status;
+static int vexpress_sysreg_config_write(void *context, unsigned int index,
+		unsigned int val)
+{
+	struct vexpress_sysreg_config_func *func = context;
+
+	return vexpress_sysreg_config_exec(func, index, true, &val);
 }
 
-struct vexpress_config_bridge_info vexpress_sysreg_config_bridge_info = {
-	.name = "vexpress-sysreg",
-	.func_get = vexpress_sysreg_config_func_get,
-	.func_put = vexpress_sysreg_config_func_put,
-	.func_exec = vexpress_sysreg_config_func_exec,
+struct regmap_config vexpress_sysreg_regmap_config = {
+	.lock = vexpress_config_lock,
+	.unlock = vexpress_config_unlock,
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_read = vexpress_sysreg_config_read,
+	.reg_write = vexpress_sysreg_config_write,
+	.reg_format_endian = REGMAP_ENDIAN_LITTLE,
+	.val_format_endian = REGMAP_ENDIAN_LITTLE,
 };
 
-static void vexpress_sysreg_config_complete(unsigned long data)
+/* Create the regmap for a config function; returns ERR_PTR() on failure */
+static struct regmap *vexpress_sysreg_config_regmap_init(struct device *dev,
+		void *context)
 {
-	int status = VEXPRESS_CONFIG_STATUS_DONE;
-	u32 cfgstat = readl(vexpress_sysreg_base + SYS_CFGSTAT);
-
-	if (cfgstat & SYS_CFGSTAT_ERR)
-		status = -EINVAL;
-	if (!vexpress_sysreg_config_tries--)
-		status = -ETIMEDOUT;
-
-	if (status < 0) {
-		dev_err(vexpress_sysreg_dev, "error %d\n", status);
-	} else if (!(cfgstat & SYS_CFGSTAT_COMPLETE)) {
-		mod_timer(&vexpress_sysreg_config_timer,
-				jiffies + usecs_to_jiffies(50));
-		return;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct vexpress_sysreg_config_func *func;
+	struct property *prop;
+	const __be32 *val = NULL;
+	__be32 energy_quirk[4];
+	u32 site, position, dcc;
+	int num, err, i;
+
+	if (dev->of_node) {
+		err = vexpress_config_get_topo(dev->of_node, &site, &position,
+				&dcc);
+		if (err)
+			return ERR_PTR(err);
+
+		prop = of_find_property(dev->of_node,
+				"arm,vexpress-sysreg,func", NULL);
+		if (!prop)
+			return ERR_PTR(-EINVAL);
+
+		num = prop->length / sizeof(u32) / 2;
+		val = prop->value;
+	} else {
+		if (pdev->num_resources != 1 ||
+				pdev->resource[0].flags != IORESOURCE_BUS)
+			return ERR_PTR(-EFAULT);
+
+		site = pdev->resource[0].start;
+		if (site == VEXPRESS_SITE_MASTER)
+			site = vexpress_sysreg_get_master();
+		position = 0;
+		dcc = 0;
+		num = 1;
 	}
 
-	if (vexpress_sysreg_config_data) {
-		*vexpress_sysreg_config_data = readl(vexpress_sysreg_base +
-				SYS_CFGDATA);
-		dev_dbg(vexpress_sysreg_dev, "read data %x\n",
-				*vexpress_sysreg_config_data);
-		vexpress_sysreg_config_data = NULL;
+	/*
+	 * "arm,vexpress-energy" function used to be described
+	 * by its first device only, now it requires both
+	 */
+	if (num == 1 && of_device_is_compatible(dev->of_node,
+			"arm,vexpress-energy")) {
+		num = 2;
+		energy_quirk[0] = *val;
+		energy_quirk[2] = *val++;
+		energy_quirk[1] = *val;
+		energy_quirk[3] = cpu_to_be32(be32_to_cpup(val) + 1);
+		val = energy_quirk;
 	}
 
-	vexpress_config_complete(vexpress_sysreg_config_bridge, status);
-}
+	func = kzalloc(sizeof(*func) + sizeof(*func->template) * num,
+			GFP_KERNEL);
+	if (!func)
+		return ERR_PTR(-ENOMEM);
 
+	func->num_templates = num;
 
-void vexpress_sysreg_setup(struct device_node *node)
-{
-	if (WARN_ON(!vexpress_sysreg_base))
-		return;
+	for (i = 0; i < num; i++) {
+		u32 function, device;
 
-	if (readl(vexpress_sysreg_base + SYS_MISC) & SYS_MISC_MASTERSITE)
-		vexpress_master_site = VEXPRESS_SITE_DB2;
+		if (dev->of_node) {
+			function = be32_to_cpup(val++);
+			device = be32_to_cpup(val++);
+		} else {
+			function = pdev->resource[0].end;
+			device = pdev->id;
+		}
+
+		dev_dbg(dev, "func %p: %u/%u/%u/%u/%u\n", func, site,
+				position, dcc, function, device);
+
+		func->template[i] = SYS_CFGCTRL_DCC(dcc);
+		func->template[i] |= SYS_CFGCTRL_SITE(site);
+		func->template[i] |= SYS_CFGCTRL_POSITION(position);
+		func->template[i] |= SYS_CFGCTRL_FUNC(function);
+		func->template[i] |= SYS_CFGCTRL_DEVICE(device);
+	}
+
+	vexpress_sysreg_regmap_config.max_register = num - 1;
+
+	func->regmap = regmap_init(dev, NULL, func,
+			&vexpress_sysreg_regmap_config);
+	if (IS_ERR(func->regmap)) {
+		/* Read the error out of func before freeing it */
+		err = PTR_ERR(func->regmap);
+		kfree(func);
+		return ERR_PTR(err);
+	}
-	else
-		vexpress_master_site = VEXPRESS_SITE_DB1;
+	list_add(&func->list, &vexpress_sysreg_config_funcs);
 
-	vexpress_sysreg_config_bridge = vexpress_config_bridge_register(
-			node, &vexpress_sysreg_config_bridge_info);
-	WARN_ON(!vexpress_sysreg_config_bridge);
+	return func->regmap;
+}
+
+static void vexpress_sysreg_config_regmap_exit(struct regmap *regmap,
+		void *context)
+{
+	struct vexpress_sysreg_config_func *func, *tmp;
+
+	regmap_exit(regmap);
+
+	list_for_each_entry_safe(func, tmp, &vexpress_sysreg_config_funcs,
+			list) {
+		if (func->regmap == regmap) {
+			list_del(&vexpress_sysreg_config_funcs);
+			kfree(func);
+			break;
+		}
+	}
+}
+
+static struct vexpress_config_bridge_ops vexpress_sysreg_config_bridge_ops = {
+	.regmap_init = vexpress_sysreg_config_regmap_init,
+	.regmap_exit = vexpress_sysreg_config_regmap_exit,
+};
+
+int vexpress_sysreg_config_device_register(struct platform_device *pdev)
+{
+	pdev->dev.parent = vexpress_sysreg_config_bridge;
+
+	return platform_device_register(pdev);
 }
 
+
 void __init vexpress_sysreg_early_init(void __iomem *base)
 {
 	vexpress_sysreg_base = base;
-	vexpress_sysreg_setup(NULL);
+	vexpress_config_set_master(vexpress_sysreg_get_master());
 }
 
 void __init vexpress_sysreg_of_early_init(void)
@@ -344,10 +358,14 @@ void __init vexpress_sysreg_of_early_init(void)
 		return;
 
 	node = of_find_compatible_node(NULL, NULL, "arm,vexpress-sysreg");
-	if (node) {
-		vexpress_sysreg_base = of_iomap(node, 0);
-		vexpress_sysreg_setup(node);
-	}
+	if (WARN_ON(!node))
+		return;
+
+	vexpress_sysreg_base = of_iomap(node, 0);
+	if (WARN_ON(!vexpress_sysreg_base))
+		return;
+
+	vexpress_config_set_master(vexpress_sysreg_get_master());
 }
 
 
@@ -470,28 +488,22 @@ static int vexpress_sysreg_probe(struct platform_device *pdev)
 		return -EBUSY;
 	}
 
-	if (!vexpress_sysreg_base) {
+	if (!vexpress_sysreg_base)
 		vexpress_sysreg_base = devm_ioremap(&pdev->dev, res->start,
 				resource_size(res));
-		vexpress_sysreg_setup(pdev->dev.of_node);
-	}
 
 	if (!vexpress_sysreg_base) {
 		dev_err(&pdev->dev, "Failed to obtain base address!\n");
 		return -EFAULT;
 	}
 
-	setup_timer(&vexpress_sysreg_config_timer,
-			vexpress_sysreg_config_complete, 0);
-
+	vexpress_config_set_master(vexpress_sysreg_get_master());
 	vexpress_sysreg_dev = &pdev->dev;
 
 #ifdef CONFIG_GPIOLIB
 	vexpress_sysreg_gpio_chip.dev = &pdev->dev;
 	err = gpiochip_add(&vexpress_sysreg_gpio_chip);
 	if (err) {
-		vexpress_config_bridge_unregister(
-				vexpress_sysreg_config_bridge);
 		dev_err(&pdev->dev, "Failed to register GPIO chip! (%d)\n",
 				err);
 		return err;
@@ -502,6 +514,10 @@ static int vexpress_sysreg_probe(struct platform_device *pdev)
 			sizeof(vexpress_sysreg_leds_pdata));
 #endif
 
+	vexpress_sysreg_config_bridge = vexpress_config_bridge_register(
+			&pdev->dev, &vexpress_sysreg_config_bridge_ops, NULL);
+	WARN_ON(!vexpress_sysreg_config_bridge);
+
 	device_create_file(vexpress_sysreg_dev, &dev_attr_sys_id);
 
 	return 0;
@@ -522,7 +538,12 @@ static struct platform_driver vexpress_sysreg_driver = {
 
 static int __init vexpress_sysreg_init(void)
 {
-	vexpress_sysreg_of_early_init();
+	struct device_node *node;
+
+	/* Need the sysreg early, before any other device... */
+	for_each_matching_node(node, vexpress_sysreg_match)
+		of_platform_device_create(node, NULL, NULL);
+
 	return platform_driver_register(&vexpress_sysreg_driver);
 }
 core_initcall(vexpress_sysreg_init);
diff --git a/drivers/power/reset/vexpress-poweroff.c b/drivers/power/reset/vexpress-poweroff.c
index b95cf71ed695..4dc102e2b230 100644
--- a/drivers/power/reset/vexpress-poweroff.c
+++ b/drivers/power/reset/vexpress-poweroff.c
@@ -23,10 +23,10 @@
 static void vexpress_reset_do(struct device *dev, const char *what)
 {
 	int err = -ENOENT;
-	struct vexpress_config_func *func = dev_get_drvdata(dev);
+	struct regmap *reg = dev_get_drvdata(dev);
 
-	if (func) {
-		err = vexpress_config_write(func, 0, 0);
+	if (reg) {
+		err = regmap_write(reg, 0, 0);
 		if (!err)
 			mdelay(1000);
 	}
@@ -91,17 +91,17 @@ static int vexpress_reset_probe(struct platform_device *pdev)
 	enum vexpress_reset_func func;
 	const struct of_device_id *match =
 			of_match_device(vexpress_reset_of_match, &pdev->dev);
-	struct vexpress_config_func *config_func;
+	struct regmap *regmap;
 
 	if (match)
 		func = (enum vexpress_reset_func)match->data;
 	else
 		func = pdev->id_entry->driver_data;
 
-	config_func = vexpress_config_func_get_by_dev(&pdev->dev);
-	if (!config_func)
-		return -EINVAL;
-	dev_set_drvdata(&pdev->dev, config_func);
+	regmap = devm_regmap_init_vexpress_config(&pdev->dev);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+	dev_set_drvdata(&pdev->dev, regmap);
 
 	switch (func) {
 	case FUNC_SHUTDOWN:
diff --git a/drivers/regulator/vexpress.c b/drivers/regulator/vexpress.c
index f3ae28a7e663..2863428813e4 100644
--- a/drivers/regulator/vexpress.c
+++ b/drivers/regulator/vexpress.c
@@ -26,14 +26,14 @@
 struct vexpress_regulator {
 	struct regulator_desc desc;
 	struct regulator_dev *regdev;
-	struct vexpress_config_func *func;
+	struct regmap *regmap;
 };
 
 static int vexpress_regulator_get_voltage(struct regulator_dev *regdev)
 {
 	struct vexpress_regulator *reg = rdev_get_drvdata(regdev);
 	u32 uV;
-	int err = vexpress_config_read(reg->func, 0, &uV);
+	int err = regmap_read(reg->regmap, 0, &uV);
 
 	return err ? err : uV;
 }
@@ -43,7 +43,7 @@ static int vexpress_regulator_set_voltage(struct regulator_dev *regdev,
 {
 	struct vexpress_regulator *reg = rdev_get_drvdata(regdev);
 
-	return vexpress_config_write(reg->func, 0, min_uV);
+	return regmap_write(reg->regmap, 0, min_uV);
 }
 
 static struct regulator_ops vexpress_regulator_ops_ro = {
@@ -57,22 +57,17 @@ static struct regulator_ops vexpress_regulator_ops = {
 
 static int vexpress_regulator_probe(struct platform_device *pdev)
 {
-	int err;
 	struct vexpress_regulator *reg;
 	struct regulator_init_data *init_data;
 	struct regulator_config config = { };
 
 	reg = devm_kzalloc(&pdev->dev, sizeof(*reg), GFP_KERNEL);
-	if (!reg) {
-		err = -ENOMEM;
-		goto error_kzalloc;
-	}
+	if (!reg)
+		return -ENOMEM;
 
-	reg->func = vexpress_config_func_get_by_dev(&pdev->dev);
-	if (!reg->func) {
-		err = -ENXIO;
-		goto error_get_func;
-	}
+	reg->regmap = devm_regmap_init_vexpress_config(&pdev->dev);
+	if (IS_ERR(reg->regmap))
+		return PTR_ERR(reg->regmap);
 
 	reg->desc.name = dev_name(&pdev->dev);
 	reg->desc.type = REGULATOR_VOLTAGE;
@@ -80,10 +75,8 @@ static int vexpress_regulator_probe(struct platform_device *pdev)
 	reg->desc.continuous_voltage_range = true;
 
 	init_data = of_get_regulator_init_data(&pdev->dev, pdev->dev.of_node);
-	if (!init_data) {
-		err = -EINVAL;
-		goto error_get_regulator_init_data;
-	}
+	if (!init_data)
+		return -EINVAL;
 
 	init_data->constraints.apply_uV = 0;
 	if (init_data->constraints.min_uV && init_data->constraints.max_uV)
@@ -97,29 +90,11 @@ static int vexpress_regulator_probe(struct platform_device *pdev)
 	config.of_node = pdev->dev.of_node;
 
 	reg->regdev = devm_regulator_register(&pdev->dev, &reg->desc, &config);
-	if (IS_ERR(reg->regdev)) {
-		err = PTR_ERR(reg->regdev);
-		goto error_regulator_register;
-	}
+	if (IS_ERR(reg->regdev))
+		return PTR_ERR(reg->regdev);
 
 	platform_set_drvdata(pdev, reg);
 
-	return 0;
-
-error_regulator_register:
-error_get_regulator_init_data:
-	vexpress_config_func_put(reg->func);
-error_get_func:
-error_kzalloc:
-	return err;
-}
-
-static int vexpress_regulator_remove(struct platform_device *pdev)
-{
-	struct vexpress_regulator *reg = platform_get_drvdata(pdev);
-
-	vexpress_config_func_put(reg->func);
-
 	return 0;
 }
 
@@ -130,7 +105,6 @@ static struct of_device_id vexpress_regulator_of_match[] = {
 
 static struct platform_driver vexpress_regulator_driver = {
 	.probe = vexpress_regulator_probe,
-	.remove = vexpress_regulator_remove,
 	.driver	= {
 		.name = DRVNAME,
 		.owner = THIS_MODULE,
diff --git a/include/linux/vexpress.h b/include/linux/vexpress.h
index 617c01b8f74a..6b206ba6aa0e 100644
--- a/include/linux/vexpress.h
+++ b/include/linux/vexpress.h
@@ -15,16 +15,15 @@
 #define _LINUX_VEXPRESS_H
 
 #include <linux/device.h>
+#include <linux/platform_device.h>
 #include <linux/reboot.h>
+#include <linux/regmap.h>
 
 #define VEXPRESS_SITE_MB		0
 #define VEXPRESS_SITE_DB1		1
 #define VEXPRESS_SITE_DB2		2
 #define VEXPRESS_SITE_MASTER		0xf
 
-#define VEXPRESS_CONFIG_STATUS_DONE	0
-#define VEXPRESS_CONFIG_STATUS_WAIT	1
-
 #define VEXPRESS_GPIO_MMC_CARDIN	0
 #define VEXPRESS_GPIO_MMC_WPROT		1
 #define VEXPRESS_GPIO_FLASH_WPn		2
@@ -44,63 +43,30 @@
 	.flags = IORESOURCE_BUS,	\
 }
 
-/* Config bridge API */
+/* Config infrastructure */
 
-/**
- * struct vexpress_config_bridge_info - description of the platform
- * configuration infrastructure bridge.
- *
- * @name:	Bridge name
- *
- * @func_get:	Obtains pointer to a configuration function for a given
- *		device or a Device Tree node, to be used with @func_put
- *		and @func_exec. The node pointer should take precedence
- *		over device pointer when both are passed.
- *
- * @func_put:	Tells the bridge that the function will not be used any
- *		more, so all allocated resources can be released.
- *
- * @func_exec:	Executes a configuration function read or write operation.
- *		The offset selects a 32 bit word of the value accessed.
- *		Must return VEXPRESS_CONFIG_STATUS_DONE when operation
- *		is finished immediately, VEXPRESS_CONFIG_STATUS_WAIT when
- *		will be completed in some time or negative value in case
- *		of error.
- */
-struct vexpress_config_bridge_info {
-	const char *name;
-	void *(*func_get)(struct device *dev, struct device_node *node);
-	void (*func_put)(void *func);
-	int (*func_exec)(void *func, int offset, bool write, u32 *data);
-};
+void vexpress_config_set_master(u32 site);
+u32 vexpress_config_get_master(void);
 
-struct vexpress_config_bridge;
+void vexpress_config_lock(void *arg);
+void vexpress_config_unlock(void *arg);
 
-struct vexpress_config_bridge *vexpress_config_bridge_register(
-		struct device_node *node,
-		struct vexpress_config_bridge_info *info);
-void vexpress_config_bridge_unregister(struct vexpress_config_bridge *bridge);
+int vexpress_config_get_topo(struct device_node *node, u32 *site,
+		u32 *position, u32 *dcc);
 
-void vexpress_config_complete(struct vexpress_config_bridge *bridge,
-		int status);
+/* Config bridge API */
 
-/* Config function API */
+struct vexpress_config_bridge_ops {
+	struct regmap * (*regmap_init)(struct device *dev, void *context);
+	void (*regmap_exit)(struct regmap *regmap, void *context);
+};
 
-struct vexpress_config_func;
+struct device *vexpress_config_bridge_register(struct device *parent,
+		struct vexpress_config_bridge_ops *ops, void *context);
 
-struct vexpress_config_func *__vexpress_config_func_get(struct device *dev,
-		struct device_node *node);
-#define vexpress_config_func_get_by_dev(dev) \
-		__vexpress_config_func_get(dev, NULL)
-#define vexpress_config_func_get_by_node(node) \
-		__vexpress_config_func_get(NULL, node)
-void vexpress_config_func_put(struct vexpress_config_func *func);
+/* Config regmap API */
 
-/* Both may sleep! */
-int vexpress_config_read(struct vexpress_config_func *func, int offset,
-		u32 *data);
-int vexpress_config_write(struct vexpress_config_func *func, int offset,
-		u32 data);
+struct regmap *devm_regmap_init_vexpress_config(struct device *dev);
 
 /* Platform control */
 
@@ -109,19 +75,12 @@ u32 vexpress_get_hbi(int site);
 void *vexpress_get_24mhz_clock_base(void);
 void vexpress_flags_set(u32 data);
 
-#define vexpress_get_site_by_node(node) __vexpress_get_site(NULL, node)
-#define vexpress_get_site_by_dev(dev) __vexpress_get_site(dev, NULL)
-unsigned __vexpress_get_site(struct device *dev, struct device_node *node);
-
 void vexpress_sysreg_early_init(void __iomem *base);
 void vexpress_sysreg_of_early_init(void);
+int vexpress_sysreg_config_device_register(struct platform_device *pdev);
 
 /* Clocks */
 
-struct clk *vexpress_osc_setup(struct device *dev);
-void vexpress_osc_of_setup(struct device_node *node);
-
 void vexpress_clk_init(void __iomem *sp810_base);
-void vexpress_clk_of_init(void);
 
 #endif
-- 
cgit 


From 29f9b6cf7bff6a118130163c848811e14f8022da Mon Sep 17 00:00:00 2001
From: Pawel Moll <pawel.moll@arm.com>
Date: Wed, 12 Feb 2014 10:47:10 +0000
Subject: mfd: syscon: Add platform data with a regmap config name

Define syscon platform data structure that can be used
to define a regmap config name. This is particularly useful
in the regmap debugfs when there is more than one syscon
device registered, to distinguish the register blocks.

Signed-off-by: Pawel Moll <pawel.moll@arm.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/syscon.c                 | 4 ++++
 include/linux/platform_data/syscon.h | 8 ++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 include/linux/platform_data/syscon.h

(limited to 'include/linux')

diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c
index dbea55de4397..e2a04bb8bc1e 100644
--- a/drivers/mfd/syscon.c
+++ b/drivers/mfd/syscon.c
@@ -18,6 +18,7 @@
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
+#include <linux/platform_data/syscon.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/mfd/syscon.h>
@@ -119,6 +120,7 @@ static struct regmap_config syscon_regmap_config = {
 static int syscon_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
+	struct syscon_platform_data *pdata = dev_get_platdata(dev);
 	struct syscon *syscon;
 	struct resource *res;
 	void __iomem *base;
@@ -136,6 +138,8 @@ static int syscon_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	syscon_regmap_config.max_register = res->end - res->start - 3;
+	if (pdata)
+		syscon_regmap_config.name = pdata->label;
 	syscon->regmap = devm_regmap_init_mmio(dev, base,
 					&syscon_regmap_config);
 	if (IS_ERR(syscon->regmap)) {
diff --git a/include/linux/platform_data/syscon.h b/include/linux/platform_data/syscon.h
new file mode 100644
index 000000000000..2354c6fa3726
--- /dev/null
+++ b/include/linux/platform_data/syscon.h
@@ -0,0 +1,8 @@
+#ifndef PLATFORM_DATA_SYSCON_H
+#define PLATFORM_DATA_SYSCON_H
+
+struct syscon_platform_data {
+	const char *label;
+};
+
+#endif
-- 
cgit 


From 974cc7b93441a0e78f030495436d1be7eb7c208d Mon Sep 17 00:00:00 2001
From: Pawel Moll <pawel.moll@arm.com>
Date: Wed, 23 Apr 2014 10:49:31 +0100
Subject: mfd: vexpress: Define the device as MFD cells

This patch - finally, after over 6 months! :-( - addresses
Samuel's request to split the vexpress-sysreg driver into
smaller portions and define the device in the form of MFD
cells:

* LEDs code has been completely removed and replaced with
  "gpio-leds" nodes in the tree (referencing dedicated
  GPIO subnodes in sysreg - bindings documentation updated);
  this also better fits the reality as some variants of the
  motherboard don't have all the LEDs populated

* syscfg bridge code has been extracted into a separate
  driver (placed in drivers/misc for no better place)

* all the ID & MISC registers are defined as syscon devices,
  making them available to other drivers should they need
  to use them (and also to the user via /sys/kernel/debug/regmap,
  which can be helpful in platform debugging)

Signed-off-by: Pawel Moll <pawel.moll@arm.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
---
 .../devicetree/bindings/arm/vexpress-sysreg.txt    |  36 +-
 arch/arm/boot/dts/vexpress-v2m-rs1.dtsi            |  76 ++-
 arch/arm/boot/dts/vexpress-v2m.dtsi                |  76 ++-
 arch/arm/mach-vexpress/ct-ca9x4.c                  |   2 +-
 arch/arm/mach-vexpress/v2m.c                       |  15 +-
 drivers/mfd/Kconfig                                |  15 +-
 drivers/mfd/Makefile                               |   2 +-
 drivers/mfd/vexpress-sysreg.c                      | 533 ++++++---------------
 drivers/misc/Kconfig                               |   9 +
 drivers/misc/Makefile                              |   1 +
 drivers/misc/vexpress-syscfg.c                     | 324 +++++++++++++
 include/linux/vexpress.h                           |  16 +-
 12 files changed, 667 insertions(+), 438 deletions(-)
 create mode 100644 drivers/misc/vexpress-syscfg.c

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt b/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
index 57b423f78995..00318d083c9e 100644
--- a/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
+++ b/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
@@ -8,6 +8,8 @@ interrupt generation, MMC and NOR Flash control etc.
 Required node properties:
 - compatible value : = "arm,vexpress,sysreg";
 - reg : physical base address and the size of the registers window
+
+Deprecated properties, replaced by GPIO subnodes (see below):
 - gpio-controller : specifies that the node is a GPIO controller
 - #gpio-cells : size of the GPIO specifier, should be 2:
   - first cell is the pseudo-GPIO line number:
@@ -16,12 +18,42 @@ Required node properties:
     2 - NOR FLASH WPn
   - second cell can take standard GPIO flags (currently ignored).
 
+Control registers providing pseudo-GPIO lines must be represented
+by subnodes, each of them requiring the following properties:
+- compatible value : one of
+			"arm,vexpress-sysreg,sys_led"
+			"arm,vexpress-sysreg,sys_mci"
+			"arm,vexpress-sysreg,sys_flash"
+- gpio-controller : makes the node a GPIO controller
+- #gpio-cells : size of the GPIO specifier, must be 2:
+  - first cell is the function number:
+    - for sys_led : 0..7 = LED 0..7
+    - for sys_mci : 0 = MMC CARDIN, 1 = MMC WPROT
+    - for sys_flash : 0 = NOR FLASH WPn
+  - second cell can take standard GPIO flags (currently ignored).
+
 Example:
 	v2m_sysreg: sysreg@10000000 {
  		compatible = "arm,vexpress-sysreg";
  		reg = <0x10000000 0x1000>;
-		gpio-controller;
-		#gpio-cells = <2>;
+
+		v2m_led_gpios: sys_led@08 {
+			compatible = "arm,vexpress-sysreg,sys_led";
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+
+		v2m_mmc_gpios: sys_mci@48 {
+			compatible = "arm,vexpress-sysreg,sys_mci";
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
+
+		v2m_flash_gpios: sys_flash@4c {
+			compatible = "arm,vexpress-sysreg,sys_flash";
+			gpio-controller;
+			#gpio-cells = <2>;
+		};
  	};
 
 This block also can also act a bridge to the platform's configuration
diff --git a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
index ac870fb3fa0d..756c986995a3 100644
--- a/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
+++ b/arch/arm/boot/dts/vexpress-v2m-rs1.dtsi
@@ -74,8 +74,24 @@
 			v2m_sysreg: sysreg@010000 {
 				compatible = "arm,vexpress-sysreg";
 				reg = <0x010000 0x1000>;
-				gpio-controller;
-				#gpio-cells = <2>;
+
+				v2m_led_gpios: sys_led@08 {
+					compatible = "arm,vexpress-sysreg,sys_led";
+					gpio-controller;
+					#gpio-cells = <2>;
+				};
+
+				v2m_mmc_gpios: sys_mci@48 {
+					compatible = "arm,vexpress-sysreg,sys_mci";
+					gpio-controller;
+					#gpio-cells = <2>;
+				};
+
+				v2m_flash_gpios: sys_flash@4c {
+					compatible = "arm,vexpress-sysreg,sys_flash";
+					gpio-controller;
+					#gpio-cells = <2>;
+				};
 			};
 
 			v2m_sysctl: sysctl@020000 {
@@ -113,8 +129,8 @@
 				compatible = "arm,pl180", "arm,primecell";
 				reg = <0x050000 0x1000>;
 				interrupts = <9 10>;
-				cd-gpios = <&v2m_sysreg 0 0>;
-				wp-gpios = <&v2m_sysreg 1 0>;
+				cd-gpios = <&v2m_mmc_gpios 0 0>;
+				wp-gpios = <&v2m_mmc_gpios 1 0>;
 				max-frequency = <12000000>;
 				vmmc-supply = <&v2m_fixed_3v3>;
 				clocks = <&v2m_clk24mhz>, <&smbclk>;
@@ -265,6 +281,58 @@
 			clock-output-names = "v2m:refclk32khz";
 		};
 
+		leds {
+			compatible = "gpio-leds";
+
+			user@1 {
+				label = "v2m:green:user1";
+				gpios = <&v2m_led_gpios 0 0>;
+				linux,default-trigger = "heartbeat";
+			};
+
+			user@2 {
+				label = "v2m:green:user2";
+				gpios = <&v2m_led_gpios 1 0>;
+				linux,default-trigger = "mmc0";
+			};
+
+			user@3 {
+				label = "v2m:green:user3";
+				gpios = <&v2m_led_gpios 2 0>;
+				linux,default-trigger = "cpu0";
+			};
+
+			user@4 {
+				label = "v2m:green:user4";
+				gpios = <&v2m_led_gpios 3 0>;
+				linux,default-trigger = "cpu1";
+			};
+
+			user@5 {
+				label = "v2m:green:user5";
+				gpios = <&v2m_led_gpios 4 0>;
+				linux,default-trigger = "cpu2";
+			};
+
+			user@6 {
+				label = "v2m:green:user6";
+				gpios = <&v2m_led_gpios 5 0>;
+				linux,default-trigger = "cpu3";
+			};
+
+			user@7 {
+				label = "v2m:green:user7";
+				gpios = <&v2m_led_gpios 6 0>;
+				linux,default-trigger = "cpu4";
+			};
+
+			user@8 {
+				label = "v2m:green:user8";
+				gpios = <&v2m_led_gpios 7 0>;
+				linux,default-trigger = "cpu5";
+			};
+		};
+
 		mcc {
 			compatible = "arm,vexpress,config-bus";
 			arm,vexpress,config-bridge = <&v2m_sysreg>;
diff --git a/arch/arm/boot/dts/vexpress-v2m.dtsi b/arch/arm/boot/dts/vexpress-v2m.dtsi
index f1420368355b..ba856d604fb7 100644
--- a/arch/arm/boot/dts/vexpress-v2m.dtsi
+++ b/arch/arm/boot/dts/vexpress-v2m.dtsi
@@ -73,8 +73,24 @@
 			v2m_sysreg: sysreg@00000 {
 				compatible = "arm,vexpress-sysreg";
 				reg = <0x00000 0x1000>;
-				gpio-controller;
-				#gpio-cells = <2>;
+
+				v2m_led_gpios: sys_led@08 {
+					compatible = "arm,vexpress-sysreg,sys_led";
+					gpio-controller;
+					#gpio-cells = <2>;
+				};
+
+				v2m_mmc_gpios: sys_mci@48 {
+					compatible = "arm,vexpress-sysreg,sys_mci";
+					gpio-controller;
+					#gpio-cells = <2>;
+				};
+
+				v2m_flash_gpios: sys_flash@4c {
+					compatible = "arm,vexpress-sysreg,sys_flash";
+					gpio-controller;
+					#gpio-cells = <2>;
+				};
 			};
 
 			v2m_sysctl: sysctl@01000 {
@@ -112,8 +128,8 @@
 				compatible = "arm,pl180", "arm,primecell";
 				reg = <0x05000 0x1000>;
 				interrupts = <9 10>;
-				cd-gpios = <&v2m_sysreg 0 0>;
-				wp-gpios = <&v2m_sysreg 1 0>;
+				cd-gpios = <&v2m_mmc_gpios 0 0>;
+				wp-gpios = <&v2m_mmc_gpios 1 0>;
 				max-frequency = <12000000>;
 				vmmc-supply = <&v2m_fixed_3v3>;
 				clocks = <&v2m_clk24mhz>, <&smbclk>;
@@ -264,6 +280,58 @@
 			clock-output-names = "v2m:refclk32khz";
 		};
 
+		leds {
+			compatible = "gpio-leds";
+
+			user@1 {
+				label = "v2m:green:user1";
+				gpios = <&v2m_led_gpios 0 0>;
+				linux,default-trigger = "heartbeat";
+			};
+
+			user@2 {
+				label = "v2m:green:user2";
+				gpios = <&v2m_led_gpios 1 0>;
+				linux,default-trigger = "mmc0";
+			};
+
+			user@3 {
+				label = "v2m:green:user3";
+				gpios = <&v2m_led_gpios 2 0>;
+				linux,default-trigger = "cpu0";
+			};
+
+			user@4 {
+				label = "v2m:green:user4";
+				gpios = <&v2m_led_gpios 3 0>;
+				linux,default-trigger = "cpu1";
+			};
+
+			user@5 {
+				label = "v2m:green:user5";
+				gpios = <&v2m_led_gpios 4 0>;
+				linux,default-trigger = "cpu2";
+			};
+
+			user@6 {
+				label = "v2m:green:user6";
+				gpios = <&v2m_led_gpios 5 0>;
+				linux,default-trigger = "cpu3";
+			};
+
+			user@7 {
+				label = "v2m:green:user7";
+				gpios = <&v2m_led_gpios 6 0>;
+				linux,default-trigger = "cpu4";
+			};
+
+			user@8 {
+				label = "v2m:green:user8";
+				gpios = <&v2m_led_gpios 7 0>;
+				linux,default-trigger = "cpu5";
+			};
+		};
+
 		mcc {
 			compatible = "arm,vexpress,config-bus";
 			arm,vexpress,config-bridge = <&v2m_sysreg>;
diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c
index 35e394aa00e5..494d70bfddad 100644
--- a/arch/arm/mach-vexpress/ct-ca9x4.c
+++ b/arch/arm/mach-vexpress/ct-ca9x4.c
@@ -160,7 +160,7 @@ static void __init ct_ca9x4_init(void)
 		amba_device_register(ct_ca9x4_amba_devs[i], &iomem_resource);
 
 	platform_device_register(&pmu_device);
-	vexpress_sysreg_config_device_register(&osc1_device);
+	vexpress_syscfg_device_register(&osc1_device);
 }
 
 #ifdef CONFIG_SMP
diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index ac95220a5019..90f04c9b11d2 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -201,8 +201,9 @@ static struct platform_device v2m_cf_device = {
 
 static struct mmci_platform_data v2m_mmci_data = {
 	.ocr_mask	= MMC_VDD_32_33|MMC_VDD_33_34,
-	.gpio_wp	= VEXPRESS_GPIO_MMC_WPROT,
-	.gpio_cd	= VEXPRESS_GPIO_MMC_CARDIN,
+	.status		= vexpress_get_mci_cardin,
+	.gpio_cd	= -1,
+	.gpio_wp	= -1,
 };
 
 static struct resource v2m_sysreg_resources[] = {
@@ -351,10 +352,10 @@ static void __init v2m_init(void)
 	for (i = 0; i < ARRAY_SIZE(v2m_amba_devs); i++)
 		amba_device_register(v2m_amba_devs[i], &iomem_resource);
 
-	vexpress_sysreg_config_device_register(&v2m_muxfpga_device);
-	vexpress_sysreg_config_device_register(&v2m_shutdown_device);
-	vexpress_sysreg_config_device_register(&v2m_reboot_device);
-	vexpress_sysreg_config_device_register(&v2m_dvimode_device);
+	vexpress_syscfg_device_register(&v2m_muxfpga_device);
+	vexpress_syscfg_device_register(&v2m_shutdown_device);
+	vexpress_syscfg_device_register(&v2m_reboot_device);
+	vexpress_syscfg_device_register(&v2m_dvimode_device);
 
 	ct_desc->init_tile();
 }
@@ -409,8 +410,6 @@ void __init v2m_dt_init_early(void)
 {
 	u32 dt_hbi;
 
-	vexpress_sysreg_of_early_init();
-
 	/* Confirm board type against DT property, if available */
 	if (of_property_read_u32(of_allnodes, "arm,hbi", &dt_hbi) == 0) {
 		u32 hbi = vexpress_get_hbi(VEXPRESS_SITE_MASTER);
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 33834120d057..490fd48a9541 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1227,12 +1227,17 @@ config MCP_UCB1200_TS
 
 endmenu
 
-config VEXPRESS_CONFIG
-	bool "ARM Versatile Express platform infrastructure"
-	depends on ARM || ARM64
+config MFD_VEXPRESS_SYSREG
+	bool "Versatile Express System Registers"
+	depends on VEXPRESS_CONFIG
+	default y
+	select CLKSRC_MMIO
+	select GPIO_GENERIC_PLATFORM
+	select MFD_CORE
+	select MFD_SYSCON
 	help
-	  Platform configuration infrastructure for the ARM Ltd.
-	  Versatile Express.
+	  System Registers are the platform configuration block
+	  on the ARM Ltd. Versatile Express board.
 
 endmenu
 endif
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 9ba838eb5131..cec3487b539e 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -161,7 +161,7 @@ obj-$(CONFIG_MFD_RC5T583)	+= rc5t583.o rc5t583-irq.o
 obj-$(CONFIG_MFD_SEC_CORE)	+= sec-core.o sec-irq.o
 obj-$(CONFIG_MFD_SYSCON)	+= syscon.o
 obj-$(CONFIG_MFD_LM3533)	+= lm3533-core.o lm3533-ctrlbank.o
-obj-$(CONFIG_VEXPRESS_CONFIG)	+= vexpress-sysreg.o
+obj-$(CONFIG_MFD_VEXPRESS_SYSREG)	+= vexpress-sysreg.o
 obj-$(CONFIG_MFD_RETU)		+= retu-mfd.o
 obj-$(CONFIG_MFD_AS3711)	+= as3711.o
 obj-$(CONFIG_MFD_AS3722)	+= as3722.o
diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c
index b4138a7168db..952df843b6be 100644
--- a/drivers/mfd/vexpress-sysreg.c
+++ b/drivers/mfd/vexpress-sysreg.c
@@ -11,25 +11,22 @@
  * Copyright (C) 2012 ARM Limited
  */
 
+#include <linux/basic_mmio_gpio.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/io.h>
-#include <linux/leds.h>
+#include <linux/mfd/core.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
+#include <linux/platform_data/syscon.h>
 #include <linux/platform_device.h>
-#include <linux/regulator/driver.h>
-#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
-#include <linux/timer.h>
 #include <linux/vexpress.h>
 
 #define SYS_ID			0x000
 #define SYS_SW			0x004
 #define SYS_LED			0x008
 #define SYS_100HZ		0x024
-#define SYS_FLAGS		0x030
 #define SYS_FLAGSSET		0x030
 #define SYS_FLAGSCLR		0x034
 #define SYS_NVFLAGS		0x038
@@ -51,36 +48,32 @@
 #define SYS_ID_HBI_SHIFT	16
 #define SYS_PROCIDx_HBI_SHIFT	0
 
-#define SYS_LED_LED(n)		(1 << (n))
-
 #define SYS_MCI_CARDIN		(1 << 0)
 #define SYS_MCI_WPROT		(1 << 1)
 
-#define SYS_FLASH_WPn		(1 << 0)
-
 #define SYS_MISC_MASTERSITE	(1 << 14)
 
-#define SYS_CFGCTRL_START	(1 << 31)
-#define SYS_CFGCTRL_WRITE	(1 << 30)
-#define SYS_CFGCTRL_DCC(n)	(((n) & 0xf) << 26)
-#define SYS_CFGCTRL_FUNC(n)	(((n) & 0x3f) << 20)
-#define SYS_CFGCTRL_SITE(n)	(((n) & 0x3) << 16)
-#define SYS_CFGCTRL_POSITION(n)	(((n) & 0xf) << 12)
-#define SYS_CFGCTRL_DEVICE(n)	(((n) & 0xfff) << 0)
 
-#define SYS_CFGSTAT_ERR		(1 << 1)
-#define SYS_CFGSTAT_COMPLETE	(1 << 0)
+static void __iomem *__vexpress_sysreg_base;
 
+static void __iomem *vexpress_sysreg_base(void)
+{
+	if (!__vexpress_sysreg_base) {
+		struct device_node *node = of_find_compatible_node(NULL, NULL,
+				"arm,vexpress-sysreg");
 
-static void __iomem *vexpress_sysreg_base;
-static struct device *vexpress_sysreg_dev;
-static LIST_HEAD(vexpress_sysreg_config_funcs);
-static struct device *vexpress_sysreg_config_bridge;
+		__vexpress_sysreg_base = of_iomap(node, 0);
+	}
+
+	WARN_ON(!__vexpress_sysreg_base);
+
+	return __vexpress_sysreg_base;
+}
 
 
 static int vexpress_sysreg_get_master(void)
 {
-	if (readl(vexpress_sysreg_base + SYS_MISC) & SYS_MISC_MASTERSITE)
+	if (readl(vexpress_sysreg_base() + SYS_MISC) & SYS_MISC_MASTERSITE)
 		return VEXPRESS_SITE_DB2;
 
 	return VEXPRESS_SITE_DB1;
@@ -88,8 +81,13 @@ static int vexpress_sysreg_get_master(void)
 
 void vexpress_flags_set(u32 data)
 {
-	writel(~0, vexpress_sysreg_base + SYS_FLAGSCLR);
-	writel(data, vexpress_sysreg_base + SYS_FLAGSSET);
+	writel(~0, vexpress_sysreg_base() + SYS_FLAGSCLR);
+	writel(data, vexpress_sysreg_base() + SYS_FLAGSSET);
+}
+
+unsigned int vexpress_get_mci_cardin(struct device *dev)
+{
+	return readl(vexpress_sysreg_base() + SYS_MCI) & SYS_MCI_CARDIN;
 }
 
 u32 vexpress_get_procid(int site)
@@ -97,7 +95,7 @@ u32 vexpress_get_procid(int site)
 	if (site == VEXPRESS_SITE_MASTER)
 		site = vexpress_sysreg_get_master();
 
-	return readl(vexpress_sysreg_base + (site == VEXPRESS_SITE_DB1 ?
+	return readl(vexpress_sysreg_base() + (site == VEXPRESS_SITE_DB1 ?
 			SYS_PROCID0 : SYS_PROCID1));
 }
 
@@ -107,7 +105,7 @@ u32 vexpress_get_hbi(int site)
 
 	switch (site) {
 	case VEXPRESS_SITE_MB:
-		id = readl(vexpress_sysreg_base + SYS_ID);
+		id = readl(vexpress_sysreg_base() + SYS_ID);
 		return (id >> SYS_ID_HBI_SHIFT) & SYS_HBI_MASK;
 	case VEXPRESS_SITE_MASTER:
 	case VEXPRESS_SITE_DB1:
@@ -121,406 +119,143 @@ u32 vexpress_get_hbi(int site)
 
 void __iomem *vexpress_get_24mhz_clock_base(void)
 {
-	return vexpress_sysreg_base + SYS_24MHZ;
-}
-
-
-struct vexpress_sysreg_config_func {
-	struct list_head list;
-	struct regmap *regmap;
-	int num_templates;
-	u32 template[0]; /* Keep this last */
-};
-
-static int vexpress_sysreg_config_exec(struct vexpress_sysreg_config_func *func,
-		int index, bool write, u32 *data)
-{
-	u32 command, status;
-	int tries;
-	long timeout;
-
-	if (WARN_ON(!vexpress_sysreg_base))
-		return -ENOENT;
-
-	if (WARN_ON(index > func->num_templates))
-		return -EINVAL;
-
-	command = readl(vexpress_sysreg_base + SYS_CFGCTRL);
-	if (WARN_ON(command & SYS_CFGCTRL_START))
-		return -EBUSY;
-
-	command = func->template[index];
-	command |= SYS_CFGCTRL_START;
-	command |= write ? SYS_CFGCTRL_WRITE : 0;
-
-	/* Use a canary for reads */
-	if (!write)
-		*data = 0xdeadbeef;
-
-	dev_dbg(vexpress_sysreg_dev, "command %x, data %x\n",
-			command, *data);
-	writel(*data, vexpress_sysreg_base + SYS_CFGDATA);
-	writel(0, vexpress_sysreg_base + SYS_CFGSTAT);
-	writel(command, vexpress_sysreg_base + SYS_CFGCTRL);
-	mb();
-
-	/* The operation can take ages... Go to sleep, 100us initially */
-	tries = 100;
-	timeout = 100;
-	do {
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(usecs_to_jiffies(timeout));
-		if (signal_pending(current))
-			return -EINTR;
-
-		status = readl(vexpress_sysreg_base + SYS_CFGSTAT);
-		if (status & SYS_CFGSTAT_ERR)
-			return -EFAULT;
-
-		if (timeout > 20)
-			timeout -= 20;
-	} while (--tries && !(status & SYS_CFGSTAT_COMPLETE));
-	if (WARN_ON_ONCE(!tries))
-		return -ETIMEDOUT;
-
-	if (!write) {
-		*data = readl(vexpress_sysreg_base + SYS_CFGDATA);
-		dev_dbg(vexpress_sysreg_dev, "func %p, read data %x\n",
-				func, *data);
-	}
-
-	return 0;
-}
-
-static int vexpress_sysreg_config_read(void *context, unsigned int index,
-		unsigned int *val)
-{
-	struct vexpress_sysreg_config_func *func = context;
-
-	return vexpress_sysreg_config_exec(func, index, false, val);
-}
-
-static int vexpress_sysreg_config_write(void *context, unsigned int index,
-		unsigned int val)
-{
-	struct vexpress_sysreg_config_func *func = context;
-
-	return vexpress_sysreg_config_exec(func, index, true, &val);
-}
-
-struct regmap_config vexpress_sysreg_regmap_config = {
-	.lock = vexpress_config_lock,
-	.unlock = vexpress_config_unlock,
-	.reg_bits = 32,
-	.val_bits = 32,
-	.reg_read = vexpress_sysreg_config_read,
-	.reg_write = vexpress_sysreg_config_write,
-	.reg_format_endian = REGMAP_ENDIAN_LITTLE,
-	.val_format_endian = REGMAP_ENDIAN_LITTLE,
-};
-
-static struct regmap *vexpress_sysreg_config_regmap_init(struct device *dev,
-		void *context)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	struct vexpress_sysreg_config_func *func;
-	struct property *prop;
-	const __be32 *val = NULL;
-	__be32 energy_quirk[4];
-	int num;
-	u32 site, position, dcc;
-	int err;
-	int i;
-
-	if (dev->of_node) {
-		err = vexpress_config_get_topo(dev->of_node, &site, &position,
-				&dcc);
-		if (err)
-			return ERR_PTR(err);
-
-		prop = of_find_property(dev->of_node,
-				"arm,vexpress-sysreg,func", NULL);
-		if (!prop)
-			return ERR_PTR(-EINVAL);
-
-		num = prop->length / sizeof(u32) / 2;
-		val = prop->value;
-	} else {
-		if (pdev->num_resources != 1 ||
-				pdev->resource[0].flags != IORESOURCE_BUS)
-			return ERR_PTR(-EFAULT);
-
-		site = pdev->resource[0].start;
-		if (site == VEXPRESS_SITE_MASTER)
-			site = vexpress_sysreg_get_master();
-		position = 0;
-		dcc = 0;
-		num = 1;
-	}
-
-	/*
-	 * "arm,vexpress-energy" function used to be described
-	 * by its first device only, now it requires both
-	 */
-	if (num == 1 && of_device_is_compatible(dev->of_node,
-			"arm,vexpress-energy")) {
-		num = 2;
-		energy_quirk[0] = *val;
-		energy_quirk[2] = *val++;
-		energy_quirk[1] = *val;
-		energy_quirk[3] = cpu_to_be32(be32_to_cpup(val) + 1);
-		val = energy_quirk;
-	}
-
-	func = kzalloc(sizeof(*func) + sizeof(*func->template) * num,
-			GFP_KERNEL);
-	if (!func)
-		return NULL;
-
-	func->num_templates = num;
-
-	for (i = 0; i < num; i++) {
-		u32 function, device;
-
-		if (dev->of_node) {
-			function = be32_to_cpup(val++);
-			device = be32_to_cpup(val++);
-		} else {
-			function = pdev->resource[0].end;
-			device = pdev->id;
-		}
-
-		dev_dbg(dev, "func %p: %u/%u/%u/%u/%u\n",
-				func, site, position, dcc,
-				function, device);
-
-		func->template[i] = SYS_CFGCTRL_DCC(dcc);
-		func->template[i] |= SYS_CFGCTRL_SITE(site);
-		func->template[i] |= SYS_CFGCTRL_POSITION(position);
-		func->template[i] |= SYS_CFGCTRL_FUNC(function);
-		func->template[i] |= SYS_CFGCTRL_DEVICE(device);
-	}
-
-	vexpress_sysreg_regmap_config.max_register = num - 1;
-
-	func->regmap = regmap_init(dev, NULL, func,
-			&vexpress_sysreg_regmap_config);
-
-	if (IS_ERR(func->regmap))
-		kfree(func);
-	else
-		list_add(&func->list, &vexpress_sysreg_config_funcs);
-
-	return func->regmap;
-}
-
-static void vexpress_sysreg_config_regmap_exit(struct regmap *regmap,
-		void *context)
-{
-	struct vexpress_sysreg_config_func *func, *tmp;
-
-	regmap_exit(regmap);
-
-	list_for_each_entry_safe(func, tmp, &vexpress_sysreg_config_funcs,
-			list) {
-		if (func->regmap == regmap) {
-			list_del(&vexpress_sysreg_config_funcs);
-			kfree(func);
-			break;
-		}
-	}
-}
-
-static struct vexpress_config_bridge_ops vexpress_sysreg_config_bridge_ops = {
-	.regmap_init = vexpress_sysreg_config_regmap_init,
-	.regmap_exit = vexpress_sysreg_config_regmap_exit,
-};
-
-int vexpress_sysreg_config_device_register(struct platform_device *pdev)
-{
-	pdev->dev.parent = vexpress_sysreg_config_bridge;
-
-	return platform_device_register(pdev);
+	return vexpress_sysreg_base() + SYS_24MHZ;
 }
 
 
 void __init vexpress_sysreg_early_init(void __iomem *base)
 {
-	vexpress_sysreg_base = base;
-	vexpress_config_set_master(vexpress_sysreg_get_master());
-}
-
-void __init vexpress_sysreg_of_early_init(void)
-{
-	struct device_node *node;
-
-	if (vexpress_sysreg_base)
-		return;
-
-	node = of_find_compatible_node(NULL, NULL, "arm,vexpress-sysreg");
-	if (WARN_ON(!node))
-		return;
-
-	vexpress_sysreg_base = of_iomap(node, 0);
-	if (WARN_ON(!vexpress_sysreg_base))
-		return;
+	__vexpress_sysreg_base = base;
 
 	vexpress_config_set_master(vexpress_sysreg_get_master());
 }
 
 
-#ifdef CONFIG_GPIOLIB
-
-#define VEXPRESS_SYSREG_GPIO(_name, _reg, _value) \
-	[VEXPRESS_GPIO_##_name] = { \
-		.reg = _reg, \
-		.value = _reg##_##_value, \
-	}
+/* The sysreg block is just a random collection of various functions... */
 
-static struct vexpress_sysreg_gpio {
-	unsigned long reg;
-	u32 value;
-} vexpress_sysreg_gpios[] = {
-	VEXPRESS_SYSREG_GPIO(MMC_CARDIN,	SYS_MCI,	CARDIN),
-	VEXPRESS_SYSREG_GPIO(MMC_WPROT,		SYS_MCI,	WPROT),
-	VEXPRESS_SYSREG_GPIO(FLASH_WPn,		SYS_FLASH,	WPn),
-	VEXPRESS_SYSREG_GPIO(LED0,		SYS_LED,	LED(0)),
-	VEXPRESS_SYSREG_GPIO(LED1,		SYS_LED,	LED(1)),
-	VEXPRESS_SYSREG_GPIO(LED2,		SYS_LED,	LED(2)),
-	VEXPRESS_SYSREG_GPIO(LED3,		SYS_LED,	LED(3)),
-	VEXPRESS_SYSREG_GPIO(LED4,		SYS_LED,	LED(4)),
-	VEXPRESS_SYSREG_GPIO(LED5,		SYS_LED,	LED(5)),
-	VEXPRESS_SYSREG_GPIO(LED6,		SYS_LED,	LED(6)),
-	VEXPRESS_SYSREG_GPIO(LED7,		SYS_LED,	LED(7)),
+static struct syscon_platform_data vexpress_sysreg_sys_id_pdata = {
+	.label = "sys_id",
 };
 
-static int vexpress_sysreg_gpio_direction_input(struct gpio_chip *chip,
-				       unsigned offset)
-{
-	return 0;
-}
-
-static int vexpress_sysreg_gpio_get(struct gpio_chip *chip,
-				       unsigned offset)
-{
-	struct vexpress_sysreg_gpio *gpio = &vexpress_sysreg_gpios[offset];
-	u32 reg_value = readl(vexpress_sysreg_base + gpio->reg);
-
-	return !!(reg_value & gpio->value);
-}
-
-static void vexpress_sysreg_gpio_set(struct gpio_chip *chip,
-				       unsigned offset, int value)
-{
-	struct vexpress_sysreg_gpio *gpio = &vexpress_sysreg_gpios[offset];
-	u32 reg_value = readl(vexpress_sysreg_base + gpio->reg);
-
-	if (value)
-		reg_value |= gpio->value;
-	else
-		reg_value &= ~gpio->value;
-
-	writel(reg_value, vexpress_sysreg_base + gpio->reg);
-}
-
-static int vexpress_sysreg_gpio_direction_output(struct gpio_chip *chip,
-						unsigned offset, int value)
-{
-	vexpress_sysreg_gpio_set(chip, offset, value);
-
-	return 0;
-}
-
-static struct gpio_chip vexpress_sysreg_gpio_chip = {
-	.label = "vexpress-sysreg",
-	.direction_input = vexpress_sysreg_gpio_direction_input,
-	.direction_output = vexpress_sysreg_gpio_direction_output,
-	.get = vexpress_sysreg_gpio_get,
-	.set = vexpress_sysreg_gpio_set,
-	.ngpio = ARRAY_SIZE(vexpress_sysreg_gpios),
-	.base = 0,
+static struct bgpio_pdata vexpress_sysreg_sys_led_pdata = {
+	.label = "sys_led",
+	.base = -1,
+	.ngpio = 8,
 };
 
-
-#define VEXPRESS_SYSREG_GREEN_LED(_name, _default_trigger, _gpio) \
-	{ \
-		.name = "v2m:green:"_name, \
-		.default_trigger = _default_trigger, \
-		.gpio = VEXPRESS_GPIO_##_gpio, \
-	}
-
-struct gpio_led vexpress_sysreg_leds[] = {
-	VEXPRESS_SYSREG_GREEN_LED("user1",	"heartbeat",	LED0),
-	VEXPRESS_SYSREG_GREEN_LED("user2",	"mmc0",		LED1),
-	VEXPRESS_SYSREG_GREEN_LED("user3",	"cpu0",		LED2),
-	VEXPRESS_SYSREG_GREEN_LED("user4",	"cpu1",		LED3),
-	VEXPRESS_SYSREG_GREEN_LED("user5",	"cpu2",		LED4),
-	VEXPRESS_SYSREG_GREEN_LED("user6",	"cpu3",		LED5),
-	VEXPRESS_SYSREG_GREEN_LED("user7",	"cpu4",		LED6),
-	VEXPRESS_SYSREG_GREEN_LED("user8",	"cpu5",		LED7),
+static struct bgpio_pdata vexpress_sysreg_sys_mci_pdata = {
+	.label = "sys_mci",
+	.base = -1,
+	.ngpio = 2,
 };
 
-struct gpio_led_platform_data vexpress_sysreg_leds_pdata = {
-	.num_leds = ARRAY_SIZE(vexpress_sysreg_leds),
-	.leds = vexpress_sysreg_leds,
+static struct bgpio_pdata vexpress_sysreg_sys_flash_pdata = {
+	.label = "sys_flash",
+	.base = -1,
+	.ngpio = 1,
 };
 
-#endif
-
+static struct syscon_platform_data vexpress_sysreg_sys_misc_pdata = {
+	.label = "sys_misc",
+};
 
-static ssize_t vexpress_sysreg_sys_id_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "0x%08x\n", readl(vexpress_sysreg_base + SYS_ID));
-}
+static struct syscon_platform_data vexpress_sysreg_sys_procid_pdata = {
+	.label = "sys_procid",
+};
 
-DEVICE_ATTR(sys_id, S_IRUGO, vexpress_sysreg_sys_id_show, NULL);
+static struct mfd_cell vexpress_sysreg_cells[] = {
+	{
+		.name = "syscon",
+		.num_resources = 1,
+		.resources = (struct resource []) {
+			DEFINE_RES_MEM(SYS_ID, 0x4),
+		},
+		.platform_data = &vexpress_sysreg_sys_id_pdata,
+		.pdata_size = sizeof(vexpress_sysreg_sys_id_pdata),
+	}, {
+		.name = "basic-mmio-gpio",
+		.of_compatible = "arm,vexpress-sysreg,sys_led",
+		.num_resources = 1,
+		.resources = (struct resource []) {
+			DEFINE_RES_MEM_NAMED(SYS_LED, 0x4, "dat"),
+		},
+		.platform_data = &vexpress_sysreg_sys_led_pdata,
+		.pdata_size = sizeof(vexpress_sysreg_sys_led_pdata),
+	}, {
+		.name = "basic-mmio-gpio",
+		.of_compatible = "arm,vexpress-sysreg,sys_mci",
+		.num_resources = 1,
+		.resources = (struct resource []) {
+			DEFINE_RES_MEM_NAMED(SYS_MCI, 0x4, "dat"),
+		},
+		.platform_data = &vexpress_sysreg_sys_mci_pdata,
+		.pdata_size = sizeof(vexpress_sysreg_sys_mci_pdata),
+	}, {
+		.name = "basic-mmio-gpio",
+		.of_compatible = "arm,vexpress-sysreg,sys_flash",
+		.num_resources = 1,
+		.resources = (struct resource []) {
+			DEFINE_RES_MEM_NAMED(SYS_FLASH, 0x4, "dat"),
+		},
+		.platform_data = &vexpress_sysreg_sys_flash_pdata,
+		.pdata_size = sizeof(vexpress_sysreg_sys_flash_pdata),
+	}, {
+		.name = "syscon",
+		.num_resources = 1,
+		.resources = (struct resource []) {
+			DEFINE_RES_MEM(SYS_MISC, 0x4),
+		},
+		.platform_data = &vexpress_sysreg_sys_misc_pdata,
+		.pdata_size = sizeof(vexpress_sysreg_sys_misc_pdata),
+	}, {
+		.name = "syscon",
+		.num_resources = 1,
+		.resources = (struct resource []) {
+			DEFINE_RES_MEM(SYS_PROCID0, 0x8),
+		},
+		.platform_data = &vexpress_sysreg_sys_procid_pdata,
+		.pdata_size = sizeof(vexpress_sysreg_sys_procid_pdata),
+	}, {
+		.name = "vexpress-syscfg",
+		.num_resources = 1,
+		.resources = (struct resource []) {
+			DEFINE_RES_MEM(SYS_CFGDATA, 0xc),
+		},
+	}
+};
 
 static int vexpress_sysreg_probe(struct platform_device *pdev)
 {
-	int err;
-	struct resource *res = platform_get_resource(pdev,
-			IORESOURCE_MEM, 0);
-
-	if (!devm_request_mem_region(&pdev->dev, res->start,
-			resource_size(res), pdev->name)) {
-		dev_err(&pdev->dev, "Failed to request memory region!\n");
-		return -EBUSY;
-	}
+	struct resource *mem;
+	void __iomem *base;
+	struct bgpio_chip *mmc_gpio_chip;
 
-	if (!vexpress_sysreg_base)
-		vexpress_sysreg_base = devm_ioremap(&pdev->dev, res->start,
-				resource_size(res));
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem)
+		return -EINVAL;
 
-	if (!vexpress_sysreg_base) {
-		dev_err(&pdev->dev, "Failed to obtain base address!\n");
-		return -EFAULT;
-	}
+	base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
+	if (!base)
+		return -ENOMEM;
 
 	vexpress_config_set_master(vexpress_sysreg_get_master());
-	vexpress_sysreg_dev = &pdev->dev;
-
-#ifdef CONFIG_GPIOLIB
-	vexpress_sysreg_gpio_chip.dev = &pdev->dev;
-	err = gpiochip_add(&vexpress_sysreg_gpio_chip);
-	if (err) {
-		dev_err(&pdev->dev, "Failed to register GPIO chip! (%d)\n",
-				err);
-		return err;
-	}
 
-	platform_device_register_data(vexpress_sysreg_dev, "leds-gpio",
-			PLATFORM_DEVID_AUTO, &vexpress_sysreg_leds_pdata,
-			sizeof(vexpress_sysreg_leds_pdata));
-#endif
-
-	vexpress_sysreg_config_bridge = vexpress_config_bridge_register(
-			&pdev->dev, &vexpress_sysreg_config_bridge_ops, NULL);
-	WARN_ON(!vexpress_sysreg_config_bridge);
-
-	device_create_file(vexpress_sysreg_dev, &dev_attr_sys_id);
-
-	return 0;
+	/*
+	 * Duplicated SYS_MCI pseudo-GPIO controller for compatibility with
+	 * older trees using sysreg node for MMC control lines.
+	 */
+	mmc_gpio_chip = devm_kzalloc(&pdev->dev, sizeof(*mmc_gpio_chip),
+			GFP_KERNEL);
+	if (!mmc_gpio_chip)
+		return -ENOMEM;
+	bgpio_init(mmc_gpio_chip, &pdev->dev, 0x4, base + SYS_MCI,
+			NULL, NULL, NULL, NULL, 0);
+	mmc_gpio_chip->gc.ngpio = 2;
+	gpiochip_add(&mmc_gpio_chip->gc);
+
+	return mfd_add_devices(&pdev->dev, PLATFORM_DEVID_AUTO,
+			vexpress_sysreg_cells,
+			ARRAY_SIZE(vexpress_sysreg_cells), mem, 0, NULL);
 }
 
 static const struct of_device_id vexpress_sysreg_match[] = {
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 8baff0effc7d..d9663ef90ce8 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -515,6 +515,15 @@ config SRAM
 	  the genalloc API. It is supposed to be used for small on-chip SRAM
 	  areas found on many SoCs.
 
+config VEXPRESS_SYSCFG
+	bool "Versatile Express System Configuration driver"
+	depends on VEXPRESS_CONFIG
+	default y
+	help
+	  The ARM Ltd. Versatile Express uses a specialised platform
+	  configuration bus. The System Configuration interface is one of
+	  the possible means of generating transactions on this bus.
+
 source "drivers/misc/c2port/Kconfig"
 source "drivers/misc/eeprom/Kconfig"
 source "drivers/misc/cb710/Kconfig"
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 7eb4b69580c0..d59ce1261b38 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -55,3 +55,4 @@ obj-$(CONFIG_SRAM)		+= sram.o
 obj-y				+= mic/
 obj-$(CONFIG_GENWQE)		+= genwqe/
 obj-$(CONFIG_ECHO)		+= echo/
+obj-$(CONFIG_VEXPRESS_SYSCFG)	+= vexpress-syscfg.o
diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c
new file mode 100644
index 000000000000..73068e50e56d
--- /dev/null
+++ b/drivers/misc/vexpress-syscfg.c
@@ -0,0 +1,324 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Copyright (C) 2014 ARM Limited
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/syscore_ops.h>
+#include <linux/vexpress.h>
+
+
+#define SYS_CFGDATA		0x0
+
+#define SYS_CFGCTRL		0x4
+#define SYS_CFGCTRL_START	(1 << 31)
+#define SYS_CFGCTRL_WRITE	(1 << 30)
+#define SYS_CFGCTRL_DCC(n)	(((n) & 0xf) << 26)
+#define SYS_CFGCTRL_FUNC(n)	(((n) & 0x3f) << 20)
+#define SYS_CFGCTRL_SITE(n)	(((n) & 0x3) << 16)
+#define SYS_CFGCTRL_POSITION(n)	(((n) & 0xf) << 12)
+#define SYS_CFGCTRL_DEVICE(n)	(((n) & 0xfff) << 0)
+
+#define SYS_CFGSTAT		0x8
+#define SYS_CFGSTAT_ERR		(1 << 1)
+#define SYS_CFGSTAT_COMPLETE	(1 << 0)
+
+
+struct vexpress_syscfg {
+	struct device *dev;
+	void __iomem *base;
+	struct list_head funcs;
+};
+
+struct vexpress_syscfg_func {
+	struct list_head list;
+	struct vexpress_syscfg *syscfg;
+	struct regmap *regmap;
+	int num_templates;
+	u32 template[0]; /* Keep it last! */
+};
+
+
+static int vexpress_syscfg_exec(struct vexpress_syscfg_func *func,
+		int index, bool write, u32 *data)
+{
+	struct vexpress_syscfg *syscfg = func->syscfg;
+	u32 command, status;
+	int tries;
+	long timeout;
+
+	if (WARN_ON(index < 0 || index >= func->num_templates))
+		return -EINVAL; /* valid indices: 0..num_templates-1 */
+
+	command = readl(syscfg->base + SYS_CFGCTRL);
+	if (WARN_ON(command & SYS_CFGCTRL_START))
+		return -EBUSY;
+
+	command = func->template[index];
+	command |= SYS_CFGCTRL_START;
+	command |= write ? SYS_CFGCTRL_WRITE : 0;
+
+	/* Use a canary for reads */
+	if (!write)
+		*data = 0xdeadbeef;
+
+	dev_dbg(syscfg->dev, "func %p, command %x, data %x\n",
+			func, command, *data);
+	writel(*data, syscfg->base + SYS_CFGDATA);
+	writel(0, syscfg->base + SYS_CFGSTAT);
+	writel(command, syscfg->base + SYS_CFGCTRL);
+	mb();
+
+	/* The operation can take ages... Go to sleep, 100us initially */
+	tries = 100;
+	timeout = 100;
+	do {
+		if (!irqs_disabled()) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(usecs_to_jiffies(timeout));
+			if (signal_pending(current))
+				return -EINTR;
+		} else {
+			udelay(timeout);
+		}
+
+		status = readl(syscfg->base + SYS_CFGSTAT);
+		if (status & SYS_CFGSTAT_ERR)
+			return -EFAULT;
+
+		if (timeout > 20)
+			timeout -= 20;
+	} while (--tries && !(status & SYS_CFGSTAT_COMPLETE));
+	if (WARN_ON_ONCE(!tries))
+		return -ETIMEDOUT;
+
+	if (!write) {
+		*data = readl(syscfg->base + SYS_CFGDATA);
+		dev_dbg(syscfg->dev, "func %p, read data %x\n", func, *data);
+	}
+
+	return 0;
+}
+
+static int vexpress_syscfg_read(void *context, unsigned int index,
+		unsigned int *val)
+{
+	struct vexpress_syscfg_func *func = context;
+
+	return vexpress_syscfg_exec(func, index, false, val);
+}
+
+static int vexpress_syscfg_write(void *context, unsigned int index,
+		unsigned int val)
+{
+	struct vexpress_syscfg_func *func = context;
+
+	return vexpress_syscfg_exec(func, index, true, &val);
+}
+
+struct regmap_config vexpress_syscfg_regmap_config = {
+	.lock = vexpress_config_lock,
+	.unlock = vexpress_config_unlock,
+	.reg_bits = 32,
+	.val_bits = 32,
+	.reg_read = vexpress_syscfg_read,
+	.reg_write = vexpress_syscfg_write,
+	.reg_format_endian = REGMAP_ENDIAN_LITTLE,
+	.val_format_endian = REGMAP_ENDIAN_LITTLE,
+};
+
+
+static struct regmap *vexpress_syscfg_regmap_init(struct device *dev,
+		void *context)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct vexpress_syscfg *syscfg = context;
+	struct vexpress_syscfg_func *func;
+	struct property *prop;
+	struct regmap *regmap;
+	const __be32 *val = NULL;
+	__be32 energy_quirk[4];
+	int num, i;
+	u32 site, position, dcc;
+
+	if (dev->of_node) {
+		int err = vexpress_config_get_topo(dev->of_node, &site,
+				&position, &dcc);
+
+		if (err)
+			return ERR_PTR(err);
+
+		prop = of_find_property(dev->of_node,
+				"arm,vexpress-sysreg,func", NULL);
+		if (!prop)
+			return ERR_PTR(-EINVAL);
+
+		num = prop->length / sizeof(u32) / 2;
+		val = prop->value;
+	} else {
+		if (pdev->num_resources != 1 ||
+				pdev->resource[0].flags != IORESOURCE_BUS)
+			return ERR_PTR(-EFAULT);
+
+		site = pdev->resource[0].start;
+		if (site == VEXPRESS_SITE_MASTER)
+			site = vexpress_config_get_master();
+		position = 0;
+		dcc = 0;
+		num = 1;
+	}
+
+	/*
+	 * "arm,vexpress-energy" function used to be described
+	 * by its first device only, now it requires both
+	 */
+	if (num == 1 && of_device_is_compatible(dev->of_node,
+			"arm,vexpress-energy")) {
+		num = 2;
+		energy_quirk[0] = *val;
+		energy_quirk[2] = *val++;
+		energy_quirk[1] = *val;
+		energy_quirk[3] = cpu_to_be32(be32_to_cpup(val) + 1);
+		val = energy_quirk;
+	}
+
+	func = kzalloc(sizeof(*func) + sizeof(*func->template) * num,
+			GFP_KERNEL);
+	if (!func)
+		return ERR_PTR(-ENOMEM);
+
+	func->syscfg = syscfg;
+	func->num_templates = num;
+
+	for (i = 0; i < num; i++) {
+		u32 function, device;
+
+		if (dev->of_node) {
+			function = be32_to_cpup(val++);
+			device = be32_to_cpup(val++);
+		} else {
+			function = pdev->resource[0].end;
+			device = pdev->id;
+		}
+
+		dev_dbg(dev, "func %p: %u/%u/%u/%u/%u\n",
+				func, site, position, dcc,
+				function, device);
+
+		func->template[i] = SYS_CFGCTRL_DCC(dcc);
+		func->template[i] |= SYS_CFGCTRL_SITE(site);
+		func->template[i] |= SYS_CFGCTRL_POSITION(position);
+		func->template[i] |= SYS_CFGCTRL_FUNC(function);
+		func->template[i] |= SYS_CFGCTRL_DEVICE(device);
+	}
+
+	vexpress_syscfg_regmap_config.max_register = num - 1;
+
+	regmap = regmap_init(dev, NULL, func, &vexpress_syscfg_regmap_config);
+	if (IS_ERR(regmap)) {
+		kfree(func);
+		return regmap;
+	}
+
+	func->regmap = regmap;
+	list_add(&func->list, &syscfg->funcs);
+	return regmap;
+}
+
+static void vexpress_syscfg_regmap_exit(struct regmap *regmap, void *context)
+{
+	struct vexpress_syscfg *syscfg = context;
+	struct vexpress_syscfg_func *func, *tmp;
+
+	regmap_exit(regmap);
+
+	list_for_each_entry_safe(func, tmp, &syscfg->funcs, list) {
+		if (func->regmap == regmap) {
+			list_del(&func->list);
+			kfree(func);
+			break;
+		}
+	}
+}
+
+static struct vexpress_config_bridge_ops vexpress_syscfg_bridge_ops = {
+	.regmap_init = vexpress_syscfg_regmap_init,
+	.regmap_exit = vexpress_syscfg_regmap_exit,
+};
+
+
+/* Non-DT hack, to be gone... */
+static struct device *vexpress_syscfg_bridge;
+
+int vexpress_syscfg_device_register(struct platform_device *pdev)
+{
+	pdev->dev.parent = vexpress_syscfg_bridge;
+
+	return platform_device_register(pdev);
+}
+
+
+int vexpress_syscfg_probe(struct platform_device *pdev)
+{
+	struct vexpress_syscfg *syscfg;
+	struct resource *res;
+	struct device *bridge;
+
+	syscfg = devm_kzalloc(&pdev->dev, sizeof(*syscfg), GFP_KERNEL);
+	if (!syscfg)
+		return -ENOMEM;
+	syscfg->dev = &pdev->dev;
+	INIT_LIST_HEAD(&syscfg->funcs);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!devm_request_mem_region(&pdev->dev, res->start,
+			resource_size(res), pdev->name))
+		return -EBUSY;
+
+	syscfg->base = devm_ioremap(&pdev->dev, res->start, resource_size(res));
+	if (!syscfg->base)
+		return -EFAULT;
+
+	/* Must use dev.parent (MFD), as that's where DT phandle points at... */
+	bridge = vexpress_config_bridge_register(pdev->dev.parent,
+			&vexpress_syscfg_bridge_ops, syscfg);
+	if (IS_ERR(bridge))
+		return PTR_ERR(bridge);
+
+	/* Non-DT case */
+	if (!pdev->dev.of_node)
+		vexpress_syscfg_bridge = bridge;
+
+	return 0;
+}
+
+static const struct platform_device_id vexpress_syscfg_id_table[] = {
+	{ "vexpress-syscfg", },
+	{},
+};
+
+static struct platform_driver vexpress_syscfg_driver = {
+	.driver.name = "vexpress-syscfg",
+	.id_table = vexpress_syscfg_id_table,
+	.probe = vexpress_syscfg_probe,
+};
+
+static int __init vexpress_syscfg_init(void)
+{
+	return platform_driver_register(&vexpress_syscfg_driver);
+}
+core_initcall(vexpress_syscfg_init);
diff --git a/include/linux/vexpress.h b/include/linux/vexpress.h
index 6b206ba6aa0e..46636e3f43fd 100644
--- a/include/linux/vexpress.h
+++ b/include/linux/vexpress.h
@@ -24,18 +24,6 @@
 #define VEXPRESS_SITE_DB2		2
 #define VEXPRESS_SITE_MASTER		0xf
 
-#define VEXPRESS_GPIO_MMC_CARDIN	0
-#define VEXPRESS_GPIO_MMC_WPROT		1
-#define VEXPRESS_GPIO_FLASH_WPn		2
-#define VEXPRESS_GPIO_LED0		3
-#define VEXPRESS_GPIO_LED1		4
-#define VEXPRESS_GPIO_LED2		5
-#define VEXPRESS_GPIO_LED3		6
-#define VEXPRESS_GPIO_LED4		7
-#define VEXPRESS_GPIO_LED5		8
-#define VEXPRESS_GPIO_LED6		9
-#define VEXPRESS_GPIO_LED7		10
-
 #define VEXPRESS_RES_FUNC(_site, _func)	\
 {					\
 	.start = (_site),		\
@@ -70,14 +58,14 @@ struct regmap *devm_regmap_init_vexpress_config(struct device *dev);
 
 /* Platform control */
 
+unsigned int vexpress_get_mci_cardin(struct device *dev);
 u32 vexpress_get_procid(int site);
 u32 vexpress_get_hbi(int site);
 void *vexpress_get_24mhz_clock_base(void);
 void vexpress_flags_set(u32 data);
 
 void vexpress_sysreg_early_init(void __iomem *base);
-void vexpress_sysreg_of_early_init(void);
-int vexpress_sysreg_config_device_register(struct platform_device *pdev);
+int vexpress_syscfg_device_register(struct platform_device *pdev);
 
 /* Clocks */
 
-- 
cgit 


From 6b2c31c71d6fa8896c5f3f2354d790a5bd3f0a1e Mon Sep 17 00:00:00 2001
From: Pawel Moll <pawel.moll@arm.com>
Date: Thu, 6 Feb 2014 14:33:44 +0000
Subject: ARM: vexpress: move HBI check to sysreg driver

The last reason for static memory mapping is the HBI (board
identification number) check early in the machine code.

Moving the check to the sysreg driver makes it possible to
completely remove the early mapping and init functions.

Signed-off-by: Pawel Moll <pawel.moll@arm.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
---
 arch/arm/mach-vexpress/v2m.c  | 49 -------------------------------------------
 drivers/mfd/vexpress-sysreg.c | 30 ++++++++++----------------
 include/linux/vexpress.h      |  1 -
 3 files changed, 11 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c
index d8b419bcf3c3..38f4f6f37770 100644
--- a/arch/arm/mach-vexpress/v2m.c
+++ b/arch/arm/mach-vexpress/v2m.c
@@ -370,53 +370,6 @@ MACHINE_START(VEXPRESS, "ARM-Versatile Express")
 	.init_machine	= v2m_init,
 MACHINE_END
 
-static struct map_desc v2m_rs1_io_desc __initdata = {
-	.virtual	= V2M_PERIPH,
-	.pfn		= __phys_to_pfn(0x1c000000),
-	.length		= SZ_2M,
-	.type		= MT_DEVICE,
-};
-
-static int __init v2m_dt_scan_memory_map(unsigned long node, const char *uname,
-		int depth, void *data)
-{
-	const char **map = data;
-
-	if (strcmp(uname, "motherboard") != 0)
-		return 0;
-
-	*map = of_get_flat_dt_prop(node, "arm,v2m-memory-map", NULL);
-
-	return 1;
-}
-
-void __init v2m_dt_map_io(void)
-{
-	const char *map = NULL;
-
-	of_scan_flat_dt(v2m_dt_scan_memory_map, &map);
-
-	if (map && strcmp(map, "rs1") == 0)
-		iotable_init(&v2m_rs1_io_desc, 1);
-	else
-		iotable_init(v2m_io_desc, ARRAY_SIZE(v2m_io_desc));
-}
-
-void __init v2m_dt_init_early(void)
-{
-	u32 dt_hbi;
-
-	/* Confirm board type against DT property, if available */
-	if (of_property_read_u32(of_allnodes, "arm,hbi", &dt_hbi) == 0) {
-		u32 hbi = vexpress_get_hbi(VEXPRESS_SITE_MASTER);
-
-		if (WARN_ON(dt_hbi != hbi))
-			pr_warning("vexpress: DT HBI (%x) is not matching "
-					"hardware (%x)!\n", dt_hbi, hbi);
-	}
-}
-
-
 static void __init v2m_dt_init(void)
 {
 	l2x0_of_init(0x00400000, 0xfe0fffff);
@@ -432,7 +385,5 @@ DT_MACHINE_START(VEXPRESS_DT, "ARM-Versatile Express")
 	.dt_compat	= v2m_dt_match,
 	.smp		= smp_ops(vexpress_smp_dt_ops),
 	.smp_init	= smp_init_ops(vexpress_smp_init_ops),
-	.map_io		= v2m_dt_map_io,
-	.init_early	= v2m_dt_init_early,
 	.init_machine	= v2m_dt_init,
 MACHINE_END
diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c
index 952df843b6be..9e21e4fc9599 100644
--- a/drivers/mfd/vexpress-sysreg.c
+++ b/drivers/mfd/vexpress-sysreg.c
@@ -45,7 +45,6 @@
 #define SYS_CFGSTAT		0x0a8
 
 #define SYS_HBI_MASK		0xfff
-#define SYS_ID_HBI_SHIFT	16
 #define SYS_PROCIDx_HBI_SHIFT	0
 
 #define SYS_MCI_CARDIN		(1 << 0)
@@ -99,24 +98,6 @@ u32 vexpress_get_procid(int site)
 			SYS_PROCID0 : SYS_PROCID1));
 }
 
-u32 vexpress_get_hbi(int site)
-{
-	u32 id;
-
-	switch (site) {
-	case VEXPRESS_SITE_MB:
-		id = readl(vexpress_sysreg_base() + SYS_ID);
-		return (id >> SYS_ID_HBI_SHIFT) & SYS_HBI_MASK;
-	case VEXPRESS_SITE_MASTER:
-	case VEXPRESS_SITE_DB1:
-	case VEXPRESS_SITE_DB2:
-		id = vexpress_get_procid(site);
-		return (id >> SYS_PROCIDx_HBI_SHIFT) & SYS_HBI_MASK;
-	}
-
-	return ~0;
-}
-
 void __iomem *vexpress_get_24mhz_clock_base(void)
 {
 	return vexpress_sysreg_base() + SYS_24MHZ;
@@ -229,6 +210,7 @@ static int vexpress_sysreg_probe(struct platform_device *pdev)
 	struct resource *mem;
 	void __iomem *base;
 	struct bgpio_chip *mmc_gpio_chip;
+	u32 dt_hbi;
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!mem)
@@ -240,6 +222,16 @@ static int vexpress_sysreg_probe(struct platform_device *pdev)
 
 	vexpress_config_set_master(vexpress_sysreg_get_master());
 
+	/* Confirm board type against DT property, if available */
+	if (of_property_read_u32(of_allnodes, "arm,hbi", &dt_hbi) == 0) {
+		u32 id = vexpress_get_procid(VEXPRESS_SITE_MASTER);
+		u32 hbi = (id >> SYS_PROCIDx_HBI_SHIFT) & SYS_HBI_MASK;
+
+		if (WARN_ON(dt_hbi != hbi))
+			dev_warn(&pdev->dev, "DT HBI (%x) is not matching hardware (%x)!\n",
+					dt_hbi, hbi);
+	}
+
 	/*
 	 * Duplicated SYS_MCI pseudo-GPIO controller for compatibility with
 	 * older trees using sysreg node for MMC control lines.
diff --git a/include/linux/vexpress.h b/include/linux/vexpress.h
index 46636e3f43fd..a4c9547aae64 100644
--- a/include/linux/vexpress.h
+++ b/include/linux/vexpress.h
@@ -60,7 +60,6 @@ struct regmap *devm_regmap_init_vexpress_config(struct device *dev);
 
 unsigned int vexpress_get_mci_cardin(struct device *dev);
 u32 vexpress_get_procid(int site);
-u32 vexpress_get_hbi(int site);
 void *vexpress_get_24mhz_clock_base(void);
 void vexpress_flags_set(u32 data);
 
-- 
cgit 


From 200b916f3575bdf11609cb447661b8d5957b0bbf Mon Sep 17 00:00:00 2001
From: Cong Wang <cwang@twopensource.com>
Date: Mon, 12 May 2014 15:11:20 -0700
Subject: rtnetlink: wait for unregistering devices in rtnl_link_unregister()

From: Cong Wang <cwang@twopensource.com>

commit 50624c934db18ab90 (net: Delay default_device_exit_batch until no
devices are unregistering) introduced rtnl_lock_unregistering() for
default_device_exit_batch(). Same race could happen when we rmmod a driver
which calls rtnl_link_unregister() as we call dev->destructor without rtnl
lock.

For long term, I think we should clean up the mess of netdev_run_todo()
and net namespace exit code.

Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |  5 +++++
 net/core/dev.c            |  2 +-
 net/core/net_namespace.c  |  2 +-
 net/core/rtnetlink.c      | 33 ++++++++++++++++++++++++++++++++-
 4 files changed, 39 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 8e3e66ac0a52..953937ea5233 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -4,6 +4,7 @@
 
 #include <linux/mutex.h>
 #include <linux/netdevice.h>
+#include <linux/wait.h>
 #include <uapi/linux/rtnetlink.h>
 
 extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
@@ -22,6 +23,10 @@ extern void rtnl_lock(void);
 extern void rtnl_unlock(void);
 extern int rtnl_trylock(void);
 extern int rtnl_is_locked(void);
+
+extern wait_queue_head_t netdev_unregistering_wq;
+extern struct mutex net_mutex;
+
 #ifdef CONFIG_PROVE_LOCKING
 extern int lockdep_rtnl_is_held(void);
 #else
diff --git a/net/core/dev.c b/net/core/dev.c
index c619b8641337..6da649bde4f7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5541,7 +5541,7 @@ static int dev_new_index(struct net *net)
 
 /* Delayed registration/unregisteration */
 static LIST_HEAD(net_todo_list);
-static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
+DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
 
 static void net_set_todo(struct net_device *dev)
 {
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 81d3a9a08453..7c8ffd974961 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -24,7 +24,7 @@
 
 static LIST_HEAD(pernet_list);
 static struct list_head *first_device = &pernet_list;
-static DEFINE_MUTEX(net_mutex);
+DEFINE_MUTEX(net_mutex);
 
 LIST_HEAD(net_namespace_list);
 EXPORT_SYMBOL_GPL(net_namespace_list);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9837bebf93ce..2d8d8fcfa060 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -353,15 +353,46 @@ void __rtnl_link_unregister(struct rtnl_link_ops *ops)
 }
 EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
 
+/* Return with the rtnl_lock held when there are no network
+ * devices unregistering in any network namespace.
+ */
+static void rtnl_lock_unregistering_all(void)
+{
+	struct net *net;
+	bool unregistering;
+	DEFINE_WAIT(wait);
+
+	for (;;) {
+		prepare_to_wait(&netdev_unregistering_wq, &wait,
+				TASK_UNINTERRUPTIBLE);
+		unregistering = false;
+		rtnl_lock();
+		for_each_net(net) {
+			if (net->dev_unreg_count > 0) {
+				unregistering = true;
+				break;
+			}
+		}
+		if (!unregistering)
+			break;
+		__rtnl_unlock();
+		schedule();
+	}
+	finish_wait(&netdev_unregistering_wq, &wait);
+}
+
 /**
  * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
  * @ops: struct rtnl_link_ops * to unregister
  */
 void rtnl_link_unregister(struct rtnl_link_ops *ops)
 {
-	rtnl_lock();
+	/* Close the race with cleanup_net() */
+	mutex_lock(&net_mutex);
+	rtnl_lock_unregistering_all();
 	__rtnl_link_unregister(ops);
 	rtnl_unlock();
+	mutex_unlock(&net_mutex);
 }
 EXPORT_SYMBOL_GPL(rtnl_link_unregister);
 
-- 
cgit 


From 622582786c9e041d0bd52bde201787adeab249f8 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Tue, 13 May 2014 19:50:46 -0700
Subject: net: filter: x86: internal BPF JIT

Maps all internal BPF instructions into x86_64 instructions.
This patch replaces original BPF x64 JIT with internal BPF x64 JIT.
sysctl net.core.bpf_jit_enable is reused as on/off switch.

Performance:

1. old BPF JIT and internal BPF JIT generate equivalent x86_64 code.
  No performance difference is observed for filters that were JIT-able before

Example assembler code for BPF filter "tcpdump port 22"

original BPF -> old JIT:            original BPF -> internal BPF -> new JIT:
   0:   push   %rbp                      0:     push   %rbp
   1:   mov    %rsp,%rbp                 1:     mov    %rsp,%rbp
   4:   sub    $0x60,%rsp                4:     sub    $0x228,%rsp
   8:   mov    %rbx,-0x8(%rbp)           b:     mov    %rbx,-0x228(%rbp) // prologue
                                        12:     mov    %r13,-0x220(%rbp)
                                        19:     mov    %r14,-0x218(%rbp)
                                        20:     mov    %r15,-0x210(%rbp)
                                        27:     xor    %eax,%eax         // clear A
   c:   xor    %ebx,%ebx                29:     xor    %r13,%r13         // clear X
   e:   mov    0x68(%rdi),%r9d          2c:     mov    0x68(%rdi),%r9d
  12:   sub    0x6c(%rdi),%r9d          30:     sub    0x6c(%rdi),%r9d
  16:   mov    0xd8(%rdi),%r8           34:     mov    0xd8(%rdi),%r10
                                        3b:     mov    %rdi,%rbx
  1d:   mov    $0xc,%esi                3e:     mov    $0xc,%esi
  22:   callq  0xffffffffe1021e15       43:     callq  0xffffffffe102bd75
  27:   cmp    $0x86dd,%eax             48:     cmp    $0x86dd,%rax
  2c:   jne    0x0000000000000069       4f:     jne    0x000000000000009a
  2e:   mov    $0x14,%esi               51:     mov    $0x14,%esi
  33:   callq  0xffffffffe1021e31       56:     callq  0xffffffffe102bd91
  38:   cmp    $0x84,%eax               5b:     cmp    $0x84,%rax
  3d:   je     0x0000000000000049       62:     je     0x0000000000000074
  3f:   cmp    $0x6,%eax                64:     cmp    $0x6,%rax
  42:   je     0x0000000000000049       68:     je     0x0000000000000074
  44:   cmp    $0x11,%eax               6a:     cmp    $0x11,%rax
  47:   jne    0x00000000000000c6       6e:     jne    0x0000000000000117
  49:   mov    $0x36,%esi               74:     mov    $0x36,%esi
  4e:   callq  0xffffffffe1021e15       79:     callq  0xffffffffe102bd75
  53:   cmp    $0x16,%eax               7e:     cmp    $0x16,%rax
  56:   je     0x00000000000000bf       82:     je     0x0000000000000110
  58:   mov    $0x38,%esi               88:     mov    $0x38,%esi
  5d:   callq  0xffffffffe1021e15       8d:     callq  0xffffffffe102bd75
  62:   cmp    $0x16,%eax               92:     cmp    $0x16,%rax
  65:   je     0x00000000000000bf       96:     je     0x0000000000000110
  67:   jmp    0x00000000000000c6       98:     jmp    0x0000000000000117
  69:   cmp    $0x800,%eax              9a:     cmp    $0x800,%rax
  6e:   jne    0x00000000000000c6       a1:     jne    0x0000000000000117
  70:   mov    $0x17,%esi               a3:     mov    $0x17,%esi
  75:   callq  0xffffffffe1021e31       a8:     callq  0xffffffffe102bd91
  7a:   cmp    $0x84,%eax               ad:     cmp    $0x84,%rax
  7f:   je     0x000000000000008b       b4:     je     0x00000000000000c2
  81:   cmp    $0x6,%eax                b6:     cmp    $0x6,%rax
  84:   je     0x000000000000008b       ba:     je     0x00000000000000c2
  86:   cmp    $0x11,%eax               bc:     cmp    $0x11,%rax
  89:   jne    0x00000000000000c6       c0:     jne    0x0000000000000117
  8b:   mov    $0x14,%esi               c2:     mov    $0x14,%esi
  90:   callq  0xffffffffe1021e15       c7:     callq  0xffffffffe102bd75
  95:   test   $0x1fff,%ax              cc:     test   $0x1fff,%rax
  99:   jne    0x00000000000000c6       d3:     jne    0x0000000000000117
                                        d5:     mov    %rax,%r14
  9b:   mov    $0xe,%esi                d8:     mov    $0xe,%esi
  a0:   callq  0xffffffffe1021e44       dd:     callq  0xffffffffe102bd91 // MSH
                                        e2:     and    $0xf,%eax
                                        e5:     shl    $0x2,%eax
                                        e8:     mov    %rax,%r13
                                        eb:     mov    %r14,%rax
                                        ee:     mov    %r13,%rsi
  a5:   lea    0xe(%rbx),%esi           f1:     add    $0xe,%esi
  a8:   callq  0xffffffffe1021e0d       f4:     callq  0xffffffffe102bd6d
  ad:   cmp    $0x16,%eax               f9:     cmp    $0x16,%rax
  b0:   je     0x00000000000000bf       fd:     je     0x0000000000000110
                                        ff:     mov    %r13,%rsi
  b2:   lea    0x10(%rbx),%esi         102:     add    $0x10,%esi
  b5:   callq  0xffffffffe1021e0d      105:     callq  0xffffffffe102bd6d
  ba:   cmp    $0x16,%eax              10a:     cmp    $0x16,%rax
  bd:   jne    0x00000000000000c6      10e:     jne    0x0000000000000117
  bf:   mov    $0xffff,%eax            110:     mov    $0xffff,%eax
  c4:   jmp    0x00000000000000c8      115:     jmp    0x000000000000011c
  c6:   xor    %eax,%eax               117:     mov    $0x0,%eax
  c8:   mov    -0x8(%rbp),%rbx         11c:     mov    -0x228(%rbp),%rbx // epilogue
  cc:   leaveq                         123:     mov    -0x220(%rbp),%r13
  cd:   retq                           12a:     mov    -0x218(%rbp),%r14
                                       131:     mov    -0x210(%rbp),%r15
                                       138:     leaveq
                                       139:     retq

On fully cached SKBs both JITed functions take 12 nsec to execute.
BPF interpreter executes the program in 30 nsec.

The difference in generated assembler is due to the following:

Old BPF implements LDX_MSH instruction via sk_load_byte_msh() helper function
inside bpf_jit.S.
New JIT removes the helper and does it explicitly, so ldx_msh cost
is the same for both JITs, but generated code looks longer.

New JIT has 4 registers to save, so prologue/epilogue are larger,
but the cost is within noise on x64.

Old JIT checks whether first insn clears A and if not emits 'xor %eax,%eax'.
New JIT clears %rax unconditionally.

2. old BPF JIT doesn't support ANC_NLATTR, ANC_PAY_OFFSET, ANC_RANDOM
  extensions. New JIT supports all BPF extensions.
  Performance of such filters improves 2-4 times depending on a filter.
  The longer the filter the higher performance gain.
  Synthetic benchmarks with many ancillary loads see 20x speedup
  which seems to be the maximum gain from JIT

Notes:

. net.core.bpf_jit_enable=2 + tools/net/bpf_jit_disasm is still functional
  and can be used to see generated assembler

. there are two jit_compile() functions and code flow for classic filters is:
  sk_attach_filter() - load classic BPF
  bpf_jit_compile() - try to JIT from classic BPF
  sk_convert_filter() - convert classic to internal
  bpf_int_jit_compile() - JIT from internal BPF

  seccomp and tracing filters will just call bpf_int_jit_compile()

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/net/bpf_jit.S      |   77 +--
 arch/x86/net/bpf_jit_comp.c | 1314 +++++++++++++++++++++++--------------------
 include/linux/filter.h      |    3 +
 net/core/filter.c           |    9 +-
 4 files changed, 748 insertions(+), 655 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S
index 01495755701b..6440221ced0d 100644
--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@@ -12,13 +12,16 @@
 
 /*
  * Calling convention :
- * rdi : skb pointer
+ * rbx : skb pointer (callee saved)
  * esi : offset of byte(s) to fetch in skb (can be scratched)
- * r8  : copy of skb->data
+ * r10 : copy of skb->data
  * r9d : hlen = skb->len - skb->data_len
  */
-#define SKBDATA	%r8
+#define SKBDATA	%r10
 #define SKF_MAX_NEG_OFF    $(-0x200000) /* SKF_LL_OFF from filter.h */
+#define MAX_BPF_STACK (512 /* from filter.h */ + \
+	32 /* space for rbx,r13,r14,r15 */ + \
+	8 /* space for skb_copy_bits */)
 
 sk_load_word:
 	.globl	sk_load_word
@@ -68,53 +71,31 @@ sk_load_byte_positive_offset:
 	movzbl	(SKBDATA,%rsi),%eax
 	ret
 
-/**
- * sk_load_byte_msh - BPF_S_LDX_B_MSH helper
- *
- * Implements BPF_S_LDX_B_MSH : ldxb  4*([offset]&0xf)
- * Must preserve A accumulator (%eax)
- * Inputs : %esi is the offset value
- */
-sk_load_byte_msh:
-	.globl	sk_load_byte_msh
-	test	%esi,%esi
-	js	bpf_slow_path_byte_msh_neg
-
-sk_load_byte_msh_positive_offset:
-	.globl	sk_load_byte_msh_positive_offset
-	cmp	%esi,%r9d      /* if (offset >= hlen) goto bpf_slow_path_byte_msh */
-	jle	bpf_slow_path_byte_msh
-	movzbl	(SKBDATA,%rsi),%ebx
-	and	$15,%bl
-	shl	$2,%bl
-	ret
-
 /* rsi contains offset and can be scratched */
 #define bpf_slow_path_common(LEN)		\
-	push	%rdi;    /* save skb */		\
+	mov	%rbx, %rdi; /* arg1 == skb */	\
 	push	%r9;				\
 	push	SKBDATA;			\
 /* rsi already has offset */			\
 	mov	$LEN,%ecx;	/* len */	\
-	lea	-12(%rbp),%rdx;			\
+	lea	- MAX_BPF_STACK + 32(%rbp),%rdx;			\
 	call	skb_copy_bits;			\
 	test    %eax,%eax;			\
 	pop	SKBDATA;			\
-	pop	%r9;				\
-	pop	%rdi
+	pop	%r9;
 
 
 bpf_slow_path_word:
 	bpf_slow_path_common(4)
 	js	bpf_error
-	mov	-12(%rbp),%eax
+	mov	- MAX_BPF_STACK + 32(%rbp),%eax
 	bswap	%eax
 	ret
 
 bpf_slow_path_half:
 	bpf_slow_path_common(2)
 	js	bpf_error
-	mov	-12(%rbp),%ax
+	mov	- MAX_BPF_STACK + 32(%rbp),%ax
 	rol	$8,%ax
 	movzwl	%ax,%eax
 	ret
@@ -122,21 +103,11 @@ bpf_slow_path_half:
 bpf_slow_path_byte:
 	bpf_slow_path_common(1)
 	js	bpf_error
-	movzbl	-12(%rbp),%eax
-	ret
-
-bpf_slow_path_byte_msh:
-	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
-	bpf_slow_path_common(1)
-	js	bpf_error
-	movzbl	-12(%rbp),%eax
-	and	$15,%al
-	shl	$2,%al
-	xchg	%eax,%ebx
+	movzbl	- MAX_BPF_STACK + 32(%rbp),%eax
 	ret
 
 #define sk_negative_common(SIZE)				\
-	push	%rdi;	/* save skb */				\
+	mov	%rbx, %rdi; /* arg1 == skb */			\
 	push	%r9;						\
 	push	SKBDATA;					\
 /* rsi already has offset */					\
@@ -145,10 +116,8 @@ bpf_slow_path_byte_msh:
 	test	%rax,%rax;					\
 	pop	SKBDATA;					\
 	pop	%r9;						\
-	pop	%rdi;						\
 	jz	bpf_error
 
-
 bpf_slow_path_word_neg:
 	cmp	SKF_MAX_NEG_OFF, %esi	/* test range */
 	jl	bpf_error	/* offset lower -> error  */
@@ -179,22 +148,12 @@ sk_load_byte_negative_offset:
 	movzbl	(%rax), %eax
 	ret
 
-bpf_slow_path_byte_msh_neg:
-	cmp	SKF_MAX_NEG_OFF, %esi
-	jl	bpf_error
-sk_load_byte_msh_negative_offset:
-	.globl	sk_load_byte_msh_negative_offset
-	xchg	%eax,%ebx /* dont lose A , X is about to be scratched */
-	sk_negative_common(1)
-	movzbl	(%rax),%eax
-	and	$15,%al
-	shl	$2,%al
-	xchg	%eax,%ebx
-	ret
-
 bpf_error:
 # force a return 0 from jit handler
-	xor		%eax,%eax
-	mov		-8(%rbp),%rbx
+	xor	%eax,%eax
+	mov	- MAX_BPF_STACK(%rbp),%rbx
+	mov	- MAX_BPF_STACK + 8(%rbp),%r13
+	mov	- MAX_BPF_STACK + 16(%rbp),%r14
+	mov	- MAX_BPF_STACK + 24(%rbp),%r15
 	leaveq
 	ret
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index c5fa7c9cb665..92aef8fdac2f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1,6 +1,7 @@
 /* bpf_jit_comp.c : BPF JIT compiler
  *
  * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com)
+ * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -14,28 +15,16 @@
 #include <linux/if_vlan.h>
 #include <linux/random.h>
 
-/*
- * Conventions :
- *  EAX : BPF A accumulator
- *  EBX : BPF X accumulator
- *  RDI : pointer to skb   (first argument given to JIT function)
- *  RBP : frame pointer (even if CONFIG_FRAME_POINTER=n)
- *  ECX,EDX,ESI : scratch registers
- *  r9d : skb->len - skb->data_len (headlen)
- *  r8  : skb->data
- * -8(RBP) : saved RBX value
- * -16(RBP)..-80(RBP) : BPF_MEMWORDS values
- */
 int bpf_jit_enable __read_mostly;
 
 /*
  * assembly code in arch/x86/net/bpf_jit.S
  */
-extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[];
+extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
 extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
-extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[];
+extern u8 sk_load_byte_positive_offset[];
 extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
-extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[];
+extern u8 sk_load_byte_negative_offset[];
 
 static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -56,30 +45,44 @@ static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 #define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
 #define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
 #define EMIT4(b1, b2, b3, b4)   EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
-#define EMIT1_off32(b1, off)	do { EMIT1(b1); EMIT(off, 4);} while (0)
-
-#define CLEAR_A() EMIT2(0x31, 0xc0) /* xor %eax,%eax */
-#define CLEAR_X() EMIT2(0x31, 0xdb) /* xor %ebx,%ebx */
+#define EMIT1_off32(b1, off) \
+	do {EMIT1(b1); EMIT(off, 4); } while (0)
+#define EMIT2_off32(b1, b2, off) \
+	do {EMIT2(b1, b2); EMIT(off, 4); } while (0)
+#define EMIT3_off32(b1, b2, b3, off) \
+	do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
+#define EMIT4_off32(b1, b2, b3, b4, off) \
+	do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
 
 static inline bool is_imm8(int value)
 {
 	return value <= 127 && value >= -128;
 }
 
-static inline bool is_near(int offset)
+static inline bool is_simm32(s64 value)
 {
-	return offset <= 127 && offset >= -128;
+	return value == (s64) (s32) value;
 }
 
-#define EMIT_JMP(offset)						\
-do {									\
-	if (offset) {							\
-		if (is_near(offset))					\
-			EMIT2(0xeb, offset); /* jmp .+off8 */		\
-		else							\
-			EMIT1_off32(0xe9, offset); /* jmp .+off32 */	\
-	}								\
-} while (0)
+/* mov A, X: 64-bit reg-to-reg move (0x48 prefix); emits nothing if A == X */
+#define EMIT_mov(A, X) \
+	do {if (A != X) \
+		EMIT3(add_2mod(0x48, A, X), 0x89, add_2reg(0xC0, A, X)); \
+	} while (0)
+
+static int bpf_size_to_x86_bytes(int bpf_size)
+{
+	if (bpf_size == BPF_W)
+		return 4;
+	else if (bpf_size == BPF_H)
+		return 2;
+	else if (bpf_size == BPF_B)
+		return 1;
+	else if (bpf_size == BPF_DW)
+		return 4; /* imm32, even for 64-bit stores */
+	else
+		return 0;
+}
 
 /* list of x86 cond jumps opcodes (. + s8)
  * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
@@ -90,27 +93,8 @@ do {									\
 #define X86_JNE 0x75
 #define X86_JBE 0x76
 #define X86_JA  0x77
-
-#define EMIT_COND_JMP(op, offset)				\
-do {								\
-	if (is_near(offset))					\
-		EMIT2(op, offset); /* jxx .+off8 */		\
-	else {							\
-		EMIT2(0x0f, op + 0x10);				\
-		EMIT(offset, 4); /* jxx .+off32 */		\
-	}							\
-} while (0)
-
-#define COND_SEL(CODE, TOP, FOP)	\
-	case CODE:			\
-		t_op = TOP;		\
-		f_op = FOP;		\
-		goto cond_branch
-
-
-#define SEEN_DATAREF 1 /* might call external helpers */
-#define SEEN_XREG    2 /* ebx is used */
-#define SEEN_MEM     4 /* use mem[] for temporary storage */
+#define X86_JGE 0x7D
+#define X86_JG  0x7F
 
 static inline void bpf_flush_icache(void *start, void *end)
 {
@@ -125,26 +109,6 @@ static inline void bpf_flush_icache(void *start, void *end)
 #define CHOOSE_LOAD_FUNC(K, func) \
 	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
-/* Helper to find the offset of pkt_type in sk_buff
- * We want to make sure its still a 3bit field starting at a byte boundary.
- */
-#define PKT_TYPE_MAX 7
-static int pkt_type_offset(void)
-{
-	struct sk_buff skb_probe = {
-		.pkt_type = ~0,
-	};
-	char *ct = (char *)&skb_probe;
-	unsigned int off;
-
-	for (off = 0; off < sizeof(struct sk_buff); off++) {
-		if (ct[off] == PKT_TYPE_MAX)
-			return off;
-	}
-	pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n");
-	return -1;
-}
-
 struct bpf_binary_header {
 	unsigned int	pages;
 	/* Note : for security reasons, bpf code will follow a randomly
@@ -178,546 +142,715 @@ static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen,
 	return header;
 }
 
+/* pick a register outside of BPF range for JIT internal work */
+#define AUX_REG (MAX_BPF_REG + 1)
+
+/* maps BPF registers to the low 3 bits of the x64 register number (bit 3,
+ * set for r8..r15, comes from is_ereg()). x64 r12 is unused: as a base
+ * register in load/store it always needs an extra byte of encoding
+ */
+static const int reg2hex[] = {
+	[BPF_REG_0] = 0,  /* rax */
+	[BPF_REG_1] = 7,  /* rdi */
+	[BPF_REG_2] = 6,  /* rsi */
+	[BPF_REG_3] = 2,  /* rdx */
+	[BPF_REG_4] = 1,  /* rcx */
+	[BPF_REG_5] = 0,  /* r8 */
+	[BPF_REG_6] = 3,  /* rbx callee saved */
+	[BPF_REG_7] = 5,  /* r13 callee saved */
+	[BPF_REG_8] = 6,  /* r14 callee saved */
+	[BPF_REG_9] = 7,  /* r15 callee saved */
+	[BPF_REG_FP] = 5, /* rbp readonly */
+	[AUX_REG] = 3,    /* r11 temp register */
+};
+
+/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15 (R5 -> r8,
+ * R7..R9 -> r13..r15, AUX_REG -> r11), which need an extra prefix byte.
+ * rax,rcx,...,rbp have simpler encoding
+ */
+static inline bool is_ereg(u32 reg)
+{
+	if (reg == BPF_REG_5 || reg == AUX_REG ||
+	    (reg >= BPF_REG_7 && reg <= BPF_REG_9))
+		return true;
+	else
+		return false;
+}
+
+/* set bit 0 (REX.B) of prefix 'byte' if 'reg' maps to x64 r8..r15 */
+static inline u8 add_1mod(u8 byte, u32 reg)
+{
+	if (is_ereg(reg))
+		byte |= 1;
+	return byte;
+}
+
+static inline u8 add_2mod(u8 byte, u32 r1, u32 r2)
+{
+	if (is_ereg(r1))
+		byte |= 1; /* REX.B: for r1 in the ModRM r/m field */
+	if (is_ereg(r2))
+		byte |= 4; /* REX.R: for r2 in the ModRM reg field */
+	return byte;
+}
+
+/* add low 3 bits of 'a_reg' (reg2hex) to x64 opcode or ModRM 'byte' */
+static inline u8 add_1reg(u8 byte, u32 a_reg)
+{
+	return byte + reg2hex[a_reg];
+}
+
+/* put 'a_reg' in bits 0-2 (r/m) and 'x_reg' in bits 3-5 (reg) of 'byte' */
+static inline u8 add_2reg(u8 byte, u32 a_reg, u32 x_reg)
+{
+	return byte + reg2hex[a_reg] + (reg2hex[x_reg] << 3);
+}
+
 struct jit_context {
 	unsigned int cleanup_addr; /* epilogue code offset */
-	int pc_ret0; /* bpf index of first RET #0 instruction (if any) */
-	u8 seen;
+	bool seen_ld_abs;
 };
 
 static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 		  int oldproglen, struct jit_context *ctx)
 {
-	const struct sock_filter *filter = bpf_prog->insns;
-	int flen = bpf_prog->len;
+	struct sock_filter_int *insn = bpf_prog->insnsi;
+	int insn_cnt = bpf_prog->len;
 	u8 temp[64];
-	u8 *prog;
-	int ilen, i, proglen;
-	int t_offset, f_offset;
-	u8 t_op, f_op, seen = 0;
-	u8 *func;
-	unsigned int cleanup_addr = ctx->cleanup_addr;
-	u8 seen_or_pass0 = ctx->seen;
-
-		/* no prologue/epilogue for trivial filters (RET something) */
-		proglen = 0;
-		prog = temp;
+	int i;
+	int proglen = 0;
+	u8 *prog = temp;
+	int stacksize = MAX_BPF_STACK +
+		32 /* space for rbx, r13, r14, r15 */ +
+		8 /* space for skb_copy_bits() buffer */;
 
-		if (seen_or_pass0) {
-			EMIT4(0x55, 0x48, 0x89, 0xe5); /* push %rbp; mov %rsp,%rbp */
-			EMIT4(0x48, 0x83, 0xec, 96);	/* subq  $96,%rsp	*/
-			/* note : must save %rbx in case bpf_error is hit */
-			if (seen_or_pass0 & (SEEN_XREG | SEEN_DATAREF))
-				EMIT4(0x48, 0x89, 0x5d, 0xf8); /* mov %rbx, -8(%rbp) */
-			if (seen_or_pass0 & SEEN_XREG)
-				CLEAR_X(); /* make sure we dont leek kernel memory */
-
-			/*
-			 * If this filter needs to access skb data,
-			 * loads r9 and r8 with :
-			 *  r9 = skb->len - skb->data_len
-			 *  r8 = skb->data
-			 */
-			if (seen_or_pass0 & SEEN_DATAREF) {
-				if (offsetof(struct sk_buff, len) <= 127)
-					/* mov    off8(%rdi),%r9d */
-					EMIT4(0x44, 0x8b, 0x4f, offsetof(struct sk_buff, len));
-				else {
-					/* mov    off32(%rdi),%r9d */
-					EMIT3(0x44, 0x8b, 0x8f);
-					EMIT(offsetof(struct sk_buff, len), 4);
-				}
-				if (is_imm8(offsetof(struct sk_buff, data_len)))
-					/* sub    off8(%rdi),%r9d */
-					EMIT4(0x44, 0x2b, 0x4f, offsetof(struct sk_buff, data_len));
-				else {
-					EMIT3(0x44, 0x2b, 0x8f);
-					EMIT(offsetof(struct sk_buff, data_len), 4);
-				}
+	EMIT1(0x55); /* push rbp */
+	EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */
 
-				if (is_imm8(offsetof(struct sk_buff, data)))
-					/* mov off8(%rdi),%r8 */
-					EMIT4(0x4c, 0x8b, 0x47, offsetof(struct sk_buff, data));
-				else {
-					/* mov off32(%rdi),%r8 */
-					EMIT3(0x4c, 0x8b, 0x87);
-					EMIT(offsetof(struct sk_buff, data), 4);
-				}
+	/* sub rsp, stacksize */
+	EMIT3_off32(0x48, 0x81, 0xEC, stacksize);
+
+	/* all classic BPF filters use R6(rbx) save it */
+
+	/* mov qword ptr [rbp-X],rbx */
+	EMIT3_off32(0x48, 0x89, 0x9D, -stacksize);
+
+	/* sk_convert_filter() maps classic BPF register X to R7 and uses R8
+	 * as temporary, so all tcpdump filters need to spill/fill R7(r13) and
+	 * R8(r14). R9(r15) spill could be made conditional, but there is only
+	 * one 'bpf_error' return path out of helper functions inside bpf_jit.S
+	 * The overhead of extra spill is negligible for any filter other
+	 * than synthetic ones. Therefore not worth adding complexity.
+	 */
+
+	/* mov qword ptr [rbp-X],r13 */
+	EMIT3_off32(0x4C, 0x89, 0xAD, -stacksize + 8);
+	/* mov qword ptr [rbp-X],r14 */
+	EMIT3_off32(0x4C, 0x89, 0xB5, -stacksize + 16);
+	/* mov qword ptr [rbp-X],r15 */
+	EMIT3_off32(0x4C, 0x89, 0xBD, -stacksize + 24);
+
+	/* clear A and X registers */
+	EMIT2(0x31, 0xc0); /* xor eax, eax */
+	EMIT3(0x4D, 0x31, 0xED); /* xor r13, r13 */
+
+	if (ctx->seen_ld_abs) {
+		/* r9d : skb->len - skb->data_len (headlen)
+		 * r10 : skb->data
+		 */
+		if (is_imm8(offsetof(struct sk_buff, len)))
+			/* mov %r9d, off8(%rdi) */
+			EMIT4(0x44, 0x8b, 0x4f,
+			      offsetof(struct sk_buff, len));
+		else
+			/* mov %r9d, off32(%rdi) */
+			EMIT3_off32(0x44, 0x8b, 0x8f,
+				    offsetof(struct sk_buff, len));
+
+		if (is_imm8(offsetof(struct sk_buff, data_len)))
+			/* sub %r9d, off8(%rdi) */
+			EMIT4(0x44, 0x2b, 0x4f,
+			      offsetof(struct sk_buff, data_len));
+		else
+			EMIT3_off32(0x44, 0x2b, 0x8f,
+				    offsetof(struct sk_buff, data_len));
+
+		if (is_imm8(offsetof(struct sk_buff, data)))
+			/* mov %r10, off8(%rdi) */
+			EMIT4(0x4c, 0x8b, 0x57,
+			      offsetof(struct sk_buff, data));
+		else
+			/* mov %r10, off32(%rdi) */
+			EMIT3_off32(0x4c, 0x8b, 0x97,
+				    offsetof(struct sk_buff, data));
+	}
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		const s32 K = insn->imm;
+		u32 a_reg = insn->a_reg;
+		u32 x_reg = insn->x_reg;
+		u8 b1 = 0, b2 = 0, b3 = 0;
+		s64 jmp_offset;
+		u8 jmp_cond;
+		int ilen;
+		u8 *func;
+
+		switch (insn->code) {
+			/* ALU */
+		case BPF_ALU | BPF_ADD | BPF_X:
+		case BPF_ALU | BPF_SUB | BPF_X:
+		case BPF_ALU | BPF_AND | BPF_X:
+		case BPF_ALU | BPF_OR | BPF_X:
+		case BPF_ALU | BPF_XOR | BPF_X:
+		case BPF_ALU64 | BPF_ADD | BPF_X:
+		case BPF_ALU64 | BPF_SUB | BPF_X:
+		case BPF_ALU64 | BPF_AND | BPF_X:
+		case BPF_ALU64 | BPF_OR | BPF_X:
+		case BPF_ALU64 | BPF_XOR | BPF_X:
+			switch (BPF_OP(insn->code)) {
+			case BPF_ADD: b2 = 0x01; break;
+			case BPF_SUB: b2 = 0x29; break;
+			case BPF_AND: b2 = 0x21; break;
+			case BPF_OR: b2 = 0x09; break;
+			case BPF_XOR: b2 = 0x31; break;
 			}
-		}
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_2mod(0x48, a_reg, x_reg));
+			else if (is_ereg(a_reg) || is_ereg(x_reg))
+				EMIT1(add_2mod(0x40, a_reg, x_reg));
+			EMIT2(b2, add_2reg(0xC0, a_reg, x_reg));
+			break;
 
-		switch (filter[0].code) {
-		case BPF_S_RET_K:
-		case BPF_S_LD_W_LEN:
-		case BPF_S_ANC_PROTOCOL:
-		case BPF_S_ANC_IFINDEX:
-		case BPF_S_ANC_MARK:
-		case BPF_S_ANC_RXHASH:
-		case BPF_S_ANC_CPU:
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
-		case BPF_S_ANC_QUEUE:
-		case BPF_S_ANC_PKTTYPE:
-		case BPF_S_LD_W_ABS:
-		case BPF_S_LD_H_ABS:
-		case BPF_S_LD_B_ABS:
-			/* first instruction sets A register (or is RET 'constant') */
+			/* mov A, X */
+		case BPF_ALU64 | BPF_MOV | BPF_X:
+			EMIT_mov(a_reg, x_reg);
 			break;
-		default:
-			/* make sure we dont leak kernel information to user */
-			CLEAR_A(); /* A = 0 */
-		}
 
-		for (i = 0; i < flen; i++) {
-			unsigned int K = filter[i].k;
+			/* mov32 A, X */
+		case BPF_ALU | BPF_MOV | BPF_X:
+			if (is_ereg(a_reg) || is_ereg(x_reg))
+				EMIT1(add_2mod(0x40, a_reg, x_reg));
+			EMIT2(0x89, add_2reg(0xC0, a_reg, x_reg));
+			break;
 
-			switch (filter[i].code) {
-			case BPF_S_ALU_ADD_X: /* A += X; */
-				seen |= SEEN_XREG;
-				EMIT2(0x01, 0xd8);		/* add %ebx,%eax */
-				break;
-			case BPF_S_ALU_ADD_K: /* A += K; */
-				if (!K)
-					break;
-				if (is_imm8(K))
-					EMIT3(0x83, 0xc0, K);	/* add imm8,%eax */
-				else
-					EMIT1_off32(0x05, K);	/* add imm32,%eax */
-				break;
-			case BPF_S_ALU_SUB_X: /* A -= X; */
-				seen |= SEEN_XREG;
-				EMIT2(0x29, 0xd8);		/* sub    %ebx,%eax */
-				break;
-			case BPF_S_ALU_SUB_K: /* A -= K */
-				if (!K)
-					break;
-				if (is_imm8(K))
-					EMIT3(0x83, 0xe8, K); /* sub imm8,%eax */
-				else
-					EMIT1_off32(0x2d, K); /* sub imm32,%eax */
-				break;
-			case BPF_S_ALU_MUL_X: /* A *= X; */
-				seen |= SEEN_XREG;
-				EMIT3(0x0f, 0xaf, 0xc3);	/* imul %ebx,%eax */
-				break;
-			case BPF_S_ALU_MUL_K: /* A *= K */
-				if (is_imm8(K))
-					EMIT3(0x6b, 0xc0, K); /* imul imm8,%eax,%eax */
-				else {
-					EMIT2(0x69, 0xc0);		/* imul imm32,%eax */
-					EMIT(K, 4);
-				}
-				break;
-			case BPF_S_ALU_DIV_X: /* A /= X; */
-				seen |= SEEN_XREG;
-				EMIT2(0x85, 0xdb);	/* test %ebx,%ebx */
-				if (ctx->pc_ret0 > 0) {
-					/* addrs[pc_ret0 - 1] is start address of target
-					 * (addrs[i] - 4) is the address following this jmp
-					 * ("xor %edx,%edx; div %ebx" being 4 bytes long)
-					 */
-					EMIT_COND_JMP(X86_JE, addrs[ctx->pc_ret0 - 1] -
-								(addrs[i] - 4));
-				} else {
-					EMIT_COND_JMP(X86_JNE, 2 + 5);
-					CLEAR_A();
-					EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 4)); /* jmp .+off32 */
-				}
-				EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
-				break;
-			case BPF_S_ALU_MOD_X: /* A %= X; */
-				seen |= SEEN_XREG;
-				EMIT2(0x85, 0xdb);	/* test %ebx,%ebx */
-				if (ctx->pc_ret0 > 0) {
-					/* addrs[pc_ret0 - 1] is start address of target
-					 * (addrs[i] - 6) is the address following this jmp
-					 * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long)
-					 */
-					EMIT_COND_JMP(X86_JE, addrs[ctx->pc_ret0 - 1] -
-								(addrs[i] - 6));
-				} else {
-					EMIT_COND_JMP(X86_JNE, 2 + 5);
-					CLEAR_A();
-					EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */
-				}
-				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
-				EMIT2(0xf7, 0xf3);	/* div %ebx */
-				EMIT2(0x89, 0xd0);	/* mov %edx,%eax */
-				break;
-			case BPF_S_ALU_MOD_K: /* A %= K; */
-				if (K == 1) {
-					CLEAR_A();
-					break;
-				}
-				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
-				EMIT1(0xb9);EMIT(K, 4);	/* mov imm32,%ecx */
-				EMIT2(0xf7, 0xf1);	/* div %ecx */
-				EMIT2(0x89, 0xd0);	/* mov %edx,%eax */
-				break;
-			case BPF_S_ALU_DIV_K: /* A /= K */
-				if (K == 1)
-					break;
-				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
-				EMIT1(0xb9);EMIT(K, 4);	/* mov imm32,%ecx */
-				EMIT2(0xf7, 0xf1);	/* div %ecx */
-				break;
-			case BPF_S_ALU_AND_X:
-				seen |= SEEN_XREG;
-				EMIT2(0x21, 0xd8);		/* and %ebx,%eax */
-				break;
-			case BPF_S_ALU_AND_K:
-				if (K >= 0xFFFFFF00) {
-					EMIT2(0x24, K & 0xFF); /* and imm8,%al */
-				} else if (K >= 0xFFFF0000) {
-					EMIT2(0x66, 0x25);	/* and imm16,%ax */
-					EMIT(K, 2);
-				} else {
-					EMIT1_off32(0x25, K);	/* and imm32,%eax */
-				}
-				break;
-			case BPF_S_ALU_OR_X:
-				seen |= SEEN_XREG;
-				EMIT2(0x09, 0xd8);		/* or %ebx,%eax */
-				break;
-			case BPF_S_ALU_OR_K:
-				if (is_imm8(K))
-					EMIT3(0x83, 0xc8, K); /* or imm8,%eax */
-				else
-					EMIT1_off32(0x0d, K);	/* or imm32,%eax */
-				break;
-			case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
-			case BPF_S_ALU_XOR_X:
-				seen |= SEEN_XREG;
-				EMIT2(0x31, 0xd8);		/* xor %ebx,%eax */
-				break;
-			case BPF_S_ALU_XOR_K: /* A ^= K; */
-				if (K == 0)
-					break;
-				if (is_imm8(K))
-					EMIT3(0x83, 0xf0, K);	/* xor imm8,%eax */
-				else
-					EMIT1_off32(0x35, K);	/* xor imm32,%eax */
-				break;
-			case BPF_S_ALU_LSH_X: /* A <<= X; */
-				seen |= SEEN_XREG;
-				EMIT4(0x89, 0xd9, 0xd3, 0xe0);	/* mov %ebx,%ecx; shl %cl,%eax */
-				break;
-			case BPF_S_ALU_LSH_K:
-				if (K == 0)
-					break;
-				else if (K == 1)
-					EMIT2(0xd1, 0xe0); /* shl %eax */
-				else
-					EMIT3(0xc1, 0xe0, K);
-				break;
-			case BPF_S_ALU_RSH_X: /* A >>= X; */
-				seen |= SEEN_XREG;
-				EMIT4(0x89, 0xd9, 0xd3, 0xe8);	/* mov %ebx,%ecx; shr %cl,%eax */
-				break;
-			case BPF_S_ALU_RSH_K: /* A >>= K; */
-				if (K == 0)
-					break;
-				else if (K == 1)
-					EMIT2(0xd1, 0xe8); /* shr %eax */
-				else
-					EMIT3(0xc1, 0xe8, K);
-				break;
-			case BPF_S_ALU_NEG:
-				EMIT2(0xf7, 0xd8);		/* neg %eax */
-				break;
-			case BPF_S_RET_K:
-				if (!K) {
-					if (ctx->pc_ret0 == -1)
-						ctx->pc_ret0 = i;
-					CLEAR_A();
-				} else {
-					EMIT1_off32(0xb8, K);	/* mov $imm32,%eax */
-				}
-				/* fallinto */
-			case BPF_S_RET_A:
-				if (seen_or_pass0) {
-					if (i != flen - 1) {
-						EMIT_JMP(cleanup_addr - addrs[i]);
-						break;
-					}
-					if (seen_or_pass0 & SEEN_XREG)
-						EMIT4(0x48, 0x8b, 0x5d, 0xf8);  /* mov  -8(%rbp),%rbx */
-					EMIT1(0xc9);		/* leaveq */
-				}
-				EMIT1(0xc3);		/* ret */
-				break;
-			case BPF_S_MISC_TAX: /* X = A */
-				seen |= SEEN_XREG;
-				EMIT2(0x89, 0xc3);	/* mov    %eax,%ebx */
-				break;
-			case BPF_S_MISC_TXA: /* A = X */
-				seen |= SEEN_XREG;
-				EMIT2(0x89, 0xd8);	/* mov    %ebx,%eax */
-				break;
-			case BPF_S_LD_IMM: /* A = K */
-				if (!K)
-					CLEAR_A();
-				else
-					EMIT1_off32(0xb8, K); /* mov $imm32,%eax */
+			/* neg A */
+		case BPF_ALU | BPF_NEG:
+		case BPF_ALU64 | BPF_NEG:
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_1mod(0x48, a_reg));
+			else if (is_ereg(a_reg))
+				EMIT1(add_1mod(0x40, a_reg));
+			EMIT2(0xF7, add_1reg(0xD8, a_reg));
+			break;
+
+		case BPF_ALU | BPF_ADD | BPF_K:
+		case BPF_ALU | BPF_SUB | BPF_K:
+		case BPF_ALU | BPF_AND | BPF_K:
+		case BPF_ALU | BPF_OR | BPF_K:
+		case BPF_ALU | BPF_XOR | BPF_K:
+		case BPF_ALU64 | BPF_ADD | BPF_K:
+		case BPF_ALU64 | BPF_SUB | BPF_K:
+		case BPF_ALU64 | BPF_AND | BPF_K:
+		case BPF_ALU64 | BPF_OR | BPF_K:
+		case BPF_ALU64 | BPF_XOR | BPF_K:
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_1mod(0x48, a_reg));
+			else if (is_ereg(a_reg))
+				EMIT1(add_1mod(0x40, a_reg));
+
+			switch (BPF_OP(insn->code)) {
+			case BPF_ADD: b3 = 0xC0; break;
+			case BPF_SUB: b3 = 0xE8; break;
+			case BPF_AND: b3 = 0xE0; break;
+			case BPF_OR: b3 = 0xC8; break;
+			case BPF_XOR: b3 = 0xF0; break;
+			}
+
+			if (is_imm8(K))
+				EMIT3(0x83, add_1reg(b3, a_reg), K);
+			else
+				EMIT2_off32(0x81, add_1reg(b3, a_reg), K);
+			break;
+
+		case BPF_ALU64 | BPF_MOV | BPF_K:
+			/* optimization: if imm32 is positive,
+			 * use 'mov eax, imm32' (which zero-extends imm32)
+			 * to save 2 bytes
+			 */
+			if (K < 0) {
+				/* 'mov rax, imm32' sign extends imm32 */
+				b1 = add_1mod(0x48, a_reg);
+				b2 = 0xC7;
+				b3 = 0xC0;
+				EMIT3_off32(b1, b2, add_1reg(b3, a_reg), K);
 				break;
-			case BPF_S_LDX_IMM: /* X = K */
-				seen |= SEEN_XREG;
-				if (!K)
-					CLEAR_X();
+			}
+
+		case BPF_ALU | BPF_MOV | BPF_K:
+			/* mov %eax, imm32 */
+			if (is_ereg(a_reg))
+				EMIT1(add_1mod(0x40, a_reg));
+			EMIT1_off32(add_1reg(0xB8, a_reg), K);
+			break;
+
+			/* A %= X, A /= X, A %= K, A /= K */
+		case BPF_ALU | BPF_MOD | BPF_X:
+		case BPF_ALU | BPF_DIV | BPF_X:
+		case BPF_ALU | BPF_MOD | BPF_K:
+		case BPF_ALU | BPF_DIV | BPF_K:
+		case BPF_ALU64 | BPF_MOD | BPF_X:
+		case BPF_ALU64 | BPF_DIV | BPF_X:
+		case BPF_ALU64 | BPF_MOD | BPF_K:
+		case BPF_ALU64 | BPF_DIV | BPF_K:
+			EMIT1(0x50); /* push rax */
+			EMIT1(0x52); /* push rdx */
+
+			if (BPF_SRC(insn->code) == BPF_X)
+				/* mov r11, X */
+				EMIT_mov(AUX_REG, x_reg);
+			else
+				/* mov r11, K */
+				EMIT3_off32(0x49, 0xC7, 0xC3, K);
+
+			/* mov rax, A */
+			EMIT_mov(BPF_REG_0, a_reg);
+
+			/* xor edx, edx
+			 * equivalent to 'xor rdx, rdx', but one byte less
+			 */
+			EMIT2(0x31, 0xd2);
+
+			if (BPF_SRC(insn->code) == BPF_X) {
+				/* if (X == 0) return 0 */
+
+				/* cmp r11, 0 */
+				EMIT4(0x49, 0x83, 0xFB, 0x00);
+
+				/* jne .+9 (skip over pop, pop, xor and jmp) */
+				EMIT2(X86_JNE, 1 + 1 + 2 + 5);
+				EMIT1(0x5A); /* pop rdx */
+				EMIT1(0x58); /* pop rax */
+				EMIT2(0x31, 0xc0); /* xor eax, eax */
+
+				/* jmp cleanup_addr
+				 * addrs[i] - 11, because there are 11 bytes
+				 * after this insn: div, mov, pop, pop, mov
+				 */
+				jmp_offset = ctx->cleanup_addr - (addrs[i] - 11);
+				EMIT1_off32(0xE9, jmp_offset);
+			}
+
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				/* div r11 */
+				EMIT3(0x49, 0xF7, 0xF3);
+			else
+				/* div r11d */
+				EMIT3(0x41, 0xF7, 0xF3);
+
+			if (BPF_OP(insn->code) == BPF_MOD)
+				/* mov r11, rdx */
+				EMIT3(0x49, 0x89, 0xD3);
+			else
+				/* mov r11, rax */
+				EMIT3(0x49, 0x89, 0xC3);
+
+			EMIT1(0x5A); /* pop rdx */
+			EMIT1(0x58); /* pop rax */
+
+			/* mov A, r11 */
+			EMIT_mov(a_reg, AUX_REG);
+			break;
+
+		case BPF_ALU | BPF_MUL | BPF_K:
+		case BPF_ALU | BPF_MUL | BPF_X:
+		case BPF_ALU64 | BPF_MUL | BPF_K:
+		case BPF_ALU64 | BPF_MUL | BPF_X:
+			EMIT1(0x50); /* push rax */
+			EMIT1(0x52); /* push rdx */
+
+			/* mov r11, A */
+			EMIT_mov(AUX_REG, a_reg);
+
+			if (BPF_SRC(insn->code) == BPF_X)
+				/* mov rax, X */
+				EMIT_mov(BPF_REG_0, x_reg);
+			else
+				/* mov rax, K */
+				EMIT3_off32(0x48, 0xC7, 0xC0, K);
+
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_1mod(0x48, AUX_REG));
+			else if (is_ereg(AUX_REG))
+				EMIT1(add_1mod(0x40, AUX_REG));
+			/* mul(q) r11 */
+			EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
+
+			/* mov r11, rax */
+			EMIT_mov(AUX_REG, BPF_REG_0);
+
+			EMIT1(0x5A); /* pop rdx */
+			EMIT1(0x58); /* pop rax */
+
+			/* mov A, r11 */
+			EMIT_mov(a_reg, AUX_REG);
+			break;
+
+			/* shifts */
+		case BPF_ALU | BPF_LSH | BPF_K:
+		case BPF_ALU | BPF_RSH | BPF_K:
+		case BPF_ALU | BPF_ARSH | BPF_K:
+		case BPF_ALU64 | BPF_LSH | BPF_K:
+		case BPF_ALU64 | BPF_RSH | BPF_K:
+		case BPF_ALU64 | BPF_ARSH | BPF_K:
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_1mod(0x48, a_reg));
+			else if (is_ereg(a_reg))
+				EMIT1(add_1mod(0x40, a_reg));
+
+			switch (BPF_OP(insn->code)) {
+			case BPF_LSH: b3 = 0xE0; break;
+			case BPF_RSH: b3 = 0xE8; break;
+			case BPF_ARSH: b3 = 0xF8; break;
+			}
+			EMIT3(0xC1, add_1reg(b3, a_reg), K);
+			break;
+
+		case BPF_ALU | BPF_END | BPF_FROM_BE:
+			switch (K) {
+			case 16:
+				/* emit 'ror %ax, 8' to swap lower 2 bytes */
+				EMIT1(0x66);
+				if (is_ereg(a_reg))
+					EMIT1(0x41);
+				EMIT3(0xC1, add_1reg(0xC8, a_reg), 8);
+				break;
+			case 32:
+				/* emit 'bswap eax' to swap lower 4 bytes */
+				if (is_ereg(a_reg))
+					EMIT2(0x41, 0x0F);
 				else
-					EMIT1_off32(0xbb, K); /* mov $imm32,%ebx */
-				break;
-			case BPF_S_LD_MEM: /* A = mem[K] : mov off8(%rbp),%eax */
-				seen |= SEEN_MEM;
-				EMIT3(0x8b, 0x45, 0xf0 - K*4);
+					EMIT1(0x0F);
+				EMIT1(add_1reg(0xC8, a_reg));
 				break;
-			case BPF_S_LDX_MEM: /* X = mem[K] : mov off8(%rbp),%ebx */
-				seen |= SEEN_XREG | SEEN_MEM;
-				EMIT3(0x8b, 0x5d, 0xf0 - K*4);
-				break;
-			case BPF_S_ST: /* mem[K] = A : mov %eax,off8(%rbp) */
-				seen |= SEEN_MEM;
-				EMIT3(0x89, 0x45, 0xf0 - K*4);
-				break;
-			case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
-				seen |= SEEN_XREG | SEEN_MEM;
-				EMIT3(0x89, 0x5d, 0xf0 - K*4);
-				break;
-			case BPF_S_LD_W_LEN: /*	A = skb->len; */
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
-				if (is_imm8(offsetof(struct sk_buff, len)))
-					/* mov    off8(%rdi),%eax */
-					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, len));
-				else {
-					EMIT2(0x8b, 0x87);
-					EMIT(offsetof(struct sk_buff, len), 4);
-				}
-				break;
-			case BPF_S_LDX_W_LEN: /* X = skb->len; */
-				seen |= SEEN_XREG;
-				if (is_imm8(offsetof(struct sk_buff, len)))
-					/* mov off8(%rdi),%ebx */
-					EMIT3(0x8b, 0x5f, offsetof(struct sk_buff, len));
-				else {
-					EMIT2(0x8b, 0x9f);
-					EMIT(offsetof(struct sk_buff, len), 4);
-				}
-				break;
-			case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
-				if (is_imm8(offsetof(struct sk_buff, protocol))) {
-					/* movzwl off8(%rdi),%eax */
-					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, protocol));
-				} else {
-					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
-					EMIT(offsetof(struct sk_buff, protocol), 4);
-				}
-				EMIT2(0x86, 0xc4); /* ntohs() : xchg   %al,%ah */
-				break;
-			case BPF_S_ANC_IFINDEX:
-				if (is_imm8(offsetof(struct sk_buff, dev))) {
-					/* movq off8(%rdi),%rax */
-					EMIT4(0x48, 0x8b, 0x47, offsetof(struct sk_buff, dev));
-				} else {
-					EMIT3(0x48, 0x8b, 0x87); /* movq off32(%rdi),%rax */
-					EMIT(offsetof(struct sk_buff, dev), 4);
-				}
-				EMIT3(0x48, 0x85, 0xc0);	/* test %rax,%rax */
-				EMIT_COND_JMP(X86_JE, cleanup_addr - (addrs[i] - 6));
-				BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
-				EMIT2(0x8b, 0x80);	/* mov off32(%rax),%eax */
-				EMIT(offsetof(struct net_device, ifindex), 4);
-				break;
-			case BPF_S_ANC_MARK:
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-				if (is_imm8(offsetof(struct sk_buff, mark))) {
-					/* mov off8(%rdi),%eax */
-					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, mark));
-				} else {
-					EMIT2(0x8b, 0x87);
-					EMIT(offsetof(struct sk_buff, mark), 4);
-				}
-				break;
-			case BPF_S_ANC_RXHASH:
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
-				if (is_imm8(offsetof(struct sk_buff, hash))) {
-					/* mov off8(%rdi),%eax */
-					EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash));
-				} else {
-					EMIT2(0x8b, 0x87);
-					EMIT(offsetof(struct sk_buff, hash), 4);
-				}
-				break;
-			case BPF_S_ANC_QUEUE:
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
-				if (is_imm8(offsetof(struct sk_buff, queue_mapping))) {
-					/* movzwl off8(%rdi),%eax */
-					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, queue_mapping));
-				} else {
-					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
-					EMIT(offsetof(struct sk_buff, queue_mapping), 4);
-				}
-				break;
-			case BPF_S_ANC_CPU:
-#ifdef CONFIG_SMP
-				EMIT4(0x65, 0x8b, 0x04, 0x25); /* mov %gs:off32,%eax */
-				EMIT((u32)(unsigned long)&cpu_number, 4); /* A = smp_processor_id(); */
-#else
-				CLEAR_A();
-#endif
-				break;
-			case BPF_S_ANC_VLAN_TAG:
-			case BPF_S_ANC_VLAN_TAG_PRESENT:
-				BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-				if (is_imm8(offsetof(struct sk_buff, vlan_tci))) {
-					/* movzwl off8(%rdi),%eax */
-					EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, vlan_tci));
-				} else {
-					EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
-					EMIT(offsetof(struct sk_buff, vlan_tci), 4);
-				}
-				BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
-				if (filter[i].code == BPF_S_ANC_VLAN_TAG) {
-					EMIT3(0x80, 0xe4, 0xef); /* and    $0xef,%ah */
-				} else {
-					EMIT3(0xc1, 0xe8, 0x0c); /* shr    $0xc,%eax */
-					EMIT3(0x83, 0xe0, 0x01); /* and    $0x1,%eax */
-				}
-				break;
-			case BPF_S_ANC_PKTTYPE:
-			{
-				int off = pkt_type_offset();
-
-				if (off < 0)
-					return -EINVAL;
-				if (is_imm8(off)) {
-					/* movzbl off8(%rdi),%eax */
-					EMIT4(0x0f, 0xb6, 0x47, off);
-				} else {
-					/* movbl off32(%rdi),%eax */
-					EMIT3(0x0f, 0xb6, 0x87);
-					EMIT(off, 4);
-				}
-				EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and    $0x7,%eax */
+			case 64:
+				/* emit 'bswap rax' to swap 8 bytes */
+				EMIT3(add_1mod(0x48, a_reg), 0x0F,
+				      add_1reg(0xC8, a_reg));
 				break;
 			}
-			case BPF_S_LD_W_ABS:
-				func = CHOOSE_LOAD_FUNC(K, sk_load_word);
-common_load:			seen |= SEEN_DATAREF;
-				t_offset = func - (image + addrs[i]);
-				EMIT1_off32(0xbe, K); /* mov imm32,%esi */
-				EMIT1_off32(0xe8, t_offset); /* call */
-				break;
-			case BPF_S_LD_H_ABS:
-				func = CHOOSE_LOAD_FUNC(K, sk_load_half);
-				goto common_load;
-			case BPF_S_LD_B_ABS:
-				func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
-				goto common_load;
-			case BPF_S_LDX_B_MSH:
-				func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
-				seen |= SEEN_DATAREF | SEEN_XREG;
-				t_offset = func - (image + addrs[i]);
-				EMIT1_off32(0xbe, K);	/* mov imm32,%esi */
-				EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */
-				break;
-			case BPF_S_LD_W_IND:
-				func = sk_load_word;
-common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
-				t_offset = func - (image + addrs[i]);
-				if (K) {
-					if (is_imm8(K)) {
-						EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */
-					} else {
-						EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */
-						EMIT(K, 4);
-					}
-				} else {
-					EMIT2(0x89,0xde); /* mov %ebx,%esi */
-				}
-				EMIT1_off32(0xe8, t_offset);	/* call sk_load_xxx_ind */
-				break;
-			case BPF_S_LD_H_IND:
-				func = sk_load_half;
-				goto common_load_ind;
-			case BPF_S_LD_B_IND:
-				func = sk_load_byte;
-				goto common_load_ind;
-			case BPF_S_JMP_JA:
-				t_offset = addrs[i + K] - addrs[i];
-				EMIT_JMP(t_offset);
-				break;
-			COND_SEL(BPF_S_JMP_JGT_K, X86_JA, X86_JBE);
-			COND_SEL(BPF_S_JMP_JGE_K, X86_JAE, X86_JB);
-			COND_SEL(BPF_S_JMP_JEQ_K, X86_JE, X86_JNE);
-			COND_SEL(BPF_S_JMP_JSET_K,X86_JNE, X86_JE);
-			COND_SEL(BPF_S_JMP_JGT_X, X86_JA, X86_JBE);
-			COND_SEL(BPF_S_JMP_JGE_X, X86_JAE, X86_JB);
-			COND_SEL(BPF_S_JMP_JEQ_X, X86_JE, X86_JNE);
-			COND_SEL(BPF_S_JMP_JSET_X,X86_JNE, X86_JE);
-
-cond_branch:			f_offset = addrs[i + filter[i].jf] - addrs[i];
-				t_offset = addrs[i + filter[i].jt] - addrs[i];
-
-				/* same targets, can avoid doing the test :) */
-				if (filter[i].jt == filter[i].jf) {
-					EMIT_JMP(t_offset);
-					break;
-				}
+			break;
+
+		case BPF_ALU | BPF_END | BPF_FROM_LE:
+			break;
+
+			/* ST: *(u8*)(a_reg + off) = imm */
+		case BPF_ST | BPF_MEM | BPF_B:
+			if (is_ereg(a_reg))
+				EMIT2(0x41, 0xC6);
+			else
+				EMIT1(0xC6);
+			goto st;
+		case BPF_ST | BPF_MEM | BPF_H:
+			if (is_ereg(a_reg))
+				EMIT3(0x66, 0x41, 0xC7);
+			else
+				EMIT2(0x66, 0xC7);
+			goto st;
+		case BPF_ST | BPF_MEM | BPF_W:
+			if (is_ereg(a_reg))
+				EMIT2(0x41, 0xC7);
+			else
+				EMIT1(0xC7);
+			goto st;
+		case BPF_ST | BPF_MEM | BPF_DW:
+			EMIT2(add_1mod(0x48, a_reg), 0xC7);
+
+st:			if (is_imm8(insn->off))
+				EMIT2(add_1reg(0x40, a_reg), insn->off);
+			else
+				EMIT1_off32(add_1reg(0x80, a_reg), insn->off);
+
+			EMIT(K, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
+			break;
+
+			/* STX: *(u8*)(a_reg + off) = x_reg */
+		case BPF_STX | BPF_MEM | BPF_B:
+			/* emit 'mov byte ptr [rax + off], al' */
+			if (is_ereg(a_reg) || is_ereg(x_reg) ||
+			    /* have to add extra byte for x86 SIL, DIL regs */
+			    x_reg == BPF_REG_1 || x_reg == BPF_REG_2)
+				EMIT2(add_2mod(0x40, a_reg, x_reg), 0x88);
+			else
+				EMIT1(0x88);
+			goto stx;
+		case BPF_STX | BPF_MEM | BPF_H:
+			if (is_ereg(a_reg) || is_ereg(x_reg))
+				EMIT3(0x66, add_2mod(0x40, a_reg, x_reg), 0x89);
+			else
+				EMIT2(0x66, 0x89);
+			goto stx;
+		case BPF_STX | BPF_MEM | BPF_W:
+			if (is_ereg(a_reg) || is_ereg(x_reg))
+				EMIT2(add_2mod(0x40, a_reg, x_reg), 0x89);
+			else
+				EMIT1(0x89);
+			goto stx;
+		case BPF_STX | BPF_MEM | BPF_DW:
+			EMIT2(add_2mod(0x48, a_reg, x_reg), 0x89);
+stx:			if (is_imm8(insn->off))
+				EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off);
+			else
+				EMIT1_off32(add_2reg(0x80, a_reg, x_reg),
+					    insn->off);
+			break;
+
+			/* LDX: a_reg = *(u8*)(x_reg + off) */
+		case BPF_LDX | BPF_MEM | BPF_B:
+			/* emit 'movzx rax, byte ptr [rax + off]' */
+			EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB6);
+			goto ldx;
+		case BPF_LDX | BPF_MEM | BPF_H:
+			/* emit 'movzx rax, word ptr [rax + off]' */
+			EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB7);
+			goto ldx;
+		case BPF_LDX | BPF_MEM | BPF_W:
+			/* emit 'mov eax, dword ptr [rax+0x14]' */
+			if (is_ereg(a_reg) || is_ereg(x_reg))
+				EMIT2(add_2mod(0x40, x_reg, a_reg), 0x8B);
+			else
+				EMIT1(0x8B);
+			goto ldx;
+		case BPF_LDX | BPF_MEM | BPF_DW:
+			/* emit 'mov rax, qword ptr [rax+0x14]' */
+			EMIT2(add_2mod(0x48, x_reg, a_reg), 0x8B);
+ldx:			/* if insn->off == 0 we can save one extra byte, but
+			 * special case of x86 r13 which always needs an offset
+			 * is not worth the hassle
+			 */
+			if (is_imm8(insn->off))
+				EMIT2(add_2reg(0x40, x_reg, a_reg), insn->off);
+			else
+				EMIT1_off32(add_2reg(0x80, x_reg, a_reg),
+					    insn->off);
+			break;
+
+			/* STX XADD: lock *(u32*)(a_reg + off) += x_reg */
+		case BPF_STX | BPF_XADD | BPF_W:
+			/* emit 'lock add dword ptr [rax + off], eax' */
+			if (is_ereg(a_reg) || is_ereg(x_reg))
+				EMIT3(0xF0, add_2mod(0x40, a_reg, x_reg), 0x01);
+			else
+				EMIT2(0xF0, 0x01);
+			goto xadd;
+		case BPF_STX | BPF_XADD | BPF_DW:
+			EMIT3(0xF0, add_2mod(0x48, a_reg, x_reg), 0x01);
+xadd:			if (is_imm8(insn->off))
+				EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off);
+			else
+				EMIT1_off32(add_2reg(0x80, a_reg, x_reg),
+					    insn->off);
+			break;
+
+			/* call */
+		case BPF_JMP | BPF_CALL:
+			func = (u8 *) __bpf_call_base + K;
+			jmp_offset = func - (image + addrs[i]);
+			if (ctx->seen_ld_abs) {
+				EMIT2(0x41, 0x52); /* push %r10 */
+				EMIT2(0x41, 0x51); /* push %r9 */
+				/* need to adjust jmp offset, since
+				 * pop %r9, pop %r10 take 4 bytes after call insn
+				 */
+				jmp_offset += 4;
+			}
+			if (!K || !is_simm32(jmp_offset)) {
+				pr_err("unsupported bpf func %d addr %p image %p\n",
+				       K, func, image);
+				return -EINVAL;
+			}
+			EMIT1_off32(0xE8, jmp_offset);
+			if (ctx->seen_ld_abs) {
+				EMIT2(0x41, 0x59); /* pop %r9 */
+				EMIT2(0x41, 0x5A); /* pop %r10 */
+			}
+			break;
+
+			/* cond jump */
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP | BPF_JNE | BPF_X:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JSGT | BPF_X:
+		case BPF_JMP | BPF_JSGE | BPF_X:
+			/* cmp a_reg, x_reg */
+			EMIT3(add_2mod(0x48, a_reg, x_reg), 0x39,
+			      add_2reg(0xC0, a_reg, x_reg));
+			goto emit_cond_jmp;
+
+		case BPF_JMP | BPF_JSET | BPF_X:
+			/* test a_reg, x_reg */
+			EMIT3(add_2mod(0x48, a_reg, x_reg), 0x85,
+			      add_2reg(0xC0, a_reg, x_reg));
+			goto emit_cond_jmp;
+
+		case BPF_JMP | BPF_JSET | BPF_K:
+			/* test a_reg, imm32 */
+			EMIT1(add_1mod(0x48, a_reg));
+			EMIT2_off32(0xF7, add_1reg(0xC0, a_reg), K);
+			goto emit_cond_jmp;
+
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JNE | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JSGT | BPF_K:
+		case BPF_JMP | BPF_JSGE | BPF_K:
+			/* cmp a_reg, imm8/32 */
+			EMIT1(add_1mod(0x48, a_reg));
+
+			if (is_imm8(K))
+				EMIT3(0x83, add_1reg(0xF8, a_reg), K);
+			else
+				EMIT2_off32(0x81, add_1reg(0xF8, a_reg), K);
+
+emit_cond_jmp:		/* convert BPF opcode to x86 */
+			switch (BPF_OP(insn->code)) {
+			case BPF_JEQ:
+				jmp_cond = X86_JE;
+				break;
+			case BPF_JSET:
+			case BPF_JNE:
+				jmp_cond = X86_JNE;
+				break;
+			case BPF_JGT:
+				/* GT is unsigned '>', JA in x86 */
+				jmp_cond = X86_JA;
+				break;
+			case BPF_JGE:
+				/* GE is unsigned '>=', JAE in x86 */
+				jmp_cond = X86_JAE;
+				break;
+			case BPF_JSGT:
+				/* signed '>', GT in x86 */
+				jmp_cond = X86_JG;
+				break;
+			case BPF_JSGE:
+				/* signed '>=', GE in x86 */
+				jmp_cond = X86_JGE;
+				break;
+			default: /* to silence gcc warning */
+				return -EFAULT;
+			}
+			jmp_offset = addrs[i + insn->off] - addrs[i];
+			if (is_imm8(jmp_offset)) {
+				EMIT2(jmp_cond, jmp_offset);
+			} else if (is_simm32(jmp_offset)) {
+				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
+			} else {
+				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
+				return -EFAULT;
+			}
+
+			break;
 
-				switch (filter[i].code) {
-				case BPF_S_JMP_JGT_X:
-				case BPF_S_JMP_JGE_X:
-				case BPF_S_JMP_JEQ_X:
-					seen |= SEEN_XREG;
-					EMIT2(0x39, 0xd8); /* cmp %ebx,%eax */
-					break;
-				case BPF_S_JMP_JSET_X:
-					seen |= SEEN_XREG;
-					EMIT2(0x85, 0xd8); /* test %ebx,%eax */
-					break;
-				case BPF_S_JMP_JEQ_K:
-					if (K == 0) {
-						EMIT2(0x85, 0xc0); /* test   %eax,%eax */
-						break;
-					}
-				case BPF_S_JMP_JGT_K:
-				case BPF_S_JMP_JGE_K:
-					if (K <= 127)
-						EMIT3(0x83, 0xf8, K); /* cmp imm8,%eax */
+		case BPF_JMP | BPF_JA:
+			jmp_offset = addrs[i + insn->off] - addrs[i];
+			if (!jmp_offset)
+				/* optimize out nop jumps */
+				break;
+emit_jmp:
+			if (is_imm8(jmp_offset)) {
+				EMIT2(0xEB, jmp_offset);
+			} else if (is_simm32(jmp_offset)) {
+				EMIT1_off32(0xE9, jmp_offset);
+			} else {
+				pr_err("jmp gen bug %llx\n", jmp_offset);
+				return -EFAULT;
+			}
+			break;
+
+		case BPF_LD | BPF_IND | BPF_W:
+			func = sk_load_word;
+			goto common_load;
+		case BPF_LD | BPF_ABS | BPF_W:
+			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
+common_load:		ctx->seen_ld_abs = true;
+			jmp_offset = func - (image + addrs[i]);
+			if (!func || !is_simm32(jmp_offset)) {
+				pr_err("unsupported bpf func %d addr %p image %p\n",
+				       K, func, image);
+				return -EINVAL;
+			}
+			if (BPF_MODE(insn->code) == BPF_ABS) {
+				/* mov %esi, imm32 */
+				EMIT1_off32(0xBE, K);
+			} else {
+				/* mov %rsi, x_reg */
+				EMIT_mov(BPF_REG_2, x_reg);
+				if (K) {
+					if (is_imm8(K))
+						/* add %esi, imm8 */
+						EMIT3(0x83, 0xC6, K);
 					else
-						EMIT1_off32(0x3d, K); /* cmp imm32,%eax */
-					break;
-				case BPF_S_JMP_JSET_K:
-					if (K <= 0xFF)
-						EMIT2(0xa8, K); /* test imm8,%al */
-					else if (!(K & 0xFFFF00FF))
-						EMIT3(0xf6, 0xc4, K >> 8); /* test imm8,%ah */
-					else if (K <= 0xFFFF) {
-						EMIT2(0x66, 0xa9); /* test imm16,%ax */
-						EMIT(K, 2);
-					} else {
-						EMIT1_off32(0xa9, K); /* test imm32,%eax */
-					}
-					break;
+						/* add %esi, imm32 */
+						EMIT2_off32(0x81, 0xC6, K);
 				}
-				if (filter[i].jt != 0) {
-					if (filter[i].jf && f_offset)
-						t_offset += is_near(f_offset) ? 2 : 5;
-					EMIT_COND_JMP(t_op, t_offset);
-					if (filter[i].jf)
-						EMIT_JMP(f_offset);
-					break;
-				}
-				EMIT_COND_JMP(f_op, f_offset);
-				break;
+			}
+			/* skb pointer is in R6 (%rbx), it will be copied into
+			 * %rdi if skb_copy_bits() call is necessary.
+			 * sk_load_* helpers also use %r10 and %r9d.
+			 * See bpf_jit.S
+			 */
+			EMIT1_off32(0xE8, jmp_offset); /* call */
+			break;
+
+		case BPF_LD | BPF_IND | BPF_H:
+			func = sk_load_half;
+			goto common_load;
+		case BPF_LD | BPF_ABS | BPF_H:
+			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
+			goto common_load;
+		case BPF_LD | BPF_IND | BPF_B:
+			func = sk_load_byte;
+			goto common_load;
+		case BPF_LD | BPF_ABS | BPF_B:
+			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
+			goto common_load;
+
+		case BPF_JMP | BPF_EXIT:
+			if (i != insn_cnt - 1) {
+				jmp_offset = ctx->cleanup_addr - addrs[i];
+				goto emit_jmp;
+			}
+			/* update cleanup_addr */
+			ctx->cleanup_addr = proglen;
+			/* mov rbx, qword ptr [rbp-X] */
+			EMIT3_off32(0x48, 0x8B, 0x9D, -stacksize);
+			/* mov r13, qword ptr [rbp-X] */
+			EMIT3_off32(0x4C, 0x8B, 0xAD, -stacksize + 8);
+			/* mov r14, qword ptr [rbp-X] */
+			EMIT3_off32(0x4C, 0x8B, 0xB5, -stacksize + 16);
+			/* mov r15, qword ptr [rbp-X] */
+			EMIT3_off32(0x4C, 0x8B, 0xBD, -stacksize + 24);
+
+			EMIT1(0xC9); /* leave */
+			EMIT1(0xC3); /* ret */
+			break;
+
 		default:
-			/* hmm, too complex filter, give up with jit compiler */
+			/* By design x64 JIT should support all BPF instructions
+			 * This error will be seen if new instruction was added
+			 * to interpreter, but not to JIT
+			 * or if there is junk in sk_filter
+			 */
+			pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
 			return -EINVAL;
 		}
+
 		ilen = prog - temp;
 		if (image) {
 			if (unlikely(proglen + ilen > oldproglen)) {
-				pr_err("bpb_jit_compile fatal error\n");
+				pr_err("bpf_jit_compile fatal error\n");
 				return -EFAULT;
 			}
 			memcpy(image + proglen, temp, ilen);
@@ -726,21 +859,14 @@ cond_branch:			f_offset = addrs[i + filter[i].jf] - addrs[i];
 		addrs[i] = proglen;
 		prog = temp;
 	}
-	/* last bpf instruction is always a RET :
-	 * use it to give the cleanup instruction(s) addr
-	 */
-	ctx->cleanup_addr = proglen - 1; /* ret */
-	if (seen_or_pass0)
-		ctx->cleanup_addr -= 1; /* leaveq */
-	if (seen_or_pass0 & SEEN_XREG)
-		ctx->cleanup_addr -= 4; /* mov  -8(%rbp),%rbx */
-
-	ctx->seen = seen;
-
 	return proglen;
 }
 
 void bpf_jit_compile(struct sk_filter *prog)
+{
+}
+
+void bpf_int_jit_compile(struct sk_filter *prog)
 {
 	struct bpf_binary_header *header = NULL;
 	int proglen, oldproglen = 0;
@@ -768,8 +894,6 @@ void bpf_jit_compile(struct sk_filter *prog)
 		addrs[i] = proglen;
 	}
 	ctx.cleanup_addr = proglen;
-	ctx.seen = SEEN_XREG | SEEN_DATAREF | SEEN_MEM;
-	ctx.pc_ret0 = -1;
 
 	for (pass = 0; pass < 10; pass++) {
 		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4457b383961c..9d5ae0a2c954 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -207,6 +207,9 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
 void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 
+u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+void bpf_int_jit_compile(struct sk_filter *fp);
+
 #ifdef CONFIG_BPF_JIT
 #include <stdarg.h>
 #include <linux/linkage.h>
diff --git a/net/core/filter.c b/net/core/filter.c
index c442a0d7d0f7..32c5b44c537e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1524,6 +1524,10 @@ out_err:
 	return ERR_PTR(err);
 }
 
+void __weak bpf_int_jit_compile(struct sk_filter *prog)
+{
+}
+
 static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 					     struct sock *sk)
 {
@@ -1544,9 +1548,12 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 	/* JIT compiler couldn't process this filter, so do the
 	 * internal BPF translation for the optimized interpreter.
 	 */
-	if (!fp->jited)
+	if (!fp->jited) {
 		fp = __sk_migrate_filter(fp, sk);
 
+		/* Probe if internal BPF can be jit-ed */
+		bpf_int_jit_compile(fp);
+	}
 	return fp;
 }
 
-- 
cgit 


From a0bf37edb4d34c21bdaa19a1624378924b917491 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@kernel.org>
Date: Sun, 6 Oct 2013 20:23:49 +0200
Subject: HSI: method to unregister clients from an hsi port

This exports a method to unregister all clients from
an hsi port.

Signed-off-by: Sebastian Reichel <sre@kernel.org>
Reviewed-by: Pavel Machek <pavel@ucw.cz>
Tested-By: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
---
 drivers/hsi/hsi.c       | 10 ++++++++++
 include/linux/hsi/hsi.h |  1 +
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hsi/hsi.c b/drivers/hsi/hsi.c
index 749f7b5c8179..e96a9874b1a4 100644
--- a/drivers/hsi/hsi.c
+++ b/drivers/hsi/hsi.c
@@ -129,6 +129,16 @@ static void hsi_port_release(struct device *dev)
 	kfree(to_hsi_port(dev));
 }
 
+/**
+ * hsi_port_unregister_clients - Unregister all clients of an HSI port
+ * @port: The HSI port whose child client devices are unregistered
+ */
+void hsi_port_unregister_clients(struct hsi_port *port)
+{
+	device_for_each_child(&port->device, NULL, hsi_remove_client);
+}
+EXPORT_SYMBOL_GPL(hsi_port_unregister_clients);
+
 /**
  * hsi_unregister_controller - Unregister an HSI controller
  * @hsi: The HSI controller to register
diff --git a/include/linux/hsi/hsi.h b/include/linux/hsi/hsi.h
index 39bfd5b89077..5a9f1210ed22 100644
--- a/include/linux/hsi/hsi.h
+++ b/include/linux/hsi/hsi.h
@@ -282,6 +282,7 @@ struct hsi_controller *hsi_alloc_controller(unsigned int n_ports, gfp_t flags);
 void hsi_put_controller(struct hsi_controller *hsi);
 int hsi_register_controller(struct hsi_controller *hsi);
 void hsi_unregister_controller(struct hsi_controller *hsi);
+void hsi_port_unregister_clients(struct hsi_port *port);
 
 static inline void hsi_controller_set_drvdata(struct hsi_controller *hsi,
 								void *data)
-- 
cgit 


From a088cf161cc87b39e83c7c53b9f239773422d212 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@kernel.org>
Date: Fri, 28 Mar 2014 22:48:23 +0100
Subject: HSI: Add channel resource support to HSI clients

Make HSI channel ids part of the client configuration, so that they
can be provided by platform data.

Signed-off-by: Sebastian Reichel <sre@kernel.org>
Tested-By: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
---
 drivers/hsi/clients/hsi_char.c | 12 +++++------
 drivers/hsi/hsi.c              | 46 +++++++++++++++++++++++++++++++++++++++++-
 include/linux/hsi/hsi.h        | 24 ++++++++++++++++++----
 3 files changed, 71 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hsi/clients/hsi_char.c b/drivers/hsi/clients/hsi_char.c
index 30733209fde2..57f70c28fa38 100644
--- a/drivers/hsi/clients/hsi_char.c
+++ b/drivers/hsi/clients/hsi_char.c
@@ -367,7 +367,7 @@ static int hsc_rx_set(struct hsi_client *cl, struct hsc_rx_config *rxc)
 		return -EINVAL;
 	tmp = cl->rx_cfg;
 	cl->rx_cfg.mode = rxc->mode;
-	cl->rx_cfg.channels = rxc->channels;
+	cl->rx_cfg.num_hw_channels = rxc->channels;
 	cl->rx_cfg.flow = rxc->flow;
 	ret = hsi_setup(cl);
 	if (ret < 0) {
@@ -383,7 +383,7 @@ static int hsc_rx_set(struct hsi_client *cl, struct hsc_rx_config *rxc)
 static inline void hsc_rx_get(struct hsi_client *cl, struct hsc_rx_config *rxc)
 {
 	rxc->mode = cl->rx_cfg.mode;
-	rxc->channels = cl->rx_cfg.channels;
+	rxc->channels = cl->rx_cfg.num_hw_channels;
 	rxc->flow = cl->rx_cfg.flow;
 }
 
@@ -402,7 +402,7 @@ static int hsc_tx_set(struct hsi_client *cl, struct hsc_tx_config *txc)
 		return -EINVAL;
 	tmp = cl->tx_cfg;
 	cl->tx_cfg.mode = txc->mode;
-	cl->tx_cfg.channels = txc->channels;
+	cl->tx_cfg.num_hw_channels = txc->channels;
 	cl->tx_cfg.speed = txc->speed;
 	cl->tx_cfg.arb_mode = txc->arb_mode;
 	ret = hsi_setup(cl);
@@ -417,7 +417,7 @@ static int hsc_tx_set(struct hsi_client *cl, struct hsc_tx_config *txc)
 static inline void hsc_tx_get(struct hsi_client *cl, struct hsc_tx_config *txc)
 {
 	txc->mode = cl->tx_cfg.mode;
-	txc->channels = cl->tx_cfg.channels;
+	txc->channels = cl->tx_cfg.num_hw_channels;
 	txc->speed = cl->tx_cfg.speed;
 	txc->arb_mode = cl->tx_cfg.arb_mode;
 }
@@ -435,7 +435,7 @@ static ssize_t hsc_read(struct file *file, char __user *buf, size_t len,
 		return -EINVAL;
 	if (len > max_data_size)
 		len = max_data_size;
-	if (channel->ch >= channel->cl->rx_cfg.channels)
+	if (channel->ch >= channel->cl->rx_cfg.num_hw_channels)
 		return -ECHRNG;
 	if (test_and_set_bit(HSC_CH_READ, &channel->flags))
 		return -EBUSY;
@@ -492,7 +492,7 @@ static ssize_t hsc_write(struct file *file, const char __user *buf, size_t len,
 		return -EINVAL;
 	if (len > max_data_size)
 		len = max_data_size;
-	if (channel->ch >= channel->cl->tx_cfg.channels)
+	if (channel->ch >= channel->cl->tx_cfg.num_hw_channels)
 		return -ECHRNG;
 	if (test_and_set_bit(HSC_CH_WRITE, &channel->flags))
 		return -EBUSY;
diff --git a/drivers/hsi/hsi.c b/drivers/hsi/hsi.c
index e96a9874b1a4..de2ad8f20d55 100644
--- a/drivers/hsi/hsi.c
+++ b/drivers/hsi/hsi.c
@@ -62,18 +62,36 @@ static struct bus_type hsi_bus_type = {
 
 static void hsi_client_release(struct device *dev)
 {
-	kfree(to_hsi_client(dev));
+	struct hsi_client *cl = to_hsi_client(dev);
+
+	kfree(cl->tx_cfg.channels);
+	kfree(cl->rx_cfg.channels);
+	kfree(cl);
 }
 
 static void hsi_new_client(struct hsi_port *port, struct hsi_board_info *info)
 {
 	struct hsi_client *cl;
+	size_t size;
 
 	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
 	if (!cl)
 		return;
+
 	cl->tx_cfg = info->tx_cfg;
+	if (cl->tx_cfg.channels) {
+		size = cl->tx_cfg.num_channels * sizeof(*cl->tx_cfg.channels);
+		cl->tx_cfg.channels = kzalloc(size , GFP_KERNEL);
+		memcpy(cl->tx_cfg.channels, info->tx_cfg.channels, size);
+	}
+
 	cl->rx_cfg = info->rx_cfg;
+	if (cl->rx_cfg.channels) {
+		size = cl->rx_cfg.num_channels * sizeof(*cl->rx_cfg.channels);
+		cl->rx_cfg.channels = kzalloc(size , GFP_KERNEL);
+		memcpy(cl->rx_cfg.channels, info->rx_cfg.channels, size);
+	}
+
 	cl->device.bus = &hsi_bus_type;
 	cl->device.parent = &port->device;
 	cl->device.release = hsi_client_release;
@@ -502,6 +520,32 @@ int hsi_event(struct hsi_port *port, unsigned long event)
 }
 EXPORT_SYMBOL_GPL(hsi_event);
 
+/**
+ * hsi_get_channel_id_by_name - acquire channel id by channel name
+ * @cl: HSI client, which uses the channel
+ * @name: name the channel is known under
+ *
+ * Clients can call this function to get the hsi channel ids similar to
+ * requesting IRQs or GPIOs by name. This function assumes the same
+ * channel configuration is used for RX and TX.
+ *
+ * Returns -errno on error or channel id on success.
+ */
+int hsi_get_channel_id_by_name(struct hsi_client *cl, char *name)
+{
+	int i;
+
+	if (!cl->rx_cfg.channels)
+		return -ENOENT;
+
+	for (i = 0; i < cl->rx_cfg.num_channels; i++)
+		if (!strcmp(cl->rx_cfg.channels[i].name, name))
+			return cl->rx_cfg.channels[i].id;
+
+	return -ENXIO;
+}
+EXPORT_SYMBOL_GPL(hsi_get_channel_id_by_name);
+
 static int __init hsi_init(void)
 {
 	return bus_register(&hsi_bus_type);
diff --git a/include/linux/hsi/hsi.h b/include/linux/hsi/hsi.h
index 5a9f1210ed22..e3cff94bef04 100644
--- a/include/linux/hsi/hsi.h
+++ b/include/linux/hsi/hsi.h
@@ -67,18 +67,32 @@ enum {
 	HSI_EVENT_STOP_RX,
 };
 
+/**
+ * struct hsi_channel - channel resource used by the hsi clients
+ * @id: Channel number
+ * @name: Channel name
+ */
+struct hsi_channel {
+	unsigned int	id;
+	const char	*name;
+};
+
 /**
  * struct hsi_config - Configuration for RX/TX HSI modules
  * @mode: Bit transmission mode (STREAM or FRAME)
- * @channels: Number of channels to use [1..16]
+ * @channels: Channel resources used by the client
+ * @num_channels: Number of channel resources
+ * @num_hw_channels: Number of channels the transceiver is configured for [1..16]
  * @speed: Max bit transmission speed (Kbit/s)
  * @flow: RX flow type (SYNCHRONIZED or PIPELINE)
  * @arb_mode: Arbitration mode for TX frame (Round robin, priority)
  */
 struct hsi_config {
-	unsigned int	mode;
-	unsigned int	channels;
-	unsigned int	speed;
+	unsigned int		mode;
+	struct hsi_channel	*channels;
+	unsigned int		num_channels;
+	unsigned int		num_hw_channels;
+	unsigned int		speed;
 	union {
 		unsigned int	flow;		/* RX only */
 		unsigned int	arb_mode;	/* TX only */
@@ -306,6 +320,8 @@ static inline struct hsi_port *hsi_find_port_num(struct hsi_controller *hsi,
  */
 int hsi_async(struct hsi_client *cl, struct hsi_msg *msg);
 
+int hsi_get_channel_id_by_name(struct hsi_client *cl, char *name);
+
 /**
  * hsi_id - Get HSI controller ID associated to a client
  * @cl: Pointer to a HSI client
-- 
cgit 


From 8491451024bcfabdcebd772ce9ec2fc5757acd42 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@kernel.org>
Date: Fri, 28 Mar 2014 22:54:25 +0100
Subject: HSI: export method to (un)register clients

Expose method for registering and unregistering HSI clients, so that
client drivers can register other client drivers.

This is useful for HSI drivers, which want to use the functionality
of other HSI drivers. For example the N900 modem driver can load HSI
drivers for mcsaab protocol and speech protocol.

Signed-off-by: Sebastian Reichel <sre@kernel.org>
Reviewed-by: Pavel Machek <pavel@ucw.cz>
Tested-By: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
---
 drivers/hsi/hsi.c       | 11 ++++++++---
 include/linux/hsi/hsi.h |  3 +++
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hsi/hsi.c b/drivers/hsi/hsi.c
index de2ad8f20d55..834a2d6b444e 100644
--- a/drivers/hsi/hsi.c
+++ b/drivers/hsi/hsi.c
@@ -69,14 +69,15 @@ static void hsi_client_release(struct device *dev)
 	kfree(cl);
 }
 
-static void hsi_new_client(struct hsi_port *port, struct hsi_board_info *info)
+struct hsi_client *hsi_new_client(struct hsi_port *port,
+						struct hsi_board_info *info)
 {
 	struct hsi_client *cl;
 	size_t size;
 
 	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
 	if (!cl)
-		return;
+		return NULL;
 
 	cl->tx_cfg = info->tx_cfg;
 	if (cl->tx_cfg.channels) {
@@ -103,7 +104,10 @@ static void hsi_new_client(struct hsi_port *port, struct hsi_board_info *info)
 		pr_err("hsi: failed to register client: %s\n", info->name);
 		put_device(&cl->device);
 	}
+
+	return cl;
 }
+EXPORT_SYMBOL_GPL(hsi_new_client);
 
 static void hsi_scan_board_info(struct hsi_controller *hsi)
 {
@@ -119,12 +123,13 @@ static void hsi_scan_board_info(struct hsi_controller *hsi)
 		}
 }
 
-static int hsi_remove_client(struct device *dev, void *data __maybe_unused)
+int hsi_remove_client(struct device *dev, void *data __maybe_unused)
 {
 	device_unregister(dev);
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(hsi_remove_client);
 
 static int hsi_remove_port(struct device *dev, void *data __maybe_unused)
 {
diff --git a/include/linux/hsi/hsi.h b/include/linux/hsi/hsi.h
index e3cff94bef04..e20a3999a696 100644
--- a/include/linux/hsi/hsi.h
+++ b/include/linux/hsi/hsi.h
@@ -296,6 +296,9 @@ struct hsi_controller *hsi_alloc_controller(unsigned int n_ports, gfp_t flags);
 void hsi_put_controller(struct hsi_controller *hsi);
 int hsi_register_controller(struct hsi_controller *hsi);
 void hsi_unregister_controller(struct hsi_controller *hsi);
+struct hsi_client *hsi_new_client(struct hsi_port *port,
+						struct hsi_board_info *info);
+int hsi_remove_client(struct device *dev, void *data);
 void hsi_port_unregister_clients(struct hsi_port *port);
 
 static inline void hsi_controller_set_drvdata(struct hsi_controller *hsi,
-- 
cgit 


From a2aa24734d9dbbd3b9062c2459936c336278fa6a Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@kernel.org>
Date: Fri, 28 Mar 2014 22:59:43 +0100
Subject: HSI: Add common DT binding for HSI client devices

Implement and document generic DT bindings for HSI clients.

Signed-off-by: Sebastian Reichel <sre@kernel.org>
Reviewed-by: Pavel Machek <pavel@ucw.cz>
Tested-By: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
---
 .../devicetree/bindings/hsi/client-devices.txt     |  44 +++++
 drivers/hsi/hsi.c                                  | 208 ++++++++++++++++++++-
 include/linux/hsi/hsi.h                            |  11 ++
 3 files changed, 261 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/hsi/client-devices.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/hsi/client-devices.txt b/Documentation/devicetree/bindings/hsi/client-devices.txt
new file mode 100644
index 000000000000..104c9a3e57a4
--- /dev/null
+++ b/Documentation/devicetree/bindings/hsi/client-devices.txt
@@ -0,0 +1,44 @@
+Each HSI port is supposed to have one child node, which
+represents the remote device connected to the HSI port. The
+following properties are standardized for HSI clients:
+
+Required HSI configuration properties:
+
+- hsi-channel-ids:	A list of channel ids
+
+- hsi-rx-mode:		Receiver Bit transmission mode ("stream" or "frame")
+- hsi-tx-mode:		Transmitter Bit transmission mode ("stream" or "frame")
+- hsi-mode:		May be used instead of hsi-rx-mode and hsi-tx-mode if
+			the transmission mode is the same for receiver and
+			transmitter
+- hsi-speed-kbps:	Max bit transmission speed in kbit/s
+- hsi-flow:		RX flow type ("synchronized" or "pipeline")
+- hsi-arb-mode:		Arbitration mode for TX frame ("round-robin", "priority")
+
+Optional HSI configuration properties:
+
+- hsi-channel-names:	A list with one name per channel specified in the
+			hsi-channel-ids property
+
+
+Device Tree node example for an HSI client:
+
+hsi-controller {
+	hsi-port {
+		modem: hsi-client {
+			compatible = "nokia,n900-modem";
+
+			hsi-channel-ids = <0>, <1>, <2>, <3>;
+			hsi-channel-names = "mcsaab-control",
+					    "speech-control",
+					    "speech-data",
+					    "mcsaab-data";
+			hsi-speed-kbps = <55000>;
+			hsi-mode = "frame";
+			hsi-flow = "synchronized";
+			hsi-arb-mode = "round-robin";
+
+			/* more client specific properties */
+		};
+	};
+};
diff --git a/drivers/hsi/hsi.c b/drivers/hsi/hsi.c
index 834a2d6b444e..fe9371271ce2 100644
--- a/drivers/hsi/hsi.c
+++ b/drivers/hsi/hsi.c
@@ -26,6 +26,8 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 #include "hsi_core.h"
 
 static ssize_t modalias_show(struct device *dev,
@@ -50,7 +52,13 @@ static int hsi_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
 
 static int hsi_bus_match(struct device *dev, struct device_driver *driver)
 {
-	return strcmp(dev_name(dev), driver->name) == 0;
+	if (of_driver_match_device(dev, driver))
+		return true;
+
+	if (strcmp(dev_name(dev), driver->name) == 0)
+		return true;
+
+	return false;
 }
 
 static struct bus_type hsi_bus_type = {
@@ -123,6 +131,202 @@ static void hsi_scan_board_info(struct hsi_controller *hsi)
 		}
 }
 
+#ifdef CONFIG_OF
+static struct hsi_board_info hsi_char_dev_info = {
+	.name = "hsi_char",
+};
+
+static int hsi_of_property_parse_mode(struct device_node *client, char *name,
+				      unsigned int *result)
+{
+	const char *mode;
+	int err;
+
+	err = of_property_read_string(client, name, &mode);
+	if (err < 0)
+		return err;
+
+	if (strcmp(mode, "stream") == 0)
+		*result = HSI_MODE_STREAM;
+	else if (strcmp(mode, "frame") == 0)
+		*result = HSI_MODE_FRAME;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static int hsi_of_property_parse_flow(struct device_node *client, char *name,
+				      unsigned int *result)
+{
+	const char *flow;
+	int err;
+
+	err = of_property_read_string(client, name, &flow);
+	if (err < 0)
+		return err;
+
+	if (strcmp(flow, "synchronized") == 0)
+		*result = HSI_FLOW_SYNC;
+	else if (strcmp(flow, "pipeline") == 0)
+		*result = HSI_FLOW_PIPE;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static int hsi_of_property_parse_arb_mode(struct device_node *client,
+					  char *name, unsigned int *result)
+{
+	const char *arb_mode;
+	int err;
+
+	err = of_property_read_string(client, name, &arb_mode);
+	if (err < 0)
+		return err;
+
+	if (strcmp(arb_mode, "round-robin") == 0)
+		*result = HSI_ARB_RR;
+	else if (strcmp(arb_mode, "priority") == 0)
+		*result = HSI_ARB_PRIO;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Create and register one HSI client from the device tree node @client,
+ * with @port as its parent device. Modes, speed, flow, arbitration mode and
+ * the channel list are read from the "hsi-*" properties of @client.
+ * On any failure an error is logged and no client is left registered.
+ */
+static void hsi_add_client_from_dt(struct hsi_port *port,
+						struct device_node *client)
+{
+	struct hsi_client *cl;
+	struct hsi_channel channel;
+	char name[32];
+	int length, cells, err, i;
+	unsigned int max_chan, mode;
+
+	cl = kzalloc(sizeof(*cl), GFP_KERNEL);
+	if (!cl)
+		return;
+
+	err = of_modalias_node(client, name, sizeof(name));
+	if (err)
+		goto err;
+
+	dev_set_name(&cl->device, "%s", name);
+
+	err = hsi_of_property_parse_mode(client, "hsi-mode", &mode);
+	if (err) {
+		err = hsi_of_property_parse_mode(client, "hsi-rx-mode",
+						 &cl->rx_cfg.mode);
+		if (err)
+			goto err;
+
+		err = hsi_of_property_parse_mode(client, "hsi-tx-mode",
+						 &cl->tx_cfg.mode);
+		if (err)
+			goto err;
+	} else {
+		cl->rx_cfg.mode = mode;
+		cl->tx_cfg.mode = mode;
+	}
+
+	err = of_property_read_u32(client, "hsi-speed-kbps",
+				   &cl->tx_cfg.speed);
+	if (err)
+		goto err;
+	cl->rx_cfg.speed = cl->tx_cfg.speed;
+
+	err = hsi_of_property_parse_flow(client, "hsi-flow",
+					 &cl->rx_cfg.flow);
+	if (err)
+		goto err;
+
+	/* arb_mode is TX only and overlays flow in the hsi_config union */
+	err = hsi_of_property_parse_arb_mode(client, "hsi-arb-mode",
+					     &cl->tx_cfg.arb_mode);
+	if (err)
+		goto err;
+
+	if (!of_find_property(client, "hsi-channel-ids", &length)) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	cells = length / sizeof(u32);
+
+	cl->rx_cfg.num_channels = cells;
+	cl->tx_cfg.num_channels = cells;
+
+	cl->rx_cfg.channels = kcalloc(cells, sizeof(channel), GFP_KERNEL);
+	cl->tx_cfg.channels = kcalloc(cells, sizeof(channel), GFP_KERNEL);
+	if (!cl->rx_cfg.channels || !cl->tx_cfg.channels) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	max_chan = 0;
+	for (i = 0; i < cells; i++) {
+		err = of_property_read_u32_index(client, "hsi-channel-ids", i,
+						 &channel.id);
+		if (err)
+			goto err;
+
+		err = of_property_read_string_index(client, "hsi-channel-names",
+						    i, &channel.name);
+		if (err)
+			channel.name = NULL;
+
+		if (channel.id > max_chan)
+			max_chan = channel.id;
+
+		cl->rx_cfg.channels[i] = channel;
+		cl->tx_cfg.channels[i] = channel;
+	}
+
+	cl->rx_cfg.num_hw_channels = max_chan + 1;
+	cl->tx_cfg.num_hw_channels = max_chan + 1;
+
+	cl->device.bus = &hsi_bus_type;
+	cl->device.parent = &port->device;
+	cl->device.release = hsi_client_release;
+	cl->device.of_node = client;
+
+	if (device_register(&cl->device) < 0) {
+		pr_err("hsi: failed to register client: %s\n", name);
+		/* the release callback frees cl; do not free it again below */
+		put_device(&cl->device);
+	}
+
+	return;
+
+err:
+	/* cl was zero-allocated, so unset channel arrays are NULL here */
+	kfree(cl->tx_cfg.channels);
+	kfree(cl->rx_cfg.channels);
+	kfree(cl);
+	pr_err("hsi client: missing or incorrect of property: err=%d\n", err);
+}
+
+void hsi_add_clients_from_dt(struct hsi_port *port, struct device_node *clients)
+{
+	struct device_node *child;
+
+	/* register hsi-char device */
+	hsi_new_client(port, &hsi_char_dev_info);
+
+	for_each_available_child_of_node(clients, child)
+		hsi_add_client_from_dt(port, child);
+}
+EXPORT_SYMBOL_GPL(hsi_add_clients_from_dt);
+#endif
+
 int hsi_remove_client(struct device *dev, void *data __maybe_unused)
 {
 	device_unregister(dev);
@@ -505,7 +709,7 @@ int hsi_unregister_port_event(struct hsi_client *cl)
 EXPORT_SYMBOL_GPL(hsi_unregister_port_event);
 
 /**
- * hsi_event -Notifies clients about port events
+ * hsi_event - Notifies clients about port events
  * @port: Port where the event occurred
  * @event: The event type
  *
diff --git a/include/linux/hsi/hsi.h b/include/linux/hsi/hsi.h
index e20a3999a696..3ec06300d535 100644
--- a/include/linux/hsi/hsi.h
+++ b/include/linux/hsi/hsi.h
@@ -301,6 +301,17 @@ struct hsi_client *hsi_new_client(struct hsi_port *port,
 int hsi_remove_client(struct device *dev, void *data);
 void hsi_port_unregister_clients(struct hsi_port *port);
 
+#ifdef CONFIG_OF
+void hsi_add_clients_from_dt(struct hsi_port *port,
+			     struct device_node *clients);
+#else
+static inline void hsi_add_clients_from_dt(struct hsi_port *port,
+					   struct device_node *clients)
+{
+	return;
+}
+#endif
+
 static inline void hsi_controller_set_drvdata(struct hsi_controller *hsi,
 								void *data)
 {
-- 
cgit 


From dc7bf5d7186849aa36b9f0e42e250a813a7b0bdb Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@kernel.org>
Date: Fri, 15 Nov 2013 10:50:32 +0000
Subject: HSI: Introduce driver for SSI Protocol

This adds a driver for the SSI McSAAB protocol as used in
the Nokia N900.

Signed-off-by: Carlos Chinea <carlos.chinea@nokia.com>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
Tested-By: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
---
 drivers/hsi/clients/Kconfig        |    8 +
 drivers/hsi/clients/Makefile       |    3 +-
 drivers/hsi/clients/ssi_protocol.c | 1191 ++++++++++++++++++++++++++++++++++++
 include/linux/hsi/ssi_protocol.h   |   42 ++
 4 files changed, 1243 insertions(+), 1 deletion(-)
 create mode 100644 drivers/hsi/clients/ssi_protocol.c
 create mode 100644 include/linux/hsi/ssi_protocol.h

(limited to 'include/linux')

diff --git a/drivers/hsi/clients/Kconfig b/drivers/hsi/clients/Kconfig
index 3bacd275f479..1457cfb5b453 100644
--- a/drivers/hsi/clients/Kconfig
+++ b/drivers/hsi/clients/Kconfig
@@ -4,6 +4,14 @@
 
 comment "HSI clients"
 
+config SSI_PROTOCOL
+	tristate "SSI protocol"
+	depends on HSI && PHONET && (OMAP_SSI=y || OMAP_SSI=m)
+	help
+	If you say Y here, you will enable the SSI protocol aka McSAAB.
+
+	If unsure, say N.
+
 config HSI_CHAR
 	tristate "HSI/SSI character driver"
 	depends on HSI
diff --git a/drivers/hsi/clients/Makefile b/drivers/hsi/clients/Makefile
index 327c0e27c8b0..ccbf768ea42b 100644
--- a/drivers/hsi/clients/Makefile
+++ b/drivers/hsi/clients/Makefile
@@ -2,4 +2,5 @@
 # Makefile for HSI clients
 #
 
-obj-$(CONFIG_HSI_CHAR)	+= hsi_char.o
+obj-$(CONFIG_SSI_PROTOCOL)	+= ssi_protocol.o
+obj-$(CONFIG_HSI_CHAR)		+= hsi_char.o
diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c
new file mode 100644
index 000000000000..ce4be3738d46
--- /dev/null
+++ b/drivers/hsi/clients/ssi_protocol.c
@@ -0,0 +1,1191 @@
+/*
+ * ssi_protocol.c
+ *
+ * Implementation of the SSI McSAAB improved protocol.
+ *
+ * Copyright (C) 2010 Nokia Corporation. All rights reserved.
+ * Copyright (C) 2013 Sebastian Reichel <sre@kernel.org>
+ *
+ * Contact: Carlos Chinea <carlos.chinea@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/atomic.h>
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gpio.h>
+#include <linux/if_ether.h>
+#include <linux/if_arp.h>
+#include <linux/if_phonet.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/notifier.h>
+#include <linux/scatterlist.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/hsi/hsi.h>
+#include <linux/hsi/ssi_protocol.h>
+
+void ssi_waketest(struct hsi_client *cl, unsigned int enable);
+
+#define SSIP_TXQUEUE_LEN	100
+#define SSIP_MAX_MTU		65535
+#define SSIP_DEFAULT_MTU	4000
+#define PN_MEDIA_SOS		21
+#define SSIP_MIN_PN_HDR		6	/* FIXME: Revisit */
+#define SSIP_WDTOUT		2000	/* FIXME: has to be 500 msecs */
+#define SSIP_KATOUT		15	/* 15 msecs */
+#define SSIP_MAX_CMDS		5 /* Number of pre-allocated commands buffers */
+#define SSIP_BYTES_TO_FRAMES(x) ((((x) - 1) >> 2) + 1)
+#define SSIP_CMT_LOADER_SYNC	0x11223344
+/*
+ * SSI protocol command definitions
+ */
+#define SSIP_COMMAND(data)	((data) >> 28)
+#define SSIP_PAYLOAD(data)	((data) & 0xfffffff)
+/* Commands */
+#define SSIP_SW_BREAK		0
+#define SSIP_BOOTINFO_REQ	1
+#define SSIP_BOOTINFO_RESP	2
+#define SSIP_WAKETEST_RESULT	3
+#define SSIP_START_TRANS	4
+#define SSIP_READY		5
+/* Payloads */
+#define SSIP_DATA_VERSION(data)	((data) & 0xff)
+#define SSIP_LOCAL_VERID	1
+#define SSIP_WAKETEST_OK	0
+#define SSIP_WAKETEST_FAILED	1
+#define SSIP_PDU_LENGTH(data)	(((data) >> 8) & 0xffff)
+#define SSIP_MSG_ID(data)	((data) & 0xff)
+/* Generic Command */
+#define SSIP_CMD(cmd, payload)	(((cmd) << 28) | ((payload) & 0xfffffff))
+/* Commands for the control channel */
+#define SSIP_BOOTINFO_REQ_CMD(ver) \
+		SSIP_CMD(SSIP_BOOTINFO_REQ, SSIP_DATA_VERSION(ver))
+#define SSIP_BOOTINFO_RESP_CMD(ver) \
+		SSIP_CMD(SSIP_BOOTINFO_RESP, SSIP_DATA_VERSION(ver))
+#define SSIP_START_TRANS_CMD(pdulen, id) \
+		SSIP_CMD(SSIP_START_TRANS, (((pdulen) << 8) | SSIP_MSG_ID(id)))
+#define SSIP_READY_CMD		SSIP_CMD(SSIP_READY, 0)
+#define SSIP_SWBREAK_CMD	SSIP_CMD(SSIP_SW_BREAK, 0)
+
+/* Main state machine states */
+enum {
+	INIT,
+	HANDSHAKE,
+	ACTIVE,
+};
+
+/* Send state machine states */
+enum {
+	SEND_IDLE,
+	WAIT4READY,
+	SEND_READY,
+	SENDING,
+	SENDING_SWBREAK,
+};
+
+/* Receive state machine states */
+enum {
+	RECV_IDLE,
+	RECV_READY,
+	RECEIVING,
+};
+
+/**
+ * struct ssi_protocol - SSI protocol (McSAAB) data
+ * @main_state: Main state machine
+ * @send_state: TX state machine
+ * @recv_state: RX state machine
+ * @waketest: Flag to follow wake line test
+ * @rxid: RX data id
+ * @txid: TX data id
+ * @txqueue_len: TX queue length
+ * @tx_wd: TX watchdog
+ * @rx_wd: RX watchdog
+ * @keep_alive: Workaround for SSI HW bug
+ * @lock: To serialize access to this struct
+ * @netdev: Phonet network device
+ * @txqueue: TX data queue
+ * @cmdqueue: Queue of free commands
+ * @cl: HSI client own reference
+ * @link: Link for ssip_list
+ * @tx_usecnt: Refcount to keep track of the slaves that use the wake line
+ * @channel_id_cmd: HSI channel id for command stream
+ * @channel_id_data: HSI channel id for data stream
+ */
+struct ssi_protocol {
+	unsigned int		main_state;
+	unsigned int		send_state;
+	unsigned int		recv_state;
+	unsigned int		waketest:1;
+	u8			rxid;
+	u8			txid;
+	unsigned int		txqueue_len;
+	struct timer_list	tx_wd;
+	struct timer_list	rx_wd;
+	struct timer_list	keep_alive; /* wake-up workaround */
+	spinlock_t		lock;
+	struct net_device	*netdev;
+	struct list_head	txqueue;
+	struct list_head	cmdqueue;
+	struct hsi_client	*cl;
+	struct list_head	link;
+	atomic_t		tx_usecnt;
+	int			channel_id_cmd;
+	int			channel_id_data;
+};
+
+/* List of ssi protocol instances */
+static LIST_HEAD(ssip_list);
+
+static void ssip_rxcmd_complete(struct hsi_msg *msg);
+
+static inline void ssip_set_cmd(struct hsi_msg *msg, u32 cmd)
+{
+	u32 *data;
+
+	data = sg_virt(msg->sgt.sgl);
+	*data = cmd;
+}
+
+static inline u32 ssip_get_cmd(struct hsi_msg *msg)
+{
+	u32 *data;
+
+	data = sg_virt(msg->sgt.sgl);
+
+	return *data;
+}
+
+static void ssip_skb_to_msg(struct sk_buff *skb, struct hsi_msg *msg)
+{
+	skb_frag_t *frag;
+	struct scatterlist *sg;
+	int i;
+
+	BUG_ON(msg->sgt.nents != (unsigned int)(skb_shinfo(skb)->nr_frags + 1));
+
+	sg = msg->sgt.sgl;
+	sg_set_buf(sg, skb->data, skb_headlen(skb));
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		sg = sg_next(sg);
+		BUG_ON(!sg);
+		frag = &skb_shinfo(skb)->frags[i];
+		sg_set_page(sg, frag->page.p, frag->size, frag->page_offset);
+	}
+}
+
+static void ssip_free_data(struct hsi_msg *msg)
+{
+	struct sk_buff *skb;
+
+	skb = msg->context;
+	pr_debug("free data: msg %p context %p skb %p\n", msg, msg->context,
+								skb);
+	msg->destructor = NULL;
+	dev_kfree_skb(skb);
+	hsi_free_msg(msg);
+}
+
+static struct hsi_msg *ssip_alloc_data(struct ssi_protocol *ssi,
+					struct sk_buff *skb, gfp_t flags)
+{
+	struct hsi_msg *msg;
+
+	msg = hsi_alloc_msg(skb_shinfo(skb)->nr_frags + 1, flags);
+	if (!msg)
+		return NULL;
+	ssip_skb_to_msg(skb, msg);
+	msg->destructor = ssip_free_data;
+	msg->channel = ssi->channel_id_data;
+	msg->context = skb;
+
+	return msg;
+}
+
+static inline void ssip_release_cmd(struct hsi_msg *msg)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(msg->cl);
+
+	dev_dbg(&msg->cl->device, "Release cmd 0x%08x\n", ssip_get_cmd(msg));
+	spin_lock_bh(&ssi->lock);
+	list_add_tail(&msg->link, &ssi->cmdqueue);
+	spin_unlock_bh(&ssi->lock);
+}
+
+static struct hsi_msg *ssip_claim_cmd(struct ssi_protocol *ssi)
+{
+	struct hsi_msg *msg;
+
+	/* Test for an exhausted pool under the lock that protects cmdqueue */
+	spin_lock_bh(&ssi->lock);
+	BUG_ON(list_empty(&ssi->cmdqueue));
+	msg = list_first_entry(&ssi->cmdqueue, struct hsi_msg, link);
+	list_del(&msg->link);
+	spin_unlock_bh(&ssi->lock);
+	msg->destructor = ssip_release_cmd;
+
+	return msg;
+}
+
+static void ssip_free_cmds(struct ssi_protocol *ssi)
+{
+	struct hsi_msg *msg, *tmp;
+
+	list_for_each_entry_safe(msg, tmp, &ssi->cmdqueue, link) {
+		list_del(&msg->link);
+		msg->destructor = NULL;
+		kfree(sg_virt(msg->sgt.sgl));
+		hsi_free_msg(msg);
+	}
+}
+
+static int ssip_alloc_cmds(struct ssi_protocol *ssi)
+{
+	struct hsi_msg *msg;
+	u32 *buf;
+	unsigned int i;
+
+	for (i = 0; i < SSIP_MAX_CMDS; i++) {
+		msg = hsi_alloc_msg(1, GFP_KERNEL);
+		if (!msg)
+			goto out;
+		buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+		if (!buf) {
+			hsi_free_msg(msg);
+			goto out;
+		}
+		sg_init_one(msg->sgt.sgl, buf, sizeof(*buf));
+		msg->channel = ssi->channel_id_cmd;
+		list_add_tail(&msg->link, &ssi->cmdqueue);
+	}
+
+	return 0;
+out:
+	ssip_free_cmds(ssi);
+
+	return -ENOMEM;
+}
+
+static void ssip_set_rxstate(struct ssi_protocol *ssi, unsigned int state)
+{
+	ssi->recv_state = state;
+	switch (state) {
+	case RECV_IDLE:
+		del_timer(&ssi->rx_wd);
+		if (ssi->send_state == SEND_IDLE)
+			del_timer(&ssi->keep_alive);
+		break;
+	case RECV_READY:
+		/* CMT speech workaround */
+		if (atomic_read(&ssi->tx_usecnt))
+			break;
+		/* Otherwise fall through */
+	case RECEIVING:
+		mod_timer(&ssi->keep_alive, jiffies +
+						msecs_to_jiffies(SSIP_KATOUT));
+		mod_timer(&ssi->rx_wd, jiffies + msecs_to_jiffies(SSIP_WDTOUT));
+		break;
+	default:
+		break;
+	}
+}
+
+static void ssip_set_txstate(struct ssi_protocol *ssi, unsigned int state)
+{
+	ssi->send_state = state;
+	switch (state) {
+	case SEND_IDLE:
+	case SEND_READY:
+		del_timer(&ssi->tx_wd);
+		if (ssi->recv_state == RECV_IDLE)
+			del_timer(&ssi->keep_alive);
+		break;
+	case WAIT4READY:
+	case SENDING:
+	case SENDING_SWBREAK:
+		mod_timer(&ssi->keep_alive,
+				jiffies + msecs_to_jiffies(SSIP_KATOUT));
+		mod_timer(&ssi->tx_wd, jiffies + msecs_to_jiffies(SSIP_WDTOUT));
+		break;
+	default:
+		break;
+	}
+}
+
+struct hsi_client *ssip_slave_get_master(struct hsi_client *slave)
+{
+	struct hsi_client *master = ERR_PTR(-ENODEV);
+	struct ssi_protocol *ssi;
+
+	list_for_each_entry(ssi, &ssip_list, link)
+		if (slave->device.parent == ssi->cl->device.parent) {
+			master = ssi->cl;
+			break;
+		}
+
+	return master;
+}
+EXPORT_SYMBOL_GPL(ssip_slave_get_master);
+
+int ssip_slave_start_tx(struct hsi_client *master)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(master);
+
+	dev_dbg(&master->device, "start TX %d\n", atomic_read(&ssi->tx_usecnt));
+	spin_lock_bh(&ssi->lock);
+	if (ssi->send_state == SEND_IDLE) {
+		ssip_set_txstate(ssi, WAIT4READY);
+		hsi_start_tx(master);
+	}
+	spin_unlock_bh(&ssi->lock);
+	atomic_inc(&ssi->tx_usecnt);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ssip_slave_start_tx);
+
+int ssip_slave_stop_tx(struct hsi_client *master)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(master);
+
+	WARN_ON_ONCE(atomic_read(&ssi->tx_usecnt) == 0);
+
+	if (atomic_dec_and_test(&ssi->tx_usecnt)) {
+		spin_lock_bh(&ssi->lock);
+		if ((ssi->send_state == SEND_READY) ||
+			(ssi->send_state == WAIT4READY)) {
+			ssip_set_txstate(ssi, SEND_IDLE);
+			hsi_stop_tx(master);
+		}
+		spin_unlock_bh(&ssi->lock);
+	}
+	dev_dbg(&master->device, "stop TX %d\n", atomic_read(&ssi->tx_usecnt));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ssip_slave_stop_tx);
+
+int ssip_slave_running(struct hsi_client *master)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(master);
+	return netif_running(ssi->netdev);
+}
+EXPORT_SYMBOL_GPL(ssip_slave_running);
+
+static void ssip_reset(struct hsi_client *cl)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	struct list_head *head, *tmp;
+	struct hsi_msg *msg;
+
+	if (netif_running(ssi->netdev))
+		netif_carrier_off(ssi->netdev);
+	hsi_flush(cl);
+	spin_lock_bh(&ssi->lock);
+	if (ssi->send_state != SEND_IDLE)
+		hsi_stop_tx(cl);
+	if (ssi->waketest)
+		ssi_waketest(cl, 0);
+	del_timer(&ssi->rx_wd);
+	del_timer(&ssi->tx_wd);
+	del_timer(&ssi->keep_alive);
+	ssi->main_state = 0;
+	ssi->send_state = 0;
+	ssi->recv_state = 0;
+	ssi->waketest = 0;
+	ssi->rxid = 0;
+	ssi->txid = 0;
+	list_for_each_safe(head, tmp, &ssi->txqueue) {
+		msg = list_entry(head, struct hsi_msg, link);
+		dev_dbg(&cl->device, "Pending TX data\n");
+		list_del(head);
+		ssip_free_data(msg);
+	}
+	ssi->txqueue_len = 0;
+	spin_unlock_bh(&ssi->lock);
+}
+
+static void ssip_dump_state(struct hsi_client *cl)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	struct hsi_msg *msg;
+
+	spin_lock_bh(&ssi->lock);
+	dev_err(&cl->device, "Main state: %d\n", ssi->main_state);
+	dev_err(&cl->device, "Recv state: %d\n", ssi->recv_state);
+	dev_err(&cl->device, "Send state: %d\n", ssi->send_state);
+	dev_err(&cl->device, "CMT %s\n", (ssi->main_state == ACTIVE) ?
+							"Online" : "Offline");
+	dev_err(&cl->device, "Wake test %d\n", ssi->waketest);
+	dev_err(&cl->device, "Data RX id: %d\n", ssi->rxid);
+	dev_err(&cl->device, "Data TX id: %d\n", ssi->txid);
+
+	list_for_each_entry(msg, &ssi->txqueue, link)
+		dev_err(&cl->device, "pending TX data (%p)\n", msg);
+	spin_unlock_bh(&ssi->lock);
+}
+
+static void ssip_error(struct hsi_client *cl)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	struct hsi_msg *msg;
+
+	ssip_dump_state(cl);
+	ssip_reset(cl);
+	msg = ssip_claim_cmd(ssi);
+	msg->complete = ssip_rxcmd_complete;
+	hsi_async_read(cl, msg);
+}
+
+static void ssip_keep_alive(unsigned long data)
+{
+	struct hsi_client *cl = (struct hsi_client *)data;
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+
+	dev_dbg(&cl->device, "Keep alive kick in: m(%d) r(%d) s(%d)\n",
+		ssi->main_state, ssi->recv_state, ssi->send_state);
+
+	spin_lock(&ssi->lock);
+	if (ssi->recv_state == RECV_IDLE)
+		switch (ssi->send_state) {
+		case SEND_READY:
+			if (atomic_read(&ssi->tx_usecnt) == 0)
+				break;
+			/*
+			 * Fall through. Workaround for cmt-speech:
+			 * in that case we rely on audio timers.
+			 */
+		case SEND_IDLE:
+			spin_unlock(&ssi->lock);
+			return;
+		}
+	mod_timer(&ssi->keep_alive, jiffies + msecs_to_jiffies(SSIP_KATOUT));
+	spin_unlock(&ssi->lock);
+}
+
+static void ssip_wd(unsigned long data)
+{
+	struct hsi_client *cl = (struct hsi_client *)data;
+
+	dev_err(&cl->device, "Watchdog trigerred\n");
+	ssip_error(cl);
+}
+
+static void ssip_send_bootinfo_req_cmd(struct hsi_client *cl)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	struct hsi_msg *msg;
+
+	dev_dbg(&cl->device, "Issuing BOOT INFO REQ command\n");
+	msg = ssip_claim_cmd(ssi);
+	ssip_set_cmd(msg, SSIP_BOOTINFO_REQ_CMD(SSIP_LOCAL_VERID));
+	msg->complete = ssip_release_cmd;
+	hsi_async_write(cl, msg);
+	dev_dbg(&cl->device, "Issuing RX command\n");
+	msg = ssip_claim_cmd(ssi);
+	msg->complete = ssip_rxcmd_complete;
+	hsi_async_read(cl, msg);
+}
+
+static void ssip_start_rx(struct hsi_client *cl)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	struct hsi_msg *msg;
+
+	dev_dbg(&cl->device, "RX start M(%d) R(%d)\n", ssi->main_state,
+						ssi->recv_state);
+	spin_lock(&ssi->lock);
+	/*
+	 * We can have two UP events in a row due to a short low
+	 * high transition. Therefore we need to ignore the second UP event.
+	 */
+	if ((ssi->main_state != ACTIVE) || (ssi->recv_state == RECV_READY)) {
+		if (ssi->main_state == INIT) {
+			ssi->main_state = HANDSHAKE;
+			spin_unlock(&ssi->lock);
+			ssip_send_bootinfo_req_cmd(cl);
+		} else {
+			spin_unlock(&ssi->lock);
+		}
+		return;
+	}
+	ssip_set_rxstate(ssi, RECV_READY);
+	spin_unlock(&ssi->lock);
+
+	msg = ssip_claim_cmd(ssi);
+	ssip_set_cmd(msg, SSIP_READY_CMD);
+	msg->complete = ssip_release_cmd;
+	dev_dbg(&cl->device, "Send READY\n");
+	hsi_async_write(cl, msg);
+}
+
+static void ssip_stop_rx(struct hsi_client *cl)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+
+	dev_dbg(&cl->device, "RX stop M(%d)\n", ssi->main_state);
+	spin_lock(&ssi->lock);
+	if (likely(ssi->main_state == ACTIVE))
+		ssip_set_rxstate(ssi, RECV_IDLE);
+	spin_unlock(&ssi->lock);
+}
+
+static void ssip_free_strans(struct hsi_msg *msg)
+{
+	ssip_free_data(msg->context);
+	ssip_release_cmd(msg);
+}
+
+static void ssip_strans_complete(struct hsi_msg *msg)
+{
+	struct hsi_client *cl = msg->cl;
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	struct hsi_msg *data;
+
+	data = msg->context;
+	ssip_release_cmd(msg);
+	spin_lock(&ssi->lock);
+	ssip_set_txstate(ssi, SENDING);
+	spin_unlock(&ssi->lock);
+	hsi_async_write(cl, data);
+}
+
+static int ssip_xmit(struct hsi_client *cl)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	struct hsi_msg *msg, *dmsg;
+	struct sk_buff *skb;
+
+	spin_lock_bh(&ssi->lock);
+	if (list_empty(&ssi->txqueue)) {
+		spin_unlock_bh(&ssi->lock);
+		return 0;
+	}
+	dmsg = list_first_entry(&ssi->txqueue, struct hsi_msg, link);
+	list_del(&dmsg->link);
+	ssi->txqueue_len--;
+	spin_unlock_bh(&ssi->lock);
+
+	msg = ssip_claim_cmd(ssi);
+	skb = dmsg->context;
+	msg->context = dmsg;
+	msg->complete = ssip_strans_complete;
+	msg->destructor = ssip_free_strans;
+
+	spin_lock_bh(&ssi->lock);
+	ssip_set_cmd(msg, SSIP_START_TRANS_CMD(SSIP_BYTES_TO_FRAMES(skb->len),
+								ssi->txid));
+	ssi->txid++;
+	ssip_set_txstate(ssi, SENDING);
+	spin_unlock_bh(&ssi->lock);
+
+	dev_dbg(&cl->device, "Send STRANS (%d frames)\n",
+						SSIP_BYTES_TO_FRAMES(skb->len));
+
+	return hsi_async_write(cl, msg);
+}
+
+/* In soft IRQ context */
+static void ssip_pn_rx(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+
+	if (unlikely(!netif_running(dev))) {
+		dev_dbg(&dev->dev, "Drop RX packet\n");
+		dev->stats.rx_dropped++;
+		dev_kfree_skb(skb);
+		return;
+	}
+	if (unlikely(!pskb_may_pull(skb, SSIP_MIN_PN_HDR))) {
+		dev_dbg(&dev->dev, "Error drop RX packet\n");
+		dev->stats.rx_errors++;
+		dev->stats.rx_length_errors++;
+		dev_kfree_skb(skb);
+		return;
+	}
+	dev->stats.rx_packets++;
+	dev->stats.rx_bytes += skb->len;
+
+	/* Length field arrives in network byte order: log it, then convert */
+	dev_dbg(&dev->dev, "RX length fixed (%04x -> %u)\n",
+			((u16 *)skb->data)[2], ntohs(((u16 *)skb->data)[2]));
+	((u16 *)skb->data)[2] = ntohs(((u16 *)skb->data)[2]);
+
+	skb->protocol = htons(ETH_P_PHONET);
+	skb_reset_mac_header(skb);
+	__skb_pull(skb, 1);
+	netif_rx(skb);
+}
+
+static void ssip_rx_data_complete(struct hsi_msg *msg)
+{
+	struct hsi_client *cl = msg->cl;
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	struct sk_buff *skb;
+
+	if (msg->status == HSI_STATUS_ERROR) {
+		dev_err(&cl->device, "RX data error\n");
+		ssip_free_data(msg);
+		ssip_error(cl);
+		return;
+	}
+	del_timer(&ssi->rx_wd); /* FIXME: Revisit */
+	skb = msg->context;
+	ssip_pn_rx(skb);
+	hsi_free_msg(msg);
+}
+
+static void ssip_rx_bootinforeq(struct hsi_client *cl, u32 cmd)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	struct hsi_msg *msg;
+
+	/* Workaround: Ignore CMT Loader message leftover */
+	if (cmd == SSIP_CMT_LOADER_SYNC)
+		return;
+
+	switch (ssi->main_state) {
+	case ACTIVE:
+		dev_err(&cl->device, "Boot info req on active state\n");
+		ssip_error(cl);
+		/* Fall through */
+	case INIT:
+		spin_lock(&ssi->lock);
+		ssi->main_state = HANDSHAKE;
+		if (!ssi->waketest) {
+			ssi->waketest = 1;
+			ssi_waketest(cl, 1); /* FIXME: To be removed */
+		}
+		/* Start boot handshake watchdog */
+		mod_timer(&ssi->tx_wd, jiffies + msecs_to_jiffies(SSIP_WDTOUT));
+		spin_unlock(&ssi->lock);
+		dev_dbg(&cl->device, "Send BOOTINFO_RESP\n");
+		if (SSIP_DATA_VERSION(cmd) != SSIP_LOCAL_VERID)
+			dev_warn(&cl->device, "boot info req verid mismatch\n");
+		msg = ssip_claim_cmd(ssi);
+		ssip_set_cmd(msg, SSIP_BOOTINFO_RESP_CMD(SSIP_LOCAL_VERID));
+		msg->complete = ssip_release_cmd;
+		hsi_async_write(cl, msg);
+		break;
+	case HANDSHAKE:
+		/* Ignore */
+		break;
+	default:
+		dev_dbg(&cl->device, "Wrong state M(%d)\n", ssi->main_state);
+		break;
+	}
+}
+
+static void ssip_rx_bootinforesp(struct hsi_client *cl, u32 cmd)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+
+	if (SSIP_DATA_VERSION(cmd) != SSIP_LOCAL_VERID)
+		dev_warn(&cl->device, "boot info resp verid mismatch\n");
+
+	spin_lock(&ssi->lock);
+	if (ssi->main_state != ACTIVE)
+		/* Use tx_wd as a boot watchdog in non ACTIVE state */
+		mod_timer(&ssi->tx_wd, jiffies + msecs_to_jiffies(SSIP_WDTOUT));
+	else
+		dev_dbg(&cl->device, "boot info resp ignored M(%d)\n",
+							ssi->main_state);
+	spin_unlock(&ssi->lock);
+}
+
+static void ssip_rx_waketest(struct hsi_client *cl, u32 cmd)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	unsigned int wkres = SSIP_PAYLOAD(cmd);
+
+	spin_lock(&ssi->lock);
+	if (ssi->main_state != HANDSHAKE) {
+		dev_dbg(&cl->device, "wake lines test ignored M(%d)\n",
+							ssi->main_state);
+		spin_unlock(&ssi->lock);
+		return;
+	}
+	if (ssi->waketest) {
+		ssi->waketest = 0;
+		ssi_waketest(cl, 0); /* FIXME: To be removed */
+	}
+	ssi->main_state = ACTIVE;
+	del_timer(&ssi->tx_wd); /* Stop boot handshake timer */
+	spin_unlock(&ssi->lock);
+
+	dev_notice(&cl->device, "WAKELINES TEST %s\n",
+				wkres & SSIP_WAKETEST_FAILED ? "FAILED" : "OK");
+	if (wkres & SSIP_WAKETEST_FAILED) {
+		ssip_error(cl);
+		return;
+	}
+	dev_dbg(&cl->device, "CMT is ONLINE\n");
+	netif_wake_queue(ssi->netdev);
+	netif_carrier_on(ssi->netdev);
+}
+
+static void ssip_rx_ready(struct hsi_client *cl)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+
+	spin_lock(&ssi->lock);
+	if (unlikely(ssi->main_state != ACTIVE)) {
+		dev_dbg(&cl->device, "READY on wrong state: S(%d) M(%d)\n",
+					ssi->send_state, ssi->main_state);
+		spin_unlock(&ssi->lock);
+		return;
+	}
+	if (ssi->send_state != WAIT4READY) {
+		dev_dbg(&cl->device, "Ignore spurious READY command\n");
+		spin_unlock(&ssi->lock);
+		return;
+	}
+	ssip_set_txstate(ssi, SEND_READY);
+	spin_unlock(&ssi->lock);
+	ssip_xmit(cl);
+}
+
+static void ssip_rx_strans(struct hsi_client *cl, u32 cmd)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	struct sk_buff *skb;
+	struct hsi_msg *msg;
+	int len = SSIP_PDU_LENGTH(cmd);
+
+	dev_dbg(&cl->device, "RX strans: %d frames\n", len);
+	spin_lock(&ssi->lock);
+	if (unlikely(ssi->main_state != ACTIVE)) {
+		dev_err(&cl->device, "START TRANS wrong state: S(%d) M(%d)\n",
+					ssi->send_state, ssi->main_state);
+		spin_unlock(&ssi->lock);
+		return;
+	}
+	ssip_set_rxstate(ssi, RECEIVING);
+	if (unlikely(SSIP_MSG_ID(cmd) != ssi->rxid)) {
+		dev_err(&cl->device, "START TRANS id %d expeceted %d\n",
+					SSIP_MSG_ID(cmd), ssi->rxid);
+		spin_unlock(&ssi->lock);
+		goto out1;
+	}
+	ssi->rxid++;
+	spin_unlock(&ssi->lock);
+	skb = netdev_alloc_skb(ssi->netdev, len * 4);
+	if (unlikely(!skb)) {
+		dev_err(&cl->device, "No memory for rx skb\n");
+		goto out1;
+	}
+	skb->dev = ssi->netdev;
+	skb_put(skb, len * 4);
+	msg = ssip_alloc_data(ssi, skb, GFP_ATOMIC);
+	if (unlikely(!msg)) {
+		dev_err(&cl->device, "No memory for RX data msg\n");
+		goto out2;
+	}
+	msg->complete = ssip_rx_data_complete;
+	hsi_async_read(cl, msg);
+
+	return;
+out2:
+	dev_kfree_skb(skb);
+out1:
+	ssip_error(cl);
+}
+
+static void ssip_rxcmd_complete(struct hsi_msg *msg)
+{
+	struct hsi_client *cl = msg->cl;
+	u32 cmd = ssip_get_cmd(msg);
+	unsigned int cmdid = SSIP_COMMAND(cmd);
+
+	if (msg->status == HSI_STATUS_ERROR) {
+		dev_err(&cl->device, "RX error detected\n");
+		ssip_release_cmd(msg);
+		ssip_error(cl);
+		return;
+	}
+	hsi_async_read(cl, msg);
+	dev_dbg(&cl->device, "RX cmd: 0x%08x\n", cmd);
+	switch (cmdid) {
+	case SSIP_SW_BREAK:
+		/* Ignored */
+		break;
+	case SSIP_BOOTINFO_REQ:
+		ssip_rx_bootinforeq(cl, cmd);
+		break;
+	case SSIP_BOOTINFO_RESP:
+		ssip_rx_bootinforesp(cl, cmd);
+		break;
+	case SSIP_WAKETEST_RESULT:
+		ssip_rx_waketest(cl, cmd);
+		break;
+	case SSIP_START_TRANS:
+		ssip_rx_strans(cl, cmd);
+		break;
+	case SSIP_READY:
+		ssip_rx_ready(cl);
+		break;
+	default:
+		dev_warn(&cl->device, "command 0x%08x not supported\n", cmd);
+		break;
+	}
+}
+
+static void ssip_swbreak_complete(struct hsi_msg *msg)
+{
+	struct hsi_client *cl = msg->cl;
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+
+	ssip_release_cmd(msg);
+	spin_lock(&ssi->lock);
+	if (list_empty(&ssi->txqueue)) {
+		if (atomic_read(&ssi->tx_usecnt)) {
+			ssip_set_txstate(ssi, SEND_READY);
+		} else {
+			ssip_set_txstate(ssi, SEND_IDLE);
+			hsi_stop_tx(cl);
+		}
+		spin_unlock(&ssi->lock);
+	} else {
+		spin_unlock(&ssi->lock);
+		ssip_xmit(cl);
+	}
+	netif_wake_queue(ssi->netdev);
+}
+
+static void ssip_tx_data_complete(struct hsi_msg *msg)
+{
+	struct hsi_client *cl = msg->cl;
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	struct hsi_msg *cmsg;
+
+	if (msg->status == HSI_STATUS_ERROR) {
+		dev_err(&cl->device, "TX data error\n");
+		ssip_error(cl);
+		goto out;
+	}
+	spin_lock(&ssi->lock);
+	if (list_empty(&ssi->txqueue)) {
+		ssip_set_txstate(ssi, SENDING_SWBREAK);
+		spin_unlock(&ssi->lock);
+		cmsg = ssip_claim_cmd(ssi);
+		ssip_set_cmd(cmsg, SSIP_SWBREAK_CMD);
+		cmsg->complete = ssip_swbreak_complete;
+		dev_dbg(&cl->device, "Send SWBREAK\n");
+		hsi_async_write(cl, cmsg);
+	} else {
+		spin_unlock(&ssi->lock);
+		ssip_xmit(cl);
+	}
+out:
+	ssip_free_data(msg);
+}
+
+void ssip_port_event(struct hsi_client *cl, unsigned long event)
+{
+	switch (event) {
+	case HSI_EVENT_START_RX:
+		ssip_start_rx(cl);
+		break;
+	case HSI_EVENT_STOP_RX:
+		ssip_stop_rx(cl);
+		break;
+	default:
+		return;
+	}
+}
+
+static int ssip_pn_open(struct net_device *dev)
+{
+	struct hsi_client *cl = to_hsi_client(dev->dev.parent);
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	int err;
+
+	err = hsi_claim_port(cl, 1);
+	if (err < 0) {
+		dev_err(&cl->device, "SSI port already claimed\n");
+		return err;
+	}
+	err = hsi_register_port_event(cl, ssip_port_event);
+	if (err < 0) {
+		dev_err(&cl->device, "Register HSI port event failed (%d)\n", err);
+		hsi_release_port(cl); /* undo hsi_claim_port() before bailing out */
+		return err;
+	}
+	dev_dbg(&cl->device, "Configuring SSI port\n");
+	hsi_setup(cl);
+	spin_lock_bh(&ssi->lock);
+	if (!ssi->waketest) {
+		ssi->waketest = 1;
+		ssi_waketest(cl, 1); /* FIXME: To be removed */
+	}
+	ssi->main_state = INIT;
+	spin_unlock_bh(&ssi->lock);
+
+	return 0;
+}
+
+static int ssip_pn_stop(struct net_device *dev)
+{
+	struct hsi_client *cl = to_hsi_client(dev->dev.parent);
+
+	ssip_reset(cl);
+	hsi_unregister_port_event(cl);
+	hsi_release_port(cl);
+
+	return 0;
+}
+
+static int ssip_pn_set_mtu(struct net_device *dev, int new_mtu)
+{
+	if (new_mtu > SSIP_MAX_MTU || new_mtu < PHONET_MIN_MTU)
+		return -EINVAL;
+	dev->mtu = new_mtu;
+
+	return 0;
+}
+
+static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct hsi_client *cl = to_hsi_client(dev->dev.parent);
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+	struct hsi_msg *msg;
+
+	if ((skb->protocol != htons(ETH_P_PHONET)) ||
+					(skb->len < SSIP_MIN_PN_HDR))
+		goto drop;
+	/* Pad to 32-bits; skb_pad() frees the skb itself when it fails */
+	if ((skb->len & 3) && skb_pad(skb, 4 - (skb->len & 3)))
+		goto inv;
+
+	/*
+	 * Modem sends Phonet messages over SSI with its own endianness...
+	 * Assume that modem has the same endianness as we do.
+	 */
+	if (skb_cow_head(skb, 0))
+		goto drop;
+
+	/* length field is exchanged in network byte order */
+	((u16 *)skb->data)[2] = htons(((u16 *)skb->data)[2]);
+
+	msg = ssip_alloc_data(ssi, skb, GFP_ATOMIC);
+	if (!msg) {
+		dev_dbg(&cl->device, "Dropping tx data: No memory\n");
+		goto drop;
+	}
+	msg->complete = ssip_tx_data_complete;
+
+	spin_lock_bh(&ssi->lock);
+	if (unlikely(ssi->main_state != ACTIVE)) {
+		spin_unlock_bh(&ssi->lock);
+		dev_dbg(&cl->device, "Dropping tx data: CMT is OFFLINE\n");
+		goto drop2;
+	}
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len; /* skb is not ours once queued */
+	list_add_tail(&msg->link, &ssi->txqueue);
+	ssi->txqueue_len++;
+	if (dev->tx_queue_len < ssi->txqueue_len) {
+		dev_info(&cl->device, "TX queue full %d\n", ssi->txqueue_len);
+		netif_stop_queue(dev);
+	}
+	if (ssi->send_state == SEND_IDLE) {
+		ssip_set_txstate(ssi, WAIT4READY);
+		spin_unlock_bh(&ssi->lock);
+		dev_dbg(&cl->device, "Start TX qlen %d\n", ssi->txqueue_len);
+		hsi_start_tx(cl);
+	} else if (ssi->send_state == SEND_READY) {
+		/* Needed for cmt-speech workaround */
+		dev_dbg(&cl->device, "Start TX on SEND READY qlen %d\n",
+							ssi->txqueue_len);
+		spin_unlock_bh(&ssi->lock);
+		ssip_xmit(cl);
+	} else {
+		spin_unlock_bh(&ssi->lock);
+	}
+
+	return 0;
+drop2:
+	hsi_free_msg(msg);
+drop:
+	dev_kfree_skb(skb);
+inv:
+	dev->stats.tx_dropped++;
+	return 0;
+}
+
+/* CMT reset event handler */
+void ssip_reset_event(struct hsi_client *master)
+{
+	struct ssi_protocol *ssi = hsi_client_drvdata(master);
+	dev_err(&ssi->cl->device, "CMT reset detected!\n");
+	ssip_error(ssi->cl);
+}
+EXPORT_SYMBOL_GPL(ssip_reset_event);
+
+static const struct net_device_ops ssip_pn_ops = {
+	.ndo_open	= ssip_pn_open,
+	.ndo_stop	= ssip_pn_stop,
+	.ndo_start_xmit	= ssip_pn_xmit,
+	.ndo_change_mtu	= ssip_pn_set_mtu,
+};
+
+static void ssip_pn_setup(struct net_device *dev)
+{
+	dev->features		= 0;
+	dev->netdev_ops		= &ssip_pn_ops;
+	dev->type		= ARPHRD_PHONET;
+	dev->flags		= IFF_POINTOPOINT | IFF_NOARP;
+	dev->mtu		= SSIP_DEFAULT_MTU;
+	dev->hard_header_len	= 1;
+	dev->dev_addr[0]	= PN_MEDIA_SOS;
+	dev->addr_len		= 1;
+	dev->tx_queue_len	= SSIP_TXQUEUE_LEN;
+
+	dev->destructor		= free_netdev;
+	dev->header_ops		= &phonet_header_ops;
+}
+
+static int ssi_protocol_probe(struct device *dev)
+{
+	static const char ifname[] = "phonet%d";
+	struct hsi_client *cl = to_hsi_client(dev);
+	struct ssi_protocol *ssi;
+	int err;
+
+	ssi = kzalloc(sizeof(*ssi), GFP_KERNEL);
+	if (!ssi) {
+		dev_err(dev, "No memory for ssi protocol\n");
+		return -ENOMEM;
+	}
+
+	spin_lock_init(&ssi->lock);
+	init_timer_deferrable(&ssi->rx_wd);
+	init_timer_deferrable(&ssi->tx_wd);
+	init_timer(&ssi->keep_alive);
+	ssi->rx_wd.data = (unsigned long)cl;
+	ssi->rx_wd.function = ssip_wd;
+	ssi->tx_wd.data = (unsigned long)cl;
+	ssi->tx_wd.function = ssip_wd;
+	ssi->keep_alive.data = (unsigned long)cl;
+	ssi->keep_alive.function = ssip_keep_alive;
+	INIT_LIST_HEAD(&ssi->txqueue);
+	INIT_LIST_HEAD(&ssi->cmdqueue);
+	atomic_set(&ssi->tx_usecnt, 0);
+	hsi_client_set_drvdata(cl, ssi);
+	ssi->cl = cl;
+
+	ssi->channel_id_cmd = hsi_get_channel_id_by_name(cl, "mcsaab-control");
+	if (ssi->channel_id_cmd < 0) {
+		err = ssi->channel_id_cmd;
+		dev_err(dev, "Could not get cmd channel (%d)\n", err);
+		goto out;
+	}
+
+	ssi->channel_id_data = hsi_get_channel_id_by_name(cl, "mcsaab-data");
+	if (ssi->channel_id_data < 0) {
+		err = ssi->channel_id_data;
+		dev_err(dev, "Could not get data channel (%d)\n", err);
+		goto out;
+	}
+
+	err = ssip_alloc_cmds(ssi);
+	if (err < 0) {
+		dev_err(dev, "No memory for commands\n");
+		goto out;
+	}
+
+	ssi->netdev = alloc_netdev(0, ifname, ssip_pn_setup);
+	if (!ssi->netdev) {
+		dev_err(dev, "No memory for netdev\n");
+		err = -ENOMEM;
+		goto out1;
+	}
+
+	SET_NETDEV_DEV(ssi->netdev, dev);
+	netif_carrier_off(ssi->netdev);
+	err = register_netdev(ssi->netdev);
+	if (err < 0) {
+		dev_err(dev, "Register netdev failed (%d)\n", err);
+		goto out2;
+	}
+
+	list_add(&ssi->link, &ssip_list);
+
+	dev_dbg(dev, "channel configuration: cmd=%d, data=%d\n",
+		ssi->channel_id_cmd, ssi->channel_id_data);
+
+	return 0;
+out2:
+	free_netdev(ssi->netdev);
+out1:
+	ssip_free_cmds(ssi);
+out:
+	kfree(ssi);
+
+	return err;
+}
+
+static int ssi_protocol_remove(struct device *dev)
+{
+	struct hsi_client *cl = to_hsi_client(dev);
+	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
+
+	list_del(&ssi->link);
+	unregister_netdev(ssi->netdev);
+	ssip_free_cmds(ssi);
+	hsi_client_set_drvdata(cl, NULL);
+	kfree(ssi);
+
+	return 0;
+}
+
+static struct hsi_client_driver ssip_driver = {
+	.driver = {
+		.name	= "ssi-protocol",
+		.owner	= THIS_MODULE,
+		.probe	= ssi_protocol_probe,
+		.remove	= ssi_protocol_remove,
+	},
+};
+
+static int __init ssip_init(void)
+{
+	pr_info("SSI protocol aka McSAAB added\n");
+
+	return hsi_register_client_driver(&ssip_driver);
+}
+module_init(ssip_init);
+
+static void __exit ssip_exit(void)
+{
+	hsi_unregister_client_driver(&ssip_driver);
+	pr_info("SSI protocol driver removed\n");
+}
+module_exit(ssip_exit);
+
+MODULE_ALIAS("hsi:ssi-protocol");
+MODULE_AUTHOR("Carlos Chinea <carlos.chinea@nokia.com>");
+MODULE_AUTHOR("Remi Denis-Courmont <remi.denis-courmont@nokia.com>");
+MODULE_DESCRIPTION("SSI protocol improved aka McSAAB");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/hsi/ssi_protocol.h b/include/linux/hsi/ssi_protocol.h
new file mode 100644
index 000000000000..1433651be0dc
--- /dev/null
+++ b/include/linux/hsi/ssi_protocol.h
@@ -0,0 +1,42 @@
+/*
+ * ssip_slave.h
+ *
+ * SSIP slave support header file
+ *
+ * Copyright (C) 2010 Nokia Corporation. All rights reserved.
+ *
+ * Contact: Carlos Chinea <carlos.chinea@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef __LINUX_SSIP_SLAVE_H__
+#define __LINUX_SSIP_SLAVE_H__
+
+#include <linux/hsi/hsi.h>
+
+static inline void ssip_slave_put_master(struct hsi_client *master)
+{
+}
+
+struct hsi_client *ssip_slave_get_master(struct hsi_client *slave);
+int ssip_slave_start_tx(struct hsi_client *master);
+int ssip_slave_stop_tx(struct hsi_client *master);
+void ssip_reset_event(struct hsi_client *master);
+
+int ssip_slave_running(struct hsi_client *master);
+
+#endif /* __LINUX_SSIP_SLAVE_H__ */
+
-- 
cgit 


From 1f0b63866fc1be700260547be8edf8e6f0af37f2 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 15 May 2014 23:29:57 +0200
Subject: ACPI / PM: Hold ACPI scan lock over the "freeze" sleep state

The "freeze" sleep state suffers from the same issue that was
addressed by commit ad07277e82de (ACPI / PM: Hold acpi_scan_lock over
system PM transitions) for ACPI sleep states, that is, things break
if ->remove() is called for devices whose system resume callbacks
haven't been executed yet.

It also can be addressed in the same way, by holding the ACPI scan
lock over the "freeze" sleep state and PM transitions to and from
that state, but ->begin() and ->end() platform operations for the
"freeze" sleep state are needed for this purpose.

This change has been tested on Acer Aspire S5 with Thunderbolt.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/sleep.c    | 18 ++++++++++++++++++
 include/linux/suspend.h |  7 +++++++
 kernel/power/suspend.c  | 15 +++++++++++++++
 3 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 2281ca31c1bc..c11e3795431b 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -612,6 +612,22 @@ static const struct platform_suspend_ops acpi_suspend_ops_old = {
 	.recover = acpi_pm_finish,
 };
 
+static int acpi_freeze_begin(void)
+{
+	acpi_scan_lock_acquire();
+	return 0;
+}
+
+static void acpi_freeze_end(void)
+{
+	acpi_scan_lock_release();
+}
+
+static const struct platform_freeze_ops acpi_freeze_ops = {
+	.begin = acpi_freeze_begin,
+	.end = acpi_freeze_end,
+};
+
 static void acpi_sleep_suspend_setup(void)
 {
 	int i;
@@ -622,7 +638,9 @@ static void acpi_sleep_suspend_setup(void)
 
 	suspend_set_ops(old_suspend_ordering ?
 		&acpi_suspend_ops_old : &acpi_suspend_ops);
+	freeze_set_ops(&acpi_freeze_ops);
 }
+
 #else /* !CONFIG_SUSPEND */
 static inline void acpi_sleep_suspend_setup(void) {}
 #endif /* !CONFIG_SUSPEND */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index f73cabf59012..91d66fd8dce1 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -187,6 +187,11 @@ struct platform_suspend_ops {
 	void (*recover)(void);
 };
 
+struct platform_freeze_ops {
+	int (*begin)(void);
+	void (*end)(void);
+};
+
 #ifdef CONFIG_SUSPEND
 /**
  * suspend_set_ops - set platform dependent suspend operations
@@ -194,6 +199,7 @@ struct platform_suspend_ops {
  */
 extern void suspend_set_ops(const struct platform_suspend_ops *ops);
 extern int suspend_valid_only_mem(suspend_state_t state);
+extern void freeze_set_ops(const struct platform_freeze_ops *ops);
 extern void freeze_wake(void);
 
 /**
@@ -220,6 +226,7 @@ extern int pm_suspend(suspend_state_t state);
 
 static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {}
 static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; }
+static inline void freeze_set_ops(const struct platform_freeze_ops *ops) {}
 static inline void freeze_wake(void) {}
 #endif /* !CONFIG_SUSPEND */
 
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 8233cd4047d7..73a905f83972 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -38,6 +38,7 @@ const char *const pm_states[PM_SUSPEND_MAX] = {
 };
 
 static const struct platform_suspend_ops *suspend_ops;
+static const struct platform_freeze_ops *freeze_ops;
 
 static bool need_suspend_ops(suspend_state_t state)
 {
@@ -47,6 +48,13 @@ static bool need_suspend_ops(suspend_state_t state)
 static DECLARE_WAIT_QUEUE_HEAD(suspend_freeze_wait_head);
 static bool suspend_freeze_wake;
 
+void freeze_set_ops(const struct platform_freeze_ops *ops)
+{
+	lock_system_sleep();
+	freeze_ops = ops;
+	unlock_system_sleep();
+}
+
 static void freeze_begin(void)
 {
 	suspend_freeze_wake = false;
@@ -269,6 +277,10 @@ int suspend_devices_and_enter(suspend_state_t state)
 		error = suspend_ops->begin(state);
 		if (error)
 			goto Close;
+	} else if (state == PM_SUSPEND_FREEZE && freeze_ops->begin) {
+		error = freeze_ops->begin();
+		if (error)
+			goto Close;
 	}
 	suspend_console();
 	suspend_test_start();
@@ -294,6 +306,9 @@ int suspend_devices_and_enter(suspend_state_t state)
  Close:
 	if (need_suspend_ops(state) && suspend_ops->end)
 		suspend_ops->end();
+	else if (state == PM_SUSPEND_FREEZE && freeze_ops->end)
+		freeze_ops->end();
+
 	trace_machine_suspend(PWR_EVENT_EXIT);
 	return error;
 
-- 
cgit 


From 7b6ef1262549f6afc5c881aaef80beb8fd15f908 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 7 May 2014 15:44:05 +0000
Subject: genirq: Provide generic hwirq allocation facility

Not really the solution to the problem, but at least it confines the
mess in the core code and allows to get rid of the create/destroy_irq
variants from hell, i.e. 3 implementations with different semantics
plus the x86 specific variants __create_irqs and create_irq_nr
which have been invented in another circle of hell.

x86 : x86 should be converted to irq domains and I'm deliberately
      making it impossible to do the multi-vector MSI support by
      adding more crap to the current mess. It's not that hard to do
      and I'm really tired of the trainwrecks which have been invented
      by bandaid engineering so far. Any attempt to do multi-vector
      MSI or ioapic hotplug without converting to irq domains is NAKed
      hereby.

tile: Might use irq domains as well, but it has a very limited
      interrupt space, so handling it via this functionality might be
      the right thing to do even in the long run.

ia64: That's a hopeless case, as I doubt that anyone has the stomach
      to rewrite the homebrewed dynamic allocation facilities. I stared
      at it for a couple of hours and gave up. The create/destroy_irq
      mess could be made private to itanic right away if there
      wouldn't be the iommu/dmar driver being shared with x86. So to
      do that I'm going to add a separate ia64 specific implementation
      later in order not to deep-six itanic right away.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Grant Likely <grant.likely@linaro.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: x86@kernel.org
Link: http://lkml.kernel.org/r/20140507154334.208629358@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h  | 15 +++++++++++++++
 kernel/irq/Kconfig   |  5 +++++
 kernel/irq/irqdesc.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 5c57efb863d0..c75dd161d37f 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -637,6 +637,21 @@ static inline int irq_reserve_irq(unsigned int irq)
 	return irq_reserve_irqs(irq, 1);
 }
 
+#ifdef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
+unsigned int irq_alloc_hwirqs(int cnt, int node);
+static inline unsigned int irq_alloc_hwirq(int node)
+{
+	return irq_alloc_hwirqs(1, node);
+}
+void irq_free_hwirqs(unsigned int from, int cnt);
+static inline void irq_free_hwirq(unsigned int irq)
+{
+	return irq_free_hwirqs(irq, 1);
+}
+int arch_setup_hwirq(unsigned int irq, int node);
+void arch_teardown_hwirq(unsigned int irq);
+#endif
+
 #ifndef irq_reg_writel
 # define irq_reg_writel(val, addr)	writel(val, addr)
 #endif
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 07cbdfea9ae2..a83f10e406c1 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -17,6 +17,11 @@ config GENERIC_IRQ_SHOW
 config GENERIC_IRQ_SHOW_LEVEL
        bool
 
+# Facility to allocate a hardware interrupt. This is legacy support
+# and should not be used in new code. Use irq domains instead.
+config GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
+       bool
+
 # Support for delayed migration from interrupt context
 config GENERIC_PENDING_IRQ
 	bool
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index bb07f2928f4b..f388ade5e792 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -396,6 +396,57 @@ err:
 }
 EXPORT_SYMBOL_GPL(__irq_alloc_descs);
 
+#ifdef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
+/**
+ * irq_alloc_hwirqs - Allocate an irq descriptor and initialize the hardware
+ * @cnt:	number of interrupts to allocate
+ * @node:	node on which to allocate
+ *
+ * Returns an interrupt number > 0 or 0, if the allocation fails.
+ */
+unsigned int irq_alloc_hwirqs(int cnt, int node)
+{
+	int i, irq = __irq_alloc_descs(-1, 0, cnt, node, NULL);
+
+	if (irq < 0)
+		return 0;
+
+	for (i = irq; cnt > 0; i++, cnt--) {
+		if (arch_setup_hwirq(i, node))
+			goto err;
+		irq_clear_status_flags(i, _IRQ_NOREQUEST);
+	}
+	return irq;
+
+err:
+	for (i--; i >= irq; i--) {
+		irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE);
+		arch_teardown_hwirq(i);
+	}
+	irq_free_descs(irq, cnt);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(irq_alloc_hwirqs);
+
+/**
+ * irq_free_hwirqs - Free irq descriptor and cleanup the hardware
+ * @from:	Free from irq number
+ * @cnt:	number of interrupts to free
+ *
+ */
+void irq_free_hwirqs(unsigned int from, int cnt)
+{
+	int i;
+
+	for (i = from; cnt > 0; i++, cnt--) {
+		irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE);
+		arch_teardown_hwirq(i);
+	}
+	irq_free_descs(from, cnt);
+}
+EXPORT_SYMBOL_GPL(irq_free_hwirqs);
+#endif
+
 /**
  * irq_reserve_irqs - mark irqs allocated
  * @from:	mark from irq number
-- 
cgit 


From 54859f59fc18e5c104a4095420b3fcef8bc3ae63 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 7 May 2014 15:44:12 +0000
Subject: x86: Remove create/destroy_irq()

No more users. Remove the cruft

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Grant Likely <grant.likely@linaro.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: x86@kernel.org
Link: http://lkml.kernel.org/r/20140507154336.760446122@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/apic/io_apic.c | 106 +----------------------------------------
 include/linux/irq.h            |   4 --
 2 files changed, 1 insertion(+), 109 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index be3b5741badb..efda2f648f59 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -281,18 +281,6 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
 	return cfg;
 }
 
-static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
-{
-	return irq_alloc_descs_from(from, count, node);
-}
-
-static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
-{
-	free_irq_cfg(at, cfg);
-	irq_free_desc(at);
-}
-
-
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -2916,100 +2904,8 @@ static int __init ioapic_init_ops(void)
 device_initcall(ioapic_init_ops);
 
 /*
- * Dynamic irq allocate and deallocation
+ * Dynamic irq allocate and deallocation. Should be replaced by irq domains!
  */
-unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
-{
-	struct irq_cfg **cfg;
-	unsigned long flags;
-	int irq, i;
-
-	if (from < nr_irqs_gsi)
-		from = nr_irqs_gsi;
-
-	cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
-	if (!cfg)
-		return 0;
-
-	irq = alloc_irqs_from(from, count, node);
-	if (irq < 0)
-		goto out_cfgs;
-
-	for (i = 0; i < count; i++) {
-		cfg[i] = alloc_irq_cfg(irq + i, node);
-		if (!cfg[i])
-			goto out_irqs;
-	}
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	for (i = 0; i < count; i++)
-		if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
-			goto out_vecs;
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-
-	for (i = 0; i < count; i++) {
-		irq_set_chip_data(irq + i, cfg[i]);
-		irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
-	}
-
-	kfree(cfg);
-	return irq;
-
-out_vecs:
-	for (i--; i >= 0; i--)
-		__clear_irq_vector(irq + i, cfg[i]);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-out_irqs:
-	for (i = 0; i < count; i++)
-		free_irq_at(irq + i, cfg[i]);
-out_cfgs:
-	kfree(cfg);
-	return 0;
-}
-
-unsigned int create_irq_nr(unsigned int from, int node)
-{
-	return __create_irqs(from, 1, node);
-}
-
-int create_irq(void)
-{
-	int node = cpu_to_node(0);
-	unsigned int irq_want;
-	int irq;
-
-	irq_want = nr_irqs_gsi;
-	irq = create_irq_nr(irq_want, node);
-
-	if (irq == 0)
-		irq = -1;
-
-	return irq;
-}
-
-void destroy_irq(unsigned int irq)
-{
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
-	unsigned long flags;
-
-	irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
-
-	free_remapped_irq(irq);
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	__clear_irq_vector(irq, cfg);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-	free_irq_at(irq, cfg);
-}
-
-void destroy_irqs(unsigned int irq, unsigned int count)
-{
-	unsigned int i;
-
-	for (i = 0; i < count; i++)
-		destroy_irq(irq + i);
-}
-
 int arch_setup_hwirq(unsigned int irq, int node)
 {
 	struct irq_cfg *cfg;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index c75dd161d37f..7549ed59d3d4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -526,12 +526,8 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq)
 }
 
 /* Handle dynamic irq creation and destruction */
-extern unsigned int create_irq_nr(unsigned int irq_want, int node);
-extern unsigned int __create_irqs(unsigned int from, unsigned int count,
-				  int node);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
-extern void destroy_irqs(unsigned int irq, unsigned int count);
 
 /*
  * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and
-- 
cgit 


From e8784e4f9a578344023ae4e08a509b7c5eab5eb0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 7 May 2014 15:44:17 +0000
Subject: genirq: Make create/destroy_irq() ia64 private

No more users outside of itanic. Confine it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Tony Luck <tony.luck@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Link: http://lkml.kernel.org/r/20140507154338.700598389@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/ia64/include/asm/irq.h | 3 +++
 include/linux/irq.h         | 4 ----
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/irq.h b/arch/ia64/include/asm/irq.h
index 91b920fd7d53..820667cbea7e 100644
--- a/arch/ia64/include/asm/irq.h
+++ b/arch/ia64/include/asm/irq.h
@@ -31,4 +31,7 @@ bool is_affinity_mask_valid(const struct cpumask *cpumask);
 
 #define is_affinity_mask_valid is_affinity_mask_valid
 
+int create_irq(void);
+void destroy_irq(unsigned int irq);
+
 #endif /* _ASM_IA64_IRQ_H */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 7549ed59d3d4..ac9634286f42 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -525,10 +525,6 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq)
 			     IRQ_NOPROBE | IRQ_PER_CPU_DEVID);
 }
 
-/* Handle dynamic irq creation and destruction */
-extern int create_irq(void);
-extern void destroy_irq(unsigned int irq);
-
 /*
  * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and
  * irq_free_desc instead.
-- 
cgit 


From 1d008353ba088fdec0b2a944e140ff9154a5fb20 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 7 May 2014 15:44:21 +0000
Subject: genirq: Remove irq_reserve_irq[s]

No more users. And it's not going to come back. If you need
hotplugable irq chips, use irq domains.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-and-acked-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Tony Luck <tony.luck@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140507154340.302183048@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h  |  7 -------
 kernel/irq/irqdesc.c | 25 -------------------------
 2 files changed, 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index ac9634286f42..2110f46fcafa 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -617,18 +617,11 @@ int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
 	irq_alloc_descs(-1, from, cnt, node)
 
 void irq_free_descs(unsigned int irq, unsigned int cnt);
-int irq_reserve_irqs(unsigned int from, unsigned int cnt);
-
 static inline void irq_free_desc(unsigned int irq)
 {
 	irq_free_descs(irq, 1);
 }
 
-static inline int irq_reserve_irq(unsigned int irq)
-{
-	return irq_reserve_irqs(irq, 1);
-}
-
 #ifdef CONFIG_GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
 unsigned int irq_alloc_hwirqs(int cnt, int node);
 static inline unsigned int irq_alloc_hwirq(int node)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 24029348729b..d514ed6080e1 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -454,31 +454,6 @@ void irq_free_hwirqs(unsigned int from, int cnt)
 EXPORT_SYMBOL_GPL(irq_free_hwirqs);
 #endif
 
-/**
- * irq_reserve_irqs - mark irqs allocated
- * @from:	mark from irq number
- * @cnt:	number of irqs to mark
- *
- * Returns 0 on success or an appropriate error code
- */
-int irq_reserve_irqs(unsigned int from, unsigned int cnt)
-{
-	unsigned int start;
-	int ret = 0;
-
-	if (!cnt || (from + cnt) > nr_irqs)
-		return -EINVAL;
-
-	mutex_lock(&sparse_irq_lock);
-	start = bitmap_find_next_zero_area(allocated_irqs, nr_irqs, from, cnt, 0);
-	if (start == from)
-		bitmap_set(allocated_irqs, start, cnt);
-	else
-		ret = -EEXIST;
-	mutex_unlock(&sparse_irq_lock);
-	return ret;
-}
-
 /**
  * irq_get_next_irq - get next allocated irq number
  * @offset:	where to start the search
-- 
cgit 


From c940e01c94e73a2a5318f1b82038e0746aaec753 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 7 May 2014 15:44:22 +0000
Subject: genirq: Replace dynamic_irq_init/cleanup

Create a new interface and confine it with a config switch which makes
clear that this is just legacy support and not to be used for new code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Tony Luck <tony.luck@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140507154340.574437049@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h  | 4 ++++
 kernel/irq/Kconfig   | 4 ++++
 kernel/irq/irqdesc.c | 7 +++++++
 3 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 2110f46fcafa..8ff71d14365a 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -637,6 +637,10 @@ int arch_setup_hwirq(unsigned int irq, int node);
 void arch_teardown_hwirq(unsigned int irq);
 #endif
 
+#ifdef CONFIG_GENERIC_IRQ_LEGACY
+void irq_init_desc(unsigned int irq);
+#endif
+
 #ifndef irq_reg_writel
 # define irq_reg_writel(val, addr)	writel(val, addr)
 #endif
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index a83f10e406c1..d269cecdfbf0 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -5,6 +5,10 @@ menu "IRQ subsystem"
 config MAY_HAVE_SPARSE_IRQ
        bool
 
+# Legacy support, required for itanic
+config GENERIC_IRQ_LEGACY
+       bool
+
 # Enable the generic irq autoprobe mechanism
 config GENERIC_IRQ_PROBE
 	bool
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index d514ed6080e1..7f267799a717 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -306,6 +306,13 @@ void irq_mark_irq(unsigned int irq)
 	mutex_unlock(&sparse_irq_lock);
 }
 
+#ifdef CONFIG_GENERIC_IRQ_LEGACY
+void irq_init_desc(unsigned int irq)
+{
+	dynamic_irq_cleanup(irq);
+}
+#endif
+
 #endif /* !CONFIG_SPARSE_IRQ */
 
 /**
-- 
cgit 


From d8179bc0db8d0c9654d5de43de2874bf6d0a58fa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 7 May 2014 15:44:23 +0000
Subject: genirq: Remove dynamic_irq mess

No more users. Get rid of the cruft.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Tony Luck <tony.luck@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140507154341.012847637@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h  | 10 ----------
 kernel/irq/irqdesc.c | 23 +++++++----------------
 2 files changed, 7 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8ff71d14365a..0d998d8b01d8 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -525,16 +525,6 @@ static inline void irq_set_percpu_devid_flags(unsigned int irq)
 			     IRQ_NOPROBE | IRQ_PER_CPU_DEVID);
 }
 
-/*
- * Dynamic irq helper functions. Obsolete. Use irq_alloc_desc* and
- * irq_free_desc instead.
- */
-extern void dynamic_irq_cleanup(unsigned int irq);
-static inline void dynamic_irq_init(unsigned int irq)
-{
-	dynamic_irq_cleanup(irq);
-}
-
 /* Set/get chip/data for an IRQ: */
 extern int irq_set_chip(unsigned int irq, struct irq_chip *chip);
 extern int irq_set_handler_data(unsigned int irq, void *data);
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 7f267799a717..7339e42a85ab 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -278,7 +278,12 @@ EXPORT_SYMBOL(irq_to_desc);
 
 static void free_desc(unsigned int irq)
 {
-	dynamic_irq_cleanup(irq);
+	struct irq_desc *desc = irq_to_desc(irq);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	desc_set_defaults(irq, desc, desc_node(desc), NULL);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
 static inline int alloc_descs(unsigned int start, unsigned int cnt, int node,
@@ -309,7 +314,7 @@ void irq_mark_irq(unsigned int irq)
 #ifdef CONFIG_GENERIC_IRQ_LEGACY
 void irq_init_desc(unsigned int irq)
 {
-	dynamic_irq_cleanup(irq);
+	free_desc(irq);
 }
 #endif
 
@@ -522,20 +527,6 @@ int irq_set_percpu_devid(unsigned int irq)
 	return 0;
 }
 
-/**
- * dynamic_irq_cleanup - cleanup a dynamically allocated irq
- * @irq:	irq number to initialize
- */
-void dynamic_irq_cleanup(unsigned int irq)
-{
-	struct irq_desc *desc = irq_to_desc(irq);
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&desc->lock, flags);
-	desc_set_defaults(irq, desc, desc_node(desc), NULL);
-	raw_spin_unlock_irqrestore(&desc->lock, flags);
-}
-
 void kstat_incr_irq_this_cpu(unsigned int irq)
 {
 	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
-- 
cgit 


From cdf86cd233207ed992a647f0b9d42c60735756e7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 8 May 2014 15:42:25 +0200
Subject: gpio: include linux/bug.h in interface header

Today's linux-next kernel started showing build errors for the
use of WARN_ON in linux/gpio/consumer.h:

In file included from drivers/video/backlight/pwm_bl.c:13:0:
include/linux/gpio/consumer.h: In function 'gpiod_put':
include/linux/gpio/consumer.h:81:2: error: implicit declaration of function 'WARN_ON' [-Werror=implicit-function-declaration]

It's not clear why this never happened before, but this patch
fixes it by including the header that contains the definition
of this macro.

Signed-off-by: Arnd Bergmann <arnd@arnd.de>
Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/consumer.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 6a37ef0dc59c..05e53ccb708b 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -1,6 +1,7 @@
 #ifndef __LINUX_GPIO_CONSUMER_H
 #define __LINUX_GPIO_CONSUMER_H
 
+#include <linux/bug.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
 
-- 
cgit 


From 3b514d24e200fcdcde0a57c354a51d3677a86743 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:47 -0400
Subject: cgroup: skip refcnting on normal root csses and cgrp_dfl_root self
 css

9395a4500404 ("cgroup: enable refcnting for root csses") enabled
reference counting for root csses (cgroup_subsys_states) so that
cgroup's self csses can be used to manage the lifetime of the
containing cgroups.

Unfortunately, this change was incorrect.  During early init,
cgrp_dfl_root self css refcnt is used.  percpu_ref can't be initialized
during early init and its initialization is deferred till
cgroup_init() time.  This means that cpu was using percpu_ref which
wasn't properly initialized.  Due to the way percpu variables are laid
out on x86, this didn't blow up immediately on x86 but ended up
incrementing and decrementing the percpu variable at offset zero,
whatever it may be; however, on other archs, this caused fault and
early boot failure.

As cgroup self csses for root cgroups of non-dfl hierarchies need
working refcounting, we can't revert 9395a4500404.  This patch adds
CSS_NO_REF which explicitly inhibits reference counting on the css and
sets it on all normal (non-self) csses and cgroup_dfl_root self css.

v2: cgrp_dfl_root.self is the offending one.  Set the flag on it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Stephen Warren <swarren@nvidia.com>
Tested-by: Stephen Warren <swarren@nvidia.com>
Fixes: 9395a4500404 ("cgroup: enable refcnting for root csses")
---
 include/linux/cgroup.h | 11 ++++++++---
 kernel/cgroup.c        | 11 +++++++++--
 2 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 76dadd77a120..1737db0c63fe 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -77,6 +77,7 @@ struct cgroup_subsys_state {
 
 /* bits in struct cgroup_subsys_state flags field */
 enum {
+	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
 	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
 };
 
@@ -88,7 +89,8 @@ enum {
  */
 static inline void css_get(struct cgroup_subsys_state *css)
 {
-	percpu_ref_get(&css->refcnt);
+	if (!(css->flags & CSS_NO_REF))
+		percpu_ref_get(&css->refcnt);
 }
 
 /**
@@ -103,7 +105,9 @@ static inline void css_get(struct cgroup_subsys_state *css)
  */
 static inline bool css_tryget_online(struct cgroup_subsys_state *css)
 {
-	return percpu_ref_tryget_live(&css->refcnt);
+	if (!(css->flags & CSS_NO_REF))
+		return percpu_ref_tryget_live(&css->refcnt);
+	return true;
 }
 
 /**
@@ -114,7 +118,8 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css)
  */
 static inline void css_put(struct cgroup_subsys_state *css)
 {
-	percpu_ref_put(&css->refcnt);
+	if (!(css->flags & CSS_NO_REF))
+		percpu_ref_put(&css->refcnt);
 }
 
 /* bits in struct cgroup flags field */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c01e8e8dfad0..0343d7ee6d62 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4593,11 +4593,17 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
 	/* We don't handle early failures gracefully */
 	BUG_ON(IS_ERR(css));
 	init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
+
+	/*
+	 * Root csses are never destroyed and we can't initialize
+	 * percpu_ref during early init.  Disable refcnting.
+	 */
+	css->flags |= CSS_NO_REF;
+
 	if (early) {
 		/* allocation can't be done safely during early init */
 		css->id = 1;
 	} else {
-		BUG_ON(percpu_ref_init(&css->refcnt, css_release));
 		css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
 		BUG_ON(css->id < 0);
 	}
@@ -4636,6 +4642,8 @@ int __init cgroup_init_early(void)
 	int i;
 
 	init_cgroup_root(&cgrp_dfl_root, &opts);
+	cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
+
 	RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
 
 	for_each_subsys(ss, i) {
@@ -4684,7 +4692,6 @@ int __init cgroup_init(void)
 			struct cgroup_subsys_state *css =
 				init_css_set.subsys[ss->id];
 
-			BUG_ON(percpu_ref_init(&css->refcnt, css_release));
 			css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
 						   GFP_KERNEL);
 			BUG_ON(css->id < 0);
-- 
cgit 


From 5c9d535b893f30266ea29fe377cb9b002fcd76aa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:48 -0400
Subject: cgroup: remove css_parent()

cgroup in general is moving towards using cgroup_subsys_state as the
fundamental structural component and css_parent() was introduced to
convert from using cgroup->parent to css->parent.  It was quite some
time ago and we're moving forward with making css more prominent.

This patch drops the trivial wrapper css_parent() and let the users
dereference css->parent.  While at it, explicitly mark fields of css
which are public and immutable.

v2: New usage from device_cgroup.c converted.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: "David S. Miller" <davem@davemloft.net>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
 block/blk-cgroup.h           |  2 +-
 include/linux/cgroup.h       | 29 +++++++++++------------------
 kernel/cgroup.c              |  8 ++++----
 kernel/cgroup_freezer.c      |  2 +-
 kernel/cpuset.c              |  2 +-
 kernel/sched/core.c          |  2 +-
 kernel/sched/cpuacct.c       |  2 +-
 mm/hugetlb_cgroup.c          |  2 +-
 mm/memcontrol.c              | 14 +++++++-------
 net/core/netclassid_cgroup.c |  2 +-
 net/core/netprio_cgroup.c    |  2 +-
 security/device_cgroup.c     |  8 ++++----
 12 files changed, 34 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 371fe8e92ab5..d692b29c083a 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -204,7 +204,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
  */
 static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
 {
-	return css_to_blkcg(css_parent(&blkcg->css));
+	return css_to_blkcg(blkcg->css.parent);
 }
 
 /**
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1737db0c63fe..2549493d518d 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -48,22 +48,28 @@ enum cgroup_subsys_id {
 };
 #undef SUBSYS
 
-/* Per-subsystem/per-cgroup state maintained by the system. */
+/*
+ * Per-subsystem/per-cgroup state maintained by the system.  This is the
+ * fundamental structural building block that controllers deal with.
+ *
+ * Fields marked with "PI:" are public and immutable and may be accessed
+ * directly without synchronization.
+ */
 struct cgroup_subsys_state {
-	/* the cgroup that this css is attached to */
+	/* PI: the cgroup that this css is attached to */
 	struct cgroup *cgroup;
 
-	/* the cgroup subsystem that this css is attached to */
+	/* PI: the cgroup subsystem that this css is attached to */
 	struct cgroup_subsys *ss;
 
 	/* reference count - access via css_[try]get() and css_put() */
 	struct percpu_ref refcnt;
 
-	/* the parent css */
+	/* PI: the parent css */
 	struct cgroup_subsys_state *parent;
 
 	/*
-	 * Subsys-unique ID.  0 is unused and root is always 1.  The
+	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
 	 * matching css can be looked up using css_from_id().
 	 */
 	int id;
@@ -669,19 +675,6 @@ struct cgroup_subsys {
 #include <linux/cgroup_subsys.h>
 #undef SUBSYS
 
-/**
- * css_parent - find the parent css
- * @css: the target cgroup_subsys_state
- *
- * Return the parent css of @css.  This function is guaranteed to return
- * non-NULL parent as long as @css isn't the root.
- */
-static inline
-struct cgroup_subsys_state *css_parent(struct cgroup_subsys_state *css)
-{
-	return css->parent;
-}
-
 /**
  * task_css_set_check - obtain a task's css_set with extra access conditions
  * @task: the task to obtain css_set for
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0343d7ee6d62..929bbbc539e9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3176,10 +3176,10 @@ css_next_descendant_pre(struct cgroup_subsys_state *pos,
 
 	/* no child, visit my or the closest ancestor's next sibling */
 	while (pos != root) {
-		next = css_next_child(pos, css_parent(pos));
+		next = css_next_child(pos, pos->parent);
 		if (next)
 			return next;
-		pos = css_parent(pos);
+		pos = pos->parent;
 	}
 
 	return NULL;
@@ -3261,12 +3261,12 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
 		return NULL;
 
 	/* if there's an unvisited sibling, visit its leftmost descendant */
-	next = css_next_child(pos, css_parent(pos));
+	next = css_next_child(pos, pos->parent);
 	if (next)
 		return css_leftmost_descendant(next);
 
 	/* no sibling left, visit parent */
-	return css_parent(pos);
+	return pos->parent;
 }
 
 static bool cgroup_has_live_children(struct cgroup *cgrp)
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 6b4e60e33a9a..a79e40f9d700 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -59,7 +59,7 @@ static inline struct freezer *task_freezer(struct task_struct *task)
 
 static struct freezer *parent_freezer(struct freezer *freezer)
 {
-	return css_freezer(css_parent(&freezer->css));
+	return css_freezer(freezer->css.parent);
 }
 
 bool cgroup_freezing(struct task_struct *task)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 2f4b08b8db24..5b2a31082f4f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -124,7 +124,7 @@ static inline struct cpuset *task_cs(struct task_struct *task)
 
 static inline struct cpuset *parent_cs(struct cpuset *cs)
 {
-	return css_cs(css_parent(&cs->css));
+	return css_cs(cs->css.parent);
 }
 
 #ifdef CONFIG_NUMA
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 268a45ea238c..ac61ad1a5f9f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7586,7 +7586,7 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct task_group *tg = css_tg(css);
-	struct task_group *parent = css_tg(css_parent(css));
+	struct task_group *parent = css_tg(css->parent);
 
 	if (parent)
 		sched_online_group(tg, parent);
diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c
index c143ee380e3a..9cf350c94ec4 100644
--- a/kernel/sched/cpuacct.c
+++ b/kernel/sched/cpuacct.c
@@ -46,7 +46,7 @@ static inline struct cpuacct *task_ca(struct task_struct *tsk)
 
 static inline struct cpuacct *parent_ca(struct cpuacct *ca)
 {
-	return css_ca(css_parent(&ca->css));
+	return css_ca(ca->css.parent);
 }
 
 static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index a380681ab3cf..493f758445e7 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -52,7 +52,7 @@ static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
 static inline struct hugetlb_cgroup *
 parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
 {
-	return hugetlb_cgroup_from_css(css_parent(&h_cg->css));
+	return hugetlb_cgroup_from_css(h_cg->css.parent);
 }
 
 static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b638a79209ee..a5e0417b4f9a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1540,7 +1540,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
 int mem_cgroup_swappiness(struct mem_cgroup *memcg)
 {
 	/* root ? */
-	if (!css_parent(&memcg->css))
+	if (!memcg->css.parent)
 		return vm_swappiness;
 
 	return memcg->swappiness;
@@ -4909,7 +4909,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css,
 {
 	int retval = 0;
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct mem_cgroup *parent_memcg = mem_cgroup_from_css(css_parent(&memcg->css));
+	struct mem_cgroup *parent_memcg = mem_cgroup_from_css(memcg->css.parent);
 
 	mutex_lock(&memcg_create_mutex);
 
@@ -5207,8 +5207,8 @@ static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
 	if (!memcg->use_hierarchy)
 		goto out;
 
-	while (css_parent(&memcg->css)) {
-		memcg = mem_cgroup_from_css(css_parent(&memcg->css));
+	while (memcg->css.parent) {
+		memcg = mem_cgroup_from_css(memcg->css.parent);
 		if (!memcg->use_hierarchy)
 			break;
 		tmp = res_counter_read_u64(&memcg->res, RES_LIMIT);
@@ -5443,7 +5443,7 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
 				       struct cftype *cft, u64 val)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
+	struct mem_cgroup *parent = mem_cgroup_from_css(memcg->css.parent);
 
 	if (val > 100 || !parent)
 		return -EINVAL;
@@ -5790,7 +5790,7 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
 	struct cftype *cft, u64 val)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(&memcg->css));
+	struct mem_cgroup *parent = mem_cgroup_from_css(memcg->css.parent);
 
 	/* cannot set to root cgroup and only 0 and 1 are allowed */
 	if (!parent || !((val == 0) || (val == 1)))
@@ -6407,7 +6407,7 @@ static int
 mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
+	struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
 
 	if (css->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 22931e1b99b4..30d903b19c62 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -42,7 +42,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
 static int cgrp_css_online(struct cgroup_subsys_state *css)
 {
 	struct cgroup_cls_state *cs = css_cls_state(css);
-	struct cgroup_cls_state *parent = css_cls_state(css_parent(css));
+	struct cgroup_cls_state *parent = css_cls_state(css->parent);
 
 	if (parent)
 		cs->classid = parent->classid;
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index b990cefd906b..2f385b9bccc0 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -140,7 +140,7 @@ cgrp_css_alloc(struct cgroup_subsys_state *parent_css)
 
 static int cgrp_css_online(struct cgroup_subsys_state *css)
 {
-	struct cgroup_subsys_state *parent_css = css_parent(css);
+	struct cgroup_subsys_state *parent_css = css->parent;
 	struct net_device *dev;
 	int ret = 0;
 
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 7dbac4061b1c..ce14a31b1337 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -182,7 +182,7 @@ static inline bool is_devcg_online(const struct dev_cgroup *devcg)
 static int devcgroup_online(struct cgroup_subsys_state *css)
 {
 	struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
-	struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css_parent(css));
+	struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css->parent);
 	int ret = 0;
 
 	mutex_lock(&devcgroup_mutex);
@@ -455,7 +455,7 @@ static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
 static int parent_has_perm(struct dev_cgroup *childcg,
 				  struct dev_exception_item *ex)
 {
-	struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css));
+	struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent);
 
 	if (!parent)
 		return 1;
@@ -476,7 +476,7 @@ static int parent_has_perm(struct dev_cgroup *childcg,
 static bool parent_allows_removal(struct dev_cgroup *childcg,
 				  struct dev_exception_item *ex)
 {
-	struct dev_cgroup *parent = css_to_devcgroup(css_parent(&childcg->css));
+	struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent);
 
 	if (!parent)
 		return true;
@@ -614,7 +614,7 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 	char temp[12];		/* 11 + 1 characters needed for a u32 */
 	int count, rc = 0;
 	struct dev_exception_item ex;
-	struct dev_cgroup *parent = css_to_devcgroup(css_parent(&devcgroup->css));
+	struct dev_cgroup *parent = css_to_devcgroup(devcgroup->css.parent);
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-- 
cgit 


From d51f39b05ce0008118c45945e681b20484990571 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:48 -0400
Subject: cgroup: remove cgroup->parent

cgroup->parent is redundant as cgroup->self.parent can also be used to
determine the parent cgroup and we're moving towards using
cgroup_subsys_states as the fundamental structural blocks.  This patch
introduces cgroup_parent() which follows cgroup->self.parent and
removes cgroup->parent.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  1 -
 kernel/cgroup.c        | 52 +++++++++++++++++++++++++++++---------------------
 2 files changed, 30 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 2549493d518d..fd538f4c2bb6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -178,7 +178,6 @@ struct cgroup {
 	struct list_head sibling;	/* my parent's children */
 	struct list_head children;	/* my children */
 
-	struct cgroup *parent;		/* my parent */
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
 	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 929bbbc539e9..8c67a739aea4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -218,6 +218,15 @@ static void cgroup_idr_remove(struct idr *idr, int id)
 	spin_unlock_bh(&cgroup_idr_lock);
 }
 
+static struct cgroup *cgroup_parent(struct cgroup *cgrp)
+{
+	struct cgroup_subsys_state *parent_css = cgrp->self.parent;
+
+	if (parent_css)
+		return container_of(parent_css, struct cgroup, self);
+	return NULL;
+}
+
 /**
  * cgroup_css - obtain a cgroup's css for the specified subsystem
  * @cgrp: the cgroup of interest
@@ -260,9 +269,9 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 	if (!(cgrp->root->subsys_mask & (1 << ss->id)))
 		return NULL;
 
-	while (cgrp->parent &&
-	       !(cgrp->parent->child_subsys_mask & (1 << ss->id)))
-		cgrp = cgrp->parent;
+	while (cgroup_parent(cgrp) &&
+	       !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id)))
+		cgrp = cgroup_parent(cgrp);
 
 	return cgroup_css(cgrp, ss);
 }
@@ -307,7 +316,7 @@ bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor)
 	while (cgrp) {
 		if (cgrp == ancestor)
 			return true;
-		cgrp = cgrp->parent;
+		cgrp = cgroup_parent(cgrp);
 	}
 	return false;
 }
@@ -454,7 +463,7 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
 
 		if (cgrp->populated_kn)
 			kernfs_notify(cgrp->populated_kn);
-		cgrp = cgrp->parent;
+		cgrp = cgroup_parent(cgrp);
 	} while (cgrp);
 }
 
@@ -2018,7 +2027,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp,
 	 * Except for the root, child_subsys_mask must be zero for a cgroup
 	 * with tasks so that child cgroups don't compete against tasks.
 	 */
-	if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && dst_cgrp->parent &&
+	if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && cgroup_parent(dst_cgrp) &&
 	    dst_cgrp->child_subsys_mask)
 		return -EBUSY;
 
@@ -2427,7 +2436,7 @@ static int cgroup_controllers_show(struct seq_file *seq, void *v)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
 
-	cgroup_print_ss_mask(seq, cgrp->parent->child_subsys_mask);
+	cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->child_subsys_mask);
 	return 0;
 }
 
@@ -2610,8 +2619,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 
 			/* unavailable or not enabled on the parent? */
 			if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
-			    (cgrp->parent &&
-			     !(cgrp->parent->child_subsys_mask & (1 << ssid)))) {
+			    (cgroup_parent(cgrp) &&
+			     !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ssid)))) {
 				ret = -ENOENT;
 				goto out_unlock;
 			}
@@ -2640,7 +2649,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 	 * Except for the root, child_subsys_mask must be zero for a cgroup
 	 * with tasks so that child cgroups don't compete against tasks.
 	 */
-	if (enable && cgrp->parent && !list_empty(&cgrp->cset_links)) {
+	if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -2898,9 +2907,9 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 			continue;
 		if ((cft->flags & CFTYPE_INSANE) && cgroup_sane_behavior(cgrp))
 			continue;
-		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
+		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
 			continue;
-		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
+		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
 			continue;
 
 		if (is_add) {
@@ -4092,14 +4101,14 @@ static void css_free_work_fn(struct work_struct *work)
 		atomic_dec(&cgrp->root->nr_cgrps);
 		cgroup_pidlist_destroy_all(cgrp);
 
-		if (cgrp->parent) {
+		if (cgroup_parent(cgrp)) {
 			/*
 			 * We get a ref to the parent, and put the ref when
 			 * this cgroup is being freed, so it's guaranteed
 			 * that the parent won't be destroyed before its
 			 * children.
 			 */
-			cgroup_put(cgrp->parent);
+			cgroup_put(cgroup_parent(cgrp));
 			kernfs_put(cgrp->kn);
 			kfree(cgrp);
 		} else {
@@ -4163,8 +4172,8 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	css->ss = ss;
 	css->flags = 0;
 
-	if (cgrp->parent) {
-		css->parent = cgroup_css(cgrp->parent, ss);
+	if (cgroup_parent(cgrp)) {
+		css->parent = cgroup_css(cgroup_parent(cgrp), ss);
 		css_get(css->parent);
 	}
 
@@ -4218,7 +4227,7 @@ static void offline_css(struct cgroup_subsys_state *css)
  */
 static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 {
-	struct cgroup *parent = cgrp->parent;
+	struct cgroup *parent = cgroup_parent(cgrp);
 	struct cgroup_subsys_state *css;
 	int err;
 
@@ -4251,7 +4260,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
 		goto err_clear_dir;
 
 	if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
-	    parent->parent) {
+	    cgroup_parent(parent)) {
 		pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
 			current->comm, current->pid, ss->name);
 		if (!strcmp(ss->name, "memory"))
@@ -4309,7 +4318,6 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 
 	init_cgroup_housekeeping(cgrp);
 
-	cgrp->parent = parent;
 	cgrp->self.parent = &parent->self;
 	cgrp->root = root;
 
@@ -4336,7 +4344,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	cgrp->serial_nr = cgroup_serial_nr_next++;
 
 	/* allocation complete, commit to creation */
-	list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
+	list_add_tail_rcu(&cgrp->sibling, &cgroup_parent(cgrp)->children);
 	atomic_inc(&root->nr_cgrps);
 	cgroup_get(parent);
 
@@ -4531,8 +4539,8 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 */
 	kernfs_remove(cgrp->kn);
 
-	set_bit(CGRP_RELEASABLE, &cgrp->parent->flags);
-	check_for_release(cgrp->parent);
+	set_bit(CGRP_RELEASABLE, &cgroup_parent(cgrp)->flags);
+	check_for_release(cgroup_parent(cgrp));
 
 	/* put the base reference */
 	percpu_ref_kill(&cgrp->self.refcnt);
-- 
cgit 


From d5c419b68e368fdd9f1857bf8d4bb4480edb9b80 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:48 -0400
Subject: cgroup: move cgroup->sibling and ->children into cgroup_subsys_state

We're moving towards using cgroup_subsys_states as the fundamental
structural blocks.  Let's move cgroup->sibling and ->children into
cgroup_subsys_state.  This is a pure move without functional change and
only cgroup->self's fields are actually used.  Other csses will make
use of the fields later.

While at it, update init_and_link_css() so that it zeroes the whole
css before initializing it and remove explicit zeroing of ->flags.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 11 ++++-------
 kernel/cgroup.c        | 38 ++++++++++++++++++++------------------
 2 files changed, 24 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index fd538f4c2bb6..cf8ba26b7c6e 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -68,6 +68,10 @@ struct cgroup_subsys_state {
 	/* PI: the parent css */
 	struct cgroup_subsys_state *parent;
 
+	/* siblings list anchored at the parent's ->children */
+	struct list_head sibling;
+	struct list_head children;
+
 	/*
 	 * PI: Subsys-unique ID.  0 is unused and root is always 1.  The
 	 * matching css can be looked up using css_from_id().
@@ -171,13 +175,6 @@ struct cgroup {
 	 */
 	int populated_cnt;
 
-	/*
-	 * We link our 'sibling' struct into our parent's 'children'.
-	 * Our children link their 'sibling' into our 'children'.
-	 */
-	struct list_head sibling;	/* my parent's children */
-	struct list_head children;	/* my children */
-
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
 	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c67a739aea4..5385839e727b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -378,7 +378,7 @@ static int notify_on_release(const struct cgroup *cgrp)
 
 /* iterate over child cgrps, lock should be held throughout iteration */
 #define cgroup_for_each_live_child(child, cgrp)				\
-	list_for_each_entry((child), &(cgrp)->children, sibling)	\
+	list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
 		if (({ lockdep_assert_held(&cgroup_mutex);		\
 		       cgroup_is_dead(child); }))			\
 			;						\
@@ -870,7 +870,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 	mutex_lock(&cgroup_mutex);
 
 	BUG_ON(atomic_read(&root->nr_cgrps));
-	BUG_ON(!list_empty(&cgrp->children));
+	BUG_ON(!list_empty(&cgrp->self.children));
 
 	/* Rebind all subsystems back to the default hierarchy */
 	rebind_subsystems(&cgrp_dfl_root, root->subsys_mask);
@@ -1432,7 +1432,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
 	}
 
 	/* remounting is not allowed for populated hierarchies */
-	if (!list_empty(&root->cgrp.children)) {
+	if (!list_empty(&root->cgrp.self.children)) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -1512,8 +1512,8 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	struct cgroup_subsys *ss;
 	int ssid;
 
-	INIT_LIST_HEAD(&cgrp->sibling);
-	INIT_LIST_HEAD(&cgrp->children);
+	INIT_LIST_HEAD(&cgrp->self.sibling);
+	INIT_LIST_HEAD(&cgrp->self.children);
 	INIT_LIST_HEAD(&cgrp->cset_links);
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
@@ -1612,7 +1612,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask)
 		link_css_set(&tmp_links, cset, root_cgrp);
 	up_write(&css_set_rwsem);
 
-	BUG_ON(!list_empty(&root_cgrp->children));
+	BUG_ON(!list_empty(&root_cgrp->self.children));
 	BUG_ON(atomic_read(&root->nr_cgrps) != 1);
 
 	kernfs_activate(root_cgrp->kn);
@@ -3128,11 +3128,11 @@ css_next_child(struct cgroup_subsys_state *pos_css,
 	 * cgroup is removed or iteration and removal race.
 	 */
 	if (!pos) {
-		next = list_entry_rcu(cgrp->children.next, struct cgroup, sibling);
+		next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling);
 	} else if (likely(!cgroup_is_dead(pos))) {
-		next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);
+		next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling);
 	} else {
-		list_for_each_entry_rcu(next, &cgrp->children, sibling)
+		list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling)
 			if (next->serial_nr > pos->serial_nr)
 				break;
 	}
@@ -3142,12 +3142,12 @@ css_next_child(struct cgroup_subsys_state *pos_css,
 	 * the next sibling; however, it might have @ss disabled.  If so,
 	 * fast-forward to the next enabled one.
 	 */
-	while (&next->sibling != &cgrp->children) {
+	while (&next->self.sibling != &cgrp->self.children) {
 		struct cgroup_subsys_state *next_css = cgroup_css(next, parent_css->ss);
 
 		if (next_css)
 			return next_css;
-		next = list_entry_rcu(next->sibling.next, struct cgroup, sibling);
+		next = list_entry_rcu(next->self.sibling.next, struct cgroup, self.sibling);
 	}
 	return NULL;
 }
@@ -3283,7 +3283,7 @@ static bool cgroup_has_live_children(struct cgroup *cgrp)
 	struct cgroup *child;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(child, &cgrp->children, sibling) {
+	list_for_each_entry_rcu(child, &cgrp->self.children, self.sibling) {
 		if (!cgroup_is_dead(child)) {
 			rcu_read_unlock();
 			return true;
@@ -4144,7 +4144,7 @@ static void css_release_work_fn(struct work_struct *work)
 	} else {
 		/* cgroup release path */
 		mutex_lock(&cgroup_mutex);
-		list_del_rcu(&cgrp->sibling);
+		list_del_rcu(&cgrp->self.sibling);
 		mutex_unlock(&cgroup_mutex);
 
 		cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
@@ -4168,9 +4168,11 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 {
 	cgroup_get(cgrp);
 
+	memset(css, 0, sizeof(*css));
 	css->cgroup = cgrp;
 	css->ss = ss;
-	css->flags = 0;
+	INIT_LIST_HEAD(&css->sibling);
+	INIT_LIST_HEAD(&css->children);
 
 	if (cgroup_parent(cgrp)) {
 		css->parent = cgroup_css(cgroup_parent(cgrp), ss);
@@ -4344,7 +4346,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	cgrp->serial_nr = cgroup_serial_nr_next++;
 
 	/* allocation complete, commit to creation */
-	list_add_tail_rcu(&cgrp->sibling, &cgroup_parent(cgrp)->children);
+	list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
 	atomic_inc(&root->nr_cgrps);
 	cgroup_get(parent);
 
@@ -4507,9 +4509,9 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 		return -EBUSY;
 
 	/*
-	 * Make sure there's no live children.  We can't test ->children
-	 * emptiness as dead children linger on it while being destroyed;
-	 * otherwise, "rmdir parent/child parent" may fail with -EBUSY.
+	 * Make sure there's no live children.  We can't test emptiness of
+	 * ->self.children as dead children linger on it while being
+	 * drained; otherwise, "rmdir parent/child parent" may fail.
 	 */
 	if (cgroup_has_live_children(cgrp))
 		return -EBUSY;
-- 
cgit 


From 0cb51d71c1fa9234afe4213089844be76ec1765a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:49 -0400
Subject: cgroup: move cgroup->serial_nr into cgroup_subsys_state

We're moving towards using cgroup_subsys_states as the fundamental
structural blocks.  All csses including the cgroup->self and actual
ones now form trees through css->children and ->sibling which follow
the same rules as what cgroup->children and ->sibling followed.  This
patch moves cgroup->serial_nr which is used to implement css iteration
into css.

Note that all csses, regardless of their types, allocate their serial
numbers from the same monotonically increasing counter.  This doesn't
affect the ordering needed by css iteration or cause any other
material behavior changes.  This will be used to update css iteration.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 16 ++++++++--------
 kernel/cgroup.c        | 20 +++++++++++---------
 2 files changed, 19 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index cf8ba26b7c6e..ebe7ce49f4b7 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -80,6 +80,14 @@ struct cgroup_subsys_state {
 
 	unsigned int flags;
 
+	/*
+	 * Monotonically increasing unique serial number which defines a
+	 * uniform order among all csses.  It's guaranteed that all
+	 * ->children lists are in the ascending order of ->serial_nr and
+	 * used to allow interrupting and resuming iterations.
+	 */
+	u64 serial_nr;
+
 	/* percpu_ref killing and RCU release */
 	struct rcu_head rcu_head;
 	struct work_struct destroy_work;
@@ -178,14 +186,6 @@ struct cgroup {
 	struct kernfs_node *kn;		/* cgroup kernfs entry */
 	struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */
 
-	/*
-	 * Monotonically increasing unique serial number which defines a
-	 * uniform order among all cgroups.  It's guaranteed that all
-	 * ->children lists are in the ascending order of ->serial_nr.
-	 * It's used to allow interrupting and resuming iterations.
-	 */
-	u64 serial_nr;
-
 	/* the bitmask of subsystems enabled on the child cgroups */
 	unsigned int child_subsys_mask;
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index dcb06e181ce4..d5af128ec1ec 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -157,14 +157,13 @@ static int cgroup_root_count;
 static DEFINE_IDR(cgroup_hierarchy_idr);
 
 /*
- * Assign a monotonically increasing serial number to cgroups.  It
- * guarantees cgroups with bigger numbers are newer than those with smaller
- * numbers.  Also, as cgroups are always appended to the parent's
- * ->children list, it guarantees that sibling cgroups are always sorted in
- * the ascending serial number order on the list.  Protected by
- * cgroup_mutex.
+ * Assign a monotonically increasing serial number to csses.  It guarantees
+ * csses with bigger numbers are newer than those with smaller numbers.
+ * Also, as csses are always appended to the parent's ->children list, it
+ * guarantees that sibling csses are always sorted in the ascending serial
+ * number order on the list.  Protected by cgroup_mutex.
  */
-static u64 cgroup_serial_nr_next = 1;
+static u64 css_serial_nr_next = 1;
 
 /* This flag indicates whether tasks in the fork and exit paths should
  * check for fork/exit handlers to call. This avoids us having to do
@@ -3133,7 +3132,7 @@ css_next_child(struct cgroup_subsys_state *pos_css,
 		next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling);
 	} else {
 		list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling)
-			if (next->serial_nr > pos->serial_nr)
+			if (next->self.serial_nr > pos->self.serial_nr)
 				break;
 	}
 
@@ -4168,6 +4167,8 @@ static void css_release(struct percpu_ref *ref)
 static void init_and_link_css(struct cgroup_subsys_state *css,
 			      struct cgroup_subsys *ss, struct cgroup *cgrp)
 {
+	lockdep_assert_held(&cgroup_mutex);
+
 	cgroup_get(cgrp);
 
 	memset(css, 0, sizeof(*css));
@@ -4175,6 +4176,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 	css->ss = ss;
 	INIT_LIST_HEAD(&css->sibling);
 	INIT_LIST_HEAD(&css->children);
+	css->serial_nr = css_serial_nr_next++;
 
 	if (cgroup_parent(cgrp)) {
 		css->parent = cgroup_css(cgroup_parent(cgrp), ss);
@@ -4348,7 +4350,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 	 */
 	kernfs_get(kn);
 
-	cgrp->serial_nr = cgroup_serial_nr_next++;
+	cgrp->self.serial_nr = css_serial_nr_next++;
 
 	/* allocation complete, commit to creation */
 	list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
-- 
cgit 


From de3f034182ecbf0efbcef7ab8b253c6c3049a592 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:49 -0400
Subject: cgroup: introduce CSS_RELEASED and reduce css iteration fallback
 window

css iterations allow the caller to drop RCU read lock.  As long as the
caller keeps the current position accessible, it can simply re-grab
RCU read lock later and continue iteration.  This is achieved by using
CGRP_DEAD to detect whether the current position's next pointer is safe
to dereference and, if not, re-iterating from the beginning to the next
position using ->serial_nr.

CGRP_DEAD is used as the marker to invalidate the next pointer and the
only requirement is that the marker is set before the next sibling
starts its RCU grace period.  Because CGRP_DEAD is set at the end of
cgroup_destroy_locked() but the cgroup is unlinked when the reference
count reaches zero, we currently have a rather large window where this
fallback re-iteration logic can be triggered.

This patch introduces CSS_RELEASED which is set when a css is unlinked
from its sibling list.  This still keeps the re-iteration logic
working while drastically reducing the window of its activation.
While at it, rewrite the comment in css_next_child() to reflect the
new flag and better explain the synchronization.

This will also enable iterating csses directly instead of through
cgroups.

v2: CSS_RELEASED now assigned to 1 << 2 as 1 << 0 is used by
    CSS_NO_REF.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  1 +
 kernel/cgroup.c        | 41 ++++++++++++++++++++---------------------
 2 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ebe7ce49f4b7..5375582ea5f6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -97,6 +97,7 @@ struct cgroup_subsys_state {
 enum {
 	CSS_NO_REF	= (1 << 0), /* no reference counting for this css */
 	CSS_ONLINE	= (1 << 1), /* between ->css_online() and ->css_offline() */
+	CSS_RELEASED	= (1 << 2), /* refcnt reached zero, released */
 };
 
 /**
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d5af128ec1ec..5544e685f2da 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3108,27 +3108,28 @@ css_next_child(struct cgroup_subsys_state *pos_css,
 	cgroup_assert_mutex_or_rcu_locked();
 
 	/*
-	 * @pos could already have been removed.  Once a cgroup is removed,
-	 * its ->sibling.next is no longer updated when its next sibling
-	 * changes.  As CGRP_DEAD assertion is serialized and happens
-	 * before the cgroup is taken off the ->sibling list, if we see it
-	 * unasserted, it's guaranteed that the next sibling hasn't
-	 * finished its grace period even if it's already removed, and thus
-	 * safe to dereference from this RCU critical section.  If
-	 * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed
-	 * to be visible as %true here.
+	 * @pos could already have been unlinked from the sibling list.
+	 * Once a cgroup is removed, its ->sibling.next is no longer
+	 * updated when its next sibling changes.  CSS_RELEASED is set when
+	 * @pos is taken off list, at which time its next pointer is valid,
+	 * and, as releases are serialized, the one pointed to by the next
+	 * pointer is guaranteed to not have started release yet.  This
+	 * implies that if we observe !CSS_RELEASED on @pos in this RCU
+	 * critical section, the one pointed to by its next pointer is
+	 * guaranteed to not have finished its RCU grace period even if we
+	 * have dropped rcu_read_lock() inbetween iterations.
 	 *
-	 * If @pos is dead, its next pointer can't be dereferenced;
-	 * however, as each cgroup is given a monotonically increasing
-	 * unique serial number and always appended to the sibling list,
-	 * the next one can be found by walking the parent's children until
-	 * we see a cgroup with higher serial number than @pos's.  While
-	 * this path can be slower, it's taken only when either the current
-	 * cgroup is removed or iteration and removal race.
+	 * If @pos has CSS_RELEASED set, its next pointer can't be
+	 * dereferenced; however, as each css is given a monotonically
+	 * increasing unique serial number and always appended to the
+	 * sibling list, the next one can be found by walking the parent's
+	 * children until the first css with higher serial number than
+	 * @pos's.  While this path can be slower, it happens iff iteration
+	 * races against release and the race window is very small.
 	 */
 	if (!pos) {
 		next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling);
-	} else if (likely(!cgroup_is_dead(pos))) {
+	} else if (likely(!(pos->self.flags & CSS_RELEASED))) {
 		next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling);
 	} else {
 		list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling)
@@ -4139,6 +4140,7 @@ static void css_release_work_fn(struct work_struct *work)
 
 	mutex_lock(&cgroup_mutex);
 
+	css->flags |= CSS_RELEASED;
 	list_del_rcu(&css->sibling);
 
 	if (ss) {
@@ -4525,10 +4527,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 
 	/*
 	 * Mark @cgrp dead.  This prevents further task migration and child
-	 * creation by disabling cgroup_lock_live_group().  Note that
-	 * CGRP_DEAD assertion is depended upon by css_next_child() to
-	 * resume iteration after dropping RCU read lock.  See
-	 * css_next_child() for details.
+	 * creation by disabling cgroup_lock_live_group().
 	 */
 	set_bit(CGRP_DEAD, &cgrp->flags);
 
-- 
cgit 


From c2931b70a32c705b9bd5762f5044f9eac8a52bb3 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:51 -0400
Subject: cgroup: iterate cgroup_subsys_states directly

Currently, css_next_child() is implemented as finding the next child
cgroup which has the css enabled, which used to be the only way to do
it as only cgroups participated in sibling lists and thus could be
iterated.  This works as long as what's required during iteration is
not missing online csses; however, it turns out that there are use
cases where offlined but not yet released csses need to be iterated.
This is difficult to implement through cgroup iteration in the unified
hierarchy as there may be multiple dying csses for the same subsystem
associated with a single cgroup.

After the recent changes, the cgroup self and regular csses behave
identically in how they're linked and unlinked from the sibling lists
including assertion of CSS_RELEASED and css_next_child() can simply
switch to iterating csses directly.  This both simplifies the logic
and ensures that all visible non-released csses are included in the
iteration whether there are multiple dying csses for a subsystem or
not.

As all other iterators depend on css_next_child() for sibling
iteration, this changes behaviors of all css iterators.  Add and
update explanations on the css states which are included in traversal
to all iterators.

As css iteration could always contain offlined csses, this shouldn't
break any of the current users and new usages which need iteration of
all on and offline csses can make use of the new semantics.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/cgroup.h | 44 ++++++++++++++++++++---------------
 kernel/cgroup.c        | 62 ++++++++++++++++++++++++++++++--------------------
 2 files changed, 63 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 5375582ea5f6..f2ff578fc03a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -764,14 +764,14 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
  * @pos: the css * to use as the loop cursor
  * @parent: css whose children to walk
  *
- * Walk @parent's children.  Must be called under rcu_read_lock().  A child
- * css which hasn't finished ->css_online() or already has finished
- * ->css_offline() may show up during traversal and it's each subsystem's
- * responsibility to verify that each @pos is alive.
+ * Walk @parent's children.  Must be called under rcu_read_lock().
  *
- * If a subsystem synchronizes against the parent in its ->css_online() and
- * before starting iterating, a css which finished ->css_online() is
- * guaranteed to be visible in the future iterations.
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  *
  * It is allowed to temporarily drop RCU read lock during iteration.  The
  * caller is responsible for ensuring that @pos remains accessible until
@@ -794,17 +794,16 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos);
  * @root: css whose descendants to walk
  *
  * Walk @root's descendants.  @root is included in the iteration and the
- * first node to be visited.  Must be called under rcu_read_lock().  A
- * descendant css which hasn't finished ->css_online() or already has
- * finished ->css_offline() may show up during traversal and it's each
- * subsystem's responsibility to verify that each @pos is alive.
+ * first node to be visited.  Must be called under rcu_read_lock().
  *
- * If a subsystem synchronizes against the parent in its ->css_online() and
- * before starting iterating, and synchronizes against @pos on each
- * iteration, any descendant css which finished ->css_online() is
- * guaranteed to be visible in the future iterations.
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  *
- * In other words, the following guarantees that a descendant can't escape
+ * For example, the following guarantees that a descendant can't escape
  * state updates of its ancestors.
  *
  * my_online(@css)
@@ -860,8 +859,17 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
  *
  * Similar to css_for_each_descendant_pre() but performs post-order
  * traversal instead.  @root is included in the iteration and the last
- * node to be visited.  Note that the walk visibility guarantee described
- * in pre-order walk doesn't apply the same to post-order walks.
+ * node to be visited.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
+ *
+ * Note that the walk visibility guarantee example described in pre-order
+ * walk doesn't apply the same to post-order walks.
  */
 #define css_for_each_descendant_post(pos, css)				\
 	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5544e685f2da..097a1fc1e1e8 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3089,21 +3089,25 @@ static int cgroup_task_count(const struct cgroup *cgrp)
 
 /**
  * css_next_child - find the next child of a given css
- * @pos_css: the current position (%NULL to initiate traversal)
- * @parent_css: css whose children to walk
+ * @pos: the current position (%NULL to initiate traversal)
+ * @parent: css whose children to walk
  *
- * This function returns the next child of @parent_css and should be called
+ * This function returns the next child of @parent and should be called
  * under either cgroup_mutex or RCU read lock.  The only requirement is
- * that @parent_css and @pos_css are accessible.  The next sibling is
- * guaranteed to be returned regardless of their states.
+ * that @parent and @pos are accessible.  The next sibling is guaranteed to
+ * be returned regardless of their states.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  */
-struct cgroup_subsys_state *
-css_next_child(struct cgroup_subsys_state *pos_css,
-	       struct cgroup_subsys_state *parent_css)
+struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
+					   struct cgroup_subsys_state *parent)
 {
-	struct cgroup *pos = pos_css ? pos_css->cgroup : NULL;
-	struct cgroup *cgrp = parent_css->cgroup;
-	struct cgroup *next;
+	struct cgroup_subsys_state *next;
 
 	cgroup_assert_mutex_or_rcu_locked();
 
@@ -3128,27 +3132,21 @@ css_next_child(struct cgroup_subsys_state *pos_css,
 	 * races against release and the race window is very small.
 	 */
 	if (!pos) {
-		next = list_entry_rcu(cgrp->self.children.next, struct cgroup, self.sibling);
-	} else if (likely(!(pos->self.flags & CSS_RELEASED))) {
-		next = list_entry_rcu(pos->self.sibling.next, struct cgroup, self.sibling);
+		next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
+	} else if (likely(!(pos->flags & CSS_RELEASED))) {
+		next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
 	} else {
-		list_for_each_entry_rcu(next, &cgrp->self.children, self.sibling)
-			if (next->self.serial_nr > pos->self.serial_nr)
+		list_for_each_entry_rcu(next, &parent->children, sibling)
+			if (next->serial_nr > pos->serial_nr)
 				break;
 	}
 
 	/*
 	 * @next, if not pointing to the head, can be dereferenced and is
-	 * the next sibling; however, it might have @ss disabled.  If so,
-	 * fast-forward to the next enabled one.
+	 * the next sibling.
 	 */
-	while (&next->self.sibling != &cgrp->self.children) {
-		struct cgroup_subsys_state *next_css = cgroup_css(next, parent_css->ss);
-
-		if (next_css)
-			return next_css;
-		next = list_entry_rcu(next->self.sibling.next, struct cgroup, self.sibling);
-	}
+	if (&next->sibling != &parent->children)
+		return next;
 	return NULL;
 }
 
@@ -3165,6 +3163,13 @@ css_next_child(struct cgroup_subsys_state *pos_css,
  * doesn't require the whole traversal to be contained in a single critical
  * section.  This function will return the correct next descendant as long
  * as both @pos and @root are accessible and @pos is a descendant of @root.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  */
 struct cgroup_subsys_state *
 css_next_descendant_pre(struct cgroup_subsys_state *pos,
@@ -3252,6 +3257,13 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos)
  * section.  This function will return the correct next descendant as long
  * as both @pos and @cgroup are accessible and @pos is a descendant of
  * @cgroup.
+ *
+ * If a subsystem synchronizes ->css_online() and the start of iteration, a
+ * css which finished ->css_online() is guaranteed to be visible in the
+ * future iterations and will stay visible until the last reference is put.
+ * A css which hasn't finished ->css_online() or already finished
+ * ->css_offline() may show up during traversal.  It's each subsystem's
+ * responsibility to synchronize against on/offlining.
  */
 struct cgroup_subsys_state *
 css_next_descendant_post(struct cgroup_subsys_state *pos,
-- 
cgit 


From 184faf32328c65c9d86b19577b8d8b90bdd2cd2e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:51 -0400
Subject: cgroup: use CSS_ONLINE instead of CGRP_DEAD

Use CSS_ONLINE on the self css to indicate whether a cgroup has been
killed instead of CGRP_DEAD.  This will allow re-using css online test
for cgroup liveliness test.  This doesn't introduce any functional
change.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h | 2 --
 kernel/cgroup.c        | 7 ++++---
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index f2ff578fc03a..51a339c99eb6 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -143,8 +143,6 @@ static inline void css_put(struct cgroup_subsys_state *css)
 
 /* bits in struct cgroup flags field */
 enum {
-	/* Control Group is dead */
-	CGRP_DEAD,
 	/*
 	 * Control Group has previously had a child cgroup or a task,
 	 * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 097a1fc1e1e8..004004fd0ded 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -278,7 +278,7 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 /* convenient tests for these bits */
 static inline bool cgroup_is_dead(const struct cgroup *cgrp)
 {
-	return test_bit(CGRP_DEAD, &cgrp->flags);
+	return !(cgrp->self.flags & CSS_ONLINE);
 }
 
 struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
@@ -1518,6 +1518,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->pidlists);
 	mutex_init(&cgrp->pidlist_mutex);
 	cgrp->self.cgroup = cgrp;
+	cgrp->self.flags |= CSS_ONLINE;
 
 	for_each_subsys(ss, ssid)
 		INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
@@ -4541,13 +4542,13 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 * Mark @cgrp dead.  This prevents further task migration and child
 	 * creation by disabling cgroup_lock_live_group().
 	 */
-	set_bit(CGRP_DEAD, &cgrp->flags);
+	cgrp->self.flags &= ~CSS_ONLINE;
 
 	/* initiate massacre of all css's */
 	for_each_css(css, ssid, cgrp)
 		kill_css(css);
 
-	/* CGRP_DEAD is set, remove from ->release_list for the last time */
+	/* CSS_ONLINE is clear, remove from ->release_list for the last time */
 	raw_spin_lock(&release_list_lock);
 	if (!list_empty(&cgrp->release_list))
 		list_del_init(&cgrp->release_list);
-- 
cgit 


From f3d4650015301d1c880df4523f7e7ef320a38aab Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:52 -0400
Subject: cgroup: convert cgroup_has_live_children() into
 css_has_online_children()

Now that cgroup liveliness and css onliness are the same state,
convert cgroup_has_live_children() into css_has_online_children() so
that it can be used for actual csses too.  The function now uses
css_for_each_child() for iteration and is published.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
---
 include/linux/cgroup.h |  2 ++
 kernel/cgroup.c        | 32 ++++++++++++++++++++------------
 2 files changed, 22 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 51a339c99eb6..b76999954beb 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -873,6 +873,8 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
 	for ((pos) = css_next_descendant_post(NULL, (css)); (pos);	\
 	     (pos) = css_next_descendant_post((pos), (css)))
 
+bool css_has_online_children(struct cgroup_subsys_state *css);
+
 /* A css_task_iter should be treated as an opaque object */
 struct css_task_iter {
 	struct cgroup_subsys		*ss;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 004004fd0ded..082bb842b11a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -175,7 +175,6 @@ static int need_forkexit_callback __read_mostly;
 static struct cftype cgroup_base_files[];
 
 static void cgroup_put(struct cgroup *cgrp);
-static bool cgroup_has_live_children(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
 			     unsigned int ss_mask);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
@@ -1769,7 +1768,7 @@ static void cgroup_kill_sb(struct super_block *sb)
 	 * This prevents new mounts by disabling percpu_ref_tryget_live().
 	 * cgroup_mount() may wait for @root's release.
 	 */
-	if (cgroup_has_live_children(&root->cgrp))
+	if (css_has_online_children(&root->cgrp.self))
 		cgroup_put(&root->cgrp);
 	else
 		percpu_ref_kill(&root->cgrp.self.refcnt);
@@ -3291,19 +3290,28 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
 	return pos->parent;
 }
 
-static bool cgroup_has_live_children(struct cgroup *cgrp)
+/**
+ * css_has_online_children - does a css have online children
+ * @css: the target css
+ *
+ * Returns %true if @css has any online children; otherwise, %false.  This
+ * function can be called from any context but the caller is responsible
+ * for synchronizing against on/offlining as necessary.
+ */
+bool css_has_online_children(struct cgroup_subsys_state *css)
 {
-	struct cgroup *child;
+	struct cgroup_subsys_state *child;
+	bool ret = false;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(child, &cgrp->self.children, self.sibling) {
-		if (!cgroup_is_dead(child)) {
-			rcu_read_unlock();
-			return true;
+	css_for_each_child(child, css) {
+		if (child->flags & CSS_ONLINE) {
+			ret = true;
+			break;
 		}
 	}
 	rcu_read_unlock();
-	return false;
+	return ret;
 }
 
 /**
@@ -4535,7 +4543,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	 * ->self.children as dead children linger on it while being
 	 * drained; otherwise, "rmdir parent/child parent" may fail.
 	 */
-	if (cgroup_has_live_children(cgrp))
+	if (css_has_online_children(&cgrp->self))
 		return -EBUSY;
 
 	/*
@@ -5014,8 +5022,8 @@ void cgroup_exit(struct task_struct *tsk)
 
 static void check_for_release(struct cgroup *cgrp)
 {
-	if (cgroup_is_releasable(cgrp) &&
-	    list_empty(&cgrp->cset_links) && !cgroup_has_live_children(cgrp)) {
+	if (cgroup_is_releasable(cgrp) && list_empty(&cgrp->cset_links) &&
+	    !css_has_online_children(&cgrp->self)) {
 		/*
 		 * Control Group is currently removeable. If it's not
 		 * already queued for a userspace notification, queue
-- 
cgit 


From 6f4524d355a86769b65d5420a6ef47fb0bba9b72 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 16 May 2014 13:22:52 -0400
Subject: cgroup: implement css_tryget()

Implement css_tryget() which tries to grab a cgroup_subsys_state's
reference as long as it hasn't already reached zero.  Combined with
the recent css iterator changes to include offline && !released csses
during traversal, this can be used to access csses regardless of their
online state.

v2: Take the new flag CSS_NO_REF into account.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/cgroup.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b76999954beb..4afe544d3547 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -112,6 +112,24 @@ static inline void css_get(struct cgroup_subsys_state *css)
 		percpu_ref_get(&css->refcnt);
 }
 
+/**
+ * css_tryget - try to obtain a reference on the specified css
+ * @css: target css
+ *
+ * Obtain a reference on @css unless it already has reached zero and is
+ * being released.  This function doesn't care whether @css is on or
+ * offline.  The caller naturally needs to ensure that @css is accessible
+ * but doesn't have to be holding a reference on it - IOW, RCU protected
+ * access is good enough for this function.  Returns %true if a reference
+ * count was successfully obtained; %false otherwise.
+ */
+static inline bool css_tryget(struct cgroup_subsys_state *css)
+{
+	if (!(css->flags & CSS_NO_REF))
+		return percpu_ref_tryget(&css->refcnt);
+	return true;
+}
+
 /**
  * css_tryget_online - try to obtain a reference on the specified css if online
  * @css: target css
-- 
cgit 


From 1836eea209546b870dd83f3f4ef234d6598a560d Mon Sep 17 00:00:00 2001
From: George Spelvin <linux@horizon.com>
Date: Sat, 10 May 2014 10:32:57 -0400
Subject: lib/crc7: Shift crc7() output left 1 bit

This eliminates a 1-bit left shift in every single caller,
and makes the inner loop of the CRC computation more efficient.

Renamed crc7 to crc7_be (big-endian) since the interface changed.

Also purged #include <linux/crc7.h> from files that don't use it at all.

Signed-off-by: George Spelvin <linux@horizon.com>
Reviewed-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/mmc/host/mmc_spi.c           |  2 +-
 drivers/net/wireless/ti/wl1251/acx.c |  1 -
 drivers/net/wireless/ti/wl1251/cmd.c |  1 -
 drivers/net/wireless/ti/wl1251/spi.c |  3 +-
 drivers/net/wireless/ti/wlcore/spi.c |  3 +-
 include/linux/crc7.h                 |  8 ++--
 lib/crc7.c                           | 84 ++++++++++++++++++++----------------
 7 files changed, 53 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 0a87e5691341..338e2202eaaa 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -472,7 +472,7 @@ mmc_spi_command_send(struct mmc_spi_host *host,
 	*cp++ = (u8)(arg >> 16);
 	*cp++ = (u8)(arg >> 8);
 	*cp++ = (u8)arg;
-	*cp++ = (crc7(0, &data->status[1], 5) << 1) | 0x01;
+	*cp++ = crc7_be(0, &data->status[1], 5) | 0x01;
 
 	/* Then, read up to 13 bytes (while writing all-ones):
 	 *  - N(CR) (== 1..8) bytes of all-ones
diff --git a/drivers/net/wireless/ti/wl1251/acx.c b/drivers/net/wireless/ti/wl1251/acx.c
index 5a4ec56c83d0..5695628757ee 100644
--- a/drivers/net/wireless/ti/wl1251/acx.c
+++ b/drivers/net/wireless/ti/wl1251/acx.c
@@ -2,7 +2,6 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/crc7.h>
 
 #include "wl1251.h"
 #include "reg.h"
diff --git a/drivers/net/wireless/ti/wl1251/cmd.c b/drivers/net/wireless/ti/wl1251/cmd.c
index bf1fa18b9786..ede31f048ef9 100644
--- a/drivers/net/wireless/ti/wl1251/cmd.c
+++ b/drivers/net/wireless/ti/wl1251/cmd.c
@@ -2,7 +2,6 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/crc7.h>
 #include <linux/etherdevice.h>
 
 #include "wl1251.h"
diff --git a/drivers/net/wireless/ti/wl1251/spi.c b/drivers/net/wireless/ti/wl1251/spi.c
index b06d36d99362..e94b57cd5a22 100644
--- a/drivers/net/wireless/ti/wl1251/spi.c
+++ b/drivers/net/wireless/ti/wl1251/spi.c
@@ -122,8 +122,7 @@ static void wl1251_spi_wake(struct wl1251 *wl)
 	crc[3] = cmd[6];
 	crc[4] = cmd[5];
 
-	cmd[4] |= crc7(0, crc, WSPI_INIT_CMD_CRC_LEN) << 1;
-	cmd[4] |= WSPI_INIT_CMD_END;
+	cmd[4] = crc7_be(0, crc, WSPI_INIT_CMD_CRC_LEN) | WSPI_INIT_CMD_END;
 
 	t.tx_buf = cmd;
 	t.len = WSPI_INIT_CMD_LEN;
diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c
index 5f3a389dd74c..1d4ddabe6063 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -151,8 +151,7 @@ static void wl12xx_spi_init(struct device *child)
 	crc[3] = cmd[6];
 	crc[4] = cmd[5];
 
-	cmd[4] |= crc7(0, crc, WSPI_INIT_CMD_CRC_LEN) << 1;
-	cmd[4] |= WSPI_INIT_CMD_END;
+	cmd[4] = crc7_be(0, crc, WSPI_INIT_CMD_CRC_LEN) | WSPI_INIT_CMD_END;
 
 	t.tx_buf = cmd;
 	t.len = WSPI_INIT_CMD_LEN;
diff --git a/include/linux/crc7.h b/include/linux/crc7.h
index 1786e772d5c6..d590765106f3 100644
--- a/include/linux/crc7.h
+++ b/include/linux/crc7.h
@@ -2,13 +2,13 @@
 #define _LINUX_CRC7_H
 #include <linux/types.h>
 
-extern const u8 crc7_syndrome_table[256];
+extern const u8 crc7_be_syndrome_table[256];
 
-static inline u8 crc7_byte(u8 crc, u8 data)
+static inline u8 crc7_be_byte(u8 crc, u8 data)
 {
-	return crc7_syndrome_table[(crc << 1) ^ data];
+	return crc7_be_syndrome_table[crc ^ data];
 }
 
-extern u8 crc7(u8 crc, const u8 *buffer, size_t len);
+extern u8 crc7_be(u8 crc, const u8 *buffer, size_t len);
 
 #endif
diff --git a/lib/crc7.c b/lib/crc7.c
index f1c3a144cec1..bf6255e23919 100644
--- a/lib/crc7.c
+++ b/lib/crc7.c
@@ -10,42 +10,47 @@
 #include <linux/crc7.h>
 
 
-/* Table for CRC-7 (polynomial x^7 + x^3 + 1) */
-const u8 crc7_syndrome_table[256] = {
-	0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f,
-	0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77,
-	0x19, 0x10, 0x0b, 0x02, 0x3d, 0x34, 0x2f, 0x26,
-	0x51, 0x58, 0x43, 0x4a, 0x75, 0x7c, 0x67, 0x6e,
-	0x32, 0x3b, 0x20, 0x29, 0x16, 0x1f, 0x04, 0x0d,
-	0x7a, 0x73, 0x68, 0x61, 0x5e, 0x57, 0x4c, 0x45,
-	0x2b, 0x22, 0x39, 0x30, 0x0f, 0x06, 0x1d, 0x14,
-	0x63, 0x6a, 0x71, 0x78, 0x47, 0x4e, 0x55, 0x5c,
-	0x64, 0x6d, 0x76, 0x7f, 0x40, 0x49, 0x52, 0x5b,
-	0x2c, 0x25, 0x3e, 0x37, 0x08, 0x01, 0x1a, 0x13,
-	0x7d, 0x74, 0x6f, 0x66, 0x59, 0x50, 0x4b, 0x42,
-	0x35, 0x3c, 0x27, 0x2e, 0x11, 0x18, 0x03, 0x0a,
-	0x56, 0x5f, 0x44, 0x4d, 0x72, 0x7b, 0x60, 0x69,
-	0x1e, 0x17, 0x0c, 0x05, 0x3a, 0x33, 0x28, 0x21,
-	0x4f, 0x46, 0x5d, 0x54, 0x6b, 0x62, 0x79, 0x70,
-	0x07, 0x0e, 0x15, 0x1c, 0x23, 0x2a, 0x31, 0x38,
-	0x41, 0x48, 0x53, 0x5a, 0x65, 0x6c, 0x77, 0x7e,
-	0x09, 0x00, 0x1b, 0x12, 0x2d, 0x24, 0x3f, 0x36,
-	0x58, 0x51, 0x4a, 0x43, 0x7c, 0x75, 0x6e, 0x67,
-	0x10, 0x19, 0x02, 0x0b, 0x34, 0x3d, 0x26, 0x2f,
-	0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c,
-	0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04,
-	0x6a, 0x63, 0x78, 0x71, 0x4e, 0x47, 0x5c, 0x55,
-	0x22, 0x2b, 0x30, 0x39, 0x06, 0x0f, 0x14, 0x1d,
-	0x25, 0x2c, 0x37, 0x3e, 0x01, 0x08, 0x13, 0x1a,
-	0x6d, 0x64, 0x7f, 0x76, 0x49, 0x40, 0x5b, 0x52,
-	0x3c, 0x35, 0x2e, 0x27, 0x18, 0x11, 0x0a, 0x03,
-	0x74, 0x7d, 0x66, 0x6f, 0x50, 0x59, 0x42, 0x4b,
-	0x17, 0x1e, 0x05, 0x0c, 0x33, 0x3a, 0x21, 0x28,
-	0x5f, 0x56, 0x4d, 0x44, 0x7b, 0x72, 0x69, 0x60,
-	0x0e, 0x07, 0x1c, 0x15, 0x2a, 0x23, 0x38, 0x31,
-	0x46, 0x4f, 0x54, 0x5d, 0x62, 0x6b, 0x70, 0x79
+/*
+ * Table for CRC-7 (polynomial x^7 + x^3 + 1).
+ * This is a big-endian CRC (msbit is highest power of x),
+ * aligned so the msbit of the byte is the x^6 coefficient
+ * and the lsbit is not used.
+ */
+const u8 crc7_be_syndrome_table[256] = {
+	0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,
+	0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee,
+	0x32, 0x20, 0x16, 0x04, 0x7a, 0x68, 0x5e, 0x4c,
+	0xa2, 0xb0, 0x86, 0x94, 0xea, 0xf8, 0xce, 0xdc,
+	0x64, 0x76, 0x40, 0x52, 0x2c, 0x3e, 0x08, 0x1a,
+	0xf4, 0xe6, 0xd0, 0xc2, 0xbc, 0xae, 0x98, 0x8a,
+	0x56, 0x44, 0x72, 0x60, 0x1e, 0x0c, 0x3a, 0x28,
+	0xc6, 0xd4, 0xe2, 0xf0, 0x8e, 0x9c, 0xaa, 0xb8,
+	0xc8, 0xda, 0xec, 0xfe, 0x80, 0x92, 0xa4, 0xb6,
+	0x58, 0x4a, 0x7c, 0x6e, 0x10, 0x02, 0x34, 0x26,
+	0xfa, 0xe8, 0xde, 0xcc, 0xb2, 0xa0, 0x96, 0x84,
+	0x6a, 0x78, 0x4e, 0x5c, 0x22, 0x30, 0x06, 0x14,
+	0xac, 0xbe, 0x88, 0x9a, 0xe4, 0xf6, 0xc0, 0xd2,
+	0x3c, 0x2e, 0x18, 0x0a, 0x74, 0x66, 0x50, 0x42,
+	0x9e, 0x8c, 0xba, 0xa8, 0xd6, 0xc4, 0xf2, 0xe0,
+	0x0e, 0x1c, 0x2a, 0x38, 0x46, 0x54, 0x62, 0x70,
+	0x82, 0x90, 0xa6, 0xb4, 0xca, 0xd8, 0xee, 0xfc,
+	0x12, 0x00, 0x36, 0x24, 0x5a, 0x48, 0x7e, 0x6c,
+	0xb0, 0xa2, 0x94, 0x86, 0xf8, 0xea, 0xdc, 0xce,
+	0x20, 0x32, 0x04, 0x16, 0x68, 0x7a, 0x4c, 0x5e,
+	0xe6, 0xf4, 0xc2, 0xd0, 0xae, 0xbc, 0x8a, 0x98,
+	0x76, 0x64, 0x52, 0x40, 0x3e, 0x2c, 0x1a, 0x08,
+	0xd4, 0xc6, 0xf0, 0xe2, 0x9c, 0x8e, 0xb8, 0xaa,
+	0x44, 0x56, 0x60, 0x72, 0x0c, 0x1e, 0x28, 0x3a,
+	0x4a, 0x58, 0x6e, 0x7c, 0x02, 0x10, 0x26, 0x34,
+	0xda, 0xc8, 0xfe, 0xec, 0x92, 0x80, 0xb6, 0xa4,
+	0x78, 0x6a, 0x5c, 0x4e, 0x30, 0x22, 0x14, 0x06,
+	0xe8, 0xfa, 0xcc, 0xde, 0xa0, 0xb2, 0x84, 0x96,
+	0x2e, 0x3c, 0x0a, 0x18, 0x66, 0x74, 0x42, 0x50,
+	0xbe, 0xac, 0x9a, 0x88, 0xf6, 0xe4, 0xd2, 0xc0,
+	0x1c, 0x0e, 0x38, 0x2a, 0x54, 0x46, 0x70, 0x62,
+	0x8c, 0x9e, 0xa8, 0xba, 0xc4, 0xd6, 0xe0, 0xf2
 };
-EXPORT_SYMBOL(crc7_syndrome_table);
+EXPORT_SYMBOL(crc7_be_syndrome_table);
 
 /**
  * crc7 - update the CRC7 for the data buffer
@@ -55,14 +60,17 @@ EXPORT_SYMBOL(crc7_syndrome_table);
  * Context: any
  *
  * Returns the updated CRC7 value.
+ * The CRC7 is left-aligned in the byte (the lsbit is always 0), as that
+ * makes the computation easier, and all callers want it in that form.
+ *
  */
-u8 crc7(u8 crc, const u8 *buffer, size_t len)
+u8 crc7_be(u8 crc, const u8 *buffer, size_t len)
 {
 	while (len--)
-		crc = crc7_byte(crc, *buffer++);
+		crc = crc7_be_byte(crc, *buffer++);
 	return crc;
 }
-EXPORT_SYMBOL(crc7);
+EXPORT_SYMBOL(crc7_be);
 
 MODULE_DESCRIPTION("CRC7 calculations");
 MODULE_LICENSE("GPL");
-- 
cgit 


From ce8d9e0d6746ff67c1870386b7121a4448f21130 Mon Sep 17 00:00:00 2001
From: Matan Barak <matanb@mellanox.com>
Date: Thu, 15 May 2014 15:29:27 +0300
Subject: net/mlx4_core: Add UPDATE_QP SRIOV wrapper support

This patch adds UPDATE_QP SRIOV wrapper support.

The mechanism is a general one, but currently only source MAC
index changes are allowed for VFs.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c           |  4 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  6 +++
 drivers/net/ethernet/mellanox/mlx4/qp.c            | 35 ++++++++++++++
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  | 54 ++++++++++++++++++++++
 include/linux/mlx4/qp.h                            | 11 +++++
 5 files changed, 108 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 78099eab7673..92d3249f63f1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1253,12 +1253,12 @@ static struct mlx4_cmd_info cmd_info[] = {
 	},
 	{
 		.opcode = MLX4_CMD_UPDATE_QP,
-		.has_inbox = false,
+		.has_inbox = true,
 		.has_outbox = false,
 		.out_is_imm = false,
 		.encode_slave_id = false,
 		.verify = NULL,
-		.wrapper = mlx4_CMD_EPERM_wrapper
+		.wrapper = mlx4_UPDATE_QP_wrapper
 	},
 	{
 		.opcode = MLX4_CMD_GET_OP_REQ,
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index f9c465101963..212cea440f90 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -1195,6 +1195,12 @@ int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
 			   struct mlx4_cmd_mailbox *outbox,
 			   struct mlx4_cmd_info *cmd);
 
+int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+
 int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave,
 			 struct mlx4_vhcr *vhcr,
 			 struct mlx4_cmd_mailbox *inbox,
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 61d64ebffd56..fbd32af89c7c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -389,6 +389,41 @@ err_icm:
 
 EXPORT_SYMBOL_GPL(mlx4_qp_alloc);
 
+#define MLX4_UPDATE_QP_SUPPORTED_ATTRS MLX4_UPDATE_QP_SMAC
+int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp,
+		   enum mlx4_update_qp_attr attr,
+		   struct mlx4_update_qp_params *params)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_update_qp_context *cmd;
+	u64 pri_addr_path_mask = 0;
+	int err = 0;
+
+	/* Reject bad attrs before allocating, so no mailbox can be leaked. */
+	if (!attr || (attr & ~MLX4_UPDATE_QP_SUPPORTED_ATTRS))
+		return -EINVAL;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	cmd = (struct mlx4_update_qp_context *)mailbox->buf;
+	if (attr & MLX4_UPDATE_QP_SMAC) {
+		pri_addr_path_mask |= 1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX;
+		cmd->qp_context.pri_path.grh_mylmc = params->smac_index;
+	}
+
+	cmd->primary_addr_path_mask = cpu_to_be64(pri_addr_path_mask);
+
+	err = mlx4_cmd(dev, mailbox->dma, qp->qpn & 0xffffff, 0,
+		       MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A,
+		       MLX4_CMD_NATIVE);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_update_qp);
+
 void mlx4_qp_remove(struct mlx4_dev *dev, struct mlx4_qp *qp)
 {
 	struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table;
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 1c3fdd4a1f7d..8f1254a79832 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -3895,6 +3895,60 @@ static int add_eth_header(struct mlx4_dev *dev, int slave,
 
 }
 
+#define MLX4_UPD_QP_PATH_MASK_SUPPORTED (1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX)
+int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd_info)
+{
+	int err;
+	u32 qpn = vhcr->in_modifier & 0xffffff;
+	struct res_qp *rqp;
+	u64 mac;
+	unsigned port;
+	u64 pri_addr_path_mask;
+	struct mlx4_update_qp_context *cmd;
+	int smac_index;
+
+	cmd = (struct mlx4_update_qp_context *)inbox->buf;
+
+	pri_addr_path_mask = be64_to_cpu(cmd->primary_addr_path_mask);
+	if (cmd->qp_mask || cmd->secondary_addr_path_mask ||
+	    (pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED))
+		return -EPERM;
+
+	/* Just change the smac for the QP */
+	err = get_res(dev, slave, qpn, RES_QP, &rqp);
+	if (err) {
+		mlx4_err(dev, "Updating qpn 0x%x for slave %d rejected\n", qpn, slave);
+		return err;
+	}
+
+	port = (rqp->sched_queue >> 6 & 1) + 1;
+	smac_index = cmd->qp_context.pri_path.grh_mylmc;
+	err = mac_find_smac_ix_in_slave(dev, slave, port,
+					smac_index, &mac);
+	if (err) {
+		mlx4_err(dev, "Failed to update qpn 0x%x, MAC is invalid. smac_ix: %d\n",
+			 qpn, smac_index);
+		goto err_mac;
+	}
+
+	err = mlx4_cmd(dev, inbox->dma,
+		       vhcr->in_modifier, 0,
+		       MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A,
+		       MLX4_CMD_NATIVE);
+	if (err) {
+		mlx4_err(dev, "Failed to update qpn on qpn 0x%x, command failed\n", qpn);
+		goto err_mac;
+	}
+
+err_mac:
+	put_res(dev, slave, qpn, RES_QP);
+	return err;
+}
+
 int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
 					 struct mlx4_vhcr *vhcr,
 					 struct mlx4_cmd_mailbox *inbox,
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index b66e7610d4ee..7040dc98ff8b 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -421,6 +421,17 @@ struct mlx4_wqe_inline_seg {
 	__be32			byte_count;
 };
 
+enum mlx4_update_qp_attr {
+	MLX4_UPDATE_QP_SMAC		= 1 << 0,
+};
+
+struct mlx4_update_qp_params {
+	u8	smac_index;
+};
+
+int mlx4_update_qp(struct mlx4_dev *dev, struct mlx4_qp *qp,
+		   enum mlx4_update_qp_attr attr,
+		   struct mlx4_update_qp_params *params);
 int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		   enum mlx4_qp_state cur_state, enum mlx4_qp_state new_state,
 		   struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar,
-- 
cgit 


From aae4518b3124b29f8dc81c829c704fd2df72e98b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 16 May 2014 02:46:50 +0200
Subject: PM / sleep: Mechanism to avoid resuming runtime-suspended devices
 unnecessarily

Currently, some subsystems (e.g. PCI and the ACPI PM domain) have to
resume all runtime-suspended devices during system suspend, mostly
because those devices may need to be reprogrammed due to different
wakeup settings for system sleep and for runtime PM.

For some devices, though, it's OK to remain in runtime suspend
throughout a complete system suspend/resume cycle (if the device was in
runtime suspend at the start of the cycle).  We would like to do this
whenever possible, to avoid the overhead of extra power-up and power-down
events.

However, problems may arise because the device's descendants may require
it to be at full power at various points during the cycle.  Therefore the
most straightforward way to do this safely is if the device and all its
descendants can remain runtime suspended until the complete stage of
system resume.

To this end, introduce a new device PM flag, power.direct_complete
and modify the PM core to use that flag as follows.

If the ->prepare() callback of a device returns a positive number,
the PM core will regard that as an indication that it may leave the
device runtime-suspended.  It will then check if the system power
transition in progress is a suspend (and not hibernation in particular)
and if the device is, indeed, runtime-suspended.  In that case, the PM
core will set the device's power.direct_complete flag.  Otherwise it
will clear power.direct_complete for the device and it also will later
clear it for the device's parent (if there's one).

Next, the PM core will not invoke the ->suspend(), ->suspend_late(),
->suspend_noirq(), ->resume_noirq(), ->resume_early(), or ->resume()
callbacks for all devices having power.direct_complete set.  It
will invoke their ->complete() callbacks, however, and those
callbacks are then responsible for resuming the devices as
appropriate, if necessary.  For example, in some cases they may
need to queue up runtime resume requests for the devices using
pm_request_resume().

Changelog partly based on Alan Stern's description of the idea
(http://marc.info/?l=linux-pm&m=139940466625569&w=2).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
---
 drivers/base/power/main.c  | 66 +++++++++++++++++++++++++++++++++++-----------
 include/linux/pm.h         | 36 +++++++++++++++++++------
 include/linux/pm_runtime.h |  6 +++++
 3 files changed, 85 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 86d5e4fb5b98..343ffad59377 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -479,7 +479,7 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
-	if (dev->power.syscore)
+	if (dev->power.syscore || dev->power.direct_complete)
 		goto Out;
 
 	if (!dev->power.is_noirq_suspended)
@@ -605,7 +605,7 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn
 	TRACE_DEVICE(dev);
 	TRACE_RESUME(0);
 
-	if (dev->power.syscore)
+	if (dev->power.syscore || dev->power.direct_complete)
 		goto Out;
 
 	if (!dev->power.is_late_suspended)
@@ -735,6 +735,12 @@ static int device_resume(struct device *dev, pm_message_t state, bool async)
 	if (dev->power.syscore)
 		goto Complete;
 
+	if (dev->power.direct_complete) {
+		/* Match the pm_runtime_disable() in __device_suspend(). */
+		pm_runtime_enable(dev);
+		goto Complete;
+	}
+
 	dpm_wait(dev->parent, async);
 	dpm_watchdog_set(&wd, dev);
 	device_lock(dev);
@@ -1007,7 +1013,7 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a
 		goto Complete;
 	}
 
-	if (dev->power.syscore)
+	if (dev->power.syscore || dev->power.direct_complete)
 		goto Complete;
 
 	dpm_wait_for_children(dev, async);
@@ -1146,7 +1152,7 @@ static int __device_suspend_late(struct device *dev, pm_message_t state, bool as
 		goto Complete;
 	}
 
-	if (dev->power.syscore)
+	if (dev->power.syscore || dev->power.direct_complete)
 		goto Complete;
 
 	dpm_wait_for_children(dev, async);
@@ -1332,6 +1338,17 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 	if (dev->power.syscore)
 		goto Complete;
 
+	if (dev->power.direct_complete) {
+		if (pm_runtime_status_suspended(dev)) {
+			pm_runtime_disable(dev);
+			if (pm_runtime_suspended_if_enabled(dev))
+				goto Complete;
+
+			pm_runtime_enable(dev);
+		}
+		dev->power.direct_complete = false;
+	}
+
 	dpm_watchdog_set(&wd, dev);
 	device_lock(dev);
 
@@ -1382,10 +1399,19 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
 
  End:
 	if (!error) {
+		struct device *parent = dev->parent;
+
 		dev->power.is_suspended = true;
-		if (dev->power.wakeup_path
-		    && dev->parent && !dev->parent->power.ignore_children)
-			dev->parent->power.wakeup_path = true;
+		if (parent) {
+			spin_lock_irq(&parent->power.lock);
+
+			dev->parent->power.direct_complete = false;
+			if (dev->power.wakeup_path
+			    && !dev->parent->power.ignore_children)
+				dev->parent->power.wakeup_path = true;
+
+			spin_unlock_irq(&parent->power.lock);
+		}
 	}
 
 	device_unlock(dev);
@@ -1487,7 +1513,7 @@ static int device_prepare(struct device *dev, pm_message_t state)
 {
 	int (*callback)(struct device *) = NULL;
 	char *info = NULL;
-	int error = 0;
+	int ret = 0;
 
 	if (dev->power.syscore)
 		return 0;
@@ -1523,17 +1549,27 @@ static int device_prepare(struct device *dev, pm_message_t state)
 		callback = dev->driver->pm->prepare;
 	}
 
-	if (callback) {
-		error = callback(dev);
-		suspend_report_result(callback, error);
-	}
+	if (callback)
+		ret = callback(dev);
 
 	device_unlock(dev);
 
-	if (error)
+	if (ret < 0) {
+		suspend_report_result(callback, ret);
 		pm_runtime_put(dev);
-
-	return error;
+		return ret;
+	}
+	/*
+	 * A positive return value from ->prepare() means "this device appears
+	 * to be runtime-suspended and its state is fine, so if it really is
+	 * runtime-suspended, you can leave it in that state provided that you
+	 * will do the same thing with all of its descendants".  This only
+	 * applies to suspend transitions, however.
+	 */
+	spin_lock_irq(&dev->power.lock);
+	dev->power.direct_complete = ret > 0 && state.event == PM_EVENT_SUSPEND;
+	spin_unlock_irq(&dev->power.lock);
+	return 0;
 }
 
 /**
diff --git a/include/linux/pm.h b/include/linux/pm.h
index d915d0345fa1..72c0fe098a27 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -93,13 +93,23 @@ typedef struct pm_message {
  *	been registered) to recover from the race condition.
  *	This method is executed for all kinds of suspend transitions and is
  *	followed by one of the suspend callbacks: @suspend(), @freeze(), or
- *	@poweroff().  The PM core executes subsystem-level @prepare() for all
- *	devices before starting to invoke suspend callbacks for any of them, so
- *	generally devices may be assumed to be functional or to respond to
- *	runtime resume requests while @prepare() is being executed.  However,
- *	device drivers may NOT assume anything about the availability of user
- *	space at that time and it is NOT valid to request firmware from within
- *	@prepare() (it's too late to do that).  It also is NOT valid to allocate
+ *	@poweroff().  If the transition is a suspend to memory or standby (that
+ *	is, not related to hibernation), the return value of @prepare() may be
+ *	used to indicate to the PM core to leave the device in runtime suspend
+ *	if applicable.  Namely, if @prepare() returns a positive number, the PM
+ *	core will understand that as a declaration that the device appears to be
+ *	runtime-suspended and it may be left in that state during the entire
+ *	transition and during the subsequent resume if all of its descendants
+ *	are left in runtime suspend too.  If that happens, @complete() will be
+ *	executed directly after @prepare() and it must ensure the proper
+ *	functioning of the device after the system resume.
+ *	The PM core executes subsystem-level @prepare() for all devices before
+ *	starting to invoke suspend callbacks for any of them, so generally
+ *	devices may be assumed to be functional or to respond to runtime resume
+ *	requests while @prepare() is being executed.  However, device drivers
+ *	may NOT assume anything about the availability of user space at that
+ *	time and it is NOT valid to request firmware from within @prepare()
+ *	(it's too late to do that).  It also is NOT valid to allocate
  *	substantial amounts of memory from @prepare() in the GFP_KERNEL mode.
  *	[To work around these limitations, drivers may register suspend and
  *	hibernation notifiers to be executed before the freezing of tasks.]
@@ -112,7 +122,16 @@ typedef struct pm_message {
  *	of the other devices that the PM core has unsuccessfully attempted to
  *	suspend earlier).
  *	The PM core executes subsystem-level @complete() after it has executed
- *	the appropriate resume callbacks for all devices.
+ *	the appropriate resume callbacks for all devices.  If the corresponding
+ *	@prepare() at the beginning of the suspend transition returned a
+ *	positive number and the device was left in runtime suspend (without
+ *	executing any suspend and resume callbacks for it), @complete() will be
+ *	the only callback executed for the device during resume.  In that case,
+ *	@complete() must be prepared to do whatever is necessary to ensure the
+ *	proper functioning of the device after the system resume.  To this end,
+ *	@complete() can check the power.direct_complete flag of the device to
+ *	learn whether (unset) or not (set) the previous suspend and resume
+ *	callbacks have been executed for it.
  *
  * @suspend: Executed before putting the system into a sleep state in which the
  *	contents of main memory are preserved.  The exact action to perform
@@ -546,6 +565,7 @@ struct dev_pm_info {
 	bool			is_late_suspended:1;
 	bool			ignore_children:1;
 	bool			early_init:1;	/* Owned by the PM core */
+	bool			direct_complete:1;	/* Owned by the PM core */
 	spinlock_t		lock;
 #ifdef CONFIG_PM_SLEEP
 	struct list_head	entry;
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 2a5897a4afbc..43fd6716f662 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -101,6 +101,11 @@ static inline bool pm_runtime_status_suspended(struct device *dev)
 	return dev->power.runtime_status == RPM_SUSPENDED;
 }
 
+static inline bool pm_runtime_suspended_if_enabled(struct device *dev)
+{
+	return pm_runtime_status_suspended(dev) && dev->power.disable_depth == 1;
+}
+
 static inline bool pm_runtime_enabled(struct device *dev)
 {
 	return !dev->power.disable_depth;
@@ -150,6 +155,7 @@ static inline void device_set_run_wake(struct device *dev, bool enable) {}
 static inline bool pm_runtime_suspended(struct device *dev) { return false; }
 static inline bool pm_runtime_active(struct device *dev) { return true; }
 static inline bool pm_runtime_status_suspended(struct device *dev) { return false; }
+static inline bool pm_runtime_suspended_if_enabled(struct device *dev) { return false; }
 static inline bool pm_runtime_enabled(struct device *dev) { return false; }
 
 static inline void pm_runtime_no_callbacks(struct device *dev) {}
-- 
cgit 


From a75951217472c522c324adb0a4de3ba69d656ef5 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Fri, 16 May 2014 16:14:04 +0200
Subject: net: phy: extend fixed driver with fixed_phy_register()

The existing fixed_phy_add() function has several drawbacks that
prevent it from being used as is for OF-based declaration of fixed
PHYs:

 * The address of the PHY on the fake bus needs to be passed, while a
   dynamic allocation is desired.

 * Since the phy_device instantiation is postponed until the next
   mdiobus scan, there is no way to associate the fixed PHY with its
   OF node, which later prevents of_phy_connect() from finding this
   fixed PHY from a given OF node.

To solve this, this commit introduces fixed_phy_register(), which will
allocate an available PHY address, add the PHY using fixed_phy_add()
and instantiate the phy_device structure associated with the provided
OF node.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/fixed.c   | 61 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/phy_fixed.h | 11 +++++++++
 2 files changed, 72 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed.c
index e41546da105e..d60d875cb445 100644
--- a/drivers/net/phy/fixed.c
+++ b/drivers/net/phy/fixed.c
@@ -21,6 +21,7 @@
 #include <linux/phy_fixed.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/of.h>
 
 #define MII_REGS_NUM 29
 
@@ -203,6 +204,66 @@ err_regs:
 }
 EXPORT_SYMBOL_GPL(fixed_phy_add);
 
+void fixed_phy_del(int phy_addr)
+{
+	struct fixed_mdio_bus *fmb = &platform_fmb;
+	struct fixed_phy *fp, *tmp;
+
+	list_for_each_entry_safe(fp, tmp, &fmb->phys, node) {
+		if (fp->addr == phy_addr) {
+			list_del(&fp->node);
+			kfree(fp);
+			return;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(fixed_phy_del);
+
+static int phy_fixed_addr;
+static DEFINE_SPINLOCK(phy_fixed_addr_lock);
+
+int fixed_phy_register(unsigned int irq,
+		       struct fixed_phy_status *status,
+		       struct device_node *np)
+{
+	struct fixed_mdio_bus *fmb = &platform_fmb;
+	struct phy_device *phy;
+	int phy_addr;
+	int ret;
+
+	/* Get the next available PHY address, up to PHY_MAX_ADDR */
+	spin_lock(&phy_fixed_addr_lock);
+	if (phy_fixed_addr == PHY_MAX_ADDR) {
+		spin_unlock(&phy_fixed_addr_lock);
+		return -ENOSPC;
+	}
+	phy_addr = phy_fixed_addr++;
+	spin_unlock(&phy_fixed_addr_lock);
+
+	ret = fixed_phy_add(irq, phy_addr, status);
+	if (ret < 0)
+		return ret;
+
+	phy = get_phy_device(fmb->mii_bus, phy_addr, false);
+	if (IS_ERR_OR_NULL(phy)) {
+		fixed_phy_del(phy_addr);
+		return -EINVAL;
+	}
+
+	of_node_get(np);
+	phy->dev.of_node = np;
+
+	ret = phy_device_register(phy);
+	if (ret) {
+		phy_device_free(phy);
+		of_node_put(np);
+		fixed_phy_del(phy_addr);
+		return ret;
+	}
+
+	return 0;
+}
+
 static int __init fixed_mdio_bus_init(void)
 {
 	struct fixed_mdio_bus *fmb = &platform_fmb;
diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 509d8f5f984e..4f2478b47136 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -9,15 +9,26 @@ struct fixed_phy_status {
 	int asym_pause;
 };
 
+struct device_node;
+
 #ifdef CONFIG_FIXED_PHY
 extern int fixed_phy_add(unsigned int irq, int phy_id,
 			 struct fixed_phy_status *status);
+extern int fixed_phy_register(unsigned int irq,
+			      struct fixed_phy_status *status,
+			      struct device_node *np);
 #else
 static inline int fixed_phy_add(unsigned int irq, int phy_id,
 				struct fixed_phy_status *status)
 {
 	return -ENODEV;
 }
+static inline int fixed_phy_register(unsigned int irq,
+				     struct fixed_phy_status *status,
+				     struct device_node *np)
+{
+	return -ENODEV;
+}
 #endif /* CONFIG_FIXED_PHY */
 
 /*
-- 
cgit 


From 3be2a49e5c08d268f8af0dd4fe89a24ea8cdc339 Mon Sep 17 00:00:00 2001
From: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Date: Fri, 16 May 2014 16:14:05 +0200
Subject: of: provide a binding for fixed link PHYs

Some Ethernet MACs have a "fixed link", and are not connected to a
normal MDIO-managed PHY device. For those situations, a Device Tree
binding allows describing a "fixed link" using a special PHY node.

This patch adds:

 * Documentation for the fixed PHY Device Tree binding.

 * An of_phy_is_fixed_link() function that an Ethernet driver can call
   on its PHY phandle to find out whether it's a fixed link PHY or
   not. It should typically be used to know if
   of_phy_register_fixed_link() should be called.

 * An of_phy_register_fixed_link() function that instantiates the
   fixed PHY into the PHY subsystem, so that when the driver calls
   of_phy_connect(), the PHY device associated to the OF node will be
   found.

These two additional functions also support the old fixed-link Device
Tree binding used on PowerPC platforms, so that ultimately, the
network device drivers for those platforms could be converted to use
of_phy_is_fixed_link() and of_phy_register_fixed_link() instead of
of_phy_connect_fixed_link(), while keeping compatibility with their
respective Device Tree bindings.

Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/fixed-link.txt         | 30 ++++++++++
 drivers/of/of_mdio.c                               | 67 ++++++++++++++++++++++
 include/linux/of_mdio.h                            | 15 +++++
 3 files changed, 112 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/fixed-link.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/net/fixed-link.txt b/Documentation/devicetree/bindings/net/fixed-link.txt
new file mode 100644
index 000000000000..e956de1be935
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/fixed-link.txt
@@ -0,0 +1,30 @@
+Fixed link Device Tree binding
+------------------------------
+
+Some Ethernet MACs have a "fixed link", and are not connected to a
+normal MDIO-managed PHY device. For those situations, a Device Tree
+binding allows describing a "fixed link".
+
+Such a fixed link situation is described by creating a 'fixed-link'
+sub-node of the Ethernet MAC device node, with the following
+properties:
+
+* 'speed' (integer, mandatory), to indicate the link speed. Accepted
+  values are 10, 100 and 1000
+* 'full-duplex' (boolean, optional), to indicate that full duplex is
+  used. When absent, half duplex is assumed.
+* 'pause' (boolean, optional), to indicate that pause should be
+  enabled.
+* 'asym-pause' (boolean, optional), to indicate that asym_pause should
+  be enabled.
+
+Example:
+
+ethernet@0 {
+	...
+	fixed-link {
+	      speed = <1000>;
+	      full-duplex;
+	};
+	...
+};
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 9a95831bd065..1def0bb5cb37 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -14,6 +14,7 @@
 #include <linux/netdevice.h>
 #include <linux/err.h>
 #include <linux/phy.h>
+#include <linux/phy_fixed.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
@@ -301,3 +302,69 @@ struct phy_device *of_phy_attach(struct net_device *dev,
 	return phy_attach_direct(dev, phy, flags, iface) ? NULL : phy;
 }
 EXPORT_SYMBOL(of_phy_attach);
+
+#if defined(CONFIG_FIXED_PHY)
+/*
+ * of_phy_is_fixed_link() and of_phy_register_fixed_link() must
+ * support two DT bindings:
+ * - the old DT binding, where 'fixed-link' was a property with 5
+ *   cells encoding various information about the fixed PHY
+ * - the new DT binding, where 'fixed-link' is a sub-node of the
+ *   Ethernet device.
+ */
+bool of_phy_is_fixed_link(struct device_node *np)
+{
+	struct device_node *dn;
+	int len;
+
+	/* New binding */
+	dn = of_get_child_by_name(np, "fixed-link");
+	if (dn) {
+		of_node_put(dn);
+		return true;
+	}
+
+	/* Old binding */
+	if (of_get_property(np, "fixed-link", &len) &&
+	    len == (5 * sizeof(__be32)))
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(of_phy_is_fixed_link);
+
+int of_phy_register_fixed_link(struct device_node *np)
+{
+	struct fixed_phy_status status = {};
+	struct device_node *fixed_link_node;
+	const __be32 *fixed_link_prop;
+	int len, err;
+
+	/* New binding: every property lives in the 'fixed-link' sub-node */
+	fixed_link_node = of_get_child_by_name(np, "fixed-link");
+	if (fixed_link_node) {
+		status.link = 1;
+		status.duplex = of_property_read_bool(fixed_link_node, "full-duplex");
+		status.pause = of_property_read_bool(fixed_link_node, "pause");
+		status.asym_pause = of_property_read_bool(fixed_link_node, "asym-pause");
+		err = of_property_read_u32(fixed_link_node, "speed", &status.speed);
+		/* Drop the child reference on every path, including failure */
+		of_node_put(fixed_link_node);
+		return err ? -EINVAL : fixed_phy_register(PHY_POLL, &status, np);
+	}
+
+	/* Old binding */
+	fixed_link_prop = of_get_property(np, "fixed-link", &len);
+	if (fixed_link_prop && len == (5 * sizeof(__be32))) {
+		status.link = 1;
+		status.duplex = be32_to_cpu(fixed_link_prop[1]);
+		status.speed = be32_to_cpu(fixed_link_prop[2]);
+		status.pause = be32_to_cpu(fixed_link_prop[3]);
+		status.asym_pause = be32_to_cpu(fixed_link_prop[4]);
+		return fixed_phy_register(PHY_POLL, &status, np);
+	}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL(of_phy_register_fixed_link);
+#endif
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 881a7c3571f4..0aa367e316cb 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -72,4 +72,19 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np)
 }
 #endif /* CONFIG_OF */
 
+#if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY)
+extern int of_phy_register_fixed_link(struct device_node *np);
+extern bool of_phy_is_fixed_link(struct device_node *np);
+#else
+static inline int of_phy_register_fixed_link(struct device_node *np)
+{
+	return -ENOSYS;
+}
+static inline bool of_phy_is_fixed_link(struct device_node *np)
+{
+	return false;
+}
+#endif
+
+
 #endif /* __LINUX_OF_MDIO_H */
-- 
cgit 


From 3e9c156e2c210ab67b12b1b692983a6b97c19d3f Mon Sep 17 00:00:00 2001
From: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Date: Fri, 16 May 2014 17:46:44 +0200
Subject: ieee802154: add netlink interfaces for llsec

This patch adds user-visible interfaces for the llsec infrastructure.
For the added methods, the only major difference between all add/remove
implementations lies in how the specific object is parsed, and for dump
requests, how objects are written into netlink messages.

To save on boilerplate code, table dumps are routed through a helper
function that handles netlink dump state, leaving the actual dumping
code to care only about iterating over the table to be dumped and
filling netlink messages. For add/remove methods, the boilerplate
required to work is not quite as large, but still enough to also move
into a local helper.

Signed-off-by: Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/nl802154.h    |  31 ++
 net/ieee802154/ieee802154.h |  19 ++
 net/ieee802154/netlink.c    |  20 ++
 net/ieee802154/nl-mac.c     | 807 ++++++++++++++++++++++++++++++++++++++++++++
 net/ieee802154/nl_policy.c  |  16 +
 5 files changed, 893 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h
index c8d7f3965fff..20163b9a0eae 100644
--- a/include/linux/nl802154.h
+++ b/include/linux/nl802154.h
@@ -80,6 +80,22 @@ enum {
 
 	IEEE802154_ATTR_FRAME_RETRIES,
 
+	IEEE802154_ATTR_LLSEC_ENABLED,
+	IEEE802154_ATTR_LLSEC_SECLEVEL,
+	IEEE802154_ATTR_LLSEC_KEY_MODE,
+	IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT,
+	IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED,
+	IEEE802154_ATTR_LLSEC_KEY_ID,
+	IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+	IEEE802154_ATTR_LLSEC_KEY_BYTES,
+	IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES,
+	IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS,
+	IEEE802154_ATTR_LLSEC_FRAME_TYPE,
+	IEEE802154_ATTR_LLSEC_CMD_FRAME_ID,
+	IEEE802154_ATTR_LLSEC_SECLEVELS,
+	IEEE802154_ATTR_LLSEC_DEV_OVERRIDE,
+	IEEE802154_ATTR_LLSEC_DEV_KEY_MODE,
+
 	__IEEE802154_ATTR_MAX,
 };
 
@@ -134,6 +150,21 @@ enum {
 
 	IEEE802154_SET_MACPARAMS,
 
+	IEEE802154_LLSEC_GETPARAMS,
+	IEEE802154_LLSEC_SETPARAMS,
+	IEEE802154_LLSEC_LIST_KEY,
+	IEEE802154_LLSEC_ADD_KEY,
+	IEEE802154_LLSEC_DEL_KEY,
+	IEEE802154_LLSEC_LIST_DEV,
+	IEEE802154_LLSEC_ADD_DEV,
+	IEEE802154_LLSEC_DEL_DEV,
+	IEEE802154_LLSEC_LIST_DEVKEY,
+	IEEE802154_LLSEC_ADD_DEVKEY,
+	IEEE802154_LLSEC_DEL_DEVKEY,
+	IEEE802154_LLSEC_LIST_SECLEVEL,
+	IEEE802154_LLSEC_ADD_SECLEVEL,
+	IEEE802154_LLSEC_DEL_SECLEVEL,
+
 	__IEEE802154_CMD_MAX,
 };
 
diff --git a/net/ieee802154/ieee802154.h b/net/ieee802154/ieee802154.h
index 6693a5cf01ce..8b83a231299e 100644
--- a/net/ieee802154/ieee802154.h
+++ b/net/ieee802154/ieee802154.h
@@ -68,4 +68,23 @@ int ieee802154_list_iface(struct sk_buff *skb, struct genl_info *info);
 int ieee802154_dump_iface(struct sk_buff *skb, struct netlink_callback *cb);
 int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info);
 
+int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_keys(struct sk_buff *skb,
+			       struct netlink_callback *cb);
+int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_devs(struct sk_buff *skb,
+			       struct netlink_callback *cb);
+int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_devkeys(struct sk_buff *skb,
+				  struct netlink_callback *cb);
+int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info);
+int ieee802154_llsec_dump_seclevels(struct sk_buff *skb,
+				    struct netlink_callback *cb);
+
 #endif
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 04b20589d97a..26efcf4fd2ff 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -124,6 +124,26 @@ static const struct genl_ops ieee8021154_ops[] = {
 	IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface,
 			ieee802154_dump_iface),
 	IEEE802154_OP(IEEE802154_SET_MACPARAMS, ieee802154_set_macparams),
+	IEEE802154_OP(IEEE802154_LLSEC_GETPARAMS, ieee802154_llsec_getparams),
+	IEEE802154_OP(IEEE802154_LLSEC_SETPARAMS, ieee802154_llsec_setparams),
+	IEEE802154_DUMP(IEEE802154_LLSEC_LIST_KEY, NULL,
+			ieee802154_llsec_dump_keys),
+	IEEE802154_OP(IEEE802154_LLSEC_ADD_KEY, ieee802154_llsec_add_key),
+	IEEE802154_OP(IEEE802154_LLSEC_DEL_KEY, ieee802154_llsec_del_key),
+	IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEV, NULL,
+			ieee802154_llsec_dump_devs),
+	IEEE802154_OP(IEEE802154_LLSEC_ADD_DEV, ieee802154_llsec_add_dev),
+	IEEE802154_OP(IEEE802154_LLSEC_DEL_DEV, ieee802154_llsec_del_dev),
+	IEEE802154_DUMP(IEEE802154_LLSEC_LIST_DEVKEY, NULL,
+			ieee802154_llsec_dump_devkeys),
+	IEEE802154_OP(IEEE802154_LLSEC_ADD_DEVKEY, ieee802154_llsec_add_devkey),
+	IEEE802154_OP(IEEE802154_LLSEC_DEL_DEVKEY, ieee802154_llsec_del_devkey),
+	IEEE802154_DUMP(IEEE802154_LLSEC_LIST_SECLEVEL, NULL,
+			ieee802154_llsec_dump_seclevels),
+	IEEE802154_OP(IEEE802154_LLSEC_ADD_SECLEVEL,
+		      ieee802154_llsec_add_seclevel),
+	IEEE802154_OP(IEEE802154_LLSEC_DEL_SECLEVEL,
+		      ieee802154_llsec_del_seclevel),
 };
 
 static const struct genl_multicast_group ieee802154_mcgrps[] = {
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index 5d285498c0f6..5617b4c6d6d5 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -715,3 +715,810 @@ out:
 	dev_put(dev);
 	return rc;
 }
+
+
+
+/* Fill @desc from the key-id attributes of @info; -EINVAL if one is missing. */
+static int
+ieee802154_llsec_parse_key_id(struct genl_info *info,
+			      struct ieee802154_llsec_key_id *desc)
+{
+	memset(desc, 0, sizeof(*desc));
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE])
+		return -EINVAL;
+
+	desc->mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]);
+
+	if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) {
+		/* the PAN ID and one of the two addresses are read below */
+		if (!info->attrs[IEEE802154_ATTR_PAN_ID] ||
+		    !(info->attrs[IEEE802154_ATTR_SHORT_ADDR] ||
+		      info->attrs[IEEE802154_ATTR_HW_ADDR]))
+			return -EINVAL;
+
+		desc->device_addr.pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]);
+
+		if (info->attrs[IEEE802154_ATTR_SHORT_ADDR]) {
+			desc->device_addr.mode = IEEE802154_ADDR_SHORT;
+			desc->device_addr.short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]);
+		} else {
+			desc->device_addr.mode = IEEE802154_ADDR_LONG;
+			desc->device_addr.extended_addr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+		}
+	}
+
+	if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID])
+		return -EINVAL;
+
+	if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT])
+		return -EINVAL;
+
+	if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED])
+		return -EINVAL;
+
+	if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT)
+		desc->id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_ID]);
+
+	switch (desc->mode) {
+	case IEEE802154_SCF_KEY_SHORT_INDEX:
+		desc->short_source =
+			cpu_to_le32(nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT]));
+		break;
+	case IEEE802154_SCF_KEY_HW_INDEX:
+		desc->extended_source = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED]);
+		break;
+	}
+
+	return 0;
+}
+
+static int
+ieee802154_llsec_fill_key_id(struct sk_buff *msg,
+			     const struct ieee802154_llsec_key_id *desc)
+{
+	if (nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_MODE, desc->mode))
+		return -EMSGSIZE;
+
+	if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) {
+		if (nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID,
+				      desc->device_addr.pan_id))
+			return -EMSGSIZE;
+
+		if (desc->device_addr.mode == IEEE802154_ADDR_SHORT &&
+		    nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR,
+				      desc->device_addr.short_addr))
+			return -EMSGSIZE;
+
+		if (desc->device_addr.mode == IEEE802154_ADDR_LONG &&
+		    nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR,
+				   desc->device_addr.extended_addr))
+			return -EMSGSIZE;
+	}
+
+	if (desc->mode != IEEE802154_SCF_KEY_IMPLICIT &&
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_ID, desc->id))
+		return -EMSGSIZE;
+
+	if (desc->mode == IEEE802154_SCF_KEY_SHORT_INDEX &&
+	    nla_put_u32(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT,
+			le32_to_cpu(desc->short_source)))
+		return -EMSGSIZE;
+
+	if (desc->mode == IEEE802154_SCF_KEY_HW_INDEX &&
+	    nla_put_hwaddr(msg, IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED,
+			   desc->extended_source))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+/* IEEE802154_LLSEC_GETPARAMS: reply with the llsec parameters of a device. */
+int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	struct net_device *dev = NULL;
+	int rc;
+	struct ieee802154_mlme_ops *ops;
+	void *hdr;
+	struct ieee802154_llsec_params params;
+
+	pr_debug("%s\n", __func__);
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	/* from here on every exit has to go through dev_put() */
+	ops = ieee802154_mlme_ops(dev);
+	rc = ops->llsec ? ops->llsec->get_params(dev, &params) : -EOPNOTSUPP;
+	if (rc < 0)
+		goto out_dev;
+
+	/* rc is 0 now; failures while building the reply must not return it */
+	rc = -ENOBUFS;
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		goto out_dev;
+
+	hdr = genlmsg_put(msg, 0, info->snd_seq, &nl802154_family, 0,
+		IEEE802154_LLSEC_GETPARAMS);
+	if (!hdr)
+		goto out_free;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_ENABLED, params.enabled) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+			be32_to_cpu(params.frame_counter)) ||
+	    ieee802154_llsec_fill_key_id(msg, &params.out_key))
+		goto out_free;
+
+	dev_put(dev);
+	return ieee802154_nl_reply(msg, info);
+out_free:
+	nlmsg_free(msg);
+out_dev:
+	dev_put(dev);
+	return rc;
+}
+
+int ieee802154_llsec_setparams(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net_device *dev = NULL;
+	int rc = -EINVAL;
+	struct ieee802154_mlme_ops *ops;
+	struct ieee802154_llsec_params params;
+	int changed = 0;
+
+	pr_debug("%s\n", __func__);
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_ENABLED] &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE] &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL])
+		goto out;
+
+	ops = ieee802154_mlme_ops(dev);
+	if (!ops->llsec) {
+		rc = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL] &&
+	    nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) > 7)
+		goto out;
+
+	if (info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]) {
+		params.enabled = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_ENABLED]);
+		changed |= IEEE802154_LLSEC_PARAM_ENABLED;
+	}
+
+	if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]) {
+		if (ieee802154_llsec_parse_key_id(info, &params.out_key))
+			goto out;
+
+		changed |= IEEE802154_LLSEC_PARAM_OUT_KEY;
+	}
+
+	if (info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]) {
+		params.out_level = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVEL]);
+		changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL;
+	}
+
+	if (info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]) {
+		u32 fc = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]);
+
+		params.frame_counter = cpu_to_be32(fc);
+		changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER;
+	}
+
+	rc = ops->llsec->set_params(dev, &params, changed);
+
+	dev_put(dev);
+
+	return rc;
+out:
+	dev_put(dev);
+	return rc;
+}
+
+
+
+struct llsec_dump_data {
+	struct sk_buff *skb;
+	int s_idx, s_idx2;
+	int portid;
+	int nlmsg_seq;
+	struct net_device *dev;
+	struct ieee802154_mlme_ops *ops;
+	struct ieee802154_llsec_table *table;
+};
+
+/* Run @step on each 802.15.4 netdev's llsec table, resuming from cb->args. */
+static int
+ieee802154_llsec_dump_table(struct sk_buff *skb, struct netlink_callback *cb,
+			    int (*step)(struct llsec_dump_data*))
+{
+	struct net *net = sock_net(skb->sk);
+	struct net_device *dev;
+	struct llsec_dump_data data;
+	int idx = 0, first_dev = cb->args[0], rc;
+
+	for_each_netdev(net, dev) {
+		if (idx < first_dev || dev->type != ARPHRD_IEEE802154)
+			goto skip;
+
+		data.ops = ieee802154_mlme_ops(dev);
+		if (!data.ops->llsec)
+			goto skip;
+
+		data.skb = skb;
+		data.s_idx = cb->args[1];
+		data.s_idx2 = cb->args[2];
+		data.dev = dev;
+		data.portid = NETLINK_CB(cb->skb).portid;
+		data.nlmsg_seq = cb->nlh->nlmsg_seq;
+
+		data.ops->llsec->lock_table(dev);
+		data.ops->llsec->get_table(data.dev, &data.table);
+		rc = step(&data);
+		data.ops->llsec->unlock_table(dev);
+
+		/* a full skb resumes mid-table; a finished table restarts at 0 */
+		cb->args[1] = rc < 0 ? data.s_idx : 0;
+		cb->args[2] = rc < 0 ? data.s_idx2 : 0;
+		if (rc < 0)
+			break;
+skip:
+		idx++;
+	}
+	cb->args[0] = idx;
+	return skb->len;
+}
+
+static int
+ieee802154_nl_llsec_change(struct sk_buff *skb, struct genl_info *info,
+			   int (*fn)(struct net_device*, struct genl_info*))
+{
+	struct net_device *dev = NULL;
+	int rc = -EINVAL;
+
+	dev = ieee802154_nl_get_dev(info);
+	if (!dev)
+		return -ENODEV;
+
+	if (!ieee802154_mlme_ops(dev)->llsec)
+		rc = -EOPNOTSUPP;
+	else
+		rc = fn(dev, info);
+
+	dev_put(dev);
+	return rc;
+}
+
+
+
+static int
+ieee802154_llsec_parse_key(struct genl_info *info,
+			   struct ieee802154_llsec_key *key)
+{
+	u8 frames;
+	u32 commands[256 / 32];
+
+	memset(key, 0, sizeof(*key));
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] ||
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES])
+		return -EINVAL;
+
+	frames = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES]);
+	if ((frames & BIT(IEEE802154_FC_TYPE_MAC_CMD)) &&
+	    !info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS])
+		return -EINVAL;
+
+	if (info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS]) {
+		nla_memcpy(commands,
+			   info->attrs[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS],
+			   256 / 8);
+
+		if (commands[0] || commands[1] || commands[2] || commands[3] ||
+		    commands[4] || commands[5] || commands[6] ||
+		    commands[7] >= BIT(IEEE802154_CMD_GTS_REQ + 1))
+			return -EINVAL;
+
+		key->cmd_frame_ids = commands[7];
+	}
+
+	key->frame_types = frames;
+
+	nla_memcpy(key->key, info->attrs[IEEE802154_ATTR_LLSEC_KEY_BYTES],
+		   IEEE802154_LLSEC_KEY_SIZE);
+
+	return 0;
+}
+
+static int llsec_add_key(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_key key;
+	struct ieee802154_llsec_key_id id;
+
+	if (ieee802154_llsec_parse_key(info, &key) ||
+	    ieee802154_llsec_parse_key_id(info, &id))
+		return -EINVAL;
+
+	return ops->llsec->add_key(dev, &id, &key);
+}
+
+int ieee802154_llsec_add_key(struct sk_buff *skb, struct genl_info *info)
+{
+	if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) !=
+	    (NLM_F_CREATE | NLM_F_EXCL))
+		return -EINVAL;
+
+	return ieee802154_nl_llsec_change(skb, info, llsec_add_key);
+}
+
+static int llsec_remove_key(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_key_id id;
+
+	if (ieee802154_llsec_parse_key_id(info, &id))
+		return -EINVAL;
+
+	return ops->llsec->del_key(dev, &id);
+}
+
+int ieee802154_llsec_del_key(struct sk_buff *skb, struct genl_info *info)
+{
+	return ieee802154_nl_llsec_change(skb, info, llsec_remove_key);
+}
+
+static int
+ieee802154_nl_fill_key(struct sk_buff *msg, u32 portid, u32 seq,
+		       const struct ieee802154_llsec_key_entry *key,
+		       const struct net_device *dev)
+{
+	void *hdr;
+	u32 commands[256 / 32];
+
+	hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+			  IEEE802154_LLSEC_LIST_KEY);
+	if (!hdr)
+		goto out;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+	    ieee802154_llsec_fill_key_id(msg, &key->id) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES,
+		       key->key->frame_types))
+		goto nla_put_failure;
+
+	if (key->key->frame_types & BIT(IEEE802154_FC_TYPE_MAC_CMD)) {
+		memset(commands, 0, sizeof(commands));
+		commands[7] = key->key->cmd_frame_ids;
+		if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS,
+			    sizeof(commands), commands))
+			goto nla_put_failure;
+	}
+
+	if (nla_put(msg, IEEE802154_ATTR_LLSEC_KEY_BYTES,
+		    IEEE802154_LLSEC_KEY_SIZE, key->key->key))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+out:
+	return -EMSGSIZE;
+}
+
+static int llsec_iter_keys(struct llsec_dump_data *data)
+{
+	struct ieee802154_llsec_key_entry *pos;
+	int rc = 0, idx = 0;
+
+	list_for_each_entry(pos, &data->table->keys, list) {
+		if (idx++ < data->s_idx)
+			continue;
+
+		if (ieee802154_nl_fill_key(data->skb, data->portid,
+					   data->nlmsg_seq, pos, data->dev)) {
+			rc = -EMSGSIZE;
+			break;
+		}
+
+		data->s_idx++;
+	}
+
+	return rc;
+}
+
+int ieee802154_llsec_dump_keys(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return ieee802154_llsec_dump_table(skb, cb, llsec_iter_keys);
+}
+
+
+
+static int
+llsec_parse_dev(struct genl_info *info,
+		struct ieee802154_llsec_device *dev)
+{
+	memset(dev, 0, sizeof(*dev));
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] ||
+	    !info->attrs[IEEE802154_ATTR_HW_ADDR] ||
+	    !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] ||
+	    !info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] ||
+	    (!!info->attrs[IEEE802154_ATTR_PAN_ID] !=
+	     !!info->attrs[IEEE802154_ATTR_SHORT_ADDR]))
+		return -EINVAL;
+
+	if (info->attrs[IEEE802154_ATTR_PAN_ID]) {
+		dev->pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]);
+		dev->short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]);
+	} else {
+		dev->short_addr = cpu_to_le16(IEEE802154_ADDR_UNDEF);
+	}
+
+	dev->hwaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+	dev->frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]);
+	dev->seclevel_exempt = !!nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]);
+	dev->key_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE]);
+
+	if (dev->key_mode >= __IEEE802154_LLSEC_DEVKEY_MAX)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int llsec_add_dev(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_device desc;
+
+	if (llsec_parse_dev(info, &desc))
+		return -EINVAL;
+
+	return ops->llsec->add_dev(dev, &desc);
+}
+
+int ieee802154_llsec_add_dev(struct sk_buff *skb, struct genl_info *info)
+{
+	if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) !=
+	    (NLM_F_CREATE | NLM_F_EXCL))
+		return -EINVAL;
+
+	return ieee802154_nl_llsec_change(skb, info, llsec_add_dev);
+}
+
+static int llsec_del_dev(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	__le64 devaddr;
+
+	if (!info->attrs[IEEE802154_ATTR_HW_ADDR])
+		return -EINVAL;
+
+	devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+
+	return ops->llsec->del_dev(dev, devaddr);
+}
+
+int ieee802154_llsec_del_dev(struct sk_buff *skb, struct genl_info *info)
+{
+	return ieee802154_nl_llsec_change(skb, info, llsec_del_dev);
+}
+
+static int
+ieee802154_nl_fill_dev(struct sk_buff *msg, u32 portid, u32 seq,
+		       const struct ieee802154_llsec_device *desc,
+		       const struct net_device *dev)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+			  IEEE802154_LLSEC_LIST_DEV);
+	if (!hdr)
+		goto out;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+	    nla_put_shortaddr(msg, IEEE802154_ATTR_PAN_ID, desc->pan_id) ||
+	    nla_put_shortaddr(msg, IEEE802154_ATTR_SHORT_ADDR,
+			      desc->short_addr) ||
+	    nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, desc->hwaddr) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+			desc->frame_counter) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE,
+		       desc->seclevel_exempt) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_KEY_MODE, desc->key_mode))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+out:
+	return -EMSGSIZE;
+}
+
+static int llsec_iter_devs(struct llsec_dump_data *data)
+{
+	struct ieee802154_llsec_device *pos;
+	int rc = 0, idx = 0;
+
+	list_for_each_entry(pos, &data->table->devices, list) {
+		if (idx++ < data->s_idx)
+			continue;
+
+		if (ieee802154_nl_fill_dev(data->skb, data->portid,
+					   data->nlmsg_seq, pos, data->dev)) {
+			rc = -EMSGSIZE;
+			break;
+		}
+
+		data->s_idx++;
+	}
+
+	return rc;
+}
+
+int ieee802154_llsec_dump_devs(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devs);
+}
+
+
+
+static int llsec_add_devkey(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_device_key key;
+	__le64 devaddr;
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] ||
+	    !info->attrs[IEEE802154_ATTR_HW_ADDR] ||
+	    ieee802154_llsec_parse_key_id(info, &key.key_id))
+		return -EINVAL;
+
+	devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+	key.frame_counter = nla_get_u32(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_COUNTER]);
+
+	return ops->llsec->add_devkey(dev, devaddr, &key);
+}
+
+int ieee802154_llsec_add_devkey(struct sk_buff *skb, struct genl_info *info)
+{
+	if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) !=
+	    (NLM_F_CREATE | NLM_F_EXCL))
+		return -EINVAL;
+
+	return ieee802154_nl_llsec_change(skb, info, llsec_add_devkey);
+}
+
+static int llsec_del_devkey(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_device_key key;
+	__le64 devaddr;
+
+	if (!info->attrs[IEEE802154_ATTR_HW_ADDR] ||
+	    ieee802154_llsec_parse_key_id(info, &key.key_id))
+		return -EINVAL;
+
+	devaddr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
+
+	return ops->llsec->del_devkey(dev, devaddr, &key);
+}
+
+int ieee802154_llsec_del_devkey(struct sk_buff *skb, struct genl_info *info)
+{
+	return ieee802154_nl_llsec_change(skb, info, llsec_del_devkey);
+}
+
+static int
+ieee802154_nl_fill_devkey(struct sk_buff *msg, u32 portid, u32 seq,
+			  __le64 devaddr,
+			  const struct ieee802154_llsec_device_key *devkey,
+			  const struct net_device *dev)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+			  IEEE802154_LLSEC_LIST_DEVKEY);
+	if (!hdr)
+		goto out;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+	    nla_put_hwaddr(msg, IEEE802154_ATTR_HW_ADDR, devaddr) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
+			devkey->frame_counter) ||
+	    ieee802154_llsec_fill_key_id(msg, &devkey->key_id))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+out:
+	return -EMSGSIZE;
+}
+
+/* Dump the keys of every llsec device, resuming at (s_idx, s_idx2). */
+static int llsec_iter_devkeys(struct llsec_dump_data *data)
+{
+	struct ieee802154_llsec_device *dpos;
+	struct ieee802154_llsec_device_key *kpos;
+	int idx = 0, idx2;
+
+	list_for_each_entry(dpos, &data->table->devices, list) {
+		if (idx++ < data->s_idx)
+			continue;
+
+		idx2 = 0;
+		list_for_each_entry(kpos, &dpos->keys, list) {
+			if (idx2++ < data->s_idx2)
+				continue;
+
+			if (ieee802154_nl_fill_devkey(data->skb, data->portid,
+						      data->nlmsg_seq,
+						      dpos->hwaddr, kpos,
+						      data->dev))
+				return -EMSGSIZE;
+
+			data->s_idx2++;
+		}
+
+		data->s_idx2 = 0;	/* key offset applies only to the resumed device */
+		data->s_idx++;
+	}
+
+	return 0;
+}
+
+int ieee802154_llsec_dump_devkeys(struct sk_buff *skb,
+				  struct netlink_callback *cb)
+{
+	return ieee802154_llsec_dump_table(skb, cb, llsec_iter_devkeys);
+}
+
+
+
+static int
+llsec_parse_seclevel(struct genl_info *info,
+		     struct ieee802154_llsec_seclevel *sl)
+{
+	memset(sl, 0, sizeof(*sl));
+
+	if (!info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE] ||
+	    !info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS] ||
+	    !info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE])
+		return -EINVAL;
+
+	sl->frame_type = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_FRAME_TYPE]);
+	if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD) {
+		if (!info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID])
+			return -EINVAL;
+
+		sl->cmd_frame_id = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID]);
+	}
+
+	sl->sec_levels = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_SECLEVELS]);
+	sl->device_override = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE]);
+
+	return 0;
+}
+
+static int llsec_add_seclevel(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_seclevel sl;
+
+	if (llsec_parse_seclevel(info, &sl))
+		return -EINVAL;
+
+	return ops->llsec->add_seclevel(dev, &sl);
+}
+
+int ieee802154_llsec_add_seclevel(struct sk_buff *skb, struct genl_info *info)
+{
+	if ((info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) !=
+	    (NLM_F_CREATE | NLM_F_EXCL))
+		return -EINVAL;
+
+	return ieee802154_nl_llsec_change(skb, info, llsec_add_seclevel);
+}
+
+static int llsec_del_seclevel(struct net_device *dev, struct genl_info *info)
+{
+	struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
+	struct ieee802154_llsec_seclevel sl;
+
+	if (llsec_parse_seclevel(info, &sl))
+		return -EINVAL;
+
+	return ops->llsec->del_seclevel(dev, &sl);
+}
+
+int ieee802154_llsec_del_seclevel(struct sk_buff *skb, struct genl_info *info)
+{
+	return ieee802154_nl_llsec_change(skb, info, llsec_del_seclevel);
+}
+
+static int
+ieee802154_nl_fill_seclevel(struct sk_buff *msg, u32 portid, u32 seq,
+			    const struct ieee802154_llsec_seclevel *sl,
+			    const struct net_device *dev)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, 0, seq, &nl802154_family, NLM_F_MULTI,
+			  IEEE802154_LLSEC_LIST_SECLEVEL);
+	if (!hdr)
+		goto out;
+
+	if (nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name) ||
+	    nla_put_u32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_FRAME_TYPE, sl->frame_type) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVELS, sl->sec_levels) ||
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_DEV_OVERRIDE,
+		       sl->device_override))
+		goto nla_put_failure;
+
+	if (sl->frame_type == IEEE802154_FC_TYPE_MAC_CMD &&
+	    nla_put_u8(msg, IEEE802154_ATTR_LLSEC_CMD_FRAME_ID,
+		       sl->cmd_frame_id))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+out:
+	return -EMSGSIZE;
+}
+
+static int llsec_iter_seclevels(struct llsec_dump_data *data)
+{
+	struct ieee802154_llsec_seclevel *pos;
+	int rc = 0, idx = 0;
+
+	list_for_each_entry(pos, &data->table->security_levels, list) {
+		if (idx++ < data->s_idx)
+			continue;
+
+		if (ieee802154_nl_fill_seclevel(data->skb, data->portid,
+						data->nlmsg_seq, pos,
+						data->dev)) {
+			rc = -EMSGSIZE;
+			break;
+		}
+
+		data->s_idx++;
+	}
+
+	return rc;
+}
+
+int ieee802154_llsec_dump_seclevels(struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	return ieee802154_llsec_dump_table(skb, cb, llsec_iter_seclevels);
+}
diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c
index fd7be5e45cef..3a703ab88348 100644
--- a/net/ieee802154/nl_policy.c
+++ b/net/ieee802154/nl_policy.c
@@ -62,5 +62,21 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = {
 	[IEEE802154_ATTR_CSMA_MAX_BE] = { .type = NLA_U8, },
 
 	[IEEE802154_ATTR_FRAME_RETRIES] = { .type = NLA_S8, },
+
+	[IEEE802154_ATTR_LLSEC_ENABLED] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_SECLEVEL] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_KEY_MODE] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_KEY_SOURCE_SHORT] = { .type = NLA_U32, },
+	[IEEE802154_ATTR_LLSEC_KEY_SOURCE_EXTENDED] = { .type = NLA_HW_ADDR, },
+	[IEEE802154_ATTR_LLSEC_KEY_ID] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_FRAME_COUNTER] = { .type = NLA_U32 },
+	[IEEE802154_ATTR_LLSEC_KEY_BYTES] = { .len = 16, },
+	[IEEE802154_ATTR_LLSEC_KEY_USAGE_FRAME_TYPES] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_KEY_USAGE_COMMANDS] = { .len = 258 / 8 },
+	[IEEE802154_ATTR_LLSEC_FRAME_TYPE] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_CMD_FRAME_ID] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_SECLEVELS] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_DEV_OVERRIDE] = { .type = NLA_U8, },
+	[IEEE802154_ATTR_LLSEC_DEV_KEY_MODE] = { .type = NLA_U8, },
 };
 
-- 
cgit 


From 4085ebe8c31face855fd01ee40372cb4aab1df3a Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Fri, 16 May 2014 17:04:53 -0400
Subject: net: Find the nesting level of a given device by type.

Multiple devices in the kernel can be stacked/nested and they
need to know their nesting level for the purposes of lockdep.
This patch provides a generic function that determines a nesting
level of a particular device by its type (ex: vlan, macvlan, etc).
We only care about nesting of the same type of devices.

For example:
  eth0 <- vlan0.10 <- macvlan0 <- vlan1.20

The nesting level of vlan1.20 would be 1, since there is another vlan
in the stack under it.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 ++++++++++
 net/core/dev.c            | 50 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 20e99efb1ca6..fb912e8e5c7f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3077,6 +3077,14 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 	     priv; \
 	     priv = netdev_lower_get_next_private_rcu(dev, &(iter)))
 
+void *netdev_lower_get_next(struct net_device *dev,
+				struct list_head **iter);
+#define netdev_for_each_lower_dev(dev, ldev, iter) \
+	for (iter = &(dev)->adj_list.lower, \
+	     ldev = netdev_lower_get_next(dev, &(iter)); \
+	     ldev; \
+	     ldev = netdev_lower_get_next(dev, &(iter)))
+
 void *netdev_adjacent_get_private(struct list_head *adj_list);
 void *netdev_lower_get_first_private_rcu(struct net_device *dev);
 struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
@@ -3092,6 +3100,8 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 void netdev_adjacent_rename_links(struct net_device *dev, char *oldname);
 void *netdev_lower_dev_get_private(struct net_device *dev,
 				   struct net_device *lower_dev);
+int dev_get_nest_level(struct net_device *dev,
+		       bool (*type_check)(struct net_device *dev));
 int skb_checksum_help(struct sk_buff *skb);
 struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 				  netdev_features_t features, bool tx_path);
diff --git a/net/core/dev.c b/net/core/dev.c
index ed928e846559..6ee3ac25ed72 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4622,6 +4622,32 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
 
+/**
+ * netdev_lower_get_next - Get the next device from the lower neighbour
+ *                         list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's lower neighbour
+ * list, starting from iter position. The caller must hold RTNL lock or
+ * its own locking that guarantees that the neighbour lower
+ * list will remain unchanged.
+ */
+void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
+{
+	struct netdev_adjacent *lower;
+
+	lower = list_entry((*iter)->next, struct netdev_adjacent, list);
+
+	if (&lower->list == &dev->adj_list.lower)
+		return NULL;
+
+	*iter = &lower->list;
+
+	return lower->dev;
+}
+EXPORT_SYMBOL(netdev_lower_get_next);
+
 /**
  * netdev_lower_get_first_private_rcu - Get the first ->private from the
  *				       lower neighbour list, RCU
@@ -5072,6 +5098,30 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_lower_dev_get_private);
 
+
+int dev_get_nest_level(struct net_device *dev,
+		       bool (*type_check)(struct net_device *dev))
+{
+	struct net_device *lower = NULL;
+	struct list_head *iter;
+	int max_nest = -1;
+	int nest;
+
+	ASSERT_RTNL();
+
+	netdev_for_each_lower_dev(dev, lower, iter) {
+		nest = dev_get_nest_level(lower, type_check);
+		if (max_nest < nest)
+			max_nest = nest;
+	}
+
+	if (type_check(dev))
+		max_nest++;
+
+	return max_nest;
+}
+EXPORT_SYMBOL(dev_get_nest_level);
+
 static void dev_change_rx_flags(struct net_device *dev, int flags)
 {
 	const struct net_device_ops *ops = dev->netdev_ops;
-- 
cgit 


From 25175ba5c9bff9aaf0229df34bb5d54c81633ec3 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Fri, 16 May 2014 17:04:54 -0400
Subject: net: Allow for more then a single subclass for netif_addr_lock

Currently netif_addr_lock_nested assumes that there can be only
a single nesting level between 2 devices.  However, if we
have multiple devices of the same type stacked, this fails.
For example:
 eth0 <-- vlan0.10 <-- vlan0.10.20

A more complicated configuration may stack more than one type of
device in different order.
Ex:
  eth0 <-- vlan0.10 <-- macvlan0 <-- vlan1.10.20 <-- macvlan1

This patch adds an ndo_* function that allows each stackable
device to report its nesting level.  If the device doesn't
provide this function, the default subclass of 1 is used.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fb912e8e5c7f..9d4b1f1b6b75 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1144,6 +1144,7 @@ struct net_device_ops {
 	netdev_tx_t		(*ndo_dfwd_start_xmit) (struct sk_buff *skb,
 							struct net_device *dev,
 							void *priv);
+	int			(*ndo_get_lock_subclass)(struct net_device *dev);
 };
 
 /**
@@ -2950,7 +2951,12 @@ static inline void netif_addr_lock(struct net_device *dev)
 
 static inline void netif_addr_lock_nested(struct net_device *dev)
 {
-	spin_lock_nested(&dev->addr_list_lock, SINGLE_DEPTH_NESTING);
+	int subclass = SINGLE_DEPTH_NESTING;
+
+	if (dev->netdev_ops->ndo_get_lock_subclass)
+		subclass = dev->netdev_ops->ndo_get_lock_subclass(dev);
+
+	spin_lock_nested(&dev->addr_list_lock, subclass);
 }
 
 static inline void netif_addr_lock_bh(struct net_device *dev)
-- 
cgit 


From d38569ab2bba6e6b3233acfc3a84cdbcfbd1f79f Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Fri, 16 May 2014 17:04:55 -0400
Subject: vlan: Fix lockdep warning with stacked vlan devices.

This reverts commit dc8eaaa006350d24030502a4521542e74b5cb39f.
	vlan: Fix lockdep warning when vlan dev handle notification

Instead we use the new API to find the lock subclass of
our vlan device.  This way we can support configurations where
vlans are interspersed with other devices:
  bond -> vlan -> macvlan -> vlan

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h |  3 ++-
 net/8021q/vlan.c        |  1 +
 net/8021q/vlan_dev.c    | 52 +++++++++----------------------------------------
 net/core/dev.c          |  1 -
 4 files changed, 12 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 13bbbde00e68..724bde8477b2 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -73,7 +73,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 /* found in socket.c */
 extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
 
-static inline int is_vlan_dev(struct net_device *dev)
+static inline bool is_vlan_dev(struct net_device *dev)
 {
         return dev->priv_flags & IFF_802_1Q_VLAN;
 }
@@ -159,6 +159,7 @@ struct vlan_dev_priv {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	struct netpoll				*netpoll;
 #endif
+	unsigned int				nest_level;
 };
 
 static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 175273f38cb1..44ebd5c2cd4a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -169,6 +169,7 @@ int register_vlan_dev(struct net_device *dev)
 	if (err < 0)
 		goto out_uninit_mvrp;
 
+	vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1;
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto out_uninit_mvrp;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 733ec283ed1b..019efb79708f 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -493,48 +493,10 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 	}
 }
 
-static int vlan_calculate_locking_subclass(struct net_device *real_dev)
-{
-	int subclass = 0;
-
-	while (is_vlan_dev(real_dev)) {
-		subclass++;
-		real_dev = vlan_dev_priv(real_dev)->real_dev;
-	}
-
-	return subclass;
-}
-
-static void vlan_dev_mc_sync(struct net_device *to, struct net_device *from)
-{
-	int err = 0, subclass;
-
-	subclass = vlan_calculate_locking_subclass(to);
-
-	spin_lock_nested(&to->addr_list_lock, subclass);
-	err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
-	if (!err)
-		__dev_set_rx_mode(to);
-	spin_unlock(&to->addr_list_lock);
-}
-
-static void vlan_dev_uc_sync(struct net_device *to, struct net_device *from)
-{
-	int err = 0, subclass;
-
-	subclass = vlan_calculate_locking_subclass(to);
-
-	spin_lock_nested(&to->addr_list_lock, subclass);
-	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
-	if (!err)
-		__dev_set_rx_mode(to);
-	spin_unlock(&to->addr_list_lock);
-}
-
 static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
 {
-	vlan_dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
-	vlan_dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
+	dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
+	dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
 }
 
 /*
@@ -562,6 +524,11 @@ static void vlan_dev_set_lockdep_class(struct net_device *dev, int subclass)
 	netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, &subclass);
 }
 
+static int vlan_dev_get_lock_subclass(struct net_device *dev)
+{
+	return vlan_dev_priv(dev)->nest_level;
+}
+
 static const struct header_ops vlan_header_ops = {
 	.create	 = vlan_dev_hard_header,
 	.rebuild = vlan_dev_rebuild_header,
@@ -597,7 +564,6 @@ static const struct net_device_ops vlan_netdev_ops;
 static int vlan_dev_init(struct net_device *dev)
 {
 	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
-	int subclass = 0;
 
 	netif_carrier_off(dev);
 
@@ -646,8 +612,7 @@ static int vlan_dev_init(struct net_device *dev)
 
 	SET_NETDEV_DEVTYPE(dev, &vlan_type);
 
-	subclass = vlan_calculate_locking_subclass(dev);
-	vlan_dev_set_lockdep_class(dev, subclass);
+	vlan_dev_set_lockdep_class(dev, vlan_dev_get_lock_subclass(dev));
 
 	vlan_dev_priv(dev)->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
 	if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
@@ -819,6 +784,7 @@ static const struct net_device_ops vlan_netdev_ops = {
 	.ndo_netpoll_cleanup	= vlan_dev_netpoll_cleanup,
 #endif
 	.ndo_fix_features	= vlan_dev_fix_features,
+	.ndo_get_lock_subclass  = vlan_dev_get_lock_subclass,
 };
 
 void vlan_setup(struct net_device *dev)
diff --git a/net/core/dev.c b/net/core/dev.c
index 6ee3ac25ed72..2b872bfbd172 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5287,7 +5287,6 @@ void __dev_set_rx_mode(struct net_device *dev)
 	if (ops->ndo_set_rx_mode)
 		ops->ndo_set_rx_mode(dev);
 }
-EXPORT_SYMBOL(__dev_set_rx_mode);
 
 void dev_set_rx_mode(struct net_device *dev)
 {
-- 
cgit 


From c674ac30c549596295eb0a5af7f4714c0b905b6f Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Fri, 16 May 2014 17:04:56 -0400
Subject: macvlan: Fix lockdep warnings with stacked macvlan devices

Macvlan devices try to avoid stacking, but that's not always
successful or even desired.  As an example, the following
configuration is perfectly legal and valid:

eth0 <--- macvlan0 <---- vlan0.10 <--- macvlan1

However, this configuration produces the following lockdep
trace:
[  115.620418] ======================================================
[  115.620477] [ INFO: possible circular locking dependency detected ]
[  115.620516] 3.15.0-rc1+ #24 Not tainted
[  115.620540] -------------------------------------------------------
[  115.620577] ip/1704 is trying to acquire lock:
[  115.620604]  (&vlan_netdev_addr_lock_key/1){+.....}, at: [<ffffffff815df49c>] dev_uc_sync+0x3c/0x80
[  115.620686]
but task is already holding lock:
[  115.620723]  (&macvlan_netdev_addr_lock_key){+.....}, at: [<ffffffff815da5be>] dev_set_rx_mode+0x1e/0x40
[  115.620795]
which lock already depends on the new lock.

[  115.620853]
the existing dependency chain (in reverse order) is:
[  115.620894]
-> #1 (&macvlan_netdev_addr_lock_key){+.....}:
[  115.620935]        [<ffffffff810d57f2>] lock_acquire+0xa2/0x130
[  115.620974]        [<ffffffff816f62e7>] _raw_spin_lock_nested+0x37/0x50
[  115.621019]        [<ffffffffa07296c3>] vlan_dev_set_rx_mode+0x53/0x110 [8021q]
[  115.621066]        [<ffffffff815da557>] __dev_set_rx_mode+0x57/0xa0
[  115.621105]        [<ffffffff815da5c6>] dev_set_rx_mode+0x26/0x40
[  115.621143]        [<ffffffff815da6be>] __dev_open+0xde/0x140
[  115.621174]        [<ffffffff815da9ad>] __dev_change_flags+0x9d/0x170
[  115.621174]        [<ffffffff815daaa9>] dev_change_flags+0x29/0x60
[  115.621174]        [<ffffffff815e7f11>] do_setlink+0x321/0x9a0
[  115.621174]        [<ffffffff815ea59f>] rtnl_newlink+0x51f/0x730
[  115.621174]        [<ffffffff815e6e75>] rtnetlink_rcv_msg+0x95/0x250
[  115.621174]        [<ffffffff81608b19>] netlink_rcv_skb+0xa9/0xc0
[  115.621174]        [<ffffffff815e6dca>] rtnetlink_rcv+0x2a/0x40
[  115.621174]        [<ffffffff81608150>] netlink_unicast+0xf0/0x1c0
[  115.621174]        [<ffffffff8160851f>] netlink_sendmsg+0x2ff/0x740
[  115.621174]        [<ffffffff815bc9db>] sock_sendmsg+0x8b/0xc0
[  115.621174]        [<ffffffff815bd4b9>] ___sys_sendmsg+0x369/0x380
[  115.621174]        [<ffffffff815bdbb2>] __sys_sendmsg+0x42/0x80
[  115.621174]        [<ffffffff815bdc02>] SyS_sendmsg+0x12/0x20
[  115.621174]        [<ffffffff816ffd69>] system_call_fastpath+0x16/0x1b
[  115.621174]
-> #0 (&vlan_netdev_addr_lock_key/1){+.....}:
[  115.621174]        [<ffffffff810d4d43>] __lock_acquire+0x1773/0x1a60
[  115.621174]        [<ffffffff810d57f2>] lock_acquire+0xa2/0x130
[  115.621174]        [<ffffffff816f62e7>] _raw_spin_lock_nested+0x37/0x50
[  115.621174]        [<ffffffff815df49c>] dev_uc_sync+0x3c/0x80
[  115.621174]        [<ffffffffa0696d2a>] macvlan_set_mac_lists+0xca/0x110 [macvlan]
[  115.621174]        [<ffffffff815da557>] __dev_set_rx_mode+0x57/0xa0
[  115.621174]        [<ffffffff815da5c6>] dev_set_rx_mode+0x26/0x40
[  115.621174]        [<ffffffff815da6be>] __dev_open+0xde/0x140
[  115.621174]        [<ffffffff815da9ad>] __dev_change_flags+0x9d/0x170
[  115.621174]        [<ffffffff815daaa9>] dev_change_flags+0x29/0x60
[  115.621174]        [<ffffffff815e7f11>] do_setlink+0x321/0x9a0
[  115.621174]        [<ffffffff815ea59f>] rtnl_newlink+0x51f/0x730
[  115.621174]        [<ffffffff815e6e75>] rtnetlink_rcv_msg+0x95/0x250
[  115.621174]        [<ffffffff81608b19>] netlink_rcv_skb+0xa9/0xc0
[  115.621174]        [<ffffffff815e6dca>] rtnetlink_rcv+0x2a/0x40
[  115.621174]        [<ffffffff81608150>] netlink_unicast+0xf0/0x1c0
[  115.621174]        [<ffffffff8160851f>] netlink_sendmsg+0x2ff/0x740
[  115.621174]        [<ffffffff815bc9db>] sock_sendmsg+0x8b/0xc0
[  115.621174]        [<ffffffff815bd4b9>] ___sys_sendmsg+0x369/0x380
[  115.621174]        [<ffffffff815bdbb2>] __sys_sendmsg+0x42/0x80
[  115.621174]        [<ffffffff815bdc02>] SyS_sendmsg+0x12/0x20
[  115.621174]        [<ffffffff816ffd69>] system_call_fastpath+0x16/0x1b
[  115.621174]
other info that might help us debug this:

[  115.621174]  Possible unsafe locking scenario:

[  115.621174]        CPU0                    CPU1
[  115.621174]        ----                    ----
[  115.621174]   lock(&macvlan_netdev_addr_lock_key);
[  115.621174]                                lock(&vlan_netdev_addr_lock_key/1);
[  115.621174]                                lock(&macvlan_netdev_addr_lock_key);
[  115.621174]   lock(&vlan_netdev_addr_lock_key/1);
[  115.621174]
 *** DEADLOCK ***

[  115.621174] 2 locks held by ip/1704:
[  115.621174]  #0:  (rtnl_mutex){+.+.+.}, at: [<ffffffff815e6dbb>] rtnetlink_rcv+0x1b/0x40
[  115.621174]  #1:  (&macvlan_netdev_addr_lock_key){+.....}, at: [<ffffffff815da5be>] dev_set_rx_mode+0x1e/0x40
[  115.621174]
stack backtrace:
[  115.621174] CPU: 3 PID: 1704 Comm: ip Not tainted 3.15.0-rc1+ #24
[  115.621174] Hardware name: Hewlett-Packard HP xw8400 Workstation/0A08h, BIOS 786D5 v02.38 10/25/2010
[  115.621174]  ffffffff82339ae0 ffff880465f79568 ffffffff816ee20c ffffffff82339ae0
[  115.621174]  ffff880465f795a8 ffffffff816e9e1b ffff880465f79600 ffff880465b019c8
[  115.621174]  0000000000000001 0000000000000002 ffff880465b019c8 ffff880465b01230
[  115.621174] Call Trace:
[  115.621174]  [<ffffffff816ee20c>] dump_stack+0x4d/0x66
[  115.621174]  [<ffffffff816e9e1b>] print_circular_bug+0x200/0x20e
[  115.621174]  [<ffffffff810d4d43>] __lock_acquire+0x1773/0x1a60
[  115.621174]  [<ffffffff810d3172>] ? trace_hardirqs_on_caller+0xb2/0x1d0
[  115.621174]  [<ffffffff810d57f2>] lock_acquire+0xa2/0x130
[  115.621174]  [<ffffffff815df49c>] ? dev_uc_sync+0x3c/0x80
[  115.621174]  [<ffffffff816f62e7>] _raw_spin_lock_nested+0x37/0x50
[  115.621174]  [<ffffffff815df49c>] ? dev_uc_sync+0x3c/0x80
[  115.621174]  [<ffffffff815df49c>] dev_uc_sync+0x3c/0x80
[  115.621174]  [<ffffffffa0696d2a>] macvlan_set_mac_lists+0xca/0x110 [macvlan]
[  115.621174]  [<ffffffff815da557>] __dev_set_rx_mode+0x57/0xa0
[  115.621174]  [<ffffffff815da5c6>] dev_set_rx_mode+0x26/0x40
[  115.621174]  [<ffffffff815da6be>] __dev_open+0xde/0x140
[  115.621174]  [<ffffffff815da9ad>] __dev_change_flags+0x9d/0x170
[  115.621174]  [<ffffffff815daaa9>] dev_change_flags+0x29/0x60
[  115.621174]  [<ffffffff811e1db1>] ? mem_cgroup_bad_page_check+0x21/0x30
[  115.621174]  [<ffffffff815e7f11>] do_setlink+0x321/0x9a0
[  115.621174]  [<ffffffff810d394c>] ? __lock_acquire+0x37c/0x1a60
[  115.621174]  [<ffffffff815ea59f>] rtnl_newlink+0x51f/0x730
[  115.621174]  [<ffffffff815ea169>] ? rtnl_newlink+0xe9/0x730
[  115.621174]  [<ffffffff815e6e75>] rtnetlink_rcv_msg+0x95/0x250
[  115.621174]  [<ffffffff810d329d>] ? trace_hardirqs_on+0xd/0x10
[  115.621174]  [<ffffffff815e6dbb>] ? rtnetlink_rcv+0x1b/0x40
[  115.621174]  [<ffffffff815e6de0>] ? rtnetlink_rcv+0x40/0x40
[  115.621174]  [<ffffffff81608b19>] netlink_rcv_skb+0xa9/0xc0
[  115.621174]  [<ffffffff815e6dca>] rtnetlink_rcv+0x2a/0x40
[  115.621174]  [<ffffffff81608150>] netlink_unicast+0xf0/0x1c0
[  115.621174]  [<ffffffff8160851f>] netlink_sendmsg+0x2ff/0x740
[  115.621174]  [<ffffffff815bc9db>] sock_sendmsg+0x8b/0xc0
[  115.621174]  [<ffffffff8119d4af>] ? might_fault+0x5f/0xb0
[  115.621174]  [<ffffffff8119d4f8>] ? might_fault+0xa8/0xb0
[  115.621174]  [<ffffffff8119d4af>] ? might_fault+0x5f/0xb0
[  115.621174]  [<ffffffff815cb51e>] ? verify_iovec+0x5e/0xe0
[  115.621174]  [<ffffffff815bd4b9>] ___sys_sendmsg+0x369/0x380
[  115.621174]  [<ffffffff816faa0d>] ? __do_page_fault+0x11d/0x570
[  115.621174]  [<ffffffff810cfe9f>] ? up_read+0x1f/0x40
[  115.621174]  [<ffffffff816fab04>] ? __do_page_fault+0x214/0x570
[  115.621174]  [<ffffffff8120a10b>] ? mntput_no_expire+0x6b/0x1c0
[  115.621174]  [<ffffffff8120a0b7>] ? mntput_no_expire+0x17/0x1c0
[  115.621174]  [<ffffffff8120a284>] ? mntput+0x24/0x40
[  115.621174]  [<ffffffff815bdbb2>] __sys_sendmsg+0x42/0x80
[  115.621174]  [<ffffffff815bdc02>] SyS_sendmsg+0x12/0x20
[  115.621174]  [<ffffffff816ffd69>] system_call_fastpath+0x16/0x1b

Fix this by correctly providing macvlan lockdep class.

Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      | 12 ++++++++++--
 include/linux/if_macvlan.h |  1 +
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index c5fb9cf95c12..d53e299ae1d9 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -517,6 +517,11 @@ static struct lock_class_key macvlan_netdev_addr_lock_key;
 #define MACVLAN_STATE_MASK \
 	((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))
 
+static int macvlan_get_nest_level(struct net_device *dev)
+{
+	return ((struct macvlan_dev *)netdev_priv(dev))->nest_level;
+}
+
 static void macvlan_set_lockdep_class_one(struct net_device *dev,
 					  struct netdev_queue *txq,
 					  void *_unused)
@@ -527,8 +532,9 @@ static void macvlan_set_lockdep_class_one(struct net_device *dev,
 
 static void macvlan_set_lockdep_class(struct net_device *dev)
 {
-	lockdep_set_class(&dev->addr_list_lock,
-			  &macvlan_netdev_addr_lock_key);
+	lockdep_set_class_and_subclass(&dev->addr_list_lock,
+				       &macvlan_netdev_addr_lock_key,
+				       macvlan_get_nest_level(dev));
 	netdev_for_each_tx_queue(dev, macvlan_set_lockdep_class_one, NULL);
 }
 
@@ -723,6 +729,7 @@ static const struct net_device_ops macvlan_netdev_ops = {
 	.ndo_fdb_add		= macvlan_fdb_add,
 	.ndo_fdb_del		= macvlan_fdb_del,
 	.ndo_fdb_dump		= ndo_dflt_fdb_dump,
+	.ndo_get_lock_subclass  = macvlan_get_nest_level,
 };
 
 void macvlan_common_setup(struct net_device *dev)
@@ -851,6 +858,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 	vlan->dev      = dev;
 	vlan->port     = port;
 	vlan->set_features = MACVLAN_FEATURES;
+	vlan->nest_level = dev_get_nest_level(lowerdev, netif_is_macvlan) + 1;
 
 	vlan->mode     = MACVLAN_MODE_VEPA;
 	if (data && data[IFLA_MACVLAN_MODE])
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 7c8b20b120ea..a9a53b12397b 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -56,6 +56,7 @@ struct macvlan_dev {
 	int			numqueues;
 	netdev_features_t	tap_features;
 	int			minor;
+	int			nest_level;
 };
 
 static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
-- 
cgit 


From 44a4085538c844e79d6ee6bcf46fabf7c57a9a38 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Fri, 16 May 2014 17:20:38 -0400
Subject: bonding: Fix stacked device detection in arp monitoring

Prior to commit fbd929f2dce460456807a51e18d623db3db9f077
	bonding: support QinQ for bond arp interval

the arp monitoring code allowed for proper detection of devices
stacked on top of vlans.  Since the above commit, the
code can still detect a device stacked on top of a single
vlan, but not a device stacked on top of a Q-in-Q configuration.
The search will only set the inner vlan tag if the route
device is the vlan device.  However, this is not always the
case, as it is possible to extend the stacked configuration.

With this patch it is possible to provision devices on
top of a Q-in-Q vlan configuration that should be used as
a source of ARP monitoring information.

For example:
ip link add link bond0 vlan10 type vlan proto 802.1q id 10
ip link add link vlan10 vlan100 type vlan proto 802.1q id 100
ip link add link vlan100 type macvlan

Note:  This patch limits the number of stacked VLANs to 2,
just like before.  The original, however, had another issue
in that if we had more than 2 levels of VLANs, we would end
up generating incorrectly tagged traffic.  This is no longer
possible.

Fixes: fbd929f2dce460456807a51e18d623db3db9f077 (bonding: support QinQ for bond arp interval)
CC: Jay Vosburgh <j.vosburgh@gmail.com>
CC: Veaceslav Falico <vfalico@redhat.com>
CC: Andy Gospodarek <andy@greyhouse.net>
CC: Ding Tianhong <dingtianhong@huawei.com>
CC: Patric McHardy <kaber@trash.net>
Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 134 +++++++++++++++++++---------------------
 drivers/net/bonding/bonding.h   |   1 +
 include/linux/if_vlan.h         |   6 ++
 include/linux/netdevice.h       |   9 +++
 net/core/dev.c                  |  26 ++++++++
 5 files changed, 107 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 69aff72c8957..d3a67896d435 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2126,10 +2126,10 @@ static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
  */
 static void bond_arp_send(struct net_device *slave_dev, int arp_op,
 			  __be32 dest_ip, __be32 src_ip,
-			  struct bond_vlan_tag *inner,
-			  struct bond_vlan_tag *outer)
+			  struct bond_vlan_tag *tags)
 {
 	struct sk_buff *skb;
+	int i;
 
 	pr_debug("arp %d on slave %s: dst %pI4 src %pI4\n",
 		 arp_op, slave_dev->name, &dest_ip, &src_ip);
@@ -2141,21 +2141,26 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op,
 		net_err_ratelimited("ARP packet allocation failed\n");
 		return;
 	}
-	if (outer->vlan_id) {
-		if (inner->vlan_id) {
-			pr_debug("inner tag: proto %X vid %X\n",
-				 ntohs(inner->vlan_proto), inner->vlan_id);
-			skb = __vlan_put_tag(skb, inner->vlan_proto,
-					     inner->vlan_id);
-			if (!skb) {
-				net_err_ratelimited("failed to insert inner VLAN tag\n");
-				return;
-			}
-		}
 
-		pr_debug("outer reg: proto %X vid %X\n",
-			 ntohs(outer->vlan_proto), outer->vlan_id);
-		skb = vlan_put_tag(skb, outer->vlan_proto, outer->vlan_id);
+	/* Go through all the tags backwards and add them to the packet */
+	for (i = BOND_MAX_VLAN_ENCAP - 1; i > 0; i--) {
+		if (!tags[i].vlan_id)
+			continue;
+
+		pr_debug("inner tag: proto %X vid %X\n",
+			 ntohs(tags[i].vlan_proto), tags[i].vlan_id);
+		skb = __vlan_put_tag(skb, tags[i].vlan_proto,
+				     tags[i].vlan_id);
+		if (!skb) {
+			net_err_ratelimited("failed to insert inner VLAN tag\n");
+			return;
+		}
+	}
+	/* Set the outer tag */
+	if (tags[0].vlan_id) {
+		pr_debug("outer tag: proto %X vid %X\n",
+			 ntohs(tags[0].vlan_proto), tags[0].vlan_id);
+		skb = vlan_put_tag(skb, tags[0].vlan_proto, tags[0].vlan_id);
 		if (!skb) {
 			net_err_ratelimited("failed to insert outer VLAN tag\n");
 			return;
@@ -2164,22 +2169,52 @@ static void bond_arp_send(struct net_device *slave_dev, int arp_op,
 	arp_xmit(skb);
 }
 
+/* Validate the device path between the @start_dev and the @end_dev.
+ * The path is valid if the @end_dev is reachable through device
+ * stacking.
+ * When the path is validated, collect any vlan information in the
+ * path.
+ */
+static bool bond_verify_device_path(struct net_device *start_dev,
+				    struct net_device *end_dev,
+				    struct bond_vlan_tag *tags)
+{
+	struct net_device *upper;
+	struct list_head  *iter;
+	int  idx;
+
+	if (start_dev == end_dev)
+		return true;
+
+	netdev_for_each_upper_dev_rcu(start_dev, upper, iter) {
+		if (bond_verify_device_path(upper, end_dev, tags)) {
+			if (is_vlan_dev(upper)) {
+				idx = vlan_get_encap_level(upper);
+				if (idx >= BOND_MAX_VLAN_ENCAP)
+					return false;
+
+				tags[idx].vlan_proto =
+						    vlan_dev_vlan_proto(upper);
+				tags[idx].vlan_id = vlan_dev_vlan_id(upper);
+			}
+			return true;
+		}
+	}
+
+	return false;
+}
 
 static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 {
-	struct net_device *upper, *vlan_upper;
-	struct list_head *iter, *vlan_iter;
 	struct rtable *rt;
-	struct bond_vlan_tag inner, outer;
+	struct bond_vlan_tag tags[BOND_MAX_VLAN_ENCAP];
 	__be32 *targets = bond->params.arp_targets, addr;
 	int i;
+	bool ret;
 
 	for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) {
 		pr_debug("basa: target %pI4\n", &targets[i]);
-		inner.vlan_proto = 0;
-		inner.vlan_id = 0;
-		outer.vlan_proto = 0;
-		outer.vlan_id = 0;
+		memset(tags, 0, sizeof(tags));
 
 		/* Find out through which dev should the packet go */
 		rt = ip_route_output(dev_net(bond->dev), targets[i], 0,
@@ -2192,7 +2227,8 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 				net_warn_ratelimited("%s: no route to arp_ip_target %pI4 and arp_validate is set\n",
 						     bond->dev->name,
 						     &targets[i]);
-			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 0, &inner, &outer);
+			bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
+				      0, tags);
 			continue;
 		}
 
@@ -2201,52 +2237,12 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
 			goto found;
 
 		rcu_read_lock();
-		/* first we search only for vlan devices. for every vlan
-		 * found we verify its upper dev list, searching for the
-		 * rt->dst.dev. If found we save the tag of the vlan and
-		 * proceed to send the packet.
-		 */
-		netdev_for_each_all_upper_dev_rcu(bond->dev, vlan_upper,
-						  vlan_iter) {
-			if (!is_vlan_dev(vlan_upper))
-				continue;
-
-			if (vlan_upper == rt->dst.dev) {
-				outer.vlan_proto = vlan_dev_vlan_proto(vlan_upper);
-				outer.vlan_id = vlan_dev_vlan_id(vlan_upper);
-				rcu_read_unlock();
-				goto found;
-			}
-			netdev_for_each_all_upper_dev_rcu(vlan_upper, upper,
-							  iter) {
-				if (upper == rt->dst.dev) {
-					/* If the upper dev is a vlan dev too,
-					 *  set the vlan tag to inner tag.
-					 */
-					if (is_vlan_dev(upper)) {
-						inner.vlan_proto = vlan_dev_vlan_proto(upper);
-						inner.vlan_id = vlan_dev_vlan_id(upper);
-					}
-					outer.vlan_proto = vlan_dev_vlan_proto(vlan_upper);
-					outer.vlan_id = vlan_dev_vlan_id(vlan_upper);
-					rcu_read_unlock();
-					goto found;
-				}
-			}
-		}
-
-		/* if the device we're looking for is not on top of any of
-		 * our upper vlans, then just search for any dev that
-		 * matches, and in case it's a vlan - save the id
-		 */
-		netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
-			if (upper == rt->dst.dev) {
-				rcu_read_unlock();
-				goto found;
-			}
-		}
+		ret = bond_verify_device_path(bond->dev, rt->dst.dev, tags);
 		rcu_read_unlock();
 
+		if (ret)
+			goto found;
+
 		/* Not our device - skip */
 		pr_debug("%s: no path to arp_ip_target %pI4 via rt.dev %s\n",
 			 bond->dev->name, &targets[i],
@@ -2259,7 +2255,7 @@ found:
 		addr = bond_confirm_addr(rt->dst.dev, targets[i], 0);
 		ip_rt_put(rt);
 		bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
-			      addr, &inner, &outer);
+			      addr, tags);
 	}
 }
 
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index b8bdd0acc8f3..00bea320e3b5 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -36,6 +36,7 @@
 
 #define bond_version DRV_DESCRIPTION ": v" DRV_VERSION " (" DRV_RELDATE ")\n"
 
+#define BOND_MAX_VLAN_ENCAP	2
 #define BOND_MAX_ARP_TARGETS	16
 
 #define BOND_DEFAULT_MIIMON	100
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 724bde8477b2..c901b13b6f03 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -484,4 +484,10 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
 		 */
 		skb->protocol = htons(ETH_P_802_2);
 }
+
+static inline int vlan_get_encap_level(struct net_device *dev)
+{
+	BUG_ON(!is_vlan_dev(dev));
+	return vlan_dev_priv(dev)->nest_level;
+}
 #endif /* !(_LINUX_IF_VLAN_H_) */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9d4b1f1b6b75..b42d07b0390b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3056,9 +3056,18 @@ extern int		weight_p;
 extern int		bpf_jit_enable;
 
 bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+						     struct list_head **iter);
 struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
 						     struct list_head **iter);
 
+/* iterate through upper list, must be called under RCU read lock */
+#define netdev_for_each_upper_dev_rcu(dev, updev, iter) \
+	for (iter = &(dev)->adj_list.upper, \
+	     updev = netdev_upper_get_next_dev_rcu(dev, &(iter)); \
+	     updev; \
+	     updev = netdev_upper_get_next_dev_rcu(dev, &(iter)))
+
 /* iterate through upper list, must be called under RCU read lock */
 #define netdev_for_each_all_upper_dev_rcu(dev, updev, iter) \
 	for (iter = &(dev)->all_adj_list.upper, \
diff --git a/net/core/dev.c b/net/core/dev.c
index 2b872bfbd172..9abc503b19b7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4541,6 +4541,32 @@ void *netdev_adjacent_get_private(struct list_head *adj_list)
 }
 EXPORT_SYMBOL(netdev_adjacent_get_private);
 
+/**
+ * netdev_upper_get_next_dev_rcu - Get the next dev from upper list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next device from the dev's upper list, starting from iter
+ * position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
+						 struct list_head **iter)
+{
+	struct netdev_adjacent *upper;
+
+	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
+
+	upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
+
+	if (&upper->list == &dev->adj_list.upper)
+		return NULL;
+
+	*iter = &upper->list;
+
+	return upper->dev;
+}
+EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
+
 /**
  * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
  * @dev: device
-- 
cgit 


From 6c4e548ff36672eeb78f8288a2920d66fa4a6a66 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 16 May 2014 21:48:22 +0200
Subject: net: cdc_ncm: use ethtool to tune coalescing settings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Datagram coalescing is an integral part of the NCM and MBIM
protocols, intended to reduce the interrupt load primarily
on the device end of the USB link.  As with all coalescing
solutions, there is a trade-off between buffering and
interrupts.

The current defaults are based on the assumption that device
side buffers should be the limiting factor.  However, many
modern high speed LTE modems suffer from buffer-bloat,
making this assumption fail. This results in sub-optimal
performance due to excessive coalescing.  And in cases where
such modems are connected to cheap embedded hosts there are
often severe buffer allocation issues, giving very noticeable
performance degradation.

A start on improving this is going from build time hard
coded limits to per device user configurable limits.  The
ethtool coalescing API was selected as user interface
because, although the tuned values are buffer sizes, these
settings directly control datagram coalescing.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 71 +++++++++++++++++++++++++++++++++++++++++++--
 include/linux/usb/cdc_ncm.h |  6 +++-
 2 files changed, 74 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 2ec3790a4db8..141dbec912be 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -65,6 +65,67 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx);
 static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer);
 static struct usb_driver cdc_ncm_driver;
 
+static int cdc_ncm_get_coalesce(struct net_device *netdev,
+				struct ethtool_coalesce *ec)
+{
+	struct usbnet *dev = netdev_priv(netdev);
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+
+	/* assuming maximum sized dgrams and ignoring NDPs */
+	ec->rx_max_coalesced_frames = ctx->rx_max / ctx->max_datagram_size;
+	ec->tx_max_coalesced_frames = ctx->tx_max / ctx->max_datagram_size;
+
+	/* the timer will fire CDC_NCM_TIMER_PENDING_CNT times in a row */
+	ec->tx_coalesce_usecs = (ctx->timer_interval * CDC_NCM_TIMER_PENDING_CNT) / NSEC_PER_USEC;
+	return 0;
+}
+
+static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx);
+
+static int cdc_ncm_set_coalesce(struct net_device *netdev,
+				struct ethtool_coalesce *ec)
+{
+	struct usbnet *dev = netdev_priv(netdev);
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	u32 new_rx_max = ctx->rx_max;
+	u32 new_tx_max = ctx->tx_max;
+
+	/* assuming maximum sized dgrams and a single NDP */
+	if (ec->rx_max_coalesced_frames)
+		new_rx_max = ec->rx_max_coalesced_frames * ctx->max_datagram_size;
+	if (ec->tx_max_coalesced_frames)
+		new_tx_max = ec->tx_max_coalesced_frames * ctx->max_datagram_size;
+
+	if (ec->tx_coalesce_usecs &&
+	    (ec->tx_coalesce_usecs < CDC_NCM_TIMER_INTERVAL_MIN * CDC_NCM_TIMER_PENDING_CNT ||
+	     ec->tx_coalesce_usecs > CDC_NCM_TIMER_INTERVAL_MAX * CDC_NCM_TIMER_PENDING_CNT))
+		return -EINVAL;
+
+	spin_lock_bh(&ctx->mtx);
+	ctx->timer_interval = ec->tx_coalesce_usecs * NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT;
+	if (!ctx->timer_interval)
+		ctx->tx_timer_pending = 0;
+	spin_unlock_bh(&ctx->mtx);
+
+	/* inform device of new values */
+	if (new_rx_max != ctx->rx_max || new_tx_max != ctx->tx_max)
+		cdc_ncm_update_rxtx_max(dev, new_rx_max, new_tx_max);
+	return 0;
+}
+
+static const struct ethtool_ops cdc_ncm_ethtool_ops = {
+	.get_settings      = usbnet_get_settings,
+	.set_settings      = usbnet_set_settings,
+	.get_link          = usbnet_get_link,
+	.nway_reset        = usbnet_nway_reset,
+	.get_drvinfo       = usbnet_get_drvinfo,
+	.get_msglevel      = usbnet_get_msglevel,
+	.set_msglevel      = usbnet_set_msglevel,
+	.get_ts_info       = ethtool_op_get_ts_info,
+	.get_coalesce      = cdc_ncm_get_coalesce,
+	.set_coalesce      = cdc_ncm_set_coalesce,
+};
+
 /* handle rx_max and tx_max changes */
 static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx)
 {
@@ -257,6 +318,9 @@ static int cdc_ncm_init(struct usbnet *dev)
 			(ctx->tx_max_datagrams > CDC_NCM_DPT_DATAGRAMS_MAX))
 		ctx->tx_max_datagrams = CDC_NCM_DPT_DATAGRAMS_MAX;
 
+	/* initial coalescing timer interval */
+	ctx->timer_interval = CDC_NCM_TIMER_INTERVAL_USEC * NSEC_PER_USEC;
+
 	return 0;
 }
 
@@ -596,6 +660,9 @@ advance:
 	/* finish setting up the device specific data */
 	cdc_ncm_setup(dev);
 
+	/* override ethtool_ops */
+	dev->net->ethtool_ops = &cdc_ncm_ethtool_ops;
+
 	return 0;
 
 error2:
@@ -863,7 +930,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		ctx->tx_curr_skb = skb_out;
 		goto exit_no_skb;
 
-	} else if ((n < ctx->tx_max_datagrams) && (ready2send == 0)) {
+	} else if ((n < ctx->tx_max_datagrams) && (ready2send == 0) && (ctx->timer_interval > 0)) {
 		/* wait for more frames */
 		/* push variables */
 		ctx->tx_curr_skb = skb_out;
@@ -915,7 +982,7 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx)
 	/* start timer, if not already started */
 	if (!(hrtimer_active(&ctx->tx_timer) || atomic_read(&ctx->stop)))
 		hrtimer_start(&ctx->tx_timer,
-				ktime_set(0, CDC_NCM_TIMER_INTERVAL),
+				ktime_set(0, ctx->timer_interval),
 				HRTIMER_MODE_REL);
 }
 
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 55b6feead93b..5c1066b4dc41 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -72,7 +72,9 @@
 /* Restart the timer, if amount of datagrams is less than given value */
 #define	CDC_NCM_RESTART_TIMER_DATAGRAM_CNT	3
 #define	CDC_NCM_TIMER_PENDING_CNT		2
-#define CDC_NCM_TIMER_INTERVAL			(400UL * NSEC_PER_USEC)
+#define CDC_NCM_TIMER_INTERVAL_USEC		400UL
+#define CDC_NCM_TIMER_INTERVAL_MIN		5UL
+#define CDC_NCM_TIMER_INTERVAL_MAX		(15UL * USEC_PER_SEC)
 
 /* The following macro defines the minimum header space */
 #define	CDC_NCM_MIN_HDR_SIZE \
@@ -107,6 +109,8 @@ struct cdc_ncm_ctx {
 	spinlock_t mtx;
 	atomic_t stop;
 
+	u64 timer_interval;
+
 	u32 tx_timer_pending;
 	u32 tx_curr_frame_num;
 	u32 rx_max;
-- 
cgit 


From 70559b8970e52aa9962dc823fd4498af06809544 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 16 May 2014 21:48:23 +0200
Subject: net: cdc_ncm: use true max dgram count for header estimates
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Many newer NCM and MBIM devices will request a maximum tx
datagram count which is much smaller than our hard-coded
absolute max. We can reduce the overhead without sacrificing
any of the simplicity for these devices, by simply using the
true negotiated count when calculating the maximum NTH and
NDP header sizes.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   |  9 ++++++---
 include/linux/usb/cdc_ncm.h | 10 +---------
 2 files changed, 7 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 141dbec912be..b9b562b9128a 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -174,7 +174,7 @@ static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx)
 	}
 
 	/* clamp new_tx to sane values */
-	min = CDC_NCM_MIN_HDR_SIZE + ctx->max_datagram_size;
+	min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth16);
 	max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
 
 	/* some devices set dwNtbOutMaxSize too low for the above default */
@@ -318,6 +318,9 @@ static int cdc_ncm_init(struct usbnet *dev)
 			(ctx->tx_max_datagrams > CDC_NCM_DPT_DATAGRAMS_MAX))
 		ctx->tx_max_datagrams = CDC_NCM_DPT_DATAGRAMS_MAX;
 
+	/* set up maximum NDP size */
+	ctx->max_ndp_size = sizeof(struct usb_cdc_ncm_ndp16) + (ctx->tx_max_datagrams + 1) * sizeof(struct usb_cdc_ncm_dpe16);
+
 	/* initial coalescing timer interval */
 	ctx->timer_interval = CDC_NCM_TIMER_INTERVAL_USEC * NSEC_PER_USEC;
 
@@ -800,7 +803,7 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
 	cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_max);
 
 	/* verify that there is room for the NDP and the datagram (reserve) */
-	if ((ctx->tx_max - skb->len - reserve) < CDC_NCM_NDP_SIZE)
+	if ((ctx->tx_max - skb->len - reserve) < ctx->max_ndp_size)
 		return NULL;
 
 	/* link to it */
@@ -810,7 +813,7 @@ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp(struct cdc_ncm_ctx *ctx, struct sk_
 		nth16->wNdpIndex = cpu_to_le16(skb->len);
 
 	/* push a new empty NDP */
-	ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, CDC_NCM_NDP_SIZE), 0, CDC_NCM_NDP_SIZE);
+	ndp16 = (struct usb_cdc_ncm_ndp16 *)memset(skb_put(skb, ctx->max_ndp_size), 0, ctx->max_ndp_size);
 	ndp16->dwSignature = sign;
 	ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + sizeof(struct usb_cdc_ncm_dpe16));
 	return ndp16;
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 5c1066b4dc41..60a44b8a464e 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -76,15 +76,6 @@
 #define CDC_NCM_TIMER_INTERVAL_MIN		5UL
 #define CDC_NCM_TIMER_INTERVAL_MAX		(15UL * USEC_PER_SEC)
 
-/* The following macro defines the minimum header space */
-#define	CDC_NCM_MIN_HDR_SIZE \
-	(sizeof(struct usb_cdc_ncm_nth16) + sizeof(struct usb_cdc_ncm_ndp16) + \
-	(CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16))
-
-#define CDC_NCM_NDP_SIZE \
-	(sizeof(struct usb_cdc_ncm_ndp16) +				\
-	      (CDC_NCM_DPT_DATAGRAMS_MAX + 1) * sizeof(struct usb_cdc_ncm_dpe16))
-
 #define cdc_ncm_comm_intf_is_mbim(x)  ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \
 				       (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE)
 #define cdc_ncm_data_intf_is_mbim(x)  ((x)->desc.bInterfaceProtocol == USB_CDC_MBIM_PROTO_NTB)
@@ -110,6 +101,7 @@ struct cdc_ncm_ctx {
 	atomic_t stop;
 
 	u64 timer_interval;
+	u32 max_ndp_size;
 
 	u32 tx_timer_pending;
 	u32 tx_curr_frame_num;
-- 
cgit 


From 43e4c6dfc0fd781e68f20caf563a06f5c6ece995 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 16 May 2014 21:48:24 +0200
Subject: net: cdc_ncm: set reasonable padding limits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We pad frames larger than X to maximum size for devices which
don't need a ZLP after maximum sized frames. This allows the
device to optimize its transfers for one fixed buffer size.

X was arbitrarily set at 512 bytes regardless of real buffer
maximum, causing extreme overheads due to excessive padding of
larger tx buffers. Limit the padding to at most 3 full USB
packets, still allowing the overhead to payload ratio of 3/1.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 8 ++++++--
 include/linux/usb/cdc_ncm.h | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index b9b562b9128a..9592d4669435 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -213,6 +213,10 @@ static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx)
 
 	/* max qlen depend on hard_mtu and rx_urb_size */
 	usbnet_update_max_qlen(dev);
+
+	/* never pad more than 3 full USB packets per transfer */
+	ctx->min_tx_pkt = clamp_t(u16, ctx->tx_max - 3 * usb_maxpacket(dev->udev, dev->out, 1),
+				  CDC_NCM_MIN_TX_PKT, ctx->tx_max);
 }
 
 /* helpers for NCM and MBIM differences */
@@ -947,7 +951,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		/* variables will be reset at next call */
 	}
 
-	/* If collected data size is less or equal CDC_NCM_MIN_TX_PKT
+	/* If collected data size is less or equal ctx->min_tx_pkt
 	 * bytes, we send buffers as it is. If we get more data, it
 	 * would be more efficient for USB HS mobile device with DMA
 	 * engine to receive a full size NTB, than canceling DMA
@@ -957,7 +961,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	 * a ZLP after full sized NTBs.
 	 */
 	if (!(dev->driver_info->flags & FLAG_SEND_ZLP) &&
-	    skb_out->len > CDC_NCM_MIN_TX_PKT)
+	    skb_out->len > ctx->min_tx_pkt)
 		memset(skb_put(skb_out, ctx->tx_max - skb_out->len), 0,
 		       ctx->tx_max - skb_out->len);
 	else if (skb_out->len < ctx->tx_max && (skb_out->len % dev->maxpacket) == 0)
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 60a44b8a464e..79de6724d398 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -115,6 +115,7 @@ struct cdc_ncm_ctx {
 	u16 tx_seq;
 	u16 rx_seq;
 	u16 connected;
+	u16 min_tx_pkt;
 };
 
 u8 cdc_ncm_select_altsetting(struct usb_interface *intf);
-- 
cgit 


From beeecd42c3b41d17d0bf1d839db99274c287f514 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 16 May 2014 21:48:25 +0200
Subject: net: cdc_ncm/cdc_mbim: adding NCM protocol statistics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To have an idea of the effects of the protocol coalescing
it's useful to have some counters showing the different
aspects.

Due to the asymmetrical usbnet interface the netdev
rx_bytes counter has been counting real received payload,
while the tx_bytes counter has included the NCM/MBIM
framing overhead. This overhead can be many times the
payload because of the aggressive padding strategy of
this driver, and will vary a lot depending on device
and traffic.

With very few exceptions, users are only interested in
the payload size.  Having a somewhat accurate payload
byte counter is particularly important for mobile
broadband devices, which many NCM devices and of course
all MBIM devices are. Users and userspace applications
will use this counter to monitor account quotas.

Having protocol specific counters for the overhead, we are
now able to correct the tx_bytes netdev counter so that
it shows the real payload.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_mbim.c  |  6 +++
 drivers/net/usb/cdc_ncm.c   | 91 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/cdc_ncm.h | 11 ++++++
 3 files changed, 108 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c
index bc23273d0455..5ee7a1dbc023 100644
--- a/drivers/net/usb/cdc_mbim.c
+++ b/drivers/net/usb/cdc_mbim.c
@@ -420,6 +420,7 @@ static int cdc_mbim_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
 	struct usb_cdc_ncm_dpe16 *dpe16;
 	int ndpoffset;
 	int loopcount = 50; /* arbitrary max preventing infinite loop */
+	u32 payload = 0;
 	u8 *c;
 	u16 tci;
 
@@ -482,6 +483,7 @@ next_ndp:
 			if (!skb)
 				goto error;
 			usbnet_skb_return(dev, skb);
+			payload += len;	/* count payload bytes in this NTB */
 		}
 	}
 err_ndp:
@@ -490,6 +492,10 @@ err_ndp:
 	if (ndpoffset && loopcount--)
 		goto next_ndp;
 
+	/* update stats */
+	ctx->rx_overhead += skb_in->len - payload;
+	ctx->rx_ntbs++;
+
 	return 1;
 error:
 	return 0;
diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 9592d4669435..f4b439847d04 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -65,6 +65,68 @@ static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx);
 static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer);
 static struct usb_driver cdc_ncm_driver;
 
+struct cdc_ncm_stats {
+	char stat_string[ETH_GSTRING_LEN];
+	int sizeof_stat;
+	int stat_offset;
+};
+
+#define CDC_NCM_STAT(str, m) { \
+		.stat_string = str, \
+		.sizeof_stat = sizeof(((struct cdc_ncm_ctx *)0)->m), \
+		.stat_offset = offsetof(struct cdc_ncm_ctx, m) }
+#define CDC_NCM_SIMPLE_STAT(m)	CDC_NCM_STAT(__stringify(m), m)
+
+static const struct cdc_ncm_stats cdc_ncm_gstrings_stats[] = {
+	CDC_NCM_SIMPLE_STAT(tx_reason_ntb_full),
+	CDC_NCM_SIMPLE_STAT(tx_reason_ndp_full),
+	CDC_NCM_SIMPLE_STAT(tx_reason_timeout),
+	CDC_NCM_SIMPLE_STAT(tx_reason_max_datagram),
+	CDC_NCM_SIMPLE_STAT(tx_overhead),
+	CDC_NCM_SIMPLE_STAT(tx_ntbs),
+	CDC_NCM_SIMPLE_STAT(rx_overhead),
+	CDC_NCM_SIMPLE_STAT(rx_ntbs),
+};
+
+static int cdc_ncm_get_sset_count(struct net_device __always_unused *netdev, int sset)
+{
+	switch (sset) {
+	case ETH_SS_STATS:
+		return ARRAY_SIZE(cdc_ncm_gstrings_stats);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void cdc_ncm_get_ethtool_stats(struct net_device *netdev,
+				    struct ethtool_stats __always_unused *stats,
+				    u64 *data)
+{
+	struct usbnet *dev = netdev_priv(netdev);
+	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	int i;
+	char *p = NULL;
+
+	for (i = 0; i < ARRAY_SIZE(cdc_ncm_gstrings_stats); i++) {
+		p = (char *)ctx + cdc_ncm_gstrings_stats[i].stat_offset;
+		data[i] = (cdc_ncm_gstrings_stats[i].sizeof_stat == sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+	}
+}
+
+static void cdc_ncm_get_strings(struct net_device __always_unused *netdev, u32 stringset, u8 *data)
+{
+	u8 *p = data;
+	int i;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		for (i = 0; i < ARRAY_SIZE(cdc_ncm_gstrings_stats); i++) {
+			memcpy(p, cdc_ncm_gstrings_stats[i].stat_string, ETH_GSTRING_LEN);
+			p += ETH_GSTRING_LEN;
+		}
+	}
+}
+
 static int cdc_ncm_get_coalesce(struct net_device *netdev,
 				struct ethtool_coalesce *ec)
 {
@@ -122,6 +184,9 @@ static const struct ethtool_ops cdc_ncm_ethtool_ops = {
 	.get_msglevel      = usbnet_get_msglevel,
 	.set_msglevel      = usbnet_set_msglevel,
 	.get_ts_info       = ethtool_op_get_ts_info,
+	.get_sset_count    = cdc_ncm_get_sset_count,
+	.get_strings       = cdc_ncm_get_strings,
+	.get_ethtool_stats = cdc_ncm_get_ethtool_stats,
 	.get_coalesce      = cdc_ncm_get_coalesce,
 	.set_coalesce      = cdc_ncm_set_coalesce,
 };
@@ -862,6 +927,9 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 
 		/* count total number of frames in this NTB */
 		ctx->tx_curr_frame_num = 0;
+
+		/* recent payload counter for this skb_out */
+		ctx->tx_curr_frame_payload = 0;
 	}
 
 	for (n = ctx->tx_curr_frame_num; n < ctx->tx_max_datagrams; n++) {
@@ -899,6 +967,7 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 				ctx->tx_rem_sign = sign;
 				skb = NULL;
 				ready2send = 1;
+				ctx->tx_reason_ntb_full++;	/* count reason for transmitting */
 			}
 			break;
 		}
@@ -912,12 +981,14 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		ndp16->dpe16[index].wDatagramIndex = cpu_to_le16(skb_out->len);
 		ndp16->wLength = cpu_to_le16(ndplen + sizeof(struct usb_cdc_ncm_dpe16));
 		memcpy(skb_put(skb_out, skb->len), skb->data, skb->len);
+		ctx->tx_curr_frame_payload += skb->len;	/* count real tx payload data */
 		dev_kfree_skb_any(skb);
 		skb = NULL;
 
 		/* send now if this NDP is full */
 		if (index >= CDC_NCM_DPT_DATAGRAMS_MAX) {
 			ready2send = 1;
+			ctx->tx_reason_ndp_full++;	/* count reason for transmitting */
 			break;
 		}
 	}
@@ -947,6 +1018,8 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 		goto exit_no_skb;
 
 	} else {
+		if (n == ctx->tx_max_datagrams)
+			ctx->tx_reason_max_datagram++;	/* count reason for transmitting */
 		/* frame goes out */
 		/* variables will be reset at next call */
 	}
@@ -974,6 +1047,17 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 	/* return skb */
 	ctx->tx_curr_skb = NULL;
 	dev->net->stats.tx_packets += ctx->tx_curr_frame_num;
+
+	/* keep private stats: framing overhead and number of NTBs */
+	ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload;
+	ctx->tx_ntbs++;
+
+	/* usbnet has already counted all the framing overhead.
+	 * Adjust the stats so that the tx_bytes counter show real
+	 * payload data instead.
+	 */
+	dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload;
+
 	return skb_out;
 
 exit_no_skb:
@@ -1014,6 +1098,7 @@ static void cdc_ncm_txpath_bh(unsigned long param)
 		cdc_ncm_tx_timeout_start(ctx);
 		spin_unlock_bh(&ctx->mtx);
 	} else if (dev->net != NULL) {
+		ctx->tx_reason_timeout++;	/* count reason for transmitting */
 		spin_unlock_bh(&ctx->mtx);
 		netif_tx_lock_bh(dev->net);
 		usbnet_start_xmit(NULL, dev->net);
@@ -1149,6 +1234,7 @@ int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in)
 	struct usb_cdc_ncm_dpe16 *dpe16;
 	int ndpoffset;
 	int loopcount = 50; /* arbitrary max preventing infinite loop */
+	u32 payload = 0;
 
 	ndpoffset = cdc_ncm_rx_verify_nth16(ctx, skb_in);
 	if (ndpoffset < 0)
@@ -1201,6 +1287,7 @@ next_ndp:
 			skb->data = ((u8 *)skb_in->data) + offset;
 			skb_set_tail_pointer(skb, len);
 			usbnet_skb_return(dev, skb);
+			payload += len;	/* count payload bytes in this NTB */
 		}
 	}
 err_ndp:
@@ -1209,6 +1296,10 @@ err_ndp:
 	if (ndpoffset && loopcount--)
 		goto next_ndp;
 
+	/* update stats */
+	ctx->rx_overhead += skb_in->len - payload;
+	ctx->rx_ntbs++;
+
 	return 1;
 error:
 	return 0;
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 79de6724d398..88d2d7f1820f 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -116,6 +116,17 @@ struct cdc_ncm_ctx {
 	u16 rx_seq;
 	u16 connected;
 	u16 min_tx_pkt;
+
+	/* statistics */
+	u32 tx_curr_frame_payload;
+	u32 tx_reason_ntb_full;
+	u32 tx_reason_ndp_full;
+	u32 tx_reason_timeout;
+	u32 tx_reason_max_datagram;
+	u64 tx_overhead;
+	u64 tx_ntbs;
+	u64 rx_overhead;
+	u64 rx_ntbs;
 };
 
 u8 cdc_ncm_select_altsetting(struct usb_interface *intf);
-- 
cgit 


From 50f1cb1cc8f50fa88dbeaf990ae2bae91b9ff306 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 16 May 2014 21:48:26 +0200
Subject: net: cdc_ncm: use sane defaults for rx/tx buffers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lots of devices request much larger buffers than reasonable. This
causes real problems for users of hosts with limited resources.

Reducing the default buffer size to 16kB for such devices is
a reasonable trade-off between allowing them to aggregate traffic
and avoiding memory exhaustion on resource constrained hosts.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 12 ++++++++++--
 include/linux/usb/cdc_ncm.h |  4 ++++
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index f4b439847d04..bb53abe1f3a1 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -492,10 +492,18 @@ static void cdc_ncm_fix_modulus(struct usbnet *dev)
 static int cdc_ncm_setup(struct usbnet *dev)
 {
 	struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0];
+	u32 def_rx, def_tx;
+
+	/* be conservative when selecting initial buffer size to
+	 * increase the number of hosts this will work for
+	 */
+	def_rx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_RX,
+		       le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize));
+	def_tx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_TX,
+		       le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
 
 	/* clamp rx_max and tx_max and inform device */
-	cdc_ncm_update_rxtx_max(dev, le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize),
-				le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize));
+	cdc_ncm_update_rxtx_max(dev, def_rx, def_tx);
 
 	/* sanitize the modulus and remainder values */
 	cdc_ncm_fix_modulus(dev);
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 88d2d7f1820f..cde506731c48 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -52,6 +52,10 @@
 #define	CDC_NCM_NTB_MAX_SIZE_TX			32768	/* bytes */
 #define	CDC_NCM_NTB_MAX_SIZE_RX			32768	/* bytes */
 
+/* Initial NTB length */
+#define	CDC_NCM_NTB_DEF_SIZE_TX			16384	/* bytes */
+#define	CDC_NCM_NTB_DEF_SIZE_RX			16384	/* bytes */
+
 /* Minimum value for MaxDatagramSize, ch. 6.2.9 */
 #define	CDC_NCM_MIN_DATAGRAM_SIZE		1514	/* bytes */
 
-- 
cgit 


From fa83dbeee55865678025b6c1637ca08860209f87 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Fri, 16 May 2014 21:48:28 +0200
Subject: net: cdc_ncm: remove redundant "disconnected" flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Calling netif_carrier_{on,off} is sufficient.  There is no need
to duplicate the carrier state in a driver specific flag.

Acked-by: Enrico Mioso <mrkiko.rs@gmail.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c        | 19 ++-----------------
 drivers/net/usb/huawei_cdc_ncm.c | 13 -------------
 include/linux/usb/cdc_ncm.h      |  1 -
 3 files changed, 2 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 1d1ff2fa8ae1..783c4ed96395 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -1364,11 +1364,10 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 		 * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be
 		 * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE.
 		 */
-		ctx->connected = le16_to_cpu(event->wValue);
 		netif_info(dev, link, dev->net,
 			   "network connection: %sconnected\n",
-			   ctx->connected ? "" : "dis");
-		usbnet_link_change(dev, ctx->connected, 0);
+			   !!event->wValue ? "" : "dis");
+		usbnet_link_change(dev, !!event->wValue, 0);
 		break;
 
 	case USB_CDC_NOTIFY_SPEED_CHANGE:
@@ -1388,23 +1387,11 @@ static void cdc_ncm_status(struct usbnet *dev, struct urb *urb)
 	}
 }
 
-static int cdc_ncm_check_connect(struct usbnet *dev)
-{
-	struct cdc_ncm_ctx *ctx;
-
-	ctx = (struct cdc_ncm_ctx *)dev->data[0];
-	if (ctx == NULL)
-		return 1;	/* disconnected */
-
-	return !ctx->connected;
-}
-
 static const struct driver_info cdc_ncm_info = {
 	.description = "CDC NCM",
 	.flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
-	.check_connect = cdc_ncm_check_connect,
 	.manage_power = usbnet_manage_power,
 	.status = cdc_ncm_status,
 	.rx_fixup = cdc_ncm_rx_fixup,
@@ -1418,7 +1405,6 @@ static const struct driver_info wwan_info = {
 			| FLAG_WWAN,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
-	.check_connect = cdc_ncm_check_connect,
 	.manage_power = usbnet_manage_power,
 	.status = cdc_ncm_status,
 	.rx_fixup = cdc_ncm_rx_fixup,
@@ -1432,7 +1418,6 @@ static const struct driver_info wwan_noarp_info = {
 			| FLAG_WWAN | FLAG_NOARP,
 	.bind = cdc_ncm_bind,
 	.unbind = cdc_ncm_unbind,
-	.check_connect = cdc_ncm_check_connect,
 	.manage_power = usbnet_manage_power,
 	.status = cdc_ncm_status,
 	.rx_fixup = cdc_ncm_rx_fixup,
diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c
index 312178d7b698..f9822bc75425 100644
--- a/drivers/net/usb/huawei_cdc_ncm.c
+++ b/drivers/net/usb/huawei_cdc_ncm.c
@@ -172,24 +172,11 @@ err:
 	return ret;
 }
 
-static int huawei_cdc_ncm_check_connect(struct usbnet *usbnet_dev)
-{
-	struct cdc_ncm_ctx *ctx;
-
-	ctx = (struct cdc_ncm_ctx *)usbnet_dev->data[0];
-
-	if (ctx == NULL)
-		return 1; /* disconnected */
-
-	return !ctx->connected;
-}
-
 static const struct driver_info huawei_cdc_ncm_info = {
 	.description = "Huawei CDC NCM device",
 	.flags = FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_WWAN,
 	.bind = huawei_cdc_ncm_bind,
 	.unbind = huawei_cdc_ncm_unbind,
-	.check_connect = huawei_cdc_ncm_check_connect,
 	.manage_power = huawei_cdc_ncm_manage_power,
 	.rx_fixup = cdc_ncm_rx_fixup,
 	.tx_fixup = cdc_ncm_tx_fixup,
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index cde506731c48..8c5e38819828 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -118,7 +118,6 @@ struct cdc_ncm_ctx {
 	u16 tx_ndp_modulus;
 	u16 tx_seq;
 	u16 rx_seq;
-	u16 connected;
 	u16 min_tx_pkt;
 
 	/* statistics */
-- 
cgit 


From a17597d3b418ca5a394d14724ccfc295cb3186c8 Mon Sep 17 00:00:00 2001
From: Sasha Levin <sasha.levin@oracle.com>
Date: Fri, 16 May 2014 11:42:43 +0930
Subject: virtio-rng: fixes for device registration/unregistration

There are several fixes in this patch (mostly because it's hard
splitting them up):

 - Revert the name field in struct hwrng back to 'const'. Also, don't
do an extra kmalloc for the name - just wasteful.
 - Deal with allocation failures properly.
 - Use IDA to allocate device number instead of brute forcing one.

Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/char/hw_random/virtio-rng.c | 41 +++++++++++++++++++++----------------
 include/linux/hw_random.h           |  2 +-
 2 files changed, 24 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 5b25daa7f798..f3e71501de54 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -25,6 +25,7 @@
 #include <linux/virtio_rng.h>
 #include <linux/module.h>
 
+static DEFINE_IDA(rng_index_ida);
 
 struct virtrng_info {
 	struct virtio_device *vdev;
@@ -33,6 +34,8 @@ struct virtrng_info {
 	unsigned int data_avail;
 	struct completion have_data;
 	bool busy;
+	char name[25];
+	int index;
 };
 
 static void random_recv_done(struct virtqueue *vq)
@@ -92,41 +95,45 @@ static void virtio_cleanup(struct hwrng *rng)
 
 static int probe_common(struct virtio_device *vdev)
 {
-	int err, i;
+	int err, index;
 	struct virtrng_info *vi = NULL;
 
 	vi = kzalloc(sizeof(struct virtrng_info), GFP_KERNEL);
-	vi->hwrng.name = kmalloc(40, GFP_KERNEL);
+	if (!vi)
+		return -ENOMEM;
+
+	vi->index = index = ida_simple_get(&rng_index_ida, 0, 0, GFP_KERNEL);
+	if (index < 0) {
+		kfree(vi);
+		return index;
+	}
+	sprintf(vi->name, "virtio_rng.%d", index);
 	init_completion(&vi->have_data);
 
-	vi->hwrng.read = virtio_read;
-	vi->hwrng.cleanup = virtio_cleanup;
-	vi->hwrng.priv = (unsigned long)vi;
+	vi->hwrng = (struct hwrng) {
+		.read = virtio_read,
+		.cleanup = virtio_cleanup,
+		.priv = (unsigned long)vi,
+		.name = vi->name,
+	};
 	vdev->priv = vi;
 
 	/* We expect a single virtqueue. */
 	vi->vq = virtio_find_single_vq(vdev, random_recv_done, "input");
 	if (IS_ERR(vi->vq)) {
 		err = PTR_ERR(vi->vq);
-		kfree(vi->hwrng.name);
 		vi->vq = NULL;
 		kfree(vi);
-		vi = NULL;
+		ida_simple_remove(&rng_index_ida, index);
 		return err;
 	}
 
-	i = 0;
-	do {
-		sprintf(vi->hwrng.name, "virtio_rng.%d", i++);
-		err = hwrng_register(&vi->hwrng);
-	} while (err == -EEXIST);
-
+	err = hwrng_register(&vi->hwrng);
 	if (err) {
 		vdev->config->del_vqs(vdev);
-		kfree(vi->hwrng.name);
 		vi->vq = NULL;
 		kfree(vi);
-		vi = NULL;
+		ida_simple_remove(&rng_index_ida, index);
 		return err;
 	}
 
@@ -140,10 +147,8 @@ static void remove_common(struct virtio_device *vdev)
 	vi->busy = false;
 	hwrng_unregister(&vi->hwrng);
 	vdev->config->del_vqs(vdev);
-	kfree(vi->hwrng.name);
-	vi->vq = NULL;
+	ida_simple_remove(&rng_index_ida, vi->index);
 	kfree(vi);
-	vi = NULL;
 }
 
 static int virtrng_probe(struct virtio_device *vdev)
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 02d9c87be54c..b4b0eef5fddf 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -31,7 +31,7 @@
  * @priv:		Private data, for use by the RNG driver.
  */
 struct hwrng {
-	char *name;
+	const char *name;
 	int (*init)(struct hwrng *rng);
 	void (*cleanup)(struct hwrng *rng);
 	int (*data_present)(struct hwrng *rng, int wait);
-- 
cgit 


From 678e30df2e5664619e06fcfea5490a476826d8fe Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 19 May 2014 01:25:59 +0100
Subject: ethtool: Expand documentation of ethtool_ops::{get,set}_rxfh()

Some corner-cases are not explained properly.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 include/linux/ethtool.h | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 212f537fc686..886e127d51a6 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -162,15 +162,16 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
  *	Will not be called if @get_rxfh_indir_size returns zero.
  * @get_rxfh: Get the contents of the RX flow hash indirection table and hash
  *	key.
- *	Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size
- *	returns zero.
+ *	Will only be called if one or both of @get_rxfh_indir_size and
+ *	@get_rxfh_key_size are implemented and return non-zero.
  *	Returns a negative error code or zero.
  * @set_rxfh_indir: Set the contents of the RX flow hash indirection table.
  *	Will not be called if @get_rxfh_indir_size returns zero.
- * @set_rxfh: Set the contents of the RX flow hash indirection table and
- *	hash key.
- *	Will not be called if @get_rxfh_indir_size and @get_rxfh_key_size
- *	returns zero.
+ * @set_rxfh: Set the contents of the RX flow hash indirection table and/or
+ *	hash key.  Either or both arguments may be %NULL if that attribute
+ *	is not to be changed.
+ *	Will only be called if one or both of @get_rxfh_indir_size and
+ *	@get_rxfh_key_size are implemented and return non-zero.
  *	Returns a negative error code or zero.
  * @get_channels: Get number of channels.
  * @set_channels: Set number of channels.  Returns a negative error code or
@@ -244,8 +245,8 @@ struct ethtool_ops {
 	int	(*reset)(struct net_device *, u32 *);
 	u32	(*get_rxfh_key_size)(struct net_device *);
 	u32	(*get_rxfh_indir_size)(struct net_device *);
-	int	(*get_rxfh)(struct net_device *, u32 *, u8 *);
-	int	(*set_rxfh)(struct net_device *, u32 *, u8 *);
+	int	(*get_rxfh)(struct net_device *, u32 *indir, u8 *key);
+	int	(*set_rxfh)(struct net_device *, u32 *indir, u8 *key);
 	int	(*get_rxfh_indir)(struct net_device *, u32 *);
 	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
-- 
cgit 


From 61d88c6811f216de4ec26aafe24e650dc1aeb00e Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 19 May 2014 01:29:42 +0100
Subject: ethtool: Disallow ETHTOOL_SRSSH with both indir table and hash key
 unchanged

This would be a no-op, so there is no reason to request it.

This also allows conversion of the current implementations of
ethtool_ops::{get,set}_rxfh_indir to ethtool_ops::{get,set}_rxfh
with no change other than their parameters.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 include/linux/ethtool.h | 4 ++--
 net/core/ethtool.c      | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 886e127d51a6..de687a97c6e7 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -168,8 +168,8 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
  * @set_rxfh_indir: Set the contents of the RX flow hash indirection table.
  *	Will not be called if @get_rxfh_indir_size returns zero.
  * @set_rxfh: Set the contents of the RX flow hash indirection table and/or
- *	hash key.  Either or both arguments may be %NULL if that attribute
- *	is not to be changed.
+ *	hash key.  In case only the indirection table or hash key is to be
+ *	changed, the other argument will be %NULL.
  *	Will only be called if one or both of @get_rxfh_indir_size and
  *	@get_rxfh_key_size are implemented and return non-zero.
  *	Returns a negative error code or zero.
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 7156fe5ca876..b8857348bdf3 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -802,11 +802,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 		return -EFAULT;
 
 	/* If either indir or hash key is valid, proceed further.
+	 * It is not valid to request that both be unchanged.
 	 */
 	if ((user_indir_size &&
 	     user_indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
 	     user_indir_size != dev_indir_size) ||
-	    (user_key_size && (user_key_size != dev_key_size)))
+	    (user_key_size && (user_key_size != dev_key_size)) ||
+	    (user_indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
+	     user_key_size == 0))
 		return -EINVAL;
 
 	if (user_indir_size != ETH_RXFH_INDIR_NO_CHANGE)
-- 
cgit 


From 33cb0fa7888510b5bd2096352b200cfe29db10fe Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Thu, 15 May 2014 02:01:23 +0100
Subject: ethtool, be2net: constify array pointer parameters to
 ethtool_ops::set_rxfh

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c    | 2 +-
 drivers/net/ethernet/emulex/benet/be_cmds.h    | 2 +-
 drivers/net/ethernet/emulex/benet/be_ethtool.c | 3 ++-
 include/linux/ethtool.h                        | 3 ++-
 4 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 476752d0a6a4..7b59da241ccb 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -2033,7 +2033,7 @@ int be_cmd_reset_function(struct be_adapter *adapter)
 }
 
 int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable,
-		      u32 rss_hash_opts, u16 table_size, u8 *rss_hkey)
+		      u32 rss_hash_opts, u16 table_size, const u8 *rss_hkey)
 {
 	struct be_mcc_wrb *wrb;
 	struct be_cmd_req_rss_config *req;
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 228d4b611084..451f3138b8fb 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -2068,7 +2068,7 @@ int be_cmd_query_fw_cfg(struct be_adapter *adapter, u32 *port_num,
 			u32 *function_mode, u32 *function_caps, u16 *asic_rev);
 int be_cmd_reset_function(struct be_adapter *adapter);
 int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable,
-		      u32 rss_hash_opts, u16 table_size, u8 *rss_hkey);
+		      u32 rss_hash_opts, u16 table_size, const u8 *rss_hkey);
 int be_process_mcc(struct be_adapter *adapter);
 int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, u8 beacon,
 			    u8 status, u8 state);
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 970ae337daac..e2da4d20dd3d 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -1117,7 +1117,8 @@ static int be_get_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey)
 	return 0;
 }
 
-static int be_set_rxfh(struct net_device *netdev, u32 *indir, u8 *hkey)
+static int be_set_rxfh(struct net_device *netdev, const u32 *indir,
+		       const u8 *hkey)
 {
 	int rc = 0, i, j;
 	struct be_adapter *adapter = netdev_priv(netdev);
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index de687a97c6e7..874fde01d398 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -246,7 +246,8 @@ struct ethtool_ops {
 	u32	(*get_rxfh_key_size)(struct net_device *);
 	u32	(*get_rxfh_indir_size)(struct net_device *);
 	int	(*get_rxfh)(struct net_device *, u32 *indir, u8 *key);
-	int	(*set_rxfh)(struct net_device *, u32 *indir, u8 *key);
+	int	(*set_rxfh)(struct net_device *, const u32 *indir,
+			    const u8 *key);
 	int	(*get_rxfh_indir)(struct net_device *, u32 *);
 	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
-- 
cgit 


From b8802f76fe473d91886220498aeda157c492f2d1 Mon Sep 17 00:00:00 2001
From: Haojian Zhuang <haojian.zhuang@linaro.org>
Date: Sun, 11 May 2014 16:05:58 +0800
Subject: irqchip: gic: Use mask field in GICC_IAR

Bit[9:0] is interrupt ID field in GICC_IAR. Bit[12:10] is CPU ID field,
and others are reserved.

So we should use GICC_IAR_INT_ID_MASK to get interrupt ID. It's not a good way
to use ~0x1c00 (CPU ID field) to get interrupt ID.

Signed-off-by: Haojian Zhuang <haojian.zhuang@linaro.org>
Link: https://lkml.kernel.org/r/1399795571-17231-3-git-send-email-haojian.zhuang@linaro.org
Signed-off-by: Jason Cooper <jason@lakedaemon.net>
---
 drivers/irqchip/irq-gic.c       | 2 +-
 include/linux/irqchip/arm-gic.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 4300b6606f5e..f711fb6af7a9 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -287,7 +287,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
 
 	do {
 		irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);
-		irqnr = irqstat & ~0x1c00;
+		irqnr = irqstat & GICC_IAR_INT_ID_MASK;
 
 		if (likely(irqnr > 15 && irqnr < 1021)) {
 			irqnr = irq_find_mapping(gic->domain, irqnr);
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 7ed92d0560d5..45e2d8c15bd2 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -21,6 +21,8 @@
 #define GIC_CPU_ACTIVEPRIO		0xd0
 #define GIC_CPU_IDENT			0xfc
 
+#define GICC_IAR_INT_ID_MASK		0x3ff
+
 #define GIC_DIST_CTRL			0x000
 #define GIC_DIST_CTR			0x004
 #define GIC_DIST_IGROUP			0x080
-- 
cgit 


From d3091298570006fa538ec9beacbfb1098964962e Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@ravnborg.org>
Date: Fri, 16 May 2014 23:26:05 +0200
Subject: sparc: fix sparse warnings in smp_32.c + smp_64.c

Fix following warnings:
smp_32.c:177:5: warning: symbol 'setup_profiling_timer' was not declared. Should it be static?
smp_64.c:1202:5: warning: symbol 'setup_profiling_timer' was not declared. Should it be static?
smp_64.c:989:6: warning: symbol 'kgdb_roundup_cpus' was not declared. Should it be static?

Add prototype to include/linux/profile.h of setup_profiling_timer
Add missing include to smp_64.c

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/smp_32.c | 1 +
 arch/sparc/kernel/smp_64.c | 1 +
 include/linux/profile.h    | 1 +
 3 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c
index 9d3297d8d730..7958242d63c5 100644
--- a/arch/sparc/kernel/smp_32.c
+++ b/arch/sparc/kernel/smp_32.c
@@ -20,6 +20,7 @@
 #include <linux/seq_file.h>
 #include <linux/cache.h>
 #include <linux/delay.h>
+#include <linux/profile.h>
 #include <linux/cpu.h>
 
 #include <asm/ptrace.h>
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index df91e78dbd95..afc71bf719b1 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -25,6 +25,7 @@
 #include <linux/ftrace.h>
 #include <linux/cpu.h>
 #include <linux/slab.h>
+#include <linux/kgdb.h>
 
 #include <asm/head.h>
 #include <asm/ptrace.h>
diff --git a/include/linux/profile.h b/include/linux/profile.h
index aaad3861beb8..b537a25ffa17 100644
--- a/include/linux/profile.h
+++ b/include/linux/profile.h
@@ -44,6 +44,7 @@ extern int prof_on __read_mostly;
 int profile_init(void);
 int profile_setup(char *str);
 void profile_tick(int type);
+int setup_profiling_timer(unsigned int multiplier);
 
 /*
  * Add multiple profiler hits to a given address:
-- 
cgit 


From 3b36fbb01dc50f58e7803006f5a99683daf26c8c Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Sun, 18 May 2014 22:44:35 -0700
Subject: Input: pixcir_i2c_ts - initialize interrupt mode and power mode

Introduce helper functions to configure power and interrupt registers.
Default to IDLE mode on probe as device supports auto wakeup to ACTIVE mode
on detecting finger touch.

Configure interrupt mode and polarity on start up.  Power down on device
closure or module removal.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Acked-by: Mugunthan V N <mugunthanvnm@ti.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/pixcir_i2c_ts.c | 182 ++++++++++++++++++++++++++++--
 include/linux/input/pixcir_ts.h           |  42 +++++++
 2 files changed, 216 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c
index 5d36243bdc79..6c6f6dacb858 100644
--- a/drivers/input/touchscreen/pixcir_i2c_ts.c
+++ b/drivers/input/touchscreen/pixcir_i2c_ts.c
@@ -29,7 +29,7 @@ struct pixcir_i2c_ts_data {
 	struct i2c_client *client;
 	struct input_dev *input;
 	const struct pixcir_ts_platform_data *chip;
-	bool exiting;
+	bool running;
 };
 
 static void pixcir_ts_poscheck(struct pixcir_i2c_ts_data *data)
@@ -88,7 +88,7 @@ static irqreturn_t pixcir_ts_isr(int irq, void *dev_id)
 {
 	struct pixcir_i2c_ts_data *tsdata = dev_id;
 
-	while (!tsdata->exiting) {
+	while (tsdata->running) {
 		pixcir_ts_poscheck(tsdata);
 
 		if (tsdata->chip->attb_read_val())
@@ -100,6 +100,164 @@ static irqreturn_t pixcir_ts_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static int pixcir_set_power_mode(struct pixcir_i2c_ts_data *ts,
+				 enum pixcir_power_mode mode)
+{
+	struct device *dev = &ts->client->dev;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(ts->client, PIXCIR_REG_POWER_MODE);
+	if (ret < 0) {
+		dev_err(dev, "%s: can't read reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_POWER_MODE, ret);
+		return ret;
+	}
+
+	ret &= ~PIXCIR_POWER_MODE_MASK;
+	ret |= mode;
+
+	/* Always AUTO_IDLE */
+	ret |= PIXCIR_POWER_ALLOW_IDLE;
+
+	ret = i2c_smbus_write_byte_data(ts->client, PIXCIR_REG_POWER_MODE, ret);
+	if (ret < 0) {
+		dev_err(dev, "%s: can't write reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_POWER_MODE, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Set the interrupt mode for the device i.e. ATTB line behaviour
+ *
+ * @polarity : 1 for active high, 0 for active low.
+ */
+static int pixcir_set_int_mode(struct pixcir_i2c_ts_data *ts,
+			       enum pixcir_int_mode mode, bool polarity)
+{
+	struct device *dev = &ts->client->dev;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(ts->client, PIXCIR_REG_INT_MODE);
+	if (ret < 0) {
+		dev_err(dev, "%s: can't read reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_INT_MODE, ret);
+		return ret;
+	}
+
+	ret &= ~PIXCIR_INT_MODE_MASK;
+	ret |= mode;
+
+	if (polarity)
+		ret |= PIXCIR_INT_POL_HIGH;
+	else
+		ret &= ~PIXCIR_INT_POL_HIGH;
+
+	ret = i2c_smbus_write_byte_data(ts->client, PIXCIR_REG_INT_MODE, ret);
+	if (ret < 0) {
+		dev_err(dev, "%s: can't write reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_INT_MODE, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Enable/disable interrupt generation
+ */
+static int pixcir_int_enable(struct pixcir_i2c_ts_data *ts, bool enable)
+{
+	struct device *dev = &ts->client->dev;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(ts->client, PIXCIR_REG_INT_MODE);
+	if (ret < 0) {
+		dev_err(dev, "%s: can't read reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_INT_MODE, ret);
+		return ret;
+	}
+
+	if (enable)
+		ret |= PIXCIR_INT_ENABLE;
+	else
+		ret &= ~PIXCIR_INT_ENABLE;
+
+	ret = i2c_smbus_write_byte_data(ts->client, PIXCIR_REG_INT_MODE, ret);
+	if (ret < 0) {
+		dev_err(dev, "%s: can't write reg 0x%x : %d\n",
+			__func__, PIXCIR_REG_INT_MODE, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int pixcir_start(struct pixcir_i2c_ts_data *ts)
+{
+	struct device *dev = &ts->client->dev;
+	int error;
+
+	/* LEVEL_TOUCH interrupt with active low polarity */
+	error = pixcir_set_int_mode(ts, PIXCIR_INT_LEVEL_TOUCH, 0);
+	if (error) {
+		dev_err(dev, "Failed to set interrupt mode: %d\n", error);
+		return error;
+	}
+
+	ts->running = true;
+	mb();	/* Update status before IRQ can fire */
+
+	/* enable interrupt generation */
+	error = pixcir_int_enable(ts, true);
+	if (error) {
+		dev_err(dev, "Failed to enable interrupt generation: %d\n",
+			error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int pixcir_stop(struct pixcir_i2c_ts_data *ts)
+{
+	int error;
+
+	/* Disable interrupt generation */
+	error = pixcir_int_enable(ts, false);
+	if (error) {
+		dev_err(&ts->client->dev,
+			"Failed to disable interrupt generation: %d\n",
+			error);
+		return error;
+	}
+
+	/* Exit ISR if running, no more report parsing */
+	ts->running = false;
+	mb();	/* update status before we synchronize irq */
+
+	/* Wait till running ISR is complete */
+	synchronize_irq(ts->client->irq);
+
+	return 0;
+}
+
+static int pixcir_input_open(struct input_dev *dev)
+{
+	struct pixcir_i2c_ts_data *ts = input_get_drvdata(dev);
+
+	return pixcir_start(ts);
+}
+
+static void pixcir_input_close(struct input_dev *dev)
+{
+	struct pixcir_i2c_ts_data *ts = input_get_drvdata(dev);
+
+	pixcir_stop(ts);
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int pixcir_i2c_ts_suspend(struct device *dev)
 {
@@ -156,6 +314,8 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 
 	input->name = client->name;
 	input->id.bustype = BUS_I2C;
+	input->open = pixcir_input_open;
+	input->close = pixcir_input_close;
 	input->dev.parent = &client->dev;
 
 	__set_bit(EV_KEY, input->evbit);
@@ -176,11 +336,22 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 		return error;
 	}
 
+	/* Always be in IDLE mode to save power, device supports auto wake */
+	error = pixcir_set_power_mode(tsdata, PIXCIR_POWER_IDLE);
+	if (error) {
+		dev_err(dev, "Failed to set IDLE mode\n");
+		return error;
+	}
+
+	/* Stop device till opened */
+	error = pixcir_stop(tsdata);
+	if (error)
+		return error;
+
 	error = input_register_device(input);
 	if (error)
 		return error;
 
-	i2c_set_clientdata(client, tsdata);
 	device_init_wakeup(&client->dev, 1);
 
 	return 0;
@@ -188,13 +359,8 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 
 static int pixcir_i2c_ts_remove(struct i2c_client *client)
 {
-	struct pixcir_i2c_ts_data *tsdata = i2c_get_clientdata(client);
-
 	device_init_wakeup(&client->dev, 0);
 
-	tsdata->exiting = true;
-	mb();
-
 	return 0;
 }
 
diff --git a/include/linux/input/pixcir_ts.h b/include/linux/input/pixcir_ts.h
index 7163d91c0373..7942804464d3 100644
--- a/include/linux/input/pixcir_ts.h
+++ b/include/linux/input/pixcir_ts.h
@@ -1,6 +1,48 @@
 #ifndef	_PIXCIR_I2C_TS_H
 #define	_PIXCIR_I2C_TS_H
 
+/*
+ * Register map
+ */
+#define PIXCIR_REG_POWER_MODE	51
+#define PIXCIR_REG_INT_MODE	52
+
+/*
+ * Power modes:
+ * active: max scan speed
+ * idle: lower scan speed with automatic transition to active on touch
+ * halt: datasheet says sleep but this is more like halt as the chip
+ *       clocks are cut and it can only be brought out of this mode
+ *	 using the RESET pin.
+ */
+enum pixcir_power_mode {
+	PIXCIR_POWER_ACTIVE,
+	PIXCIR_POWER_IDLE,
+	PIXCIR_POWER_HALT,
+};
+
+#define PIXCIR_POWER_MODE_MASK	0x03
+#define PIXCIR_POWER_ALLOW_IDLE (1UL << 2)
+
+/*
+ * Interrupt modes:
+ * periodical: interrupt is asserted periodicaly
+ * diff coordinates: interrupt is asserted when coordinates change
+ * level on touch: interrupt level asserted during touch
+ * pulse on touch: interrupt pulse asserted druing touch
+ *
+ */
+enum pixcir_int_mode {
+	PIXCIR_INT_PERIODICAL,
+	PIXCIR_INT_DIFF_COORD,
+	PIXCIR_INT_LEVEL_TOUCH,
+	PIXCIR_INT_PULSE_TOUCH,
+};
+
+#define PIXCIR_INT_MODE_MASK	0x03
+#define PIXCIR_INT_ENABLE	(1UL << 3)
+#define PIXCIR_INT_POL_HIGH	(1UL << 2)
+
 struct pixcir_ts_platform_data {
 	int (*attb_read_val)(void);
 	int x_max;
-- 
cgit 


From 0dfc8d41bfa091a61354eea73199a5af0eaae9c0 Mon Sep 17 00:00:00 2001
From: Roger Quadros <rogerq@ti.com>
Date: Sun, 18 May 2014 22:46:43 -0700
Subject: Input: pixcir_i2c_ts - get rid of pdata->attb_read_val()

Get rid of the attb_read_val() platform hook. Instead, read the ATTB gpio
directly from the driver.

Fail if a valid ATTB gpio is not provided by platform data.

Signed-off-by: Roger Quadros <rogerq@ti.com>
Acked-by: Mugunthan V N <mugunthanvnm@ti.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/pixcir_i2c_ts.c | 16 +++++++++++++++-
 include/linux/input/pixcir_ts.h           |  2 +-
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/pixcir_i2c_ts.c b/drivers/input/touchscreen/pixcir_i2c_ts.c
index 6c6f6dacb858..f2c0ae18e24a 100644
--- a/drivers/input/touchscreen/pixcir_i2c_ts.c
+++ b/drivers/input/touchscreen/pixcir_i2c_ts.c
@@ -24,6 +24,7 @@
 #include <linux/i2c.h>
 #include <linux/input.h>
 #include <linux/input/pixcir_ts.h>
+#include <linux/gpio.h>
 
 struct pixcir_i2c_ts_data {
 	struct i2c_client *client;
@@ -87,11 +88,12 @@ static void pixcir_ts_poscheck(struct pixcir_i2c_ts_data *data)
 static irqreturn_t pixcir_ts_isr(int irq, void *dev_id)
 {
 	struct pixcir_i2c_ts_data *tsdata = dev_id;
+	const struct pixcir_ts_platform_data *pdata = tsdata->chip;
 
 	while (tsdata->running) {
 		pixcir_ts_poscheck(tsdata);
 
-		if (tsdata->chip->attb_read_val())
+		if (gpio_get_value(pdata->gpio_attb))
 			break;
 
 		msleep(20);
@@ -298,6 +300,11 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 		return -EINVAL;
 	}
 
+	if (!gpio_is_valid(pdata->gpio_attb)) {
+		dev_err(dev, "Invalid gpio_attb in pdata\n");
+		return -EINVAL;
+	}
+
 	tsdata = devm_kzalloc(dev, sizeof(*tsdata), GFP_KERNEL);
 	if (!tsdata)
 		return -ENOMEM;
@@ -328,6 +335,13 @@ static int pixcir_i2c_ts_probe(struct i2c_client *client,
 
 	input_set_drvdata(input, tsdata);
 
+	error = devm_gpio_request_one(dev, pdata->gpio_attb,
+				      GPIOF_DIR_IN, "pixcir_i2c_attb");
+	if (error) {
+		dev_err(dev, "Failed to request ATTB gpio\n");
+		return error;
+	}
+
 	error = devm_request_threaded_irq(dev, client->irq, NULL, pixcir_ts_isr,
 					  IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 					  client->name, tsdata);
diff --git a/include/linux/input/pixcir_ts.h b/include/linux/input/pixcir_ts.h
index 7942804464d3..160cf353aa39 100644
--- a/include/linux/input/pixcir_ts.h
+++ b/include/linux/input/pixcir_ts.h
@@ -44,9 +44,9 @@ enum pixcir_int_mode {
 #define PIXCIR_INT_POL_HIGH	(1UL << 2)
 
 struct pixcir_ts_platform_data {
-	int (*attb_read_val)(void);
 	int x_max;
 	int y_max;
+	int gpio_attb;		/* GPIO connected to ATTB line */
 };
 
 #endif
-- 
cgit 


From 2cefdb1f0a27150755ef2730bafc58bf2ed16571 Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick.dyer@itdev.co.uk>
Date: Sun, 18 May 2014 22:59:20 -0700
Subject: Input: atmel_mxt_ts - remove unnecessary platform data

It is not necessary to download these values to the maXTouch chip on every
probe, since they are stored in NVRAM. It makes life difficult when tuning
the device to keep them in sync with the config array/file, and requires a
new kernel build for minor tweaks.

These parameters only represent a tiny subset of the available
configuration options; tracking all of these options in platform data would
be an endless task. In addition, different versions of maXTouch chips may
have these values in different places or may not even have them at all.

Having these values also makes life more complex for device tree and other
platforms where having to define a static configuration isn't helpful.

Signed-off-by: Nick Dyer <nick.dyer@itdev.co.uk>
Acked-by: Benson Leung <bleung@chromium.org>
Acked-by: Yufeng Shen <miletus@chromium.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 arch/arm/mach-s5pv210/mach-goni.c         |  5 ----
 drivers/input/touchscreen/atmel_mxt_ts.c  | 50 -------------------------------
 drivers/platform/chrome/chromeos_laptop.c | 10 -------
 include/linux/i2c/atmel_mxt_ts.h          |  6 +---
 4 files changed, 1 insertion(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c
index b41a38a75844..e549ecf0e5dc 100644
--- a/arch/arm/mach-s5pv210/mach-goni.c
+++ b/arch/arm/mach-s5pv210/mach-goni.c
@@ -239,13 +239,8 @@ static void __init goni_radio_init(void)
 
 /* TSP */
 static struct mxt_platform_data qt602240_platform_data = {
-	.x_line		= 17,
-	.y_line		= 11,
 	.x_size		= 800,
 	.y_size		= 480,
-	.blen		= 0x21,
-	.threshold	= 0x28,
-	.voltage	= 2800000,              /* 2.8V */
 	.orient		= MXT_DIAGONAL,
 	.irqflags	= IRQF_TRIGGER_FALLING,
 };
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index a70400754e92..7eb515caf215 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -685,54 +685,6 @@ static int mxt_make_highchg(struct mxt_data *data)
 	return 0;
 }
 
-static void mxt_handle_pdata(struct mxt_data *data)
-{
-	const struct mxt_platform_data *pdata = data->pdata;
-	u8 voltage;
-
-	/* Set touchscreen lines */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XSIZE,
-			pdata->x_line);
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YSIZE,
-			pdata->y_line);
-
-	/* Set touchscreen orient */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_ORIENT,
-			pdata->orient);
-
-	/* Set touchscreen burst length */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_BLEN, pdata->blen);
-
-	/* Set touchscreen threshold */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_TCHTHR, pdata->threshold);
-
-	/* Set touchscreen resolution */
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_XRANGE_LSB, (pdata->x_size - 1) & 0xff);
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_XRANGE_MSB, (pdata->x_size - 1) >> 8);
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_YRANGE_LSB, (pdata->y_size - 1) & 0xff);
-	mxt_write_object(data, MXT_TOUCH_MULTI_T9,
-			MXT_TOUCH_YRANGE_MSB, (pdata->y_size - 1) >> 8);
-
-	/* Set touchscreen voltage */
-	if (pdata->voltage) {
-		if (pdata->voltage < MXT_VOLTAGE_DEFAULT) {
-			voltage = (MXT_VOLTAGE_DEFAULT - pdata->voltage) /
-				MXT_VOLTAGE_STEP;
-			voltage = 0xff - voltage + 1;
-		} else
-			voltage = (pdata->voltage - MXT_VOLTAGE_DEFAULT) /
-				MXT_VOLTAGE_STEP;
-
-		mxt_write_object(data, MXT_SPT_CTECONFIG_T28,
-				MXT_CTE_VOLTAGE, voltage);
-	}
-}
-
 static int mxt_get_info(struct mxt_data *data)
 {
 	struct i2c_client *client = data->client;
@@ -840,8 +792,6 @@ static int mxt_initialize(struct mxt_data *data)
 	if (error)
 		goto err_free_object_table;
 
-	mxt_handle_pdata(data);
-
 	/* Backup to memory */
 	mxt_write_object(data, MXT_GEN_COMMAND_T6,
 			MXT_COMMAND_BACKUPNV,
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index 7f3aad0e115c..2559a0407c58 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -85,13 +85,8 @@ static struct i2c_board_info tsl2563_als_device = {
 };
 
 static struct mxt_platform_data atmel_224s_tp_platform_data = {
-	.x_line			= 18,
-	.y_line			= 12,
 	.x_size			= 102*20,
 	.y_size			= 68*20,
-	.blen			= 0x80,	/* Gain setting is in upper 4 bits */
-	.threshold		= 0x32,
-	.voltage		= 0,	/* 3.3V */
 	.orient			= MXT_VERTICAL_FLIP,
 	.irqflags		= IRQF_TRIGGER_FALLING,
 	.is_tp			= true,
@@ -110,13 +105,8 @@ static struct i2c_board_info atmel_224s_tp_device = {
 };
 
 static struct mxt_platform_data atmel_1664s_platform_data = {
-	.x_line			= 32,
-	.y_line			= 50,
 	.x_size			= 1700,
 	.y_size			= 2560,
-	.blen			= 0x89,	/* Gain setting is in upper 4 bits */
-	.threshold		= 0x28,
-	.voltage		= 0,	/* 3.3V */
 	.orient			= MXT_ROTATED_90_COUNTER,
 	.irqflags		= IRQF_TRIGGER_FALLING,
 	.is_tp			= false,
diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h
index 99e379b74398..eff0cdc08843 100644
--- a/include/linux/i2c/atmel_mxt_ts.h
+++ b/include/linux/i2c/atmel_mxt_ts.h
@@ -33,14 +33,10 @@ struct mxt_platform_data {
 	const u8 *config;
 	size_t config_length;
 
-	unsigned int x_line;
-	unsigned int y_line;
 	unsigned int x_size;
 	unsigned int y_size;
-	unsigned int blen;
-	unsigned int threshold;
-	unsigned int voltage;
 	unsigned char orient;
+
 	unsigned long irqflags;
 	bool is_tp;
 	const unsigned int key_map[MXT_NUM_GPIO];
-- 
cgit 


From fb5e4c3ee140b29e1935b4bbb19c319177bed231 Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick.dyer@itdev.co.uk>
Date: Sun, 18 May 2014 23:00:15 -0700
Subject: Input: atmel_mxt_ts - improve T19 GPIO keys handling

 * The mapping of the GPIO numbers into the T19 status byte varies between
   different maXTouch chips. Some have up to 7 GPIOs. Allowing a keycode array
   of up to 8 items is simpler and more generic. So replace #define with
   configurable number of keys which also allows the removal of is_tp.
 * Rename platform data parameters to include "t19" to prevent confusion with
   T15 key array.
 * Probe aborts early on when pdata is NULL, so no need to check.
 * Move "int i" to beginning of function (mixed declarations and code)
 * Use API calls rather than __set_bit()
 * Remove unused dev variable.

Signed-off-by: Nick Dyer <nick.dyer@itdev.co.uk>
Acked-by: Yufeng Shen <miletus@chromium.org>
Reviewed-by: Henrik Rydberg <rydberg@euromail.se>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/atmel_mxt_ts.c  | 44 ++++++++++++-------------------
 drivers/platform/chrome/chromeos_laptop.c | 17 +++++++-----
 include/linux/i2c/atmel_mxt_ts.h          |  7 ++---
 3 files changed, 30 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 7eb515caf215..65df362cf327 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -180,12 +180,6 @@
 
 #define MXT_FWRESET_TIME	175	/* msec */
 
-/* MXT_SPT_GPIOPWM_T19 field */
-#define MXT_GPIO0_MASK		0x04
-#define MXT_GPIO1_MASK		0x08
-#define MXT_GPIO2_MASK		0x10
-#define MXT_GPIO3_MASK		0x20
-
 /* Command to unlock bootloader */
 #define MXT_UNLOCK_CMD_MSB	0xaa
 #define MXT_UNLOCK_CMD_LSB	0xdc
@@ -250,7 +244,6 @@ struct mxt_data {
 	const struct mxt_platform_data *pdata;
 	struct mxt_object *object_table;
 	struct mxt_info info;
-	bool is_tp;
 
 	unsigned int irq;
 	unsigned int max_x;
@@ -515,15 +508,16 @@ static int mxt_write_object(struct mxt_data *data,
 static void mxt_input_button(struct mxt_data *data, struct mxt_message *message)
 {
 	struct input_dev *input = data->input_dev;
+	const struct mxt_platform_data *pdata = data->pdata;
 	bool button;
 	int i;
 
 	/* Active-low switch */
-	for (i = 0; i < MXT_NUM_GPIO; i++) {
-		if (data->pdata->key_map[i] == KEY_RESERVED)
+	for (i = 0; i < pdata->t19_num_keys; i++) {
+		if (pdata->t19_keymap[i] == KEY_RESERVED)
 			continue;
-		button = !(message->message[0] & MXT_GPIO0_MASK << i);
-		input_report_key(input, data->pdata->key_map[i], button);
+		button = !(message->message[0] & (1 << i));
+		input_report_key(input, pdata->t19_keymap[i], button);
 	}
 }
 
@@ -1084,6 +1078,8 @@ static int mxt_probe(struct i2c_client *client,
 	struct input_dev *input_dev;
 	int error;
 	unsigned int num_mt_slots;
+	unsigned int mt_flags = 0;
+	int i;
 
 	if (!pdata)
 		return -EINVAL;
@@ -1096,10 +1092,7 @@ static int mxt_probe(struct i2c_client *client,
 		goto err_free_mem;
 	}
 
-	data->is_tp = pdata && pdata->is_tp;
-
-	input_dev->name = (data->is_tp) ? "Atmel maXTouch Touchpad" :
-					  "Atmel maXTouch Touchscreen";
+	input_dev->name = "Atmel maXTouch Touchscreen";
 	snprintf(data->phys, sizeof(data->phys), "i2c-%u-%04x/input0",
 		 client->adapter->nr, client->addr);
 
@@ -1125,20 +1118,15 @@ static int mxt_probe(struct i2c_client *client,
 	__set_bit(EV_KEY, input_dev->evbit);
 	__set_bit(BTN_TOUCH, input_dev->keybit);
 
-	if (data->is_tp) {
-		int i;
-		__set_bit(INPUT_PROP_POINTER, input_dev->propbit);
+	if (pdata->t19_num_keys) {
 		__set_bit(INPUT_PROP_BUTTONPAD, input_dev->propbit);
 
-		for (i = 0; i < MXT_NUM_GPIO; i++)
-			if (pdata->key_map[i] != KEY_RESERVED)
-				__set_bit(pdata->key_map[i], input_dev->keybit);
+		for (i = 0; i < pdata->t19_num_keys; i++)
+			if (pdata->t19_keymap[i] != KEY_RESERVED)
+				input_set_capability(input_dev, EV_KEY,
+						     pdata->t19_keymap[i]);
 
-		__set_bit(BTN_TOOL_FINGER, input_dev->keybit);
-		__set_bit(BTN_TOOL_DOUBLETAP, input_dev->keybit);
-		__set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit);
-		__set_bit(BTN_TOOL_QUADTAP, input_dev->keybit);
-		__set_bit(BTN_TOOL_QUINTTAP, input_dev->keybit);
+		mt_flags |= INPUT_MT_POINTER;
 
 		input_abs_set_res(input_dev, ABS_X, MXT_PIXELS_PER_MM);
 		input_abs_set_res(input_dev, ABS_Y, MXT_PIXELS_PER_MM);
@@ -1146,6 +1134,8 @@ static int mxt_probe(struct i2c_client *client,
 				  MXT_PIXELS_PER_MM);
 		input_abs_set_res(input_dev, ABS_MT_POSITION_Y,
 				  MXT_PIXELS_PER_MM);
+
+		input_dev->name = "Atmel maXTouch Touchpad";
 	}
 
 	/* For single touch */
@@ -1158,7 +1148,7 @@ static int mxt_probe(struct i2c_client *client,
 
 	/* For multi touch */
 	num_mt_slots = data->T9_reportid_max - data->T9_reportid_min + 1;
-	error = input_mt_init_slots(input_dev, num_mt_slots, 0);
+	error = input_mt_init_slots(input_dev, num_mt_slots, mt_flags);
 	if (error)
 		goto err_free_object;
 	input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR,
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index 2559a0407c58..8b7523ab62e5 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -84,16 +84,22 @@ static struct i2c_board_info tsl2563_als_device = {
 	I2C_BOARD_INFO("tsl2563", TAOS_ALS_I2C_ADDR),
 };
 
+static int mxt_t19_keys[] = {
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	KEY_RESERVED,
+	BTN_LEFT
+};
+
 static struct mxt_platform_data atmel_224s_tp_platform_data = {
 	.x_size			= 102*20,
 	.y_size			= 68*20,
 	.orient			= MXT_VERTICAL_FLIP,
 	.irqflags		= IRQF_TRIGGER_FALLING,
-	.is_tp			= true,
-	.key_map		= { KEY_RESERVED,
-				    KEY_RESERVED,
-				    KEY_RESERVED,
-				    BTN_LEFT },
+	.t19_num_keys		= ARRAY_SIZE(mxt_t19_keys),
+	.t19_keymap		= mxt_t19_keys,
 	.config			= NULL,
 	.config_length		= 0,
 };
@@ -109,7 +115,6 @@ static struct mxt_platform_data atmel_1664s_platform_data = {
 	.y_size			= 2560,
 	.orient			= MXT_ROTATED_90_COUNTER,
 	.irqflags		= IRQF_TRIGGER_FALLING,
-	.is_tp			= false,
 	.config			= NULL,
 	.config_length		= 0,
 };
diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h
index eff0cdc08843..d26080dc606c 100644
--- a/include/linux/i2c/atmel_mxt_ts.h
+++ b/include/linux/i2c/atmel_mxt_ts.h
@@ -15,9 +15,6 @@
 
 #include <linux/types.h>
 
-/* For key_map array */
-#define MXT_NUM_GPIO		4
-
 /* Orient */
 #define MXT_NORMAL		0x0
 #define MXT_DIAGONAL		0x1
@@ -38,8 +35,8 @@ struct mxt_platform_data {
 	unsigned char orient;
 
 	unsigned long irqflags;
-	bool is_tp;
-	const unsigned int key_map[MXT_NUM_GPIO];
+	u8 t19_num_keys;
+	const unsigned int *t19_keymap;
 };
 
 #endif /* __LINUX_ATMEL_MXT_TS_H */
-- 
cgit 


From c3f78043d5aea39205a14c580babd87fbdcfa148 Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick.dyer@itdev.co.uk>
Date: Sun, 18 May 2014 23:04:46 -0700
Subject: Input: atmel_mxt_ts - implement CRC check for configuration data

The configuration is stored in NVRAM on the maXTouch chip. When the device
is reset it reports a CRC of the stored configuration values. Therefore it
isn't necessary to send the configuration on each probe - we can check the
CRC matches and avoid a time-consuming backup/reset cycle.

Signed-off-by: Nick Dyer <nick.dyer@itdev.co.uk>
Acked-by: Benson Leung <bleung@chromium.org>
Acked-by: Yufeng Shen <miletus@chromium.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/atmel_mxt_ts.c | 60 +++++++++++++++++++++++++++-----
 include/linux/i2c/atmel_mxt_ts.h         |  1 +
 2 files changed, 53 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 278f364ec6e1..61f9ef221d12 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -188,6 +188,7 @@
 #define MXT_BACKUP_TIME		50	/* msec */
 #define MXT_RESET_TIME		200	/* msec */
 #define MXT_RESET_TIMEOUT	3000	/* msec */
+#define MXT_CRC_TIMEOUT		1000	/* msec */
 #define MXT_FW_RESET_TIME	3000	/* msec */
 #define MXT_FW_CHG_TIMEOUT	300	/* msec */
 
@@ -259,6 +260,7 @@ struct mxt_data {
 	unsigned int max_x;
 	unsigned int max_y;
 	bool in_bootloader;
+	u32 config_crc;
 
 	/* Cached parameters from object table */
 	u8 T6_reportid;
@@ -272,6 +274,9 @@ struct mxt_data {
 
 	/* for reset handling */
 	struct completion reset_completion;
+
+	/* for config update handling */
+	struct completion crc_completion;
 };
 
 static size_t mxt_obj_size(const struct mxt_object *obj)
@@ -636,7 +641,7 @@ static void mxt_input_touchevent(struct mxt_data *data,
 	}
 }
 
-static unsigned mxt_extract_T6_csum(const u8 *csum)
+static u16 mxt_extract_T6_csum(const u8 *csum)
 {
 	return csum[0] | (csum[1] << 8) | (csum[2] << 16);
 }
@@ -654,6 +659,7 @@ static irqreturn_t mxt_process_messages_until_invalid(struct mxt_data *data)
 	struct device *dev = &data->client->dev;
 	u8 reportid;
 	bool update_input = false;
+	u32 crc;
 
 	do {
 		if (mxt_read_message(data, &message)) {
@@ -665,9 +671,15 @@ static irqreturn_t mxt_process_messages_until_invalid(struct mxt_data *data)
 
 		if (reportid == data->T6_reportid) {
 			u8 status = payload[0];
-			unsigned csum = mxt_extract_T6_csum(&payload[1]);
+
+			crc = mxt_extract_T6_csum(&payload[1]);
+			if (crc != data->config_crc) {
+				data->config_crc = crc;
+				complete(&data->crc_completion);
+			}
+
 			dev_dbg(dev, "Status: %02x Config Checksum: %06x\n",
-				status, csum);
+				status, data->config_crc);
 
 			if (status & MXT_T6_STATUS_RESET)
 				complete(&data->reset_completion);
@@ -757,6 +769,24 @@ static int mxt_soft_reset(struct mxt_data *data)
 	return 0;
 }
 
+static void mxt_update_crc(struct mxt_data *data, u8 cmd, u8 value)
+{
+	/*
+	 * Forget the cached CRC so that the next T6 status message carrying
+	 * a non-zero checksum is seen as a change and completes the wait.
+	 */
+	data->config_crc = 0;
+	reinit_completion(&data->crc_completion);
+
+	mxt_t6_command(data, cmd, value, true);
+
+	/*
+	 * Wait for crc message. On failure, CRC is set to 0 and config will
+	 * always be downloaded.
+	 */
+	mxt_wait_for_completion(data, &data->crc_completion, MXT_CRC_TIMEOUT);
+}
+
 static int mxt_check_reg_init(struct mxt_data *data)
 {
 	const struct mxt_platform_data *pdata = data->pdata;
@@ -771,6 +801,16 @@ static int mxt_check_reg_init(struct mxt_data *data)
 		return 0;
 	}
 
+	mxt_update_crc(data, MXT_COMMAND_REPORTALL, 1);
+
+	if (data->config_crc == pdata->config_crc) {
+		dev_info(dev, "Config CRC 0x%06X: OK\n", data->config_crc);
+		return 0;
+	}
+
+	dev_info(dev, "Config CRC 0x%06X: does not match 0x%06X\n",
+		 data->config_crc, pdata->config_crc);
+
 	for (i = 0; i < data->info.object_num; i++) {
 		object = data->object_table + i;
 
@@ -790,6 +830,14 @@ static int mxt_check_reg_init(struct mxt_data *data)
 		index += size;
 	}
 
+	mxt_update_crc(data, MXT_COMMAND_BACKUPNV, MXT_BACKUP_VALUE);
+
+	ret = mxt_soft_reset(data);
+	if (ret)
+		return ret;
+
+	dev_info(dev, "Config successfully updated\n");
+
 	return 0;
 }
 
@@ -929,11 +977,6 @@ static int mxt_initialize(struct mxt_data *data)
 		goto err_free_object_table;
 	}
 
-	error = mxt_t6_command(data, MXT_COMMAND_BACKUPNV,
-			       MXT_BACKUP_VALUE, false);
-	if (!error)
-		mxt_soft_reset(data);
-
 	/* Update matrix size at info struct */
 	error = mxt_read_reg(client, MXT_MATRIX_X_SIZE, &val);
 	if (error)
@@ -1263,6 +1306,7 @@ static int mxt_probe(struct i2c_client *client,
 
 	init_completion(&data->bl_completion);
 	init_completion(&data->reset_completion);
+	init_completion(&data->crc_completion);
 
 	mxt_calc_resolution(data);
 
diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h
index d26080dc606c..9f92135b6620 100644
--- a/include/linux/i2c/atmel_mxt_ts.h
+++ b/include/linux/i2c/atmel_mxt_ts.h
@@ -29,6 +29,7 @@
 struct mxt_platform_data {
 	const u8 *config;
 	size_t config_length;
+	u32 config_crc;
 
 	unsigned int x_size;
 	unsigned int y_size;
-- 
cgit 


From fd1159318e55e901cf269de90163b19fd62938cb Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Date: Sat, 17 May 2014 00:03:54 +0400
Subject: can: add Renesas R-Car CAN driver

Add support for the CAN controller found in Renesas R-Car SoCs.

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/Kconfig               |  10 +
 drivers/net/can/Makefile              |   1 +
 drivers/net/can/rcar_can.c            | 876 ++++++++++++++++++++++++++++++++++
 include/linux/can/platform/rcar_can.h |  17 +
 4 files changed, 904 insertions(+)
 create mode 100644 drivers/net/can/rcar_can.c
 create mode 100644 include/linux/can/platform/rcar_can.h

(limited to 'include/linux')

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index ac67afa7735c..714b18790caf 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -119,6 +119,16 @@ config CAN_GRCAN
 	  endian syntheses of the cores would need some modifications on
 	  the hardware level to work.
 
+config CAN_RCAR
+	tristate "Renesas R-Car CAN controller"
+	depends on ARM
+	---help---
+	  Say Y here if you want to use the CAN controller found on Renesas
+	  R-Car SoCs.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called rcar_can.
+
 source "drivers/net/can/mscan/Kconfig"
 
 source "drivers/net/can/sja1000/Kconfig"
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index c42058868b0f..90f538c73f8c 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -25,5 +25,6 @@ obj-$(CONFIG_CAN_JANZ_ICAN3)	+= janz-ican3.o
 obj-$(CONFIG_CAN_FLEXCAN)	+= flexcan.o
 obj-$(CONFIG_PCH_CAN)		+= pch_can.o
 obj-$(CONFIG_CAN_GRCAN)		+= grcan.o
+obj-$(CONFIG_CAN_RCAR)		+= rcar_can.o
 
 ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/rcar_can.c b/drivers/net/can/rcar_can.c
new file mode 100644
index 000000000000..5268d216ecfa
--- /dev/null
+++ b/drivers/net/can/rcar_can.c
@@ -0,0 +1,876 @@
+/* Renesas R-Car CAN device driver
+ *
+ * Copyright (C) 2013 Cogent Embedded, Inc. <source@cogentembedded.com>
+ * Copyright (C) 2013 Renesas Solutions Corp.
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/can/led.h>
+#include <linux/can/dev.h>
+#include <linux/clk.h>
+#include <linux/can/platform/rcar_can.h>
+
+#define RCAR_CAN_DRV_NAME	"rcar_can"
+
+/* Mailbox configuration:
+ * mailbox 60 - 63 - Rx FIFO mailboxes
+ * mailbox 56 - 59 - Tx FIFO mailboxes
+ * non-FIFO mailboxes are not used
+ */
+#define RCAR_CAN_N_MBX		64 /* Number of mailboxes in non-FIFO mode */
+#define RCAR_CAN_RX_FIFO_MBX	60 /* Mailbox - window to Rx FIFO */
+#define RCAR_CAN_TX_FIFO_MBX	56 /* Mailbox - window to Tx FIFO */
+#define RCAR_CAN_FIFO_DEPTH	4
+
+/* Mailbox registers structure */
+struct rcar_can_mbox_regs {
+	u32 id;		/* IDE and RTR bits, SID and EID */
+	u8 stub;	/* Not used */
+	u8 dlc;		/* Data Length Code - bits [0..3] */
+	u8 data[8];	/* Data Bytes */
+	u8 tsh;		/* Time Stamp Higher Byte */
+	u8 tsl;		/* Time Stamp Lower Byte */
+};
+
+struct rcar_can_regs {
+	struct rcar_can_mbox_regs mb[RCAR_CAN_N_MBX]; /* Mailbox registers */
+	u32 mkr_2_9[8];	/* Mask Registers 2-9 */
+	u32 fidcr[2];	/* FIFO Received ID Compare Register */
+	u32 mkivlr1;	/* Mask Invalid Register 1 */
+	u32 mier1;	/* Mailbox Interrupt Enable Register 1 */
+	u32 mkr_0_1[2];	/* Mask Registers 0-1 */
+	u32 mkivlr0;    /* Mask Invalid Register 0 */
+	u32 mier0;      /* Mailbox Interrupt Enable Register 0 */
+	u8 pad_440[0x3c0];	/* Padding up to the MCTL registers */
+	u8 mctl[64];	/* Message Control Registers */
+	u16 ctlr;	/* Control Register */
+	u16 str;	/* Status register */
+	u8 bcr[3];	/* Bit Configuration Register */
+	u8 clkr;	/* Clock Select Register */
+	u8 rfcr;	/* Receive FIFO Control Register */
+	u8 rfpcr;	/* Receive FIFO Pointer Control Register */
+	u8 tfcr;	/* Transmit FIFO Control Register */
+	u8 tfpcr;       /* Transmit FIFO Pointer Control Register */
+	u8 eier;	/* Error Interrupt Enable Register */
+	u8 eifr;	/* Error Interrupt Factor Judge Register */
+	u8 recr;	/* Receive Error Count Register */
+	u8 tecr;        /* Transmit Error Count Register */
+	u8 ecsr;	/* Error Code Store Register */
+	u8 cssr;	/* Channel Search Support Register */
+	u8 mssr;	/* Mailbox Search Status Register */
+	u8 msmr;	/* Mailbox Search Mode Register */
+	u16 tsr;	/* Time Stamp Register */
+	u8 afsr;	/* Acceptance Filter Support Register */
+	u8 pad_857;	/* Padding */
+	u8 tcr;		/* Test Control Register */
+	u8 pad_859[7];	/* Padding */
+	u8 ier;		/* Interrupt Enable Register */
+	u8 isr;		/* Interrupt Status Register */
+	u8 pad_862;	/* Padding */
+	u8 mbsmr;	/* Mailbox Search Mask Register */
+};
+
+struct rcar_can_priv {
+	struct can_priv can;	/* Must be the first member! */
+	struct net_device *ndev;	/* Network device owning this state */
+	struct napi_struct napi;	/* Rx polling context */
+	struct rcar_can_regs __iomem *regs;	/* Mapped register block */
+	struct clk *clk;	/* Clock enabled while the device is open */
+	u8 tx_dlc[RCAR_CAN_FIFO_DEPTH];	/* DLC of each frame awaiting Tx done */
+	u32 tx_head;	/* Incremented for every frame queued for Tx */
+	u32 tx_tail;	/* Incremented for every frame reported as sent */
+	u8 clock_select;	/* Clock source taken from platform data */
+	u8 ier;		/* Value last written to the IER register */
+};
+
+static const struct can_bittiming_const rcar_can_bittiming_const = {
+	.name = RCAR_CAN_DRV_NAME,
+	.tseg1_min = 4,
+	.tseg1_max = 16,
+	.tseg2_min = 2,
+	.tseg2_max = 8,
+	.sjw_max = 4,
+	.brp_min = 1,
+	.brp_max = 1024,
+	.brp_inc = 1,
+};
+
+/* Control Register bits */
+#define RCAR_CAN_CTLR_BOM	(3 << 11) /* Bus-Off Recovery Mode Bits */
+#define RCAR_CAN_CTLR_BOM_ENT	(1 << 11) /* Entry to halt mode */
+					/* at bus-off entry */
+#define RCAR_CAN_CTLR_SLPM	(1 << 10)
+#define RCAR_CAN_CTLR_CANM	(3 << 8) /* Operating Mode Select Bit */
+#define RCAR_CAN_CTLR_CANM_HALT	(1 << 9)
+#define RCAR_CAN_CTLR_CANM_RESET (1 << 8)
+#define RCAR_CAN_CTLR_CANM_FORCE_RESET (3 << 8)
+#define RCAR_CAN_CTLR_MLM	(1 << 3) /* Message Lost Mode Select */
+#define RCAR_CAN_CTLR_IDFM	(3 << 1) /* ID Format Mode Select Bits */
+#define RCAR_CAN_CTLR_IDFM_MIXED (1 << 2) /* Mixed ID mode */
+#define RCAR_CAN_CTLR_MBM	(1 << 0) /* Mailbox Mode select */
+
+/* Status Register bits */
+#define RCAR_CAN_STR_RSTST	(1 << 8) /* Reset Status Bit */
+
+/* FIFO Received ID Compare Registers 0 and 1 bits */
+#define RCAR_CAN_FIDCR_IDE	(1 << 31) /* ID Extension Bit */
+#define RCAR_CAN_FIDCR_RTR	(1 << 30) /* Remote Transmission Request Bit */
+
+/* Receive FIFO Control Register bits */
+#define RCAR_CAN_RFCR_RFEST	(1 << 7) /* Receive FIFO Empty Status Flag */
+#define RCAR_CAN_RFCR_RFE	(1 << 0) /* Receive FIFO Enable */
+
+/* Transmit FIFO Control Register bits */
+#define RCAR_CAN_TFCR_TFUST	(7 << 1) /* Transmit FIFO Unsent Message */
+					/* Number Status Bits */
+#define RCAR_CAN_TFCR_TFUST_SHIFT 1	/* Offset of Transmit FIFO Unsent */
+					/* Message Number Status Bits */
+#define RCAR_CAN_TFCR_TFE	(1 << 0) /* Transmit FIFO Enable */
+
+#define RCAR_CAN_N_RX_MKREGS1	2	/* Number of mask registers */
+					/* for Rx mailboxes 0-31 */
+#define RCAR_CAN_N_RX_MKREGS2	8
+
+/* Bit Configuration Register settings */
+#define RCAR_CAN_BCR_TSEG1(x)	(((x) & 0x0f) << 20)
+#define RCAR_CAN_BCR_BPR(x)	(((x) & 0x3ff) << 8)
+#define RCAR_CAN_BCR_SJW(x)	(((x) & 0x3) << 4)
+#define RCAR_CAN_BCR_TSEG2(x)	((x) & 0x07)
+
+/* Mailbox and Mask Registers bits */
+#define RCAR_CAN_IDE		(1 << 31)
+#define RCAR_CAN_RTR		(1 << 30)
+#define RCAR_CAN_SID_SHIFT	18
+
+/* Mailbox Interrupt Enable Register 1 bits */
+#define RCAR_CAN_MIER1_RXFIE	(1 << 28) /* Receive  FIFO Interrupt Enable */
+#define RCAR_CAN_MIER1_TXFIE	(1 << 24) /* Transmit FIFO Interrupt Enable */
+
+/* Interrupt Enable Register bits */
+#define RCAR_CAN_IER_ERSIE	(1 << 5) /* Error (ERS) Interrupt Enable Bit */
+#define RCAR_CAN_IER_RXFIE	(1 << 4) /* Reception FIFO Interrupt */
+					/* Enable Bit */
+#define RCAR_CAN_IER_TXFIE	(1 << 3) /* Transmission FIFO Interrupt */
+					/* Enable Bit */
+/* Interrupt Status Register bits */
+#define RCAR_CAN_ISR_ERSF	(1 << 5) /* Error (ERS) Interrupt Status Bit */
+#define RCAR_CAN_ISR_RXFF	(1 << 4) /* Reception FIFO Interrupt */
+					/* Status Bit */
+#define RCAR_CAN_ISR_TXFF	(1 << 3) /* Transmission FIFO Interrupt */
+					/* Status Bit */
+
+/* Error Interrupt Enable Register bits */
+#define RCAR_CAN_EIER_BLIE	(1 << 7) /* Bus Lock Interrupt Enable */
+#define RCAR_CAN_EIER_OLIE	(1 << 6) /* Overload Frame Transmit */
+					/* Interrupt Enable */
+#define RCAR_CAN_EIER_ORIE	(1 << 5) /* Receive Overrun  Interrupt Enable */
+#define RCAR_CAN_EIER_BORIE	(1 << 4) /* Bus-Off Recovery Interrupt Enable */
+#define RCAR_CAN_EIER_BOEIE	(1 << 3) /* Bus-Off Entry Interrupt Enable */
+#define RCAR_CAN_EIER_EPIE	(1 << 2) /* Error Passive Interrupt Enable */
+#define RCAR_CAN_EIER_EWIE	(1 << 1) /* Error Warning Interrupt Enable */
+#define RCAR_CAN_EIER_BEIE	(1 << 0) /* Bus Error Interrupt Enable */
+
+/* Error Interrupt Factor Judge Register bits */
+#define RCAR_CAN_EIFR_BLIF	(1 << 7) /* Bus Lock Detect Flag */
+#define RCAR_CAN_EIFR_OLIF	(1 << 6) /* Overload Frame Transmission */
+					 /* Detect Flag */
+#define RCAR_CAN_EIFR_ORIF	(1 << 5) /* Receive Overrun Detect Flag */
+#define RCAR_CAN_EIFR_BORIF	(1 << 4) /* Bus-Off Recovery Detect Flag */
+#define RCAR_CAN_EIFR_BOEIF	(1 << 3) /* Bus-Off Entry Detect Flag */
+#define RCAR_CAN_EIFR_EPIF	(1 << 2) /* Error Passive Detect Flag */
+#define RCAR_CAN_EIFR_EWIF	(1 << 1) /* Error Warning Detect Flag */
+#define RCAR_CAN_EIFR_BEIF	(1 << 0) /* Bus Error Detect Flag */
+
+/* Error Code Store Register bits */
+#define RCAR_CAN_ECSR_EDPM	(1 << 7) /* Error Display Mode Select Bit */
+#define RCAR_CAN_ECSR_ADEF	(1 << 6) /* ACK Delimiter Error Flag */
+#define RCAR_CAN_ECSR_BE0F	(1 << 5) /* Bit Error (dominant) Flag */
+#define RCAR_CAN_ECSR_BE1F	(1 << 4) /* Bit Error (recessive) Flag */
+#define RCAR_CAN_ECSR_CEF	(1 << 3) /* CRC Error Flag */
+#define RCAR_CAN_ECSR_AEF	(1 << 2) /* ACK Error Flag */
+#define RCAR_CAN_ECSR_FEF	(1 << 1) /* Form Error Flag */
+#define RCAR_CAN_ECSR_SEF	(1 << 0) /* Stuff Error Flag */
+
+#define RCAR_CAN_NAPI_WEIGHT	4
+#define MAX_STR_READS		0x100
+
+static void tx_failure_cleanup(struct net_device *ndev)
+{
+	int slot;	/* One echo skb slot per Tx FIFO entry */
+
+	for (slot = 0; slot != RCAR_CAN_FIFO_DEPTH; ++slot)
+		can_free_echo_skb(ndev, slot);
+}
+
+static void rcar_can_error(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u8 eifr, txerr = 0, rxerr = 0;
+
+	/* Error frame for the CAN stack; on NULL only counters are updated */
+	skb = alloc_can_err_skb(ndev, &cf);
+
+	eifr = readb(&priv->regs->eifr);	/* Pending error causes */
+	if (eifr & (RCAR_CAN_EIFR_EWIF | RCAR_CAN_EIFR_EPIF)) {
+		txerr = readb(&priv->regs->tecr);
+		rxerr = readb(&priv->regs->recr);
+		if (skb) {
+			cf->can_id |= CAN_ERR_CRTL;
+			cf->data[6] = txerr;
+			cf->data[7] = rxerr;
+		}
+	}
+	if (eifr & RCAR_CAN_EIFR_BEIF) {
+		int rx_errors = 0, tx_errors = 0;
+		u8 ecsr;
+
+		netdev_dbg(priv->ndev, "Bus error interrupt:\n");
+		if (skb) {
+			cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
+			cf->data[2] = CAN_ERR_PROT_UNSPEC;
+		}
+		ecsr = readb(&priv->regs->ecsr);	/* Accumulated codes */
+		if (ecsr & RCAR_CAN_ECSR_ADEF) {
+			netdev_dbg(priv->ndev, "ACK Delimiter Error\n");
+			tx_errors++;
+			writeb(~RCAR_CAN_ECSR_ADEF, &priv->regs->ecsr);
+			if (skb)
+				cf->data[3] |= CAN_ERR_PROT_LOC_ACK_DEL;
+		}
+		if (ecsr & RCAR_CAN_ECSR_BE0F) {
+			netdev_dbg(priv->ndev, "Bit Error (dominant)\n");
+			tx_errors++;
+			writeb(~RCAR_CAN_ECSR_BE0F, &priv->regs->ecsr);
+			if (skb)
+				cf->data[2] |= CAN_ERR_PROT_BIT0;
+		}
+		if (ecsr & RCAR_CAN_ECSR_BE1F) {
+			netdev_dbg(priv->ndev, "Bit Error (recessive)\n");
+			tx_errors++;
+			writeb(~RCAR_CAN_ECSR_BE1F, &priv->regs->ecsr);
+			if (skb)
+				cf->data[2] |= CAN_ERR_PROT_BIT1;
+		}
+		if (ecsr & RCAR_CAN_ECSR_CEF) {
+			netdev_dbg(priv->ndev, "CRC Error\n");
+			rx_errors++;
+			writeb(~RCAR_CAN_ECSR_CEF, &priv->regs->ecsr);
+			if (skb)
+				cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+		}
+		if (ecsr & RCAR_CAN_ECSR_AEF) {
+			netdev_dbg(priv->ndev, "ACK Error\n");
+			tx_errors++;
+			writeb(~RCAR_CAN_ECSR_AEF, &priv->regs->ecsr);
+			if (skb) {
+				cf->can_id |= CAN_ERR_ACK;
+				cf->data[3] |= CAN_ERR_PROT_LOC_ACK;
+			}
+		}
+		if (ecsr & RCAR_CAN_ECSR_FEF) {
+			netdev_dbg(priv->ndev, "Form Error\n");
+			rx_errors++;
+			writeb(~RCAR_CAN_ECSR_FEF, &priv->regs->ecsr);
+			if (skb)
+				cf->data[2] |= CAN_ERR_PROT_FORM;
+		}
+		if (ecsr & RCAR_CAN_ECSR_SEF) {
+			netdev_dbg(priv->ndev, "Stuff Error\n");
+			rx_errors++;
+			writeb(~RCAR_CAN_ECSR_SEF, &priv->regs->ecsr);
+			if (skb)
+				cf->data[2] |= CAN_ERR_PROT_STUFF;
+		}
+
+		priv->can.can_stats.bus_error++;
+		ndev->stats.rx_errors += rx_errors;
+		ndev->stats.tx_errors += tx_errors;
+		writeb(~RCAR_CAN_EIFR_BEIF, &priv->regs->eifr);
+	}
+	if (eifr & RCAR_CAN_EIFR_EWIF) {
+		netdev_dbg(priv->ndev, "Error warning interrupt\n");
+		priv->can.state = CAN_STATE_ERROR_WARNING;
+		priv->can.can_stats.error_warning++;
+		/* Clear interrupt condition */
+		writeb(~RCAR_CAN_EIFR_EWIF, &priv->regs->eifr);
+		if (skb)
+			cf->data[1] = txerr > rxerr ? CAN_ERR_CRTL_TX_WARNING :
+					      CAN_ERR_CRTL_RX_WARNING;
+	}
+	if (eifr & RCAR_CAN_EIFR_EPIF) {
+		netdev_dbg(priv->ndev, "Error passive interrupt\n");
+		priv->can.state = CAN_STATE_ERROR_PASSIVE;
+		priv->can.can_stats.error_passive++;
+		/* Clear interrupt condition */
+		writeb(~RCAR_CAN_EIFR_EPIF, &priv->regs->eifr);
+		if (skb)
+			cf->data[1] = txerr > rxerr ? CAN_ERR_CRTL_TX_PASSIVE :
+					      CAN_ERR_CRTL_RX_PASSIVE;
+	}
+	if (eifr & RCAR_CAN_EIFR_BOEIF) {
+		netdev_dbg(priv->ndev, "Bus-off entry interrupt\n");
+		tx_failure_cleanup(ndev);	/* Drop all pending echo skbs */
+		priv->ier = RCAR_CAN_IER_ERSIE;	/* Keep only error IRQs on */
+		writeb(priv->ier, &priv->regs->ier);
+		priv->can.state = CAN_STATE_BUS_OFF;
+		/* Clear interrupt condition */
+		writeb(~RCAR_CAN_EIFR_BOEIF, &priv->regs->eifr);
+		can_bus_off(ndev);
+		if (skb)
+			cf->can_id |= CAN_ERR_BUSOFF;
+	}
+	if (eifr & RCAR_CAN_EIFR_ORIF) {
+		netdev_dbg(priv->ndev, "Receive overrun error interrupt\n");
+		ndev->stats.rx_over_errors++;
+		ndev->stats.rx_errors++;
+		writeb(~RCAR_CAN_EIFR_ORIF, &priv->regs->eifr);
+		if (skb) {
+			cf->can_id |= CAN_ERR_CRTL;
+			cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW;
+		}
+	}
+	if (eifr & RCAR_CAN_EIFR_OLIF) {
+		netdev_dbg(priv->ndev,
+			   "Overload Frame Transmission error interrupt\n");
+		ndev->stats.rx_over_errors++;
+		ndev->stats.rx_errors++;
+		writeb(~RCAR_CAN_EIFR_OLIF, &priv->regs->eifr);
+		if (skb) {
+			cf->can_id |= CAN_ERR_PROT;
+			cf->data[2] |= CAN_ERR_PROT_OVERLOAD;
+		}
+	}
+
+	if (skb) {	/* The error frame is accounted as received traffic */
+		stats->rx_packets++;
+		stats->rx_bytes += cf->can_dlc;
+		netif_rx(skb);
+	}
+}
+
+static void rcar_can_tx_done(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	struct net_device_stats *stats = &ndev->stats;
+	u8 isr;
+
+	for (;;) {	/* Retire frames until only unsent ones remain */
+		u8 pending = readb(&priv->regs->tfcr);
+		u32 slot = priv->tx_tail % RCAR_CAN_FIFO_DEPTH;
+
+		pending &= RCAR_CAN_TFCR_TFUST;
+		pending >>= RCAR_CAN_TFCR_TFUST_SHIFT;
+		if (priv->tx_head - priv->tx_tail <= pending)
+			break;
+		stats->tx_packets++;
+		stats->tx_bytes += priv->tx_dlc[slot];
+		priv->tx_dlc[slot] = 0;
+		can_get_echo_skb(ndev, slot);
+		priv->tx_tail++;
+		netif_wake_queue(ndev);
+	}
+	/* Acknowledge the Tx FIFO interrupt */
+	isr = readb(&priv->regs->isr);
+	writeb(isr & ~RCAR_CAN_ISR_TXFF, &priv->regs->isr);
+	can_led_event(ndev, CAN_LED_EVENT_TX);
+}
+
+static irqreturn_t rcar_can_interrupt(int irq, void *dev_id)
+{
+	struct net_device *ndev = dev_id;
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u8 pending = readb(&priv->regs->isr);
+
+	/* None of the sources we enabled is asserted: not our interrupt */
+	if ((pending & priv->ier) == 0)
+		return IRQ_NONE;
+
+	if (pending & RCAR_CAN_ISR_ERSF)
+		rcar_can_error(ndev);
+
+	if (pending & RCAR_CAN_ISR_TXFF)
+		rcar_can_tx_done(ndev);
+
+	/* Hand Rx processing over to NAPI with the Rx FIFO interrupt */
+	/* masked; the poll routine is what turns it back on */
+	if ((pending & RCAR_CAN_ISR_RXFF) &&
+	    napi_schedule_prep(&priv->napi)) {
+		priv->ier &= ~RCAR_CAN_IER_RXFIE;
+		writeb(priv->ier, &priv->regs->ier);
+		__napi_schedule(&priv->napi);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static void rcar_can_set_bittiming(struct net_device *dev)
+{
+	struct rcar_can_priv *priv = netdev_priv(dev);
+	const struct can_bittiming *cfg = &priv->can.bittiming;
+	u32 val;
+
+	val = RCAR_CAN_BCR_TSEG1(cfg->phase_seg1 + cfg->prop_seg - 1);
+	val |= RCAR_CAN_BCR_BPR(cfg->brp - 1) | RCAR_CAN_BCR_SJW(cfg->sjw - 1);
+	val |= RCAR_CAN_BCR_TSEG2(cfg->phase_seg2 - 1);
+	/* The 32-bit BCR write also covers CLKR (itself an 8-bit register),
+	 * so the clock selection goes into the low byte to keep it intact.
+	 * Registers are big-endian but get byte-swapped on 32-bit accesses
+	 * (though not on 8-bit ones, contrary to the manuals)...
+	 */
+	writel((val << 8) | priv->clock_select, &priv->regs->bcr);
+}
+
+static void rcar_can_start(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u16 ctlr;
+	int i;
+
+	/* Set controller to known mode:
+	 * - FIFO mailbox mode
+	 * - accept all messages
+	 * - overrun mode
+	 * CAN is in sleep mode after MCU hardware or software reset.
+	 */
+	ctlr = readw(&priv->regs->ctlr);
+	ctlr &= ~RCAR_CAN_CTLR_SLPM;
+	writew(ctlr, &priv->regs->ctlr);
+	/* Go to reset mode */
+	ctlr |= RCAR_CAN_CTLR_CANM_FORCE_RESET;
+	writew(ctlr, &priv->regs->ctlr);
+	for (i = 0; i < MAX_STR_READS; i++) {	/* Bounded wait for RSTST */
+		if (readw(&priv->regs->str) & RCAR_CAN_STR_RSTST)
+			break;
+	}
+	rcar_can_set_bittiming(ndev);
+	ctlr |= RCAR_CAN_CTLR_IDFM_MIXED; /* Select mixed ID mode */
+	ctlr |= RCAR_CAN_CTLR_BOM_ENT;	/* Entry to halt mode automatically */
+					/* at bus-off */
+	ctlr |= RCAR_CAN_CTLR_MBM;	/* Select FIFO mailbox mode */
+	ctlr |= RCAR_CAN_CTLR_MLM;	/* Overrun mode */
+	writew(ctlr, &priv->regs->ctlr);
+
+	/* Accept all SID and EID */
+	writel(0, &priv->regs->mkr_2_9[6]);
+	writel(0, &priv->regs->mkr_2_9[7]);
+	/* In FIFO mailbox mode, write "0" to bits 24 to 31 */
+	writel(0, &priv->regs->mkivlr1);
+	/* Accept all frames */
+	writel(0, &priv->regs->fidcr[0]);
+	writel(RCAR_CAN_FIDCR_IDE | RCAR_CAN_FIDCR_RTR, &priv->regs->fidcr[1]);
+	/* Enable and configure FIFO mailbox interrupts */
+	writel(RCAR_CAN_MIER1_RXFIE | RCAR_CAN_MIER1_TXFIE, &priv->regs->mier1);
+
+	priv->ier = RCAR_CAN_IER_ERSIE | RCAR_CAN_IER_RXFIE |
+		    RCAR_CAN_IER_TXFIE;	/* Cached copy, reused by the IRQ path */
+	writeb(priv->ier, &priv->regs->ier);
+
+	/* Accumulate error codes */
+	writeb(RCAR_CAN_ECSR_EDPM, &priv->regs->ecsr);
+	/* Enable error interrupts; bus errors only if reporting was asked */
+	writeb(RCAR_CAN_EIER_EWIE | RCAR_CAN_EIER_EPIE | RCAR_CAN_EIER_BOEIE |
+	       (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING ?
+	       RCAR_CAN_EIER_BEIE : 0) | RCAR_CAN_EIER_ORIE |
+	       RCAR_CAN_EIER_OLIE, &priv->regs->eier);
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+
+	/* Go to operation mode */
+	writew(ctlr & ~RCAR_CAN_CTLR_CANM, &priv->regs->ctlr);
+	for (i = 0; i < MAX_STR_READS; i++) {	/* Bounded wait, RSTST clear */
+		if (!(readw(&priv->regs->str) & RCAR_CAN_STR_RSTST))
+			break;
+	}
+	/* Enable Rx and Tx FIFO */
+	writeb(RCAR_CAN_RFCR_RFE, &priv->regs->rfcr);
+	writeb(RCAR_CAN_TFCR_TFE, &priv->regs->tfcr);
+}
+
+static int rcar_can_open(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	int err;
+
+	err = clk_prepare_enable(priv->clk);	/* Released in rcar_can_close() */
+	if (err) {
+		netdev_err(ndev, "clk_prepare_enable() failed, error %d\n",
+			   err);
+		goto out;
+	}
+	err = open_candev(ndev);
+	if (err) {
+		netdev_err(ndev, "open_candev() failed, error %d\n", err);
+		goto out_clock;
+	}
+	napi_enable(&priv->napi);
+	err = request_irq(ndev->irq, rcar_can_interrupt, 0, ndev->name, ndev);
+	if (err) {
+		netdev_err(ndev, "error requesting interrupt %x\n", ndev->irq);
+		goto out_close;
+	}
+	can_led_event(ndev, CAN_LED_EVENT_OPEN);
+	rcar_can_start(ndev);	/* Program and start the controller */
+	netif_start_queue(ndev);
+	return 0;
+out_close:	/* Undo napi_enable() and open_candev() */
+	napi_disable(&priv->napi);
+	close_candev(ndev);
+out_clock:	/* Undo clk_prepare_enable() */
+	clk_disable_unprepare(priv->clk);
+out:
+	return err;
+}
+
+static void rcar_can_stop(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u16 ctlr;
+	int i;
+
+	/* Go to (force) reset mode */
+	ctlr = readw(&priv->regs->ctlr);
+	ctlr |= RCAR_CAN_CTLR_CANM_FORCE_RESET;
+	writew(ctlr, &priv->regs->ctlr);
+	for (i = 0; i < MAX_STR_READS; i++) {	/* Bounded wait for RSTST */
+		if (readw(&priv->regs->str) & RCAR_CAN_STR_RSTST)
+			break;
+	}
+	writel(0, &priv->regs->mier0);	/* Clear all four enable registers */
+	writel(0, &priv->regs->mier1);
+	writeb(0, &priv->regs->ier);
+	writeb(0, &priv->regs->eier);
+	/* Go to sleep mode */
+	ctlr |= RCAR_CAN_CTLR_SLPM;
+	writew(ctlr, &priv->regs->ctlr);
+	priv->can.state = CAN_STATE_STOPPED;
+}
+
+static int rcar_can_close(struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+
+	netif_stop_queue(ndev);
+	rcar_can_stop(ndev);	/* Controller into reset, then sleep mode */
+	free_irq(ndev->irq, ndev);
+	napi_disable(&priv->napi);
+	clk_disable_unprepare(priv->clk);	/* Pairs with rcar_can_open() */
+	close_candev(ndev);
+	can_led_event(ndev, CAN_LED_EVENT_STOP);
+	return 0;
+}
+
+static netdev_tx_t rcar_can_start_xmit(struct sk_buff *skb,
+				       struct net_device *ndev)
+{
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	struct rcar_can_mbox_regs __iomem *mbox;
+	u32 id_word, slot, n;
+
+	if (can_dropped_invalid_skb(ndev, skb))
+		return NETDEV_TX_OK;
+
+	mbox = &priv->regs->mb[RCAR_CAN_TX_FIFO_MBX];
+	if (cf->can_id & CAN_EFF_FLAG)	/* Extended frame format */
+		id_word = (cf->can_id & CAN_EFF_MASK) | RCAR_CAN_IDE;
+	else				/* Standard frame format */
+		id_word = (cf->can_id & CAN_SFF_MASK) << RCAR_CAN_SID_SHIFT;
+
+	if (cf->can_id & CAN_RTR_FLAG) { /* Remote request: no payload */
+		id_word |= RCAR_CAN_RTR;
+	} else {
+		for (n = 0; n < cf->can_dlc; n++)
+			writeb(cf->data[n], &mbox->data[n]);
+	}
+
+	writel(id_word, &mbox->id);
+	writeb(cf->can_dlc, &mbox->dlc);
+
+	slot = priv->tx_head % RCAR_CAN_FIFO_DEPTH;
+	priv->tx_dlc[slot] = cf->can_dlc;
+	can_put_echo_skb(skb, ndev, slot);
+	priv->tx_head++;
+	/* Start Tx: writing 0xff to TFPCR advances the CPU-side pointer
+	 * of the transmit FIFO to the next mailbox location
+	 */
+	writeb(0xff, &priv->regs->tfpcr);
+	/* Every FIFO entry is in use: hold the queue until Tx completes */
+	if (priv->tx_head - priv->tx_tail >= RCAR_CAN_FIFO_DEPTH)
+		netif_stop_queue(ndev);
+
+	return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops rcar_can_netdev_ops = {
+	.ndo_open = rcar_can_open,
+	.ndo_stop = rcar_can_close,
+	.ndo_start_xmit = rcar_can_start_xmit,
+};
+
+static void rcar_can_rx_pkt(struct rcar_can_priv *priv)
+{
+	struct rcar_can_mbox_regs __iomem *mbox;
+	struct net_device *ndev = priv->ndev;
+	struct can_frame *frame;
+	struct sk_buff *skb;
+	u32 id_word;
+	u8 n;
+
+	skb = alloc_can_skb(ndev, &frame);
+	if (!skb) {
+		ndev->stats.rx_dropped++;
+		return;
+	}
+
+	mbox = &priv->regs->mb[RCAR_CAN_RX_FIFO_MBX];
+	id_word = readl(&mbox->id);
+	if (id_word & RCAR_CAN_IDE)	/* Extended frame format */
+		frame->can_id = (id_word & CAN_EFF_MASK) | CAN_EFF_FLAG;
+	else				/* Standard frame format */
+		frame->can_id = (id_word >> RCAR_CAN_SID_SHIFT) & CAN_SFF_MASK;
+
+	n = readb(&mbox->dlc);
+	frame->can_dlc = get_can_dlc(n);
+	if (id_word & RCAR_CAN_RTR) {	/* Remote request: no payload */
+		frame->can_id |= CAN_RTR_FLAG;
+	} else {
+		for (n = 0; n < frame->can_dlc; n++)
+			frame->data[n] = readb(&mbox->data[n]);
+	}
+
+	can_led_event(ndev, CAN_LED_EVENT_RX);
+	ndev->stats.rx_bytes += frame->can_dlc;
+	ndev->stats.rx_packets++;
+	netif_receive_skb(skb);
+}
+
+static int rcar_can_rx_poll(struct napi_struct *napi, int quota)
+{
+	struct rcar_can_priv *priv = container_of(napi,
+						  struct rcar_can_priv, napi);
+	int done = 0;
+
+	while (done < quota) {
+		u8 pending = readb(&priv->regs->isr);
+
+		/* Acknowledge the Rx FIFO interrupt if it is flagged */
+		if (pending & RCAR_CAN_ISR_RXFF)
+			writeb(pending & ~RCAR_CAN_ISR_RXFF, &priv->regs->isr);
+		/* Stop as soon as the FIFO empty status flag is set */
+		if (readb(&priv->regs->rfcr) & RCAR_CAN_RFCR_RFEST)
+			break;
+		rcar_can_rx_pkt(priv);
+		/* Writing 0xff to the RFPCR register increments
+		 * the CPU-side pointer for the receive FIFO
+		 * to the next mailbox location
+		 */
+		writeb(0xff, &priv->regs->rfpcr);
+		done++;
+	}
+	/* Quota not used up: finish polling and re-enable the Rx interrupt */
+	if (done < quota) {
+		napi_complete(napi);
+		priv->ier |= RCAR_CAN_IER_RXFIE;
+		writeb(priv->ier, &priv->regs->ier);
+	}
+	return done;
+}
+
+static int rcar_can_do_set_mode(struct net_device *ndev, enum can_mode mode)
+{
+	/* Restarting the controller is the only mode change supported */
+	if (mode != CAN_MODE_START)
+		return -EOPNOTSUPP;
+
+	rcar_can_start(ndev);
+	netif_wake_queue(ndev);
+
+	return 0;
+}
+
+static int rcar_can_get_berr_counter(const struct net_device *dev,
+				     struct can_berr_counter *bec)
+{
+	struct rcar_can_priv *priv = netdev_priv(dev);
+	int ret = clk_prepare_enable(priv->clk);
+
+	if (ret)	/* Clock could not be enabled */
+		return ret;
+	/* The clock is held only around the two counter register reads */
+	bec->txerr = readb(&priv->regs->tecr);
+	bec->rxerr = readb(&priv->regs->recr);
+	clk_disable_unprepare(priv->clk);
+	return 0;
+}
+
+static int rcar_can_probe(struct platform_device *pdev)
+{
+	struct rcar_can_platform_data *pdata;
+	struct rcar_can_priv *priv;
+	struct net_device *ndev;
+	struct resource *mem;
+	void __iomem *addr;
+	int irq, err = -ENODEV;
+
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata) {
+		dev_err(&pdev->dev, "No platform data provided!\n");
+		goto fail;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {	/* Failure is reported as a negative errno, not 0 */
+		dev_err(&pdev->dev, "No IRQ resource\n");
+		err = irq;
+		goto fail;
+	}
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	addr = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(addr)) {
+		err = PTR_ERR(addr);
+		goto fail;
+	}
+
+	ndev = alloc_candev(sizeof(struct rcar_can_priv), RCAR_CAN_FIFO_DEPTH);
+	if (!ndev) {
+		dev_err(&pdev->dev, "alloc_candev() failed\n");
+		err = -ENOMEM;
+		goto fail;
+	}
+
+	priv = netdev_priv(ndev);
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk)) {
+		err = PTR_ERR(priv->clk);
+		dev_err(&pdev->dev, "cannot get clock: %d\n", err);
+		goto fail_clk;
+	}
+
+	ndev->netdev_ops = &rcar_can_netdev_ops;
+	ndev->irq = irq;
+	ndev->flags |= IFF_ECHO;
+	priv->ndev = ndev;
+	priv->regs = addr;
+	priv->clock_select = pdata->clock_select;
+	priv->can.clock.freq = clk_get_rate(priv->clk);
+	priv->can.bittiming_const = &rcar_can_bittiming_const;
+	priv->can.do_set_mode = rcar_can_do_set_mode;
+	priv->can.do_get_berr_counter = rcar_can_get_berr_counter;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_BERR_REPORTING;
+	platform_set_drvdata(pdev, ndev);
+	SET_NETDEV_DEV(ndev, &pdev->dev);
+
+	netif_napi_add(ndev, &priv->napi, rcar_can_rx_poll,
+		       RCAR_CAN_NAPI_WEIGHT);
+	err = register_candev(ndev);
+	if (err) {
+		dev_err(&pdev->dev, "register_candev() failed, error %d\n",
+			err);
+		goto fail_candev;
+	}
+
+	devm_can_led_init(ndev);
+
+	dev_info(&pdev->dev, "device registered (reg_base=%p, irq=%u)\n",
+		 priv->regs, ndev->irq);
+
+	return 0;
+fail_candev:	/* Undo netif_napi_add() */
+	netif_napi_del(&priv->napi);
+fail_clk:	/* Undo alloc_candev() */
+	free_candev(ndev);
+fail:
+	return err;
+}
+
+static int rcar_can_remove(struct platform_device *pdev)
+{
+	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+
+	unregister_candev(ndev);	/* Reverse order of rcar_can_probe() */
+	netif_napi_del(&priv->napi);
+	free_candev(ndev);	/* Also releases priv: no access after this */
+	return 0;
+}
+
+static int __maybe_unused rcar_can_suspend(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u16 ctlr;
+
+	if (!netif_running(ndev))	/* Closed: clock was never enabled */
+		return 0;
+	netif_stop_queue(ndev);
+	netif_device_detach(ndev);
+	ctlr = readw(&priv->regs->ctlr);
+	ctlr |= RCAR_CAN_CTLR_CANM_HALT;	/* Halt first, then sleep */
+	writew(ctlr, &priv->regs->ctlr);
+	ctlr |= RCAR_CAN_CTLR_SLPM;
+	writew(ctlr, &priv->regs->ctlr);
+	priv->can.state = CAN_STATE_SLEEPING;
+
+	clk_disable(priv->clk);	/* Pairs with clk_enable() in resume */
+	return 0;
+}
+
+static int __maybe_unused rcar_can_resume(struct device *dev)
+{
+	struct net_device *ndev = dev_get_drvdata(dev);
+	struct rcar_can_priv *priv = netdev_priv(ndev);
+	u16 ctlr;
+	int err;
+
+	if (!netif_running(ndev))	/* Suspend did nothing either */
+		return 0;
+	err = clk_enable(priv->clk);
+	if (err) {
+		netdev_err(ndev, "clk_enable() failed, error %d\n", err);
+		return err;
+	}
+
+	ctlr = readw(&priv->regs->ctlr);
+	ctlr &= ~RCAR_CAN_CTLR_SLPM;	/* Leave sleep, then halt mode */
+	writew(ctlr, &priv->regs->ctlr);
+	ctlr &= ~RCAR_CAN_CTLR_CANM;
+	writew(ctlr, &priv->regs->ctlr);
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+
+	netif_device_attach(ndev);
+	netif_start_queue(ndev);
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(rcar_can_pm_ops, rcar_can_suspend, rcar_can_resume);
+
+static struct platform_driver rcar_can_driver = {
+	.driver = {
+		.name = RCAR_CAN_DRV_NAME,
+		.owner = THIS_MODULE,
+		.pm = &rcar_can_pm_ops,
+	},
+	.probe = rcar_can_probe,
+	.remove = rcar_can_remove,
+};
+
+module_platform_driver(rcar_can_driver);
+
+MODULE_AUTHOR("Cogent Embedded, Inc.");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CAN driver for Renesas R-Car SoC");
+MODULE_ALIAS("platform:" RCAR_CAN_DRV_NAME);
diff --git a/include/linux/can/platform/rcar_can.h b/include/linux/can/platform/rcar_can.h
new file mode 100644
index 000000000000..0f4a2f3df504
--- /dev/null
+++ b/include/linux/can/platform/rcar_can.h
@@ -0,0 +1,17 @@
+#ifndef _CAN_PLATFORM_RCAR_CAN_H_
+#define _CAN_PLATFORM_RCAR_CAN_H_
+
+#include <linux/types.h>
+
+/* Clock Select Register settings */
+enum CLKR {
+	CLKR_CLKP1 = 0,	/* Peripheral clock (clkp1) */
+	CLKR_CLKP2 = 1,	/* Peripheral clock (clkp2) */
+	CLKR_CLKEXT = 3	/* Externally input clock */
+};
+
+struct rcar_can_platform_data {
+	enum CLKR clock_select;	/* Clock source select */
+};
+
+#endif	/* !_CAN_PLATFORM_RCAR_CAN_H_ */
-- 
cgit 


From 42193e3efb632c84d686acacd7b2327f2b1f8c63 Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Thu, 15 May 2014 20:31:56 +0200
Subject: can: unify identifiers to ensure unique include processing

Armin pointed me to the fact that the identifier which is used to ensure the
unique include processing in linux/include/uapi/linux/can.h is CAN_H.
This clashed with his own source as includes from libraries and APIs should
use an underscore '_' at the identifier start.

This patch fixes the protection identifiers in all CAN relevant includes.

Reported-by: Armin Burchardt <armin@uni-bremen.de>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/core.h             | 6 +++---
 include/linux/can/dev.h              | 6 +++---
 include/linux/can/led.h              | 6 +++---
 include/linux/can/platform/cc770.h   | 6 +++---
 include/linux/can/platform/mcp251x.h | 6 +++---
 include/linux/can/platform/sja1000.h | 6 +++---
 include/linux/can/platform/ti_hecc.h | 6 +++---
 include/linux/can/skb.h              | 6 +++---
 include/uapi/linux/can.h             | 6 +++---
 include/uapi/linux/can/bcm.h         | 6 +++---
 include/uapi/linux/can/error.h       | 6 +++---
 include/uapi/linux/can/gw.h          | 6 +++---
 include/uapi/linux/can/netlink.h     | 6 +++---
 include/uapi/linux/can/raw.h         | 6 +++---
 14 files changed, 42 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/can/core.h b/include/linux/can/core.h
index 78c6c52073ad..a0875001b13c 100644
--- a/include/linux/can/core.h
+++ b/include/linux/can/core.h
@@ -10,8 +10,8 @@
  *
  */
 
-#ifndef CAN_CORE_H
-#define CAN_CORE_H
+#ifndef _CAN_CORE_H
+#define _CAN_CORE_H
 
 #include <linux/can.h>
 #include <linux/skbuff.h>
@@ -58,4 +58,4 @@ extern void can_rx_unregister(struct net_device *dev, canid_t can_id,
 extern int can_send(struct sk_buff *skb, int loop);
 extern int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 
-#endif /* CAN_CORE_H */
+#endif /* !_CAN_CORE_H */
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 3ce5e526525f..6992afc6ba7f 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -10,8 +10,8 @@
  *
  */
 
-#ifndef CAN_DEV_H
-#define CAN_DEV_H
+#ifndef _CAN_DEV_H
+#define _CAN_DEV_H
 
 #include <linux/can.h>
 #include <linux/can/netlink.h>
@@ -132,4 +132,4 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev,
 struct sk_buff *alloc_can_err_skb(struct net_device *dev,
 				  struct can_frame **cf);
 
-#endif /* CAN_DEV_H */
+#endif /* !_CAN_DEV_H */
diff --git a/include/linux/can/led.h b/include/linux/can/led.h
index 9c1167baf273..e0475c5cbb92 100644
--- a/include/linux/can/led.h
+++ b/include/linux/can/led.h
@@ -6,8 +6,8 @@
  * published by the Free Software Foundation.
  */
 
-#ifndef CAN_LED_H
-#define CAN_LED_H
+#ifndef _CAN_LED_H
+#define _CAN_LED_H
 
 #include <linux/if.h>
 #include <linux/leds.h>
@@ -48,4 +48,4 @@ static inline void can_led_notifier_exit(void)
 
 #endif
 
-#endif
+#endif /* !_CAN_LED_H */
diff --git a/include/linux/can/platform/cc770.h b/include/linux/can/platform/cc770.h
index 7702641f87ee..78b2d44f04cf 100644
--- a/include/linux/can/platform/cc770.h
+++ b/include/linux/can/platform/cc770.h
@@ -1,5 +1,5 @@
-#ifndef _CAN_PLATFORM_CC770_H_
-#define _CAN_PLATFORM_CC770_H_
+#ifndef _CAN_PLATFORM_CC770_H
+#define _CAN_PLATFORM_CC770_H
 
 /* CPU Interface Register (0x02) */
 #define CPUIF_CEN	0x01	/* Clock Out Enable */
@@ -30,4 +30,4 @@ struct cc770_platform_data {
 	u8 bcr;		/* Bus Configuration Register */
 };
 
-#endif	/* !_CAN_PLATFORM_CC770_H_ */
+#endif	/* !_CAN_PLATFORM_CC770_H */
diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h
index dc029dba7a03..d44fcae274ff 100644
--- a/include/linux/can/platform/mcp251x.h
+++ b/include/linux/can/platform/mcp251x.h
@@ -1,5 +1,5 @@
-#ifndef __CAN_PLATFORM_MCP251X_H__
-#define __CAN_PLATFORM_MCP251X_H__
+#ifndef _CAN_PLATFORM_MCP251X_H
+#define _CAN_PLATFORM_MCP251X_H
 
 /*
  *
@@ -18,4 +18,4 @@ struct mcp251x_platform_data {
 	unsigned long oscillator_frequency;
 };
 
-#endif /* __CAN_PLATFORM_MCP251X_H__ */
+#endif /* !_CAN_PLATFORM_MCP251X_H */
diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h
index 96f8fcc78d78..93570b61ec6c 100644
--- a/include/linux/can/platform/sja1000.h
+++ b/include/linux/can/platform/sja1000.h
@@ -1,5 +1,5 @@
-#ifndef _CAN_PLATFORM_SJA1000_H_
-#define _CAN_PLATFORM_SJA1000_H_
+#ifndef _CAN_PLATFORM_SJA1000_H
+#define _CAN_PLATFORM_SJA1000_H
 
 /* clock divider register */
 #define CDR_CLKOUT_MASK 0x07
@@ -32,4 +32,4 @@ struct sja1000_platform_data {
 	u8 cdr;		/* clock divider register */
 };
 
-#endif	/* !_CAN_PLATFORM_SJA1000_H_ */
+#endif	/* !_CAN_PLATFORM_SJA1000_H */
diff --git a/include/linux/can/platform/ti_hecc.h b/include/linux/can/platform/ti_hecc.h
index af17cb3f7a84..a52f47ca6c8a 100644
--- a/include/linux/can/platform/ti_hecc.h
+++ b/include/linux/can/platform/ti_hecc.h
@@ -1,5 +1,5 @@
-#ifndef __CAN_PLATFORM_TI_HECC_H__
-#define __CAN_PLATFORM_TI_HECC_H__
+#ifndef _CAN_PLATFORM_TI_HECC_H
+#define _CAN_PLATFORM_TI_HECC_H
 
 /*
  * TI HECC (High End CAN Controller) driver platform header
@@ -41,4 +41,4 @@ struct ti_hecc_platform_data {
 	u32 version;
 	void (*transceiver_switch) (int);
 };
-#endif
+#endif /* !_CAN_PLATFORM_TI_HECC_H */
diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h
index f9bbbb472663..cc00d15c6107 100644
--- a/include/linux/can/skb.h
+++ b/include/linux/can/skb.h
@@ -7,8 +7,8 @@
  *
  */
 
-#ifndef CAN_SKB_H
-#define CAN_SKB_H
+#ifndef _CAN_SKB_H
+#define _CAN_SKB_H
 
 #include <linux/types.h>
 #include <linux/skbuff.h>
@@ -80,4 +80,4 @@ static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb)
 	return skb;
 }
 
-#endif /* CAN_SKB_H */
+#endif /* !_CAN_SKB_H */
diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 5d9d1d140718..41892f720057 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -42,8 +42,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_H
-#define CAN_H
+#ifndef _UAPI_CAN_H
+#define _UAPI_CAN_H
 
 #include <linux/types.h>
 #include <linux/socket.h>
@@ -191,4 +191,4 @@ struct can_filter {
 
 #define CAN_INV_FILTER 0x20000000U /* to be set in can_filter.can_id */
 
-#endif /* CAN_H */
+#endif /* !_UAPI_CAN_H */
diff --git a/include/uapi/linux/can/bcm.h b/include/uapi/linux/can/bcm.h
index 382251a1d214..89ddb9dc9bdf 100644
--- a/include/uapi/linux/can/bcm.h
+++ b/include/uapi/linux/can/bcm.h
@@ -41,8 +41,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_BCM_H
-#define CAN_BCM_H
+#ifndef _UAPI_CAN_BCM_H
+#define _UAPI_CAN_BCM_H
 
 #include <linux/types.h>
 #include <linux/can.h>
@@ -95,4 +95,4 @@ enum {
 #define TX_RESET_MULTI_IDX  0x0200
 #define RX_RTR_FRAME        0x0400
 
-#endif /* CAN_BCM_H */
+#endif /* !_UAPI_CAN_BCM_H */
diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h
index b63204545320..c247446ab25a 100644
--- a/include/uapi/linux/can/error.h
+++ b/include/uapi/linux/can/error.h
@@ -41,8 +41,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_ERROR_H
-#define CAN_ERROR_H
+#ifndef _UAPI_CAN_ERROR_H
+#define _UAPI_CAN_ERROR_H
 
 #define CAN_ERR_DLC 8 /* dlc for error message frames */
 
@@ -120,4 +120,4 @@
 
 /* controller specific additional information / data[5..7] */
 
-#endif /* CAN_ERROR_H */
+#endif /* _UAPI_CAN_ERROR_H */
diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h
index 844c8964bdfe..3e6184cf2f6d 100644
--- a/include/uapi/linux/can/gw.h
+++ b/include/uapi/linux/can/gw.h
@@ -41,8 +41,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_GW_H
-#define CAN_GW_H
+#ifndef _UAPI_CAN_GW_H
+#define _UAPI_CAN_GW_H
 
 #include <linux/types.h>
 #include <linux/can.h>
@@ -200,4 +200,4 @@ enum {
  *         Beware of sending unpacked or aligned structs!
  */
 
-#endif
+#endif /* !_UAPI_CAN_GW_H */
diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index 7e2e1863db16..813d11f54977 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -15,8 +15,8 @@
  * GNU General Public License for more details.
  */
 
-#ifndef CAN_NETLINK_H
-#define CAN_NETLINK_H
+#ifndef _UAPI_CAN_NETLINK_H
+#define _UAPI_CAN_NETLINK_H
 
 #include <linux/types.h>
 
@@ -130,4 +130,4 @@ enum {
 
 #define IFLA_CAN_MAX	(__IFLA_CAN_MAX - 1)
 
-#endif /* CAN_NETLINK_H */
+#endif /* !_UAPI_CAN_NETLINK_H */
diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h
index c7d8c334e0ce..78ec76fd89a6 100644
--- a/include/uapi/linux/can/raw.h
+++ b/include/uapi/linux/can/raw.h
@@ -42,8 +42,8 @@
  * DAMAGE.
  */
 
-#ifndef CAN_RAW_H
-#define CAN_RAW_H
+#ifndef _UAPI_CAN_RAW_H
+#define _UAPI_CAN_RAW_H
 
 #include <linux/can.h>
 
@@ -59,4 +59,4 @@ enum {
 	CAN_RAW_FD_FRAMES,	/* allow CAN FD frames (default:off) */
 };
 
-#endif
+#endif /* !_UAPI_CAN_RAW_H */
-- 
cgit 


From b69cf53640da2b86439596118cfa95233154ee76 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 14 Mar 2014 10:50:33 +0100
Subject: perf: Fix a race between ring_buffer_detach() and
 ring_buffer_attach()

Alexander noticed that we use RCU iteration on rb->event_list but do
not use list_{add,del}_rcu() to add,remove entries to that list, nor
do we observe proper grace periods when re-using the entries.

Merge ring_buffer_detach() into ring_buffer_attach() such that
attaching to the NULL buffer is detaching.

Furthermore, ensure that between any 'detach' and 'attach' of the same
event we observe the required grace period, but only when strictly
required. In effect this means that only ioctl(.request =
PERF_EVENT_IOC_SET_OUTPUT) will wait for a grace period, while the
normal initial attach and final detach will not be delayed.

This patch should, I think, do the right thing under all
circumstances, the 'normal' cases all should never see the extra grace
period, but the two cases:

 1) PERF_EVENT_IOC_SET_OUTPUT on an event which already has a
    ring_buffer set, will now observe the required grace period between
    removing itself from the old and attaching itself to the new buffer.

    This case is 'simple' in that both buffers are present in
    perf_event_set_output() one could think an unconditional
    synchronize_rcu() would be sufficient; however...

 2) an event that has a buffer attached, the buffer is destroyed
    (munmap) and then the event is attached to a new/different buffer
    using PERF_EVENT_IOC_SET_OUTPUT.

    This case is more complex because the buffer destruction does:
      ring_buffer_attach(.rb = NULL)
    followed by the ioctl() doing:
      ring_buffer_attach(.rb = foo);

    and we still need to observe the grace period between these two
    calls due to us reusing the event->rb_entry list_head.

In order to make 2 happen we use Paul's latest cond_synchronize_rcu()
call.

Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Reported-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140507123526.GD13658@twins.programming.kicks-ass.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/perf_event.h |   2 +
 kernel/events/core.c       | 109 ++++++++++++++++++++-------------------------
 2 files changed, 51 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3356abcfff18..3ef6ea12806a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -402,6 +402,8 @@ struct perf_event {
 
 	struct ring_buffer		*rb;
 	struct list_head		rb_entry;
+	unsigned long			rcu_batches;
+	int				rcu_pending;
 
 	/* poll related */
 	wait_queue_head_t		waitq;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index feb1329ca331..440eefc67397 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3192,7 +3192,8 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void ring_buffer_put(struct ring_buffer *rb);
-static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
+static void ring_buffer_attach(struct perf_event *event,
+			       struct ring_buffer *rb);
 
 static void unaccount_event_cpu(struct perf_event *event, int cpu)
 {
@@ -3252,8 +3253,6 @@ static void free_event(struct perf_event *event)
 	unaccount_event(event);
 
 	if (event->rb) {
-		struct ring_buffer *rb;
-
 		/*
 		 * Can happen when we close an event with re-directed output.
 		 *
@@ -3261,12 +3260,7 @@ static void free_event(struct perf_event *event)
 		 * over us; possibly making our ring_buffer_put() the last.
 		 */
 		mutex_lock(&event->mmap_mutex);
-		rb = event->rb;
-		if (rb) {
-			rcu_assign_pointer(event->rb, NULL);
-			ring_buffer_detach(event, rb);
-			ring_buffer_put(rb); /* could be last */
-		}
+		ring_buffer_attach(event, NULL);
 		mutex_unlock(&event->mmap_mutex);
 	}
 
@@ -3850,28 +3844,47 @@ unlock:
 static void ring_buffer_attach(struct perf_event *event,
 			       struct ring_buffer *rb)
 {
+	struct ring_buffer *old_rb = NULL;
 	unsigned long flags;
 
-	if (!list_empty(&event->rb_entry))
-		return;
+	if (event->rb) {
+		/*
+		 * Should be impossible, we set this when removing
+		 * event->rb_entry and wait/clear when adding event->rb_entry.
+		 */
+		WARN_ON_ONCE(event->rcu_pending);
 
-	spin_lock_irqsave(&rb->event_lock, flags);
-	if (list_empty(&event->rb_entry))
-		list_add(&event->rb_entry, &rb->event_list);
-	spin_unlock_irqrestore(&rb->event_lock, flags);
-}
+		old_rb = event->rb;
+		event->rcu_batches = get_state_synchronize_rcu();
+		event->rcu_pending = 1;
 
-static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
-{
-	unsigned long flags;
+		spin_lock_irqsave(&old_rb->event_lock, flags);
+		list_del_rcu(&event->rb_entry);
+		spin_unlock_irqrestore(&old_rb->event_lock, flags);
+	}
 
-	if (list_empty(&event->rb_entry))
-		return;
+	if (event->rcu_pending && rb) {
+		cond_synchronize_rcu(event->rcu_batches);
+		event->rcu_pending = 0;
+	}
 
-	spin_lock_irqsave(&rb->event_lock, flags);
-	list_del_init(&event->rb_entry);
-	wake_up_all(&event->waitq);
-	spin_unlock_irqrestore(&rb->event_lock, flags);
+	if (rb) {
+		spin_lock_irqsave(&rb->event_lock, flags);
+		list_add_rcu(&event->rb_entry, &rb->event_list);
+		spin_unlock_irqrestore(&rb->event_lock, flags);
+	}
+
+	rcu_assign_pointer(event->rb, rb);
+
+	if (old_rb) {
+		ring_buffer_put(old_rb);
+		/*
+		 * Since we detached before setting the new rb, so that we
+		 * could attach the new rb, we could have missed a wakeup.
+		 * Provide it now.
+		 */
+		wake_up_all(&event->waitq);
+	}
 }
 
 static void ring_buffer_wakeup(struct perf_event *event)
@@ -3940,7 +3953,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 {
 	struct perf_event *event = vma->vm_file->private_data;
 
-	struct ring_buffer *rb = event->rb;
+	struct ring_buffer *rb = ring_buffer_get(event);
 	struct user_struct *mmap_user = rb->mmap_user;
 	int mmap_locked = rb->mmap_locked;
 	unsigned long size = perf_data_size(rb);
@@ -3948,18 +3961,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	atomic_dec(&rb->mmap_count);
 
 	if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
-		return;
+		goto out_put;
 
-	/* Detach current event from the buffer. */
-	rcu_assign_pointer(event->rb, NULL);
-	ring_buffer_detach(event, rb);
+	ring_buffer_attach(event, NULL);
 	mutex_unlock(&event->mmap_mutex);
 
 	/* If there's still other mmap()s of this buffer, we're done. */
-	if (atomic_read(&rb->mmap_count)) {
-		ring_buffer_put(rb); /* can't be last */
-		return;
-	}
+	if (atomic_read(&rb->mmap_count))
+		goto out_put;
 
 	/*
 	 * No other mmap()s, detach from all other events that might redirect
@@ -3989,11 +3998,9 @@ again:
 		 * still restart the iteration to make sure we're not now
 		 * iterating the wrong list.
 		 */
-		if (event->rb == rb) {
-			rcu_assign_pointer(event->rb, NULL);
-			ring_buffer_detach(event, rb);
-			ring_buffer_put(rb); /* can't be last, we still have one */
-		}
+		if (event->rb == rb)
+			ring_buffer_attach(event, NULL);
+
 		mutex_unlock(&event->mmap_mutex);
 		put_event(event);
 
@@ -4018,6 +4025,7 @@ again:
 	vma->vm_mm->pinned_vm -= mmap_locked;
 	free_uid(mmap_user);
 
+out_put:
 	ring_buffer_put(rb); /* could be last */
 }
 
@@ -4135,7 +4143,6 @@ again:
 	vma->vm_mm->pinned_vm += extra;
 
 	ring_buffer_attach(event, rb);
-	rcu_assign_pointer(event->rb, rb);
 
 	perf_event_init_userpage(event);
 	perf_event_update_userpage(event);
@@ -6934,7 +6941,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-	struct ring_buffer *rb = NULL, *old_rb = NULL;
+	struct ring_buffer *rb = NULL;
 	int ret = -EINVAL;
 
 	if (!output_event)
@@ -6962,8 +6969,6 @@ set:
 	if (atomic_read(&event->mmap_count))
 		goto unlock;
 
-	old_rb = event->rb;
-
 	if (output_event) {
 		/* get the rb we want to redirect to */
 		rb = ring_buffer_get(output_event);
@@ -6971,23 +6976,7 @@ set:
 			goto unlock;
 	}
 
-	if (old_rb)
-		ring_buffer_detach(event, old_rb);
-
-	if (rb)
-		ring_buffer_attach(event, rb);
-
-	rcu_assign_pointer(event->rb, rb);
-
-	if (old_rb) {
-		ring_buffer_put(old_rb);
-		/*
-		 * Since we detached before setting the new rb, so that we
-		 * could attach the new rb, we could have missed a wakeup.
-		 * Provide it now.
-		 */
-		wake_up_all(&event->waitq);
-	}
+	ring_buffer_attach(event, rb);
 
 	ret = 0;
 unlock:
-- 
cgit 


From 1429d7c9467e1e3de0b0ff91d7e4d67c1a92f8a3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Mon, 19 May 2014 09:23:55 -0600
Subject: blk-mq: switch ctx pending map to the sparser blk_align_bitmap

Each hardware queue has a bitmap of software queues with pending
requests. When new IO is queued on a software queue, the bit is
set, and when IO is pruned on a hardware queue run, the bit is
cleared. This causes a lot of traffic. Switch this from the regular
BITS_PER_LONG bitmap to a sparser layout, similarly to what was
done for blk-mq tagging.

20% performance increase was observed for single threaded IO, and
about 15% performance increase on multiple threads driving the
same device.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 119 +++++++++++++++++++++++++++++++++++++------------
 include/linux/blk-mq.h |  10 ++++-
 2 files changed, 99 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 526feee31bff..e862c4408427 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -56,21 +56,40 @@ static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
 {
 	unsigned int i;
 
-	for (i = 0; i < hctx->nr_ctx_map; i++)
-		if (hctx->ctx_map[i])
+	for (i = 0; i < hctx->ctx_map.map_size; i++)
+		if (hctx->ctx_map.map[i].word)
 			return true;
 
 	return false;
 }
 
+static inline struct blk_align_bitmap *get_bm(struct blk_mq_hw_ctx *hctx,
+					      struct blk_mq_ctx *ctx)
+{
+	return &hctx->ctx_map.map[ctx->index_hw / hctx->ctx_map.bits_per_word];
+}
+
+#define CTX_TO_BIT(hctx, ctx)	\
+	((ctx)->index_hw & ((hctx)->ctx_map.bits_per_word - 1))
+
 /*
  * Mark this ctx as having pending work in this hardware queue
  */
 static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
 				     struct blk_mq_ctx *ctx)
 {
-	if (!test_bit(ctx->index_hw, hctx->ctx_map))
-		set_bit(ctx->index_hw, hctx->ctx_map);
+	struct blk_align_bitmap *bm = get_bm(hctx, ctx);
+
+	if (!test_bit(CTX_TO_BIT(hctx, ctx), &bm->word))
+		set_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
+}
+
+static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
+				      struct blk_mq_ctx *ctx)
+{
+	struct blk_align_bitmap *bm = get_bm(hctx, ctx);
+
+	clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
 }
 
 static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
@@ -614,6 +633,40 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
 	return false;
 }
 
+/*
+ * Process software queues that have been marked busy, splicing them
+ * to the for-dispatch
+ */
+static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list)
+{
+	struct blk_mq_ctx *ctx;
+	int i;
+
+	for (i = 0; i < hctx->ctx_map.map_size; i++) {
+		struct blk_align_bitmap *bm = &hctx->ctx_map.map[i];
+		unsigned int off, bit;
+
+		if (!bm->word)
+			continue;
+
+		bit = 0;
+		off = i * hctx->ctx_map.bits_per_word;
+		do {
+			bit = find_next_bit(&bm->word, bm->depth, bit);
+			if (bit >= bm->depth)
+				break;
+
+			ctx = hctx->ctxs[bit + off];
+			clear_bit(bit, &bm->word);
+			spin_lock(&ctx->lock);
+			list_splice_tail_init(&ctx->rq_list, list);
+			spin_unlock(&ctx->lock);
+
+			bit++;
+		} while (1);
+	}
+}
+
 /*
  * Run this hardware queue, pulling any software queues mapped to it in.
  * Note that this function currently has various problems around ordering
@@ -623,10 +676,9 @@ static bool blk_mq_attempt_merge(struct request_queue *q,
 static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
 	struct request_queue *q = hctx->queue;
-	struct blk_mq_ctx *ctx;
 	struct request *rq;
 	LIST_HEAD(rq_list);
-	int bit, queued;
+	int queued;
 
 	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
 
@@ -638,14 +690,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	/*
 	 * Touch any software queue that has pending entries.
 	 */
-	for_each_set_bit(bit, hctx->ctx_map, hctx->nr_ctx) {
-		clear_bit(bit, hctx->ctx_map);
-		ctx = hctx->ctxs[bit];
-
-		spin_lock(&ctx->lock);
-		list_splice_tail_init(&ctx->rq_list, &rq_list);
-		spin_unlock(&ctx->lock);
-	}
+	flush_busy_ctxs(hctx, &rq_list);
 
 	/*
 	 * If we have previous entries on our dispatch list, grab them
@@ -658,14 +703,10 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 		spin_unlock(&hctx->lock);
 	}
 
-	/*
-	 * Delete and return all entries from our dispatch list
-	 */
-	queued = 0;
-
 	/*
 	 * Now process all the entries, sending them to the driver.
 	 */
+	queued = 0;
 	while (!list_empty(&rq_list)) {
 		int ret;
 
@@ -1158,7 +1199,7 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
 	spin_lock(&ctx->lock);
 	if (!list_empty(&ctx->rq_list)) {
 		list_splice_init(&ctx->rq_list, &tmp);
-		clear_bit(ctx->index_hw, hctx->ctx_map);
+		blk_mq_hctx_clear_pending(hctx, ctx);
 	}
 	spin_unlock(&ctx->lock);
 
@@ -1298,6 +1339,34 @@ fail:
 	return NULL;
 }
 
+static void blk_mq_free_bitmap(struct blk_mq_ctxmap *bitmap)
+{
+	kfree(bitmap->map);
+}
+
+static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
+{
+	unsigned int bpw = 8, total, num_maps, i;
+
+	bitmap->bits_per_word = bpw;
+
+	num_maps = ALIGN(nr_cpu_ids, bpw) / bpw;
+	bitmap->map = kzalloc_node(num_maps * sizeof(struct blk_align_bitmap),
+					GFP_KERNEL, node);
+	if (!bitmap->map)
+		return -ENOMEM;
+
+	bitmap->map_size = num_maps;
+
+	total = nr_cpu_ids;
+	for (i = 0; i < num_maps; i++) {
+		bitmap->map[i].depth = min(total, bitmap->bits_per_word);
+		total -= bitmap->map[i].depth;
+	}
+
+	return 0;
+}
+
 static int blk_mq_init_hw_queues(struct request_queue *q,
 		struct blk_mq_tag_set *set)
 {
@@ -1308,7 +1377,6 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 	 * Initialize hardware queues
 	 */
 	queue_for_each_hw_ctx(q, hctx, i) {
-		unsigned int num_maps;
 		int node;
 
 		node = hctx->numa_node;
@@ -1339,13 +1407,9 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 		if (!hctx->ctxs)
 			break;
 
-		num_maps = ALIGN(nr_cpu_ids, BITS_PER_LONG) / BITS_PER_LONG;
-		hctx->ctx_map = kzalloc_node(num_maps * sizeof(unsigned long),
-						GFP_KERNEL, node);
-		if (!hctx->ctx_map)
+		if (blk_mq_alloc_bitmap(&hctx->ctx_map, node))
 			break;
 
-		hctx->nr_ctx_map = num_maps;
 		hctx->nr_ctx = 0;
 
 		if (set->ops->init_hctx &&
@@ -1368,7 +1432,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q,
 
 		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
 		kfree(hctx->ctxs);
-		kfree(hctx->ctx_map);
+		blk_mq_free_bitmap(&hctx->ctx_map);
 	}
 
 	return 1;
@@ -1542,7 +1606,6 @@ void blk_mq_free_queue(struct request_queue *q)
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		kfree(hctx->ctx_map);
 		kfree(hctx->ctxs);
 		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
 		if (q->mq_ops->exit_hctx)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index f83d15f6e1c1..952e558ee598 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -11,6 +11,12 @@ struct blk_mq_cpu_notifier {
 	void (*notify)(void *data, unsigned long action, unsigned int cpu);
 };
 
+struct blk_mq_ctxmap {
+	unsigned int map_size;
+	unsigned int bits_per_word;
+	struct blk_align_bitmap *map;
+};
+
 struct blk_mq_hw_ctx {
 	struct {
 		spinlock_t		lock;
@@ -31,8 +37,8 @@ struct blk_mq_hw_ctx {
 
 	void			*driver_data;
 
-	unsigned int 		nr_ctx_map;
-	unsigned long		*ctx_map;
+	struct blk_mq_ctxmap	ctx_map;
+
 	unsigned int		nr_ctx;
 	struct blk_mq_ctx	**ctxs;
 
-- 
cgit 


From 61f38db3e3c0e4c3be0858750e2cabeadaecac0c Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Sat, 26 Apr 2014 23:15:35 -0700
Subject: rcu: Provide API to suppress stall warnings while sysrq runs

Some sysrq handlers can run for a long time, because they dump a lot
of data onto a serial console. Having RCU stall warnings pop up in
the middle of them only makes the problem worse.

This commit provides rcu_sysrq_start() and rcu_sysrq_end() APIs to
temporarily suppress RCU CPU stall warnings while a sysrq request is
handled.

Signed-off-by: Rik van Riel <riel@redhat.com>
[ paulmck: Fix TINY_RCU build error. ]
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 12 ++++++++++++
 kernel/rcu/update.c      | 12 ++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9ccd644c1234..5a75d19aa661 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -248,6 +248,18 @@ void rcu_idle_exit(void);
 void rcu_irq_enter(void);
 void rcu_irq_exit(void);
 
+#ifdef CONFIG_RCU_STALL_COMMON
+void rcu_sysrq_start(void);
+void rcu_sysrq_end(void);
+#else /* #ifdef CONFIG_RCU_STALL_COMMON */
+static inline void rcu_sysrq_start(void)
+{
+}
+static inline void rcu_sysrq_end(void)
+{
+}
+#endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */
+
 #ifdef CONFIG_RCU_USER_QS
 void rcu_user_enter(void);
 void rcu_user_exit(void);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index ed7a0d72562c..a2aeb4df0f60 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -320,6 +320,18 @@ int rcu_jiffies_till_stall_check(void)
 	return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
 }
 
+void rcu_sysrq_start(void)
+{
+	if (!rcu_cpu_stall_suppress)
+		rcu_cpu_stall_suppress = 2;
+}
+
+void rcu_sysrq_end(void)
+{
+	if (rcu_cpu_stall_suppress == 2)
+		rcu_cpu_stall_suppress = 0;
+}
+
 static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
 {
 	rcu_cpu_stall_suppress = 1;
-- 
cgit 


From 5533e0114425dcdb878f11b291f2727af8667a7c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 14 May 2014 19:33:07 -0400
Subject: cgroup: disallow debug controller on the default hierarchy

The debug controller, as its name suggests, exposes cgroup core
internals to userland to aid debugging.  Unfortunately, except for the
name, there's no provision to prevent its usage in production
configurations and the controller is widely enabled and mounted
leaking internal details to userland.  Like most other debug
information, the information exposed by debug isn't interesting even
for debugging itself once the related parts are working reliably.

This controller has no reason for existing.  This patch implements
cgrp_dfl_root_inhibit_ss_mask which can suppress specific subsystems
on the default hierarchy and adds the debug subsystem to it so that it
can be gradually deprecated as usages move towards the unified
hierarchy.

Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h        |  2 ++
 include/linux/cgroup_subsys.h | 11 +++++++----
 kernel/cgroup.c               | 21 ++++++++++++++++++---
 3 files changed, 27 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 4afe544d3547..8a111dd42d7a 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -305,6 +305,8 @@ enum {
 	 *   the flag is not created.
 	 *
 	 * - blkcg: blk-throttle becomes properly hierarchical.
+	 *
+	 * - debug: disallowed on the default hierarchy.
 	 */
 	CGRP_ROOT_SANE_BEHAVIOR	= (1 << 0),
 
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 768fe44e19f0..98c4f9b12b03 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -7,10 +7,6 @@
 SUBSYS(cpuset)
 #endif
 
-#if IS_ENABLED(CONFIG_CGROUP_DEBUG)
-SUBSYS(debug)
-#endif
-
 #if IS_ENABLED(CONFIG_CGROUP_SCHED)
 SUBSYS(cpu)
 #endif
@@ -50,6 +46,13 @@ SUBSYS(net_prio)
 #if IS_ENABLED(CONFIG_CGROUP_HUGETLB)
 SUBSYS(hugetlb)
 #endif
+
+/*
+ * The following subsystems are not supported on the default hierarchy.
+ */
+#if IS_ENABLED(CONFIG_CGROUP_DEBUG)
+SUBSYS(debug)
+#endif
 /*
  * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
  */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 082bb842b11a..a5f75ac4e793 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -148,6 +148,13 @@ struct cgroup_root cgrp_dfl_root;
  */
 static bool cgrp_dfl_root_visible;
 
+/* some controllers are not supported in the default hierarchy */
+static const unsigned int cgrp_dfl_root_inhibit_ss_mask = 0
+#ifdef CONFIG_CGROUP_DEBUG
+	| (1 << debug_cgrp_id)
+#endif
+	;
+
 /* The list of hierarchy roots */
 
 static LIST_HEAD(cgroup_roots);
@@ -1126,6 +1133,7 @@ static void cgroup_clear_dir(struct cgroup *cgrp, unsigned int subsys_mask)
 static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
 {
 	struct cgroup_subsys *ss;
+	unsigned int tmp_ss_mask;
 	int ssid, i, ret;
 
 	lockdep_assert_held(&cgroup_mutex);
@@ -1143,7 +1151,12 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
 			return -EBUSY;
 	}
 
-	ret = cgroup_populate_dir(&dst_root->cgrp, ss_mask);
+	/* skip creating root files on dfl_root for inhibited subsystems */
+	tmp_ss_mask = ss_mask;
+	if (dst_root == &cgrp_dfl_root)
+		tmp_ss_mask &= ~cgrp_dfl_root_inhibit_ss_mask;
+
+	ret = cgroup_populate_dir(&dst_root->cgrp, tmp_ss_mask);
 	if (ret) {
 		if (dst_root != &cgrp_dfl_root)
 			return ret;
@@ -2426,7 +2439,8 @@ static int cgroup_root_controllers_show(struct seq_file *seq, void *v)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
 
-	cgroup_print_ss_mask(seq, cgrp->root->subsys_mask);
+	cgroup_print_ss_mask(seq, cgrp->root->subsys_mask &
+			     ~cgrp_dfl_root_inhibit_ss_mask);
 	return 0;
 }
 
@@ -2564,7 +2578,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 		if (tok[0] == '\0')
 			continue;
 		for_each_subsys(ss, ssid) {
-			if (ss->disabled || strcmp(tok + 1, ss->name))
+			if (ss->disabled || strcmp(tok + 1, ss->name) ||
+			    ((1 << ss->id) & cgrp_dfl_root_inhibit_ss_mask))
 				continue;
 
 			if (*tok == '+') {
-- 
cgit 


From 4f4aa2ec24dc45881849833a439558d3a378028c Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Sun, 18 May 2014 00:22:38 +0200
Subject: ssb: sprom: add dev_id field for value overriding standard ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some devices may have different features despite sharing the same ID
(e.g. PCI ID). For example 14e4:4331 is usually a dual band, but this
can be "limited". Device with "pci/x/y/devid=0x4332" supports 2.4 GHz
only. Similarly 0x4333 will mean support for 5 GHz only.
Add entry in SPROM so info described above can be extracted and stored.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 arch/mips/bcm47xx/sprom.c | 1 +
 include/linux/ssb/ssb.h   | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/arch/mips/bcm47xx/sprom.c b/arch/mips/bcm47xx/sprom.c
index a8b5408dd349..da4cdb16844e 100644
--- a/arch/mips/bcm47xx/sprom.c
+++ b/arch/mips/bcm47xx/sprom.c
@@ -168,6 +168,7 @@ static void nvram_read_alpha2(const char *prefix, const char *name,
 static void bcm47xx_fill_sprom_r1234589(struct ssb_sprom *sprom,
 					const char *prefix, bool fallback)
 {
+	nvram_read_u16(prefix, NULL, "devid", &sprom->dev_id, 0, fallback);
 	nvram_read_u8(prefix, NULL, "ledbh0", &sprom->gpio0, 0xff, fallback);
 	nvram_read_u8(prefix, NULL, "ledbh1", &sprom->gpio1, 0xff, fallback);
 	nvram_read_u8(prefix, NULL, "ledbh2", &sprom->gpio2, 0xff, fallback);
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 07ef9b82b66d..4568a5cc9ab8 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -33,6 +33,7 @@ struct ssb_sprom {
 	u8 et1phyaddr;		/* MII address for enet1 */
 	u8 et0mdcport;		/* MDIO for enet0 */
 	u8 et1mdcport;		/* MDIO for enet1 */
+	u16 dev_id;		/* Device ID overriding e.g. PCI ID */
 	u16 board_rev;		/* Board revision number from SPROM. */
 	u16 board_num;		/* Board number from SPROM. */
 	u16 board_type;		/* Board type from SPROM. */
-- 
cgit 


From 8d9e9857c576d8d710ae6a6152a6ddcd29772bb1 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Mon, 19 May 2014 14:34:09 +0100
Subject: goldfish: fix >> 32 warning

We should be checking for a 64bit platform not 64bit DMA address types in
the case of Goldfish. The Goldfish virtual platform is either 32/32 or
64/64.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/goldfish.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/goldfish.h b/include/linux/goldfish.h
index 9cc28902b54c..569236e6b2bc 100644
--- a/include/linux/goldfish.h
+++ b/include/linux/goldfish.h
@@ -7,7 +7,7 @@ static inline void gf_write64(unsigned long data,
 		void __iomem *portl, void __iomem *porth)
 {
 	writel((u32)data, portl);
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+#ifdef CONFIG_64BIT
 	writel(data>>32, porth);
 #endif
 }
-- 
cgit 


From 5080a08d0f8a4b2ba3a15e5ddc5ece84a444cad8 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 21 Mar 2014 10:46:39 +0100
Subject: mmc: mmci: Enforce max frequency configuration through DT

Remove the option to provide a maximum frequency as platform data,
enforce it through DT.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mmci.c   | 5 +----
 include/linux/amba/mmci.h | 4 ----
 2 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 9c60325f1a30..758efea184c9 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -1500,13 +1500,10 @@ static int mmci_probe(struct amba_device *dev,
 	 * If no maximum operating frequency is supplied, fall back to use
 	 * the module parameter, which has a (low) default value in case it
 	 * is not specified. Either value must not exceed the clock rate into
-	 * the block, of course. Also note that DT takes precedence over
-	 * platform data.
+	 * the block, of course.
 	 */
 	if (mmc->f_max)
 		mmc->f_max = min(host->mclk, mmc->f_max);
-	else if (plat->f_max)
-		mmc->f_max = min(host->mclk, plat->f_max);
 	else
 		mmc->f_max = min(host->mclk, fmax);
 	dev_dbg(mmc_dev(mmc), "clocking block at %u Hz\n", mmc->f_max);
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index b992fc931295..3f95d32d5277 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -12,9 +12,6 @@ struct dma_chan;
 /**
  * struct mmci_platform_data - platform configuration for the MMCI
  * (also known as PL180) block.
- * @f_max: the maximum operational frequency for this host in this
- * platform configuration. When this is specified it takes precedence
- * over the module parameter for the same frequency.
  * @ocr_mask: available voltages on the 4 pins from the block, this
  * is ignored if a regulator is used, see the MMC_VDD_* masks in
  * mmc/host.h
@@ -42,7 +39,6 @@ struct dma_chan;
  * bidirectional channel
  */
 struct mmci_platform_data {
-	unsigned int f_max;
 	unsigned int ocr_mask;
 	int (*ios_handler)(struct device *, struct mmc_ios *);
 	unsigned int (*status)(struct device *);
-- 
cgit 


From f1af9d3af308145478749194346f11efad1134b2 Mon Sep 17 00:00:00 2001
From: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Date: Wed, 29 Jan 2014 18:16:01 +0100
Subject: mm/memblock: Do some refactoring, enhance API

Refactor the memblock code and extend the memblock API to make it
more flexible. With the extended API it is simple to define and
work with additional memory lists.

The static functions memblock_add_region and __memblock_remove are
renamed to memblock_add_range and memblock_remove_range and added to
the memblock API.

The __next_free_mem_range and __next_free_mem_range_rev functions
are replaced with calls to the more generic list walkers
__next_mem_range and __next_mem_range_rev.

To walk an arbitrary memory list two new macros for_each_mem_range
and for_each_mem_range_rev are added. These new macros are used
to define for_each_free_mem_range and for_each_free_mem_range_reverse.

Signed-off-by: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/linux/memblock.h |  75 ++++++++++++++----
 mm/memblock.c            | 193 ++++++++++++++++++++++++++++++-----------------
 2 files changed, 183 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 8a20a51ed42d..f669016874b3 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -71,6 +71,63 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
 void memblock_trim_memory(phys_addr_t align);
 int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
 int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size);
+
+/* Low level functions */
+int memblock_add_range(struct memblock_type *type,
+		       phys_addr_t base, phys_addr_t size,
+		       int nid, unsigned long flags);
+
+int memblock_remove_range(struct memblock_type *type,
+			  phys_addr_t base,
+			  phys_addr_t size);
+
+void __next_mem_range(u64 *idx, int nid, struct memblock_type *type_a,
+		      struct memblock_type *type_b, phys_addr_t *out_start,
+		      phys_addr_t *out_end, int *out_nid);
+
+void __next_mem_range_rev(u64 *idx, int nid, struct memblock_type *type_a,
+			  struct memblock_type *type_b, phys_addr_t *out_start,
+			  phys_addr_t *out_end, int *out_nid);
+
+/**
+ * for_each_mem_range - iterate through memblock areas from type_a and not
+ * included in type_b. Or just type_a if type_b is NULL.
+ * @i: u64 used as loop variable
+ * @type_a: ptr to memblock_type to iterate
+ * @type_b: ptr to memblock_type which excludes from the iteration
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @p_nid: ptr to int for nid of the range, can be %NULL
+ */
+#define for_each_mem_range(i, type_a, type_b, nid,			\
+			   p_start, p_end, p_nid)			\
+	for (i = 0, __next_mem_range(&i, nid, type_a, type_b,		\
+				     p_start, p_end, p_nid);		\
+	     i != (u64)ULLONG_MAX;					\
+	     __next_mem_range(&i, nid, type_a, type_b,			\
+			      p_start, p_end, p_nid))
+
+/**
+ * for_each_mem_range_rev - reverse iterate through memblock areas from
+ * type_a and not included in type_b. Or just type_a if type_b is NULL.
+ * @i: u64 used as loop variable
+ * @type_a: ptr to memblock_type to iterate
+ * @type_b: ptr to memblock_type which excludes from the iteration
+ * @nid: node selector, %NUMA_NO_NODE for all nodes
+ * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
+ * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
+ * @p_nid: ptr to int for nid of the range, can be %NULL
+ */
+#define for_each_mem_range_rev(i, type_a, type_b, nid,			\
+			       p_start, p_end, p_nid)			\
+	for (i = (u64)ULLONG_MAX,					\
+		     __next_mem_range_rev(&i, nid, type_a, type_b,	\
+					 p_start, p_end, p_nid);	\
+	     i != (u64)ULLONG_MAX;					\
+	     __next_mem_range_rev(&i, nid, type_a, type_b,		\
+				  p_start, p_end, p_nid))
+
 #ifdef CONFIG_MOVABLE_NODE
 static inline bool memblock_is_hotpluggable(struct memblock_region *m)
 {
@@ -113,9 +170,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 	     i >= 0; __next_mem_pfn_range(&i, nid, p_start, p_end, p_nid))
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
-			   phys_addr_t *out_end, int *out_nid);
-
 /**
  * for_each_free_mem_range - iterate through free memblock areas
  * @i: u64 used as loop variable
@@ -128,13 +182,8 @@ void __next_free_mem_range(u64 *idx, int nid, phys_addr_t *out_start,
  * soon as memblock is initialized.
  */
 #define for_each_free_mem_range(i, nid, p_start, p_end, p_nid)		\
-	for (i = 0,							\
-	     __next_free_mem_range(&i, nid, p_start, p_end, p_nid);	\
-	     i != (u64)ULLONG_MAX;					\
-	     __next_free_mem_range(&i, nid, p_start, p_end, p_nid))
-
-void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
-			       phys_addr_t *out_end, int *out_nid);
+	for_each_mem_range(i, &memblock.memory, &memblock.reserved,	\
+			   nid, p_start, p_end, p_nid)
 
 /**
  * for_each_free_mem_range_reverse - rev-iterate through free memblock areas
@@ -148,10 +197,8 @@ void __next_free_mem_range_rev(u64 *idx, int nid, phys_addr_t *out_start,
  * order.  Available as soon as memblock is initialized.
  */
 #define for_each_free_mem_range_reverse(i, nid, p_start, p_end, p_nid)	\
-	for (i = (u64)ULLONG_MAX,					\
-	     __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid);	\
-	     i != (u64)ULLONG_MAX;					\
-	     __next_free_mem_range_rev(&i, nid, p_start, p_end, p_nid))
+	for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved,	\
+			       nid, p_start, p_end, p_nid)
 
 static inline void memblock_set_region_flags(struct memblock_region *r,
 					     unsigned long flags)
diff --git a/mm/memblock.c b/mm/memblock.c
index e9d6ca9a01a9..9edd0928daab 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -472,7 +472,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
 }
 
 /**
- * memblock_add_region - add new memblock region
+ * memblock_add_range - add new memblock region
  * @type: memblock type to add new region into
  * @base: base address of the new region
  * @size: size of the new region
@@ -487,7 +487,7 @@ static void __init_memblock memblock_insert_region(struct memblock_type *type,
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-static int __init_memblock memblock_add_region(struct memblock_type *type,
+int __init_memblock memblock_add_range(struct memblock_type *type,
 				phys_addr_t base, phys_addr_t size,
 				int nid, unsigned long flags)
 {
@@ -569,12 +569,12 @@ repeat:
 int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
 				       int nid)
 {
-	return memblock_add_region(&memblock.memory, base, size, nid, 0);
+	return memblock_add_range(&memblock.memory, base, size, nid, 0);
 }
 
 int __init_memblock memblock_add(phys_addr_t base, phys_addr_t size)
 {
-	return memblock_add_region(&memblock.memory, base, size,
+	return memblock_add_range(&memblock.memory, base, size,
 				   MAX_NUMNODES, 0);
 }
 
@@ -654,8 +654,8 @@ static int __init_memblock memblock_isolate_range(struct memblock_type *type,
 	return 0;
 }
 
-static int __init_memblock __memblock_remove(struct memblock_type *type,
-					     phys_addr_t base, phys_addr_t size)
+int __init_memblock memblock_remove_range(struct memblock_type *type,
+					  phys_addr_t base, phys_addr_t size)
 {
 	int start_rgn, end_rgn;
 	int i, ret;
@@ -671,9 +671,10 @@ static int __init_memblock __memblock_remove(struct memblock_type *type,
 
 int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
 {
-	return __memblock_remove(&memblock.memory, base, size);
+	return memblock_remove_range(&memblock.memory, base, size);
 }
 
+
 int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
 {
 	memblock_dbg("   memblock_free: [%#016llx-%#016llx] %pF\n",
@@ -681,7 +682,7 @@ int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
 		     (unsigned long long)base + size - 1,
 		     (void *)_RET_IP_);
 
-	return __memblock_remove(&memblock.reserved, base, size);
+	return memblock_remove_range(&memblock.reserved, base, size);
 }
 
 static int __init_memblock memblock_reserve_region(phys_addr_t base,
@@ -696,7 +697,7 @@ static int __init_memblock memblock_reserve_region(phys_addr_t base,
 		     (unsigned long long)base + size - 1,
 		     flags, (void *)_RET_IP_);
 
-	return memblock_add_region(_rgn, base, size, nid, flags);
+	return memblock_add_range(_rgn, base, size, nid, flags);
 }
 
 int __init_memblock memblock_reserve(phys_addr_t base, phys_addr_t size)
@@ -758,17 +759,19 @@ int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
 }
 
 /**
- * __next_free_mem_range - next function for for_each_free_mem_range()
+ * __next_mem_range - next function for for_each_free_mem_range() etc.
  * @idx: pointer to u64 loop variable
  * @nid: node selector, %NUMA_NO_NODE for all nodes
+ * @type_a: pointer to memblock_type from where the range is taken
+ * @type_b: pointer to memblock_type which excludes memory from being taken
  * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @out_nid: ptr to int for nid of the range, can be %NULL
  *
- * Find the first free area from *@idx which matches @nid, fill the out
+ * Find the first area from *@idx which matches @nid, fill the out
  * parameters, and update *@idx for the next iteration.  The lower 32bit of
- * *@idx contains index into memory region and the upper 32bit indexes the
- * areas before each reserved region.  For example, if reserved regions
+ * *@idx contains index into type_a and the upper 32bit indexes the
+ * areas before each region in type_b.	For example, if type_b regions
  * look like the following,
  *
  *	0:[0-16), 1:[32-48), 2:[128-130)
@@ -780,53 +783,77 @@ int __init_memblock memblock_clear_hotplug(phys_addr_t base, phys_addr_t size)
  * As both region arrays are sorted, the function advances the two indices
  * in lockstep and returns each intersection.
  */
-void __init_memblock __next_free_mem_range(u64 *idx, int nid,
-					   phys_addr_t *out_start,
-					   phys_addr_t *out_end, int *out_nid)
+void __init_memblock __next_mem_range(u64 *idx, int nid,
+				      struct memblock_type *type_a,
+				      struct memblock_type *type_b,
+				      phys_addr_t *out_start,
+				      phys_addr_t *out_end, int *out_nid)
 {
-	struct memblock_type *mem = &memblock.memory;
-	struct memblock_type *rsv = &memblock.reserved;
-	int mi = *idx & 0xffffffff;
-	int ri = *idx >> 32;
+	int idx_a = *idx & 0xffffffff;
+	int idx_b = *idx >> 32;
 
-	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
+	if (WARN_ONCE(nid == MAX_NUMNODES,
+	"Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
 		nid = NUMA_NO_NODE;
 
-	for ( ; mi < mem->cnt; mi++) {
-		struct memblock_region *m = &mem->regions[mi];
+	for (; idx_a < type_a->cnt; idx_a++) {
+		struct memblock_region *m = &type_a->regions[idx_a];
+
 		phys_addr_t m_start = m->base;
 		phys_addr_t m_end = m->base + m->size;
+		int	    m_nid = memblock_get_region_node(m);
 
 		/* only memory regions are associated with nodes, check it */
-		if (nid != NUMA_NO_NODE && nid != memblock_get_region_node(m))
+		if (nid != NUMA_NO_NODE && nid != m_nid)
 			continue;
 
-		/* scan areas before each reservation for intersection */
-		for ( ; ri < rsv->cnt + 1; ri++) {
-			struct memblock_region *r = &rsv->regions[ri];
-			phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
-			phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
+		if (!type_b) {
+			if (out_start)
+				*out_start = m_start;
+			if (out_end)
+				*out_end = m_end;
+			if (out_nid)
+				*out_nid = m_nid;
+			idx_a++;
+			*idx = (u32)idx_a | (u64)idx_b << 32;
+			return;
+		}
+
+		/* scan areas before each reservation */
+		for (; idx_b < type_b->cnt + 1; idx_b++) {
+			struct memblock_region *r;
+			phys_addr_t r_start;
+			phys_addr_t r_end;
+
+			r = &type_b->regions[idx_b];
+			r_start = idx_b ? r[-1].base + r[-1].size : 0;
+			r_end = idx_b < type_b->cnt ?
+				r->base : ULLONG_MAX;
 
-			/* if ri advanced past mi, break out to advance mi */
+			/*
+			 * if idx_b advanced past idx_a,
+			 * break out to advance idx_a
+			 */
 			if (r_start >= m_end)
 				break;
 			/* if the two regions intersect, we're done */
 			if (m_start < r_end) {
 				if (out_start)
-					*out_start = max(m_start, r_start);
+					*out_start =
+						max(m_start, r_start);
 				if (out_end)
 					*out_end = min(m_end, r_end);
 				if (out_nid)
-					*out_nid = memblock_get_region_node(m);
+					*out_nid = m_nid;
 				/*
-				 * The region which ends first is advanced
-				 * for the next iteration.
+				 * The region which ends first is
+				 * advanced for the next iteration.
 				 */
 				if (m_end <= r_end)
-					mi++;
+					idx_a++;
 				else
-					ri++;
-				*idx = (u32)mi | (u64)ri << 32;
+					idx_b++;
+				*idx = (u32)idx_a | (u64)idx_b << 32;
 				return;
 			}
 		}
@@ -837,57 +864,80 @@ void __init_memblock __next_free_mem_range(u64 *idx, int nid,
 }
 
 /**
- * __next_free_mem_range_rev - next function for for_each_free_mem_range_reverse()
+ * __next_mem_range_rev - generic next function for for_each_*_range_rev()
+ *
+ * Finds the next range from type_a which is not marked as unsuitable
+ * in type_b.
+ *
  * @idx: pointer to u64 loop variable
  * @nid: nid: node selector, %NUMA_NO_NODE for all nodes
+ * @type_a: pointer to memblock_type from where the range is taken
+ * @type_b: pointer to memblock_type which excludes memory from being taken
  * @out_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @out_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @out_nid: ptr to int for nid of the range, can be %NULL
  *
- * Reverse of __next_free_mem_range().
- *
- * Linux kernel cannot migrate pages used by itself. Memory hotplug users won't
- * be able to hot-remove hotpluggable memory used by the kernel. So this
- * function skip hotpluggable regions if needed when allocating memory for the
- * kernel.
+ * Reverse of __next_mem_range().
  */
-void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
-					   phys_addr_t *out_start,
-					   phys_addr_t *out_end, int *out_nid)
+void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
+					  struct memblock_type *type_a,
+					  struct memblock_type *type_b,
+					  phys_addr_t *out_start,
+					  phys_addr_t *out_end, int *out_nid)
 {
-	struct memblock_type *mem = &memblock.memory;
-	struct memblock_type *rsv = &memblock.reserved;
-	int mi = *idx & 0xffffffff;
-	int ri = *idx >> 32;
+	int idx_a = *idx & 0xffffffff;
+	int idx_b = *idx >> 32;
 
 	if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
 		nid = NUMA_NO_NODE;
 
 	if (*idx == (u64)ULLONG_MAX) {
-		mi = mem->cnt - 1;
-		ri = rsv->cnt;
+		idx_a = type_a->cnt - 1;
+		idx_b = type_b->cnt;
 	}
 
-	for ( ; mi >= 0; mi--) {
-		struct memblock_region *m = &mem->regions[mi];
+	for (; idx_a >= 0; idx_a--) {
+		struct memblock_region *m = &type_a->regions[idx_a];
+
 		phys_addr_t m_start = m->base;
 		phys_addr_t m_end = m->base + m->size;
+		int m_nid = memblock_get_region_node(m);
 
 		/* only memory regions are associated with nodes, check it */
-		if (nid != NUMA_NO_NODE && nid != memblock_get_region_node(m))
+		if (nid != NUMA_NO_NODE && nid != m_nid)
 			continue;
 
 		/* skip hotpluggable memory regions if needed */
 		if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
 			continue;
 
-		/* scan areas before each reservation for intersection */
-		for ( ; ri >= 0; ri--) {
-			struct memblock_region *r = &rsv->regions[ri];
-			phys_addr_t r_start = ri ? r[-1].base + r[-1].size : 0;
-			phys_addr_t r_end = ri < rsv->cnt ? r->base : ULLONG_MAX;
+		if (!type_b) {
+			if (out_start)
+				*out_start = m_start;
+			if (out_end)
+				*out_end = m_end;
+			if (out_nid)
+				*out_nid = m_nid;
+			idx_a++;
+			*idx = (u32)idx_a | (u64)idx_b << 32;
+			return;
+		}
+
+		/* scan areas before each reservation */
+		for (; idx_b >= 0; idx_b--) {
+			struct memblock_region *r;
+			phys_addr_t r_start;
+			phys_addr_t r_end;
+
+			r = &type_b->regions[idx_b];
+			r_start = idx_b ? r[-1].base + r[-1].size : 0;
+			r_end = idx_b < type_b->cnt ?
+				r->base : ULLONG_MAX;
+			/*
+			 * if idx_b advanced past idx_a,
+			 * break out to advance idx_a
+			 */
 
-			/* if ri advanced past mi, break out to advance mi */
 			if (r_end <= m_start)
 				break;
 			/* if the two regions intersect, we're done */
@@ -897,18 +947,17 @@ void __init_memblock __next_free_mem_range_rev(u64 *idx, int nid,
 				if (out_end)
 					*out_end = min(m_end, r_end);
 				if (out_nid)
-					*out_nid = memblock_get_region_node(m);
-
+					*out_nid = m_nid;
 				if (m_start >= r_start)
-					mi--;
+					idx_a--;
 				else
-					ri--;
-				*idx = (u32)mi | (u64)ri << 32;
+					idx_b--;
+				*idx = (u32)idx_a | (u64)idx_b << 32;
 				return;
 			}
 		}
 	}
-
+	/* signal end of iteration */
 	*idx = ULLONG_MAX;
 }
 
@@ -1201,7 +1250,7 @@ void __init __memblock_free_early(phys_addr_t base, phys_addr_t size)
 		     __func__, (u64)base, (u64)base + size - 1,
 		     (void *)_RET_IP_);
 	kmemleak_free_part(__va(base), size);
-	__memblock_remove(&memblock.reserved, base, size);
+	memblock_remove_range(&memblock.reserved, base, size);
 }
 
 /*
@@ -1287,8 +1336,10 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
 	}
 
 	/* truncate both memory and reserved regions */
-	__memblock_remove(&memblock.memory, max_addr, (phys_addr_t)ULLONG_MAX);
-	__memblock_remove(&memblock.reserved, max_addr, (phys_addr_t)ULLONG_MAX);
+	memblock_remove_range(&memblock.memory, max_addr,
+			      (phys_addr_t)ULLONG_MAX);
+	memblock_remove_range(&memblock.reserved, max_addr,
+			      (phys_addr_t)ULLONG_MAX);
 }
 
 static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
-- 
cgit 


From 70210ed950b538ee7eb811dccc402db9df1c9be4 Mon Sep 17 00:00:00 2001
From: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Date: Wed, 29 Jan 2014 18:16:01 +0100
Subject: mm/memblock: add physical memory list

Add the physmem list to the memblock structure. This list only exists
if HAVE_MEMBLOCK_PHYS_MAP is selected and contains the unmodified
list of physically available memory. It differs from the memblock
memory list as it always contains all memory ranges even if the
memory has been restricted, e.g. by use of the mem= kernel parameter.

Signed-off-by: Philipp Hachtmann <phacht@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/linux/memblock.h |  4 ++++
 mm/Kconfig               |  3 +++
 mm/memblock.c            | 12 ++++++++++++
 3 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index f669016874b3..73dc382e72d8 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -18,6 +18,7 @@
 #include <linux/mm.h>
 
 #define INIT_MEMBLOCK_REGIONS	128
+#define INIT_PHYSMEM_REGIONS	4
 
 /* Definition of memblock flags. */
 #define MEMBLOCK_HOTPLUG	0x1	/* hotpluggable region */
@@ -43,6 +44,9 @@ struct memblock {
 	phys_addr_t current_limit;
 	struct memblock_type memory;
 	struct memblock_type reserved;
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+	struct memblock_type physmem;
+#endif
 };
 
 extern struct memblock memblock;
diff --git a/mm/Kconfig b/mm/Kconfig
index 1b5a95f0fa01..28cec518f4d4 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -134,6 +134,9 @@ config HAVE_MEMBLOCK
 config HAVE_MEMBLOCK_NODE_MAP
 	boolean
 
+config HAVE_MEMBLOCK_PHYS_MAP
+	boolean
+
 config ARCH_DISCARD_MEMBLOCK
 	boolean
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 9edd0928daab..a810ba923cdd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -27,6 +27,9 @@
 
 static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
 static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS] __initdata_memblock;
+#endif
 
 struct memblock memblock __initdata_memblock = {
 	.memory.regions		= memblock_memory_init_regions,
@@ -37,6 +40,12 @@ struct memblock memblock __initdata_memblock = {
 	.reserved.cnt		= 1,	/* empty dummy entry */
 	.reserved.max		= INIT_MEMBLOCK_REGIONS,
 
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+	.physmem.regions	= memblock_physmem_init_regions,
+	.physmem.cnt		= 1,	/* empty dummy entry */
+	.physmem.max		= INIT_PHYSMEM_REGIONS,
+#endif
+
 	.bottom_up		= false,
 	.current_limit		= MEMBLOCK_ALLOC_ANYWHERE,
 };
@@ -1553,6 +1562,9 @@ static int __init memblock_init_debugfs(void)
 		return -ENXIO;
 	debugfs_create_file("memory", S_IRUGO, root, &memblock.memory, &memblock_debug_fops);
 	debugfs_create_file("reserved", S_IRUGO, root, &memblock.reserved, &memblock_debug_fops);
+#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
+	debugfs_create_file("physmem", S_IRUGO, root, &memblock.physmem, &memblock_debug_fops);
+#endif
 
 	return 0;
 }
-- 
cgit 


From 4cf563c5d97c83d4b2fb3a778dd7d5e362cc3e34 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 15 May 2014 16:40:23 +0300
Subject: ACPI / PM: Export rest of the subsys PM callbacks

No reason for excluding the remaining ones.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
[rjw: Rebased and exported the new acpi_subsys_complete() too.]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/device_pm.c | 5 ++++-
 include/linux/acpi.h     | 6 ++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 9e5fd9c440b7..49a51277f81d 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -928,7 +928,7 @@ EXPORT_SYMBOL_GPL(acpi_subsys_prepare);
  * acpi_subsys_complete - Finalize device's resume during system resume.
  * @dev: Device to handle.
  */
-static void acpi_subsys_complete(struct device *dev)
+void acpi_subsys_complete(struct device *dev)
 {
 	/*
 	 * If the device had been runtime-suspended before the system went into
@@ -938,6 +938,7 @@ static void acpi_subsys_complete(struct device *dev)
 	if (dev->power.direct_complete)
 		pm_request_resume(dev);
 }
+EXPORT_SYMBOL_GPL(acpi_subsys_complete);
 
 /**
  * acpi_subsys_suspend - Run the device driver's suspend callback.
@@ -951,6 +952,7 @@ int acpi_subsys_suspend(struct device *dev)
 	pm_runtime_resume(dev);
 	return pm_generic_suspend(dev);
 }
+EXPORT_SYMBOL_GPL(acpi_subsys_suspend);
 
 /**
  * acpi_subsys_suspend_late - Suspend device using ACPI.
@@ -996,6 +998,7 @@ int acpi_subsys_freeze(struct device *dev)
 	pm_runtime_resume(dev);
 	return pm_generic_freeze(dev);
 }
+EXPORT_SYMBOL_GPL(acpi_subsys_freeze);
 
 #endif /* CONFIG_PM_SLEEP */
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 7a8f2cd66c8b..4c007262e891 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -554,14 +554,20 @@ static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; }
 int acpi_dev_suspend_late(struct device *dev);
 int acpi_dev_resume_early(struct device *dev);
 int acpi_subsys_prepare(struct device *dev);
+void acpi_subsys_complete(struct device *dev);
 int acpi_subsys_suspend_late(struct device *dev);
 int acpi_subsys_resume_early(struct device *dev);
+int acpi_subsys_suspend(struct device *dev);
+int acpi_subsys_freeze(struct device *dev);
 #else
 static inline int acpi_dev_suspend_late(struct device *dev) { return 0; }
 static inline int acpi_dev_resume_early(struct device *dev) { return 0; }
 static inline int acpi_subsys_prepare(struct device *dev) { return 0; }
+static inline void acpi_subsys_complete(struct device *dev) {}
 static inline int acpi_subsys_suspend_late(struct device *dev) { return 0; }
 static inline int acpi_subsys_resume_early(struct device *dev) { return 0; }
+static inline int acpi_subsys_suspend(struct device *dev) { return 0; }
+static inline int acpi_subsys_freeze(struct device *dev) { return 0; }
 #endif
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_PM)
-- 
cgit 


From e2d0e90fae82809667f1dcf4d0d9baa421691c7a Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 15 May 2014 16:40:25 +0300
Subject: clk: new basic clk type for fractional divider

Fractional divider clocks are fairly common. This adds basic
type for them.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Acked-by: Mike Turquette <mturquette@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/clk/Makefile                 |   1 +
 drivers/clk/clk-fractional-divider.c | 135 +++++++++++++++++++++++++++++++++++
 include/linux/clk-provider.h         |  31 ++++++++
 3 files changed, 167 insertions(+)
 create mode 100644 drivers/clk/clk-fractional-divider.c

(limited to 'include/linux')

diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 5f8a28735c96..0745059b1834 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_COMMON_CLK)	+= clk-fixed-rate.o
 obj-$(CONFIG_COMMON_CLK)	+= clk-gate.o
 obj-$(CONFIG_COMMON_CLK)	+= clk-mux.o
 obj-$(CONFIG_COMMON_CLK)	+= clk-composite.o
+obj-$(CONFIG_COMMON_CLK)	+= clk-fractional-divider.o
 
 # hardware specific clock types
 # please keep this section sorted lexicographically by file/directory path name
diff --git a/drivers/clk/clk-fractional-divider.c b/drivers/clk/clk-fractional-divider.c
new file mode 100644
index 000000000000..ede685ca0d20
--- /dev/null
+++ b/drivers/clk/clk-fractional-divider.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Adjustable fractional divider clock implementation.
+ * Output rate = (m / n) * parent_rate.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/gcd.h>
+
+#define to_clk_fd(_hw) container_of(_hw, struct clk_fractional_divider, hw)
+
+static unsigned long clk_fd_recalc_rate(struct clk_hw *hw,
+					unsigned long parent_rate)
+{
+	struct clk_fractional_divider *fd = to_clk_fd(hw);
+	unsigned long flags = 0;
+	u32 val, m, n;
+	u64 ret;
+
+	if (fd->lock)
+		spin_lock_irqsave(fd->lock, flags);
+
+	val = clk_readl(fd->reg);
+
+	if (fd->lock)
+		spin_unlock_irqrestore(fd->lock, flags);
+
+	m = (val & fd->mmask) >> fd->mshift;
+	n = (val & fd->nmask) >> fd->nshift;
+
+	ret = parent_rate * m;
+	do_div(ret, n);
+
+	return ret;
+}
+
+static long clk_fd_round_rate(struct clk_hw *hw, unsigned long rate,
+			      unsigned long *prate)
+{
+	struct clk_fractional_divider *fd = to_clk_fd(hw);
+	unsigned maxn = (fd->nmask >> fd->nshift) + 1;
+	unsigned div;
+
+	if (!rate || rate >= *prate)
+		return *prate;
+
+	div = gcd(*prate, rate);
+
+	while ((*prate / div) > maxn) {
+		div <<= 1;
+		rate <<= 1;
+	}
+
+	return rate;
+}
+
+static int clk_fd_set_rate(struct clk_hw *hw, unsigned long rate,
+			   unsigned long parent_rate)
+{
+	struct clk_fractional_divider *fd = to_clk_fd(hw);
+	unsigned long flags = 0;
+	unsigned long div;
+	unsigned n, m;
+	u32 val;
+
+	div = gcd(parent_rate, rate);
+	m = rate / div;
+	n = parent_rate / div;
+
+	if (fd->lock)
+		spin_lock_irqsave(fd->lock, flags);
+
+	val = clk_readl(fd->reg);
+	val &= ~(fd->mmask | fd->nmask);
+	val |= (m << fd->mshift) | (n << fd->nshift);
+	clk_writel(val, fd->reg);
+
+	if (fd->lock)
+		spin_unlock_irqrestore(fd->lock, flags);
+
+	return 0;
+}
+
+const struct clk_ops clk_fractional_divider_ops = {
+	.recalc_rate = clk_fd_recalc_rate,
+	.round_rate = clk_fd_round_rate,
+	.set_rate = clk_fd_set_rate,
+};
+EXPORT_SYMBOL_GPL(clk_fractional_divider_ops);
+
+struct clk *clk_register_fractional_divider(struct device *dev,
+		const char *name, const char *parent_name, unsigned long flags,
+		void __iomem *reg, u8 mshift, u8 mwidth, u8 nshift, u8 nwidth,
+		u8 clk_divider_flags, spinlock_t *lock)
+{
+	struct clk_fractional_divider *fd;
+	struct clk_init_data init;
+	struct clk *clk;
+
+	fd = kzalloc(sizeof(*fd), GFP_KERNEL);
+	if (!fd) {
+		dev_err(dev, "could not allocate fractional divider clk\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	init.name = name;
+	init.ops = &clk_fractional_divider_ops;
+	init.flags = flags | CLK_IS_BASIC;
+	init.parent_names = parent_name ? &parent_name : NULL;
+	init.num_parents = parent_name ? 1 : 0;
+
+	fd->reg = reg;
+	fd->mshift = mshift;
+	fd->mmask = (BIT(mwidth) - 1) << mshift;
+	fd->nshift = nshift;
+	fd->nmask = (BIT(nwidth) - 1) << nshift;
+	fd->flags = clk_divider_flags;
+	fd->lock = lock;
+	fd->hw.init = &init;
+
+	clk = clk_register(dev, &fd->hw);
+	if (IS_ERR(clk))
+		kfree(fd);
+
+	return clk;
+}
+EXPORT_SYMBOL_GPL(clk_register_fractional_divider);
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 511917416fb0..fb4eca6907cd 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -413,6 +413,37 @@ struct clk *clk_register_fixed_factor(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		unsigned int mult, unsigned int div);
 
+/**
+ * struct clk_fractional_divider - adjustable fractional divider clock
+ *
+ * @hw:		handle between common and hardware-specific interfaces
+ * @reg:	register containing the divider
+ * @mshift:	shift to the numerator bit field
+ * @mwidth:	width of the numerator bit field
+ * @nshift:	shift to the denominator bit field
+ * @nwidth:	width of the denominator bit field
+ * @lock:	register lock
+ *
+ * Clock with adjustable fractional divider affecting its output frequency.
+ */
+
+struct clk_fractional_divider {
+	struct clk_hw	hw;
+	void __iomem	*reg;
+	u8		mshift;
+	u32		mmask;
+	u8		nshift;
+	u32		nmask;
+	u8		flags;
+	spinlock_t	*lock;
+};
+
+extern const struct clk_ops clk_fractional_divider_ops;
+struct clk *clk_register_fractional_divider(struct device *dev,
+		const char *name, const char *parent_name, unsigned long flags,
+		void __iomem *reg, u8 mshift, u8 mwidth, u8 nshift, u8 nwidth,
+		u8 clk_divider_flags, spinlock_t *lock);
+
 /***
  * struct clk_composite - aggregate clock of mux, divider and gate clocks
  *
-- 
cgit 


From e1618d461ca18d40f9c3ef70598abb72e75d27ae Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevic@redhat.com>
Date: Tue, 20 May 2014 10:59:26 -0400
Subject: vlan: Fix build error with vlan_get_encap_level()

The new function vlan_get_encap_level() uses vlan_dev_priv()
which is only conditionally available when VLAN support is
enabled.  Make vlan_get_encap_level() conditionally available
as well.

Fixes: 44a4085538c8 ("bonding: Fix stacked device detection in arp monitoring")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
CC: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Vlad Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index c901b13b6f03..b2acc4a1b13c 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -198,6 +198,12 @@ extern void vlan_vids_del_by_dev(struct net_device *dev,
 				 const struct net_device *by_dev);
 
 extern bool vlan_uses_dev(const struct net_device *dev);
+
+static inline int vlan_get_encap_level(struct net_device *dev)
+{
+	BUG_ON(!is_vlan_dev(dev));
+	return vlan_dev_priv(dev)->nest_level;
+}
 #else
 static inline struct net_device *
 __vlan_find_dev_deep(struct net_device *real_dev,
@@ -264,6 +270,11 @@ static inline bool vlan_uses_dev(const struct net_device *dev)
 {
 	return false;
 }
+static inline int vlan_get_encap_level(struct net_device *dev)
+{
+	BUG();
+	return 0;
+}
 #endif
 
 static inline bool vlan_hw_offload_capable(netdev_features_t features,
@@ -485,9 +496,4 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
 		skb->protocol = htons(ETH_P_802_2);
 }
 
-static inline int vlan_get_encap_level(struct net_device *dev)
-{
-	BUG_ON(!is_vlan_dev(dev));
-	return vlan_dev_priv(dev)->nest_level;
-}
 #endif /* !(_LINUX_IF_VLAN_H_) */
-- 
cgit 


From e3a2b3f931f59d5284abd13faf8bded726884ffd Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 20 May 2014 11:49:02 -0600
Subject: blk-mq: allow changing of queue depth through sysfs

For request_fn based devices, the block layer exports a 'nr_requests'
file through sysfs to allow adjusting of queue depth on the fly.
Currently this returns -EINVAL for blk-mq, since it's not wired up.
Wire this up for blk-mq, so that it now also allows dynamic
adjustments of the allowed queue depth for any given block device
managed by blk-mq.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       | 41 ++++++++++++++++++++++++++
 block/blk-mq-tag.c     | 80 +++++++++++++++++++++++++++++++++++---------------
 block/blk-mq-tag.h     |  1 +
 block/blk-mq.c         | 22 ++++++++++++++
 block/blk-mq.h         |  1 +
 block/blk-sysfs.c      | 45 ++++++----------------------
 block/blk.h            |  2 ++
 include/linux/blk-mq.h |  2 +-
 8 files changed, 134 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index a6bd3e702201..fe81e19099a1 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -848,6 +848,47 @@ static void freed_request(struct request_list *rl, unsigned int flags)
 		__freed_request(rl, sync ^ 1);
 }
 
+int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
+{
+	struct request_list *rl;
+
+	spin_lock_irq(q->queue_lock);
+	q->nr_requests = nr;
+	blk_queue_congestion_threshold(q);
+
+	/* congestion isn't cgroup aware and follows root blkcg for now */
+	rl = &q->root_rl;
+
+	if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
+		blk_set_queue_congested(q, BLK_RW_SYNC);
+	else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
+		blk_clear_queue_congested(q, BLK_RW_SYNC);
+
+	if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
+		blk_set_queue_congested(q, BLK_RW_ASYNC);
+	else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
+		blk_clear_queue_congested(q, BLK_RW_ASYNC);
+
+	blk_queue_for_each_rl(rl, q) {
+		if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
+			blk_set_rl_full(rl, BLK_RW_SYNC);
+		} else {
+			blk_clear_rl_full(rl, BLK_RW_SYNC);
+			wake_up(&rl->wait[BLK_RW_SYNC]);
+		}
+
+		if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
+			blk_set_rl_full(rl, BLK_RW_ASYNC);
+		} else {
+			blk_clear_rl_full(rl, BLK_RW_ASYNC);
+			wake_up(&rl->wait[BLK_RW_ASYNC]);
+		}
+	}
+
+	spin_unlock_irq(q->queue_lock);
+	return 0;
+}
+
 /*
  * Determine if elevator data should be initialized when allocating the
  * request associated with @bio.
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e6b3fbae9862..f6dea968b710 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -57,23 +57,13 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 }
 
 /*
- * If a previously busy queue goes inactive, potential waiters could now
- * be allowed to queue. Wake them up and check.
+ * Wakeup all potentially sleeping on normal (non-reserved) tags
  */
-void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
 {
-	struct blk_mq_tags *tags = hctx->tags;
 	struct blk_mq_bitmap_tags *bt;
 	int i, wake_index;
 
-	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-		return;
-
-	atomic_dec(&tags->active_queues);
-
-	/*
-	 * Will only throttle depth on non-reserved tags
-	 */
 	bt = &tags->bitmap_tags;
 	wake_index = bt->wake_index;
 	for (i = 0; i < BT_WAIT_QUEUES; i++) {
@@ -86,6 +76,22 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 	}
 }
 
+/*
+ * If a previously busy queue goes inactive, potential waiters could now
+ * be allowed to queue. Wake them up and check.
+ */
+void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_tags *tags = hctx->tags;
+
+	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		return;
+
+	atomic_dec(&tags->active_queues);
+
+	blk_mq_tag_wakeup_all(tags);
+}
+
 /*
  * For shared tag users, we track the number of currently active users
  * and attempt to provide a fair share of the tag depth for each of them.
@@ -408,6 +414,28 @@ static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
 	return bt->depth - used;
 }
 
+static void bt_update_count(struct blk_mq_bitmap_tags *bt,
+			    unsigned int depth)
+{
+	unsigned int tags_per_word = 1U << bt->bits_per_word;
+	unsigned int map_depth = depth;
+
+	if (depth) {
+		int i;
+
+		for (i = 0; i < bt->map_nr; i++) {
+			bt->map[i].depth = min(map_depth, tags_per_word);
+			map_depth -= bt->map[i].depth;
+		}
+	}
+
+	bt->wake_cnt = BT_WAIT_BATCH;
+	if (bt->wake_cnt > depth / 4)
+		bt->wake_cnt = max(1U, depth / 4);
+
+	bt->depth = depth;
+}
+
 static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 			int node, bool reserved)
 {
@@ -420,7 +448,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 	 * condition.
 	 */
 	if (depth) {
-		unsigned int nr, i, map_depth, tags_per_word;
+		unsigned int nr, tags_per_word;
 
 		tags_per_word = (1 << bt->bits_per_word);
 
@@ -444,11 +472,6 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 			return -ENOMEM;
 
 		bt->map_nr = nr;
-		map_depth = depth;
-		for (i = 0; i < nr; i++) {
-			bt->map[i].depth = min(map_depth, tags_per_word);
-			map_depth -= tags_per_word;
-		}
 	}
 
 	bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
@@ -460,11 +483,7 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
 	for (i = 0; i < BT_WAIT_QUEUES; i++)
 		init_waitqueue_head(&bt->bs[i].wait);
 
-	bt->wake_cnt = BT_WAIT_BATCH;
-	if (bt->wake_cnt > depth / 4)
-		bt->wake_cnt = max(1U, depth / 4);
-
-	bt->depth = depth;
+	bt_update_count(bt, depth);
 	return 0;
 }
 
@@ -525,6 +544,21 @@ void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag)
 	*tag = prandom_u32() % depth;
 }
 
+int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
+{
+	tdepth -= tags->nr_reserved_tags;
+	if (tdepth > tags->nr_tags)
+		return -EINVAL;
+
+	/*
+	 * Don't need (or can't) update reserved tags here, they remain
+	 * static and should never need resizing.
+	 */
+	bt_update_count(&tags->bitmap_tags, tdepth);
+	blk_mq_tag_wakeup_all(tags);
+	return 0;
+}
+
 ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
 {
 	char *orig_page = page;
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index e144f68ec45f..e7ff5ceeeb97 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -55,6 +55,7 @@ extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data
 extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
 extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
+extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
 
 enum {
 	BLK_MQ_TAG_CACHE_MIN	= 1,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0fbef7e9bef1..7b71ab1b1536 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1789,6 +1789,28 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_free_tag_set);
 
+int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
+{
+	struct blk_mq_tag_set *set = q->tag_set;
+	struct blk_mq_hw_ctx *hctx;
+	int i, ret;
+
+	if (!set || nr > set->queue_depth)
+		return -EINVAL;
+
+	ret = 0;
+	queue_for_each_hw_ctx(q, hctx, i) {
+		ret = blk_mq_tag_update_depth(hctx->tags, nr);
+		if (ret)
+			break;
+	}
+
+	if (!ret)
+		q->nr_requests = nr;
+
+	return ret;
+}
+
 void blk_mq_disable_hotplug(void)
 {
 	mutex_lock(&all_q_mutex);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 5e5a378962b7..7db4fe4bd002 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,6 +32,7 @@ void blk_mq_drain_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
 		struct request *orig_rq);
+int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 
 /*
  * CPU hotplug helpers
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 7500f876dae4..4d6811ac13fd 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -48,11 +48,10 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page)
 static ssize_t
 queue_requests_store(struct request_queue *q, const char *page, size_t count)
 {
-	struct request_list *rl;
 	unsigned long nr;
-	int ret;
+	int ret, err;
 
-	if (!q->request_fn)
+	if (!q->request_fn && !q->mq_ops)
 		return -EINVAL;
 
 	ret = queue_var_store(&nr, page, count);
@@ -62,40 +61,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 	if (nr < BLKDEV_MIN_RQ)
 		nr = BLKDEV_MIN_RQ;
 
-	spin_lock_irq(q->queue_lock);
-	q->nr_requests = nr;
-	blk_queue_congestion_threshold(q);
-
-	/* congestion isn't cgroup aware and follows root blkcg for now */
-	rl = &q->root_rl;
-
-	if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
-		blk_set_queue_congested(q, BLK_RW_SYNC);
-	else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
-		blk_clear_queue_congested(q, BLK_RW_SYNC);
-
-	if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
-		blk_set_queue_congested(q, BLK_RW_ASYNC);
-	else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
-		blk_clear_queue_congested(q, BLK_RW_ASYNC);
-
-	blk_queue_for_each_rl(rl, q) {
-		if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
-			blk_set_rl_full(rl, BLK_RW_SYNC);
-		} else {
-			blk_clear_rl_full(rl, BLK_RW_SYNC);
-			wake_up(&rl->wait[BLK_RW_SYNC]);
-		}
-
-		if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
-			blk_set_rl_full(rl, BLK_RW_ASYNC);
-		} else {
-			blk_clear_rl_full(rl, BLK_RW_ASYNC);
-			wake_up(&rl->wait[BLK_RW_ASYNC]);
-		}
-	}
+	if (q->request_fn)
+		err = blk_update_nr_requests(q, nr);
+	else
+		err = blk_mq_update_nr_requests(q, nr);
+
+	if (err)
+		return err;
 
-	spin_unlock_irq(q->queue_lock);
 	return ret;
 }
 
diff --git a/block/blk.h b/block/blk.h
index 95cab70000e3..45385e9abf6f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -188,6 +188,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
 	return q->nr_congestion_off;
 }
 
+extern int blk_update_nr_requests(struct request_queue *, unsigned int);
+
 /*
  * Contribute to IO statistics IFF:
  *
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a06ca7b5ea05..f45424453338 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -63,7 +63,7 @@ struct blk_mq_hw_ctx {
 struct blk_mq_tag_set {
 	struct blk_mq_ops	*ops;
 	unsigned int		nr_hw_queues;
-	unsigned int		queue_depth;
+	unsigned int		queue_depth;	/* max hw supported */
 	unsigned int		reserved_tags;
 	unsigned int		cmd_size;	/* per-request extra data */
 	int			numa_node;
-- 
cgit 


From 78d683e838a60ec4ba4591cca4364cba84a9e626 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Mon, 19 May 2014 15:58:32 -0700
Subject: mm, fs: Add vm_ops->name as an alternative to arch_vma_name

arch_vma_name sucks.  It's a silly hack, and it's annoying to
implement correctly.  In fact, AFAICS, even the straightforward x86
implementation is incorrect (I suspect that it breaks if the vdso
mapping is split or gets remapped).

This adds a new vm_ops->name operation that can replace it.  The
followup patches will remove all uses of arch_vma_name on x86,
fixing a couple of annoyances in the process.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/2eee21791bb36a0a408c5c2bdb382a9e6a41ca4a.1400538962.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 fs/binfmt_elf.c    | 8 ++++++++
 fs/proc/task_mmu.c | 6 ++++++
 include/linux/mm.h | 6 ++++++
 3 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index aa3cb626671e..df9ea4186d75 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1108,6 +1108,14 @@ static bool always_dump_vma(struct vm_area_struct *vma)
 	/* Any vsyscall mappings? */
 	if (vma == get_gate_vma(vma->vm_mm))
 		return true;
+
+	/*
+	 * Assume that all vmas with a .name op should always be dumped.
+	 * If this changes, a new vm_ops field can easily be added.
+	 */
+	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
+		return true;
+
 	/*
 	 * arch_vma_name() returns non-NULL for special architecture mappings,
 	 * such as vDSO sections.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 442177b1119a..9b2f5d62ce63 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -300,6 +300,12 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 		goto done;
 	}
 
+	if (vma->vm_ops && vma->vm_ops->name) {
+		name = vma->vm_ops->name(vma);
+		if (name)
+			goto done;
+	}
+
 	name = arch_vma_name(vma);
 	if (!name) {
 		pid_t tid;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bf9811e1321a..63f8d4efe303 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -239,6 +239,12 @@ struct vm_operations_struct {
 	 */
 	int (*access)(struct vm_area_struct *vma, unsigned long addr,
 		      void *buf, int len, int write);
+
+	/* Called by the /proc/PID/maps code to ask the vma whether it
+	 * has a special name.  Returning non-NULL will also cause this
+	 * vma to be dumped unconditionally. */
+	const char *(*name)(struct vm_area_struct *vma);
+
 #ifdef CONFIG_NUMA
 	/*
 	 * set_policy() op must add a reference to any non-NULL @new mempolicy
-- 
cgit 


From a62c34bd2a8a3f159945becd57401e478818d51c Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Mon, 19 May 2014 15:58:33 -0700
Subject: x86, mm: Improve _install_special_mapping and fix x86 vdso naming

Using arch_vma_name to give special mappings a name is awkward.  x86
currently implements it by comparing the start address of the vma to
the expected address of the vdso.  This requires tracking the start
address of special mappings and is probably buggy if a special vma
is split or moved.

Improve _install_special_mapping to just name the vma directly.  Use
it to give the x86 vvar area a name, which should make CRIU's life
easier.

As a side effect, the vvar area will show up in core dumps.  This
could be considered weird and is fixable.

[hpa: I say we accept this as-is but be prepared to deal with knocking
 out the vvars from core dumps if this becomes a problem.]

Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/276b39b6b645fb11e345457b503f17b83c2c6fd0.1400538962.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/vdso.h  |  6 ++-
 arch/x86/mm/init_64.c        |  3 --
 arch/x86/vdso/vdso2c.h       |  5 ++-
 arch/x86/vdso/vdso32-setup.c |  7 ----
 arch/x86/vdso/vma.c          | 25 ++++++++-----
 include/linux/mm.h           |  4 +-
 include/linux/mm_types.h     |  6 +++
 mm/mmap.c                    | 89 +++++++++++++++++++++++++++++---------------
 8 files changed, 94 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index d0a2c909c72d..30be253dd283 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -7,10 +7,14 @@
 
 #ifndef __ASSEMBLER__
 
+#include <linux/mm_types.h>
+
 struct vdso_image {
 	void *data;
 	unsigned long size;   /* Always a multiple of PAGE_SIZE */
-	struct page **pages;  /* Big enough for data/size page pointers */
+
+	/* text_mapping.pages is big enough for data/size page pointers */
+	struct vm_special_mapping text_mapping;
 
 	unsigned long alt, alt_len;
 
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6f881842116c..9deb59b0baea 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1223,9 +1223,6 @@ int in_gate_area_no_mm(unsigned long addr)
 
 const char *arch_vma_name(struct vm_area_struct *vma)
 {
-	if (vma->vm_mm && vma->vm_start ==
-	    (long __force)vma->vm_mm->context.vdso)
-		return "[vdso]";
 	if (vma == &gate_vma)
 		return "[vsyscall]";
 	return NULL;
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index ed2e894e89ab..3dcc61e796e9 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -136,7 +136,10 @@ static int GOFUNC(void *addr, size_t len, FILE *outfile, const char *name)
 	fprintf(outfile, "const struct vdso_image %s = {\n", name);
 	fprintf(outfile, "\t.data = raw_data,\n");
 	fprintf(outfile, "\t.size = %lu,\n", data_size);
-	fprintf(outfile, "\t.pages = pages,\n");
+	fprintf(outfile, "\t.text_mapping = {\n");
+	fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
+	fprintf(outfile, "\t\t.pages = pages,\n");
+	fprintf(outfile, "\t},\n");
 	if (alt_sec) {
 		fprintf(outfile, "\t.alt = %lu,\n",
 			(unsigned long)alt_sec->sh_offset);
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index c3ed708e50f4..e4f7781ee162 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -119,13 +119,6 @@ __initcall(ia32_binfmt_init);
 
 #else  /* CONFIG_X86_32 */
 
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
-	if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso)
-		return "[vdso]";
-	return NULL;
-}
-
 struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
 	return NULL;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 8ad0081df7a8..e1513c47872a 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -30,7 +30,8 @@ void __init init_vdso_image(const struct vdso_image *image)
 
 	BUG_ON(image->size % PAGE_SIZE != 0);
 	for (i = 0; i < npages; i++)
-		image->pages[i] = virt_to_page(image->data + i*PAGE_SIZE);
+		image->text_mapping.pages[i] =
+			virt_to_page(image->data + i*PAGE_SIZE);
 
 	apply_alternatives((struct alt_instr *)(image->data + image->alt),
 			   (struct alt_instr *)(image->data + image->alt +
@@ -91,6 +92,10 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 	unsigned long addr;
 	int ret = 0;
 	static struct page *no_pages[] = {NULL};
+	static struct vm_special_mapping vvar_mapping = {
+		.name = "[vvar]",
+		.pages = no_pages,
+	};
 
 	if (calculate_addr) {
 		addr = vdso_addr(current->mm->start_stack,
@@ -112,21 +117,23 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
 	/*
 	 * MAYWRITE to allow gdb to COW and set breakpoints
 	 */
-	ret = install_special_mapping(mm,
-				      addr,
-				      image->size,
-				      VM_READ|VM_EXEC|
-				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				      image->pages);
+	vma = _install_special_mapping(mm,
+				       addr,
+				       image->size,
+				       VM_READ|VM_EXEC|
+				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+				       &image->text_mapping);
 
-	if (ret)
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto up_fail;
+	}
 
 	vma = _install_special_mapping(mm,
 				       addr + image->size,
 				       image->sym_end_mapping - image->size,
 				       VM_READ,
-				       no_pages);
+				       &vvar_mapping);
 
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 63f8d4efe303..05aab09803e6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1782,7 +1782,9 @@ extern struct file *get_mm_exe_file(struct mm_struct *mm);
 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
 extern struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
 				   unsigned long addr, unsigned long len,
-				   unsigned long flags, struct page **pages);
+				   unsigned long flags,
+				   const struct vm_special_mapping *spec);
+/* This is an obsolete alternative to _install_special_mapping. */
 extern int install_special_mapping(struct mm_struct *mm,
 				   unsigned long addr, unsigned long len,
 				   unsigned long flags, struct page **pages);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8967e20cbe57..22c6f4e16d10 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -510,4 +510,10 @@ static inline void clear_tlb_flush_pending(struct mm_struct *mm)
 }
 #endif
 
+struct vm_special_mapping
+{
+	const char *name;
+	struct page **pages;
+};
+
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/mm/mmap.c b/mm/mmap.c
index b1202cf81f4b..52bbc9514d9d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2872,6 +2872,31 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
 	return 1;
 }
 
+static int special_mapping_fault(struct vm_area_struct *vma,
+				 struct vm_fault *vmf);
+
+/*
+ * Having a close hook prevents vma merging regardless of flags.
+ */
+static void special_mapping_close(struct vm_area_struct *vma)
+{
+}
+
+static const char *special_mapping_name(struct vm_area_struct *vma)
+{
+	return ((struct vm_special_mapping *)vma->vm_private_data)->name;
+}
+
+static const struct vm_operations_struct special_mapping_vmops = {
+	.close = special_mapping_close,
+	.fault = special_mapping_fault,
+	.name = special_mapping_name,
+};
+
+static const struct vm_operations_struct legacy_special_mapping_vmops = {
+	.close = special_mapping_close,
+	.fault = special_mapping_fault,
+};
 
 static int special_mapping_fault(struct vm_area_struct *vma,
 				struct vm_fault *vmf)
@@ -2887,7 +2912,13 @@ static int special_mapping_fault(struct vm_area_struct *vma,
 	 */
 	pgoff = vmf->pgoff - vma->vm_pgoff;
 
-	for (pages = vma->vm_private_data; pgoff && *pages; ++pages)
+	if (vma->vm_ops == &legacy_special_mapping_vmops)
+		pages = vma->vm_private_data;
+	else
+		pages = ((struct vm_special_mapping *)vma->vm_private_data)->
+			pages;
+
+	for (; pgoff && *pages; ++pages)
 		pgoff--;
 
 	if (*pages) {
@@ -2900,30 +2931,11 @@ static int special_mapping_fault(struct vm_area_struct *vma,
 	return VM_FAULT_SIGBUS;
 }
 
-/*
- * Having a close hook prevents vma merging regardless of flags.
- */
-static void special_mapping_close(struct vm_area_struct *vma)
-{
-}
-
-static const struct vm_operations_struct special_mapping_vmops = {
-	.close = special_mapping_close,
-	.fault = special_mapping_fault,
-};
-
-/*
- * Called with mm->mmap_sem held for writing.
- * Insert a new vma covering the given region, with the given flags.
- * Its pages are supplied by the given array of struct page *.
- * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
- * The region past the last page supplied will always produce SIGBUS.
- * The array pointer and the pages it points to are assumed to stay alive
- * for as long as this mapping might exist.
- */
-struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
-			    unsigned long addr, unsigned long len,
-			    unsigned long vm_flags, struct page **pages)
+static struct vm_area_struct *__install_special_mapping(
+	struct mm_struct *mm,
+	unsigned long addr, unsigned long len,
+	unsigned long vm_flags, const struct vm_operations_struct *ops,
+	void *priv)
 {
 	int ret;
 	struct vm_area_struct *vma;
@@ -2940,8 +2952,8 @@ struct vm_area_struct *_install_special_mapping(struct mm_struct *mm,
 	vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 
-	vma->vm_ops = &special_mapping_vmops;
-	vma->vm_private_data = pages;
+	vma->vm_ops = ops;
+	vma->vm_private_data = priv;
 
 	ret = insert_vm_struct(mm, vma);
 	if (ret)
@@ -2958,12 +2970,31 @@ out:
 	return ERR_PTR(ret);
 }
 
+/*
+ * Called with mm->mmap_sem held for writing.
+ * Insert a new vma covering the given region, with the given flags.
+ * Its pages are supplied by the given array of struct page *.
+ * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
+ * The region past the last page supplied will always produce SIGBUS.
+ * The array pointer and the pages it points to are assumed to stay alive
+ * for as long as this mapping might exist.
+ */
+struct vm_area_struct *_install_special_mapping(
+	struct mm_struct *mm,
+	unsigned long addr, unsigned long len,
+	unsigned long vm_flags, const struct vm_special_mapping *spec)
+{
+	return __install_special_mapping(mm, addr, len, vm_flags,
+					 &special_mapping_vmops, (void *)spec);
+}
+
 int install_special_mapping(struct mm_struct *mm,
 			    unsigned long addr, unsigned long len,
 			    unsigned long vm_flags, struct page **pages)
 {
-	struct vm_area_struct *vma = _install_special_mapping(mm,
-			    addr, len, vm_flags, pages);
+	struct vm_area_struct *vma = __install_special_mapping(
+		mm, addr, len, vm_flags, &legacy_special_mapping_vmops,
+		(void *)pages);
 
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
-- 
cgit 


From 9dd3107576c4bbd40e1c2c8b24d560abf9a7b991 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 8 May 2014 16:06:17 -0500
Subject: of: align RESERVEDMEM_OF_DECLARE function callbacks to other
 callbacks

All the parameters for RESERVEDMEM_OF_DECLARE function callbacks are
members of struct reserved_mem, so just pass the struct ptr to callback
functions so the function callback is more in line with other OF match
table callbacks.

Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Grant Likely <grant.likely@linaro.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/of_reserved_mem.c    | 2 +-
 include/linux/of_reserved_mem.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index e420eb52e5c9..632aae861375 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -188,7 +188,7 @@ static int __init __reserved_mem_init_node(struct reserved_mem *rmem)
 		if (!of_flat_dt_is_compatible(rmem->fdt_node, compat))
 			continue;
 
-		if (initfn(rmem, rmem->fdt_node, rmem->name) == 0) {
+		if (initfn(rmem) == 0) {
 			pr_info("Reserved memory: initialized node %s, compatible id %s\n",
 				rmem->name, compat);
 			return 0;
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index 9b1fbb7f29fc..4c81b84e95ff 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -21,8 +21,8 @@ struct reserved_mem_ops {
 				  struct device *dev);
 };
 
-typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem,
-				      unsigned long node, const char *uname);
+typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem);
+
 
 #ifdef CONFIG_OF_RESERVED_MEM
 void fdt_init_reserved_mem(void);
-- 
cgit 


From 54196ccbe0ba1f268a646059473313589db35b01 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 8 May 2014 16:09:24 -0500
Subject: of: consolidate linker section OF match table declarations

We now have several OF match tables using linker sections that are
nearly the same definition. The only variation is the callback function
prototype. Create a common define for creating linker section OF match
table entries which each table declaration can use.

Acked-by: Grant Likely <grant.likely@linaro.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/clocksource/clksrc-of.c |  2 +-
 drivers/irqchip/irqchip.h       |  7 +++----
 include/linux/clk-provider.h    |  5 +----
 include/linux/clocksource.h     | 16 +++-------------
 include/linux/of.h              | 22 ++++++++++++++++++++++
 include/linux/of_reserved_mem.h | 18 ++----------------
 6 files changed, 32 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clocksource/clksrc-of.c b/drivers/clocksource/clksrc-of.c
index ae2e4278c42a..0093a8e49e14 100644
--- a/drivers/clocksource/clksrc-of.c
+++ b/drivers/clocksource/clksrc-of.c
@@ -27,7 +27,7 @@ void __init clocksource_of_init(void)
 {
 	struct device_node *np;
 	const struct of_device_id *match;
-	clocksource_of_init_fn init_func;
+	of_init_fn_1 init_func;
 	unsigned clocksources = 0;
 
 	for_each_matching_node_and_match(np, __clksrc_of_table, &match) {
diff --git a/drivers/irqchip/irqchip.h b/drivers/irqchip/irqchip.h
index e445ba2d6add..0f6486d4f1b0 100644
--- a/drivers/irqchip/irqchip.h
+++ b/drivers/irqchip/irqchip.h
@@ -11,6 +11,8 @@
 #ifndef _IRQCHIP_H
 #define _IRQCHIP_H
 
+#include <linux/of.h>
+
 /*
  * This macro must be used by the different irqchip drivers to declare
  * the association between their DT compatible string and their
@@ -21,9 +23,6 @@
  * @compstr: compatible string of the irqchip driver
  * @fn: initialization function
  */
-#define IRQCHIP_DECLARE(name,compstr,fn)				\
-	static const struct of_device_id irqchip_of_match_##name	\
-	__used __section(__irqchip_of_table)				\
-	= { .compatible = compstr, .data = fn }
+#define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn)
 
 #endif
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 511917416fb0..a6e4008a0bf7 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -498,10 +498,7 @@ struct clk_onecell_data {
 
 extern struct of_device_id __clk_of_table;
 
-#define CLK_OF_DECLARE(name, compat, fn)			\
-	static const struct of_device_id __clk_of_table_##name	\
-		__used __section(__clk_of_table)		\
-		= { .compatible = compat, .data = fn };
+#define CLK_OF_DECLARE(name, compat, fn) OF_DECLARE_1(clk, name, compat, fn)
 
 #ifdef CONFIG_OF
 int of_clk_add_provider(struct device_node *np,
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 67301a405712..a16b497d5159 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -339,23 +339,13 @@ extern int clocksource_mmio_init(void __iomem *, const char *,
 
 extern int clocksource_i8253_init(void);
 
-struct device_node;
-typedef void(*clocksource_of_init_fn)(struct device_node *);
+#define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \
+	OF_DECLARE_1(clksrc, name, compat, fn)
+
 #ifdef CONFIG_CLKSRC_OF
 extern void clocksource_of_init(void);
-
-#define CLOCKSOURCE_OF_DECLARE(name, compat, fn)			\
-	static const struct of_device_id __clksrc_of_table_##name	\
-		__used __section(__clksrc_of_table)			\
-		 = { .compatible = compat,				\
-		     .data = (fn == (clocksource_of_init_fn)NULL) ? fn : fn }
 #else
 static inline void clocksource_of_init(void) {}
-#define CLOCKSOURCE_OF_DECLARE(name, compat, fn)			\
-	static const struct of_device_id __clksrc_of_table_##name	\
-		__attribute__((unused))					\
-		 = { .compatible = compat,				\
-		     .data = (fn == (clocksource_of_init_fn)NULL) ? fn : fn }
 #endif
 
 #endif /* _LINUX_CLOCKSOURCE_H */
diff --git a/include/linux/of.h b/include/linux/of.h
index 3bad8d106e0e..bf65335b4d05 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -757,4 +757,26 @@ static inline int of_get_available_child_count(const struct device_node *np)
 	return num;
 }
 
+#ifdef CONFIG_OF
+#define _OF_DECLARE(table, name, compat, fn, fn_type)			\
+	static const struct of_device_id __of_table_##name		\
+		__used __section(__##table##_of_table)			\
+		 = { .compatible = compat,				\
+		     .data = (fn == (fn_type)NULL) ? fn : fn  }
+#else
+#define _OF_DECLARE(table, name, compat, fn, fn_type)					\
+	static const struct of_device_id __of_table_##name		\
+		__attribute__((unused))					\
+		 = { .compatible = compat,				\
+		     .data = (fn == (fn_type)NULL) ? fn : fn }
+#endif
+
+typedef int (*of_init_fn_2)(struct device_node *, struct device_node *);
+typedef void (*of_init_fn_1)(struct device_node *);
+
+#define OF_DECLARE_1(table, name, compat, fn) \
+		_OF_DECLARE(table, name, compat, fn, of_init_fn_1)
+#define OF_DECLARE_2(table, name, compat, fn) \
+		_OF_DECLARE(table, name, compat, fn, of_init_fn_2)
+
 #endif /* _LINUX_OF_H */
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index 4c81b84e95ff..4669ddfdd5af 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -23,31 +23,17 @@ struct reserved_mem_ops {
 
 typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem);
 
+#define RESERVEDMEM_OF_DECLARE(name, compat, init)			\
+	_OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn)
 
 #ifdef CONFIG_OF_RESERVED_MEM
 void fdt_init_reserved_mem(void);
 void fdt_reserved_mem_save_node(unsigned long node, const char *uname,
 			       phys_addr_t base, phys_addr_t size);
-
-#define RESERVEDMEM_OF_DECLARE(name, compat, init)			\
-	static const struct of_device_id __reservedmem_of_table_##name	\
-		__used __section(__reservedmem_of_table)		\
-		 = { .compatible = compat,				\
-		     .data = (init == (reservedmem_of_init_fn)NULL) ?	\
-				init : init }
-
 #else
 static inline void fdt_init_reserved_mem(void) { }
 static inline void fdt_reserved_mem_save_node(unsigned long node,
 		const char *uname, phys_addr_t base, phys_addr_t size) { }
-
-#define RESERVEDMEM_OF_DECLARE(name, compat, init)			\
-	static const struct of_device_id __reservedmem_of_table_##name	\
-		__attribute__((unused))					\
-		 = { .compatible = compat,				\
-		     .data = (init == (reservedmem_of_init_fn)NULL) ?	\
-				init : init }
-
 #endif
 
 #endif /* __OF_RESERVED_MEM_H */
-- 
cgit 


From b0b6abd34c1b508d4ac95dbc614f36c49d29e65a Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 27 Mar 2014 08:06:16 -0500
Subject: serial: earlycon: add DT support

This adds the infrastructure to generic earlycon for earlycon setup
using DT. The actual setup is not enabled until a following commit to
add the FDT parsing.

Signed-off-by: Rob Herring <robh@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Arnd Bergmann <arnd@arndb.de>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/tty/serial/earlycon.c     | 28 ++++++++++++++++++++++++++++
 include/asm-generic/vmlinux.lds.h |  4 +++-
 include/linux/serial_core.h       |  6 ++++++
 3 files changed, 37 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/earlycon.c b/drivers/tty/serial/earlycon.c
index c92e83088adb..5131b5ee6164 100644
--- a/drivers/tty/serial/earlycon.c
+++ b/drivers/tty/serial/earlycon.c
@@ -15,6 +15,8 @@
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/serial_core.h>
+#include <linux/sizes.h>
+#include <linux/mod_devicetable.h>
 
 #ifdef CONFIG_FIX_EARLYCON_MEM
 #include <asm/fixmap.h>
@@ -32,6 +34,9 @@ static struct earlycon_device early_console_dev = {
 	.con = &early_con,
 };
 
+static const struct of_device_id __earlycon_of_table_sentinel
+	__used __section(__earlycon_of_table_end);
+
 static void __iomem * __init earlycon_map(unsigned long paddr, size_t size)
 {
 	void __iomem *base;
@@ -142,3 +147,26 @@ int __init setup_earlycon(char *buf, const char *match,
 	register_console(early_console_dev.con);
 	return 0;
 }
+
+int __init of_setup_earlycon(unsigned long addr,
+			     int (*setup)(struct earlycon_device *, const char *))
+{
+	int err;
+	struct uart_port *port = &early_console_dev.port;
+
+	port->iotype = UPIO_MEM;
+	port->mapbase = addr;
+	port->uartclk = BASE_BAUD * 16;
+	port->membase = earlycon_map(addr, SZ_4K);
+
+	early_console_dev.con->data = &early_console_dev;
+	err = setup(&early_console_dev, NULL);
+	if (err < 0)
+		return err;
+	if (!early_console_dev.con->write)
+		return -ENODEV;
+
+
+	register_console(early_console_dev.con);
+	return 0;
+}
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index b9404f6590f1..d647637cd699 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -155,6 +155,7 @@
 #define CLK_OF_TABLES()		OF_TABLE(CONFIG_COMMON_CLK, clk)
 #define RESERVEDMEM_OF_TABLES()	OF_TABLE(CONFIG_OF_RESERVED_MEM, reservedmem)
 #define CPU_METHOD_OF_TABLES()	OF_TABLE(CONFIG_SMP, cpu_method)
+#define EARLYCON_OF_TABLES()	OF_TABLE(CONFIG_SERIAL_EARLYCON, earlycon)
 
 #define KERNEL_DTB()							\
 	STRUCT_ALIGN();							\
@@ -483,7 +484,8 @@
 	CLKSRC_OF_TABLES()						\
 	CPU_METHOD_OF_TABLES()						\
 	KERNEL_DTB()							\
-	IRQCHIP_OF_MATCH_TABLE()
+	IRQCHIP_OF_MATCH_TABLE()					\
+	EARLYCON_OF_TABLES()
 
 #define INIT_TEXT							\
 	*(.init.text)							\
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 7a15b5b24c0b..5bbb809ee197 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -294,6 +294,9 @@ struct earlycon_device {
 int setup_earlycon(char *buf, const char *match,
 		   int (*setup)(struct earlycon_device *, const char *));
 
+extern int of_setup_earlycon(unsigned long addr,
+			     int (*setup)(struct earlycon_device *, const char *));
+
 #define EARLYCON_DECLARE(name, func) \
 static int __init name ## _setup_earlycon(char *buf) \
 { \
@@ -301,6 +304,9 @@ static int __init name ## _setup_earlycon(char *buf) \
 } \
 early_param("earlycon", name ## _setup_earlycon);
 
+#define OF_EARLYCON_DECLARE(name, compat, fn)				\
+	_OF_DECLARE(earlycon, name, compat, fn, void *)
+
 struct uart_port *uart_get_console(struct uart_port *ports, int nr,
 				   struct console *c);
 void uart_parse_options(char *options, int *baud, int *parity, int *bits,
-- 
cgit 


From e06e8b27082852bdab417af884241a4ed2037c73 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 27 Mar 2014 07:37:43 -0500
Subject: of/fdt: add FDT address translation support

Copy u-boot's FDT address translation code from common/fdt_support.c. This
code was originally based on the kernel's unflattened DT address parsing
code.

This commit can be reverted once relicensing of this code to GPLv2/BSD
is done and it is added to libfdt.

Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/of/Makefile      |   2 +
 drivers/of/fdt_address.c | 241 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/of_fdt.h   |   1 +
 3 files changed, 244 insertions(+)
 create mode 100644 drivers/of/fdt_address.c

(limited to 'include/linux')

diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 9891232f999e..099b1fb00af4 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -1,5 +1,6 @@
 obj-y = base.o device.o platform.o
 obj-$(CONFIG_OF_FLATTREE) += fdt.o
+obj-$(CONFIG_OF_EARLY_FLATTREE) += fdt_address.o
 obj-$(CONFIG_OF_PROMTREE) += pdt.o
 obj-$(CONFIG_OF_ADDRESS)  += address.o
 obj-$(CONFIG_OF_IRQ)    += irq.o
@@ -12,3 +13,4 @@ obj-$(CONFIG_OF_MTD)	+= of_mtd.o
 obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
 
 CFLAGS_fdt.o = -I$(src)/../../scripts/dtc/libfdt
+CFLAGS_fdt_address.o = -I$(src)/../../scripts/dtc/libfdt
diff --git a/drivers/of/fdt_address.c b/drivers/of/fdt_address.c
new file mode 100644
index 000000000000..8d3dc6fbdb7a
--- /dev/null
+++ b/drivers/of/fdt_address.c
@@ -0,0 +1,241 @@
+/*
+ * FDT Address translation based on u-boot fdt_support.c which in turn was
+ * based on the kernel unflattened DT address translation code.
+ *
+ * (C) Copyright 2007
+ * Gerald Van Baren, Custom IDEAS, vanbaren@cideas.com
+ *
+ * Copyright 2010-2011 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/libfdt.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/sizes.h>
+
+/* Max address size we deal with */
+#define OF_MAX_ADDR_CELLS	4
+#define OF_CHECK_COUNTS(na, ns)	((na) > 0 && (na) <= OF_MAX_ADDR_CELLS && \
+			(ns) > 0)
+
+/* Debug utility */
+#ifdef DEBUG
+static void __init of_dump_addr(const char *s, const __be32 *addr, int na)
+{
+	pr_debug("%s", s);
+	while(na--)
+		pr_cont(" %08x", *(addr++));
+	pr_debug("\n");
+}
+#else
+static void __init of_dump_addr(const char *s, const __be32 *addr, int na) { }
+#endif
+
+/* Callbacks for bus specific translators */
+struct of_bus {
+	void		(*count_cells)(const void *blob, int parentoffset,
+				int *addrc, int *sizec);
+	u64		(*map)(__be32 *addr, const __be32 *range,
+				int na, int ns, int pna);
+	int		(*translate)(__be32 *addr, u64 offset, int na);
+};
+
+/* Default translator (generic bus) */
+static void __init fdt_bus_default_count_cells(const void *blob, int parentoffset,
+					       int *addrc, int *sizec)
+{
+	const __be32 *prop;
+
+	if (addrc) {
+		prop = fdt_getprop(blob, parentoffset, "#address-cells", NULL);
+		if (prop)
+			*addrc = be32_to_cpup(prop);
+		else
+			*addrc = dt_root_addr_cells;
+	}
+
+	if (sizec) {
+		prop = fdt_getprop(blob, parentoffset, "#size-cells", NULL);
+		if (prop)
+			*sizec = be32_to_cpup(prop);
+		else
+			*sizec = dt_root_size_cells;
+	}
+}
+
+static u64 __init fdt_bus_default_map(__be32 *addr, const __be32 *range,
+				      int na, int ns, int pna)
+{
+	u64 cp, s, da;
+
+	cp = of_read_number(range, na);
+	s  = of_read_number(range + na + pna, ns);
+	da = of_read_number(addr, na);
+
+	pr_debug("FDT: default map, cp=%llx, s=%llx, da=%llx\n",
+	    cp, s, da);
+
+	if (da < cp || da >= (cp + s))
+		return OF_BAD_ADDR;
+	return da - cp;
+}
+
+static int __init fdt_bus_default_translate(__be32 *addr, u64 offset, int na)
+{
+	u64 a = of_read_number(addr, na);
+	memset(addr, 0, na * 4);
+	a += offset;
+	if (na > 1)
+		addr[na - 2] = cpu_to_fdt32(a >> 32);
+	addr[na - 1] = cpu_to_fdt32(a & 0xffffffffu);
+
+	return 0;
+}
+
+/* Array of bus specific translators */
+static const struct of_bus of_busses[] __initconst = {
+	/* Default */
+	{
+		.count_cells = fdt_bus_default_count_cells,
+		.map = fdt_bus_default_map,
+		.translate = fdt_bus_default_translate,
+	},
+};
+
+static int __init fdt_translate_one(const void *blob, int parent,
+				    const struct of_bus *bus,
+				    const struct of_bus *pbus, __be32 *addr,
+				    int na, int ns, int pna, const char *rprop)
+{
+	const __be32 *ranges;
+	int rlen;
+	int rone;
+	u64 offset = OF_BAD_ADDR;
+
+	ranges = fdt_getprop(blob, parent, rprop, &rlen);
+	if (!ranges)
+		return 1;
+	if (rlen == 0) {
+		offset = of_read_number(addr, na);
+		memset(addr, 0, pna * 4);
+		pr_debug("FDT: empty ranges, 1:1 translation\n");
+		goto finish;
+	}
+
+	pr_debug("FDT: walking ranges...\n");
+
+	/* Now walk through the ranges */
+	rlen /= 4;
+	rone = na + pna + ns;
+	for (; rlen >= rone; rlen -= rone, ranges += rone) {
+		offset = bus->map(addr, ranges, na, ns, pna);
+		if (offset != OF_BAD_ADDR)
+			break;
+	}
+	if (offset == OF_BAD_ADDR) {
+		pr_debug("FDT: not found !\n");
+		return 1;
+	}
+	memcpy(addr, ranges + na, 4 * pna);
+
+ finish:
+	of_dump_addr("FDT: parent translation for:", addr, pna);
+	pr_debug("FDT: with offset: %llx\n", offset);
+
+	/* Translate it into parent bus space */
+	return pbus->translate(addr, offset, pna);
+}
+
+/*
+ * Translate an address from the device-tree into a CPU physical address,
+ * this walks up the tree and applies the various bus mappings on the
+ * way.
+ *
+ * Note: We consider crossing any level with #size-cells == 0 to mean
+ * that translation is impossible (that is we are not dealing with a value
+ * that can be mapped to a cpu physical address). This is not really specified
+ * that way, but this is traditionally the way IBM at least do things
+ */
+u64 __init fdt_translate_address(const void *blob, int node_offset)
+{
+	int parent, len;
+	const struct of_bus *bus, *pbus;
+	const __be32 *reg;
+	__be32 addr[OF_MAX_ADDR_CELLS];
+	int na, ns, pna, pns;
+	u64 result = OF_BAD_ADDR;
+
+	pr_debug("FDT: ** translation for device %s **\n",
+		 fdt_get_name(blob, node_offset, NULL));
+
+	reg = fdt_getprop(blob, node_offset, "reg", &len);
+	if (!reg) {
+		pr_err("FDT: warning: device tree node '%s' has no address.\n",
+			fdt_get_name(blob, node_offset, NULL));
+		goto bail;
+	}
+
+	/* Get parent & match bus type */
+	parent = fdt_parent_offset(blob, node_offset);
+	if (parent < 0)
+		goto bail;
+	bus = &of_busses[0];
+
+	/* Count address cells & copy address locally */
+	bus->count_cells(blob, parent, &na, &ns);
+	if (!OF_CHECK_COUNTS(na, ns)) {
+		pr_err("FDT: Bad cell count for %s\n",
+		       fdt_get_name(blob, node_offset, NULL));
+		goto bail;
+	}
+	memcpy(addr, reg, na * 4);
+
+	pr_debug("FDT: bus (na=%d, ns=%d) on %s\n",
+		 na, ns, fdt_get_name(blob, parent, NULL));
+	of_dump_addr("OF: translating address:", addr, na);
+
+	/* Translate */
+	for (;;) {
+		/* Switch to parent bus */
+		node_offset = parent;
+		parent = fdt_parent_offset(blob, node_offset);
+
+		/* If root, we have finished */
+		if (parent < 0) {
+			pr_debug("FDT: reached root node\n");
+			result = of_read_number(addr, na);
+			break;
+		}
+
+		/* Get new parent bus and counts */
+		pbus = &of_busses[0];
+		pbus->count_cells(blob, parent, &pna, &pns);
+		if (!OF_CHECK_COUNTS(pna, pns)) {
+			pr_err("FDT: Bad cell count for %s\n",
+				fdt_get_name(blob, node_offset, NULL));
+			break;
+		}
+
+		pr_debug("FDT: parent bus (na=%d, ns=%d) on %s\n",
+			 pna, pns, fdt_get_name(blob, parent, NULL));
+
+		/* Apply bus translation */
+		if (fdt_translate_one(blob, node_offset, bus, pbus,
+					addr, na, ns, pna, "ranges"))
+			break;
+
+		/* Complete the move up one level */
+		na = pna;
+		ns = pns;
+		bus = pbus;
+
+		of_dump_addr("FDT: one level translation:", addr, na);
+	}
+ bail:
+	return result;
+}
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index 5c0ab057eecf..05117899fcb4 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -83,6 +83,7 @@ extern void unflatten_device_tree(void);
 extern void unflatten_and_copy_device_tree(void);
 extern void early_init_devtree(void *);
 extern void early_get_first_memblock_info(void *, phys_addr_t *);
+extern u64 fdt_translate_address(const void *blob, int node_offset);
 #else /* CONFIG_OF_FLATTREE */
 static inline void early_init_fdt_scan_reserved_mem(void) {}
 static inline const char *of_flat_dt_get_machine_name(void) { return NULL; }
-- 
cgit 


From 77f2ea2f8d0833f9e976368481fb9a0775acf9e7 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 30 Apr 2014 11:20:53 -0600
Subject: DMA-API: Clarify physical/bus address distinction

The DMA-API documentation sometimes refers to "physical addresses" when it
really means "bus addresses."  Sometimes these are identical, but they may
be different if the bridge leading to the bus performs address translation.
Update the documentation to use "bus address" when appropriate.

Also, consistently capitalize "DMA", use parens with function names, use
dev_printk() in examples, and reword a few sections for clarity.

No functional change; documentation changes only.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: James Bottomley <jbottomley@Parallels.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
---
 Documentation/DMA-API-HOWTO.txt | 192 +++++++++++++++++++++++++---------------
 Documentation/DMA-API.txt       | 139 +++++++++++++++--------------
 Documentation/DMA-ISA-LPC.txt   |   4 +-
 include/linux/dma-mapping.h     |   6 ++
 include/linux/types.h           |   1 +
 5 files changed, 204 insertions(+), 138 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt
index 5e983031cc11..fd3727b94ac2 100644
--- a/Documentation/DMA-API-HOWTO.txt
+++ b/Documentation/DMA-API-HOWTO.txt
@@ -9,16 +9,76 @@ This is a guide to device driver writers on how to use the DMA API
 with example pseudo-code.  For a concise description of the API, see
 DMA-API.txt.
 
-Most of the 64bit platforms have special hardware that translates bus
-addresses (DMA addresses) into physical addresses.  This is similar to
-how page tables and/or a TLB translates virtual addresses to physical
-addresses on a CPU.  This is needed so that e.g. PCI devices can
-access with a Single Address Cycle (32bit DMA address) any page in the
-64bit physical address space.  Previously in Linux those 64bit
-platforms had to set artificial limits on the maximum RAM size in the
-system, so that the virt_to_bus() static scheme works (the DMA address
-translation tables were simply filled on bootup to map each bus
-address to the physical page __pa(bus_to_virt())).
+                       CPU and DMA addresses
+
+There are several kinds of addresses involved in the DMA API, and it's
+important to understand the differences.
+
+The kernel normally uses virtual addresses.  Any address returned by
+kmalloc(), vmalloc(), and similar interfaces is a virtual address and can
+be stored in a "void *".
+
+The virtual memory system (TLB, page tables, etc.) translates virtual
+addresses to CPU physical addresses, which are stored as "phys_addr_t" or
+"resource_size_t".  The kernel manages device resources like registers as
+physical addresses.  These are the addresses in /proc/iomem.  The physical
+address is not directly useful to a driver; it must use ioremap() to map
+the space and produce a virtual address.
+
+I/O devices use a third kind of address: a "bus address" or "DMA address".
+If a device has registers at an MMIO address, or if it performs DMA to read
+or write system memory, the addresses used by the device are bus addresses.
+In some systems, bus addresses are identical to CPU physical addresses, but
+in general they are not.  IOMMUs and host bridges can produce arbitrary
+mappings between physical and bus addresses.
+
+Here's a picture and some examples:
+
+               CPU                  CPU                  Bus
+             Virtual              Physical             Address
+             Address              Address               Space
+              Space                Space
+
+            +-------+             +------+             +------+
+            |       |             |MMIO  |   Offset    |      |
+            |       |  Virtual    |Space |   applied   |      |
+          C +-------+ --------> B +------+ ----------> +------+ A
+            |       |  mapping    |      |   by host   |      |
+  +-----+   |       |             |      |   bridge    |      |   +--------+
+  |     |   |       |             +------+             |      |   |        |
+  | CPU |   |       |             | RAM  |             |      |   | Device |
+  |     |   |       |             |      |             |      |   |        |
+  +-----+   +-------+             +------+             +------+   +--------+
+            |       |  Virtual    |Buffer|   Mapping   |      |
+          X +-------+ --------> Y +------+ <---------- +------+ Z
+            |       |  mapping    | RAM  |   by IOMMU
+            |       |             |      |
+            |       |             |      |
+            +-------+             +------+
+
+During the enumeration process, the kernel learns about I/O devices and
+their MMIO space and the host bridges that connect them to the system.  For
+example, if a PCI device has a BAR, the kernel reads the bus address (A)
+from the BAR and converts it to a CPU physical address (B).  The address B
+is stored in a struct resource and usually exposed via /proc/iomem.  When a
+driver claims a device, it typically uses ioremap() to map physical address
+B at a virtual address (C).  It can then use, e.g., ioread32(C), to access
+the device registers at bus address A.
+
+If the device supports DMA, the driver sets up a buffer using kmalloc() or
+a similar interface, which returns a virtual address (X).  The virtual
+memory system maps X to a physical address (Y) in system RAM.  The driver
+can use virtual address X to access the buffer, but the device itself
+cannot because DMA doesn't go through the CPU virtual memory system.
+
+In some simple systems, the device can do DMA directly to physical address
+Y.  But in many others, there is IOMMU hardware that translates bus
+addresses to physical addresses, e.g., it translates Z to Y.  This is part
+of the reason for the DMA API: the driver can give a virtual address X to
+an interface like dma_map_single(), which sets up any required IOMMU
+mapping and returns the bus address Z.  The driver then tells the device to
+do DMA to Z, and the IOMMU maps it to the buffer at address Y in system
+RAM.
 
 So that Linux can use the dynamic DMA mapping, it needs some help from the
 drivers, namely it has to take into account that DMA addresses should be
@@ -29,17 +89,17 @@ The following API will work of course even on platforms where no such
 hardware exists.
 
 Note that the DMA API works with any bus independent of the underlying
-microprocessor architecture. You should use the DMA API rather than
-the bus specific DMA API (e.g. pci_dma_*).
+microprocessor architecture. You should use the DMA API rather than the
+bus-specific DMA API, i.e., use the dma_map_*() interfaces rather than the
+pci_map_*() interfaces.
 
 First of all, you should make sure
 
 #include <linux/dma-mapping.h>
 
-is in your driver. This file will obtain for you the definition of the
-dma_addr_t (which can hold any valid DMA address for the platform)
-type which should be used everywhere you hold a DMA (bus) address
-returned from the DMA mapping functions.
+is in your driver, which provides the definition of dma_addr_t.  This type
+can hold any valid DMA or bus address for the platform and should be used
+everywhere you hold a DMA address returned from the DMA mapping functions.
 
 			 What memory is DMA'able?
 
@@ -123,9 +183,9 @@ Here, dev is a pointer to the device struct of your device, and mask
 is a bit mask describing which bits of an address your device
 supports.  It returns zero if your card can perform DMA properly on
 the machine given the address mask you provided.  In general, the
-device struct of your device is embedded in the bus specific device
-struct of your device.  For example, a pointer to the device struct of
-your PCI device is pdev->dev (pdev is a pointer to the PCI device
+device struct of your device is embedded in the bus-specific device
+struct of your device.  For example, &pdev->dev is a pointer to the
+device struct of a PCI device (pdev is a pointer to the PCI device
 struct of your device).
 
 If it returns non-zero, your device cannot perform DMA properly on
@@ -147,8 +207,7 @@ exactly why.
 The standard 32-bit addressing device would do something like this:
 
 	if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
-		printk(KERN_WARNING
-		       "mydev: No suitable DMA available.\n");
+		dev_warn(dev, "mydev: No suitable DMA available\n");
 		goto ignore_this_device;
 	}
 
@@ -170,8 +229,7 @@ all 64-bits when accessing streaming DMA:
 	} else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) {
 		using_dac = 0;
 	} else {
-		printk(KERN_WARNING
-		       "mydev: No suitable DMA available.\n");
+		dev_warn(dev, "mydev: No suitable DMA available\n");
 		goto ignore_this_device;
 	}
 
@@ -187,8 +245,7 @@ the case would look like this:
 		using_dac = 0;
 		consistent_using_dac = 0;
 	} else {
-		printk(KERN_WARNING
-		       "mydev: No suitable DMA available.\n");
+		dev_warn(dev, "mydev: No suitable DMA available\n");
 		goto ignore_this_device;
 	}
 
@@ -201,8 +258,7 @@ Finally, if your device can only drive the low 24-bits of
 address you might do something like:
 
 	if (dma_set_mask(dev, DMA_BIT_MASK(24))) {
-		printk(KERN_WARNING
-		       "mydev: 24-bit DMA addressing not available.\n");
+		dev_warn(dev, "mydev: 24-bit DMA addressing not available\n");
 		goto ignore_this_device;
 	}
 
@@ -232,14 +288,14 @@ Here is pseudo-code showing how this might be done:
 		card->playback_enabled = 1;
 	} else {
 		card->playback_enabled = 0;
-		printk(KERN_WARNING "%s: Playback disabled due to DMA limitations.\n",
+		dev_warn(dev, "%s: Playback disabled due to DMA limitations\n",
 		       card->name);
 	}
 	if (!dma_set_mask(dev, RECORD_ADDRESS_BITS)) {
 		card->record_enabled = 1;
 	} else {
 		card->record_enabled = 0;
-		printk(KERN_WARNING "%s: Record disabled due to DMA limitations.\n",
+		dev_warn(dev, "%s: Record disabled due to DMA limitations\n",
 		       card->name);
 	}
 
@@ -331,7 +387,7 @@ context with the GFP_ATOMIC flag.
 Size is the length of the region you want to allocate, in bytes.
 
 This routine will allocate RAM for that region, so it acts similarly to
-__get_free_pages (but takes size instead of a page order).  If your
+__get_free_pages() (but takes size instead of a page order).  If your
 driver needs regions sized smaller than a page, you may prefer using
 the dma_pool interface, described below.
 
@@ -343,11 +399,11 @@ the consistent DMA mask has been explicitly changed via
 dma_set_coherent_mask().  This is true of the dma_pool interface as
 well.
 
-dma_alloc_coherent returns two values: the virtual address which you
+dma_alloc_coherent() returns two values: the virtual address which you
 can use to access it from the CPU and dma_handle which you pass to the
 card.
 
-The cpu return address and the DMA bus master address are both
+The CPU virtual address and the DMA bus address are both
 guaranteed to be aligned to the smallest PAGE_SIZE order which
 is greater than or equal to the requested size.  This invariant
 exists (for example) to guarantee that if you allocate a chunk
@@ -359,13 +415,13 @@ To unmap and free such a DMA region, you call:
 	dma_free_coherent(dev, size, cpu_addr, dma_handle);
 
 where dev, size are the same as in the above call and cpu_addr and
-dma_handle are the values dma_alloc_coherent returned to you.
+dma_handle are the values dma_alloc_coherent() returned to you.
 This function may not be called in interrupt context.
 
 If your driver needs lots of smaller memory regions, you can write
-custom code to subdivide pages returned by dma_alloc_coherent,
+custom code to subdivide pages returned by dma_alloc_coherent(),
 or you can use the dma_pool API to do that.  A dma_pool is like
-a kmem_cache, but it uses dma_alloc_coherent not __get_free_pages.
+a kmem_cache, but it uses dma_alloc_coherent(), not __get_free_pages().
 Also, it understands common hardware constraints for alignment,
 like queue heads needing to be aligned on N byte boundaries.
 
@@ -381,29 +437,29 @@ type of data is "align" (which is expressed in bytes, and must be a
 power of two).  If your device has no boundary crossing restrictions,
 pass 0 for alloc; passing 4096 says memory allocated from this pool
 must not cross 4KByte boundaries (but at that time it may be better to
-go for dma_alloc_coherent directly instead).
+use dma_alloc_coherent() directly instead).
 
-Allocate memory from a dma pool like this:
+Allocate memory from a DMA pool like this:
 
 	cpu_addr = dma_pool_alloc(pool, flags, &dma_handle);
 
 flags are SLAB_KERNEL if blocking is permitted (not in_interrupt nor
-holding SMP locks), SLAB_ATOMIC otherwise.  Like dma_alloc_coherent,
+holding SMP locks), SLAB_ATOMIC otherwise.  Like dma_alloc_coherent(),
 this returns two values, cpu_addr and dma_handle.
 
 Free memory that was allocated from a dma_pool like this:
 
 	dma_pool_free(pool, cpu_addr, dma_handle);
 
-where pool is what you passed to dma_pool_alloc, and cpu_addr and
-dma_handle are the values dma_pool_alloc returned. This function
+where pool is what you passed to dma_pool_alloc(), and cpu_addr and
+dma_handle are the values dma_pool_alloc() returned. This function
 may be called in interrupt context.
 
 Destroy a dma_pool by calling:
 
 	dma_pool_destroy(pool);
 
-Make sure you've called dma_pool_free for all memory allocated
+Make sure you've called dma_pool_free() for all memory allocated
 from a pool before you destroy the pool. This function may not
 be called in interrupt context.
 
@@ -418,7 +474,7 @@ one of the following values:
  DMA_FROM_DEVICE
  DMA_NONE
 
-One should provide the exact DMA direction if you know it.
+You should provide the exact DMA direction if you know it.
 
 DMA_TO_DEVICE means "from main memory to the device"
 DMA_FROM_DEVICE means "from the device to main memory"
@@ -489,14 +545,14 @@ and to unmap it:
 	dma_unmap_single(dev, dma_handle, size, direction);
 
 You should call dma_mapping_error() as dma_map_single() could fail and return
-error. Not all dma implementations support dma_mapping_error() interface.
+error. Not all DMA implementations support the dma_mapping_error() interface.
 However, it is a good practice to call dma_mapping_error() interface, which
 will invoke the generic mapping error check interface. Doing so will ensure
-that the mapping code will work correctly on all dma implementations without
+that the mapping code will work correctly on all DMA implementations without
 any dependency on the specifics of the underlying implementation. Using the
 returned address without checking for errors could result in failures ranging
 from panics to silent data corruption. A couple of examples of incorrect ways
-to check for errors that make assumptions about the underlying dma
+to check for errors that make assumptions about the underlying DMA
 implementation are as follows and these are applicable to dma_map_page() as
 well.
 
@@ -516,12 +572,12 @@ Incorrect example 2:
 		goto map_error;
 	}
 
-You should call dma_unmap_single when the DMA activity is finished, e.g.
+You should call dma_unmap_single() when the DMA activity is finished, e.g.,
 from the interrupt which told you that the DMA transfer is done.
 
-Using cpu pointers like this for single mappings has a disadvantage,
+Using CPU pointers like this for single mappings has a disadvantage:
 you cannot reference HIGHMEM memory in this way.  Thus, there is a
-map/unmap interface pair akin to dma_{map,unmap}_single.  These
+map/unmap interface pair akin to dma_{map,unmap}_single().  These
 interfaces deal with page/offset pairs instead of cpu pointers.
 Specifically:
 
@@ -550,7 +606,7 @@ Here, "offset" means byte offset within the given page.
 You should call dma_mapping_error() as dma_map_page() could fail and return
 error as outlined under the dma_map_single() discussion.
 
-You should call dma_unmap_page when the DMA activity is finished, e.g.
+You should call dma_unmap_page() when the DMA activity is finished, e.g.,
 from the interrupt which told you that the DMA transfer is done.
 
 With scatterlists, you map a region gathered from several regions by:
@@ -588,18 +644,16 @@ PLEASE NOTE:  The 'nents' argument to the dma_unmap_sg call must be
 	      it should _NOT_ be the 'count' value _returned_ from the
               dma_map_sg call.
 
-Every dma_map_{single,sg} call should have its dma_unmap_{single,sg}
-counterpart, because the bus address space is a shared resource (although
-in some ports the mapping is per each BUS so less devices contend for the
-same bus address space) and you could render the machine unusable by eating
-all bus addresses.
+Every dma_map_{single,sg}() call should have its dma_unmap_{single,sg}()
+counterpart, because the bus address space is a shared resource and
+you could render the machine unusable by consuming all bus addresses.
 
 If you need to use the same streaming DMA region multiple times and touch
 the data in between the DMA transfers, the buffer needs to be synced
-properly in order for the cpu and device to see the most uptodate and
+properly in order for the CPU and device to see the most up-to-date and
 correct copy of the DMA buffer.
 
-So, firstly, just map it with dma_map_{single,sg}, and after each DMA
+So, firstly, just map it with dma_map_{single,sg}(), and after each DMA
 transfer call either:
 
 	dma_sync_single_for_cpu(dev, dma_handle, size, direction);
@@ -623,9 +677,9 @@ or:
 as appropriate.
 
 After the last DMA transfer call one of the DMA unmap routines
-dma_unmap_{single,sg}. If you don't touch the data from the first dma_map_*
-call till dma_unmap_*, then you don't have to call the dma_sync_*
-routines at all.
+dma_unmap_{single,sg}(). If you don't touch the data from the first
+dma_map_*() call till dma_unmap_*(), then you don't have to call the
+dma_sync_*() routines at all.
 
 Here is pseudo code which shows a situation in which you would need
 to use the dma_sync_*() interfaces.
@@ -690,12 +744,12 @@ to use the dma_sync_*() interfaces.
 		}
 	}
 
-Drivers converted fully to this interface should not use virt_to_bus any
-longer, nor should they use bus_to_virt. Some drivers have to be changed a
-little bit, because there is no longer an equivalent to bus_to_virt in the
+Drivers converted fully to this interface should not use virt_to_bus() any
+longer, nor should they use bus_to_virt(). Some drivers have to be changed a
+little bit, because there is no longer an equivalent to bus_to_virt() in the
 dynamic DMA mapping scheme - you have to always store the DMA addresses
-returned by the dma_alloc_coherent, dma_pool_alloc, and dma_map_single
-calls (dma_map_sg stores them in the scatterlist itself if the platform
+returned by the dma_alloc_coherent(), dma_pool_alloc(), and dma_map_single()
+calls (dma_map_sg() stores them in the scatterlist itself if the platform
 supports dynamic DMA mapping in hardware) in your driver structures and/or
 in the card registers.
 
@@ -709,9 +763,9 @@ as it is impossible to correctly support them.
 DMA address space is limited on some architectures and an allocation
 failure can be determined by:
 
-- checking if dma_alloc_coherent returns NULL or dma_map_sg returns 0
+- checking if dma_alloc_coherent() returns NULL or dma_map_sg() returns 0
 
-- checking the returned dma_addr_t of dma_map_single and dma_map_page
+- checking the dma_addr_t returned from dma_map_single() and dma_map_page()
   by using dma_mapping_error():
 
 	dma_addr_t dma_handle;
@@ -794,7 +848,7 @@ Example 2: (if buffers are allocated in a loop, unmap all mapped buffers when
 		dma_unmap_single(array[i].dma_addr);
 	}
 
-Networking drivers must call dev_kfree_skb to free the socket buffer
+Networking drivers must call dev_kfree_skb() to free the socket buffer
 and return NETDEV_TX_OK if the DMA mapping fails on the transmit hook
 (ndo_start_xmit). This means that the socket buffer is just dropped in
 the failure case.
@@ -831,7 +885,7 @@ transform some example code.
 		DEFINE_DMA_UNMAP_LEN(len);
 	};
 
-2) Use dma_unmap_{addr,len}_set to set these values.
+2) Use dma_unmap_{addr,len}_set() to set these values.
    Example, before:
 
 	ringp->mapping = FOO;
@@ -842,7 +896,7 @@ transform some example code.
 	dma_unmap_addr_set(ringp, mapping, FOO);
 	dma_unmap_len_set(ringp, len, BAR);
 
-3) Use dma_unmap_{addr,len} to access these values.
+3) Use dma_unmap_{addr,len}() to access these values.
    Example, before:
 
 	dma_unmap_single(dev, ringp->mapping, ringp->len,
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index e865279cec58..1147eba43128 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -4,22 +4,26 @@
         James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
 
 This document describes the DMA API.  For a more gentle introduction
-of the API (and actual examples) see
-Documentation/DMA-API-HOWTO.txt.
+of the API (and actual examples), see Documentation/DMA-API-HOWTO.txt.
 
-This API is split into two pieces.  Part I describes the API.  Part II
-describes the extensions to the API for supporting non-consistent
-memory machines.  Unless you know that your driver absolutely has to
-support non-consistent platforms (this is usually only legacy
-platforms) you should only use the API described in part I.
+This API is split into two pieces.  Part I describes the basic API.
+Part II describes extensions for supporting non-consistent memory
+machines.  Unless you know that your driver absolutely has to support
+non-consistent platforms (this is usually only legacy platforms) you
+should only use the API described in part I.
 
 Part I - dma_ API
 -------------------------------------
 
-To get the dma_ API, you must #include <linux/dma-mapping.h>
+To get the dma_ API, you must #include <linux/dma-mapping.h>.  This
+provides dma_addr_t and the interfaces described below.
 
+A dma_addr_t can hold any valid DMA or bus address for the platform.  It
+can be given to a device to use as a DMA source or target.  A CPU cannot
+reference a dma_addr_t directly because there may be translation between
+its physical address space and the bus address space.
 
-Part Ia - Using large dma-coherent buffers
+Part Ia - Using large DMA-coherent buffers
 ------------------------------------------
 
 void *
@@ -33,20 +37,21 @@ to make sure to flush the processor's write buffers before telling
 devices to read that memory.)
 
 This routine allocates a region of <size> bytes of consistent memory.
-It also returns a <dma_handle> which may be cast to an unsigned
-integer the same width as the bus and used as the physical address
-base of the region.
 
-Returns: a pointer to the allocated region (in the processor's virtual
+It returns a pointer to the allocated region (in the processor's virtual
 address space) or NULL if the allocation failed.
 
+It also returns a <dma_handle> which may be cast to an unsigned integer the
+same width as the bus and given to the device as the bus address base of
+the region.
+
 Note: consistent memory can be expensive on some platforms, and the
 minimum allocation length may be as big as a page, so you should
 consolidate your requests for consistent memory as much as possible.
 The simplest way to do that is to use the dma_pool calls (see below).
 
-The flag parameter (dma_alloc_coherent only) allows the caller to
-specify the GFP_ flags (see kmalloc) for the allocation (the
+The flag parameter (dma_alloc_coherent() only) allows the caller to
+specify the GFP_ flags (see kmalloc()) for the allocation (the
 implementation may choose to ignore flags that affect the location of
 the returned memory, like GFP_DMA).
 
@@ -61,24 +66,24 @@ void
 dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
 			   dma_addr_t dma_handle)
 
-Free the region of consistent memory you previously allocated.  dev,
-size and dma_handle must all be the same as those passed into the
-consistent allocate.  cpu_addr must be the virtual address returned by
-the consistent allocate.
+Free a region of consistent memory you previously allocated.  dev,
+size and dma_handle must all be the same as those passed into
+dma_alloc_coherent().  cpu_addr must be the virtual address returned by
+dma_alloc_coherent().
 
 Note that unlike their sibling allocation calls, these routines
 may only be called with IRQs enabled.
 
 
-Part Ib - Using small dma-coherent buffers
+Part Ib - Using small DMA-coherent buffers
 ------------------------------------------
 
 To get this part of the dma_ API, you must #include <linux/dmapool.h>
 
-Many drivers need lots of small dma-coherent memory regions for DMA
+Many drivers need lots of small DMA-coherent memory regions for DMA
 descriptors or I/O buffers.  Rather than allocating in units of a page
 or more using dma_alloc_coherent(), you can use DMA pools.  These work
-much like a struct kmem_cache, except that they use the dma-coherent allocator,
+much like a struct kmem_cache, except that they use the DMA-coherent allocator,
 not __get_free_pages().  Also, they understand common hardware constraints
 for alignment, like queue heads needing to be aligned on N-byte boundaries.
 
@@ -87,7 +92,7 @@ for alignment, like queue heads needing to be aligned on N-byte boundaries.
 	dma_pool_create(const char *name, struct device *dev,
 			size_t size, size_t align, size_t alloc);
 
-The pool create() routines initialize a pool of dma-coherent buffers
+dma_pool_create() initializes a pool of DMA-coherent buffers
 for use with a given device.  It must be called in a context which
 can sleep.
 
@@ -102,25 +107,26 @@ from this pool must not cross 4KByte boundaries.
 	void *dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
 			dma_addr_t *dma_handle);
 
-This allocates memory from the pool; the returned memory will meet the size
-and alignment requirements specified at creation time.  Pass GFP_ATOMIC to
-prevent blocking, or if it's permitted (not in_interrupt, not holding SMP locks),
-pass GFP_KERNEL to allow blocking.  Like dma_alloc_coherent(), this returns
-two values:  an address usable by the cpu, and the dma address usable by the
-pool's device.
+This allocates memory from the pool; the returned memory will meet the
+size and alignment requirements specified at creation time.  Pass
+GFP_ATOMIC to prevent blocking, or if it's permitted (not
+in_interrupt, not holding SMP locks), pass GFP_KERNEL to allow
+blocking.  Like dma_alloc_coherent(), this returns two values:  an
+address usable by the CPU, and the DMA address usable by the pool's
+device.
 
 
 	void dma_pool_free(struct dma_pool *pool, void *vaddr,
 			dma_addr_t addr);
 
 This puts memory back into the pool.  The pool is what was passed to
-the pool allocation routine; the cpu (vaddr) and dma addresses are what
+dma_pool_alloc(); the CPU (vaddr) and DMA addresses are what
 were returned when that routine allocated the memory being freed.
 
 
 	void dma_pool_destroy(struct dma_pool *pool);
 
-The pool destroy() routines free the resources of the pool.  They must be
+dma_pool_destroy() frees the resources of the pool.  It must be
 called in a context which can sleep.  Make sure you've freed all allocated
 memory back to the pool before you destroy it.
 
@@ -187,9 +193,9 @@ dma_map_single(struct device *dev, void *cpu_addr, size_t size,
 		      enum dma_data_direction direction)
 
 Maps a piece of processor virtual memory so it can be accessed by the
-device and returns the physical handle of the memory.
+device and returns the bus address of the memory.
 
-The direction for both api's may be converted freely by casting.
+The direction for both APIs may be converted freely by casting.
 However the dma_ API uses a strongly typed enumerator for its
 direction:
 
@@ -198,31 +204,30 @@ DMA_TO_DEVICE		data is going from the memory to the device
 DMA_FROM_DEVICE		data is coming from the device to the memory
 DMA_BIDIRECTIONAL	direction isn't known
 
-Notes:  Not all memory regions in a machine can be mapped by this
-API.  Further, regions that appear to be physically contiguous in
-kernel virtual space may not be contiguous as physical memory.  Since
-this API does not provide any scatter/gather capability, it will fail
-if the user tries to map a non-physically contiguous piece of memory.
-For this reason, it is recommended that memory mapped by this API be
-obtained only from sources which guarantee it to be physically contiguous
-(like kmalloc).
-
-Further, the physical address of the memory must be within the
-dma_mask of the device (the dma_mask represents a bit mask of the
-addressable region for the device.  I.e., if the physical address of
-the memory anded with the dma_mask is still equal to the physical
-address, then the device can perform DMA to the memory).  In order to
+Notes:  Not all memory regions in a machine can be mapped by this API.
+Further, contiguous kernel virtual space may not be contiguous as
+physical memory.  Since this API does not provide any scatter/gather
+capability, it will fail if the user tries to map a non-physically
+contiguous piece of memory.  For this reason, memory to be mapped by
+this API should be obtained from sources which guarantee it to be
+physically contiguous (like kmalloc()).
+
+Further, the bus address of the memory must be within the
+dma_mask of the device (the dma_mask is a bit mask of the
+addressable region for the device, i.e., if the bus address of
+the memory ANDed with the dma_mask is still equal to the bus
+address, then the device can perform DMA to the memory).  To
 ensure that the memory allocated by kmalloc is within the dma_mask,
 the driver may specify various platform-dependent flags to restrict
-the physical memory range of the allocation (e.g. on x86, GFP_DMA
-guarantees to be within the first 16Mb of available physical memory,
+the bus address range of the allocation (e.g., on x86, GFP_DMA
+guarantees to be within the first 16MB of available bus addresses,
 as required by ISA devices).
 
 Note also that the above constraints on physical contiguity and
 dma_mask may not apply if the platform has an IOMMU (a device which
-supplies a physical to virtual mapping between the I/O memory bus and
-the device).  However, to be portable, device driver writers may *not*
-assume that such an IOMMU exists.
+maps an I/O bus address to a physical memory address).  However, to be
+portable, device driver writers may *not* assume that such an IOMMU
+exists.
 
 Warnings:  Memory coherency operates at a granularity called the cache
 line width.  In order for memory mapped by this API to operate
@@ -281,9 +286,9 @@ cache width is.
 int
 dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 
-In some circumstances dma_map_single and dma_map_page will fail to create
+In some circumstances dma_map_single() and dma_map_page() will fail to create
 a mapping. A driver can check for these errors by testing the returned
-dma address with dma_mapping_error(). A non-zero return value means the mapping
+DMA address with dma_mapping_error(). A non-zero return value means the mapping
 could not be created and the driver should take appropriate action (e.g.
 reduce current DMA mapping usage or delay and try again later).
 
@@ -291,7 +296,7 @@ reduce current DMA mapping usage or delay and try again later).
 	dma_map_sg(struct device *dev, struct scatterlist *sg,
 		int nents, enum dma_data_direction direction)
 
-Returns: the number of physical segments mapped (this may be shorter
+Returns: the number of bus address segments mapped (this may be shorter
 than <nents> passed in if some elements of the scatter/gather list are
 physically or virtually adjacent and an IOMMU maps them with a single
 entry).
@@ -299,7 +304,7 @@ entry).
 Please note that the sg cannot be mapped again if it has been mapped once.
 The mapping process is allowed to destroy information in the sg.
 
-As with the other mapping interfaces, dma_map_sg can fail. When it
+As with the other mapping interfaces, dma_map_sg() can fail. When it
 does, 0 is returned and a driver must take appropriate action. It is
 critical that the driver do something, in the case of a block driver
 aborting the request or even oopsing is better than doing nothing and
@@ -335,7 +340,7 @@ must be the same as those and passed in to the scatter/gather mapping
 API.
 
 Note: <nents> must be the number you passed in, *not* the number of
-physical entries returned.
+bus address entries returned.
 
 void
 dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
@@ -391,10 +396,10 @@ The four functions above are just like the counterpart functions
 without the _attrs suffixes, except that they pass an optional
 struct dma_attrs*.
 
-struct dma_attrs encapsulates a set of "dma attributes". For the
+struct dma_attrs encapsulates a set of "DMA attributes". For the
 definition of struct dma_attrs see linux/dma-attrs.h.
 
-The interpretation of dma attributes is architecture-specific, and
+The interpretation of DMA attributes is architecture-specific, and
 each attribute should be documented in Documentation/DMA-attributes.txt.
 
 If struct dma_attrs* is NULL, the semantics of each of these
@@ -458,7 +463,7 @@ Note: where the platform can return consistent memory, it will
 guarantee that the sync points become nops.
 
 Warning:  Handling non-consistent memory is a real pain.  You should
-only ever use this API if you positively know your driver will be
+only use this API if you positively know your driver will be
 required to work on one of the rare (usually non-PCI) architectures
 that simply cannot make consistent memory.
 
@@ -496,26 +501,26 @@ dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
 			    dma_addr_t device_addr, size_t size, int
 			    flags)
 
-Declare region of memory to be handed out by dma_alloc_coherent when
+Declare region of memory to be handed out by dma_alloc_coherent() when
 it's asked for coherent memory for this device.
 
 bus_addr is the physical address to which the memory is currently
 assigned in the bus responding region (this will be used by the
 platform to perform the mapping).
 
-device_addr is the physical address the device needs to be programmed
+device_addr is the bus address the device needs to be programmed
 with actually to address this memory (this will be handed out as the
 dma_addr_t in dma_alloc_coherent()).
 
 size is the size of the area (must be multiples of PAGE_SIZE).
 
-flags can be or'd together and are:
+flags can be ORed together and are:
 
 DMA_MEMORY_MAP - request that the memory returned from
 dma_alloc_coherent() be directly writable.
 
 DMA_MEMORY_IO - request that the memory returned from
-dma_alloc_coherent() be addressable using read/write/memcpy_toio etc.
+dma_alloc_coherent() be addressable using read()/write()/memcpy_toio() etc.
 
 One or both of these flags must be present.
 
@@ -572,7 +577,7 @@ region is occupied.
 Part III - Debug drivers use of the DMA-API
 -------------------------------------------
 
-The DMA-API as described above as some constraints. DMA addresses must be
+The DMA-API as described above has some constraints. DMA addresses must be
 released with the corresponding function with the same size for example. With
 the advent of hardware IOMMUs it becomes more and more important that drivers
 do not violate those constraints. In the worst case such a violation can
@@ -690,11 +695,11 @@ architectural default.
 void debug_dmap_mapping_error(struct device *dev, dma_addr_t dma_addr);
 
 dma-debug interface debug_dma_mapping_error() to debug drivers that fail
-to check dma mapping errors on addresses returned by dma_map_single() and
+to check DMA mapping errors on addresses returned by dma_map_single() and
 dma_map_page() interfaces. This interface clears a flag set by
 debug_dma_map_page() to indicate that dma_mapping_error() has been called by
 the driver. When driver does unmap, debug_dma_unmap() checks the flag and if
 this flag is still set, prints warning message that includes call trace that
 leads up to the unmap. This interface can be called from dma_mapping_error()
-routines to enable dma mapping error check debugging.
+routines to enable DMA mapping error check debugging.
 
diff --git a/Documentation/DMA-ISA-LPC.txt b/Documentation/DMA-ISA-LPC.txt
index e767805b4182..b1a19835e907 100644
--- a/Documentation/DMA-ISA-LPC.txt
+++ b/Documentation/DMA-ISA-LPC.txt
@@ -16,7 +16,7 @@ To do ISA style DMA you need to include two headers:
 #include <asm/dma.h>
 
 The first is the generic DMA API used to convert virtual addresses to
-physical addresses (see Documentation/DMA-API.txt for details).
+bus addresses (see Documentation/DMA-API.txt for details).
 
 The second contains the routines specific to ISA DMA transfers. Since
 this is not present on all platforms make sure you construct your
@@ -50,7 +50,7 @@ early as possible and not release it until the driver is unloaded.)
 Part III - Address translation
 ------------------------------
 
-To translate the virtual address to a physical use the normal DMA
+To translate the virtual address to a bus address, use the normal DMA
 API. Do _not_ use isa_virt_to_phys() even though it does the same
 thing. The reason for this is that the function isa_virt_to_phys()
 will require a Kconfig dependency to ISA, not just ISA_DMA_API which
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index fd4aee29ad10..b9aa2b97aab5 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -8,6 +8,12 @@
 #include <linux/dma-direction.h>
 #include <linux/scatterlist.h>
 
+/*
+ * A dma_addr_t can hold any valid DMA or bus address for the platform.
+ * It can be given to a device to use as a DMA source or target.  A CPU cannot
+ * reference a dma_addr_t directly because there may be translation between
+ * its physical address space and the bus address space.
+ */
 struct dma_map_ops {
 	void* (*alloc)(struct device *dev, size_t size,
 				dma_addr_t *dma_handle, gfp_t gfp,
diff --git a/include/linux/types.h b/include/linux/types.h
index 4d118ba11349..a0bb7048687f 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -142,6 +142,7 @@ typedef unsigned long blkcnt_t;
 #define pgoff_t unsigned long
 #endif
 
+/* A dma_addr_t can hold any valid DMA or bus address for the platform */
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
 typedef u64 dma_addr_t;
 #else
-- 
cgit 


From 88a984ba0795f14a3847edbd7fabe652289ea89b Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 20 May 2014 16:54:22 -0600
Subject: DMA-API: Change dma_declare_coherent_memory() CPU address to
 phys_addr_t

dma_declare_coherent_memory() takes two addresses for a region of memory: a
"bus_addr" and a "device_addr".  I think the intent is that "bus_addr" is
the physical address a *CPU* would use to access the region, and
"device_addr" is the bus address the *device* would use to address the
region.

Rename "bus_addr" to "phys_addr" and change its type to phys_addr_t.
Most callers already supply a phys_addr_t for this argument.  The others
supply a 32-bit integer (a constant, unsigned int, or __u32) and need no
change.

Use "unsigned long", not phys_addr_t, to hold PFNs.

No functional change (this could theoretically fix a truncation in a config
with 32-bit dma_addr_t and 64-bit phys_addr_t, but I don't think there are
any such cases involving this code).

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: James Bottomley <jbottomley@Parallels.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
---
 Documentation/DMA-API.txt          |  9 ++++-----
 drivers/base/dma-coherent.c        | 10 +++++-----
 drivers/base/dma-mapping.c         |  6 +++---
 include/asm-generic/dma-coherent.h | 13 +++++--------
 include/linux/dma-mapping.h        |  7 ++++---
 5 files changed, 21 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 1147eba43128..4f1cdc5febd1 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -497,19 +497,18 @@ continuing on for size.  Again, you *must* observe the cache line
 boundaries when doing this.
 
 int
-dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 			    dma_addr_t device_addr, size_t size, int
 			    flags)
 
 Declare region of memory to be handed out by dma_alloc_coherent() when
 it's asked for coherent memory for this device.
 
-bus_addr is the physical address to which the memory is currently
-assigned in the bus responding region (this will be used by the
-platform to perform the mapping).
+phys_addr is the CPU physical address to which the memory is currently
+assigned (this will be ioremapped so the CPU can access the region).
 
 device_addr is the bus address the device needs to be programmed
-with actually to address this memory (this will be handed out as the
+with to actually address this memory (this will be handed out as the
 dma_addr_t in dma_alloc_coherent()).
 
 size is the size of the area (must be multiples of PAGE_SIZE).
diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index bc256b641027..7d6e84a51424 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c
@@ -10,13 +10,13 @@
 struct dma_coherent_mem {
 	void		*virt_base;
 	dma_addr_t	device_base;
-	phys_addr_t	pfn_base;
+	unsigned long	pfn_base;
 	int		size;
 	int		flags;
 	unsigned long	*bitmap;
 };
 
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 				dma_addr_t device_addr, size_t size, int flags)
 {
 	void __iomem *mem_base = NULL;
@@ -32,7 +32,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
 
 	/* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
 
-	mem_base = ioremap(bus_addr, size);
+	mem_base = ioremap(phys_addr, size);
 	if (!mem_base)
 		goto out;
 
@@ -45,7 +45,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
 
 	dev->dma_mem->virt_base = mem_base;
 	dev->dma_mem->device_base = device_addr;
-	dev->dma_mem->pfn_base = PFN_DOWN(bus_addr);
+	dev->dma_mem->pfn_base = PFN_DOWN(phys_addr);
 	dev->dma_mem->size = pages;
 	dev->dma_mem->flags = flags;
 
@@ -208,7 +208,7 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
 
 		*ret = -ENXIO;
 		if (off < count && user_count <= count - off) {
-			unsigned pfn = mem->pfn_base + start + off;
+			unsigned long pfn = mem->pfn_base + start + off;
 			*ret = remap_pfn_range(vma, vma->vm_start, pfn,
 					       user_count << PAGE_SHIFT,
 					       vma->vm_page_prot);
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index 0ce39a33b3c2..6cd08e145bfa 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -175,7 +175,7 @@ static void dmam_coherent_decl_release(struct device *dev, void *res)
 /**
  * dmam_declare_coherent_memory - Managed dma_declare_coherent_memory()
  * @dev: Device to declare coherent memory for
- * @bus_addr: Bus address of coherent memory to be declared
+ * @phys_addr: Physical address of coherent memory to be declared
  * @device_addr: Device address of coherent memory to be declared
  * @size: Size of coherent memory to be declared
  * @flags: Flags
@@ -185,7 +185,7 @@ static void dmam_coherent_decl_release(struct device *dev, void *res)
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-int dmam_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 				 dma_addr_t device_addr, size_t size, int flags)
 {
 	void *res;
@@ -195,7 +195,7 @@ int dmam_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
 	if (!res)
 		return -ENOMEM;
 
-	rc = dma_declare_coherent_memory(dev, bus_addr, device_addr, size,
+	rc = dma_declare_coherent_memory(dev, phys_addr, device_addr, size,
 					 flags);
 	if (rc == 0)
 		devres_add(dev, res);
diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h
index 2be8a2dbc868..0297e5875798 100644
--- a/include/asm-generic/dma-coherent.h
+++ b/include/asm-generic/dma-coherent.h
@@ -16,16 +16,13 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
  * Standard interface
  */
 #define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
-extern int
-dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
-			    dma_addr_t device_addr, size_t size, int flags);
+int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
+				dma_addr_t device_addr, size_t size, int flags);
 
-extern void
-dma_release_declared_memory(struct device *dev);
+void dma_release_declared_memory(struct device *dev);
 
-extern void *
-dma_mark_declared_memory_occupied(struct device *dev,
-				  dma_addr_t device_addr, size_t size);
+void *dma_mark_declared_memory_occupied(struct device *dev,
+					dma_addr_t device_addr, size_t size);
 #else
 #define dma_alloc_from_coherent(dev, size, handle, ret) (0)
 #define dma_release_from_coherent(dev, order, vaddr) (0)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index b9aa2b97aab5..0c3eab1e39ac 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -192,7 +192,7 @@ static inline int dma_get_cache_alignment(void)
 
 #ifndef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
 static inline int
-dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
 			    dma_addr_t device_addr, size_t size, int flags)
 {
 	return 0;
@@ -223,13 +223,14 @@ extern void *dmam_alloc_noncoherent(struct device *dev, size_t size,
 extern void dmam_free_noncoherent(struct device *dev, size_t size, void *vaddr,
 				  dma_addr_t dma_handle);
 #ifdef ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
-extern int dmam_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
+extern int dmam_declare_coherent_memory(struct device *dev,
+					phys_addr_t phys_addr,
 					dma_addr_t device_addr, size_t size,
 					int flags);
 extern void dmam_release_declared_memory(struct device *dev);
 #else /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
 static inline int dmam_declare_coherent_memory(struct device *dev,
-				dma_addr_t bus_addr, dma_addr_t device_addr,
+				phys_addr_t phys_addr, dma_addr_t device_addr,
 				size_t size, gfp_t gfp)
 {
 	return 0;
-- 
cgit 


From 66507c7bc8895f0da6b4ad87e96d61a9f7d7a118 Mon Sep 17 00:00:00 2001
From: Kamal Dasu <kdasu.kdev@gmail.com>
Date: Thu, 1 May 2014 20:51:19 -0400
Subject: mtd: nand: Add support to use nand_base poi databuf as bounce buffer

nand_base can be passed kmap()'d buffers from highmem by
filesystems like jffs2. This results in failure to map the
physical address of the DMA buffer on various controller
drivers on different platforms. This change adds a chip option
to use preallocated databuf as bounce buffers used in
nand_do_read_ops() and nand_do_write_ops().
This allows for specific nand controller driver to set this
option as needed.

Signed-off-by: Kamal Dasu <kdasu.kdev@gmail.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/nand_base.c | 37 +++++++++++++++++++++++++++++++------
 include/linux/mtd/nand.h     |  5 +++++
 2 files changed, 36 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 7853b9b0a05e..1b844b8c621f 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -37,6 +37,7 @@
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/types.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
@@ -1500,6 +1501,7 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 		mtd->oobavail : mtd->oobsize;
 
 	uint8_t *bufpoi, *oob, *buf;
+	int use_bufpoi;
 	unsigned int max_bitflips = 0;
 	int retry_mode = 0;
 	bool ecc_fail = false;
@@ -1522,9 +1524,20 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 		bytes = min(mtd->writesize - col, readlen);
 		aligned = (bytes == mtd->writesize);
 
+		if (!aligned)
+			use_bufpoi = 1;
+		else if (chip->options & NAND_USE_BOUNCE_BUFFER)
+			use_bufpoi = !virt_addr_valid(buf);
+		else
+			use_bufpoi = 0;
+
 		/* Is the current page in the buffer? */
 		if (realpage != chip->pagebuf || oob) {
-			bufpoi = aligned ? buf : chip->buffers->databuf;
+			bufpoi = use_bufpoi ? chip->buffers->databuf : buf;
+
+			if (use_bufpoi && aligned)
+				pr_debug("%s: using read bounce buffer for buf@%p\n",
+						 __func__, buf);
 
 read_retry:
 			chip->cmdfunc(mtd, NAND_CMD_READ0, 0x00, page);
@@ -1546,7 +1559,7 @@ read_retry:
 				ret = chip->ecc.read_page(mtd, chip, bufpoi,
 							  oob_required, page);
 			if (ret < 0) {
-				if (!aligned)
+				if (use_bufpoi)
 					/* Invalidate page cache */
 					chip->pagebuf = -1;
 				break;
@@ -1555,7 +1568,7 @@ read_retry:
 			max_bitflips = max_t(unsigned int, max_bitflips, ret);
 
 			/* Transfer not aligned data */
-			if (!aligned) {
+			if (use_bufpoi) {
 				if (!NAND_HAS_SUBPAGE_READ(chip) && !oob &&
 				    !(mtd->ecc_stats.failed - ecc_failures) &&
 				    (ops->mode != MTD_OPS_RAW)) {
@@ -2375,11 +2388,23 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 		int bytes = mtd->writesize;
 		int cached = writelen > bytes && page != blockmask;
 		uint8_t *wbuf = buf;
+		int use_bufpoi;
+		int part_pagewr = (column || writelen < (mtd->writesize - 1));
+
+		if (part_pagewr)
+			use_bufpoi = 1;
+		else if (chip->options & NAND_USE_BOUNCE_BUFFER)
+			use_bufpoi = !virt_addr_valid(buf);
+		else
+			use_bufpoi = 0;
 
-		/* Partial page write? */
-		if (unlikely(column || writelen < (mtd->writesize - 1))) {
+		/* Partial page write?, or need to use bounce buffer */
+		if (use_bufpoi) {
+			pr_debug("%s: using write bounce buffer for buf@%p\n",
+					 __func__, buf);
 			cached = 0;
-			bytes = min_t(int, bytes - column, (int) writelen);
+			if (part_pagewr)
+				bytes = min_t(int, bytes - column, writelen);
 			chip->pagebuf = -1;
 			memset(chip->buffers->databuf, 0xff, mtd->writesize);
 			memcpy(&chip->buffers->databuf[column], buf, bytes);
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 7a922e6c4e4b..2f0af2891f0f 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -175,6 +175,11 @@ typedef enum {
 #define NAND_OWN_BUFFERS	0x00020000
 /* Chip may not exist, so silence any errors in scan */
 #define NAND_SCAN_SILENT_NODEV	0x00040000
+/*
+ * This option could be defined by controller drivers to protect against
+ * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers
+ */
+#define NAND_USE_BOUNCE_BUFFER	0x00080000
 /*
  * Autodetect nand buswidth with readid/onfi.
  * This suppose the driver will configure the hardware in 8 bits mode
-- 
cgit 


From 27c9fd607587e6c3b517590df4cd35ac85f3d0bd Mon Sep 17 00:00:00 2001
From: pekon gupta <pekon@ti.com>
Date: Mon, 19 May 2014 13:24:39 +0530
Subject: mtd: nand: omap: add support for BCH16_ECC - GPMC driver updates

This patch adds support for BCH16_ECC in the GPMC (controller) driver:
- extends configuration space to include BCH16 registers
- extends parsing of DT binding for selecting BCH16 ecc-scheme

Signed-off-by: Pekon Gupta <pekon@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 arch/arm/mach-omap2/gpmc.c                   | 15 +++++++++++++++
 include/linux/platform_data/mtd-nand-omap2.h |  5 +++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c
index ab43755364f5..9b27773db040 100644
--- a/arch/arm/mach-omap2/gpmc.c
+++ b/arch/arm/mach-omap2/gpmc.c
@@ -68,6 +68,9 @@
 #define	GPMC_ECC_BCH_RESULT_1	0x244	/* not available on OMAP2 */
 #define	GPMC_ECC_BCH_RESULT_2	0x248	/* not available on OMAP2 */
 #define	GPMC_ECC_BCH_RESULT_3	0x24c	/* not available on OMAP2 */
+#define	GPMC_ECC_BCH_RESULT_4	0x300	/* not available on OMAP2 */
+#define	GPMC_ECC_BCH_RESULT_5	0x304	/* not available on OMAP2 */
+#define	GPMC_ECC_BCH_RESULT_6	0x308	/* not available on OMAP2 */
 
 /* GPMC ECC control settings */
 #define GPMC_ECC_CTRL_ECCCLEAR		0x100
@@ -666,6 +669,12 @@ void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs)
 					   GPMC_BCH_SIZE * i;
 		reg->gpmc_bch_result3[i] = gpmc_base + GPMC_ECC_BCH_RESULT_3 +
 					   GPMC_BCH_SIZE * i;
+		reg->gpmc_bch_result4[i] = gpmc_base + GPMC_ECC_BCH_RESULT_4 +
+					   i * GPMC_BCH_SIZE;
+		reg->gpmc_bch_result5[i] = gpmc_base + GPMC_ECC_BCH_RESULT_5 +
+					   i * GPMC_BCH_SIZE;
+		reg->gpmc_bch_result6[i] = gpmc_base + GPMC_ECC_BCH_RESULT_6 +
+					   i * GPMC_BCH_SIZE;
 	}
 }
 
@@ -1401,6 +1410,12 @@ static int gpmc_probe_nand_child(struct platform_device *pdev,
 		else
 			gpmc_nand_data->ecc_opt =
 				OMAP_ECC_BCH8_CODE_HW_DETECTION_SW;
+	else if (!strcmp(s, "bch16"))
+		if (gpmc_nand_data->elm_of_node)
+			gpmc_nand_data->ecc_opt =
+				OMAP_ECC_BCH16_CODE_HW;
+		else
+			pr_err("%s: BCH16 requires ELM support\n", __func__);
 	else
 		pr_err("%s: ti,nand-ecc-opt invalid value\n", __func__);
 
diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h
index 3e9dd6676b97..660c029d694f 100644
--- a/include/linux/platform_data/mtd-nand-omap2.h
+++ b/include/linux/platform_data/mtd-nand-omap2.h
@@ -31,6 +31,8 @@ enum omap_ecc {
 	OMAP_ECC_BCH8_CODE_HW_DETECTION_SW,
 	/* 8-bit  ECC calculation by GPMC, Error detection by ELM */
 	OMAP_ECC_BCH8_CODE_HW,
+	/* 16-bit ECC calculation by GPMC, Error detection by ELM */
+	OMAP_ECC_BCH16_CODE_HW,
 };
 
 struct gpmc_nand_regs {
@@ -50,6 +52,9 @@ struct gpmc_nand_regs {
 	void __iomem	*gpmc_bch_result1[GPMC_BCH_NUM_REMAINDER];
 	void __iomem	*gpmc_bch_result2[GPMC_BCH_NUM_REMAINDER];
 	void __iomem	*gpmc_bch_result3[GPMC_BCH_NUM_REMAINDER];
+	void __iomem	*gpmc_bch_result4[GPMC_BCH_NUM_REMAINDER];
+	void __iomem	*gpmc_bch_result5[GPMC_BCH_NUM_REMAINDER];
+	void __iomem	*gpmc_bch_result6[GPMC_BCH_NUM_REMAINDER];
 };
 
 struct omap_nand_platform_data {
-- 
cgit 


From 2be589e4b28457f148640dc6addf6da24af64b7f Mon Sep 17 00:00:00 2001
From: pekon gupta <pekon@ti.com>
Date: Mon, 19 May 2014 13:24:40 +0530
Subject: mtd: nand: omap: add support for BCH16_ECC - ELM driver updates

The ELM hardware engine is used to detect ECC errors for BCHx ecc-schemes
(like BCH4/BCH8/BCH16). This patch extends the configuration of ELM registers
to add support for the BCH16_HW ecc-scheme.

Signed-off-by: Pekon Gupta <pekon@ti.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/devices/elm.c         | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/platform_data/elm.h |  3 ++-
 2 files changed, 38 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/devices/elm.c b/drivers/mtd/devices/elm.c
index 0a037b15c11b..7df86948e6d4 100644
--- a/drivers/mtd/devices/elm.c
+++ b/drivers/mtd/devices/elm.c
@@ -213,6 +213,28 @@ static void elm_load_syndrome(struct elm_info *info,
 				val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12;
 				elm_write_reg(info, offset, val);
 				break;
+			case BCH16_ECC:
+				val = cpu_to_be32(*(u32 *) &ecc[22]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[18]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[14]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[10]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[6]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[2]);
+				elm_write_reg(info, offset, val);
+				offset += 4;
+				val = cpu_to_be32(*(u32 *) &ecc[0]) >> 16;
+				elm_write_reg(info, offset, val);
+				break;
 			default:
 				pr_err("invalid config bch_type\n");
 			}
@@ -436,6 +458,13 @@ static int elm_context_save(struct elm_info *info)
 	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
 		offset = i * SYNDROME_FRAGMENT_REG_SIZE;
 		switch (bch_type) {
+		case BCH16_ECC:
+			regs->elm_syndrome_fragment_6[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_6 + offset);
+			regs->elm_syndrome_fragment_5[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_5 + offset);
+			regs->elm_syndrome_fragment_4[i] = elm_read_reg(info,
+					ELM_SYNDROME_FRAGMENT_4 + offset);
 		case BCH8_ECC:
 			regs->elm_syndrome_fragment_3[i] = elm_read_reg(info,
 					ELM_SYNDROME_FRAGMENT_3 + offset);
@@ -474,6 +503,13 @@ static int elm_context_restore(struct elm_info *info)
 	for (i = 0; i < ERROR_VECTOR_MAX; i++) {
 		offset = i * SYNDROME_FRAGMENT_REG_SIZE;
 		switch (bch_type) {
+		case BCH16_ECC:
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_6 + offset,
+					regs->elm_syndrome_fragment_6[i]);
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_5 + offset,
+					regs->elm_syndrome_fragment_5[i]);
+			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_4 + offset,
+					regs->elm_syndrome_fragment_4[i]);
 		case BCH8_ECC:
 			elm_write_reg(info, ELM_SYNDROME_FRAGMENT_3 + offset,
 					regs->elm_syndrome_fragment_3[i]);
diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h
index 4edb40676b3f..780d1e97f620 100644
--- a/include/linux/platform_data/elm.h
+++ b/include/linux/platform_data/elm.h
@@ -21,6 +21,7 @@
 enum bch_ecc {
 	BCH4_ECC = 0,
 	BCH8_ECC,
+	BCH16_ECC,
 };
 
 /* ELM support 8 error syndrome process */
@@ -38,7 +39,7 @@ struct elm_errorvec {
 	bool error_reported;
 	bool error_uncorrectable;
 	int error_count;
-	int error_loc[ERROR_VECTOR_MAX];
+	int error_loc[16];
 };
 
 void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc,
-- 
cgit 


From f0ba3d05c9c647ab42ed6a0dbdfdeae42bfbd6de Mon Sep 17 00:00:00 2001
From: Eyal Perry <eyalpe@mellanox.com>
Date: Tue, 20 May 2014 17:57:00 +0300
Subject: genirq: Provide !SMP stub for irq_set_affinity_notifier()

Instead of requiring each consumer of the IRQ affinity notifier to be
explicitly dependent on CONFIG_SMP, make the definition of
struct irq_affinity_notify exist independently of that config option
and introduce a stub for irq_set_affinity_notifier() for non-SMP
configurations.

Fixes: 2eacc23 ("net/mlx4_core: Enforce irq affinity changes
immediatly")

Signed-off-by: Eyal Perry <eyalpe@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Cc: Ben Hutchings <bhutchings@solarflare.com>
Cc: Yevgeny Petrilin <yevgenyp@mellanox.com>
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Cc: David S. Miller <davem@davemloft.net>
Link: http://lkml.kernel.org/r/1400597820-30685-1-git-send-email-amirv@mellanox.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h | 46 ++++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 97ac926c78a7..3f74c0593171 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -199,6 +199,26 @@ extern int check_wakeup_irqs(void);
 static inline int check_wakeup_irqs(void) { return 0; }
 #endif
 
+/**
+ * struct irq_affinity_notify - context for notification of IRQ affinity changes
+ * @irq:		Interrupt to which notification applies
+ * @kref:		Reference count, for internal use
+ * @work:		Work item, for internal use
+ * @notify:		Function to be called on change.  This will be
+ *			called in process context.
+ * @release:		Function to be called on release.  This will be
+ *			called in process context.  Once registered, the
+ *			structure must only be freed when this function is
+ *			called or later.
+ */
+struct irq_affinity_notify {
+	unsigned int irq;
+	struct kref kref;
+	struct work_struct work;
+	void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
+	void (*release)(struct kref *ref);
+};
+
 #if defined(CONFIG_SMP)
 
 extern cpumask_var_t irq_default_affinity;
@@ -242,26 +262,6 @@ extern int irq_select_affinity(unsigned int irq);
 
 extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
 
-/**
- * struct irq_affinity_notify - context for notification of IRQ affinity changes
- * @irq:		Interrupt to which notification applies
- * @kref:		Reference count, for internal use
- * @work:		Work item, for internal use
- * @notify:		Function to be called on change.  This will be
- *			called in process context.
- * @release:		Function to be called on release.  This will be
- *			called in process context.  Once registered, the
- *			structure must only be freed when this function is
- *			called or later.
- */
-struct irq_affinity_notify {
-	unsigned int irq;
-	struct kref kref;
-	struct work_struct work;
-	void (*notify)(struct irq_affinity_notify *, const cpumask_t *mask);
-	void (*release)(struct kref *ref);
-};
-
 extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
@@ -284,6 +284,12 @@ static inline int irq_set_affinity_hint(unsigned int irq,
 {
 	return -EINVAL;
 }
+
+static inline int
+irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify)
+{
+	return 0;
+}
 #endif /* CONFIG_SMP */
 
 /*
-- 
cgit 


From 7d10d2610cc02d432168ca0c5d964cd9e85c1b06 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Mon, 19 May 2014 09:21:09 +0200
Subject: net: cdc_ncm: fix 64bit division build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The upper timer_interval limit is arbitrary and much higher
than anything usable in the real world.  Reducing it from 15s
to ~4s to make the timer_interval fit in an u32 does not make
much difference.  The limit is still outside the practical
bounds.

This eliminates the need for a 64bit timer_interval, fixing a
build error related to 64bit division:

 drivers/built-in.o: In function `cdc_ncm_get_coalesce':
 ak8975.c:(.text+0x1ac994): undefined reference to `__aeabi_uldivmod'

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/cdc_ncm.c   | 4 ++--
 include/linux/usb/cdc_ncm.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c
index 2d0caf1eea25..ad2a386a6e92 100644
--- a/drivers/net/usb/cdc_ncm.c
+++ b/drivers/net/usb/cdc_ncm.c
@@ -138,7 +138,7 @@ static int cdc_ncm_get_coalesce(struct net_device *netdev,
 	ec->tx_max_coalesced_frames = ctx->tx_max / ctx->max_datagram_size;
 
 	/* the timer will fire CDC_NCM_TIMER_PENDING_CNT times in a row */
-	ec->tx_coalesce_usecs = (ctx->timer_interval * CDC_NCM_TIMER_PENDING_CNT) / NSEC_PER_USEC;
+	ec->tx_coalesce_usecs = ctx->timer_interval / (NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT);
 	return 0;
 }
 
@@ -164,7 +164,7 @@ static int cdc_ncm_set_coalesce(struct net_device *netdev,
 		return -EINVAL;
 
 	spin_lock_bh(&ctx->mtx);
-	ctx->timer_interval = ec->tx_coalesce_usecs * NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT;
+	ctx->timer_interval = ec->tx_coalesce_usecs * (NSEC_PER_USEC / CDC_NCM_TIMER_PENDING_CNT);
 	if (!ctx->timer_interval)
 		ctx->tx_timer_pending = 0;
 	spin_unlock_bh(&ctx->mtx);
diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h
index 8c5e38819828..7c9b484735c5 100644
--- a/include/linux/usb/cdc_ncm.h
+++ b/include/linux/usb/cdc_ncm.h
@@ -78,7 +78,7 @@
 #define	CDC_NCM_TIMER_PENDING_CNT		2
 #define CDC_NCM_TIMER_INTERVAL_USEC		400UL
 #define CDC_NCM_TIMER_INTERVAL_MIN		5UL
-#define CDC_NCM_TIMER_INTERVAL_MAX		(15UL * USEC_PER_SEC)
+#define CDC_NCM_TIMER_INTERVAL_MAX		(U32_MAX / NSEC_PER_USEC)
 
 #define cdc_ncm_comm_intf_is_mbim(x)  ((x)->desc.bInterfaceSubClass == USB_CDC_SUBCLASS_MBIM && \
 				       (x)->desc.bInterfaceProtocol == USB_CDC_PROTO_NONE)
@@ -104,7 +104,7 @@ struct cdc_ncm_ctx {
 	spinlock_t mtx;
 	atomic_t stop;
 
-	u64 timer_interval;
+	u32 timer_interval;
 	u32 max_ndp_size;
 
 	u32 tx_timer_pending;
-- 
cgit 


From a8246fedacadaab18b23b280ea3cf916ef5fc30e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 8 May 2014 16:56:12 +0200
Subject: dmaengine: omap: hide filter_fn for built-in drivers

It is not possible to reference the omap_dma_filter_fn filter
function from a built-in driver if the dmaengine driver itself
is a loadable module, which is a valid configuration otherwise.

This provides only the dummy alternative if the function
is referenced by a built-in driver to allow a successful
build. The filter function is only required by ATAGS based
platforms, which will continue to be broken after this change
for the bogus configuration. When booting from DT, with the
dma channels correctly listed there, it will work fine.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Tony Lindgren <tony@atomide.com>
Cc: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: dmaengine@vger.kernel.org
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/omap-dma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index 41a13e70f41f..7944cdc27bed 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h
@@ -10,7 +10,7 @@
 
 struct dma_chan;
 
-#if defined(CONFIG_DMA_OMAP) || defined(CONFIG_DMA_OMAP_MODULE)
+#if defined(CONFIG_DMA_OMAP) || (defined(CONFIG_DMA_OMAP_MODULE) && defined(MODULE))
 bool omap_dma_filter_fn(struct dma_chan *, void *);
 #else
 static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d)
-- 
cgit 


From 3796ce1d4d4b330a75005c5eda105603ce9d4071 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Mon, 19 May 2014 22:42:32 +0200
Subject: pwm: add period and polarity to struct pwm_lookup

Add period and polarity members to struct pwm_lookup so that platforms
using the lookup table can be treated the same way as those using the
device tree.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/core.c  | 8 +++++++-
 include/linux/pwm.h | 2 ++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index a80471399c20..4b66bf09ee55 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -661,10 +661,16 @@ struct pwm_device *pwm_get(struct device *dev, const char *con_id)
 		}
 	}
 
+	mutex_unlock(&pwm_lookup_lock);
+
 	if (chip)
 		pwm = pwm_request_from_chip(chip, index, con_id ?: dev_id);
+	if (IS_ERR(pwm))
+		return pwm;
+
+	pwm_set_period(pwm, p->period);
+	pwm_set_polarity(pwm, p->polarity);
 
-	mutex_unlock(&pwm_lookup_lock);
 
 	return pwm;
 }
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 4717f54051cb..2f45e2fe5b93 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -274,6 +274,8 @@ struct pwm_lookup {
 	unsigned int index;
 	const char *dev_id;
 	const char *con_id;
+	unsigned int period;
+	enum pwm_polarity polarity;
 };
 
 #define PWM_LOOKUP(_provider, _index, _dev_id, _con_id)	\
-- 
cgit 


From eed542d6962ba33a689b4007a389f466e407bd74 Mon Sep 17 00:00:00 2001
From: AKASHI Takahiro <takahiro.akashi@linaro.org>
Date: Tue, 20 May 2014 20:31:04 +0900
Subject: ftrace: Make CALLER_ADDRx macros more generic

Most archs with HAVE_ARCH_CALLER_ADDR have pretty much the same
definitions of CALLER_ADDRx(n). Instead of duplicating the code for all
the archs, define a ftrace_return_address0() and
ftrace_return_address(n) that can be overwritten by the archs if they
need to do something different. Instead of 7 macros in every arch, we
now only have at most 2 (and actually only 1 as
ftrace_return_address0() should be the same for all archs).

The CALLER_ADDRx(n) will now be defined in linux/ftrace.h and use the
ftrace_return_address*(n?) macros. This removes a lot of the duplicate
code.

Link: http://lkml.kernel.org/p/1400585464-30333-1-git-send-email-takahiro.akashi@linaro.org

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/arm/include/asm/ftrace.h      | 10 +---------
 arch/blackfin/include/asm/ftrace.h | 11 +----------
 arch/parisc/include/asm/ftrace.h   | 10 +---------
 arch/sh/include/asm/ftrace.h       | 10 +---------
 arch/xtensa/include/asm/ftrace.h   | 14 ++++----------
 include/linux/ftrace.h             | 34 ++++++++++++++++++----------------
 6 files changed, 26 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index f89515adac60..eb577f4f5f70 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -52,15 +52,7 @@ extern inline void *return_address(unsigned int level)
 
 #endif
 
-#define HAVE_ARCH_CALLER_ADDR
-
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#define CALLER_ADDR1 ((unsigned long)return_address(1))
-#define CALLER_ADDR2 ((unsigned long)return_address(2))
-#define CALLER_ADDR3 ((unsigned long)return_address(3))
-#define CALLER_ADDR4 ((unsigned long)return_address(4))
-#define CALLER_ADDR5 ((unsigned long)return_address(5))
-#define CALLER_ADDR6 ((unsigned long)return_address(6))
+#define ftrace_return_addr(n) return_address(n)
 
 #endif /* ifndef __ASSEMBLY__ */
 
diff --git a/arch/blackfin/include/asm/ftrace.h b/arch/blackfin/include/asm/ftrace.h
index 8a029505d7b7..2f1c3c2657ad 100644
--- a/arch/blackfin/include/asm/ftrace.h
+++ b/arch/blackfin/include/asm/ftrace.h
@@ -66,16 +66,7 @@ extern inline void *return_address(unsigned int level)
 
 #endif /* CONFIG_FRAME_POINTER */
 
-#define HAVE_ARCH_CALLER_ADDR
-
-/* inline function or macro may lead to unexpected result */
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#define CALLER_ADDR1 ((unsigned long)return_address(1))
-#define CALLER_ADDR2 ((unsigned long)return_address(2))
-#define CALLER_ADDR3 ((unsigned long)return_address(3))
-#define CALLER_ADDR4 ((unsigned long)return_address(4))
-#define CALLER_ADDR5 ((unsigned long)return_address(5))
-#define CALLER_ADDR6 ((unsigned long)return_address(6))
+#define ftrace_return_address(n) return_address(n)
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h
index 72c0fafaa039..544ed8ef87eb 100644
--- a/arch/parisc/include/asm/ftrace.h
+++ b/arch/parisc/include/asm/ftrace.h
@@ -24,15 +24,7 @@ extern void return_to_handler(void);
 
 extern unsigned long return_address(unsigned int);
 
-#define HAVE_ARCH_CALLER_ADDR
-
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#define CALLER_ADDR1 return_address(1)
-#define CALLER_ADDR2 return_address(2)
-#define CALLER_ADDR3 return_address(3)
-#define CALLER_ADDR4 return_address(4)
-#define CALLER_ADDR5 return_address(5)
-#define CALLER_ADDR6 return_address(6)
+#define ftrace_return_address(n) return_address(n)
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/sh/include/asm/ftrace.h b/arch/sh/include/asm/ftrace.h
index 13e9966464c2..e79fb6ebaa42 100644
--- a/arch/sh/include/asm/ftrace.h
+++ b/arch/sh/include/asm/ftrace.h
@@ -40,15 +40,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
 /* arch/sh/kernel/return_address.c */
 extern void *return_address(unsigned int);
 
-#define HAVE_ARCH_CALLER_ADDR
-
-#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#define CALLER_ADDR1 ((unsigned long)return_address(1))
-#define CALLER_ADDR2 ((unsigned long)return_address(2))
-#define CALLER_ADDR3 ((unsigned long)return_address(3))
-#define CALLER_ADDR4 ((unsigned long)return_address(4))
-#define CALLER_ADDR5 ((unsigned long)return_address(5))
-#define CALLER_ADDR6 ((unsigned long)return_address(6))
+#define ftrace_return_address(n) return_address(n)
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/xtensa/include/asm/ftrace.h b/arch/xtensa/include/asm/ftrace.h
index 736b9d214d80..6c6d9a9f185f 100644
--- a/arch/xtensa/include/asm/ftrace.h
+++ b/arch/xtensa/include/asm/ftrace.h
@@ -12,24 +12,18 @@
 
 #include <asm/processor.h>
 
-#define HAVE_ARCH_CALLER_ADDR
 #ifndef __ASSEMBLY__
-#define CALLER_ADDR0 ({ unsigned long a0, a1; \
+#define ftrace_return_address0 ({ unsigned long a0, a1; \
 		__asm__ __volatile__ ( \
 			"mov %0, a0\n" \
 			"mov %1, a1\n" \
 			: "=r"(a0), "=r"(a1)); \
 		MAKE_PC_FROM_RA(a0, a1); })
+
 #ifdef CONFIG_FRAME_POINTER
 extern unsigned long return_address(unsigned level);
-#define CALLER_ADDR1 return_address(1)
-#define CALLER_ADDR2 return_address(2)
-#define CALLER_ADDR3 return_address(3)
-#else /* CONFIG_FRAME_POINTER */
-#define CALLER_ADDR1 (0)
-#define CALLER_ADDR2 (0)
-#define CALLER_ADDR3 (0)
-#endif /* CONFIG_FRAME_POINTER */
+#define ftrace_return_address(n) return_address(n)
+#endif
 #endif /* __ASSEMBLY__ */
 
 #ifdef CONFIG_FUNCTION_TRACER
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ae9504b4b67d..2018751cad9e 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -616,25 +616,27 @@ static inline void __ftrace_enabled_restore(int enabled)
 #endif
 }
 
-#ifndef HAVE_ARCH_CALLER_ADDR
+/* All archs should have this, but we define it for consistency */
+#ifndef ftrace_return_address0
+# define ftrace_return_address0 __builtin_return_address(0)
+#endif
+
+/* Archs may use other ways for ADDR1 and beyond */
+#ifndef ftrace_return_address
 # ifdef CONFIG_FRAME_POINTER
-#  define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#  define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1))
-#  define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2))
-#  define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3))
-#  define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4))
-#  define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5))
-#  define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6))
+#  define ftrace_return_address(n) __builtin_return_address(n)
 # else
-#  define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
-#  define CALLER_ADDR1 0UL
-#  define CALLER_ADDR2 0UL
-#  define CALLER_ADDR3 0UL
-#  define CALLER_ADDR4 0UL
-#  define CALLER_ADDR5 0UL
-#  define CALLER_ADDR6 0UL
+#  define ftrace_return_address(n) 0UL
 # endif
-#endif /* ifndef HAVE_ARCH_CALLER_ADDR */
+#endif
+
+#define CALLER_ADDR0 ((unsigned long)ftrace_return_address0)
+#define CALLER_ADDR1 ((unsigned long)ftrace_return_address(1))
+#define CALLER_ADDR2 ((unsigned long)ftrace_return_address(2))
+#define CALLER_ADDR3 ((unsigned long)ftrace_return_address(3))
+#define CALLER_ADDR4 ((unsigned long)ftrace_return_address(4))
+#define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5))
+#define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6))
 
 #ifdef CONFIG_IRQSOFF_TRACER
   extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
-- 
cgit 


From f01d907582f8461546379aa415a7c6d5cfb8e5fd Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Sat, 17 May 2014 14:54:50 +0900
Subject: gpio: make of_get_named_gpiod_flags() private

of_get_named_gpiod_flags() is visible and directly usable by GPIO
consumers, but it really should not as the gpiod interface relies
on the simpler gpiod_get() to provide properly-configured GPIOs.

of_get_named_gpiod_flags() is just used internally by gpiolib to
implement gpiod_get(), and by the old of_get_named_gpio_flags()
function, therefore it makes sense to make it gpiolib-private.

As a side-effect, the unused (and unneeded) of_get_gpiod_flags()
inline function is also removed, and of_get_named_gpio_flags() is moved
from a static inline function to a regular one in gpiolib-of.c

This results in all references to gpiod_* functions in of_gpio.h being
gone, which is the way it should be since this file is part of the old
integer GPIO interface.

Changes since v1:
- Fixed compilation error when CONFIG_OF_GPIO is not defined
- Fixed warning due to of_gpio_flags enum not being declared
  in private gpiolib.h header

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 14 ++++++++++++++
 drivers/gpio/gpiolib.h    |  5 +++++
 include/linux/of_gpio.h   | 35 +++--------------------------------
 3 files changed, 22 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index db98d3a12f70..af7e25c9a9ae 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -96,6 +96,20 @@ struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
 }
 EXPORT_SYMBOL(of_get_named_gpiod_flags);
 
+int of_get_named_gpio_flags(struct device_node *np, const char *list_name,
+			    int index, enum of_gpio_flags *flags)
+{
+	struct gpio_desc *desc;
+
+	desc = of_get_named_gpiod_flags(np, list_name, index, flags);
+
+	if (IS_ERR(desc))
+		return PTR_ERR(desc);
+	else
+		return desc_to_gpio(desc);
+}
+EXPORT_SYMBOL(of_get_named_gpio_flags);
+
 /**
  * of_gpio_simple_xlate - translate gpio_spec to the GPIO number and flags
  * @gc:		pointer to the gpio_chip structure
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index cf092941a9fd..1a4103dd38df 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -15,6 +15,8 @@
 #include <linux/err.h>
 #include <linux/device.h>
 
+enum of_gpio_flags;
+
 /**
  * struct acpi_gpio_info - ACPI GPIO specific information
  * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo
@@ -46,4 +48,7 @@ acpi_get_gpiod_by_index(struct device *dev, int index,
 int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label);
 void gpiochip_free_own_desc(struct gpio_desc *desc);
 
+struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
+		   const char *list_name, int index, enum of_gpio_flags *flags);
+
 #endif /* GPIOLIB_H */
diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index f14123a5a9df..38fc05036015 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -19,7 +19,6 @@
 #include <linux/errno.h>
 #include <linux/gpio.h>
 #include <linux/of.h>
-#include <linux/gpio/consumer.h>
 
 struct device_node;
 
@@ -48,7 +47,7 @@ static inline struct of_mm_gpio_chip *to_of_mm_gpio_chip(struct gpio_chip *gc)
 	return container_of(gc, struct of_mm_gpio_chip, gc);
 }
 
-extern struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
+extern int of_get_named_gpio_flags(struct device_node *np,
 		const char *list_name, int index, enum of_gpio_flags *flags);
 
 extern int of_mm_gpiochip_add(struct device_node *np,
@@ -63,10 +62,10 @@ extern int of_gpio_simple_xlate(struct gpio_chip *gc,
 #else /* CONFIG_OF_GPIO */
 
 /* Drivers may not strictly depend on the GPIO support, so let them link. */
-static inline struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np,
+static inline int of_get_named_gpio_flags(struct device_node *np,
 		const char *list_name, int index, enum of_gpio_flags *flags)
 {
-	return ERR_PTR(-ENOSYS);
+	return -ENOSYS;
 }
 
 static inline int of_gpio_simple_xlate(struct gpio_chip *gc,
@@ -81,18 +80,6 @@ static inline void of_gpiochip_remove(struct gpio_chip *gc) { }
 
 #endif /* CONFIG_OF_GPIO */
 
-static inline int of_get_named_gpio_flags(struct device_node *np,
-		const char *list_name, int index, enum of_gpio_flags *flags)
-{
-	struct gpio_desc *desc;
-	desc = of_get_named_gpiod_flags(np, list_name, index, flags);
-
-	if (IS_ERR(desc))
-		return PTR_ERR(desc);
-	else
-		return desc_to_gpio(desc);
-}
-
 /**
  * of_gpio_named_count() - Count GPIOs for a device
  * @np:		device node to count GPIOs for
@@ -129,22 +116,6 @@ static inline int of_gpio_count(struct device_node *np)
 	return of_gpio_named_count(np, "gpios");
 }
 
-/**
- * of_get_gpiod_flags() - Get a GPIO descriptor and flags to use with GPIO API
- * @np:		device node to get GPIO from
- * @index:	index of the GPIO
- * @flags:	a flags pointer to fill in
- *
- * Returns GPIO descriptor to use with Linux generic GPIO API, or a errno
- * value on the error condition. If @flags is not NULL the function also fills
- * in flags for the GPIO.
- */
-static inline struct gpio_desc *of_get_gpiod_flags(struct device_node *np,
-					int index, enum of_gpio_flags *flags)
-{
-	return of_get_named_gpiod_flags(np, "gpios", index, flags);
-}
-
 static inline int of_get_gpio_flags(struct device_node *np, int index,
 		      enum of_gpio_flags *flags)
 {
-- 
cgit 


From dc671157139918eaf61f73db1bd6dd02960b66e2 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Mon, 19 May 2014 22:42:34 +0200
Subject: pwm: renesas-tpu: remove unused struct tpu_pwm_platform_data

The struct is not used anymore and the polarity initialization will be
done using the PWM lookup table (or device tree).

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/pwm-renesas-tpu.c                 | 19 +++----------------
 include/linux/platform_data/pwm-renesas-tpu.h | 16 ----------------
 2 files changed, 3 insertions(+), 32 deletions(-)
 delete mode 100644 include/linux/platform_data/pwm-renesas-tpu.h

(limited to 'include/linux')

diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c
index cc13ff4cfab0..3b71b42e89d5 100644
--- a/drivers/pwm/pwm-renesas-tpu.c
+++ b/drivers/pwm/pwm-renesas-tpu.c
@@ -21,13 +21,14 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
-#include <linux/platform_data/pwm-renesas-tpu.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/pwm.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
+#define TPU_CHANNEL_MAX		4
+
 #define TPU_TSTR		0x00	/* Timer start register (shared) */
 
 #define TPU_TCRn		0x00	/* Timer control register */
@@ -87,7 +88,6 @@ struct tpu_pwm_device {
 
 struct tpu_device {
 	struct platform_device *pdev;
-	enum pwm_polarity polarities[TPU_CHANNEL_MAX];
 	struct pwm_chip chip;
 	spinlock_t lock;
 
@@ -229,7 +229,7 @@ static int tpu_pwm_request(struct pwm_chip *chip, struct pwm_device *_pwm)
 
 	pwm->tpu = tpu;
 	pwm->channel = _pwm->hwpwm;
-	pwm->polarity = tpu->polarities[pwm->channel];
+	pwm->polarity = PWM_POLARITY_NORMAL;
 	pwm->prescaler = 0;
 	pwm->period = 0;
 	pwm->duty = 0;
@@ -388,16 +388,6 @@ static const struct pwm_ops tpu_pwm_ops = {
  * Probe and remove
  */
 
-static void tpu_parse_pdata(struct tpu_device *tpu)
-{
-	struct tpu_pwm_platform_data *pdata = tpu->pdev->dev.platform_data;
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(tpu->polarities); ++i)
-		tpu->polarities[i] = pdata ? pdata->channels[i].polarity
-				   : PWM_POLARITY_NORMAL;
-}
-
 static int tpu_probe(struct platform_device *pdev)
 {
 	struct tpu_device *tpu;
@@ -411,9 +401,6 @@ static int tpu_probe(struct platform_device *pdev)
 	spin_lock_init(&tpu->lock);
 	tpu->pdev = pdev;
 
-	/* Initialize device configuration from platform data. */
-	tpu_parse_pdata(tpu);
-
 	/* Map memory, get clock and pin control. */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	tpu->base = devm_ioremap_resource(&pdev->dev, res);
diff --git a/include/linux/platform_data/pwm-renesas-tpu.h b/include/linux/platform_data/pwm-renesas-tpu.h
deleted file mode 100644
index a7220b10ddab..000000000000
--- a/include/linux/platform_data/pwm-renesas-tpu.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __PWM_RENESAS_TPU_H__
-#define __PWM_RENESAS_TPU_H__
-
-#include <linux/pwm.h>
-
-#define TPU_CHANNEL_MAX		4
-
-struct tpu_pwm_channel_data {
-	enum pwm_polarity polarity;
-};
-
-struct tpu_pwm_platform_data {
-	struct tpu_pwm_channel_data channels[TPU_CHANNEL_MAX];
-};
-
-#endif /* __PWM_RENESAS_TPU_H__ */
-- 
cgit 


From 4284402924cc55e182008ca7e9d4fb1e891ff5ae Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Date: Mon, 19 May 2014 22:42:37 +0200
Subject: pwm: modify PWM_LOOKUP to initialize all struct pwm_lookup members

Now that PWM_LOOKUP is not used anymore, modify it to initialize all the
members of struct pwm_lookup.

Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 Documentation/pwm.txt | 3 ++-
 include/linux/pwm.h   | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/pwm.txt b/Documentation/pwm.txt
index 0527f615b115..ca895fd211e4 100644
--- a/Documentation/pwm.txt
+++ b/Documentation/pwm.txt
@@ -19,7 +19,8 @@ should instead register a static mapping that can be used to match PWM
 consumers to providers, as given in the following example:
 
 	static struct pwm_lookup board_pwm_lookup[] = {
-		PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL),
+		PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL,
+			   50000, PWM_POLARITY_NORMAL),
 	};
 
 	static void __init board_init(void)
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 2f45e2fe5b93..e90628cac8fa 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -278,12 +278,14 @@ struct pwm_lookup {
 	enum pwm_polarity polarity;
 };
 
-#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id)	\
+#define PWM_LOOKUP(_provider, _index, _dev_id, _con_id, _period, _polarity) \
 	{						\
 		.provider = _provider,			\
 		.index = _index,			\
 		.dev_id = _dev_id,			\
 		.con_id = _con_id,			\
+		.period = _period,			\
+		.polarity = _polarity			\
 	}
 
 #if IS_ENABLED(CONFIG_PWM)
-- 
cgit 


From 9e1e726311830bc5b8b568d5178f6a52c357fb6e Mon Sep 17 00:00:00 2001
From: Matt Porter <mporter@linaro.org>
Date: Wed, 23 Apr 2014 19:21:31 -0400
Subject: mfd: bcm590xx: Add support for secondary I2C slave address

BCM590xx utilizes a secondary I2C slave address to access additional
register space. Add support for the secondary address space by
instantiating a dummy I2C device with the appropriate secondary
I2C slave address. Also expose a secondary regmap register space so
that MFD drivers can access this secondary i2c slave address space.

Signed-off-by: Matt Porter <mporter@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/bcm590xx.c       | 60 +++++++++++++++++++++++++++++++++-----------
 include/linux/mfd/bcm590xx.h |  9 ++++---
 2 files changed, 52 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/bcm590xx.c b/drivers/mfd/bcm590xx.c
index e9a33c79431b..43cba1a1973c 100644
--- a/drivers/mfd/bcm590xx.c
+++ b/drivers/mfd/bcm590xx.c
@@ -28,39 +28,71 @@ static const struct mfd_cell bcm590xx_devs[] = {
 	},
 };
 
-static const struct regmap_config bcm590xx_regmap_config = {
+static const struct regmap_config bcm590xx_regmap_config_pri = {
 	.reg_bits	= 8,
 	.val_bits	= 8,
-	.max_register	= BCM590XX_MAX_REGISTER,
+	.max_register	= BCM590XX_MAX_REGISTER_PRI,
 	.cache_type	= REGCACHE_RBTREE,
 };
 
-static int bcm590xx_i2c_probe(struct i2c_client *i2c,
+static const struct regmap_config bcm590xx_regmap_config_sec = {
+	.reg_bits	= 8,
+	.val_bits	= 8,
+	.max_register	= BCM590XX_MAX_REGISTER_SEC,
+	.cache_type	= REGCACHE_RBTREE,
+};
+
+static int bcm590xx_i2c_probe(struct i2c_client *i2c_pri,
 			      const struct i2c_device_id *id)
 {
 	struct bcm590xx *bcm590xx;
 	int ret;
 
-	bcm590xx = devm_kzalloc(&i2c->dev, sizeof(*bcm590xx), GFP_KERNEL);
+	bcm590xx = devm_kzalloc(&i2c_pri->dev, sizeof(*bcm590xx), GFP_KERNEL);
 	if (!bcm590xx)
 		return -ENOMEM;
 
-	i2c_set_clientdata(i2c, bcm590xx);
-	bcm590xx->dev = &i2c->dev;
-	bcm590xx->i2c_client = i2c;
+	i2c_set_clientdata(i2c_pri, bcm590xx);
+	bcm590xx->dev = &i2c_pri->dev;
+	bcm590xx->i2c_pri = i2c_pri;
 
-	bcm590xx->regmap = devm_regmap_init_i2c(i2c, &bcm590xx_regmap_config);
-	if (IS_ERR(bcm590xx->regmap)) {
-		ret = PTR_ERR(bcm590xx->regmap);
-		dev_err(&i2c->dev, "regmap initialization failed: %d\n", ret);
+	bcm590xx->regmap_pri = devm_regmap_init_i2c(i2c_pri,
+						 &bcm590xx_regmap_config_pri);
+	if (IS_ERR(bcm590xx->regmap_pri)) {
+		ret = PTR_ERR(bcm590xx->regmap_pri);
+		dev_err(&i2c_pri->dev, "primary regmap init failed: %d\n", ret);
 		return ret;
 	}
 
-	ret = mfd_add_devices(&i2c->dev, -1, bcm590xx_devs,
+	/* Secondary I2C slave address is the base address with A(2) asserted */
+	bcm590xx->i2c_sec = i2c_new_dummy(i2c_pri->adapter,
+					  i2c_pri->addr | BIT(2));
+	if (IS_ERR_OR_NULL(bcm590xx->i2c_sec)) {
+		dev_err(&i2c_pri->dev, "failed to add secondary I2C device\n");
+		return -ENODEV;
+	}
+	i2c_set_clientdata(bcm590xx->i2c_sec, bcm590xx);
+
+	bcm590xx->regmap_sec = devm_regmap_init_i2c(bcm590xx->i2c_sec,
+						&bcm590xx_regmap_config_sec);
+	if (IS_ERR(bcm590xx->regmap_sec)) {
+		ret = PTR_ERR(bcm590xx->regmap_sec);
+		dev_err(&bcm590xx->i2c_sec->dev,
+			"secondary regmap init failed: %d\n", ret);
+		goto err;
+	}
+
+	ret = mfd_add_devices(&i2c_pri->dev, -1, bcm590xx_devs,
 			      ARRAY_SIZE(bcm590xx_devs), NULL, 0, NULL);
-	if (ret < 0)
-		dev_err(&i2c->dev, "failed to add sub-devices: %d\n", ret);
+	if (ret < 0) {
+		dev_err(&i2c_pri->dev, "failed to add sub-devices: %d\n", ret);
+		goto err;
+	}
+
+	return 0;
 
+err:
+	i2c_unregister_device(bcm590xx->i2c_sec);
 	return ret;
 }
 
diff --git a/include/linux/mfd/bcm590xx.h b/include/linux/mfd/bcm590xx.h
index 434df2d4e587..267aedee1c7a 100644
--- a/include/linux/mfd/bcm590xx.h
+++ b/include/linux/mfd/bcm590xx.h
@@ -19,12 +19,15 @@
 #include <linux/regmap.h>
 
 /* max register address */
-#define BCM590XX_MAX_REGISTER	0xe7
+#define BCM590XX_MAX_REGISTER_PRI	0xe7
+#define BCM590XX_MAX_REGISTER_SEC	0xf0
 
 struct bcm590xx {
 	struct device *dev;
-	struct i2c_client *i2c_client;
-	struct regmap *regmap;
+	struct i2c_client *i2c_pri;
+	struct i2c_client *i2c_sec;
+	struct regmap *regmap_pri;
+	struct regmap *regmap_sec;
 	unsigned int id;
 };
 
-- 
cgit 


From 5b3e507820c6e120bc2680c0d35f9d9d81fcb98d Mon Sep 17 00:00:00 2001
From: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Date: Wed, 14 May 2014 14:58:08 -0300
Subject: mtd: nand: pxa3xx: Use ECC strength and step size devicetree binding

This commit adds support for the user to specify the ECC strength
and step size through the devicetree. The previous behavior is kept
when no DT parameter is provided.

Signed-off-by: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 drivers/mtd/nand/pxa3xx_nand.c                | 17 +++++++++++++++--
 include/linux/platform_data/mtd-nand-pxa3xx.h |  3 +++
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 3b66a6460d67..2a9add06c2d5 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -1519,8 +1519,13 @@ KEEP_CONFIG:
 		}
 	}
 
-	ecc_strength = chip->ecc_strength_ds;
-	ecc_step = chip->ecc_step_ds;
+	if (pdata->ecc_strength && pdata->ecc_step_size) {
+		ecc_strength = pdata->ecc_strength;
+		ecc_step = pdata->ecc_step_size;
+	} else {
+		ecc_strength = chip->ecc_strength_ds;
+		ecc_step = chip->ecc_step_ds;
+	}
 
 	/* Set default ECC strength requirements on non-ONFI devices */
 	if (ecc_strength < 1 && ecc_step < 1) {
@@ -1729,6 +1734,14 @@ static int pxa3xx_nand_probe_dt(struct platform_device *pdev)
 	of_property_read_u32(np, "num-cs", &pdata->num_cs);
 	pdata->flash_bbt = of_get_nand_on_flash_bbt(np);
 
+	pdata->ecc_strength = of_get_nand_ecc_strength(np);
+	if (pdata->ecc_strength < 0)
+		pdata->ecc_strength = 0;
+
+	pdata->ecc_step_size = of_get_nand_ecc_step_size(np);
+	if (pdata->ecc_step_size < 0)
+		pdata->ecc_step_size = 0;
+
 	pdev->dev.platform_data = pdata;
 
 	return 0;
diff --git a/include/linux/platform_data/mtd-nand-pxa3xx.h b/include/linux/platform_data/mtd-nand-pxa3xx.h
index a94147124929..ac4ea2e641c7 100644
--- a/include/linux/platform_data/mtd-nand-pxa3xx.h
+++ b/include/linux/platform_data/mtd-nand-pxa3xx.h
@@ -58,6 +58,9 @@ struct pxa3xx_nand_platform_data {
 	/* use an flash-based bad block table */
 	bool	flash_bbt;
 
+	/* requested ECC strength and ECC step size */
+	int ecc_strength, ecc_step_size;
+
 	const struct mtd_partition		*parts[NUM_CHIP_SELECT];
 	unsigned int				nr_parts[NUM_CHIP_SELECT];
 
-- 
cgit 


From e814e71ba4a6e1d7509b0f4b1928365ea650cace Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 21 May 2014 13:59:08 -0600
Subject: blk-mq: allow the hctx cpu hotplug notifier to return errors

Prepare this for the next patch which adds more smarts in the
plugging logic, so that we can save some memory.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-cpu.c     | 12 ++++++++----
 block/blk-mq.c         |  9 +++++----
 block/blk-mq.h         |  2 +-
 include/linux/blk-mq.h |  2 +-
 4 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
index 136ef8643bba..d2c253f71b86 100644
--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -18,14 +18,18 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self,
 {
 	unsigned int cpu = (unsigned long) hcpu;
 	struct blk_mq_cpu_notifier *notify;
+	int ret = NOTIFY_OK;
 
 	raw_spin_lock(&blk_mq_cpu_notify_lock);
 
-	list_for_each_entry(notify, &blk_mq_cpu_notify_list, list)
-		notify->notify(notify->data, action, cpu);
+	list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
+		ret = notify->notify(notify->data, action, cpu);
+		if (ret != NOTIFY_OK)
+			break;
+	}
 
 	raw_spin_unlock(&blk_mq_cpu_notify_lock);
-	return NOTIFY_OK;
+	return ret;
 }
 
 void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
@@ -45,7 +49,7 @@ void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
 }
 
 void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
-			      void (*fn)(void *, unsigned long, unsigned int),
+			      int (*fn)(void *, unsigned long, unsigned int),
 			      void *data)
 {
 	notifier->notify = fn;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ef7ed5e95d6d..5a3683fc5bdb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1196,8 +1196,8 @@ void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *hctx,
 }
 EXPORT_SYMBOL(blk_mq_free_single_hw_queue);
 
-static void blk_mq_hctx_notify(void *data, unsigned long action,
-			       unsigned int cpu)
+static int blk_mq_hctx_notify(void *data, unsigned long action,
+			      unsigned int cpu)
 {
 	struct blk_mq_hw_ctx *hctx = data;
 	struct request_queue *q = hctx->queue;
@@ -1205,7 +1205,7 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
 	LIST_HEAD(tmp);
 
 	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
-		return;
+		return NOTIFY_OK;
 
 	/*
 	 * Move ctx entries to new CPU, if this one is going away.
@@ -1220,7 +1220,7 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
 	spin_unlock(&ctx->lock);
 
 	if (list_empty(&tmp))
-		return;
+		return NOTIFY_OK;
 
 	ctx = blk_mq_get_ctx(q);
 	spin_lock(&ctx->lock);
@@ -1240,6 +1240,7 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
 
 	blk_mq_run_hw_queue(hctx, true);
 	blk_mq_put_ctx(ctx);
+	return NOTIFY_OK;
 }
 
 static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 7db4fe4bd002..491dbd4e93f5 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -39,7 +39,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
  */
 struct blk_mq_cpu_notifier;
 void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
-			      void (*fn)(void *, unsigned long, unsigned int),
+			      int (*fn)(void *, unsigned long, unsigned int),
 			      void *data);
 void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
 void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index f45424453338..4d2800567aad 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -8,7 +8,7 @@ struct blk_mq_tags;
 struct blk_mq_cpu_notifier {
 	struct list_head list;
 	void *data;
-	void (*notify)(void *data, unsigned long action, unsigned int cpu);
+	int (*notify)(void *data, unsigned long action, unsigned int cpu);
 };
 
 struct blk_mq_ctxmap {
-- 
cgit 


From c1f43dd9c20d85e66c4d77e284f64ac114abe3f8 Mon Sep 17 00:00:00 2001
From: Xuelin Shi <xuelin.shi@freescale.com>
Date: Wed, 21 May 2014 14:02:37 -0700
Subject: dmaengine: fix dmaengine_unmap failure

The count passed to dmaengine_get_unmap_data() may not be the same as
the count computed in dmaengine_unmap(), which causes the data to be
freed into the wrong pool.

This patch fixes the issue by storing the map count in the unmap_data
structure and using that count to look up the pool.

Cc: <stable@vger.kernel.org>
Signed-off-by: Xuelin Shi <xuelin.shi@freescale.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dma/dmaengine.c   | 2 ++
 include/linux/dmaengine.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index a886713937fd..d5d30ed863ce 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1009,6 +1009,7 @@ static void dmaengine_unmap(struct kref *kref)
 		dma_unmap_page(dev, unmap->addr[i], unmap->len,
 			       DMA_BIDIRECTIONAL);
 	}
+	cnt = unmap->map_cnt;
 	mempool_free(unmap, __get_unmap_pool(cnt)->pool);
 }
 
@@ -1074,6 +1075,7 @@ dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags)
 	memset(unmap, 0, sizeof(*unmap));
 	kref_init(&unmap->kref);
 	unmap->dev = dev;
+	unmap->map_cnt = nr;
 
 	return unmap;
 }
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 8300fb87b84a..72cb0ddb9678 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -429,6 +429,7 @@ typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
 typedef void (*dma_async_tx_callback)(void *dma_async_param);
 
 struct dmaengine_unmap_data {
+	u8 map_cnt;
 	u8 to_cnt;
 	u8 from_cnt;
 	u8 bidi_cnt;
-- 
cgit 


From 5fe821a9dee241fa450703ab7015d970ee0cfb8d Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Mon, 19 May 2014 14:56:14 -0700
Subject: net: filter: cleanup invocation of internal BPF

Kernel API for classic BPF socket filters is:

sk_unattached_filter_create() - validate classic BPF, convert, JIT
SK_RUN_FILTER() - run it
sk_unattached_filter_destroy() - destroy socket filter

Clean up the internal BPF kernel API as follows:

sk_filter_select_runtime() - final step of internal BPF creation.
  Try to JIT internal BPF program, if JIT is not available select interpreter
SK_RUN_FILTER() - run it
sk_filter_free() - free internal BPF program

Disallow direct calls to BPF interpreter. Execution of the BPF program should
be done with SK_RUN_FILTER() macro.

Example of internal BPF create, run, destroy:

  struct sk_filter *fp;

  fp = kzalloc(sk_filter_size(prog_len), GFP_KERNEL);
  memcpy(fp->insni, prog, prog_len * sizeof(fp->insni[0]));
  fp->len = prog_len;

  sk_filter_select_runtime(fp);

  SK_RUN_FILTER(fp, ctx);

  sk_filter_free(fp);

Sockets, seccomp, testsuite, tracing are using different ways to populate
sk_filter, so first steps of program creation are not common.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |  6 ++----
 kernel/seccomp.c       |  6 ++----
 lib/test_bpf.c         |  4 ++--
 net/core/filter.c      | 44 ++++++++++++++++++++++++++++----------------
 4 files changed, 34 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9d5ae0a2c954..7977b3958e25 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -184,10 +184,8 @@ static inline unsigned int sk_filter_size(unsigned int proglen)
 
 int sk_filter(struct sock *sk, struct sk_buff *skb);
 
-u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
-			      const struct sock_filter_int *insni);
-u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
-			  const struct sock_filter_int *insni);
+void sk_filter_select_runtime(struct sk_filter *fp);
+void sk_filter_free(struct sk_filter *fp);
 
 int sk_convert_filter(struct sock_filter *prog, int len,
 		      struct sock_filter_int *new_prog, int *new_len);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 7e02d624cc50..1036b6f2fded 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -273,10 +273,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 
 	atomic_set(&filter->usage, 1);
 	filter->prog->len = new_len;
-	filter->prog->bpf_func = (void *)sk_run_filter_int_seccomp;
 
-	/* JIT internal BPF into native HW instructions */
-	bpf_int_jit_compile(filter->prog);
+	sk_filter_select_runtime(filter->prog);
 
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its
@@ -340,7 +338,7 @@ void put_seccomp_filter(struct task_struct *tsk)
 	while (orig && atomic_dec_and_test(&orig->usage)) {
 		struct seccomp_filter *freeme = orig;
 		orig = orig->prev;
-		bpf_jit_free(freeme->prog);
+		sk_filter_free(freeme->prog);
 		kfree(freeme);
 	}
 }
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 3603ebcd5d65..e160934430eb 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1489,7 +1489,7 @@ static __init int test_bpf(void)
 			memcpy(fp_ext->insns, tests[i].insns_int,
 			       fprog.len * 8);
 			fp->len = fprog.len;
-			fp->bpf_func = sk_run_filter_int_skb;
+			sk_filter_select_runtime(fp);
 		} else {
 			err = sk_unattached_filter_create(&fp, &fprog);
 			if (tests[i].data_type == EXPECTED_FAIL) {
@@ -1516,7 +1516,7 @@ static __init int test_bpf(void)
 		if (tests[i].data_type != SKB_INT)
 			sk_unattached_filter_destroy(fp);
 		else
-			kfree(fp);
+			sk_filter_free(fp);
 
 		if (err) {
 			pr_cont("FAIL %d\n", err);
diff --git a/net/core/filter.c b/net/core/filter.c
index 32c5b44c537e..7067cb240d3e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -153,7 +153,7 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
  * keep, 0 for none. @ctx is the data we are operating on, @insn is the
  * array of filter instructions.
  */
-unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
+static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *insn)
 {
 	u64 stack[MAX_BPF_STACK / sizeof(u64)];
 	u64 regs[MAX_BPF_REG], tmp;
@@ -571,15 +571,6 @@ load_byte:
 		return 0;
 }
 
-u32 sk_run_filter_int_seccomp(const struct seccomp_data *ctx,
-			      const struct sock_filter_int *insni)
-    __attribute__ ((alias ("__sk_run_filter")));
-
-u32 sk_run_filter_int_skb(const struct sk_buff *ctx,
-			  const struct sock_filter_int *insni)
-    __attribute__ ((alias ("__sk_run_filter")));
-EXPORT_SYMBOL_GPL(sk_run_filter_int_skb);
-
 /* Helper to find the offset of pkt_type in sk_buff structure. We want
  * to make sure its still a 3bit field starting at a byte boundary;
  * taken from arch/x86/net/bpf_jit_comp.c.
@@ -1397,7 +1388,7 @@ static void sk_filter_release_rcu(struct rcu_head *rcu)
 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
 
 	sk_release_orig_filter(fp);
-	bpf_jit_free(fp);
+	sk_filter_free(fp);
 }
 
 /**
@@ -1497,7 +1488,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
 		goto out_err_free;
 	}
 
-	fp->bpf_func = sk_run_filter_int_skb;
 	fp->len = new_len;
 
 	/* 2nd pass: remap sock_filter insns into sock_filter_int insns. */
@@ -1510,6 +1500,8 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
 		 */
 		goto out_err_free;
 
+	sk_filter_select_runtime(fp);
+
 	kfree(old_prog);
 	return fp;
 
@@ -1528,6 +1520,29 @@ void __weak bpf_int_jit_compile(struct sk_filter *prog)
 {
 }
 
+/**
+ *	sk_filter_select_runtime - select execution runtime for BPF program
+ *	@fp: sk_filter populated with internal BPF program
+ *
+ * try to JIT internal BPF program, if JIT is not available select interpreter
+ * BPF program will be executed via SK_RUN_FILTER() macro
+ */
+void sk_filter_select_runtime(struct sk_filter *fp)
+{
+	fp->bpf_func = (void *) __sk_run_filter;
+
+	/* Probe if internal BPF can be JITed */
+	bpf_int_jit_compile(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_select_runtime);
+
+/* free internal BPF program */
+void sk_filter_free(struct sk_filter *fp)
+{
+	bpf_jit_free(fp);
+}
+EXPORT_SYMBOL_GPL(sk_filter_free);
+
 static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 					     struct sock *sk)
 {
@@ -1548,12 +1563,9 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
 	/* JIT compiler couldn't process this filter, so do the
 	 * internal BPF translation for the optimized interpreter.
 	 */
-	if (!fp->jited) {
+	if (!fp->jited)
 		fp = __sk_migrate_filter(fp, sk);
 
-		/* Probe if internal BPF can be jit-ed */
-		bpf_int_jit_compile(fp);
-	}
 	return fp;
 }
 
-- 
cgit 


From ba730340f96c01160b5f26f81e8fb38f8cb1821c Mon Sep 17 00:00:00 2001
From: Alexander Popov <a13xp0p0v88@gmail.com>
Date: Thu, 15 May 2014 18:15:31 +0400
Subject: dmaengine: fix comment typo

Fix comment typo.

Signed-off-by: Alexander Popov <a13xp0p0v88@gmail.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
---
 include/linux/dmaengine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 8300fb87b84a..cbb168e04dc1 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -292,7 +292,7 @@ struct dma_chan_dev {
 };
 
 /**
- * enum dma_slave_buswidth - defines bus with of the DMA slave
+ * enum dma_slave_buswidth - defines bus width of the DMA slave
  * device, source or target buses
  */
 enum dma_slave_buswidth {
-- 
cgit 


From db885bf82883f9743efe09d91775c579c0ed6842 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Fri, 16 May 2014 15:17:12 +0300
Subject: ARM: edma: Remove queue_tc_mapping data from edma_soc_info

It is no longer in use by the driver or board files.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 include/linux/platform_data/edma.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index 12f134b1493c..633e196ebdf2 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -175,7 +175,6 @@ struct edma_soc_info {
 	/* Resource reservation for other cores */
 	struct edma_rsv_info	*rsv;
 
-	s8	(*queue_tc_mapping)[2];
 	s8	(*queue_priority_mapping)[2];
 	const s16	(*xbar_chans)[2];
 };
-- 
cgit 


From ba391e5a5ac6697b8bcae8c0d01439cb765d9ef8 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Wed, 21 May 2014 11:15:56 -0400
Subject: HID: rmi: do not handle touchscreens through hid-rmi

Currently, hid-rmi drives every Synaptics product, but the touchscreens
on the Windows tablets should be handled through hid-multitouch.

Instead of providing a long list of PIDs, rely on the scan_report
capability to detect which should go to hid-multitouch, and which
should not go to hid-rmi.

related bug:
https://bugzilla.kernel.org/show_bug.cgi?id=74241
https://bugzilla.redhat.com/show_bug.cgi?id=1089583

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c | 10 ++++++++--
 drivers/hid/hid-rmi.c  |  3 +--
 include/linux/hid.h    |  8 ++++++++
 3 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index f05255d92de7..64c71c866916 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -776,6 +776,14 @@ static int hid_scan_report(struct hid_device *hid)
 	    (hid->group == HID_GROUP_MULTITOUCH))
 		hid->group = HID_GROUP_MULTITOUCH_WIN_8;
 
+	/*
+	* Vendor specific handlings
+	*/
+	if ((hid->vendor == USB_VENDOR_ID_SYNAPTICS) &&
+	    (hid->group == HID_GROUP_GENERIC))
+		/* hid-rmi should take care of them, not hid-generic */
+		hid->group = HID_GROUP_RMI;
+
 	vfree(parser);
 	return 0;
 }
@@ -1882,8 +1890,6 @@ static const struct hid_device_id hid_have_special_driver[] = {
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_VAIO_VGP_MOUSE) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_STEELSERIES, USB_DEVICE_ID_STEELSERIES_SRWS1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_SUNPLUS, USB_DEVICE_ID_SUNPLUS_WDESKTOP) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, HID_ANY_ID) },
-	{ HID_I2C_DEVICE(USB_VENDOR_ID_SYNAPTICS, HID_ANY_ID) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THINGM, USB_DEVICE_ID_BLINK1) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300) },
 	{ HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304) },
diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c
index c529b033ba9e..2451c7e5febd 100644
--- a/drivers/hid/hid-rmi.c
+++ b/drivers/hid/hid-rmi.c
@@ -894,8 +894,7 @@ static void rmi_remove(struct hid_device *hdev)
 }
 
 static const struct hid_device_id rmi_id[] = {
-	{ HID_I2C_DEVICE(USB_VENDOR_ID_SYNAPTICS, HID_ANY_ID) },
-	{ HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, HID_ANY_ID) },
+	{ HID_DEVICE(HID_BUS_ANY, HID_GROUP_RMI, HID_ANY_ID, HID_ANY_ID) },
 	{ }
 };
 MODULE_DEVICE_TABLE(hid, rmi_id);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 54f855b2c902..8ce9ff4d50af 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -299,12 +299,20 @@ struct hid_item {
 
 /*
  * HID device groups
+ *
+ * Note: HID_GROUP_ANY is declared in linux/mod_devicetable.h
+ * and has a value of 0x0000
  */
 #define HID_GROUP_GENERIC			0x0001
 #define HID_GROUP_MULTITOUCH			0x0002
 #define HID_GROUP_SENSOR_HUB			0x0003
 #define HID_GROUP_MULTITOUCH_WIN_8		0x0004
 
+/*
+ * Vendor specific HID device groups
+ */
+#define HID_GROUP_RMI				0x0100
+
 /*
  * This is the global environment of the parser. This information is
  * persistent for main-items. The global environment can be saved and
-- 
cgit 


From 7aa2c016db2162defff77f6f5731bff3f25e5175 Mon Sep 17 00:00:00 2001
From: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
Date: Thu, 8 May 2014 18:33:49 +0900
Subject: sched: Consolidate open coded implementations of nice level frobbing
 into nice_to_rlimit() and rlimit_to_nice()

Signed-off-by: Dongsheng Yang <yangds.fnst@cn.fujitsu.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/a568a1e3cc8e78648f41b5035fa5e381d36274da.1399532322.git.yangds.fnst@cn.fujitsu.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/staging/android/binder.c |  2 +-
 include/linux/sched/prio.h       | 16 ++++++++++++++++
 kernel/sched/core.c              |  2 +-
 kernel/sys.c                     |  6 +++---
 4 files changed, 21 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c
index 179b21b66504..9311bb67ec35 100644
--- a/drivers/staging/android/binder.c
+++ b/drivers/staging/android/binder.c
@@ -436,7 +436,7 @@ static void binder_set_nice(long nice)
 		set_user_nice(current, nice);
 		return;
 	}
-	min_nice = 20 - current->signal->rlim[RLIMIT_NICE].rlim_cur;
+	min_nice = rlimit_to_nice(current->signal->rlim[RLIMIT_NICE].rlim_cur);
 	binder_debug(BINDER_DEBUG_PRIORITY_CAP,
 		     "%d: nice value %ld not allowed use %ld instead\n",
 		      current->pid, nice, min_nice);
diff --git a/include/linux/sched/prio.h b/include/linux/sched/prio.h
index ac322583c820..d9cf5a5762d9 100644
--- a/include/linux/sched/prio.h
+++ b/include/linux/sched/prio.h
@@ -41,4 +41,20 @@
 #define TASK_USER_PRIO(p)	USER_PRIO((p)->static_prio)
 #define MAX_USER_PRIO		(USER_PRIO(MAX_PRIO))
 
+/*
+ * Convert nice value [19,-20] to rlimit style value [1,40].
+ */
+static inline long nice_to_rlimit(long nice)
+{
+	return (MAX_NICE - nice + 1);
+}
+
+/*
+ * Convert rlimit style value [1,40] to nice value [19,-20].
+ */
+static inline long rlimit_to_nice(long prio)
+{
+	return (MAX_NICE - prio + 1);
+}
+
 #endif /* _SCHED_PRIO_H */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index da302ca98f60..321d800e4baa 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3033,7 +3033,7 @@ EXPORT_SYMBOL(set_user_nice);
 int can_nice(const struct task_struct *p, const int nice)
 {
 	/* convert nice value [19,-20] to rlimit style value [1,40] */
-	int nice_rlim = 20 - nice;
+	int nice_rlim = nice_to_rlimit(nice);
 
 	return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) ||
 		capable(CAP_SYS_NICE));
diff --git a/kernel/sys.c b/kernel/sys.c
index fba0f29401ea..66a751ebf9d9 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -250,7 +250,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
 			else
 				p = current;
 			if (p) {
-				niceval = 20 - task_nice(p);
+				niceval = nice_to_rlimit(task_nice(p));
 				if (niceval > retval)
 					retval = niceval;
 			}
@@ -261,7 +261,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
 			else
 				pgrp = task_pgrp(current);
 			do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
-				niceval = 20 - task_nice(p);
+				niceval = nice_to_rlimit(task_nice(p));
 				if (niceval > retval)
 					retval = niceval;
 			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
@@ -277,7 +277,7 @@ SYSCALL_DEFINE2(getpriority, int, which, int, who)
 
 			do_each_thread(g, p) {
 				if (uid_eq(task_uid(p), uid)) {
-					niceval = 20 - task_nice(p);
+					niceval = nice_to_rlimit(task_nice(p));
 					if (niceval > retval)
 						retval = niceval;
 				}
-- 
cgit 


From 4027d080854d1be96ef134a1c3024d5276114db6 Mon Sep 17 00:00:00 2001
From: "xiaofeng.yan" <xiaofeng.yan@huawei.com>
Date: Fri, 9 May 2014 03:21:27 +0000
Subject: sched/rt: Fix 'struct sched_dl_entity' and dl_task_time() comments,
 to match the current upstream code

Signed-off-by: xiaofeng.yan <xiaofeng.yan@huawei.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1399605687-18094-1-git-send-email-xiaofeng.yan@huawei.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h   | 4 ++--
 kernel/sched/deadline.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 725eef121c9f..0f91d00efd87 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1175,8 +1175,8 @@ struct sched_dl_entity {
 
 	/*
 	 * Original scheduling parameters. Copied here from sched_attr
-	 * during sched_setscheduler2(), they will remain the same until
-	 * the next sched_setscheduler2().
+	 * during sched_setattr(), they will remain the same until
+	 * the next sched_setattr().
 	 */
 	u64 dl_runtime;		/* maximum runtime for each instance	*/
 	u64 dl_deadline;	/* relative deadline of each instance	*/
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e0a04ae1e0dd..f9ca7d19781a 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -520,7 +520,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	 * We need to take care of a possible races here. In fact, the
 	 * task might have changed its scheduling policy to something
 	 * different from SCHED_DEADLINE or changed its reservation
-	 * parameters (through sched_setscheduler()).
+	 * parameters (through sched_setattr()).
 	 */
 	if (!dl_task(p) || dl_se->dl_new)
 		goto unlock;
-- 
cgit 


From 903ed4913c7fe78d2746445564634264291c7493 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Fri, 16 May 2014 15:17:20 +0300
Subject: ARM: edma: Remove redundant/unused parameters from edma_soc_info

The following parameters are no longer needed by the edma driver since the
information can be obtained from the IP's CCCFG register:
n_channel, n_region, n_slot and n_tc.
Remove the n_cc as well since in this context it has no meaning. We have
separate edma_soc_info struct/eDMA3_CC instance so this member does not
make any sense (and the driver no longer uses it).

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Sekhar Nori <nsekhar@ti.com>
---
 include/linux/platform_data/edma.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h
index 633e196ebdf2..eb8d5627d080 100644
--- a/include/linux/platform_data/edma.h
+++ b/include/linux/platform_data/edma.h
@@ -158,13 +158,6 @@ struct edma_rsv_info {
 
 /* platform_data for EDMA driver */
 struct edma_soc_info {
-
-	/* how many dma resources of each type */
-	unsigned	n_channel;
-	unsigned	n_region;
-	unsigned	n_slot;
-	unsigned	n_tc;
-	unsigned	n_cc;
 	/*
 	 * Default queue is expected to be a low-priority queue.
 	 * This way, long transfers on the default queue started
-- 
cgit 


From bf3b5ec66bd03d66e9ea729aaca013ea1047a797 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:55:30 +0100
Subject: mmc: sdio_irq: rework sdio irq handling

Rather than the SDIO support spawning its own thread for handling card
interrupts, use the generic IRQ infrastructure for this, triggering it
from the host interface's interrupt handling directly.

This avoids a race between the parent thread waiting to receive an
interrupt response from the card, and the slow startup from the sdio
irq thread, which can occur as a result of high system load (eg, while
udev is running.)

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
[Ulf Hansson] Resolved conflict
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/core/sdio_irq.c | 41 +++++++++++++++++++++++++++++++----------
 include/linux/mmc/host.h    |  3 +++
 2 files changed, 34 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index aaa90460ed23..5cc13c8d35bb 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c
@@ -90,6 +90,15 @@ static int process_sdio_pending_irqs(struct mmc_host *host)
 	return ret;
 }
 
+void sdio_run_irqs(struct mmc_host *host)
+{
+	mmc_claim_host(host);
+	host->sdio_irq_pending = true;
+	process_sdio_pending_irqs(host);
+	mmc_release_host(host);
+}
+EXPORT_SYMBOL_GPL(sdio_run_irqs);
+
 static int sdio_irq_thread(void *_host)
 {
 	struct mmc_host *host = _host;
@@ -189,14 +198,20 @@ static int sdio_card_irq_get(struct mmc_card *card)
 	WARN_ON(!host->claimed);
 
 	if (!host->sdio_irqs++) {
-		atomic_set(&host->sdio_irq_thread_abort, 0);
-		host->sdio_irq_thread =
-			kthread_run(sdio_irq_thread, host, "ksdioirqd/%s",
-				mmc_hostname(host));
-		if (IS_ERR(host->sdio_irq_thread)) {
-			int err = PTR_ERR(host->sdio_irq_thread);
-			host->sdio_irqs--;
-			return err;
+		if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) {
+			atomic_set(&host->sdio_irq_thread_abort, 0);
+			host->sdio_irq_thread =
+				kthread_run(sdio_irq_thread, host,
+					    "ksdioirqd/%s", mmc_hostname(host));
+			if (IS_ERR(host->sdio_irq_thread)) {
+				int err = PTR_ERR(host->sdio_irq_thread);
+				host->sdio_irqs--;
+				return err;
+			}
+		} else {
+			mmc_host_clk_hold(host);
+			host->ops->enable_sdio_irq(host, 1);
+			mmc_host_clk_release(host);
 		}
 	}
 
@@ -211,8 +226,14 @@ static int sdio_card_irq_put(struct mmc_card *card)
 	BUG_ON(host->sdio_irqs < 1);
 
 	if (!--host->sdio_irqs) {
-		atomic_set(&host->sdio_irq_thread_abort, 1);
-		kthread_stop(host->sdio_irq_thread);
+		if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) {
+			atomic_set(&host->sdio_irq_thread_abort, 1);
+			kthread_stop(host->sdio_irq_thread);
+		} else {
+			mmc_host_clk_hold(host);
+			host->ops->enable_sdio_irq(host, 0);
+			mmc_host_clk_release(host);
+		}
 	}
 
 	return 0;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index cd595275e118..7960424d0bc0 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -282,6 +282,7 @@ struct mmc_host {
 #define MMC_CAP2_HS400_1_2V	(1 << 16)	/* Can support HS400 1.2V */
 #define MMC_CAP2_HS400		(MMC_CAP2_HS400_1_8V | \
 				 MMC_CAP2_HS400_1_2V)
+#define MMC_CAP2_SDIO_IRQ_NOTHREAD (1 << 17)
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
@@ -397,6 +398,8 @@ static inline void mmc_signal_sdio_irq(struct mmc_host *host)
 	wake_up_process(host->sdio_irq_thread);
 }
 
+void sdio_run_irqs(struct mmc_host *host);
+
 #ifdef CONFIG_REGULATOR
 int mmc_regulator_get_ocrmask(struct regulator *supply);
 int mmc_regulator_set_ocr(struct mmc_host *mmc,
-- 
cgit 


From 781e989cf593c71d26bdca74f5e77b3651fc060e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:55:46 +0100
Subject: mmc: sdhci: convert to new SDIO IRQ handling

Use a generic threaded interrupt handler for SDIO interrupt handling,
rather than allowing the SDIO core code to buggily spawn its own
thread.  This results in host drivers being more in control of how
SDIO interrupts are acknowledged in the hardware, rather than having
the internals of the SDIO core placed upon them, possibly resulting
in sub-standard handling.

At least one SDHCI implementation specifies a very specific sequence
to deal with a card interrupt.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci.c  | 62 +++++++++++++++++++++++++++++------------------
 include/linux/mmc/sdhci.h |  2 ++
 2 files changed, 41 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 4b6cca2130bc..4a0622d52ae5 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2428,10 +2428,10 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
 
 static irqreturn_t sdhci_irq(int irq, void *dev_id)
 {
-	irqreturn_t result;
+	irqreturn_t result = IRQ_NONE;
 	struct sdhci_host *host = dev_id;
 	u32 intmask, mask, unexpected = 0;
-	int cardint = 0, max_loops = 16;
+	int max_loops = 16;
 
 	spin_lock(&host->lock);
 
@@ -2490,8 +2490,11 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 			pr_err("%s: Card is consuming too much power!\n",
 				mmc_hostname(host->mmc));
 
-		if (intmask & SDHCI_INT_CARD_INT)
-			cardint = 1;
+		if (intmask & SDHCI_INT_CARD_INT) {
+			sdhci_enable_sdio_irq_nolock(host, false);
+			host->thread_isr |= SDHCI_INT_CARD_INT;
+			result = IRQ_WAKE_THREAD;
+		}
 
 		intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE |
 			     SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK |
@@ -2503,17 +2506,10 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 			sdhci_writel(host, intmask, SDHCI_INT_STATUS);
 		}
 
-		result = IRQ_HANDLED;
+		if (result == IRQ_NONE)
+			result = IRQ_HANDLED;
 
 		intmask = sdhci_readl(host, SDHCI_INT_STATUS);
-
-		/*
-		 * If we know we'll call the driver to signal SDIO IRQ,
-		 * disregard further indications of Card Interrupt in
-		 * the status to avoid a needless loop.
-		 */
-		if (cardint)
-			intmask &= ~SDHCI_INT_CARD_INT;
 	} while (intmask && --max_loops);
 out:
 	spin_unlock(&host->lock);
@@ -2523,15 +2519,33 @@ out:
 			   mmc_hostname(host->mmc), unexpected);
 		sdhci_dumpregs(host);
 	}
-	/*
-	 * We have to delay this as it calls back into the driver.
-	 */
-	if (cardint)
-		mmc_signal_sdio_irq(host->mmc);
 
 	return result;
 }
 
+static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
+{
+	struct sdhci_host *host = dev_id;
+	unsigned long flags;
+	u32 isr;
+
+	spin_lock_irqsave(&host->lock, flags);
+	isr = host->thread_isr;
+	host->thread_isr = 0;
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	if (isr & SDHCI_INT_CARD_INT) {
+		sdio_run_irqs(host->mmc);
+
+		spin_lock_irqsave(&host->lock, flags);
+		if (host->flags & SDHCI_SDIO_IRQ_ENABLED)
+			sdhci_enable_sdio_irq_nolock(host, true);
+		spin_unlock_irqrestore(&host->lock, flags);
+	}
+
+	return isr ? IRQ_HANDLED : IRQ_NONE;
+}
+
 /*****************************************************************************\
  *                                                                           *
  * Suspend/resume                                                            *
@@ -2601,8 +2615,9 @@ int sdhci_resume_host(struct sdhci_host *host)
 	}
 
 	if (!device_may_wakeup(mmc_dev(host->mmc))) {
-		ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
-				  mmc_hostname(host->mmc), host);
+		ret = request_threaded_irq(host->irq, sdhci_irq,
+					   sdhci_thread_irq, IRQF_SHARED,
+					   mmc_hostname(host->mmc), host);
 		if (ret)
 			return ret;
 	} else {
@@ -2681,7 +2696,7 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host)
 	sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	synchronize_irq(host->irq);
+	synchronize_hardirq(host->irq);
 
 	spin_lock_irqsave(&host->lock, flags);
 	host->runtime_suspended = true;
@@ -2937,6 +2952,7 @@ int sdhci_add_host(struct sdhci_host *host)
 	mmc->max_busy_timeout = (1 << 27) / host->timeout_clk;
 
 	mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
+	mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
 
 	if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12)
 		host->flags |= SDHCI_AUTO_CMD12;
@@ -3226,8 +3242,8 @@ int sdhci_add_host(struct sdhci_host *host)
 
 	sdhci_init(host, 0);
 
-	ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
-		mmc_hostname(mmc), host);
+	ret = request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq,
+				   IRQF_SHARED,	mmc_hostname(mmc), host);
 	if (ret) {
 		pr_err("%s: Failed to request IRQ %d: %d\n",
 		       mmc_hostname(mmc), host->irq, ret);
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 7be12b883485..d1aa97b77dd9 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -177,6 +177,8 @@ struct sdhci_host {
 	unsigned int            ocr_avail_mmc;
 	u32 ocr_mask;		/* available voltages */
 
+	u32			thread_isr;
+
 	wait_queue_head_t	buf_ready_int;	/* Waitqueue for Buffer Read Ready interrupt */
 	unsigned int		tuning_done;	/* Condition flag set when CMD19 succeeds */
 
-- 
cgit 


From 3560db8e247aa35bc6b287ec7ec51cd41abd512e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:55:51 +0100
Subject: mmc: sdhci: push card_tasklet into threaded irq handler

There's no requirement to have the card tasklet separate now that we
have a threaded interrupt handler, so kill this and move the called
code into the threaded part of the handler.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci.c  | 23 +++++++++--------------
 include/linux/mmc/sdhci.h |  3 +--
 2 files changed, 10 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 4a0622d52ae5..8def3919b32c 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -2128,15 +2128,6 @@ static const struct mmc_host_ops sdhci_ops = {
  *                                                                           *
 \*****************************************************************************/
 
-static void sdhci_tasklet_card(unsigned long param)
-{
-	struct sdhci_host *host = (struct sdhci_host*)param;
-
-	sdhci_card_event(host->mmc);
-
-	mmc_detect_change(host->mmc, msecs_to_jiffies(200));
-}
-
 static void sdhci_tasklet_finish(unsigned long param)
 {
 	struct sdhci_host *host;
@@ -2477,7 +2468,10 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 
 			sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT |
 				     SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);
-			tasklet_schedule(&host->card_tasklet);
+
+			host->thread_isr |= intmask & (SDHCI_INT_CARD_INSERT |
+						       SDHCI_INT_CARD_REMOVE);
+			result = IRQ_WAKE_THREAD;
 		}
 
 		if (intmask & SDHCI_INT_CMD_MASK)
@@ -2534,6 +2528,11 @@ static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
 	host->thread_isr = 0;
 	spin_unlock_irqrestore(&host->lock, flags);
 
+	if (isr & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
+		sdhci_card_event(host->mmc);
+		mmc_detect_change(host->mmc, msecs_to_jiffies(200));
+	}
+
 	if (isr & SDHCI_INT_CARD_INT) {
 		sdio_run_irqs(host->mmc);
 
@@ -3224,8 +3223,6 @@ int sdhci_add_host(struct sdhci_host *host)
 	/*
 	 * Init tasklets.
 	 */
-	tasklet_init(&host->card_tasklet,
-		sdhci_tasklet_card, (unsigned long)host);
 	tasklet_init(&host->finish_tasklet,
 		sdhci_tasklet_finish, (unsigned long)host);
 
@@ -3290,7 +3287,6 @@ reset:
 	free_irq(host->irq, host);
 #endif
 untasklet:
-	tasklet_kill(&host->card_tasklet);
 	tasklet_kill(&host->finish_tasklet);
 
 	return ret;
@@ -3334,7 +3330,6 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 
 	del_timer_sync(&host->timer);
 
-	tasklet_kill(&host->card_tasklet);
 	tasklet_kill(&host->finish_tasklet);
 
 	if (host->vmmc) {
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index d1aa97b77dd9..f1c8e14e8751 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -164,8 +164,7 @@ struct sdhci_host {
 	dma_addr_t adma_addr;	/* Mapped ADMA descr. table */
 	dma_addr_t align_addr;	/* Mapped bounce buffer */
 
-	struct tasklet_struct card_tasklet;	/* Tasklet structures */
-	struct tasklet_struct finish_tasklet;
+	struct tasklet_struct finish_tasklet;	/* Tasklet structures */
 
 	struct timer_list timer;	/* Timer for timeouts */
 
-- 
cgit 


From b537f94ce19583de1882f539a5cc49aa99260aca Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:56:01 +0100
Subject: mmc: sdhci: more efficient interrupt enable register handling

Rather than wasting cycles read-modify-writing the interrupt enable
registers, cache the value locally instead.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci.c  | 98 +++++++++++++++++++++++------------------------
 include/linux/mmc/sdhci.h |  3 ++
 2 files changed, 50 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 0ecbcc4c29d2..4a98ee29d136 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -131,27 +131,6 @@ static void sdhci_dumpregs(struct sdhci_host *host)
  *                                                                           *
 \*****************************************************************************/
 
-static void sdhci_clear_set_irqs(struct sdhci_host *host, u32 clear, u32 set)
-{
-	u32 ier;
-
-	ier = sdhci_readl(host, SDHCI_INT_ENABLE);
-	ier &= ~clear;
-	ier |= set;
-	sdhci_writel(host, ier, SDHCI_INT_ENABLE);
-	sdhci_writel(host, ier, SDHCI_SIGNAL_ENABLE);
-}
-
-static void sdhci_unmask_irqs(struct sdhci_host *host, u32 irqs)
-{
-	sdhci_clear_set_irqs(host, 0, irqs);
-}
-
-static void sdhci_mask_irqs(struct sdhci_host *host, u32 irqs)
-{
-	sdhci_clear_set_irqs(host, irqs, 0);
-}
-
 static void sdhci_set_card_detection(struct sdhci_host *host, bool enable)
 {
 	u32 present, irqs;
@@ -165,9 +144,12 @@ static void sdhci_set_card_detection(struct sdhci_host *host, bool enable)
 	irqs = present ? SDHCI_INT_CARD_REMOVE : SDHCI_INT_CARD_INSERT;
 
 	if (enable)
-		sdhci_unmask_irqs(host, irqs);
+		host->ier |= irqs;
 	else
-		sdhci_mask_irqs(host, irqs);
+		host->ier &= ~irqs;
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 }
 
 static void sdhci_enable_card_detection(struct sdhci_host *host)
@@ -183,17 +165,12 @@ static void sdhci_disable_card_detection(struct sdhci_host *host)
 static void sdhci_reset(struct sdhci_host *host, u8 mask)
 {
 	unsigned long timeout;
-	u32 uninitialized_var(ier);
-
 	if (host->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) {
 		if (!(sdhci_readl(host, SDHCI_PRESENT_STATE) &
 			SDHCI_CARD_PRESENT))
 			return;
 	}
 
-	if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
-		ier = sdhci_readl(host, SDHCI_INT_ENABLE);
-
 	if (host->ops->platform_reset_enter)
 		host->ops->platform_reset_enter(host, mask);
 
@@ -224,8 +201,10 @@ static void sdhci_reset(struct sdhci_host *host, u8 mask)
 	if (host->ops->platform_reset_exit)
 		host->ops->platform_reset_exit(host, mask);
 
-	if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
-		sdhci_clear_set_irqs(host, SDHCI_INT_ALL_MASK, ier);
+	if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET) {
+		sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+		sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+	}
 
 	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
 		if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL))
@@ -242,11 +221,14 @@ static void sdhci_init(struct sdhci_host *host, int soft)
 	else
 		sdhci_reset(host, SDHCI_RESET_ALL);
 
-	sdhci_clear_set_irqs(host, SDHCI_INT_ALL_MASK,
-		SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT |
-		SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_INDEX |
-		SDHCI_INT_END_BIT | SDHCI_INT_CRC | SDHCI_INT_TIMEOUT |
-		SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE);
+	host->ier = SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT |
+		    SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT |
+		    SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC |
+		    SDHCI_INT_TIMEOUT | SDHCI_INT_DATA_END |
+		    SDHCI_INT_RESPONSE;
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 
 	if (soft) {
 		/* force clock reconfiguration */
@@ -721,9 +703,12 @@ static void sdhci_set_transfer_irqs(struct sdhci_host *host)
 	u32 dma_irqs = SDHCI_INT_DMA_END | SDHCI_INT_ADMA_ERROR;
 
 	if (host->flags & SDHCI_REQ_USE_DMA)
-		sdhci_clear_set_irqs(host, pio_irqs, dma_irqs);
+		host->ier = (host->ier & ~pio_irqs) | dma_irqs;
 	else
-		sdhci_clear_set_irqs(host, dma_irqs, pio_irqs);
+		host->ier = (host->ier & ~dma_irqs) | pio_irqs;
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 }
 
 static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
@@ -1713,9 +1698,12 @@ static void sdhci_enable_sdio_irq_nolock(struct sdhci_host *host, int enable)
 {
 	if (!(host->flags & SDHCI_DEVICE_DEAD)) {
 		if (enable)
-			sdhci_unmask_irqs(host, SDHCI_INT_CARD_INT);
+			host->ier |= SDHCI_INT_CARD_INT;
 		else
-			sdhci_mask_irqs(host, SDHCI_INT_CARD_INT);
+			host->ier &= ~SDHCI_INT_CARD_INT;
+
+		sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+		sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 		mmiowb();
 	}
 }
@@ -1857,7 +1845,6 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 {
 	struct sdhci_host *host;
 	u16 ctrl;
-	u32 ier;
 	int tuning_loop_counter = MAX_TUNING_LOOP;
 	unsigned long timeout;
 	int err = 0;
@@ -1911,8 +1898,8 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	 * to make sure we don't hit a controller bug, we _only_
 	 * enable Buffer Read Ready interrupt here.
 	 */
-	ier = sdhci_readl(host, SDHCI_INT_ENABLE);
-	sdhci_clear_set_irqs(host, ier, SDHCI_INT_DATA_AVAIL);
+	sdhci_writel(host, SDHCI_INT_DATA_AVAIL, SDHCI_INT_ENABLE);
+	sdhci_writel(host, SDHCI_INT_DATA_AVAIL, SDHCI_SIGNAL_ENABLE);
 
 	/*
 	 * Issue CMD19 repeatedly till Execute Tuning is set to 0 or the number
@@ -2047,7 +2034,8 @@ out:
 	if (err && (host->flags & SDHCI_USING_RETUNING_TIMER))
 		err = 0;
 
-	sdhci_clear_set_irqs(host, SDHCI_INT_DATA_AVAIL, ier);
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 	spin_unlock_irqrestore(&host->lock, flags);
 	sdhci_runtime_pm_put(host);
 
@@ -2460,10 +2448,12 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id)
 			 * More testing are needed here to ensure it works
 			 * for other platforms though.
 			 */
-			sdhci_mask_irqs(host, present ? SDHCI_INT_CARD_INSERT :
-							SDHCI_INT_CARD_REMOVE);
-			sdhci_unmask_irqs(host, present ? SDHCI_INT_CARD_REMOVE :
-							  SDHCI_INT_CARD_INSERT);
+			host->ier &= ~(SDHCI_INT_CARD_INSERT |
+				       SDHCI_INT_CARD_REMOVE);
+			host->ier |= present ? SDHCI_INT_CARD_REMOVE :
+					       SDHCI_INT_CARD_INSERT;
+			sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+			sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 
 			sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT |
 				     SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);
@@ -2592,7 +2582,9 @@ int sdhci_suspend_host(struct sdhci_host *host)
 	}
 
 	if (!device_may_wakeup(mmc_dev(host->mmc))) {
-		sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+		host->ier = 0;
+		sdhci_writel(host, 0, SDHCI_INT_ENABLE);
+		sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
 		free_irq(host->irq, host);
 	} else {
 		sdhci_enable_irq_wakeups(host);
@@ -2691,7 +2683,9 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host)
 	}
 
 	spin_lock_irqsave(&host->lock, flags);
-	sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK & ~SDHCI_INT_CARD_INT);
+	host->ier &= SDHCI_INT_CARD_INT;
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 	spin_unlock_irqrestore(&host->lock, flags);
 
 	synchronize_hardirq(host->irq);
@@ -3282,7 +3276,8 @@ int sdhci_add_host(struct sdhci_host *host)
 #ifdef SDHCI_USE_LEDS_CLASS
 reset:
 	sdhci_reset(host, SDHCI_RESET_ALL);
-	sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+	sdhci_writel(host, 0, SDHCI_INT_ENABLE);
+	sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
 	free_irq(host->irq, host);
 #endif
 untasklet:
@@ -3324,7 +3319,8 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
 	if (!dead)
 		sdhci_reset(host, SDHCI_RESET_ALL);
 
-	sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+	sdhci_writel(host, 0, SDHCI_INT_ENABLE);
+	sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
 	free_irq(host->irq, host);
 
 	del_timer_sync(&host->timer);
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index f1c8e14e8751..9361d8ef509d 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -178,6 +178,9 @@ struct sdhci_host {
 
 	u32			thread_isr;
 
+	/* cached registers */
+	u32			ier;
+
 	wait_queue_head_t	buf_ready_int;	/* Waitqueue for Buffer Read Ready interrupt */
 	unsigned int		tuning_done;	/* Condition flag set when CMD19 succeeds */
 
-- 
cgit 


From 0718e59ae259f7c48155b4e852d8b0632d59028e Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:57:18 +0100
Subject: mmc: sdhci: move FSL ESDHC reset handling quirk into esdhc code

The Freescale esdhc driver is the only driver which needs the interrupt
registers restored after a reset.  Move this quirk to be part of the
ESDHC driver implementation.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci-esdhc-imx.c | 10 +++++++++-
 drivers/mmc/host/sdhci-esdhc.h     |  3 +--
 drivers/mmc/host/sdhci.c           |  5 -----
 include/linux/mmc/sdhci.h          |  2 --
 4 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index b1d74fa33c5f..812c5772d900 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -876,6 +876,14 @@ static int esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
 	return esdhc_change_pinstate(host, uhs);
 }
 
+static void esdhc_reset(struct sdhci_host *host, u8 mask)
+{
+	sdhci_reset(host, mask);
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+}
+
 static struct sdhci_ops sdhci_esdhc_ops = {
 	.read_l = esdhc_readl_le,
 	.read_w = esdhc_readw_le,
@@ -888,7 +896,7 @@ static struct sdhci_ops sdhci_esdhc_ops = {
 	.get_ro = esdhc_pltfm_get_ro,
 	.set_bus_width = esdhc_pltfm_set_bus_width,
 	.set_uhs_signaling = esdhc_set_uhs_signaling,
-	.reset = sdhci_reset,
+	.reset = esdhc_reset,
 };
 
 static const struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = {
diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h
index a7d9f95a7b03..de69bddc3afc 100644
--- a/drivers/mmc/host/sdhci-esdhc.h
+++ b/drivers/mmc/host/sdhci-esdhc.h
@@ -22,8 +22,7 @@
 				SDHCI_QUIRK_NO_BUSY_IRQ | \
 				SDHCI_QUIRK_NONSTANDARD_CLOCK | \
 				SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | \
-				SDHCI_QUIRK_PIO_NEEDS_DELAY | \
-				SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
+				SDHCI_QUIRK_PIO_NEEDS_DELAY)
 
 #define ESDHC_SYSTEM_CONTROL	0x2c
 #define ESDHC_CLOCK_MASK	0x0000fff0
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 5e25147e92f7..074157e8e73d 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -203,11 +203,6 @@ static void sdhci_do_reset(struct sdhci_host *host, u8 mask)
 
 	host->ops->reset(host, mask);
 
-	if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET) {
-		sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
-		sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
-	}
-
 	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
 		if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL))
 			host->ops->enable_dma(host);
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 9361d8ef509d..02919ef99419 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -61,8 +61,6 @@ struct sdhci_host {
 #define SDHCI_QUIRK_NONSTANDARD_CLOCK			(1<<17)
 /* Controller does not like fast PIO transfers */
 #define SDHCI_QUIRK_PIO_NEEDS_DELAY			(1<<18)
-/* Controller losing signal/interrupt enable states after reset */
-#define SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET		(1<<19)
 /* Controller has to be forced to use block size of 2048 bytes */
 #define SDHCI_QUIRK_FORCE_BLK_SZ_2048			(1<<20)
 /* Controller cannot do multi-block transfers */
-- 
cgit 


From 1771059cf5f9c09e37ef6315df8acf120f2642fc Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:58:55 +0100
Subject: mmc: sdhci: convert sdhci_set_clock() into a library function

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci-acpi.c      |  2 ++
 drivers/mmc/host/sdhci-bcm-kona.c  |  1 +
 drivers/mmc/host/sdhci-bcm2835.c   |  1 +
 drivers/mmc/host/sdhci-cns3xxx.c   |  3 +--
 drivers/mmc/host/sdhci-dove.c      |  1 +
 drivers/mmc/host/sdhci-esdhc.h     |  1 -
 drivers/mmc/host/sdhci-of-arasan.c |  1 +
 drivers/mmc/host/sdhci-of-hlwd.c   |  1 +
 drivers/mmc/host/sdhci-pci.c       |  1 +
 drivers/mmc/host/sdhci-pltfm.c     |  1 +
 drivers/mmc/host/sdhci-pxav2.c     |  1 +
 drivers/mmc/host/sdhci-pxav3.c     |  1 +
 drivers/mmc/host/sdhci-s3c.c       | 19 ++++++++++++++-----
 drivers/mmc/host/sdhci-sirf.c      |  1 +
 drivers/mmc/host/sdhci-spear.c     |  1 +
 drivers/mmc/host/sdhci-tegra.c     |  1 +
 drivers/mmc/host/sdhci.c           | 17 ++++++-----------
 drivers/mmc/host/sdhci.h           |  1 +
 include/linux/mmc/sdhci.h          |  2 --
 19 files changed, 36 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index aca84a682551..323e2a688563 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -102,12 +102,14 @@ static void sdhci_acpi_int_hw_reset(struct sdhci_host *host)
 }
 
 static const struct sdhci_ops sdhci_acpi_ops_dflt = {
+	.set_clock = sdhci_set_clock,
 	.enable_dma = sdhci_acpi_enable_dma,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 };
 
 static const struct sdhci_ops sdhci_acpi_ops_int = {
+	.set_clock = sdhci_set_clock,
 	.enable_dma = sdhci_acpi_enable_dma,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
diff --git a/drivers/mmc/host/sdhci-bcm-kona.c b/drivers/mmc/host/sdhci-bcm-kona.c
index 7b97bfab910d..e610811c09b0 100644
--- a/drivers/mmc/host/sdhci-bcm-kona.c
+++ b/drivers/mmc/host/sdhci-bcm-kona.c
@@ -206,6 +206,7 @@ static void sdhci_bcm_kona_init_74_clocks(struct sdhci_host *host,
 }
 
 static struct sdhci_ops sdhci_bcm_kona_ops = {
+	.set_clock = sdhci_set_clock,
 	.get_max_clock = sdhci_bcm_kona_get_max_clk,
 	.get_timeout_clock = sdhci_bcm_kona_get_timeout_clock,
 	.platform_send_init_74_clocks = sdhci_bcm_kona_init_74_clocks,
diff --git a/drivers/mmc/host/sdhci-bcm2835.c b/drivers/mmc/host/sdhci-bcm2835.c
index 289b1c80d5fc..74906d6008e1 100644
--- a/drivers/mmc/host/sdhci-bcm2835.c
+++ b/drivers/mmc/host/sdhci-bcm2835.c
@@ -131,6 +131,7 @@ static const struct sdhci_ops bcm2835_sdhci_ops = {
 	.read_l = bcm2835_sdhci_readl,
 	.read_w = bcm2835_sdhci_readw,
 	.read_b = bcm2835_sdhci_readb,
+	.set_clock = sdhci_set_clock,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
 	.get_min_clock = bcm2835_sdhci_get_min_clock,
 	.set_bus_width = sdhci_set_bus_width,
diff --git a/drivers/mmc/host/sdhci-cns3xxx.c b/drivers/mmc/host/sdhci-cns3xxx.c
index 416f4a4c2e35..587d73ef33ff 100644
--- a/drivers/mmc/host/sdhci-cns3xxx.c
+++ b/drivers/mmc/host/sdhci-cns3xxx.c
@@ -89,8 +89,7 @@ static const struct sdhci_pltfm_data sdhci_cns3xxx_pdata = {
 		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
 		  SDHCI_QUIRK_INVERTED_WRITE_PROTECT |
 		  SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
-		  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
-		  SDHCI_QUIRK_NONSTANDARD_CLOCK,
+		  SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
 };
 
 static int sdhci_cns3xxx_probe(struct platform_device *pdev)
diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c
index 1408cc11d881..8ef4ab52f8e0 100644
--- a/drivers/mmc/host/sdhci-dove.c
+++ b/drivers/mmc/host/sdhci-dove.c
@@ -86,6 +86,7 @@ static u32 sdhci_dove_readl(struct sdhci_host *host, int reg)
 static const struct sdhci_ops sdhci_dove_ops = {
 	.read_w	= sdhci_dove_readw,
 	.read_l	= sdhci_dove_readl,
+	.set_clock = sdhci_set_clock,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 };
diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h
index de69bddc3afc..3497cfaf683c 100644
--- a/drivers/mmc/host/sdhci-esdhc.h
+++ b/drivers/mmc/host/sdhci-esdhc.h
@@ -20,7 +20,6 @@
 
 #define ESDHC_DEFAULT_QUIRKS	(SDHCI_QUIRK_FORCE_BLK_SZ_2048 | \
 				SDHCI_QUIRK_NO_BUSY_IRQ | \
-				SDHCI_QUIRK_NONSTANDARD_CLOCK | \
 				SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | \
 				SDHCI_QUIRK_PIO_NEEDS_DELAY)
 
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index faef21740584..f0ee594f25d1 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -52,6 +52,7 @@ static unsigned int sdhci_arasan_get_timeout_clock(struct sdhci_host *host)
 }
 
 static struct sdhci_ops sdhci_arasan_ops = {
+	.set_clock = sdhci_set_clock,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
 	.get_timeout_clock = sdhci_arasan_get_timeout_clock,
 	.set_bus_width = sdhci_set_bus_width,
diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c
index fb01958cb18e..a4a1f0f2c0a0 100644
--- a/drivers/mmc/host/sdhci-of-hlwd.c
+++ b/drivers/mmc/host/sdhci-of-hlwd.c
@@ -58,6 +58,7 @@ static const struct sdhci_ops sdhci_hlwd_ops = {
 	.write_l = sdhci_hlwd_writel,
 	.write_w = sdhci_hlwd_writew,
 	.write_b = sdhci_hlwd_writeb,
+	.set_clock = sdhci_set_clock,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 };
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 87f9dd91f68c..b3a28f6b170e 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -1078,6 +1078,7 @@ static void sdhci_pci_hw_reset(struct sdhci_host *host)
 }
 
 static const struct sdhci_ops sdhci_pci_ops = {
+	.set_clock	= sdhci_set_clock,
 	.enable_dma	= sdhci_pci_enable_dma,
 	.set_bus_width	= sdhci_pci_set_bus_width,
 	.reset		= sdhci_reset,
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index bfbf467b61c7..1fb89f44bd58 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -45,6 +45,7 @@ unsigned int sdhci_pltfm_clk_get_max_clock(struct sdhci_host *host)
 EXPORT_SYMBOL_GPL(sdhci_pltfm_clk_get_max_clock);
 
 static const struct sdhci_ops sdhci_pltfm_ops = {
+	.set_clock = sdhci_set_clock,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 };
diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
index 2eee0c8b88eb..db5257bf032e 100644
--- a/drivers/mmc/host/sdhci-pxav2.c
+++ b/drivers/mmc/host/sdhci-pxav2.c
@@ -112,6 +112,7 @@ static void pxav2_mmc_set_bus_width(struct sdhci_host *host, int width)
 }
 
 static const struct sdhci_ops pxav2_sdhci_ops = {
+	.set_clock     = sdhci_set_clock,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
 	.set_bus_width = pxav2_mmc_set_bus_width,
 	.reset         = pxav2_reset,
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index 86564233ae93..8a40e079a57e 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -225,6 +225,7 @@ static int pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
 }
 
 static const struct sdhci_ops pxav3_sdhci_ops = {
+	.set_clock = sdhci_set_clock,
 	.set_uhs_signaling = pxav3_set_uhs_signaling,
 	.platform_send_init_74_clocks = pxav3_gen_init_74_clocks,
 	.get_max_clock = sdhci_pltfm_clk_get_max_clock,
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index 9d710b748b9c..9e6f1c52982c 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -55,6 +55,8 @@ struct sdhci_s3c {
 	struct clk		*clk_io;
 	struct clk		*clk_bus[MAX_BUS_CLK];
 	unsigned long		clk_rates[MAX_BUS_CLK];
+
+	bool			no_divider;
 };
 
 /**
@@ -67,6 +69,7 @@ struct sdhci_s3c {
  */
 struct sdhci_s3c_drv_data {
 	unsigned int	sdhci_quirks;
+	bool		no_divider;
 };
 
 static inline struct sdhci_s3c *to_s3c(struct sdhci_host *host)
@@ -116,7 +119,7 @@ static unsigned int sdhci_s3c_consider_clock(struct sdhci_s3c *ourhost,
 	 * If controller uses a non-standard clock division, find the best clock
 	 * speed possible with selected clock source and skip the division.
 	 */
-	if (ourhost->host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK) {
+	if (ourhost->no_divider) {
 		rate = clk_round_rate(clksrc, wanted);
 		return wanted - rate;
 	}
@@ -161,8 +164,10 @@ static void sdhci_s3c_set_clock(struct sdhci_host *host, unsigned int clock)
 	host->mmc->actual_clock = 0;
 
 	/* don't bother if the clock is going off. */
-	if (clock == 0)
+	if (clock == 0) {
+		sdhci_set_clock(host, clock);
 		return;
+	}
 
 	for (src = 0; src < MAX_BUS_CLK; src++) {
 		delta = sdhci_s3c_consider_clock(ourhost, src, clock);
@@ -214,6 +219,8 @@ static void sdhci_s3c_set_clock(struct sdhci_host *host, unsigned int clock)
 	if (clock < 25 * 1000000)
 		ctrl |= (S3C_SDHCI_CTRL3_FCSEL3 | S3C_SDHCI_CTRL3_FCSEL2);
 	writel(ctrl, host->ioaddr + S3C_SDHCI_CONTROL3);
+
+	sdhci_set_clock(host, clock);
 }
 
 /**
@@ -603,8 +610,10 @@ static int sdhci_s3c_probe(struct platform_device *pdev)
 	/* Setup quirks for the controller */
 	host->quirks |= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC;
 	host->quirks |= SDHCI_QUIRK_NO_HISPD_BIT;
-	if (drv_data)
+	if (drv_data) {
 		host->quirks |= drv_data->sdhci_quirks;
+		sc->no_divider = drv_data->no_divider;
+	}
 
 #ifndef CONFIG_MMC_SDHCI_S3C_DMA
 
@@ -653,7 +662,7 @@ static int sdhci_s3c_probe(struct platform_device *pdev)
 	 * If controller does not have internal clock divider,
 	 * we can use overriding functions instead of default.
 	 */
-	if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK) {
+	if (sc->no_divider) {
 		sdhci_s3c_ops.set_clock = sdhci_cmu_set_clock;
 		sdhci_s3c_ops.get_min_clock = sdhci_cmu_get_min_clock;
 		sdhci_s3c_ops.get_max_clock = sdhci_cmu_get_max_clock;
@@ -794,7 +803,7 @@ static const struct dev_pm_ops sdhci_s3c_pmops = {
 
 #if defined(CONFIG_CPU_EXYNOS4210) || defined(CONFIG_SOC_EXYNOS4212)
 static struct sdhci_s3c_drv_data exynos4_sdhci_drv_data = {
-	.sdhci_quirks = SDHCI_QUIRK_NONSTANDARD_CLOCK,
+	.no_divider = true,
 };
 #define EXYNOS4_SDHCI_DRV_DATA ((kernel_ulong_t)&exynos4_sdhci_drv_data)
 #else
diff --git a/drivers/mmc/host/sdhci-sirf.c b/drivers/mmc/host/sdhci-sirf.c
index 5d79e10e1ba2..3b775348b470 100644
--- a/drivers/mmc/host/sdhci-sirf.c
+++ b/drivers/mmc/host/sdhci-sirf.c
@@ -28,6 +28,7 @@ static unsigned int sdhci_sirf_get_max_clk(struct sdhci_host *host)
 }
 
 static struct sdhci_ops sdhci_sirf_ops = {
+	.set_clock = sdhci_set_clock,
 	.get_max_clock	= sdhci_sirf_get_max_clk,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
index c2a2bedc8813..8bf64ab36720 100644
--- a/drivers/mmc/host/sdhci-spear.c
+++ b/drivers/mmc/host/sdhci-spear.c
@@ -38,6 +38,7 @@ struct spear_sdhci {
 
 /* sdhci ops */
 static const struct sdhci_ops sdhci_pltfm_ops = {
+	.set_clock = sdhci_set_clock,
 	.set_bus_width = sdhci_set_bus_width,
 	.reset = sdhci_reset,
 };
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index 7754c0319fda..a0a8b5cc3b0c 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -153,6 +153,7 @@ static const struct sdhci_ops tegra_sdhci_ops = {
 	.read_l     = tegra_sdhci_readl,
 	.read_w     = tegra_sdhci_readw,
 	.write_l    = tegra_sdhci_writel,
+	.set_clock  = sdhci_set_clock,
 	.set_bus_width = tegra_sdhci_set_bus_width,
 	.reset      = tegra_sdhci_reset,
 };
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index d9b91fc17bb0..69e58d071b33 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1112,19 +1112,13 @@ static u16 sdhci_get_preset_value(struct sdhci_host *host)
 	return preset;
 }
 
-static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 {
 	int div = 0; /* Initialized for compiler warning */
 	int real_div = div, clk_mul = 1;
 	u16 clk = 0;
 	unsigned long timeout;
 
-	if (host->ops->set_clock) {
-		host->ops->set_clock(host, clock);
-		if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK)
-			return;
-	}
-
 	host->mmc->actual_clock = 0;
 
 	sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
@@ -1221,6 +1215,7 @@ clock_set:
 	clk |= SDHCI_CLOCK_CARD_EN;
 	sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
 }
+EXPORT_SYMBOL_GPL(sdhci_set_clock);
 
 static int sdhci_set_power(struct sdhci_host *host, unsigned short power)
 {
@@ -1439,7 +1434,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 		sdhci_enable_preset_value(host, false);
 
 	if (!ios->clock || ios->clock != host->clock) {
-		sdhci_set_clock(host, ios->clock);
+		host->ops->set_clock(host, ios->clock);
 		host->clock = ios->clock;
 	}
 
@@ -1510,7 +1505,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 			sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 
 			/* Re-enable SD Clock */
-			sdhci_set_clock(host, host->clock);
+			host->ops->set_clock(host, host->clock);
 		}
 
 
@@ -1555,7 +1550,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 		}
 
 		/* Re-enable SD Clock */
-		sdhci_set_clock(host, host->clock);
+		host->ops->set_clock(host, host->clock);
 	} else
 		sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 
@@ -2129,7 +2124,7 @@ static void sdhci_tasklet_finish(unsigned long param)
 		/* Some controllers need this kick or reset won't work here */
 		if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET)
 			/* This is to force an update */
-			sdhci_set_clock(host, host->clock);
+			host->ops->set_clock(host, host->clock);
 
 		/* Spec says we should do both at the same time, but Ricoh
 		   controllers do not like that. */
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 7d84cb3b0e00..ac20195f667b 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -400,6 +400,7 @@ static inline bool sdhci_sdio_irq_enabled(struct sdhci_host *host)
 	return !!(host->flags & SDHCI_SDIO_IRQ_ENABLED);
 }
 
+void sdhci_set_clock(struct sdhci_host *host, unsigned int clock);
 void sdhci_set_bus_width(struct sdhci_host *host, int width);
 void sdhci_reset(struct sdhci_host *host, u8 mask);
 
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 02919ef99419..72a90baf111f 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -57,8 +57,6 @@ struct sdhci_host {
 #define SDHCI_QUIRK_BROKEN_CARD_DETECTION		(1<<15)
 /* Controller reports inverted write-protect state */
 #define SDHCI_QUIRK_INVERTED_WRITE_PROTECT		(1<<16)
-/* Controller has nonstandard clock management */
-#define SDHCI_QUIRK_NONSTANDARD_CLOCK			(1<<17)
 /* Controller does not like fast PIO transfers */
 #define SDHCI_QUIRK_PIO_NEEDS_DELAY			(1<<18)
 /* Controller has to be forced to use block size of 2048 bytes */
-- 
cgit 


From d975f121011a58223c7936ab483c3374a83236c3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 12:59:31 +0100
Subject: mmc: sdhci: cache timing information locally

Rather than reading back the timing information from the registers,
cache it locally.  This allows implementations to translate the UHS
timing by overriding the set_uhs_signaling() method as required
without also having to emulate the SDHCI_HOST_CONTROL2 register.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
[Ulf Hansson] Resolved conflict
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci.c  | 23 ++++++++++++-----------
 include/linux/mmc/sdhci.h |  2 ++
 2 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 0073aae0adcb..956799c75df2 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1083,24 +1083,23 @@ static void sdhci_finish_command(struct sdhci_host *host)
 
 static u16 sdhci_get_preset_value(struct sdhci_host *host)
 {
-	u16 ctrl, preset = 0;
+	u16 preset = 0;
 
-	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-
-	switch (ctrl & SDHCI_CTRL_UHS_MASK) {
-	case SDHCI_CTRL_UHS_SDR12:
+	switch (host->timing) {
+	case MMC_TIMING_UHS_SDR12:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR12);
 		break;
-	case SDHCI_CTRL_UHS_SDR25:
+	case MMC_TIMING_UHS_SDR25:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR25);
 		break;
-	case SDHCI_CTRL_UHS_SDR50:
+	case MMC_TIMING_UHS_SDR50:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR50);
 		break;
-	case SDHCI_CTRL_UHS_SDR104:
+	case MMC_TIMING_UHS_SDR104:
+	case MMC_TIMING_MMC_HS200:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR104);
 		break;
-	case SDHCI_CTRL_UHS_DDR50:
+	case MMC_TIMING_UHS_DDR50:
 		preset = sdhci_readw(host, SDHCI_PRESET_FOR_DDR50);
 		break;
 	default:
@@ -1538,6 +1537,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 		sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
 
 		host->ops->set_uhs_signaling(host, ios->timing);
+		host->timing = ios->timing;
 
 		if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) &&
 				((ios->timing == MMC_TIMING_UHS_SDR12) ||
@@ -1842,12 +1842,13 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 	 * If the Host Controller supports the HS200 mode then the
 	 * tuning function has to be executed.
 	 */
-	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) &&
+	if (host->timing == MMC_TIMING_UHS_SDR50 &&
 	    (host->flags & SDHCI_SDR50_NEEDS_TUNING ||
 	     host->flags & SDHCI_SDR104_NEEDS_TUNING))
 		requires_tuning_nonuhs = true;
 
-	if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR104) ||
+	if (host->timing == MMC_TIMING_MMC_HS200 ||
+	    host->timing == MMC_TIMING_UHS_SDR104 ||
 	    requires_tuning_nonuhs)
 		ctrl |= SDHCI_CTRL_EXEC_TUNING;
 	else {
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 72a90baf111f..7f3efbab8732 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -172,6 +172,8 @@ struct sdhci_host {
 	unsigned int            ocr_avail_mmc;
 	u32 ocr_mask;		/* available voltages */
 
+	unsigned		timing;		/* Current timing */
+
 	u32			thread_isr;
 
 	/* cached registers */
-- 
cgit 


From da91a8f9c0f56d75b35bfe2e2456187ab55b3639 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@arm.linux.org.uk>
Date: Fri, 25 Apr 2014 13:00:12 +0100
Subject: mmc: sdhci: track whether preset mode is currently enabled in
 hardware

Track whether preset mode is currently enabled in hardware, and use that
when making decisions elsewhere in the code rather than reading the
register and checking the bit.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Tested-by: Markus Pargmann <mpa@pengutronix.de>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/sdhci.c  | 44 ++++++++++++++++++++++++++------------------
 include/linux/mmc/sdhci.h |  1 +
 2 files changed, 27 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index effd9e5d1d81..447eef8217c7 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -205,9 +205,14 @@ static void sdhci_do_reset(struct sdhci_host *host, u8 mask)
 
 	host->ops->reset(host, mask);
 
-	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
-		if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL))
-			host->ops->enable_dma(host);
+	if (mask & SDHCI_RESET_ALL) {
+		if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
+			if (host->ops->enable_dma)
+				host->ops->enable_dma(host);
+		}
+
+		/* Reset clears many settings, including preset mode */
+		host->preset_enabled = false;
 	}
 }
 
@@ -1126,8 +1131,7 @@ void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
 		return;
 
 	if (host->version >= SDHCI_SPEC_300) {
-		if (sdhci_readw(host, SDHCI_HOST_CONTROL2) &
-			SDHCI_CTRL_PRESET_VAL_ENABLE) {
+		if (host->preset_enabled) {
 			u16 pre_val;
 
 			clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
@@ -1493,13 +1497,13 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
 		    (ios->timing == MMC_TIMING_UHS_SDR25))
 			ctrl |= SDHCI_CTRL_HISPD;
 
-		ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-		if (!(ctrl_2 & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
+		if (!host->preset_enabled) {
 			sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 			/*
 			 * We only need to set Driver Strength if the
 			 * preset value enable is not set.
 			 */
+			ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 			ctrl_2 &= ~SDHCI_CTRL_DRV_TYPE_MASK;
 			if (ios->drv_type == MMC_SET_DRIVER_TYPE_A)
 				ctrl_2 |= SDHCI_CTRL_DRV_TYPE_A;
@@ -2018,26 +2022,30 @@ out:
 
 static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable)
 {
-	u16 ctrl;
-
 	/* Host Controller v3.00 defines preset value registers */
 	if (host->version < SDHCI_SPEC_300)
 		return;
 
-	ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-
 	/*
 	 * We only enable or disable Preset Value if they are not already
 	 * enabled or disabled respectively. Otherwise, we bail out.
 	 */
-	if (enable && !(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
-		ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE;
-		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
-		host->flags |= SDHCI_PV_ENABLED;
-	} else if (!enable && (ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
-		ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE;
+	if (host->preset_enabled != enable) {
+		u16 ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+
+		if (enable)
+			ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE;
+		else
+			ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE;
+
 		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
-		host->flags &= ~SDHCI_PV_ENABLED;
+
+		if (enable)
+			host->flags |= SDHCI_PV_ENABLED;
+		else
+			host->flags &= ~SDHCI_PV_ENABLED;
+
+		host->preset_enabled = enable;
 	}
 }
 
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 7f3efbab8732..08abe9941884 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -143,6 +143,7 @@ struct sdhci_host {
 
 	bool runtime_suspended;	/* Host is runtime suspended */
 	bool bus_on;		/* Bus power prevents runtime suspend */
+	bool preset_enabled;	/* Preset is enabled */
 
 	struct mmc_request *mrq;	/* Current request */
 	struct mmc_command *cmd;	/* Current command */
-- 
cgit 


From ee526d515ad12e9fee2d2dbfc7f626c0a5c7f417 Mon Sep 17 00:00:00 2001
From: Balaji T K <balajitk@ti.com>
Date: Fri, 9 May 2014 22:16:53 +0530
Subject: mmc: omap_hsmmc: split omap-dma header file

Move the dmaengine consumer-specific function to omap-dmaengine.h
to resolve the build failure seen with sh-allmodconfig:
    include/linux/omap-dma.h:171:8: error: expected identifier before numeric constant
    make[4]: *** [drivers/mmc/host/omap_hsmmc.o] Error 1

Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
Cc: Tony Lindgren <tony@atomide.com>
Signed-off-by: Balaji T K <balajitk@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Chris Ball <chris@printf.net>
---
 drivers/mmc/host/omap_hsmmc.c  |  2 +-
 include/linux/omap-dma.h       | 19 +------------------
 include/linux/omap-dmaengine.h | 21 +++++++++++++++++++++
 3 files changed, 23 insertions(+), 19 deletions(-)
 create mode 100644 include/linux/omap-dmaengine.h

(limited to 'include/linux')

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index cba71d69a79c..6b7b75585926 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -31,7 +31,7 @@
 #include <linux/of.h>
 #include <linux/of_gpio.h>
 #include <linux/of_device.h>
-#include <linux/omap-dma.h>
+#include <linux/omap-dmaengine.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/core.h>
 #include <linux/mmc/mmc.h>
diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index 41a13e70f41f..999f52d3d1e7 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h
@@ -1,23 +1,6 @@
-/*
- * OMAP DMA Engine support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
 #ifndef __LINUX_OMAP_DMA_H
 #define __LINUX_OMAP_DMA_H
-
-struct dma_chan;
-
-#if defined(CONFIG_DMA_OMAP) || defined(CONFIG_DMA_OMAP_MODULE)
-bool omap_dma_filter_fn(struct dma_chan *, void *);
-#else
-static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d)
-{
-	return false;
-}
-#endif
+#include <linux/omap-dmaengine.h>
 
 /*
  *  Legacy OMAP DMA handling defines and functions
diff --git a/include/linux/omap-dmaengine.h b/include/linux/omap-dmaengine.h
new file mode 100644
index 000000000000..2b0b6aa01922
--- /dev/null
+++ b/include/linux/omap-dmaengine.h
@@ -0,0 +1,21 @@
+/*
+ * OMAP DMA Engine support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_OMAP_DMAENGINE_H
+#define __LINUX_OMAP_DMAENGINE_H
+
+struct dma_chan;
+
+#if defined(CONFIG_DMA_OMAP) || defined(CONFIG_DMA_OMAP_MODULE)
+bool omap_dma_filter_fn(struct dma_chan *, void *);
+#else
+static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d)
+{
+	return false;
+}
+#endif
+#endif /* __LINUX_OMAP_DMAENGINE_H */
-- 
cgit 


From 73e4354444eef5251e5cdfd388ab02ef9f2e727e Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 22 May 2014 16:42:41 +0800
Subject: workqueue: declare system_highpri_wq

system_highpri_wq is exported to modules via EXPORT_SYMBOL_GPL(),
but its declaration was missing from workqueue.h.  Add the declaration
and a short description for it.

tj: Minor comment tweak.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index d93d28b2ec73..b263b29bd98b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -340,6 +340,9 @@ enum {
  * short queue flush time.  Don't queue works which can run for too
  * long.
  *
+ * system_highpri_wq is similar to system_wq but for work items which
+ * require WQ_HIGHPRI.
+ *
  * system_long_wq is similar to system_wq but may host long running
  * works.  Queue flushing might take relatively long.
  *
@@ -358,6 +361,7 @@ enum {
  * 'wq_power_efficient' is disabled.  See WQ_POWER_EFFICIENT for more info.
  */
 extern struct workqueue_struct *system_wq;
+extern struct workqueue_struct *system_highpri_wq;
 extern struct workqueue_struct *system_long_wq;
 extern struct workqueue_struct *system_unbound_wq;
 extern struct workqueue_struct *system_freezable_wq;
-- 
cgit 


From 79bc251f0e0aea67bc230c530f7fa57f66f9cdf3 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 22 May 2014 16:43:44 +0800
Subject: workqueue: remove unused WORK_CPU_END

WORK_CPU_END is totally unused since 4e8b22bd1a37 ("workqueue: fix
pool ID allocation leakage and remove BUILD_BUG_ON() in
init_workqueues"). It should be removed.

Once it is removed, the comment "special cpu IDs" is no longer accurate
because only one special CPU ID (WORK_CPU_UNBOUND) is left, so we also
change this comment to a description of WORK_CPU_UNBOUND.

tj: Minor description and comment tweaks.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b263b29bd98b..b8aee9453f22 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -56,9 +56,8 @@ enum {
 	WORK_NR_COLORS		= (1 << WORK_STRUCT_COLOR_BITS) - 1,
 	WORK_NO_COLOR		= WORK_NR_COLORS,
 
-	/* special cpu IDs */
+	/* not bound to any CPU, prefer the local CPU */
 	WORK_CPU_UNBOUND	= NR_CPUS,
-	WORK_CPU_END		= NR_CPUS + 1,
 
 	/*
 	 * Reserve 7 bits off of pwq pointer w/ debugobjects turned off.
-- 
cgit 


From cafebac153ae54fd0aba5d4ad28af995532c5375 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Thu, 22 May 2014 16:43:56 +0800
Subject: workqueue: remove unused work_clear_pending()

In 8930caba3dbd ("workqueue: disable irq while manipulating PENDING"),
setting the last CPU and clearing PENDING were merged into a single
operation (set_work_cpu_and_clear_pending()), leaving the internal
routine work_clear_pending() unused.  Remove it.

tj: Minor description tweak.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b8aee9453f22..a0cc2e95ed1b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -273,13 +273,6 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; }
 #define delayed_work_pending(w) \
 	work_pending(&(w)->work)
 
-/**
- * work_clear_pending - for internal use only, mark a work item as not pending
- * @work: The work item in question
- */
-#define work_clear_pending(work) \
-	clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))
-
 /*
  * Workqueue flags and constants.  For details, please refer to
  * Documentation/workqueue.txt.
-- 
cgit 


From ca8a22634381537c92b5a10308652e1c38fd9edf Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 22 May 2014 10:41:08 -0400
Subject: tcp: make cwnd-limited checks measurement-based, and gentler

Experience with the recent e114a710aa50 ("tcp: fix cwnd limited
checking to improve congestion control") has shown that there are
common cases where that commit can cause cwnd to be much larger than
necessary. This leads to TSO autosizing cooking skbs that are too
large, among other things.

The main problems seemed to be:

(1) That commit attempted to predict the future behavior of the
connection by looking at the write queue (if TSO or TSQ limit
sending). That prediction sometimes overestimated future outstanding
packets.

(2) That commit always allowed cwnd to grow to twice the number of
outstanding packets (even in congestion avoidance, where this is not
needed).

This commit improves both of these, by:

(1) Switching to a measurement-based approach where we explicitly
track the largest number of packets in flight during the past window
("max_packets_out"), and remember whether we were cwnd-limited at the
moment we finished sending that flight.

(2) Only allowing cwnd to grow to twice the number of outstanding
packets ("max_packets_out") in slow start. In congestion avoidance
mode we now only allow cwnd to grow if it was fully utilized.

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  6 ++++--
 include/net/tcp.h     | 11 ++++++++---
 net/ipv4/tcp_output.c | 37 +++++++++++++++++++++++--------------
 3 files changed, 35 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bc35e4709e8e..a0513210798f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -197,7 +197,8 @@ struct tcp_sock {
 	u8	do_early_retrans:1,/* Enable RFC5827 early-retransmit  */
 		syn_data:1,	/* SYN includes data */
 		syn_fastopen:1,	/* SYN includes Fast Open option */
-		syn_data_acked:1;/* data in SYN is acked by SYN-ACK */
+		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
+		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 /* RTT measurement */
@@ -209,6 +210,8 @@ struct tcp_sock {
 
 	u32	packets_out;	/* Packets which are "in flight"	*/
 	u32	retrans_out;	/* Retransmitted packets out		*/
+	u32	max_packets_out;  /* max packets_out in last window */
+	u32	max_packets_seq;  /* right edge of max_packets_out flight */
 
 	u16	urg_data;	/* Saved octet of OOB data and control flags */
 	u8	ecn_flags;	/* ECN status bits.			*/
@@ -230,7 +233,6 @@ struct tcp_sock {
 	u32	snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */
 	u32	snd_cwnd_used;
 	u32	snd_cwnd_stamp;
-	u32	lsnd_pending;	/* packets inflight or unsent since last xmit */
 	u32	prior_cwnd;	/* Congestion window at start of Recovery. */
 	u32	prr_delivered;	/* Number of newly delivered packets to
 				 * receiver in Recovery. */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index f5d6ca4a9d28..e80abe4486cb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -971,8 +971,9 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
 
 /* We follow the spirit of RFC2861 to validate cwnd but implement a more
  * flexible approach. The RFC suggests cwnd should not be raised unless
- * it was fully used previously. But we allow cwnd to grow as long as the
- * application has used half the cwnd.
+ * it was fully used previously. And that's exactly what we do in
+ * congestion avoidance mode. But in slow start we allow cwnd to grow
+ * as long as the application has used half the cwnd.
  * Example :
  *    cwnd is 10 (IW10), but application sends 9 frames.
  *    We allow cwnd to reach 18 when all frames are ACKed.
@@ -985,7 +986,11 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 
-	return tp->snd_cwnd < 2 * tp->lsnd_pending;
+	/* If in slow start, ensure cwnd grows to twice what was ACKed. */
+	if (tp->snd_cwnd <= tp->snd_ssthresh)
+		return tp->snd_cwnd < 2 * tp->max_packets_out;
+
+	return tp->is_cwnd_limited;
 }
 
 static inline void tcp_check_probe_timer(struct sock *sk)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3d61c52bdf79..d463c35db33d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1402,11 +1402,19 @@ static void tcp_cwnd_application_limited(struct sock *sk)
 	tp->snd_cwnd_stamp = tcp_time_stamp;
 }
 
-static void tcp_cwnd_validate(struct sock *sk, u32 unsent_segs)
+static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	tp->lsnd_pending = tp->packets_out + unsent_segs;
+	/* Track the maximum number of outstanding packets in each
+	 * window, and remember whether we were cwnd-limited then.
+	 */
+	if (!before(tp->snd_una, tp->max_packets_seq) ||
+	    tp->packets_out > tp->max_packets_out) {
+		tp->max_packets_out = tp->packets_out;
+		tp->max_packets_seq = tp->snd_nxt;
+		tp->is_cwnd_limited = is_cwnd_limited;
+	}
 
 	if (tcp_is_cwnd_limited(sk)) {
 		/* Network is feed fully. */
@@ -1660,7 +1668,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
  *
  * This algorithm is from John Heffner.
  */
-static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
+static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
+				 bool *is_cwnd_limited)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1724,6 +1733,9 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	if (!tp->tso_deferred)
 		tp->tso_deferred = 1 | (jiffies << 1);
 
+	if (cong_win < send_win && cong_win < skb->len)
+		*is_cwnd_limited = true;
+
 	return true;
 
 send_now:
@@ -1881,9 +1893,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	unsigned int tso_segs, sent_pkts, unsent_segs = 0;
+	unsigned int tso_segs, sent_pkts;
 	int cwnd_quota;
 	int result;
+	bool is_cwnd_limited = false;
 
 	sent_pkts = 0;
 
@@ -1908,6 +1921,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 
 		cwnd_quota = tcp_cwnd_test(tp, skb);
 		if (!cwnd_quota) {
+			is_cwnd_limited = true;
 			if (push_one == 2)
 				/* Force out a loss probe pkt. */
 				cwnd_quota = 1;
@@ -1924,8 +1938,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 						      nonagle : TCP_NAGLE_PUSH))))
 				break;
 		} else {
-			if (!push_one && tcp_tso_should_defer(sk, skb))
-				goto compute_unsent_segs;
+			if (!push_one &&
+			    tcp_tso_should_defer(sk, skb, &is_cwnd_limited))
+				break;
 		}
 
 		/* TCP Small Queues :
@@ -1950,14 +1965,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			 * there is no smp_mb__after_set_bit() yet
 			 */
 			smp_mb__after_clear_bit();
-			if (atomic_read(&sk->sk_wmem_alloc) > limit) {
-				u32 unsent_bytes;
-
-compute_unsent_segs:
-				unsent_bytes = tp->write_seq - tp->snd_nxt;
-				unsent_segs = DIV_ROUND_UP(unsent_bytes, mss_now);
+			if (atomic_read(&sk->sk_wmem_alloc) > limit)
 				break;
-			}
 		}
 
 		limit = mss_now;
@@ -1997,7 +2006,7 @@ repair:
 		/* Send one loss probe per tail loss episode. */
 		if (push_one != 2)
 			tcp_schedule_loss_probe(sk);
-		tcp_cwnd_validate(sk, unsent_segs);
+		tcp_cwnd_validate(sk, is_cwnd_limited);
 		return false;
 	}
 	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
-- 
cgit 


From 9edbcd2252b5ef148177c9f2c11a56469cf5db52 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Thu, 17 Apr 2014 19:48:07 +0200
Subject: PCI: Remove pcibios_add_platform_entries()

Remove pcibios_add_platform_entries().  Architecture-specific attributes
can be achieved by setting pdev->dev.groups.

Link: https://lkml.kernel.org/r/alpine.LFD.2.11.1404141101500.1529@denkbrett
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/pci-sysfs.c | 10 ----------
 include/linux/pci.h     |  1 -
 2 files changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 3db1c7ff5dd3..b7333fa5f80d 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1273,11 +1273,6 @@ static struct bin_attribute pcie_config_attr = {
 	.write = pci_write_config,
 };
 
-int __weak pcibios_add_platform_entries(struct pci_dev *dev)
-{
-	return 0;
-}
-
 static ssize_t reset_store(struct device *dev,
 			   struct device_attribute *attr, const char *buf,
 			   size_t count)
@@ -1393,11 +1388,6 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
 		pdev->rom_attr = attr;
 	}
 
-	/* add platform-specific attributes */
-	retval = pcibios_add_platform_entries(pdev);
-	if (retval)
-		goto err_rom_file;
-
 	/* add sysfs entries for various capabilities */
 	retval = pci_create_capabilities_sysfs(pdev);
 	if (retval)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a95aac7ad37f..84182b153b21 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1572,7 +1572,6 @@ extern unsigned long pci_hotplug_io_size;
 extern unsigned long pci_hotplug_mem_size;
 
 /* Architecture-specific versions may override these (weak) */
-int pcibios_add_platform_entries(struct pci_dev *dev);
 void pcibios_disable_device(struct pci_dev *dev);
 void pcibios_set_master(struct pci_dev *dev);
 int pcibios_set_pcie_reset_state(struct pci_dev *dev,
-- 
cgit 


From da08143b85203b581f4a6461b149186b0e9592df Mon Sep 17 00:00:00 2001
From: Michal Kubeček <mkubecek@suse.cz>
Date: Tue, 20 May 2014 08:29:25 +0200
Subject: vlan: more careful checksum features handling

When combining real_dev's features and vlan_features, simple
bitwise AND is used. This doesn't work well for checksum
offloading features as if one set has NETIF_F_HW_CSUM and the
other NETIF_F_IP_CSUM and/or NETIF_F_IPV6_CSUM, we end up with
no checksum offloading. However, from the logical point of view
(how can_checksum_protocol() works), NETIF_F_HW_CSUM contains
the functionality of NETIF_F_IP_CSUM and NETIF_F_IPV6_CSUM so
that the result should be IP/IPV6.

Add helper function netdev_intersect_features() implementing
this logic and use it in vlan_dev_fix_features().

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 14 ++++++++++++++
 net/8021q/vlan_dev.c      |  4 ++--
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2dea98cbbdba..f4ad247fd324 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3153,6 +3153,20 @@ const char *netdev_drivername(const struct net_device *dev);
 
 void linkwatch_run_queue(void);
 
+static inline netdev_features_t netdev_intersect_features(netdev_features_t f1,
+							  netdev_features_t f2)
+{
+	if (f1 & NETIF_F_GEN_CSUM)
+		f1 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+	if (f2 & NETIF_F_GEN_CSUM)
+		f2 |= (NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+	f1 &= f2;
+	if (f1 & NETIF_F_GEN_CSUM)
+		f1 &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
+
+	return f1;
+}
+
 static inline netdev_features_t netdev_get_wanted_features(
 	struct net_device *dev)
 {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 8f025afa29fd..4181fb71ba77 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -678,9 +678,9 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
 	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	netdev_features_t old_features = features;
 
-	features &= real_dev->vlan_features;
+	features = netdev_intersect_features(features, real_dev->vlan_features);
 	features |= NETIF_F_RXCSUM;
-	features &= real_dev->features;
+	features = netdev_intersect_features(features, real_dev->features);
 
 	features |= old_features & NETIF_F_SOFT_FEATURES;
 	features |= NETIF_F_LLTX;
-- 
cgit 


From ea3429c77d4e34cb2983b90e49a5506fedf70b98 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 22 May 2014 09:47:50 -0700
Subject: of: mdio: remove of_phy_connect_fixed_link

All in-tree drivers have been converted to use the new pair of
functions: of_is_fixed_phy_link() plus of_phy_register_fixed_link(), we
can now safely remove of_phy_connect_fixed_link.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/of/of_mdio.c    | 38 --------------------------------------
 include/linux/of_mdio.h | 10 ----------
 2 files changed, 48 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 1def0bb5cb37..4c1e01ed16dc 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -245,44 +245,6 @@ struct phy_device *of_phy_connect(struct net_device *dev,
 }
 EXPORT_SYMBOL(of_phy_connect);
 
-/**
- * of_phy_connect_fixed_link - Parse fixed-link property and return a dummy phy
- * @dev: pointer to net_device claiming the phy
- * @hndlr: Link state callback for the network device
- * @iface: PHY data interface type
- *
- * This function is a temporary stop-gap and will be removed soon.  It is
- * only to support the fs_enet, ucc_geth and gianfar Ethernet drivers.  Do
- * not call this function from new drivers.
- */
-struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
-					     void (*hndlr)(struct net_device *),
-					     phy_interface_t iface)
-{
-	struct device_node *net_np;
-	char bus_id[MII_BUS_ID_SIZE + 3];
-	struct phy_device *phy;
-	const __be32 *phy_id;
-	int sz;
-
-	if (!dev->dev.parent)
-		return NULL;
-
-	net_np = dev->dev.parent->of_node;
-	if (!net_np)
-		return NULL;
-
-	phy_id = of_get_property(net_np, "fixed-link", &sz);
-	if (!phy_id || sz < sizeof(*phy_id))
-		return NULL;
-
-	sprintf(bus_id, PHY_ID_FMT, "fixed-0", be32_to_cpu(phy_id[0]));
-
-	phy = phy_connect(dev, bus_id, hndlr, iface);
-	return IS_ERR(phy) ? NULL : phy;
-}
-EXPORT_SYMBOL(of_phy_connect_fixed_link);
-
 /**
  * of_phy_attach - Attach to a PHY without starting the state machine
  * @dev: pointer to net_device claiming the phy
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 0aa367e316cb..d449018d0726 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -22,9 +22,6 @@ extern struct phy_device *of_phy_connect(struct net_device *dev,
 struct phy_device *of_phy_attach(struct net_device *dev,
 				 struct device_node *phy_np, u32 flags,
 				 phy_interface_t iface);
-extern struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
-					 void (*hndlr)(struct net_device *),
-					 phy_interface_t iface);
 
 extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np);
 
@@ -59,13 +56,6 @@ static inline struct phy_device *of_phy_attach(struct net_device *dev,
 	return NULL;
 }
 
-static inline struct phy_device *of_phy_connect_fixed_link(struct net_device *dev,
-							   void (*hndlr)(struct net_device *),
-							   phy_interface_t iface)
-{
-	return NULL;
-}
-
 static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np)
 {
 	return NULL;
-- 
cgit 


From 16e4d93f6de7063800f3f5e68f064b0ff8fae9b7 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 19 May 2014 13:40:22 -0400
Subject: NFSD: Ignore client's source port on RDMA transports

An NFS/RDMA client's source port is meaningless for RDMA transports.
The transport layer typically sets the source port value on the
connection to a random ephemeral port.

Currently, NFS server administrators must specify the "insecure"
export option to enable clients to access exports via RDMA.

But this means NFS clients can access such an export via IP using an
ephemeral port, which may not be desirable.

This patch eliminates the need to specify the "insecure" export
option to allow NFS/RDMA clients access to an export.

BugLink: https://bugzilla.linux-nfs.org/show_bug.cgi?id=250
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_xprt.h          | 1 +
 net/sunrpc/svc_xprt.c                    | 2 +-
 net/sunrpc/svcsock.c                     | 9 +++++++++
 net/sunrpc/xprtrdma/svc_rdma_transport.c | 7 +++++++
 4 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index b05963f09ebf..0cec1b94c670 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -24,6 +24,7 @@ struct svc_xprt_ops {
 	void		(*xpo_release_rqst)(struct svc_rqst *);
 	void		(*xpo_detach)(struct svc_xprt *);
 	void		(*xpo_free)(struct svc_xprt *);
+	int		(*xpo_secure_port)(struct svc_rqst *);
 };
 
 struct svc_xprt_class {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 06c6ff0cb911..614956f1777e 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -793,7 +793,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 
 	clear_bit(XPT_OLD, &xprt->xpt_flags);
 
-	rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
+	rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp);
 	rqstp->rq_chandle.defer = svc_defer;
 
 	if (serv->sv_stats)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 43bcb4699d69..0cb34f5d58dc 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -400,6 +400,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
 	release_sock(sock->sk);
 #endif
 }
+
+static int svc_sock_secure_port(struct svc_rqst *rqstp)
+{
+	return svc_port_is_privileged(svc_addr(rqstp));
+}
+
 /*
  * INET callback when data has been received on the socket.
  */
@@ -678,6 +684,7 @@ static struct svc_xprt_ops svc_udp_ops = {
 	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
 	.xpo_has_wspace = svc_udp_has_wspace,
 	.xpo_accept = svc_udp_accept,
+	.xpo_secure_port = svc_sock_secure_port,
 };
 
 static struct svc_xprt_class svc_udp_class = {
@@ -1234,6 +1241,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = {
 	.xpo_detach = svc_bc_tcp_sock_detach,
 	.xpo_free = svc_bc_sock_free,
 	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
+	.xpo_secure_port = svc_sock_secure_port,
 };
 
 static struct svc_xprt_class svc_tcp_bc_class = {
@@ -1272,6 +1280,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
 	.xpo_has_wspace = svc_tcp_has_wspace,
 	.xpo_accept = svc_tcp_accept,
+	.xpo_secure_port = svc_sock_secure_port,
 };
 
 static struct svc_xprt_class svc_tcp_class = {
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 25688fa2207f..02db8d9cc994 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -65,6 +65,7 @@ static void dto_tasklet_func(unsigned long data);
 static void svc_rdma_detach(struct svc_xprt *xprt);
 static void svc_rdma_free(struct svc_xprt *xprt);
 static int svc_rdma_has_wspace(struct svc_xprt *xprt);
+static int svc_rdma_secure_port(struct svc_rqst *);
 static void rq_cq_reap(struct svcxprt_rdma *xprt);
 static void sq_cq_reap(struct svcxprt_rdma *xprt);
 
@@ -82,6 +83,7 @@ static struct svc_xprt_ops svc_rdma_ops = {
 	.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
 	.xpo_has_wspace = svc_rdma_has_wspace,
 	.xpo_accept = svc_rdma_accept,
+	.xpo_secure_port = svc_rdma_secure_port,
 };
 
 struct svc_xprt_class svc_rdma_class = {
@@ -1207,6 +1209,11 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
 	return 1;
 }
 
+static int svc_rdma_secure_port(struct svc_rqst *rqstp)
+{
+	return 1;
+}
+
 /*
  * Attempt to register the kvec representing the RPC memory with the
  * device.
-- 
cgit 


From ef11ce24875a8a540adc185e7bce3d7d49c8296f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 12 May 2014 11:22:47 +1000
Subject: SUNRPC: track whether a request is coming from a loop-back interface.

If an incoming NFS request is coming from the local host, then
nfsd will need to perform some special handling.  So detect that
possibility and make the source visible in rq_local.

Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h      |  1 +
 include/linux/sunrpc/svc_xprt.h |  1 +
 net/sunrpc/sunrpc.h             | 13 +++++++++++++
 net/sunrpc/svcsock.c            |  5 +++++
 4 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 04e763221246..a0dbbd1e00e9 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -254,6 +254,7 @@ struct svc_rqst {
 	u32			rq_prot;	/* IP protocol */
 	unsigned short
 				rq_secure  : 1;	/* secure port */
+	unsigned short		rq_local   : 1;	/* local request */
 
 	void *			rq_argp;	/* decoded arguments */
 	void *			rq_resp;	/* xdr'd results */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 0cec1b94c670..7235040a19b2 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -64,6 +64,7 @@ struct svc_xprt {
 #define	XPT_DETACHED	10		/* detached from tempsocks list */
 #define XPT_LISTENER	11		/* listening endpoint */
 #define XPT_CACHE_AUTH	12		/* cache auth info */
+#define XPT_LOCAL	13		/* connection from loopback interface */
 
 	struct svc_serv		*xpt_server;	/* service for transport */
 	atomic_t    	    	xpt_reserved;	/* space on outq that is rsvd */
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 14c9f6d1c5ff..f2b7cb540e61 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -43,6 +43,19 @@ static inline int rpc_reply_expected(struct rpc_task *task)
 		(task->tk_msg.rpc_proc->p_decode != NULL);
 }
 
+static inline int sock_is_loopback(struct sock *sk)
+{
+	struct dst_entry *dst;
+	int loopback = 0;
+	rcu_read_lock();
+	dst = rcu_dereference(sk->sk_dst_cache);
+	if (dst && dst->dev &&
+	    (dst->dev->features & NETIF_F_LOOPBACK))
+		loopback = 1;
+	rcu_read_unlock();
+	return loopback;
+}
+
 int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
 		    struct page *headpage, unsigned long headoffset,
 		    struct page *tailpage, unsigned long tailoffset);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0cb34f5d58dc..f3b8eb309d01 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -874,6 +874,10 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
 	}
 	svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
 
+	if (sock_is_loopback(newsock->sk))
+		set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
+	else
+		clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
 	if (serv->sv_stats)
 		serv->sv_stats->nettcpconn++;
 
@@ -1119,6 +1123,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
 
 	rqstp->rq_xprt_ctxt   = NULL;
 	rqstp->rq_prot	      = IPPROTO_TCP;
+	rqstp->rq_local	      = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags);
 
 	p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
 	calldir = p[1];
-- 
cgit 


From ecc8fb11cdb37d108d4597ba0f6bdff77c6019af Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.com>
Date: Thu, 22 May 2014 15:55:39 +0300
Subject: net/mlx4_core: Deprecate use_prio module parameter

use_prio was added as part of an infrastructure for running FCoE in A0 mode.
FCoE didn't get into the upstream Mellanox driver, and when it does, it won't
be using A0 steering mode.

Therefore we can safely deprecate this module parameter without hurting any
existing user.

CC: Carol Soto <clsoto@linux.vnet.ibm.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 8 ++++----
 include/linux/mlx4/device.h               | 1 -
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index a56f6012258d..08ff5dd9298f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -132,8 +132,7 @@ MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
 
 static bool use_prio;
 module_param_named(use_prio, use_prio, bool, 0444);
-MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
-		  "(0/1, default 0)");
+MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");
 
 int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
@@ -290,7 +289,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 
 	dev->caps.log_num_macs  = log_num_mac;
 	dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
-	dev->caps.log_num_prios = use_prio ? 3 : 0;
 
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
 		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
@@ -358,7 +356,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
 		(1 << dev->caps.log_num_macs) *
 		(1 << dev->caps.log_num_vlans) *
-		(1 << dev->caps.log_num_prios) *
 		dev->caps.num_ports;
 	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;
 
@@ -2775,6 +2772,9 @@ static int __init mlx4_verify_params(void)
 		pr_warning("mlx4_core: log_num_vlan - obsolete module param, using %d\n",
 			   MLX4_LOG_NUM_VLANS);
 
+	if (use_prio != 0)
+		pr_warn("mlx4_core: use_prio - obsolete module param, ignored\n");
+
 	if ((log_mtts_per_seg < 1) || (log_mtts_per_seg > 7)) {
 		pr_warning("mlx4_core: bad log_mtts_per_seg: %d\n", log_mtts_per_seg);
 		return -1;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index c0468e6f0442..ca38871a585c 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -449,7 +449,6 @@ struct mlx4_caps {
 	int                     reserved_qps_base[MLX4_NUM_QP_REGION];
 	int                     log_num_macs;
 	int                     log_num_vlans;
-	int                     log_num_prios;
 	enum mlx4_port_type	port_type[MLX4_MAX_PORTS + 1];
 	u8			supported_type[MLX4_MAX_PORTS + 1];
 	u8                      suggested_type[MLX4_MAX_PORTS + 1];
-- 
cgit 


From 11d200e95f3e84c1102e4cc9863a3614fd41f3ad Mon Sep 17 00:00:00 2001
From: Grant Likely <grant.likely@linaro.org>
Date: Fri, 14 Mar 2014 17:00:14 +0000
Subject: lib: add glibc style strchrnul() variant

The strchrnul() variant helpfully returns a pointer to the end of the
string instead of NULL if the requested character is not found. This can
simplify string parsing code since it doesn't need to explicitly check
for a NULL return. If a valid string pointer is passed in, then a valid
null terminated string will always come back out.

Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 include/linux/string.h |  3 +++
 lib/string.c           | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index ac889c5ea11b..d36977e029af 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -52,6 +52,9 @@ extern int strncasecmp(const char *s1, const char *s2, size_t n);
 #ifndef __HAVE_ARCH_STRCHR
 extern char * strchr(const char *,int);
 #endif
+#ifndef __HAVE_ARCH_STRCHRNUL
+extern char * strchrnul(const char *,int);
+#endif
 #ifndef __HAVE_ARCH_STRNCHR
 extern char * strnchr(const char *, size_t, int);
 #endif
diff --git a/lib/string.c b/lib/string.c
index 9b1f9062a202..e0c20eb362f0 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -301,6 +301,24 @@ char *strchr(const char *s, int c)
 EXPORT_SYMBOL(strchr);
 #endif
 
+#ifndef __HAVE_ARCH_STRCHRNUL
+/**
+ * strchrnul - Find and return a character in a string, or end of string
+ * @s: The string to be searched
+ * @c: The character to search for
+ *
+ * Returns pointer to first occurrence of 'c' in s. If c is not found, then
+ * return a pointer to the null byte at the end of s.
+ */
+char *strchrnul(const char *s, int c)
+{
+	while (*s && *s != (char)c)
+		s++;
+	return (char *)s;
+}
+EXPORT_SYMBOL(strchrnul);
+#endif
+
 #ifndef __HAVE_ARCH_STRRCHR
 /**
  * strrchr - Find the last occurrence of a character in a string
-- 
cgit 


From ad69674e73a18dc3a8da557f4059ccf9389531a5 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Tue, 20 May 2014 13:42:02 +0300
Subject: of/irq: do irq resolution in platform_get_irq_byname()

The commit 9ec36cafe43bf835f8f29273597a5b0cbc8267ef
"of/irq: do irq resolution in platform_get_irq" from Rob Herring -
moves resolution of the interrupt resources into platform_get_irq().
But this solution isn't complete because platform_get_irq_byname()
needs to be modified the same way.

Hence, fix it by adding interrupt resolution code at the
platform_get_irq_byname() function too.

Cc: Russell King <linux@arm.linux.org.uk>
Cc: Rob Herring <robh@kernel.org>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Thierry Reding <thierry.reding@gmail.com>

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Grant Likely <grant.likely@linaro.org>
---
 drivers/base/platform.c |  7 +++++--
 drivers/of/irq.c        | 22 ++++++++++++++++++++++
 include/linux/of_irq.h  |  5 +++++
 3 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 5b47210889e0..9e9227e1762d 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -131,9 +131,12 @@ EXPORT_SYMBOL_GPL(platform_get_resource_byname);
  */
 int platform_get_irq_byname(struct platform_device *dev, const char *name)
 {
-	struct resource *r = platform_get_resource_byname(dev, IORESOURCE_IRQ,
-							  name);
+	struct resource *r;
+
+	if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node)
+		return of_irq_get_byname(dev->dev.of_node, name);
 
+	r = platform_get_resource_byname(dev, IORESOURCE_IRQ, name);
 	return r ? r->start : -ENXIO;
 }
 EXPORT_SYMBOL_GPL(platform_get_irq_byname);
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 5aeb89411350..3e06a699352d 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -405,6 +405,28 @@ int of_irq_get(struct device_node *dev, int index)
 	return irq_create_of_mapping(&oirq);
 }
 
+/**
+ * of_irq_get_byname - Decode a node's IRQ and return it as a Linux irq number
+ * @dev: pointer to device tree node
+ * @name: irq name
+ *
+ * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain
+ * is not yet created, or error code in case of any other failure.
+ */
+int of_irq_get_byname(struct device_node *dev, const char *name)
+{
+	int index;
+
+	if (unlikely(!name))
+		return -EINVAL;
+
+	index = of_property_match_string(dev, "interrupt-names", name);
+	if (index < 0)
+		return index;
+
+	return of_irq_get(dev, index);
+}
+
 /**
  * of_irq_count - Count the number of IRQs a node uses
  * @dev: pointer to device tree node
diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h
index 6404253d810d..bfec136a6d1e 100644
--- a/include/linux/of_irq.h
+++ b/include/linux/of_irq.h
@@ -45,6 +45,7 @@ extern void of_irq_init(const struct of_device_id *matches);
 #ifdef CONFIG_OF_IRQ
 extern int of_irq_count(struct device_node *dev);
 extern int of_irq_get(struct device_node *dev, int index);
+extern int of_irq_get_byname(struct device_node *dev, const char *name);
 #else
 static inline int of_irq_count(struct device_node *dev)
 {
@@ -54,6 +55,10 @@ static inline int of_irq_get(struct device_node *dev, int index)
 {
 	return 0;
 }
+static inline int of_irq_get_byname(struct device_node *dev, const char *name)
+{
+	return 0;
+}
 #endif
 
 #if defined(CONFIG_OF)
-- 
cgit 


From 9e9dc7d9597bd6881b3e7ae6ae3d710319605c47 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 8 May 2014 23:16:34 +0200
Subject: mfd: stmpe: root out static GPIO and IRQ assignments

The only platform using the STMPE expander now boots from
device tree using all-dynamic GPIO and IRQ number assignments, so
remove the mechanism to pass static numbers via platform data entirely.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-stmpe.c | 18 +++++-------------
 drivers/mfd/stmpe.c       |  6 +-----
 include/linux/mfd/stmpe.h | 14 --------------
 3 files changed, 6 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c
index 2776a09bee58..628b58494294 100644
--- a/drivers/gpio/gpio-stmpe.c
+++ b/drivers/gpio/gpio-stmpe.c
@@ -23,7 +23,8 @@
 enum { REG_RE, REG_FE, REG_IE };
 
 #define CACHE_NR_REGS	3
-#define CACHE_NR_BANKS	(STMPE_NR_GPIOS / 8)
+/* No variant has more than 24 GPIOs */
+#define CACHE_NR_BANKS	(24 / 8)
 
 struct stmpe_gpio {
 	struct gpio_chip chip;
@@ -31,8 +32,6 @@ struct stmpe_gpio {
 	struct device *dev;
 	struct mutex irq_lock;
 	struct irq_domain *domain;
-
-	int irq_base;
 	unsigned norequest_mask;
 
 	/* Caches of interrupt control registers for bus_lock */
@@ -311,13 +310,8 @@ static const struct irq_domain_ops stmpe_gpio_irq_simple_ops = {
 static int stmpe_gpio_irq_init(struct stmpe_gpio *stmpe_gpio,
 		struct device_node *np)
 {
-	int base = 0;
-
-	if (!np)
-		base = stmpe_gpio->irq_base;
-
 	stmpe_gpio->domain = irq_domain_add_simple(np,
-				stmpe_gpio->chip.ngpio, base,
+				stmpe_gpio->chip.ngpio, 0,
 				&stmpe_gpio_irq_simple_ops, stmpe_gpio);
 	if (!stmpe_gpio->domain) {
 		dev_err(stmpe_gpio->dev, "failed to create irqdomain\n");
@@ -354,7 +348,7 @@ static int stmpe_gpio_probe(struct platform_device *pdev)
 #ifdef CONFIG_OF
 	stmpe_gpio->chip.of_node = np;
 #endif
-	stmpe_gpio->chip.base = pdata ? pdata->gpio_base : -1;
+	stmpe_gpio->chip.base = -1;
 
 	if (pdata)
 		stmpe_gpio->norequest_mask = pdata->norequest_mask;
@@ -362,9 +356,7 @@ static int stmpe_gpio_probe(struct platform_device *pdev)
 		of_property_read_u32(np, "st,norequest-mask",
 				&stmpe_gpio->norequest_mask);
 
-	if (irq >= 0)
-		stmpe_gpio->irq_base = stmpe->irq_base + STMPE_INT_GPIO(0);
-	else
+	if (irq < 0)
 		dev_info(&pdev->dev,
 			"device configured in no-irq mode; "
 			"irqs are not available\n");
diff --git a/drivers/mfd/stmpe.c b/drivers/mfd/stmpe.c
index 294731be1a15..3b6bfa7184ad 100644
--- a/drivers/mfd/stmpe.c
+++ b/drivers/mfd/stmpe.c
@@ -996,9 +996,6 @@ static int stmpe_irq_init(struct stmpe *stmpe, struct device_node *np)
 	int base = 0;
 	int num_irqs = stmpe->variant->num_irqs;
 
-	if (!np)
-		base = stmpe->irq_base;
-
 	stmpe->domain = irq_domain_add_simple(np, num_irqs, base,
 					      &stmpe_irq_ops, stmpe);
 	if (!stmpe->domain) {
@@ -1077,7 +1074,7 @@ static int stmpe_chip_init(struct stmpe *stmpe)
 static int stmpe_add_device(struct stmpe *stmpe, const struct mfd_cell *cell)
 {
 	return mfd_add_devices(stmpe->dev, stmpe->pdata->id, cell, 1,
-			       NULL, stmpe->irq_base, stmpe->domain);
+			       NULL, 0, stmpe->domain);
 }
 
 static int stmpe_devices_init(struct stmpe *stmpe)
@@ -1181,7 +1178,6 @@ int stmpe_probe(struct stmpe_client_info *ci, int partnum)
 	stmpe->dev = ci->dev;
 	stmpe->client = ci->client;
 	stmpe->pdata = pdata;
-	stmpe->irq_base = pdata->irq_base;
 	stmpe->ci = ci;
 	stmpe->partnum = partnum;
 	stmpe->variant = stmpe_variant_info[partnum];
diff --git a/include/linux/mfd/stmpe.h b/include/linux/mfd/stmpe.h
index 980898620e57..575a86c7fcbd 100644
--- a/include/linux/mfd/stmpe.h
+++ b/include/linux/mfd/stmpe.h
@@ -76,7 +76,6 @@ struct stmpe_client_info;
  * @regs: list of addresses of registers which are at different addresses on
  *	  different variants.  Indexed by one of STMPE_IDX_*.
  * @irq: irq number for stmpe
- * @irq_base: starting IRQ number for internal IRQs
  * @num_gpios: number of gpios, differs for variants
  * @ier: cache of IER registers for bus_lock
  * @oldier: cache of IER registers for bus_lock
@@ -96,7 +95,6 @@ struct stmpe {
 	const u8 *regs;
 
 	int irq;
-	int irq_base;
 	int num_gpios;
 	u8 ier[2];
 	u8 oldier[2];
@@ -137,8 +135,6 @@ struct stmpe_keypad_platform_data {
 
 /**
  * struct stmpe_gpio_platform_data - STMPE GPIO platform data
- * @gpio_base: first gpio number assigned.  A maximum of
- *	       %STMPE_NR_GPIOS GPIOs will be allocated.
  * @norequest_mask: bitmask specifying which GPIOs should _not_ be
  *		    requestable due to different usage (e.g. touch, keypad)
  *		    STMPE_GPIO_NOREQ_* macros can be used here.
@@ -146,7 +142,6 @@ struct stmpe_keypad_platform_data {
  * @remove: board specific remove callback
  */
 struct stmpe_gpio_platform_data {
-	int gpio_base;
 	unsigned norequest_mask;
 	void (*setup)(struct stmpe *stmpe, unsigned gpio_base);
 	void (*remove)(struct stmpe *stmpe, unsigned gpio_base);
@@ -200,8 +195,6 @@ struct stmpe_ts_platform_data {
  * @irq_trigger: IRQ trigger to use for the interrupt to the host
  * @autosleep: bool to enable/disable stmpe autosleep
  * @autosleep_timeout: inactivity timeout in milliseconds for autosleep
- * @irq_base: base IRQ number.  %STMPE_NR_IRQS irqs will be used, or
- *	      %STMPE_NR_INTERNAL_IRQS if the GPIO driver is not used.
  * @irq_over_gpio: true if gpio is used to get irq
  * @irq_gpio: gpio number over which irq will be requested (significant only if
  *	      irq_over_gpio is true)
@@ -212,7 +205,6 @@ struct stmpe_ts_platform_data {
 struct stmpe_platform_data {
 	int id;
 	unsigned int blocks;
-	int irq_base;
 	unsigned int irq_trigger;
 	bool autosleep;
 	bool irq_over_gpio;
@@ -224,10 +216,4 @@ struct stmpe_platform_data {
 	struct stmpe_ts_platform_data *ts;
 };
 
-#define STMPE_NR_INTERNAL_IRQS	9
-#define STMPE_INT_GPIO(x)	(STMPE_NR_INTERNAL_IRQS + (x))
-
-#define STMPE_NR_GPIOS		24
-#define STMPE_NR_IRQS		STMPE_INT_GPIO(STMPE_NR_GPIOS)
-
 #endif
-- 
cgit 


From ad0f614e4723db8cead439cf414108cbf975b224 Mon Sep 17 00:00:00 2001
From: Masatake YAMATO <yamato@redhat.com>
Date: Thu, 22 May 2014 11:54:20 -0700
Subject: wait: swap EXIT_ZOMBIE(Z) and EXIT_DEAD(X) chars in
 TASK_STATE_TO_CHAR_STR

In commit ad86622b478e ("wait: swap EXIT_ZOMBIE and EXIT_DEAD to hide
EXIT_TRACE from user-space") the order of the task state definitions was
changed: EXIT_DEAD and EXIT_ZOMBIE were swapped.  However, the characters
for the states in the TASK_STATE_TO_CHAR_STR string were not updated.  This
patch synchronizes the string with the order of the definitions.

Signed-off-by: Masatake YAMATO <yamato@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 25f54c79f757..21fbdae61b9e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -220,7 +220,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
 #define TASK_PARKED		512
 #define TASK_STATE_MAX		1024
 
-#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
+#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP"
 
 extern char ___assert_task_state[1 - 2*!!(
 		sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
-- 
cgit 


From 43339bed7010da6e7cf797db3216a136a974a0cd Mon Sep 17 00:00:00 2001
From: Eli Billauer <eli.billauer@gmail.com>
Date: Fri, 16 May 2014 11:26:35 +0300
Subject: devres: Add devm_get_free_pages API

devm_get_free_pages() and devm_free_pages() are the managed counterparts
for __get_free_pages() and free_pages().

Signed-off-by: Eli Billauer <eli.billauer@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-model/devres.txt |  2 +
 drivers/base/devres.c                 | 76 +++++++++++++++++++++++++++++++++++
 include/linux/device.h                |  4 ++
 3 files changed, 82 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
index 499951873997..e1a27074caae 100644
--- a/Documentation/driver-model/devres.txt
+++ b/Documentation/driver-model/devres.txt
@@ -237,6 +237,8 @@ MEM
   devm_kzalloc()
   devm_kfree()
   devm_kmemdup()
+  devm_get_free_pages()
+  devm_free_pages()
 
 IIO
   devm_iio_device_alloc()
diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index d0914cba2413..52302946770f 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -852,3 +852,79 @@ void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp)
 	return p;
 }
 EXPORT_SYMBOL_GPL(devm_kmemdup);
+
+struct pages_devres {
+	unsigned long addr;
+	unsigned int order;
+};
+
+static int devm_pages_match(struct device *dev, void *res, void *p)
+{
+	struct pages_devres *devres = res;
+	struct pages_devres *target = p;
+
+	return devres->addr == target->addr;
+}
+
+static void devm_pages_release(struct device *dev, void *res)
+{
+	struct pages_devres *devres = res;
+
+	free_pages(devres->addr, devres->order);
+}
+
+/**
+ * devm_get_free_pages - Resource-managed __get_free_pages
+ * @dev: Device to allocate memory for
+ * @gfp_mask: Allocation gfp flags
+ * @order: Allocation size is (1 << order) pages
+ *
+ * Managed get_free_pages.  Memory allocated with this function is
+ * automatically freed on driver detach.
+ *
+ * RETURNS:
+ * Address of allocated memory on success, 0 on failure.
+ */
+
+unsigned long devm_get_free_pages(struct device *dev,
+				  gfp_t gfp_mask, unsigned int order)
+{
+	struct pages_devres *devres;
+	unsigned long addr;
+
+	addr = __get_free_pages(gfp_mask, order);
+
+	if (unlikely(!addr))
+		return 0;
+
+	devres = devres_alloc(devm_pages_release,
+			      sizeof(struct pages_devres), GFP_KERNEL);
+	if (unlikely(!devres)) {
+		free_pages(addr, order);
+		return 0;
+	}
+
+	devres->addr = addr;
+	devres->order = order;
+
+	devres_add(dev, devres);
+	return addr;
+}
+EXPORT_SYMBOL_GPL(devm_get_free_pages);
+
+/**
+ * devm_free_pages - Resource-managed free_pages
+ * @dev: Device this memory belongs to
+ * @addr: Memory to free
+ *
+ * Free memory allocated with devm_get_free_pages(). Unlike free_pages,
+ * there is no need to supply the @order.
+ */
+void devm_free_pages(struct device *dev, unsigned long addr)
+{
+	struct pages_devres devres = { .addr = addr };
+
+	WARN_ON(devres_release(dev, devm_pages_release, devm_pages_match,
+			       &devres));
+}
+EXPORT_SYMBOL_GPL(devm_free_pages);
diff --git a/include/linux/device.h b/include/linux/device.h
index ab871588da89..3dc69a2faa51 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -626,6 +626,10 @@ extern char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp);
 extern void *devm_kmemdup(struct device *dev, const void *src, size_t len,
 			  gfp_t gfp);
 
+extern unsigned long devm_get_free_pages(struct device *dev,
+					 gfp_t gfp_mask, unsigned int order);
+extern void devm_free_pages(struct device *dev, unsigned long addr);
+
 void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res);
 void __iomem *devm_request_and_ioremap(struct device *dev,
 			struct resource *res);
-- 
cgit 


From 9f8c0fe9542141fd0008d5c0f6ae365890f6da94 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Fri, 23 May 2014 16:44:10 +0100
Subject: regulator: Constify the pointer to alias name array

Toughen-up checks for read-only regulator names.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/regulator/core.c           |  7 ++++---
 drivers/regulator/devres.c         |  6 +++---
 include/linux/mfd/core.h           |  2 +-
 include/linux/regulator/consumer.h | 36 ++++++++++++++++++++----------------
 4 files changed, 28 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 9a09f3cdbabb..ba28d29b66d2 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1597,9 +1597,10 @@ EXPORT_SYMBOL_GPL(regulator_unregister_supply_alias);
  * registered any aliases that were registered will be removed
  * before returning to the caller.
  */
-int regulator_bulk_register_supply_alias(struct device *dev, const char **id,
+int regulator_bulk_register_supply_alias(struct device *dev,
+					 const char *const *id,
 					 struct device *alias_dev,
-					 const char **alias_id,
+					 const char *const *alias_id,
 					 int num_id)
 {
 	int i;
@@ -1637,7 +1638,7 @@ EXPORT_SYMBOL_GPL(regulator_bulk_register_supply_alias);
  * aliases in one operation.
  */
 void regulator_bulk_unregister_supply_alias(struct device *dev,
-					    const char **id,
+					    const char *const *id,
 					    int num_id)
 {
 	int i;
diff --git a/drivers/regulator/devres.c b/drivers/regulator/devres.c
index f44818b838dc..8f785bc9e510 100644
--- a/drivers/regulator/devres.c
+++ b/drivers/regulator/devres.c
@@ -360,9 +360,9 @@ EXPORT_SYMBOL_GPL(devm_regulator_unregister_supply_alias);
  * will be removed before returning to the caller.
  */
 int devm_regulator_bulk_register_supply_alias(struct device *dev,
-					      const char **id,
+					      const char *const *id,
 					      struct device *alias_dev,
-					      const char **alias_id,
+					      const char *const *alias_id,
 					      int num_id)
 {
 	int i;
@@ -404,7 +404,7 @@ EXPORT_SYMBOL_GPL(devm_regulator_bulk_register_supply_alias);
  * will ensure that the resource is freed.
  */
 void devm_regulator_bulk_unregister_supply_alias(struct device *dev,
-						 const char **id,
+						 const char *const *id,
 						 int num_id)
 {
 	int i;
diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h
index bdba8c61207b..f543de91ce19 100644
--- a/include/linux/mfd/core.h
+++ b/include/linux/mfd/core.h
@@ -63,7 +63,7 @@ struct mfd_cell {
 	/* A list of regulator supplies that should be mapped to the MFD
 	 * device rather than the child device when requested
 	 */
-	const char		**parent_supplies;
+	const char * const	*parent_supplies;
 	int			num_parent_supplies;
 };
 
diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index e530681bea70..10d0a53f4cd3 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -151,11 +151,13 @@ int regulator_register_supply_alias(struct device *dev, const char *id,
 				    const char *alias_id);
 void regulator_unregister_supply_alias(struct device *dev, const char *id);
 
-int regulator_bulk_register_supply_alias(struct device *dev, const char **id,
+int regulator_bulk_register_supply_alias(struct device *dev,
+					 const char *const *id,
 					 struct device *alias_dev,
-					 const char **alias_id, int num_id);
+					 const char *const *alias_id,
+					 int num_id);
 void regulator_bulk_unregister_supply_alias(struct device *dev,
-					    const char **id, int num_id);
+					    const char * const *id, int num_id);
 
 int devm_regulator_register_supply_alias(struct device *dev, const char *id,
 					 struct device *alias_dev,
@@ -164,12 +166,12 @@ void devm_regulator_unregister_supply_alias(struct device *dev,
 					    const char *id);
 
 int devm_regulator_bulk_register_supply_alias(struct device *dev,
-					      const char **id,
+					      const char *const *id,
 					      struct device *alias_dev,
-					      const char **alias_id,
+					      const char *const *alias_id,
 					      int num_id);
 void devm_regulator_bulk_unregister_supply_alias(struct device *dev,
-						 const char **id,
+						 const char *const *id,
 						 int num_id);
 
 /* regulator output control and status */
@@ -290,17 +292,17 @@ static inline void regulator_unregister_supply_alias(struct device *dev,
 }
 
 static inline int regulator_bulk_register_supply_alias(struct device *dev,
-						       const char **id,
-						       struct device *alias_dev,
-						       const char **alias_id,
-						       int num_id)
+						const char *const *id,
+						struct device *alias_dev,
+						const char * const *alias_id,
+						int num_id)
 {
 	return 0;
 }
 
 static inline void regulator_bulk_unregister_supply_alias(struct device *dev,
-							  const char **id,
-							  int num_id)
+						const char * const *id,
+						int num_id)
 {
 }
 
@@ -317,15 +319,17 @@ static inline void devm_regulator_unregister_supply_alias(struct device *dev,
 {
 }
 
-static inline int devm_regulator_bulk_register_supply_alias(
-		struct device *dev, const char **id, struct device *alias_dev,
-		const char **alias_id, int num_id)
+static inline int devm_regulator_bulk_register_supply_alias(struct device *dev,
+						const char *const *id,
+						struct device *alias_dev,
+						const char *const *alias_id,
+						int num_id)
 {
 	return 0;
 }
 
 static inline void devm_regulator_bulk_unregister_supply_alias(
-		struct device *dev, const char **id, int num_id)
+	struct device *dev, const char *const *id, int num_id)
 {
 }
 
-- 
cgit 


From ed616689a3d95eb6c9bdbb1ef74b0f50cbdf276a Mon Sep 17 00:00:00 2001
From: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Date: Thu, 22 May 2014 09:59:05 -0400
Subject: net-next:v4: Add support to configure SR-IOV VF minimum and maximum
 Tx rate through ip tool.

o min_tx_rate puts lower limit on the VF bandwidth. VF is guaranteed
  to have a bandwidth of at least this value.
  max_tx_rate puts cap on the VF bandwidth. VF can have a bandwidth
  of up to this value.

o A new handler set_vf_rate for attr IFLA_VF_RATE has been introduced
  which takes 4 arguments:
  netdev, VF number, min_tx_rate, max_tx_rate

o ndo_set_vf_rate replaces ndo_set_vf_tx_rate handler.

o Drivers that currently implement ndo_set_vf_tx_rate should now implement
  ndo_set_vf_rate instead and reject any attempt to set a minimum bandwidth
  greater than 0 for IFLA_VF_TX_RATE when IFLA_VF_RATE is not yet
  implemented by the driver.

o If user enters only one of either min_tx_rate or max_tx_rate, then,
  userland should read back the other value from driver and set both
  for IFLA_VF_RATE.
  Drivers that have not yet implemented IFLA_VF_RATE should always
  return min_tx_rate as 0 when read from ip tool.

o If both IFLA_VF_TX_RATE and IFLA_VF_RATE options are specified, then
  IFLA_VF_RATE should override.

o Idea is to have consistent display of rate values to user.

o Usage example: -

  ./ip link set p4p1 vf 0 rate 900

  ./ip link show p4p1
  32: p4p1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode
  DEFAULT qlen 1000
    link/ether 00:0e:1e:08:b0:f0 brd ff:ff:ff:ff:ff:ff
    vf 0 MAC 3e:a0:ca:bd:ae:5a, tx rate 900 (Mbps), max_tx_rate 900Mbps
    vf 1 MAC f6:c6:7c:3f:3d:6c
    vf 2 MAC 56:32:43:98:d7:71
    vf 3 MAC d6:be:c3:b5:85:ff
    vf 4 MAC ee:a9:9a:1e:19:14
    vf 5 MAC 4a:d0:4c:07:52:18
    vf 6 MAC 3a:76:44:93:62:f9
    vf 7 MAC 82:e9:e7:e3:15:1a

  ./ip link set p4p1 vf 0 max_tx_rate 300 min_tx_rate 200

  ./ip link show p4p1
  32: p4p1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode
  DEFAULT qlen 1000
    link/ether 00:0e:1e:08:b0:f0 brd ff:ff:ff:ff:ff:ff
    vf 0 MAC 3e:a0:ca:bd:ae:5a, tx rate 300 (Mbps), max_tx_rate 300Mbps,
    min_tx_rate 200Mbps
    vf 1 MAC f6:c6:7c:3f:3d:6c
    vf 2 MAC 56:32:43:98:d7:71
    vf 3 MAC d6:be:c3:b5:85:ff
    vf 4 MAC ee:a9:9a:1e:19:14
    vf 5 MAC 4a:d0:4c:07:52:18
    vf 6 MAC 3a:76:44:93:62:f9
    vf 7 MAC 82:e9:e7:e3:15:1a

  ./ip link set p4p1 vf 0 max_tx_rate 600 rate 300

  ./ip link show p4p1
  32: p4p1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode
  DEFAULT qlen 1000
    link/ether 00:0e:1e:08:b0:f0 brd ff:ff:ff:ff:ff:ff
    vf 0 MAC 3e:a0:ca:bd:ae:5a, tx rate 600 (Mbps), max_tx_rate 600Mbps,
    min_tx_rate 200Mbps
    vf 1 MAC f6:c6:7c:3f:3d:6c
    vf 2 MAC 56:32:43:98:d7:71
    vf 3 MAC d6:be:c3:b5:85:ff
    vf 4 MAC ee:a9:9a:1e:19:14
    vf 5 MAC 4a:d0:4c:07:52:18
    vf 6 MAC 3a:76:44:93:62:f9
    vf 7 MAC 82:e9:e7:e3:15:1a

Signed-off-by: Sucheta Chakraborty <sucheta.chakraborty@qlogic.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c  |  3 +-
 drivers/net/ethernet/emulex/benet/be_main.c        | 21 +++++----
 drivers/net/ethernet/intel/i40e/i40e_main.c        |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 26 +++++++----
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h |  3 +-
 drivers/net/ethernet/intel/igb/igb_main.c          | 20 ++++++---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c      |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c     | 13 ++++--
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h     |  3 +-
 drivers/net/ethernet/mellanox/mlx4/cmd.c           | 11 ++---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h        |  1 +
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c   |  2 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h  |  2 +-
 .../ethernet/qlogic/qlcnic/qlcnic_sriov_common.c   |  1 +
 .../net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c   | 52 +++++++++++++++-------
 drivers/net/ethernet/sfc/siena_sriov.c             |  3 +-
 include/linux/if_link.h                            |  3 +-
 include/linux/netdevice.h                          |  8 ++--
 include/uapi/linux/if_link.h                       |  9 +++-
 net/core/rtnetlink.c                               | 38 +++++++++++++---
 20 files changed, 156 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 81cc2d9831c2..8d0479d5be8e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -2576,7 +2576,8 @@ int bnx2x_get_vf_config(struct net_device *dev, int vfidx,
 
 	ivi->vf = vfidx;
 	ivi->qos = 0;
-	ivi->tx_rate = 10000; /* always 10G. TBA take from link struct */
+	ivi->max_tx_rate = 10000; /* always 10G. TBA take from link struct */
+	ivi->min_tx_rate = 0;
 	ivi->spoofchk = 1; /*always enabled */
 	if (vf->state == VF_ENABLED) {
 		/* mac and vlan are in vlan_mac objects */
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index dcc5e5c69743..4693d004a223 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1302,7 +1302,8 @@ static int be_get_vf_config(struct net_device *netdev, int vf,
 		return -EINVAL;
 
 	vi->vf = vf;
-	vi->tx_rate = vf_cfg->tx_rate;
+	vi->max_tx_rate = vf_cfg->tx_rate;
+	vi->min_tx_rate = 0;
 	vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
 	vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
 	memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
@@ -1342,7 +1343,8 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
 	return status;
 }
 
-static int be_set_vf_tx_rate(struct net_device *netdev, int vf, int rate)
+static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
+			     int min_tx_rate, int max_tx_rate)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 	int status = 0;
@@ -1353,18 +1355,21 @@ static int be_set_vf_tx_rate(struct net_device *netdev, int vf, int rate)
 	if (vf >= adapter->num_vfs)
 		return -EINVAL;
 
-	if (rate < 100 || rate > 10000) {
+	if (min_tx_rate)
+		return -EINVAL;
+
+	if (max_tx_rate < 100 || max_tx_rate > 10000) {
 		dev_err(&adapter->pdev->dev,
-			"tx rate must be between 100 and 10000 Mbps\n");
+			"max tx rate must be between 100 and 10000 Mbps\n");
 		return -EINVAL;
 	}
 
-	status = be_cmd_config_qos(adapter, rate / 10, vf + 1);
+	status = be_cmd_config_qos(adapter, max_tx_rate / 10, vf + 1);
 	if (status)
 		dev_err(&adapter->pdev->dev,
-			"tx rate %d on VF %d failed\n", rate, vf);
+			"max tx rate %d on VF %d failed\n", max_tx_rate, vf);
 	else
-		adapter->vf_cfg[vf].tx_rate = rate;
+		adapter->vf_cfg[vf].tx_rate = max_tx_rate;
 	return status;
 }
 static int be_set_vf_link_state(struct net_device *netdev, int vf,
@@ -4257,7 +4262,7 @@ static const struct net_device_ops be_netdev_ops = {
 	.ndo_vlan_rx_kill_vid	= be_vlan_rem_vid,
 	.ndo_set_vf_mac		= be_set_vf_mac,
 	.ndo_set_vf_vlan	= be_set_vf_vlan,
-	.ndo_set_vf_tx_rate	= be_set_vf_tx_rate,
+	.ndo_set_vf_rate	= be_set_vf_tx_rate,
 	.ndo_get_vf_config	= be_get_vf_config,
 	.ndo_set_vf_link_state  = be_set_vf_link_state,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index e0e5c6a867b1..96f7fabd8758 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6724,7 +6724,7 @@ static const struct net_device_ops i40e_netdev_ops = {
 	.ndo_set_features	= i40e_set_features,
 	.ndo_set_vf_mac		= i40e_ndo_set_vf_mac,
 	.ndo_set_vf_vlan	= i40e_ndo_set_vf_port_vlan,
-	.ndo_set_vf_tx_rate	= i40e_ndo_set_vf_bw,
+	.ndo_set_vf_rate	= i40e_ndo_set_vf_bw,
 	.ndo_get_vf_config	= i40e_ndo_get_vf_config,
 	.ndo_set_vf_link_state	= i40e_ndo_set_vf_link_state,
 #ifdef CONFIG_I40E_VXLAN
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 4d219566a04d..8564b0939dc4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -2205,7 +2205,8 @@ error_pvid:
  *
  * configure vf tx rate
  **/
-int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate)
+int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+		       int max_tx_rate)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_pf *pf = np->vsi->back;
@@ -2221,6 +2222,12 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate)
 		goto error;
 	}
 
+	if (min_tx_rate) {
+		dev_err(&pf->pdev->dev, "Invalid min tx rate (%d) (greater than 0) specified for vf %d.\n",
+			min_tx_rate, vf_id);
+		return -EINVAL;
+	}
+
 	vf = &(pf->vf[vf_id]);
 	vsi = pf->vsi[vf->lan_vsi_index];
 	if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) {
@@ -2243,23 +2250,23 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate)
 		break;
 	}
 
-	if (tx_rate > speed) {
-		dev_err(&pf->pdev->dev, "Invalid tx rate %d specified for vf %d.",
-			tx_rate, vf->vf_id);
+	if (max_tx_rate > speed) {
+		dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for vf %d.",
+			max_tx_rate, vf->vf_id);
 		ret = -EINVAL;
 		goto error;
 	}
 
 	/* Tx rate credits are in values of 50Mbps, 0 is disabled*/
-	ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, tx_rate / 50, 0,
-					  NULL);
+	ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid, max_tx_rate / 50,
+					  0, NULL);
 	if (ret) {
-		dev_err(&pf->pdev->dev, "Unable to set tx rate, error code %d.\n",
+		dev_err(&pf->pdev->dev, "Unable to set max tx rate, error code %d.\n",
 			ret);
 		ret = -EIO;
 		goto error;
 	}
-	vf->tx_rate = tx_rate;
+	vf->tx_rate = max_tx_rate;
 error:
 	return ret;
 }
@@ -2301,7 +2308,8 @@ int i40e_ndo_get_vf_config(struct net_device *netdev,
 
 	memcpy(&ivi->mac, vf->default_lan_addr.addr, ETH_ALEN);
 
-	ivi->tx_rate = vf->tx_rate;
+	ivi->max_tx_rate = vf->tx_rate;
+	ivi->min_tx_rate = 0;
 	ivi->vlan = le16_to_cpu(vsi->info.pvid) & I40E_VLAN_MASK;
 	ivi->qos = (le16_to_cpu(vsi->info.pvid) & I40E_PRIORITY_MASK) >>
 		   I40E_VLAN_PRIORITY_SHIFT;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index ba3d1f8414be..5a559be4ba2c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -116,7 +116,8 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
 int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
 int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
 			      int vf_id, u16 vlan_id, u8 qos);
-int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate);
+int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
+		       int max_tx_rate);
 int i40e_ndo_get_vf_config(struct net_device *netdev,
 			   int vf_id, struct ifla_vf_info *ivi);
 int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index bfcda8a455f4..1075b3f8c415 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -169,7 +169,7 @@ static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
 			       int vf, u16 vlan, u8 qos);
-static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
+static int igb_ndo_set_vf_bw(struct net_device *, int, int, int);
 static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf,
 				   bool setting);
 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
@@ -2084,7 +2084,7 @@ static const struct net_device_ops igb_netdev_ops = {
 	.ndo_vlan_rx_kill_vid	= igb_vlan_rx_kill_vid,
 	.ndo_set_vf_mac		= igb_ndo_set_vf_mac,
 	.ndo_set_vf_vlan	= igb_ndo_set_vf_vlan,
-	.ndo_set_vf_tx_rate	= igb_ndo_set_vf_bw,
+	.ndo_set_vf_rate	= igb_ndo_set_vf_bw,
 	.ndo_set_vf_spoofchk	= igb_ndo_set_vf_spoofchk,
 	.ndo_get_vf_config	= igb_ndo_get_vf_config,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -7879,7 +7879,8 @@ static void igb_check_vf_rate_limit(struct igb_adapter *adapter)
 	}
 }
 
-static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
+static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf,
+			     int min_tx_rate, int max_tx_rate)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -7888,15 +7889,19 @@ static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
 	if (hw->mac.type != e1000_82576)
 		return -EOPNOTSUPP;
 
+	if (min_tx_rate)
+		return -EINVAL;
+
 	actual_link_speed = igb_link_mbps(adapter->link_speed);
 	if ((vf >= adapter->vfs_allocated_count) ||
 	    (!(rd32(E1000_STATUS) & E1000_STATUS_LU)) ||
-	    (tx_rate < 0) || (tx_rate > actual_link_speed))
+	    (max_tx_rate < 0) ||
+	    (max_tx_rate > actual_link_speed))
 		return -EINVAL;
 
 	adapter->vf_rate_link_speed = actual_link_speed;
-	adapter->vf_data[vf].tx_rate = (u16)tx_rate;
-	igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed);
+	adapter->vf_data[vf].tx_rate = (u16)max_tx_rate;
+	igb_set_vf_rate_limit(hw, vf, max_tx_rate, actual_link_speed);
 
 	return 0;
 }
@@ -7936,7 +7941,8 @@ static int igb_ndo_get_vf_config(struct net_device *netdev,
 		return -EINVAL;
 	ivi->vf = vf;
 	memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
-	ivi->tx_rate = adapter->vf_data[vf].tx_rate;
+	ivi->max_tx_rate = adapter->vf_data[vf].tx_rate;
+	ivi->min_tx_rate = 0;
 	ivi->vlan = adapter->vf_data[vf].pf_vlan;
 	ivi->qos = adapter->vf_data[vf].pf_qos;
 	ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 8089ea9f2fba..a5332389620a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7926,7 +7926,7 @@ static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_do_ioctl		= ixgbe_ioctl,
 	.ndo_set_vf_mac		= ixgbe_ndo_set_vf_mac,
 	.ndo_set_vf_vlan	= ixgbe_ndo_set_vf_vlan,
-	.ndo_set_vf_tx_rate	= ixgbe_ndo_set_vf_bw,
+	.ndo_set_vf_rate	= ixgbe_ndo_set_vf_bw,
 	.ndo_set_vf_spoofchk	= ixgbe_ndo_set_vf_spoofchk,
 	.ndo_get_vf_config	= ixgbe_ndo_get_vf_config,
 	.ndo_get_stats64	= ixgbe_get_stats64,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index a01417c06620..3248e208c9dc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -1222,7 +1222,8 @@ void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter)
 	}
 }
 
-int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
+int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
+			int max_tx_rate)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	int link_speed;
@@ -1240,13 +1241,16 @@ int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
 	if (link_speed != 10000)
 		return -EINVAL;
 
+	if (min_tx_rate)
+		return -EINVAL;
+
 	/* rate limit cannot be less than 10Mbs or greater than link speed */
-	if (tx_rate && ((tx_rate <= 10) || (tx_rate > link_speed)))
+	if (max_tx_rate && ((max_tx_rate <= 10) || (max_tx_rate > link_speed)))
 		return -EINVAL;
 
 	/* store values */
 	adapter->vf_rate_link_speed = link_speed;
-	adapter->vfinfo[vf].tx_rate = tx_rate;
+	adapter->vfinfo[vf].tx_rate = max_tx_rate;
 
 	/* update hardware configuration */
 	ixgbe_set_vf_rate_limit(adapter, vf);
@@ -1288,7 +1292,8 @@ int ixgbe_ndo_get_vf_config(struct net_device *netdev,
 		return -EINVAL;
 	ivi->vf = vf;
 	memcpy(&ivi->mac, adapter->vfinfo[vf].vf_mac_addresses, ETH_ALEN);
-	ivi->tx_rate = adapter->vfinfo[vf].tx_rate;
+	ivi->max_tx_rate = adapter->vfinfo[vf].tx_rate;
+	ivi->min_tx_rate = 0;
 	ivi->vlan = adapter->vfinfo[vf].pf_vlan;
 	ivi->qos = adapter->vfinfo[vf].pf_qos;
 	ivi->spoofchk = adapter->vfinfo[vf].spoofchk_enabled;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
index cea640147604..32c26d586c01 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h
@@ -44,7 +44,8 @@ void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter);
 int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac);
 int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan,
 			   u8 qos);
-int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
+int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
+			int max_tx_rate);
 int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
 int ixgbe_ndo_get_vf_config(struct net_device *netdev,
 			    int vf, struct ifla_vf_info *ivi);
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 59c7fd406805..ca8e7cb5a8e4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2486,11 +2486,12 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in
 	ivf->mac[4] = ((s_info->mac >> (1*8)) & 0xff);
 	ivf->mac[5] = ((s_info->mac)  & 0xff);
 
-	ivf->vlan	= s_info->default_vlan;
-	ivf->qos	= s_info->default_qos;
-	ivf->tx_rate	= s_info->tx_rate;
-	ivf->spoofchk	= s_info->spoofchk;
-	ivf->linkstate	= s_info->link_state;
+	ivf->vlan		= s_info->default_vlan;
+	ivf->qos		= s_info->default_qos;
+	ivf->max_tx_rate	= s_info->tx_rate;
+	ivf->min_tx_rate	= 0;
+	ivf->spoofchk		= s_info->spoofchk;
+	ivf->linkstate		= s_info->link_state;
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 6e7527e2b595..41abe6070466 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1319,6 +1319,7 @@ struct qlcnic_eswitch {
 #define QL_STATUS_INVALID_PARAM	-1
 
 #define MAX_BW			100	/* % of link speed */
+#define MIN_BW			1	/* % of link speed */
 #define MAX_VLAN_ID		4095
 #define MIN_VLAN_ID		2
 #define DEFAULT_MAC_LEARN	1
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index f0a285359e66..f06ba90b4282 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -525,7 +525,7 @@ static const struct net_device_ops qlcnic_netdev_ops = {
 #endif
 #ifdef CONFIG_QLCNIC_SRIOV
 	.ndo_set_vf_mac		= qlcnic_sriov_set_vf_mac,
-	.ndo_set_vf_tx_rate	= qlcnic_sriov_set_vf_tx_rate,
+	.ndo_set_vf_rate	= qlcnic_sriov_set_vf_tx_rate,
 	.ndo_get_vf_config	= qlcnic_sriov_get_vf_config,
 	.ndo_set_vf_vlan	= qlcnic_sriov_set_vf_vlan,
 	.ndo_set_vf_spoofchk	= qlcnic_sriov_set_vf_spoofchk,
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
index 335b50f7bd3e..4677b2edccca 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h
@@ -233,7 +233,7 @@ bool qlcnic_sriov_soft_flr_check(struct qlcnic_adapter *,
 void qlcnic_sriov_pf_reset(struct qlcnic_adapter *);
 int qlcnic_sriov_pf_reinit(struct qlcnic_adapter *);
 int qlcnic_sriov_set_vf_mac(struct net_device *, int, u8 *);
-int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int);
+int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int, int);
 int qlcnic_sriov_get_vf_config(struct net_device *, int ,
 			       struct ifla_vf_info *);
 int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 498fa6350c8d..2bdd9deffb38 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -201,6 +201,7 @@ int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs)
 			sriov->vf_info[i].vp = vp;
 			vp->vlan_mode = QLC_GUEST_VLAN_MODE;
 			vp->max_tx_bw = MAX_BW;
+			vp->min_tx_bw = MIN_BW;
 			vp->spoofchk = false;
 			random_ether_addr(vp->mac);
 			dev_info(&adapter->pdev->dev,
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
index 6d2f72f114f2..a29538b86edf 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
@@ -1848,7 +1848,8 @@ int qlcnic_sriov_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 	return 0;
 }
 
-int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, int tx_rate)
+int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf,
+				int min_tx_rate, int max_tx_rate)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	struct qlcnic_sriov *sriov = adapter->ahw->sriov;
@@ -1863,35 +1864,52 @@ int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, int tx_rate)
 	if (vf >= sriov->num_vfs)
 		return -EINVAL;
 
-	if (tx_rate >= 10000 || tx_rate < 100) {
+	vf_info = &sriov->vf_info[vf];
+	vp = vf_info->vp;
+	vpid = vp->handle;
+
+	if (!min_tx_rate)
+		min_tx_rate = QLC_VF_MIN_TX_RATE;
+
+	if (max_tx_rate &&
+	    (max_tx_rate >= 10000 || max_tx_rate < min_tx_rate)) {
 		netdev_err(netdev,
-			   "Invalid Tx rate, allowed range is [%d - %d]",
-			   QLC_VF_MIN_TX_RATE, QLC_VF_MAX_TX_RATE);
+			   "Invalid max Tx rate, allowed range is [%d - %d]",
+			   min_tx_rate, QLC_VF_MAX_TX_RATE);
 		return -EINVAL;
 	}
 
-	if (tx_rate == 0)
-		tx_rate = 10000;
+	if (!max_tx_rate)
+		max_tx_rate = 10000;
 
-	vf_info = &sriov->vf_info[vf];
-	vp = vf_info->vp;
-	vpid = vp->handle;
+	if (min_tx_rate &&
+	    (min_tx_rate > max_tx_rate || min_tx_rate < QLC_VF_MIN_TX_RATE)) {
+		netdev_err(netdev,
+			   "Invalid min Tx rate, allowed range is [%d - %d]",
+			   QLC_VF_MIN_TX_RATE, max_tx_rate);
+		return -EINVAL;
+	}
 
 	if (test_bit(QLC_BC_VF_STATE, &vf_info->state)) {
 		if (qlcnic_sriov_get_vf_vport_info(adapter, &nic_info, vpid))
 			return -EIO;
 
-		nic_info.max_tx_bw = tx_rate / 100;
+		nic_info.max_tx_bw = max_tx_rate / 100;
+		nic_info.min_tx_bw = min_tx_rate / 100;
 		nic_info.bit_offsets = BIT_0;
 
 		if (qlcnic_sriov_pf_set_vport_info(adapter, &nic_info, vpid))
 			return -EIO;
 	}
 
-	vp->max_tx_bw = tx_rate / 100;
+	vp->max_tx_bw = max_tx_rate / 100;
+	netdev_info(netdev,
+		    "Setting Max Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n",
+		    max_tx_rate, vp->max_tx_bw, vf);
+	vp->min_tx_bw = min_tx_rate / 100;
 	netdev_info(netdev,
-		    "Setting Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n",
-		    tx_rate, vp->max_tx_bw, vf);
+		    "Setting Min Tx rate %d (Mbps), %d %% of PF bandwidth, for VF %d\n",
+		    min_tx_rate, vp->min_tx_bw, vf);
 	return 0;
 }
 
@@ -1990,9 +2008,13 @@ int qlcnic_sriov_get_vf_config(struct net_device *netdev,
 	ivi->qos = vp->qos;
 	ivi->spoofchk = vp->spoofchk;
 	if (vp->max_tx_bw == MAX_BW)
-		ivi->tx_rate = 0;
+		ivi->max_tx_rate = 0;
+	else
+		ivi->max_tx_rate = vp->max_tx_bw * 100;
+	if (vp->min_tx_bw == MIN_BW)
+		ivi->min_tx_rate = 0;
 	else
-		ivi->tx_rate = vp->max_tx_bw * 100;
+		ivi->min_tx_rate = vp->min_tx_bw * 100;
 
 	ivi->vf = vf;
 	return 0;
diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
index 9a9205e77896..43d2e64546ed 100644
--- a/drivers/net/ethernet/sfc/siena_sriov.c
+++ b/drivers/net/ethernet/sfc/siena_sriov.c
@@ -1633,7 +1633,8 @@ int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i,
 
 	ivi->vf = vf_i;
 	ether_addr_copy(ivi->mac, vf->addr.mac_addr);
-	ivi->tx_rate = 0;
+	ivi->max_tx_rate = 0;
+	ivi->min_tx_rate = 0;
 	tci = ntohs(vf->addr.tci);
 	ivi->vlan = tci & VLAN_VID_MASK;
 	ivi->qos = (tci >> VLAN_PRIO_SHIFT) & 0x7;
diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index a86784dec3d3..119130e9298b 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -10,8 +10,9 @@ struct ifla_vf_info {
 	__u8 mac[32];
 	__u32 vlan;
 	__u32 qos;
-	__u32 tx_rate;
 	__u32 spoofchk;
 	__u32 linkstate;
+	__u32 min_tx_rate;
+	__u32 max_tx_rate;
 };
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f4ad247fd324..9ec3a945caf2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -850,7 +850,8 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *	SR-IOV management functions.
  * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac);
  * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos);
- * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
+ * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
+ *			  int max_tx_rate);
  * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
  * int (*ndo_get_vf_config)(struct net_device *dev,
  *			    int vf, struct ifla_vf_info *ivf);
@@ -1044,8 +1045,9 @@ struct net_device_ops {
 						  int queue, u8 *mac);
 	int			(*ndo_set_vf_vlan)(struct net_device *dev,
 						   int queue, u16 vlan, u8 qos);
-	int			(*ndo_set_vf_tx_rate)(struct net_device *dev,
-						      int vf, int rate);
+	int			(*ndo_set_vf_rate)(struct net_device *dev,
+						   int vf, int min_tx_rate,
+						   int max_tx_rate);
 	int			(*ndo_set_vf_spoofchk)(struct net_device *dev,
 						       int vf, bool setting);
 	int			(*ndo_get_vf_config)(struct net_device *dev,
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 9a7f7ace6649..622e7910b8cc 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -399,9 +399,10 @@ enum {
 	IFLA_VF_UNSPEC,
 	IFLA_VF_MAC,		/* Hardware queue specific attributes */
 	IFLA_VF_VLAN,
-	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
+	IFLA_VF_TX_RATE,	/* Max TX Bandwidth Allocation */
 	IFLA_VF_SPOOFCHK,	/* Spoof Checking on/off switch */
 	IFLA_VF_LINK_STATE,	/* link state enable/disable/auto switch */
+	IFLA_VF_RATE,		/* Min and Max TX Bandwidth Allocation */
 	__IFLA_VF_MAX,
 };
 
@@ -423,6 +424,12 @@ struct ifla_vf_tx_rate {
 	__u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
 };
 
+struct ifla_vf_rate {
+	__u32 vf;
+	__u32 min_tx_rate; /* Min Bandwidth in Mbps */
+	__u32 max_tx_rate; /* Max Bandwidth in Mbps */
+};
+
 struct ifla_vf_spoofchk {
 	__u32 vf;
 	__u32 setting;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9837bebf93ce..d6417464dc66 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -767,8 +767,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
 		size += num_vfs *
 			(nla_total_size(sizeof(struct ifla_vf_mac)) +
 			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
-			 nla_total_size(sizeof(struct ifla_vf_tx_rate)) +
-			 nla_total_size(sizeof(struct ifla_vf_spoofchk)));
+			 nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
+			 nla_total_size(sizeof(struct ifla_vf_rate)));
 		return size;
 	} else
 		return 0;
@@ -1034,6 +1034,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			struct ifla_vf_info ivi;
 			struct ifla_vf_mac vf_mac;
 			struct ifla_vf_vlan vf_vlan;
+			struct ifla_vf_rate vf_rate;
 			struct ifla_vf_tx_rate vf_tx_rate;
 			struct ifla_vf_spoofchk vf_spoofchk;
 			struct ifla_vf_link_state vf_linkstate;
@@ -1054,6 +1055,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 				break;
 			vf_mac.vf =
 				vf_vlan.vf =
+				vf_rate.vf =
 				vf_tx_rate.vf =
 				vf_spoofchk.vf =
 				vf_linkstate.vf = ivi.vf;
@@ -1061,7 +1063,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
 			vf_vlan.vlan = ivi.vlan;
 			vf_vlan.qos = ivi.qos;
-			vf_tx_rate.rate = ivi.tx_rate;
+			vf_tx_rate.rate = ivi.max_tx_rate;
+			vf_rate.min_tx_rate = ivi.min_tx_rate;
+			vf_rate.max_tx_rate = ivi.max_tx_rate;
 			vf_spoofchk.setting = ivi.spoofchk;
 			vf_linkstate.link_state = ivi.linkstate;
 			vf = nla_nest_start(skb, IFLA_VF_INFO);
@@ -1071,6 +1075,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			}
 			if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) ||
 			    nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) ||
+			    nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate),
+				    &vf_rate) ||
 			    nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate),
 				    &vf_tx_rate) ||
 			    nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
@@ -1177,6 +1183,8 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 				    .len = sizeof(struct ifla_vf_tx_rate) },
 	[IFLA_VF_SPOOFCHK]	= { .type = NLA_BINARY,
 				    .len = sizeof(struct ifla_vf_spoofchk) },
+	[IFLA_VF_RATE]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_rate) },
 };
 
 static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1336,11 +1344,29 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
 		}
 		case IFLA_VF_TX_RATE: {
 			struct ifla_vf_tx_rate *ivt;
+			struct ifla_vf_info ivf;
 			ivt = nla_data(vf);
 			err = -EOPNOTSUPP;
-			if (ops->ndo_set_vf_tx_rate)
-				err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
-							      ivt->rate);
+			if (ops->ndo_get_vf_config)
+				err = ops->ndo_get_vf_config(dev, ivt->vf,
+							     &ivf);
+			if (err)
+				break;
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_rate)
+				err = ops->ndo_set_vf_rate(dev, ivt->vf,
+							   ivf.min_tx_rate,
+							   ivt->rate);
+			break;
+		}
+		case IFLA_VF_RATE: {
+			struct ifla_vf_rate *ivt;
+			ivt = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_rate)
+				err = ops->ndo_set_vf_rate(dev, ivt->vf,
+							   ivt->min_tx_rate,
+							   ivt->max_tx_rate);
 			break;
 		}
 		case IFLA_VF_SPOOFCHK: {
-- 
cgit 


From edf866b3805c5651bf7d035b72dc0190cb6ff4a7 Mon Sep 17 00:00:00 2001
From: Sam Bradshaw <sbradshaw@micron.com>
Date: Fri, 23 May 2014 13:30:16 -0600
Subject: blk-mq: export blk_mq_tag_busy_iter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Export the blk-mq in-flight tag iterator for driver consumption.
This is particularly useful in exception paths or SRSI where
in-flight IOs need to be cancelled and/or reissued. The NVMe driver
conversion will use this.

Signed-off-by: Sam Bradshaw <sbradshaw@micron.com>
Signed-off-by: Matias Bjørling <m@bjorling.me>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-tag.c     | 1 +
 block/blk-mq-tag.h     | 1 -
 include/linux/blk-mq.h | 1 +
 3 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index f6dea968b710..05e2baf4fa0d 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -400,6 +400,7 @@ void blk_mq_tag_busy_iter(struct blk_mq_tags *tags,
 	fn(data, tag_map);
 	kfree(tag_map);
 }
+EXPORT_SYMBOL(blk_mq_tag_busy_iter);
 
 static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
 {
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index e7ff5ceeeb97..2e5e6872d089 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -51,7 +51,6 @@ extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 extern unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved);
 extern void blk_mq_wait_for_tags(struct blk_mq_hw_ctx *hctx, bool reserved);
 extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, unsigned int *last_tag);
-extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data);
 extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
 extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 4d2800567aad..f76bb18350af 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -181,6 +181,7 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
 void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
+void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data);
 
 /*
  * Driver command data is immediately after the request. So subtract request
-- 
cgit 


From 1c19448c9ba6545b80ded18488a64a7f3d8e6998 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 23 May 2014 08:47:32 -0700
Subject: net: Make enabling of zero UDP6 csums more restrictive

RFC 6935 permits zero checksums to be used in IPv6; however, this is
recommended only for certain tunnel protocols. It does not make
checksums completely optional like they are in IPv4.

This patch restricts the use of IPv6 zero checksums that was previously
introduced. no_check6_tx and no_check6_rx have been added to control
the use of checksums in UDP6 RX and TX path. The normal
sk_no_check_{rx,tx} settings are not used (this avoids ambiguity when
dealing with a dual stack socket).

A helper function has been added (udp_set_no_check6) which can be
called by tunnel implementations to allow zero checksums (send on the
socket, and accept them as valid).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/udp.h      | 24 +++++++++++++++++++++++-
 include/uapi/linux/udp.h |  2 ++
 net/ipv4/udp.c           | 20 +++++++++++++++++++-
 net/ipv6/udp.c           |  8 ++++----
 4 files changed, 48 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/udp.h b/include/linux/udp.h
index 42278bbf7a88..247cfdcc4b08 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -47,7 +47,9 @@ struct udp_sock {
 #define udp_portaddr_node	inet.sk.__sk_common.skc_portaddr_node
 	int		 pending;	/* Any pending frames ? */
 	unsigned int	 corkflag;	/* Cork is required */
-  	__u16		 encap_type;	/* Is this an Encapsulation socket? */
+	__u8		 encap_type;	/* Is this an Encapsulation socket? */
+	unsigned char	 no_check6_tx:1,/* Send zero UDP6 checksums on TX? */
+			 no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */
 	/*
 	 * Following member retains the information to create a UDP header
 	 * when the socket is uncorked.
@@ -76,6 +78,26 @@ static inline struct udp_sock *udp_sk(const struct sock *sk)
 	return (struct udp_sock *)sk;
 }
 
+static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
+{
+	udp_sk(sk)->no_check6_tx = val;
+}
+
+static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
+{
+	udp_sk(sk)->no_check6_rx = val;
+}
+
+static inline bool udp_get_no_check6_tx(struct sock *sk)
+{
+	return udp_sk(sk)->no_check6_tx;
+}
+
+static inline bool udp_get_no_check6_rx(struct sock *sk)
+{
+	return udp_sk(sk)->no_check6_rx;
+}
+
 #define udp_portaddr_for_each_entry(__sk, node, list) \
 	hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node)
 
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index e2bcfd75a30d..16574ea18f0c 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -29,6 +29,8 @@ struct udphdr {
 /* UDP socket options */
 #define UDP_CORK	1	/* Never send partially complete segments */
 #define UDP_ENCAP	100	/* Set the socket to accept encapsulated packets */
+#define UDP_NO_CHECK6_TX 101	/* Disable sending checksum for UDP6X */
+#define UDP_NO_CHECK6_RX 102	/* Disable accpeting checksum for UDP6 */
 
 /* UDP encapsulation types */
 #define UDP_ENCAP_ESPINUDP_NON_IKE	1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 12c6175b29cd..e07d52b8617a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1968,7 +1968,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		       int (*push_pending_frames)(struct sock *))
 {
 	struct udp_sock *up = udp_sk(sk);
-	int val;
+	int val, valbool;
 	int err = 0;
 	int is_udplite = IS_UDPLITE(sk);
 
@@ -1978,6 +1978,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 	if (get_user(val, (int __user *)optval))
 		return -EFAULT;
 
+	valbool = val ? 1 : 0;
+
 	switch (optname) {
 	case UDP_CORK:
 		if (val != 0) {
@@ -2007,6 +2009,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		}
 		break;
 
+	case UDP_NO_CHECK6_TX:
+		up->no_check6_tx = valbool;
+		break;
+
+	case UDP_NO_CHECK6_RX:
+		up->no_check6_rx = valbool;
+		break;
+
 	/*
 	 * 	UDP-Lite's partial checksum coverage (RFC 3828).
 	 */
@@ -2089,6 +2099,14 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 		val = up->encap_type;
 		break;
 
+	case UDP_NO_CHECK6_TX:
+		val = up->no_check6_tx;
+		break;
+
+	case UDP_NO_CHECK6_RX:
+		val = up->no_check6_rx;
+		break;
+
 	/* The following two cannot be changed on UDP sockets, the return is
 	 * always 0 (which corresponds to the full checksum coverage of UDP). */
 	case UDPLITE_SEND_CSCOV:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b8db453133aa..60325236446a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -794,10 +794,10 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 	dif = inet6_iif(skb);
 	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
 	while (sk) {
-		/* If zero checksum and sk_no_check is not on for
+		/* If zero checksum and no_check is not on for
 		 * the socket then skip it.
 		 */
-		if (uh->check || sk->sk_no_check_rx)
+		if (uh->check || udp_sk(sk)->no_check6_rx)
 			stack[count++] = sk;
 
 		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
@@ -887,7 +887,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	if (sk != NULL) {
 		int ret;
 
-		if (!uh->check && !sk->sk_no_check_rx) {
+		if (!uh->check && !udp_sk(sk)->no_check6_rx) {
 			sock_put(sk);
 			udp6_csum_zero_error(skb);
 			goto csum_error;
@@ -1037,7 +1037,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
 
 	if (is_udplite)
 		csum = udplite_csum_outgoing(sk, skb);
-	else if (sk->sk_no_check_tx) {   /* UDP csum disabled */
+	else if (up->no_check6_tx) {   /* UDP csum disabled */
 		skb->ip_summed = CHECKSUM_NONE;
 		goto send;
 	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
-- 
cgit 


From 79c6ab509558f9871175c7e4411f857de12cf33b Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Fri, 23 May 2014 18:32:15 +0530
Subject: clk: divider: add CLK_DIVIDER_READ_ONLY flag

From: Heiko Stuebner <heiko@sntech.de>

Similar to muxes which already have a read-only flag there sometimes
exist dividers which should not be changed by the clock framework
but whose value still should be readable.

Therefore, add a READ_ONLY flag, similar to the one for muxes, to clk-divider.

Signed-off-by: Heiko Stuebner <heiko@sntech.de>
[changed flag bit to BIT(5) as suggested by Tomasz Figa]
Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
Acked-by: Tomasz Figa <t.figa@samsung.com>
Acked-by: Max Schwarz <max.schwarz@online.de>
Tested-by: Max Schwarz <max.schwarz@online.de>
Signed-off-by: Mike Turquette <mturquette@linaro.org>
---
 drivers/clk/clk-divider.c    | 10 +++++++++-
 include/linux/clk-provider.h |  4 ++++
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c
index b3c83966be18..c9343f5d9918 100644
--- a/drivers/clk/clk-divider.c
+++ b/drivers/clk/clk-divider.c
@@ -361,6 +361,11 @@ const struct clk_ops clk_divider_ops = {
 };
 EXPORT_SYMBOL_GPL(clk_divider_ops);
 
+const struct clk_ops clk_divider_ro_ops = {
+	.recalc_rate = clk_divider_recalc_rate,
+};
+EXPORT_SYMBOL_GPL(clk_divider_ro_ops);
+
 static struct clk *_register_divider(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		void __iomem *reg, u8 shift, u8 width,
@@ -386,7 +391,10 @@ static struct clk *_register_divider(struct device *dev, const char *name,
 	}
 
 	init.name = name;
-	init.ops = &clk_divider_ops;
+	if (clk_divider_flags & CLK_DIVIDER_READ_ONLY)
+		init.ops = &clk_divider_ro_ops;
+	else
+		init.ops = &clk_divider_ops;
 	init.flags = flags | CLK_IS_BASIC;
 	init.parent_names = (parent_name ? &parent_name: NULL);
 	init.num_parents = (parent_name ? 1 : 0);
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 40809431641e..c7135dbbcd65 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -320,6 +320,8 @@ struct clk_div_table {
  *	updated to indicate changing divider bits.
  * CLK_DIVIDER_ROUND_CLOSEST - Makes the best calculated divider to be rounded
  *	to the closest integer instead of the up one.
+ * CLK_DIVIDER_READ_ONLY - The divider settings are preconfigured and should
+ *	not be changed by the clock framework.
  */
 struct clk_divider {
 	struct clk_hw	hw;
@@ -336,8 +338,10 @@ struct clk_divider {
 #define CLK_DIVIDER_ALLOW_ZERO		BIT(2)
 #define CLK_DIVIDER_HIWORD_MASK		BIT(3)
 #define CLK_DIVIDER_ROUND_CLOSEST	BIT(4)
+#define CLK_DIVIDER_READ_ONLY		BIT(5)
 
 extern const struct clk_ops clk_divider_ops;
+extern const struct clk_ops clk_divider_ro_ops;
 struct clk *clk_register_divider(struct device *dev, const char *name,
 		const char *parent_name, unsigned long flags,
 		void __iomem *reg, u8 shift, u8 width,
-- 
cgit 


From 8556ce79d5986a87fee4c29300b4efee07c0f15e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Fri, 23 May 2014 18:43:57 +0200
Subject: net: filter: remove DL macro

Let's get rid of this macro. After commit 5bcfedf06f7f ("net: filter:
simplify label names from jump-table"), labels have become more
readable due to omission of BPF_ prefix but at the same time more
generic, so that things like `git grep -n` would not find them. As
a middle path, let's get rid of the DL macro as it's not strictly
needed and would otherwise just hide the full name.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h |   3 -
 net/core/filter.c      | 193 +++++++++++++++++++++++++------------------------
 2 files changed, 99 insertions(+), 97 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7977b3958e25..2b0056afd1f7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -37,9 +37,6 @@
 #define BPF_CALL	0x80	/* function call */
 #define BPF_EXIT	0x90	/* function return */
 
-/* Placeholder/dummy for 0 */
-#define BPF_0		0
-
 /* Register numbers */
 enum {
 	BPF_REG_0 = 0,
diff --git a/net/core/filter.c b/net/core/filter.c
index 7067cb240d3e..b3b0889fe089 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -160,95 +160,100 @@ static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *ins
 	static const void *jumptable[256] = {
 		[0 ... 255] = &&default_label,
 		/* Now overwrite non-defaults ... */
-#define DL(A, B, C)	[BPF_##A|BPF_##B|BPF_##C] = &&A##_##B##_##C
-		DL(ALU, ADD, X),
-		DL(ALU, ADD, K),
-		DL(ALU, SUB, X),
-		DL(ALU, SUB, K),
-		DL(ALU, AND, X),
-		DL(ALU, AND, K),
-		DL(ALU, OR, X),
-		DL(ALU, OR, K),
-		DL(ALU, LSH, X),
-		DL(ALU, LSH, K),
-		DL(ALU, RSH, X),
-		DL(ALU, RSH, K),
-		DL(ALU, XOR, X),
-		DL(ALU, XOR, K),
-		DL(ALU, MUL, X),
-		DL(ALU, MUL, K),
-		DL(ALU, MOV, X),
-		DL(ALU, MOV, K),
-		DL(ALU, DIV, X),
-		DL(ALU, DIV, K),
-		DL(ALU, MOD, X),
-		DL(ALU, MOD, K),
-		DL(ALU, NEG, 0),
-		DL(ALU, END, TO_BE),
-		DL(ALU, END, TO_LE),
-		DL(ALU64, ADD, X),
-		DL(ALU64, ADD, K),
-		DL(ALU64, SUB, X),
-		DL(ALU64, SUB, K),
-		DL(ALU64, AND, X),
-		DL(ALU64, AND, K),
-		DL(ALU64, OR, X),
-		DL(ALU64, OR, K),
-		DL(ALU64, LSH, X),
-		DL(ALU64, LSH, K),
-		DL(ALU64, RSH, X),
-		DL(ALU64, RSH, K),
-		DL(ALU64, XOR, X),
-		DL(ALU64, XOR, K),
-		DL(ALU64, MUL, X),
-		DL(ALU64, MUL, K),
-		DL(ALU64, MOV, X),
-		DL(ALU64, MOV, K),
-		DL(ALU64, ARSH, X),
-		DL(ALU64, ARSH, K),
-		DL(ALU64, DIV, X),
-		DL(ALU64, DIV, K),
-		DL(ALU64, MOD, X),
-		DL(ALU64, MOD, K),
-		DL(ALU64, NEG, 0),
-		DL(JMP, CALL, 0),
-		DL(JMP, JA, 0),
-		DL(JMP, JEQ, X),
-		DL(JMP, JEQ, K),
-		DL(JMP, JNE, X),
-		DL(JMP, JNE, K),
-		DL(JMP, JGT, X),
-		DL(JMP, JGT, K),
-		DL(JMP, JGE, X),
-		DL(JMP, JGE, K),
-		DL(JMP, JSGT, X),
-		DL(JMP, JSGT, K),
-		DL(JMP, JSGE, X),
-		DL(JMP, JSGE, K),
-		DL(JMP, JSET, X),
-		DL(JMP, JSET, K),
-		DL(JMP, EXIT, 0),
-		DL(STX, MEM, B),
-		DL(STX, MEM, H),
-		DL(STX, MEM, W),
-		DL(STX, MEM, DW),
-		DL(STX, XADD, W),
-		DL(STX, XADD, DW),
-		DL(ST, MEM, B),
-		DL(ST, MEM, H),
-		DL(ST, MEM, W),
-		DL(ST, MEM, DW),
-		DL(LDX, MEM, B),
-		DL(LDX, MEM, H),
-		DL(LDX, MEM, W),
-		DL(LDX, MEM, DW),
-		DL(LD, ABS, W),
-		DL(LD, ABS, H),
-		DL(LD, ABS, B),
-		DL(LD, IND, W),
-		DL(LD, IND, H),
-		DL(LD, IND, B),
-#undef DL
+		/* 32 bit ALU operations */
+		[BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
+		[BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
+		[BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
+		[BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
+		[BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
+		[BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
+		[BPF_ALU | BPF_OR | BPF_X]  = &&ALU_OR_X,
+		[BPF_ALU | BPF_OR | BPF_K]  = &&ALU_OR_K,
+		[BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
+		[BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
+		[BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
+		[BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
+		[BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
+		[BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
+		[BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
+		[BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
+		[BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
+		[BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
+		[BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
+		[BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
+		[BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
+		[BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
+		[BPF_ALU | BPF_NEG] = &&ALU_NEG,
+		[BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
+		[BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
+		/* 64 bit ALU operations */
+		[BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
+		[BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
+		[BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
+		[BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
+		[BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
+		[BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
+		[BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
+		[BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
+		[BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
+		[BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
+		[BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
+		[BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
+		[BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
+		[BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
+		[BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
+		[BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
+		[BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
+		[BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
+		[BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
+		[BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
+		[BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
+		[BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
+		[BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
+		[BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
+		[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
+		/* Call instruction */
+		[BPF_JMP | BPF_CALL] = &&JMP_CALL,
+		/* Jumps */
+		[BPF_JMP | BPF_JA] = &&JMP_JA,
+		[BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
+		[BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
+		[BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
+		[BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
+		[BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
+		[BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
+		[BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
+		[BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
+		[BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
+		[BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
+		[BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
+		[BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
+		[BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
+		[BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
+		/* Program return */
+		[BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
+		/* Store instructions */
+		[BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
+		[BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
+		[BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
+		[BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
+		[BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
+		[BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
+		[BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
+		[BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
+		[BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
+		[BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
+		/* Load instructions */
+		[BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
+		[BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
+		[BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
+		[BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
+		[BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
+		[BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
+		[BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
+		[BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
+		[BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
+		[BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
 	};
 	void *ptr;
 	int off;
@@ -290,10 +295,10 @@ select_insn:
 	ALU(XOR,  ^)
 	ALU(MUL,  *)
 #undef ALU
-	ALU_NEG_0:
+	ALU_NEG:
 		A = (u32) -A;
 		CONT;
-	ALU64_NEG_0:
+	ALU64_NEG:
 		A = -A;
 		CONT;
 	ALU_MOV_X:
@@ -382,7 +387,7 @@ select_insn:
 		CONT;
 
 	/* CALL */
-	JMP_CALL_0:
+	JMP_CALL:
 		/* Function call scratches BPF_R1-BPF_R5 registers,
 		 * preserves BPF_R6-BPF_R9, and stores return value
 		 * into BPF_R0.
@@ -392,7 +397,7 @@ select_insn:
 		CONT;
 
 	/* JMP */
-	JMP_JA_0:
+	JMP_JA:
 		insn += insn->off;
 		CONT;
 	JMP_JEQ_X:
@@ -479,7 +484,7 @@ select_insn:
 			CONT_JMP;
 		}
 		CONT;
-	JMP_EXIT_0:
+	JMP_EXIT:
 		return BPF_R0;
 
 	/* STX and ST and LDX*/
-- 
cgit 


From b1fcd35cf53553a0a3ef949b05106d921446abc3 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Fri, 23 May 2014 18:43:58 +0200
Subject: net: filter: let unattached filters use sock_fprog_kern

The sk_unattached_filter_create() API is used by BPF filters that
are not directly attached or related to sockets, and are used in
team, ptp, xt_bpf, cls_bpf, etc. As such all users do their own
internal management of obtaining filter blocks and thus already
have them in kernel memory and set up before calling into
sk_unattached_filter_create(). As a result, due to __user annotation
in sock_fprog, sparse triggers false positives (incorrect type in
assignment [different address space]) when filters are set up before
passing them to sk_unattached_filter_create(). Therefore, let
sk_unattached_filter_create() API use sock_fprog_kern to overcome
this issue.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/i4l/isdn_ppp.c              |  4 ++--
 drivers/net/ppp/ppp_generic.c            |  4 ++--
 drivers/net/team/team_mode_loadbalance.c | 10 +++++-----
 include/linux/filter.h                   |  2 +-
 lib/test_bpf.c                           |  2 +-
 net/core/filter.c                        |  2 +-
 net/core/ptp_classifier.c                |  2 +-
 net/netfilter/xt_bpf.c                   |  5 +++--
 net/sched/cls_bpf.c                      |  4 ++--
 9 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/i4l/isdn_ppp.c b/drivers/isdn/i4l/isdn_ppp.c
index a5da511e3c9a..61ac63237446 100644
--- a/drivers/isdn/i4l/isdn_ppp.c
+++ b/drivers/isdn/i4l/isdn_ppp.c
@@ -634,7 +634,7 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg)
 #ifdef CONFIG_IPPP_FILTER
 	case PPPIOCSPASS:
 	{
-		struct sock_fprog fprog;
+		struct sock_fprog_kern fprog;
 		struct sock_filter *code;
 		int err, len = get_filter(argp, &code);
 
@@ -653,7 +653,7 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg)
 	}
 	case PPPIOCSACTIVE:
 	{
-		struct sock_fprog fprog;
+		struct sock_fprog_kern fprog;
 		struct sock_filter *code;
 		int err, len = get_filter(argp, &code);
 
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index e3923ebb693f..91d6c1272fcf 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -757,7 +757,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 		err = get_filter(argp, &code);
 		if (err >= 0) {
-			struct sock_fprog fprog = {
+			struct sock_fprog_kern fprog = {
 				.len = err,
 				.filter = code,
 			};
@@ -778,7 +778,7 @@ static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 
 		err = get_filter(argp, &code);
 		if (err >= 0) {
-			struct sock_fprog fprog = {
+			struct sock_fprog_kern fprog = {
 				.len = err,
 				.filter = code,
 			};
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index dbde3412ee5e..0a6ee07bf0af 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -49,7 +49,7 @@ struct lb_port_mapping {
 struct lb_priv_ex {
 	struct team *team;
 	struct lb_port_mapping tx_hash_to_port_mapping[LB_TX_HASHTABLE_SIZE];
-	struct sock_fprog *orig_fprog;
+	struct sock_fprog_kern *orig_fprog;
 	struct {
 		unsigned int refresh_interval; /* in tenths of second */
 		struct delayed_work refresh_dw;
@@ -241,10 +241,10 @@ static int lb_bpf_func_get(struct team *team, struct team_gsetter_ctx *ctx)
 	return 0;
 }
 
-static int __fprog_create(struct sock_fprog **pfprog, u32 data_len,
+static int __fprog_create(struct sock_fprog_kern **pfprog, u32 data_len,
 			  const void *data)
 {
-	struct sock_fprog *fprog;
+	struct sock_fprog_kern *fprog;
 	struct sock_filter *filter = (struct sock_filter *) data;
 
 	if (data_len % sizeof(struct sock_filter))
@@ -262,7 +262,7 @@ static int __fprog_create(struct sock_fprog **pfprog, u32 data_len,
 	return 0;
 }
 
-static void __fprog_destroy(struct sock_fprog *fprog)
+static void __fprog_destroy(struct sock_fprog_kern *fprog)
 {
 	kfree(fprog->filter);
 	kfree(fprog);
@@ -273,7 +273,7 @@ static int lb_bpf_func_set(struct team *team, struct team_gsetter_ctx *ctx)
 	struct lb_priv *lb_priv = get_lb_priv(team);
 	struct sk_filter *fp = NULL;
 	struct sk_filter *orig_fp;
-	struct sock_fprog *fprog = NULL;
+	struct sock_fprog_kern *fprog = NULL;
 	int err;
 
 	if (ctx->data.bin_val.len) {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 2b0056afd1f7..625f4de9bdf2 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -188,7 +188,7 @@ int sk_convert_filter(struct sock_filter *prog, int len,
 		      struct sock_filter_int *new_prog, int *new_len);
 
 int sk_unattached_filter_create(struct sk_filter **pfp,
-				struct sock_fprog *fprog);
+				struct sock_fprog_kern *fprog);
 void sk_unattached_filter_destroy(struct sk_filter *fp);
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 3d80adbdb559..e03991ea8cc2 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -1472,7 +1472,7 @@ static int run_one(struct sk_filter *fp, struct bpf_test *t)
 static __init int test_bpf(void)
 {
 	struct sk_filter *fp, *fp_ext = NULL;
-	struct sock_fprog fprog;
+	struct sock_fprog_kern fprog;
 	int err, i, err_cnt = 0;
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
diff --git a/net/core/filter.c b/net/core/filter.c
index b3b0889fe089..2c2d35d9d101 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1585,7 +1585,7 @@ static struct sk_filter *__sk_prepare_filter(struct sk_filter *fp,
  * a negative errno code is returned. On success the return is zero.
  */
 int sk_unattached_filter_create(struct sk_filter **pfp,
-				struct sock_fprog *fprog)
+				struct sock_fprog_kern *fprog)
 {
 	unsigned int fsize = sk_filter_proglen(fprog);
 	struct sk_filter *fp;
diff --git a/net/core/ptp_classifier.c b/net/core/ptp_classifier.c
index 37d86157b76e..d3027a73fd4b 100644
--- a/net/core/ptp_classifier.c
+++ b/net/core/ptp_classifier.c
@@ -133,7 +133,7 @@ void __init ptp_classifier_init(void)
 		{ 0x16,  0,  0, 0x00000000 },
 		{ 0x06,  0,  0, 0x00000000 },
 	};
-	struct sock_fprog ptp_prog = {
+	struct sock_fprog_kern ptp_prog = {
 		.len = ARRAY_SIZE(ptp_filter), .filter = ptp_filter,
 	};
 
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 12d4da8e6c77..bbffdbdaf603 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -23,10 +23,11 @@ MODULE_ALIAS("ip6t_bpf");
 static int bpf_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_bpf_info *info = par->matchinfo;
-	struct sock_fprog program;
+	struct sock_fprog_kern program;
 
 	program.len = info->bpf_program_num_elem;
-	program.filter = (struct sock_filter __user *) info->bpf_program;
+	program.filter = info->bpf_program;
+
 	if (sk_unattached_filter_create(&info->filter, &program)) {
 		pr_info("bpf: check failed: parse error\n");
 		return -EINVAL;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 16186965af97..13f64df2c710 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -160,7 +160,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 {
 	struct sock_filter *bpf_ops, *bpf_old;
 	struct tcf_exts exts;
-	struct sock_fprog tmp;
+	struct sock_fprog_kern tmp;
 	struct sk_filter *fp, *fp_old;
 	u16 bpf_size, bpf_len;
 	u32 classid;
@@ -191,7 +191,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
 	memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
 
 	tmp.len = bpf_len;
-	tmp.filter = (struct sock_filter __user *) bpf_ops;
+	tmp.filter = bpf_ops;
 
 	ret = sk_unattached_filter_create(&fp, &tmp);
 	if (ret)
-- 
cgit 


From 49b2f4c56fbf70ca693d6df1c491f0566d516aea Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Tue, 15 Apr 2014 08:35:25 -0300
Subject: [media] exynos4-is: Remove support for non-dt platforms

All platforms supported by this driver are going to get device tree
support in this kernel release, so remove the code that would no
longer actually be used.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Acked-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <m.chehab@samsung.com>
---
 Documentation/video4linux/fimc.txt                 |  30 --
 MAINTAINERS                                        |   1 -
 drivers/media/platform/exynos4-is/Kconfig          |   3 +-
 drivers/media/platform/exynos4-is/common.c         |   2 +-
 drivers/media/platform/exynos4-is/fimc-core.h      |   2 +-
 drivers/media/platform/exynos4-is/fimc-isp-video.c |   2 +-
 drivers/media/platform/exynos4-is/fimc-isp.h       |   2 +-
 drivers/media/platform/exynos4-is/fimc-lite-reg.c  |   2 +-
 drivers/media/platform/exynos4-is/fimc-lite.c      |   2 +-
 drivers/media/platform/exynos4-is/fimc-lite.h      |   2 +-
 drivers/media/platform/exynos4-is/fimc-reg.c       |   2 +-
 drivers/media/platform/exynos4-is/media-dev.c      | 329 ++-------------------
 drivers/media/platform/exynos4-is/media-dev.h      |   6 +-
 drivers/media/platform/exynos4-is/mipi-csis.c      |  43 +--
 include/linux/platform_data/mipi-csis.h            |  28 --
 include/media/exynos-fimc.h                        | 161 ++++++++++
 include/media/s5p_fimc.h                           | 182 ------------
 17 files changed, 211 insertions(+), 588 deletions(-)
 delete mode 100644 include/linux/platform_data/mipi-csis.h
 create mode 100644 include/media/exynos-fimc.h
 delete mode 100644 include/media/s5p_fimc.h

(limited to 'include/linux')

diff --git a/Documentation/video4linux/fimc.txt b/Documentation/video4linux/fimc.txt
index 7d6e160724bd..e0c6b8bc4743 100644
--- a/Documentation/video4linux/fimc.txt
+++ b/Documentation/video4linux/fimc.txt
@@ -140,39 +140,9 @@ You can either grep through the kernel log to find relevant information, i.e.
 or retrieve the information from /dev/media? with help of the media-ctl tool:
 # media-ctl -p
 
-6. Platform support
-===================
-
-The machine code (arch/arm/plat-samsung and arch/arm/mach-*) must select
-following options:
-
-CONFIG_S5P_DEV_FIMC0       mandatory
-CONFIG_S5P_DEV_FIMC1  \
-CONFIG_S5P_DEV_FIMC2  |    optional
-CONFIG_S5P_DEV_FIMC3  |
-CONFIG_S5P_SETUP_FIMC /
-CONFIG_S5P_DEV_CSIS0  \    optional for MIPI-CSI interface
-CONFIG_S5P_DEV_CSIS1  /
-
-Except that, relevant s5p_device_fimc? should be registered in the machine code
-in addition to a "s5p-fimc-md" platform device to which the media device driver
-is bound.  The "s5p-fimc-md" device instance is required even if only mem-to-mem
-operation is used.
-
-The description of sensor(s) attached to FIMC/MIPI-CSIS camera inputs should be
-passed as the "s5p-fimc-md" device platform_data.  The platform data structure
-is defined in file include/media/s5p_fimc.h.
-
 7. Build
 ========
 
-This driver depends on following config options:
-PLAT_S5P,
-PM_RUNTIME,
-I2C,
-REGULATOR,
-VIDEO_V4L2_SUBDEV_API,
-
 If the driver is built as a loadable kernel module (CONFIG_VIDEO_SAMSUNG_S5P_FIMC=m)
 two modules are created (in addition to the core v4l2 modules): s5p-fimc.ko and
 optional s5p-csis.ko (MIPI-CSI receiver subdev).
diff --git a/MAINTAINERS b/MAINTAINERS
index 129621ed165f..6b7c633a2e98 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7654,7 +7654,6 @@ L:	linux-media@vger.kernel.org
 Q:	https://patchwork.linuxtv.org/project/linux-media/list/
 S:	Supported
 F:	drivers/media/platform/exynos4-is/
-F:	include/media/s5p_fimc.h
 
 SAMSUNG S3C24XX/S3C64XX SOC SERIES CAMIF DRIVER
 M:	Sylwester Nawrocki <sylvester.nawrocki@gmail.com>
diff --git a/drivers/media/platform/exynos4-is/Kconfig b/drivers/media/platform/exynos4-is/Kconfig
index e1b2ceba00c1..5dcaa0a80540 100644
--- a/drivers/media/platform/exynos4-is/Kconfig
+++ b/drivers/media/platform/exynos4-is/Kconfig
@@ -3,6 +3,7 @@ config VIDEO_SAMSUNG_EXYNOS4_IS
 	bool "Samsung S5P/EXYNOS4 SoC series Camera Subsystem driver"
 	depends on VIDEO_V4L2 && VIDEO_V4L2_SUBDEV_API
 	depends on (PLAT_S5P || ARCH_EXYNOS)
+	depends on OF && COMMON_CLK
 	help
 	  Say Y here to enable camera host interface devices for
 	  Samsung S5P and EXYNOS SoC series.
@@ -17,7 +18,7 @@ config VIDEO_S5P_FIMC
 	depends on I2C
 	select VIDEOBUF2_DMA_CONTIG
 	select V4L2_MEM2MEM_DEV
-	select MFD_SYSCON if OF
+	select MFD_SYSCON
 	select VIDEO_EXYNOS4_IS_COMMON
 	help
 	  This is a V4L2 driver for Samsung S5P and EXYNOS4 SoC camera host
diff --git a/drivers/media/platform/exynos4-is/common.c b/drivers/media/platform/exynos4-is/common.c
index 0ec210b4da1d..0eb34ecb8ee4 100644
--- a/drivers/media/platform/exynos4-is/common.c
+++ b/drivers/media/platform/exynos4-is/common.c
@@ -10,7 +10,7 @@
  */
 
 #include <linux/module.h>
-#include <media/s5p_fimc.h>
+#include <media/exynos-fimc.h>
 #include "common.h"
 
 /* Called with the media graph mutex held or entity->stream_count > 0. */
diff --git a/drivers/media/platform/exynos4-is/fimc-core.h b/drivers/media/platform/exynos4-is/fimc-core.h
index 1790fb4e32ea..6c75c6ced1f7 100644
--- a/drivers/media/platform/exynos4-is/fimc-core.h
+++ b/drivers/media/platform/exynos4-is/fimc-core.h
@@ -27,7 +27,7 @@
 #include <media/v4l2-device.h>
 #include <media/v4l2-mem2mem.h>
 #include <media/v4l2-mediabus.h>
-#include <media/s5p_fimc.h>
+#include <media/exynos-fimc.h>
 
 #define dbg(fmt, args...) \
 	pr_debug("%s:%d: " fmt "\n", __func__, __LINE__, ##args)
diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.c b/drivers/media/platform/exynos4-is/fimc-isp-video.c
index ced46600e343..93f9cf2ebcd6 100644
--- a/drivers/media/platform/exynos4-is/fimc-isp-video.c
+++ b/drivers/media/platform/exynos4-is/fimc-isp-video.c
@@ -30,7 +30,7 @@
 #include <media/v4l2-ioctl.h>
 #include <media/videobuf2-core.h>
 #include <media/videobuf2-dma-contig.h>
-#include <media/s5p_fimc.h>
+#include <media/exynos-fimc.h>
 
 #include "common.h"
 #include "media-dev.h"
diff --git a/drivers/media/platform/exynos4-is/fimc-isp.h b/drivers/media/platform/exynos4-is/fimc-isp.h
index 4dc55a18d978..b99be09b49fc 100644
--- a/drivers/media/platform/exynos4-is/fimc-isp.h
+++ b/drivers/media/platform/exynos4-is/fimc-isp.h
@@ -24,7 +24,7 @@
 #include <media/videobuf2-core.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-mediabus.h>
-#include <media/s5p_fimc.h>
+#include <media/exynos-fimc.h>
 
 extern int fimc_isp_debug;
 
diff --git a/drivers/media/platform/exynos4-is/fimc-lite-reg.c b/drivers/media/platform/exynos4-is/fimc-lite-reg.c
index d0dc7ee04452..bc3ec7d25a32 100644
--- a/drivers/media/platform/exynos4-is/fimc-lite-reg.c
+++ b/drivers/media/platform/exynos4-is/fimc-lite-reg.c
@@ -12,7 +12,7 @@
 #include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/io.h>
-#include <media/s5p_fimc.h>
+#include <media/exynos-fimc.h>
 
 #include "fimc-lite-reg.h"
 #include "fimc-lite.h"
diff --git a/drivers/media/platform/exynos4-is/fimc-lite.c b/drivers/media/platform/exynos4-is/fimc-lite.c
index 630aef52dbb8..a97d2352f1d7 100644
--- a/drivers/media/platform/exynos4-is/fimc-lite.c
+++ b/drivers/media/platform/exynos4-is/fimc-lite.c
@@ -30,7 +30,7 @@
 #include <media/v4l2-mem2mem.h>
 #include <media/videobuf2-core.h>
 #include <media/videobuf2-dma-contig.h>
-#include <media/s5p_fimc.h>
+#include <media/exynos-fimc.h>
 
 #include "common.h"
 #include "fimc-core.h"
diff --git a/drivers/media/platform/exynos4-is/fimc-lite.h b/drivers/media/platform/exynos4-is/fimc-lite.h
index 7428b2d22b52..ea19dc7be63e 100644
--- a/drivers/media/platform/exynos4-is/fimc-lite.h
+++ b/drivers/media/platform/exynos4-is/fimc-lite.h
@@ -23,7 +23,7 @@
 #include <media/v4l2-ctrls.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-mediabus.h>
-#include <media/s5p_fimc.h>
+#include <media/exynos-fimc.h>
 
 #define FIMC_LITE_DRV_NAME	"exynos-fimc-lite"
 #define FLITE_CLK_NAME		"flite"
diff --git a/drivers/media/platform/exynos4-is/fimc-reg.c b/drivers/media/platform/exynos4-is/fimc-reg.c
index 1db8cb4c46ef..2d77fd8f440a 100644
--- a/drivers/media/platform/exynos4-is/fimc-reg.c
+++ b/drivers/media/platform/exynos4-is/fimc-reg.c
@@ -13,7 +13,7 @@
 #include <linux/io.h>
 #include <linux/regmap.h>
 
-#include <media/s5p_fimc.h>
+#include <media/exynos-fimc.h>
 #include "media-dev.h"
 
 #include "fimc-reg.h"
diff --git a/drivers/media/platform/exynos4-is/media-dev.c b/drivers/media/platform/exynos4-is/media-dev.c
index 6e2d6042ade6..344718df5c62 100644
--- a/drivers/media/platform/exynos4-is/media-dev.c
+++ b/drivers/media/platform/exynos4-is/media-dev.c
@@ -31,7 +31,7 @@
 #include <media/v4l2-ctrls.h>
 #include <media/v4l2-of.h>
 #include <media/media-device.h>
-#include <media/s5p_fimc.h>
+#include <media/exynos-fimc.h>
 
 #include "media-dev.h"
 #include "fimc-core.h"
@@ -39,10 +39,6 @@
 #include "fimc-lite.h"
 #include "mipi-csis.h"
 
-static int __fimc_md_set_camclk(struct fimc_md *fmd,
-				struct fimc_source_info *si,
-				bool on);
-
 /* Set up image sensor subdev -> FIMC capture node notifications. */
 static void __setup_sensor_notification(struct fimc_md *fmd,
 					struct v4l2_subdev *sensor,
@@ -223,17 +219,10 @@ static int __fimc_pipeline_open(struct exynos_media_pipeline *ep,
 			return ret;
 	}
 
-	ret = fimc_md_set_camclk(sd, true);
-	if (ret < 0)
-		goto err_wbclk;
-
 	ret = fimc_pipeline_s_power(p, 1);
 	if (!ret)
 		return 0;
 
-	fimc_md_set_camclk(sd, false);
-
-err_wbclk:
 	if (!IS_ERR(fmd->wbclk[CLK_IDX_WB_B]) && p->subdevs[IDX_IS_ISP])
 		clk_disable_unprepare(fmd->wbclk[CLK_IDX_WB_B]);
 
@@ -259,7 +248,6 @@ static int __fimc_pipeline_close(struct exynos_media_pipeline *ep)
 	}
 
 	ret = fimc_pipeline_s_power(p, 0);
-	fimc_md_set_camclk(sd, false);
 
 	fmd = entity_to_fimc_mdev(&sd->entity);
 
@@ -337,75 +325,14 @@ static void fimc_md_pipelines_free(struct fimc_md *fmd)
 	}
 }
 
-/*
- * Sensor subdevice helper functions
- */
-static struct v4l2_subdev *fimc_md_register_sensor(struct fimc_md *fmd,
-						struct fimc_source_info *si)
-{
-	struct i2c_adapter *adapter;
-	struct v4l2_subdev *sd = NULL;
-
-	if (!si || !fmd)
-		return NULL;
-	/*
-	 * If FIMC bus type is not Writeback FIFO assume it is same
-	 * as sensor_bus_type.
-	 */
-	si->fimc_bus_type = si->sensor_bus_type;
-
-	adapter = i2c_get_adapter(si->i2c_bus_num);
-	if (!adapter) {
-		v4l2_warn(&fmd->v4l2_dev,
-			  "Failed to get I2C adapter %d, deferring probe\n",
-			  si->i2c_bus_num);
-		return ERR_PTR(-EPROBE_DEFER);
-	}
-	sd = v4l2_i2c_new_subdev_board(&fmd->v4l2_dev, adapter,
-						si->board_info, NULL);
-	if (IS_ERR_OR_NULL(sd)) {
-		i2c_put_adapter(adapter);
-		v4l2_warn(&fmd->v4l2_dev,
-			  "Failed to acquire subdev %s, deferring probe\n",
-			  si->board_info->type);
-		return ERR_PTR(-EPROBE_DEFER);
-	}
-	v4l2_set_subdev_hostdata(sd, si);
-	sd->grp_id = GRP_ID_SENSOR;
-
-	v4l2_info(&fmd->v4l2_dev, "Registered sensor subdevice %s\n",
-		  sd->name);
-	return sd;
-}
-
-static void fimc_md_unregister_sensor(struct v4l2_subdev *sd)
-{
-	struct i2c_client *client = v4l2_get_subdevdata(sd);
-	struct i2c_adapter *adapter;
-
-	if (!client || client->dev.of_node)
-		return;
-
-	v4l2_device_unregister_subdev(sd);
-
-	adapter = client->adapter;
-	i2c_unregister_device(client);
-	if (adapter)
-		i2c_put_adapter(adapter);
-}
-
-#ifdef CONFIG_OF
 /* Parse port node and register as a sub-device any sensor specified there. */
 static int fimc_md_parse_port_node(struct fimc_md *fmd,
 				   struct device_node *port,
 				   unsigned int index)
 {
+	struct fimc_source_info *pd = &fmd->sensor[index].pdata;
 	struct device_node *rem, *ep, *np;
-	struct fimc_source_info *pd;
 	struct v4l2_of_endpoint endpoint;
-	u32 val;
-
-	pd = &fmd->sensor[index].pdata;
 
 	/* Assume here a port node can have only one endpoint node. */
 	ep = of_get_next_child(port, NULL);
@@ -425,20 +352,6 @@ static int fimc_md_parse_port_node(struct fimc_md *fmd,
 							ep->full_name);
 		return 0;
 	}
-	if (!of_property_read_u32(rem, "samsung,camclk-out", &val))
-		pd->clk_id = val;
-
-	if (!of_property_read_u32(rem, "clock-frequency", &val))
-		pd->clk_frequency = val;
-	else
-		pd->clk_frequency = DEFAULT_SENSOR_CLK_FREQ;
-
-	if (pd->clk_frequency == 0) {
-		v4l2_err(&fmd->v4l2_dev, "Wrong clock frequency at node %s\n",
-			 rem->full_name);
-		of_node_put(rem);
-		return -EINVAL;
-	}
 
 	if (fimc_input_is_parallel(endpoint.base.port)) {
 		if (endpoint.bus_type == V4L2_MBUS_PARALLEL)
@@ -485,14 +398,26 @@ static int fimc_md_parse_port_node(struct fimc_md *fmd,
 }
 
 /* Register all SoC external sub-devices */
-static int fimc_md_of_sensors_register(struct fimc_md *fmd,
-				       struct device_node *np)
+static int fimc_md_register_sensor_entities(struct fimc_md *fmd)
 {
 	struct device_node *parent = fmd->pdev->dev.of_node;
 	struct device_node *node, *ports;
 	int index = 0;
 	int ret;
 
+	/*
+	 * Runtime resume one of the FIMC entities to make sure
+	 * the sclk_cam clocks are not globally disabled.
+	 */
+	if (!fmd->pmf)
+		return -ENXIO;
+
+	ret = pm_runtime_get_sync(fmd->pmf);
+	if (ret < 0)
+		return ret;
+
+	fmd->num_sensors = 0;
+
 	/* Attach sensors linked to MIPI CSI-2 receivers */
 	for_each_available_child_of_node(parent, node) {
 		struct device_node *port;
@@ -506,14 +431,14 @@ static int fimc_md_of_sensors_register(struct fimc_md *fmd,
 
 		ret = fimc_md_parse_port_node(fmd, port, index);
 		if (ret < 0)
-			return ret;
+			goto rpm_put;
 		index++;
 	}
 
 	/* Attach sensors listed in the parallel-ports node */
 	ports = of_get_child_by_name(parent, "parallel-ports");
 	if (!ports)
-		return 0;
+		goto rpm_put;
 
 	for_each_child_of_node(ports, node) {
 		ret = fimc_md_parse_port_node(fmd, node, index);
@@ -521,8 +446,9 @@ static int fimc_md_of_sensors_register(struct fimc_md *fmd,
 			break;
 		index++;
 	}
-
-	return 0;
+rpm_put:
+	pm_runtime_put(fmd->pmf);
+	return ret;
 }
 
 static int __of_get_csis_id(struct device_node *np)
@@ -535,68 +461,10 @@ static int __of_get_csis_id(struct device_node *np)
 	of_property_read_u32(np, "reg", &reg);
 	return reg - FIMC_INPUT_MIPI_CSI2_0;
 }
-#else
-#define fimc_md_of_sensors_register(fmd, np) (-ENOSYS)
-#define __of_get_csis_id(np) (-ENOSYS)
-#endif
-
-static int fimc_md_register_sensor_entities(struct fimc_md *fmd)
-{
-	struct s5p_platform_fimc *pdata = fmd->pdev->dev.platform_data;
-	struct device_node *of_node = fmd->pdev->dev.of_node;
-	int num_clients = 0;
-	int ret, i;
-
-	/*
-	 * Runtime resume one of the FIMC entities to make sure
-	 * the sclk_cam clocks are not globally disabled.
-	 */
-	if (!fmd->pmf)
-		return -ENXIO;
-
-	ret = pm_runtime_get_sync(fmd->pmf);
-	if (ret < 0)
-		return ret;
-
-	if (of_node) {
-		fmd->num_sensors = 0;
-		ret = fimc_md_of_sensors_register(fmd, of_node);
-	} else if (pdata) {
-		WARN_ON(pdata->num_clients > ARRAY_SIZE(fmd->sensor));
-		num_clients = min_t(u32, pdata->num_clients,
-				    ARRAY_SIZE(fmd->sensor));
-		fmd->num_sensors = num_clients;
-
-		for (i = 0; i < num_clients; i++) {
-			struct fimc_sensor_info *si = &fmd->sensor[i];
-			struct v4l2_subdev *sd;
-
-			si->pdata = pdata->source_info[i];
-			ret = __fimc_md_set_camclk(fmd, &si->pdata, true);
-			if (ret)
-				break;
-			sd = fimc_md_register_sensor(fmd, &si->pdata);
-			ret = __fimc_md_set_camclk(fmd, &si->pdata, false);
-
-			if (IS_ERR(sd)) {
-				si->subdev = NULL;
-				ret = PTR_ERR(sd);
-				break;
-			}
-			si->subdev = sd;
-			if (ret)
-				break;
-		}
-	}
-
-	pm_runtime_put(fmd->pmf);
-	return ret;
-}
 
 /*
  * MIPI-CSIS, FIMC and FIMC-LITE platform devices registration.
  */
-
 static int register_fimc_lite_entity(struct fimc_md *fmd,
 				     struct fimc_lite *fimc_lite)
 {
@@ -753,35 +621,9 @@ dev_unlock:
 	return ret;
 }
 
-static int fimc_md_pdev_match(struct device *dev, void *data)
-{
-	struct platform_device *pdev = to_platform_device(dev);
-	int plat_entity = -1;
-	int ret;
-	char *p;
-
-	if (!get_device(dev))
-		return -ENODEV;
-
-	if (!strcmp(pdev->name, CSIS_DRIVER_NAME)) {
-		plat_entity = IDX_CSIS;
-	} else {
-		p = strstr(pdev->name, "fimc");
-		if (p && *(p + 4) == 0)
-			plat_entity = IDX_FIMC;
-	}
-
-	if (plat_entity >= 0)
-		ret = fimc_md_register_platform_entity(data, pdev,
-						       plat_entity);
-	put_device(dev);
-	return 0;
-}
-
 /* Register FIMC, FIMC-LITE and CSIS media entities */
-#ifdef CONFIG_OF
-static int fimc_md_register_of_platform_entities(struct fimc_md *fmd,
-						 struct device_node *parent)
+static int fimc_md_register_platform_entities(struct fimc_md *fmd,
+					      struct device_node *parent)
 {
 	struct device_node *node;
 	int ret = 0;
@@ -815,9 +657,6 @@ static int fimc_md_register_of_platform_entities(struct fimc_md *fmd,
 
 	return ret;
 }
-#else
-#define fimc_md_register_of_platform_entities(fmd, node) (-ENOSYS)
-#endif
 
 static void fimc_md_unregister_entities(struct fimc_md *fmd)
 {
@@ -845,14 +684,6 @@ static void fimc_md_unregister_entities(struct fimc_md *fmd)
 		v4l2_device_unregister_subdev(fmd->csis[i].sd);
 		fmd->csis[i].sd = NULL;
 	}
-	if (fmd->pdev->dev.of_node == NULL) {
-		for (i = 0; i < fmd->num_sensors; i++) {
-			if (fmd->sensor[i].subdev == NULL)
-				continue;
-			fimc_md_unregister_sensor(fmd->sensor[i].subdev);
-			fmd->sensor[i].subdev = NULL;
-		}
-	}
 
 	if (fmd->fimc_is)
 		v4l2_device_unregister_subdev(&fmd->fimc_is->isp.subdev);
@@ -1137,7 +968,7 @@ static void fimc_md_put_clocks(struct fimc_md *fmd)
 
 static int fimc_md_get_clocks(struct fimc_md *fmd)
 {
-	struct device *dev = NULL;
+	struct device *dev = &fmd->pdev->dev;
 	char clk_name[32];
 	struct clk *clock;
 	int i, ret = 0;
@@ -1145,16 +976,12 @@ static int fimc_md_get_clocks(struct fimc_md *fmd)
 	for (i = 0; i < FIMC_MAX_CAMCLKS; i++)
 		fmd->camclk[i].clock = ERR_PTR(-EINVAL);
 
-	if (fmd->pdev->dev.of_node)
-		dev = &fmd->pdev->dev;
-
 	for (i = 0; i < FIMC_MAX_CAMCLKS; i++) {
 		snprintf(clk_name, sizeof(clk_name), "sclk_cam%u", i);
 		clock = clk_get(dev, clk_name);
 
 		if (IS_ERR(clock)) {
-			dev_err(&fmd->pdev->dev, "Failed to get clock: %s\n",
-								clk_name);
+			dev_err(dev, "Failed to get clock: %s\n", clk_name);
 			ret = PTR_ERR(clock);
 			break;
 		}
@@ -1188,86 +1015,6 @@ static int fimc_md_get_clocks(struct fimc_md *fmd)
 	return ret;
 }
 
-static int __fimc_md_set_camclk(struct fimc_md *fmd,
-				struct fimc_source_info *si,
-				bool on)
-{
-	struct fimc_camclk_info *camclk;
-	int ret = 0;
-
-	/*
-	 * When device tree is used the sensor drivers are supposed to
-	 * control the clock themselves. This whole function will be
-	 * removed once S5PV210 platform is converted to the device tree.
-	 */
-	if (fmd->pdev->dev.of_node)
-		return 0;
-
-	if (WARN_ON(si->clk_id >= FIMC_MAX_CAMCLKS) || !fmd || !fmd->pmf)
-		return -EINVAL;
-
-	camclk = &fmd->camclk[si->clk_id];
-
-	dbg("camclk %d, f: %lu, use_count: %d, on: %d",
-	    si->clk_id, si->clk_frequency, camclk->use_count, on);
-
-	if (on) {
-		if (camclk->use_count > 0 &&
-		    camclk->frequency != si->clk_frequency)
-			return -EINVAL;
-
-		if (camclk->use_count++ == 0) {
-			clk_set_rate(camclk->clock, si->clk_frequency);
-			camclk->frequency = si->clk_frequency;
-			ret = pm_runtime_get_sync(fmd->pmf);
-			if (ret < 0)
-				return ret;
-			ret = clk_prepare_enable(camclk->clock);
-			dbg("Enabled camclk %d: f: %lu", si->clk_id,
-			    clk_get_rate(camclk->clock));
-		}
-		return ret;
-	}
-
-	if (WARN_ON(camclk->use_count == 0))
-		return 0;
-
-	if (--camclk->use_count == 0) {
-		clk_disable_unprepare(camclk->clock);
-		pm_runtime_put(fmd->pmf);
-		dbg("Disabled camclk %d", si->clk_id);
-	}
-	return ret;
-}
-
-/**
- * fimc_md_set_camclk - peripheral sensor clock setup
- * @sd: sensor subdev to configure sclk_cam clock for
- * @on: 1 to enable or 0 to disable the clock
- *
- * There are 2 separate clock outputs available in the SoC for external
- * image processors. These clocks are shared between all registered FIMC
- * devices to which sensors can be attached, either directly or through
- * the MIPI CSI receiver. The clock is allowed here to be used by
- * multiple sensors concurrently if they use same frequency.
- * This function should only be called when the graph mutex is held.
- */
-int fimc_md_set_camclk(struct v4l2_subdev *sd, bool on)
-{
-	struct fimc_source_info *si = v4l2_get_subdev_hostdata(sd);
-	struct fimc_md *fmd = entity_to_fimc_mdev(&sd->entity);
-
-	/*
-	 * If there is a clock provider registered the sensors will
-	 * handle their clock themselves, no need to control it on
-	 * the host interface side.
-	 */
-	if (fmd->clk_provider.num_clocks > 0)
-		return 0;
-
-	return __fimc_md_set_camclk(fmd, si, on);
-}
-
 static int __fimc_md_modify_pipeline(struct media_entity *entity, bool enable)
 {
 	struct exynos_video_entity *ve;
@@ -1426,7 +1173,6 @@ static int fimc_md_get_pinctrl(struct fimc_md *fmd)
 	return 0;
 }
 
-#ifdef CONFIG_OF
 static int cam_clk_prepare(struct clk_hw *hw)
 {
 	struct cam_clk *camclk = to_cam_clk(hw);
@@ -1518,10 +1264,6 @@ err:
 	fimc_md_unregister_clk_provider(fmd);
 	return ret;
 }
-#else
-#define fimc_md_register_clk_provider(fmd) (0)
-#define fimc_md_unregister_clk_provider(fmd)
-#endif
 
 static int subdev_notifier_bound(struct v4l2_async_notifier *notifier,
 				 struct v4l2_subdev *subdev,
@@ -1585,8 +1327,8 @@ static int fimc_md_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	spin_lock_init(&fmd->slock);
-	fmd->pdev = pdev;
 	INIT_LIST_HEAD(&fmd->pipelines);
+	fmd->pdev = pdev;
 
 	strlcpy(fmd->media_dev.model, "SAMSUNG S5P FIMC",
 		sizeof(fmd->media_dev.model));
@@ -1599,6 +1341,7 @@ static int fimc_md_probe(struct platform_device *pdev)
 	strlcpy(v4l2_dev->name, "s5p-fimc-md", sizeof(v4l2_dev->name));
 
 	fmd->use_isp = fimc_md_is_isp_available(dev->of_node);
+	fmd->user_subdev_api = true;
 
 	ret = v4l2_device_register(dev, &fmd->v4l2_dev);
 	if (ret < 0) {
@@ -1616,8 +1359,6 @@ static int fimc_md_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_md;
 
-	fmd->user_subdev_api = (dev->of_node != NULL);
-
 	ret = fimc_md_get_pinctrl(fmd);
 	if (ret < 0) {
 		if (ret != EPROBE_DEFER)
@@ -1630,22 +1371,16 @@ static int fimc_md_probe(struct platform_device *pdev)
 	/* Protect the media graph while we're registering entities */
 	mutex_lock(&fmd->media_dev.graph_mutex);
 
-	if (dev->of_node)
-		ret = fimc_md_register_of_platform_entities(fmd, dev->of_node);
-	else
-		ret = bus_for_each_dev(&platform_bus_type, NULL, fmd,
-						fimc_md_pdev_match);
+	ret = fimc_md_register_platform_entities(fmd, dev->of_node);
 	if (ret) {
 		mutex_unlock(&fmd->media_dev.graph_mutex);
 		goto err_clk;
 	}
 
-	if (dev->platform_data || dev->of_node) {
-		ret = fimc_md_register_sensor_entities(fmd);
-		if (ret) {
-			mutex_unlock(&fmd->media_dev.graph_mutex);
-			goto err_m_ent;
-		}
+	ret = fimc_md_register_sensor_entities(fmd);
+	if (ret) {
+		mutex_unlock(&fmd->media_dev.graph_mutex);
+		goto err_m_ent;
 	}
 
 	mutex_unlock(&fmd->media_dev.graph_mutex);
diff --git a/drivers/media/platform/exynos4-is/media-dev.h b/drivers/media/platform/exynos4-is/media-dev.h
index 58c49456b13f..03214541f149 100644
--- a/drivers/media/platform/exynos4-is/media-dev.h
+++ b/drivers/media/platform/exynos4-is/media-dev.h
@@ -19,7 +19,7 @@
 #include <media/media-entity.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-subdev.h>
-#include <media/s5p_fimc.h>
+#include <media/exynos-fimc.h>
 
 #include "fimc-core.h"
 #include "fimc-lite.h"
@@ -94,9 +94,7 @@ struct fimc_sensor_info {
 };
 
 struct cam_clk {
-#ifdef CONFIG_COMMON_CLK
 	struct clk_hw hw;
-#endif
 	struct fimc_md *fmd;
 };
 #define to_cam_clk(_hw) container_of(_hw, struct cam_clk, hw)
@@ -144,9 +142,7 @@ struct fimc_md {
 
 	struct cam_clk_provider {
 		struct clk *clks[FIMC_MAX_CAMCLKS];
-#ifdef CONFIG_COMMON_CLK
 		struct clk_onecell_data clk_data;
-#endif
 		struct device_node *of_node;
 		struct cam_clk camclk[FIMC_MAX_CAMCLKS];
 		int num_clocks;
diff --git a/drivers/media/platform/exynos4-is/mipi-csis.c b/drivers/media/platform/exynos4-is/mipi-csis.c
index 3678ba59725c..ae54ef5f535d 100644
--- a/drivers/media/platform/exynos4-is/mipi-csis.c
+++ b/drivers/media/platform/exynos4-is/mipi-csis.c
@@ -22,14 +22,13 @@
 #include <linux/of.h>
 #include <linux/of_graph.h>
 #include <linux/phy/phy.h>
-#include <linux/platform_data/mipi-csis.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/videodev2.h>
-#include <media/s5p_fimc.h>
+#include <media/exynos-fimc.h>
 #include <media/v4l2-of.h>
 #include <media/v4l2-subdev.h>
 
@@ -730,26 +729,6 @@ static irqreturn_t s5pcsis_irq_handler(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int s5pcsis_get_platform_data(struct platform_device *pdev,
-				     struct csis_state *state)
-{
-	struct s5p_platform_mipi_csis *pdata = pdev->dev.platform_data;
-
-	if (pdata == NULL) {
-		dev_err(&pdev->dev, "Platform data not specified\n");
-		return -EINVAL;
-	}
-
-	state->clk_frequency = pdata->clk_rate;
-	state->num_lanes = pdata->lanes;
-	state->hs_settle = pdata->hs_settle;
-	state->index = max(0, pdev->id);
-	state->max_num_lanes = state->index ? CSIS1_MAX_LANES :
-					      CSIS0_MAX_LANES;
-	return 0;
-}
-
-#ifdef CONFIG_OF
 static int s5pcsis_parse_dt(struct platform_device *pdev,
 			    struct csis_state *state)
 {
@@ -787,9 +766,6 @@ static int s5pcsis_parse_dt(struct platform_device *pdev,
 
 	return 0;
 }
-#else
-#define s5pcsis_parse_dt(pdev, state) (-ENOSYS)
-#endif
 
 static int s5pcsis_pm_resume(struct device *dev, bool runtime);
 static const struct of_device_id s5pcsis_of_match[];
@@ -812,19 +788,14 @@ static int s5pcsis_probe(struct platform_device *pdev)
 	spin_lock_init(&state->slock);
 	state->pdev = pdev;
 
-	if (dev->of_node) {
-		of_id = of_match_node(s5pcsis_of_match, dev->of_node);
-		if (WARN_ON(of_id == NULL))
-			return -EINVAL;
-
-		drv_data = of_id->data;
-		state->interrupt_mask = drv_data->interrupt_mask;
+	of_id = of_match_node(s5pcsis_of_match, dev->of_node);
+	if (WARN_ON(of_id == NULL))
+		return -EINVAL;
 
-		ret = s5pcsis_parse_dt(pdev, state);
-	} else {
-		ret = s5pcsis_get_platform_data(pdev, state);
-	}
+	drv_data = of_id->data;
+	state->interrupt_mask = drv_data->interrupt_mask;
 
+	ret = s5pcsis_parse_dt(pdev, state);
 	if (ret < 0)
 		return ret;
 
diff --git a/include/linux/platform_data/mipi-csis.h b/include/linux/platform_data/mipi-csis.h
deleted file mode 100644
index c2fd9024717c..000000000000
--- a/include/linux/platform_data/mipi-csis.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (C) 2010 - 2012 Samsung Electronics Co., Ltd.
- *
- * Samsung S5P/Exynos SoC series MIPI CSIS device support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __PLAT_SAMSUNG_MIPI_CSIS_H_
-#define __PLAT_SAMSUNG_MIPI_CSIS_H_ __FILE__
-
-/**
- * struct s5p_platform_mipi_csis - platform data for S5P MIPI-CSIS driver
- * @clk_rate:    bus clock frequency
- * @wclk_source: CSI wrapper clock selection: 0 - bus clock, 1 - ext. SCLK_CAM
- * @lanes:       number of data lanes used
- * @hs_settle:   HS-RX settle time
- */
-struct s5p_platform_mipi_csis {
-	unsigned long clk_rate;
-	u8 wclk_source;
-	u8 lanes;
-	u8 hs_settle;
-};
-
-#endif /* __PLAT_SAMSUNG_MIPI_CSIS_H_ */
diff --git a/include/media/exynos-fimc.h b/include/media/exynos-fimc.h
new file mode 100644
index 000000000000..aa44660e2041
--- /dev/null
+++ b/include/media/exynos-fimc.h
@@ -0,0 +1,161 @@
+/*
+ * Samsung S5P/Exynos4 SoC series camera interface driver header
+ *
+ * Copyright (C) 2010 - 2013 Samsung Electronics Co., Ltd.
+ * Sylwester Nawrocki <s.nawrocki@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef S5P_FIMC_H_
+#define S5P_FIMC_H_
+
+#include <media/media-entity.h>
+#include <media/v4l2-dev.h>
+#include <media/v4l2-mediabus.h>
+
+/*
+ * Enumeration of data inputs to the camera subsystem.
+ */
+enum fimc_input {
+	FIMC_INPUT_PARALLEL_0	= 1,
+	FIMC_INPUT_PARALLEL_1,
+	FIMC_INPUT_MIPI_CSI2_0	= 3,
+	FIMC_INPUT_MIPI_CSI2_1,
+	FIMC_INPUT_WRITEBACK_A	= 5,
+	FIMC_INPUT_WRITEBACK_B,
+	FIMC_INPUT_WRITEBACK_ISP = 5,
+};
+
+/*
+ * Enumeration of the FIMC data bus types.
+ */
+enum fimc_bus_type {
+	/* Camera parallel bus */
+	FIMC_BUS_TYPE_ITU_601 = 1,
+	/* Camera parallel bus with embedded synchronization */
+	FIMC_BUS_TYPE_ITU_656,
+	/* Camera MIPI-CSI2 serial bus */
+	FIMC_BUS_TYPE_MIPI_CSI2,
+	/* FIFO link from LCD controller (WriteBack A) */
+	FIMC_BUS_TYPE_LCD_WRITEBACK_A,
+	/* FIFO link from LCD controller (WriteBack B) */
+	FIMC_BUS_TYPE_LCD_WRITEBACK_B,
+	/* FIFO link from FIMC-IS */
+	FIMC_BUS_TYPE_ISP_WRITEBACK = FIMC_BUS_TYPE_LCD_WRITEBACK_B,
+};
+
+#define fimc_input_is_parallel(x) ((x) == 1 || (x) == 2)
+#define fimc_input_is_mipi_csi(x) ((x) == 3 || (x) == 4)
+
+/*
+ * The subdevices' group IDs.
+ */
+#define GRP_ID_SENSOR		(1 << 8)
+#define GRP_ID_FIMC_IS_SENSOR	(1 << 9)
+#define GRP_ID_WRITEBACK	(1 << 10)
+#define GRP_ID_CSIS		(1 << 11)
+#define GRP_ID_FIMC		(1 << 12)
+#define GRP_ID_FLITE		(1 << 13)
+#define GRP_ID_FIMC_IS		(1 << 14)
+
+/**
+ * struct fimc_source_info - video source description required for the host
+ *			     interface configuration
+ *
+ * @fimc_bus_type: FIMC camera input type
+ * @sensor_bus_type: image sensor bus type, MIPI, ITU-R BT.601 etc.
+ * @flags: the parallel sensor bus flags defining signals polarity (V4L2_MBUS_*)
+ * @mux_id: FIMC camera interface multiplexer index (separate for MIPI and ITU)
+ */
+struct fimc_source_info {
+	enum fimc_bus_type fimc_bus_type;
+	enum fimc_bus_type sensor_bus_type;
+	u16 flags;
+	u16 mux_id;
+};
+
+/*
+ * v4l2_device notification id. This is only for internal use in the kernel.
+ * Sensor subdevs should issue S5P_FIMC_TX_END_NOTIFY notification in single
+ * frame capture mode when there is only one VSYNC pulse issued by the sensor
+ * at begining of the frame transmission.
+ */
+#define S5P_FIMC_TX_END_NOTIFY _IO('e', 0)
+
+#define FIMC_MAX_PLANES	3
+
+/**
+ * struct fimc_fmt - color format data structure
+ * @mbus_code: media bus pixel code, -1 if not applicable
+ * @name: format description
+ * @fourcc: fourcc code for this format, 0 if not applicable
+ * @color: the driver's private color format id
+ * @memplanes: number of physically non-contiguous data planes
+ * @colplanes: number of physically contiguous data planes
+ * @colorspace: v4l2 colorspace (V4L2_COLORSPACE_*)
+ * @depth: per plane driver's private 'number of bits per pixel'
+ * @mdataplanes: bitmask indicating meta data plane(s), (1 << plane_no)
+ * @flags: flags indicating which operation mode format applies to
+ */
+struct fimc_fmt {
+	enum v4l2_mbus_pixelcode mbus_code;
+	char	*name;
+	u32	fourcc;
+	u32	color;
+	u16	memplanes;
+	u16	colplanes;
+	u8	colorspace;
+	u8	depth[FIMC_MAX_PLANES];
+	u16	mdataplanes;
+	u16	flags;
+#define FMT_FLAGS_CAM		(1 << 0)
+#define FMT_FLAGS_M2M_IN	(1 << 1)
+#define FMT_FLAGS_M2M_OUT	(1 << 2)
+#define FMT_FLAGS_M2M		(1 << 1 | 1 << 2)
+#define FMT_HAS_ALPHA		(1 << 3)
+#define FMT_FLAGS_COMPRESSED	(1 << 4)
+#define FMT_FLAGS_WRITEBACK	(1 << 5)
+#define FMT_FLAGS_RAW_BAYER	(1 << 6)
+#define FMT_FLAGS_YUV		(1 << 7)
+};
+
+struct exynos_media_pipeline;
+
+/*
+ * Media pipeline operations to be called from within a video node,  i.e. the
+ * last entity within the pipeline. Implemented by related media device driver.
+ */
+struct exynos_media_pipeline_ops {
+	int (*prepare)(struct exynos_media_pipeline *p,
+						struct media_entity *me);
+	int (*unprepare)(struct exynos_media_pipeline *p);
+	int (*open)(struct exynos_media_pipeline *p, struct media_entity *me,
+							bool resume);
+	int (*close)(struct exynos_media_pipeline *p);
+	int (*set_stream)(struct exynos_media_pipeline *p, bool state);
+};
+
+struct exynos_video_entity {
+	struct video_device vdev;
+	struct exynos_media_pipeline *pipe;
+};
+
+struct exynos_media_pipeline {
+	struct media_pipeline mp;
+	const struct exynos_media_pipeline_ops *ops;
+};
+
+static inline struct exynos_video_entity *vdev_to_exynos_video_entity(
+					struct video_device *vdev)
+{
+	return container_of(vdev, struct exynos_video_entity, vdev);
+}
+
+#define fimc_pipeline_call(ent, op, args...)				  \
+	(!(ent) ? -ENOENT : (((ent)->pipe->ops && (ent)->pipe->ops->op) ? \
+	(ent)->pipe->ops->op(((ent)->pipe), ##args) : -ENOIOCTLCMD))	  \
+
+#endif /* S5P_FIMC_H_ */
diff --git a/include/media/s5p_fimc.h b/include/media/s5p_fimc.h
deleted file mode 100644
index b975c285c8a9..000000000000
--- a/include/media/s5p_fimc.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Samsung S5P/Exynos4 SoC series camera interface driver header
- *
- * Copyright (C) 2010 - 2013 Samsung Electronics Co., Ltd.
- * Sylwester Nawrocki <s.nawrocki@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef S5P_FIMC_H_
-#define S5P_FIMC_H_
-
-#include <media/media-entity.h>
-#include <media/v4l2-dev.h>
-#include <media/v4l2-mediabus.h>
-
-/*
- * Enumeration of data inputs to the camera subsystem.
- */
-enum fimc_input {
-	FIMC_INPUT_PARALLEL_0	= 1,
-	FIMC_INPUT_PARALLEL_1,
-	FIMC_INPUT_MIPI_CSI2_0	= 3,
-	FIMC_INPUT_MIPI_CSI2_1,
-	FIMC_INPUT_WRITEBACK_A	= 5,
-	FIMC_INPUT_WRITEBACK_B,
-	FIMC_INPUT_WRITEBACK_ISP = 5,
-};
-
-/*
- * Enumeration of the FIMC data bus types.
- */
-enum fimc_bus_type {
-	/* Camera parallel bus */
-	FIMC_BUS_TYPE_ITU_601 = 1,
-	/* Camera parallel bus with embedded synchronization */
-	FIMC_BUS_TYPE_ITU_656,
-	/* Camera MIPI-CSI2 serial bus */
-	FIMC_BUS_TYPE_MIPI_CSI2,
-	/* FIFO link from LCD controller (WriteBack A) */
-	FIMC_BUS_TYPE_LCD_WRITEBACK_A,
-	/* FIFO link from LCD controller (WriteBack B) */
-	FIMC_BUS_TYPE_LCD_WRITEBACK_B,
-	/* FIFO link from FIMC-IS */
-	FIMC_BUS_TYPE_ISP_WRITEBACK = FIMC_BUS_TYPE_LCD_WRITEBACK_B,
-};
-
-#define fimc_input_is_parallel(x) ((x) == 1 || (x) == 2)
-#define fimc_input_is_mipi_csi(x) ((x) == 3 || (x) == 4)
-
-/*
- * The subdevices' group IDs.
- */
-#define GRP_ID_SENSOR		(1 << 8)
-#define GRP_ID_FIMC_IS_SENSOR	(1 << 9)
-#define GRP_ID_WRITEBACK	(1 << 10)
-#define GRP_ID_CSIS		(1 << 11)
-#define GRP_ID_FIMC		(1 << 12)
-#define GRP_ID_FLITE		(1 << 13)
-#define GRP_ID_FIMC_IS		(1 << 14)
-
-struct i2c_board_info;
-
-/**
- * struct fimc_source_info - video source description required for the host
- *			     interface configuration
- *
- * @board_info: pointer to I2C subdevice's board info
- * @clk_frequency: frequency of the clock the host interface provides to sensor
- * @fimc_bus_type: FIMC camera input type
- * @sensor_bus_type: image sensor bus type, MIPI, ITU-R BT.601 etc.
- * @flags: the parallel sensor bus flags defining signals polarity (V4L2_MBUS_*)
- * @i2c_bus_num: i2c control bus id the sensor is attached to
- * @mux_id: FIMC camera interface multiplexer index (separate for MIPI and ITU)
- * @clk_id: index of the SoC peripheral clock for sensors
- */
-struct fimc_source_info {
-	struct i2c_board_info *board_info;
-	unsigned long clk_frequency;
-	enum fimc_bus_type fimc_bus_type;
-	enum fimc_bus_type sensor_bus_type;
-	u16 flags;
-	u16 i2c_bus_num;
-	u16 mux_id;
-	u8 clk_id;
-};
-
-/**
- * struct s5p_platform_fimc - camera host interface platform data
- *
- * @source_info: properties of an image source for the host interface setup
- * @num_clients: the number of attached image sources
- */
-struct s5p_platform_fimc {
-	struct fimc_source_info *source_info;
-	int num_clients;
-};
-
-/*
- * v4l2_device notification id. This is only for internal use in the kernel.
- * Sensor subdevs should issue S5P_FIMC_TX_END_NOTIFY notification in single
- * frame capture mode when there is only one VSYNC pulse issued by the sensor
- * at begining of the frame transmission.
- */
-#define S5P_FIMC_TX_END_NOTIFY _IO('e', 0)
-
-#define FIMC_MAX_PLANES	3
-
-/**
- * struct fimc_fmt - color format data structure
- * @mbus_code: media bus pixel code, -1 if not applicable
- * @name: format description
- * @fourcc: fourcc code for this format, 0 if not applicable
- * @color: the driver's private color format id
- * @memplanes: number of physically non-contiguous data planes
- * @colplanes: number of physically contiguous data planes
- * @colorspace: v4l2 colorspace (V4L2_COLORSPACE_*)
- * @depth: per plane driver's private 'number of bits per pixel'
- * @mdataplanes: bitmask indicating meta data plane(s), (1 << plane_no)
- * @flags: flags indicating which operation mode format applies to
- */
-struct fimc_fmt {
-	enum v4l2_mbus_pixelcode mbus_code;
-	char	*name;
-	u32	fourcc;
-	u32	color;
-	u16	memplanes;
-	u16	colplanes;
-	u8	colorspace;
-	u8	depth[FIMC_MAX_PLANES];
-	u16	mdataplanes;
-	u16	flags;
-#define FMT_FLAGS_CAM		(1 << 0)
-#define FMT_FLAGS_M2M_IN	(1 << 1)
-#define FMT_FLAGS_M2M_OUT	(1 << 2)
-#define FMT_FLAGS_M2M		(1 << 1 | 1 << 2)
-#define FMT_HAS_ALPHA		(1 << 3)
-#define FMT_FLAGS_COMPRESSED	(1 << 4)
-#define FMT_FLAGS_WRITEBACK	(1 << 5)
-#define FMT_FLAGS_RAW_BAYER	(1 << 6)
-#define FMT_FLAGS_YUV		(1 << 7)
-};
-
-struct exynos_media_pipeline;
-
-/*
- * Media pipeline operations to be called from within a video node,  i.e. the
- * last entity within the pipeline. Implemented by related media device driver.
- */
-struct exynos_media_pipeline_ops {
-	int (*prepare)(struct exynos_media_pipeline *p,
-						struct media_entity *me);
-	int (*unprepare)(struct exynos_media_pipeline *p);
-	int (*open)(struct exynos_media_pipeline *p, struct media_entity *me,
-							bool resume);
-	int (*close)(struct exynos_media_pipeline *p);
-	int (*set_stream)(struct exynos_media_pipeline *p, bool state);
-};
-
-struct exynos_video_entity {
-	struct video_device vdev;
-	struct exynos_media_pipeline *pipe;
-};
-
-struct exynos_media_pipeline {
-	struct media_pipeline mp;
-	const struct exynos_media_pipeline_ops *ops;
-};
-
-static inline struct exynos_video_entity *vdev_to_exynos_video_entity(
-					struct video_device *vdev)
-{
-	return container_of(vdev, struct exynos_video_entity, vdev);
-}
-
-#define fimc_pipeline_call(ent, op, args...)				  \
-	(!(ent) ? -ENOENT : (((ent)->pipe->ops && (ent)->pipe->ops->op) ? \
-	(ent)->pipe->ops->op(((ent)->pipe), ##args) : -ENOIOCTLCMD))	  \
-
-#endif /* S5P_FIMC_H_ */
-- 
cgit 


From 8cd84092d35e52372da2c3c3c2afb1a719917af2 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Fri, 9 May 2014 16:43:08 +0900
Subject: PM / devfreq: Add resource-managed function for devfreq device

This patch adds the following resource-managed functions for the devfreq
device. devm_devfreq_add_device() automatically manages the memory of the
devfreq device using device resource management.
- devm_devfreq_add_device()
- devm_devfreq_remove_device()

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/devfreq.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/devfreq.h   | 21 +++++++++++++++-
 2 files changed, 83 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index af4af7708574..8b6295d9d1f5 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -544,6 +544,69 @@ int devfreq_remove_device(struct devfreq *devfreq)
 }
 EXPORT_SYMBOL(devfreq_remove_device);
 
+static int devm_devfreq_dev_match(struct device *dev, void *res, void *data)
+{
+	struct devfreq **r = res;
+
+	if (WARN_ON(!r || !*r))
+		return 0;
+
+	return *r == data;
+}
+
+static void devm_devfreq_dev_release(struct device *dev, void *res)
+{
+	devfreq_remove_device(*(struct devfreq **)res);
+}
+
+/**
+ * devm_devfreq_add_device() - Resource-managed devfreq_add_device()
+ * @dev:	the device to add devfreq feature.
+ * @profile:	device-specific profile to run devfreq.
+ * @governor_name:	name of the policy to choose frequency.
+ * @data:	private data for the governor. The devfreq framework does not
+ *		touch this value.
+ *
+ * This function manages automatically the memory of devfreq device using device
+ * resource management and simplify the free operation for memory of devfreq
+ * device.
+ */
+struct devfreq *devm_devfreq_add_device(struct device *dev,
+					struct devfreq_dev_profile *profile,
+					const char *governor_name,
+					void *data)
+{
+	struct devfreq **ptr, *devfreq;
+
+	ptr = devres_alloc(devm_devfreq_dev_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return ERR_PTR(-ENOMEM);
+
+	devfreq = devfreq_add_device(dev, profile, governor_name, data);
+	if (IS_ERR(devfreq)) {
+		devres_free(ptr);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	*ptr = devfreq;
+	devres_add(dev, ptr);
+
+	return devfreq;
+}
+EXPORT_SYMBOL(devm_devfreq_add_device);
+
+/**
+ * devm_devfreq_remove_device() - Resource-managed devfreq_remove_device()
+ * @dev:	the device to add devfreq feature.
+ * @devfreq:	the devfreq instance to be removed
+ */
+void devm_devfreq_remove_device(struct device *dev, struct devfreq *devfreq)
+{
+	WARN_ON(devres_release(dev, devm_devfreq_dev_release,
+			       devm_devfreq_dev_match, devfreq));
+}
+EXPORT_SYMBOL(devm_devfreq_remove_device);
+
 /**
  * devfreq_suspend_device() - Suspend devfreq of a device.
  * @devfreq: the devfreq instance to be suspended
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index d48dc00232a4..023d668a2cb5 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -181,6 +181,12 @@ extern struct devfreq *devfreq_add_device(struct device *dev,
 				  const char *governor_name,
 				  void *data);
 extern int devfreq_remove_device(struct devfreq *devfreq);
+extern struct devfreq *devm_devfreq_add_device(struct device *dev,
+				  struct devfreq_dev_profile *profile,
+				  const char *governor_name,
+				  void *data);
+extern void devm_devfreq_remove_device(struct device *dev,
+				  struct devfreq *devfreq);
 
 /* Supposed to be called by PM_SLEEP/PM_RUNTIME callbacks */
 extern int devfreq_suspend_device(struct devfreq *devfreq);
@@ -220,7 +226,7 @@ static inline struct devfreq *devfreq_add_device(struct device *dev,
 					  const char *governor_name,
 					  void *data)
 {
-	return NULL;
+	return ERR_PTR(-ENOSYS);
 }
 
 static inline int devfreq_remove_device(struct devfreq *devfreq)
@@ -228,6 +234,19 @@ static inline int devfreq_remove_device(struct devfreq *devfreq)
 	return 0;
 }
 
+static inline struct devfreq *devm_devfreq_add_device(struct device *dev,
+					struct devfreq_dev_profile *profile,
+					const char *governor_name,
+					void *data)
+{
+	return ERR_PTR(-ENOSYS);
+}
+
+static inline void devm_devfreq_remove_device(struct device *dev,
+					struct devfreq *devfreq)
+{
+}
+
 static inline int devfreq_suspend_device(struct devfreq *devfreq)
 {
 	return 0;
-- 
cgit 


From d5b040d0cab9cae1dc1ad61a07019062235f4878 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Fri, 9 May 2014 16:43:09 +0900
Subject: PM / devfreq: Add devm_devfreq_{register,unregister}_opp_notifier
 function

This patch adds the following resource-managed functions for the devfreq
OPP notifier. devm_devfreq_register_opp_notifier() automatically manages
the registration of the devfreq OPP notifier using device resource management.
- devm_devfreq_register_opp_notifier()
- devm_devfreq_unregister_opp_notifier()

Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/devfreq.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/devfreq.h   | 14 ++++++++++++++
 2 files changed, 62 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 8b6295d9d1f5..9f90369dd6bd 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -1169,6 +1169,54 @@ int devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq)
 	return ret;
 }
 
+static void devm_devfreq_opp_release(struct device *dev, void *res)
+{
+	devfreq_unregister_opp_notifier(dev, *(struct devfreq **)res);
+}
+
+/**
+ * devm_ devfreq_register_opp_notifier()
+ *		- Resource-managed devfreq_register_opp_notifier()
+ * @dev:	The devfreq user device. (parent of devfreq)
+ * @devfreq:	The devfreq object.
+ */
+int devm_devfreq_register_opp_notifier(struct device *dev,
+				       struct devfreq *devfreq)
+{
+	struct devfreq **ptr;
+	int ret;
+
+	ptr = devres_alloc(devm_devfreq_opp_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	ret = devfreq_register_opp_notifier(dev, devfreq);
+	if (ret) {
+		devres_free(ptr);
+		return ret;
+	}
+
+	*ptr = devfreq;
+	devres_add(dev, ptr);
+
+	return 0;
+}
+EXPORT_SYMBOL(devm_devfreq_register_opp_notifier);
+
+/**
+ * devm_devfreq_unregister_opp_notifier()
+ *		- Resource-managed devfreq_unregister_opp_notifier()
+ * @dev:	The devfreq user device. (parent of devfreq)
+ * @devfreq:	The devfreq object.
+ */
+void devm_devfreq_unregister_opp_notifier(struct device *dev,
+					 struct devfreq *devfreq)
+{
+	WARN_ON(devres_release(dev, devm_devfreq_opp_release,
+			       devm_devfreq_dev_match, devfreq));
+}
+EXPORT_SYMBOL(devm_devfreq_unregister_opp_notifier);
+
 MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
 MODULE_DESCRIPTION("devfreq class support");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 023d668a2cb5..f1863dcd83ea 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -199,6 +199,10 @@ extern int devfreq_register_opp_notifier(struct device *dev,
 					 struct devfreq *devfreq);
 extern int devfreq_unregister_opp_notifier(struct device *dev,
 					   struct devfreq *devfreq);
+extern int devm_devfreq_register_opp_notifier(struct device *dev,
+					      struct devfreq *devfreq);
+extern void devm_devfreq_unregister_opp_notifier(struct device *dev,
+						struct devfreq *devfreq);
 
 #if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
 /**
@@ -275,6 +279,16 @@ static inline int devfreq_unregister_opp_notifier(struct device *dev,
 	return -EINVAL;
 }
 
+static inline int devm_devfreq_register_opp_notifier(struct device *dev,
+						     struct devfreq *devfreq)
+{
+	return -EINVAL;
+}
+
+static inline void devm_devfreq_unregister_opp_notifier(struct device *dev,
+							struct devfreq *devfreq)
+{
+}
 #endif /* CONFIG_PM_DEVFREQ */
 
 #endif /* __LINUX_DEVFREQ_H__ */
-- 
cgit 


From fbebf59778600488147744cdf7d7c20d22531025 Mon Sep 17 00:00:00 2001
From: srinik <srinivas.kandagatla@linaro.org>
Date: Thu, 15 May 2014 11:28:44 +0100
Subject: ARM: 8057/1: amba: Add Qualcomm vendor ID.

This patch adds the Qualcomm AMBA vendor ID to the list. This ID is used in the mmci driver. The ID was selected along the same lines as 0x41, which is "A" for ARM: 0x51 is "Q" for Qualcomm.

As there is no physical register on Qcom SoCs for the AMBA vendor ID, this is a fake ID assigned based on the "Q" prefix from Qualcomm.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/linux/amba/bus.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index 63b5eff0a80f..fdd7e1b61f60 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -47,6 +47,7 @@ struct amba_driver {
 enum amba_vendor {
 	AMBA_VENDOR_ARM = 0x41,
 	AMBA_VENDOR_ST = 0x80,
+	AMBA_VENDOR_QCOM = 0x51,
 };
 
 extern struct bus_type amba_bustype;
-- 
cgit 


From 72e6ae285a1dbff553734985bedadf409d99c02d Mon Sep 17 00:00:00 2001
From: Victor Kamensky <victor.kamensky@linaro.org>
Date: Tue, 29 Apr 2014 04:20:52 +0100
Subject: ARM: 8043/1: uprobes need icache flush after xol write

After an instruction is written into the xol area, on the ARM V7
architecture the code needs to flush the dcache and icache to sync
them up for the given set of addresses. Having just a
'flush_dcache_page(page)' call is not enough - it is
possible to have a stale instruction sitting in the icache
for the given xol area slot address.

Introduce the arch_uprobe_copy_ixol weak function
that by default calls the uprobes copy_to_page function and
then the flush_dcache_page function, and on ARM define a new one
that handles the xol slot copy in an ARM specific way.

The flush_uprobe_xol_access function shares its implementation
with the flush_ptrace_access function and takes care of writing
an instruction to the user land address space on the variety of
different cache types found on ARM CPUs. Because
flush_uprobe_xol_access does not have a vma around,
flush_ptrace_access was split into two parts: a first part that
retrieves a set of conditions from the vma, and a common part that
receives those conditions as flags.

Note that the ARM cache flush function needs the kernel address
through which the instruction write happened, so instead
of using the uprobes copy_to_page function, the code was changed
to explicitly map the page and do a memcpy.

Note that the arch_uprobe_copy_ixol function, in a similar way to the
copy_to_user_page function, has preempt_disable/preempt_enable.

Signed-off-by: Victor Kamensky <victor.kamensky@linaro.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: David A. Long <dave.long@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/cacheflush.h |  2 ++
 arch/arm/kernel/uprobes.c         | 20 ++++++++++++++++++++
 arch/arm/mm/flush.c               | 33 ++++++++++++++++++++++++++++-----
 include/linux/uprobes.h           |  3 +++
 kernel/events/uprobes.c           | 25 +++++++++++++++++--------
 5 files changed, 70 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index 00af9fe435e6..fd43f7f55b70 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -487,4 +487,6 @@ int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
 
+void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
+			     void *kaddr, unsigned long len);
 #endif
diff --git a/arch/arm/kernel/uprobes.c b/arch/arm/kernel/uprobes.c
index f9bacee973bf..56adf9c1fde0 100644
--- a/arch/arm/kernel/uprobes.c
+++ b/arch/arm/kernel/uprobes.c
@@ -113,6 +113,26 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	return 0;
 }
 
+void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+			   void *src, unsigned long len)
+{
+	void *xol_page_kaddr = kmap_atomic(page);
+	void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK);
+
+	preempt_disable();
+
+	/* Initialize the slot */
+	memcpy(dst, src, len);
+
+	/* flush caches (dcache/icache) */
+	flush_uprobe_xol_access(page, vaddr, dst, len);
+
+	preempt_enable();
+
+	kunmap_atomic(xol_page_kaddr);
+}
+
+
 int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	struct uprobe_task *utask = current->utask;
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 3387e60e4ea3..43d54f5b26b9 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -104,17 +104,20 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsig
 #define flush_icache_alias(pfn,vaddr,len)	do { } while (0)
 #endif
 
+#define FLAG_PA_IS_EXEC 1
+#define FLAG_PA_CORE_IN_MM 2
+
 static void flush_ptrace_access_other(void *args)
 {
 	__flush_icache_all();
 }
 
-static
-void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
-			 unsigned long uaddr, void *kaddr, unsigned long len)
+static inline
+void __flush_ptrace_access(struct page *page, unsigned long uaddr, void *kaddr,
+			   unsigned long len, unsigned int flags)
 {
 	if (cache_is_vivt()) {
-		if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm))) {
+		if (flags & FLAG_PA_CORE_IN_MM) {
 			unsigned long addr = (unsigned long)kaddr;
 			__cpuc_coherent_kern_range(addr, addr + len);
 		}
@@ -128,7 +131,7 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
 	}
 
 	/* VIPT non-aliasing D-cache */
-	if (vma->vm_flags & VM_EXEC) {
+	if (flags & FLAG_PA_IS_EXEC) {
 		unsigned long addr = (unsigned long)kaddr;
 		if (icache_is_vipt_aliasing())
 			flush_icache_alias(page_to_pfn(page), uaddr, len);
@@ -140,6 +143,26 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
 	}
 }
 
+static
+void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
+			 unsigned long uaddr, void *kaddr, unsigned long len)
+{
+	unsigned int flags = 0;
+	if (cpumask_test_cpu(smp_processor_id(), mm_cpumask(vma->vm_mm)))
+		flags |= FLAG_PA_CORE_IN_MM;
+	if (vma->vm_flags & VM_EXEC)
+		flags |= FLAG_PA_IS_EXEC;
+	__flush_ptrace_access(page, uaddr, kaddr, len, flags);
+}
+
+void flush_uprobe_xol_access(struct page *page, unsigned long uaddr,
+			     void *kaddr, unsigned long len)
+{
+	unsigned int flags = FLAG_PA_CORE_IN_MM|FLAG_PA_IS_EXEC;
+
+	__flush_ptrace_access(page, uaddr, kaddr, len, flags);
+}
+
 /*
  * Copy user data from/to a page which is mapped into a different
  * processes address space.  Really, we want to allow our "user
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index edff2b97b864..c52f827ba6ce 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -32,6 +32,7 @@ struct vm_area_struct;
 struct mm_struct;
 struct inode;
 struct notifier_block;
+struct page;
 
 #define UPROBE_HANDLER_REMOVE		1
 #define UPROBE_HANDLER_MASK		1
@@ -127,6 +128,8 @@ extern int  arch_uprobe_exception_notify(struct notifier_block *self, unsigned l
 extern void arch_uprobe_abort_xol(struct arch_uprobe *aup, struct pt_regs *regs);
 extern unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs *regs);
 extern bool __weak arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
+extern void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+					 void *src, unsigned long len);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 04709b66369d..4968213c63fa 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1296,14 +1296,8 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 	if (unlikely(!xol_vaddr))
 		return 0;
 
-	/* Initialize the slot */
-	copy_to_page(area->page, xol_vaddr,
-			&uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
-	/*
-	 * We probably need flush_icache_user_range() but it needs vma.
-	 * This should work on supported architectures too.
-	 */
-	flush_dcache_page(area->page);
+	arch_uprobe_copy_ixol(area->page, xol_vaddr,
+			      &uprobe->arch.ixol, sizeof(uprobe->arch.ixol));
 
 	return xol_vaddr;
 }
@@ -1346,6 +1340,21 @@ static void xol_free_insn_slot(struct task_struct *tsk)
 	}
 }
 
+/*
+ * Default (weak) implementation: hands @page, @vaddr, @src and @len to
+ * copy_to_page() and then flushes the data cache for @page.
+ */
+void __weak arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+				  void *src, unsigned long len)
+{
+	/* Initialize the slot */
+	copy_to_page(page, vaddr, src, len);
+
+	/*
+	 * We probably need flush_icache_user_range() but it needs vma.
+	 * This should work on most architectures by default. If an
+	 * architecture needs to do something different it can define
+	 * its own version of the function.
+	 */
+	flush_dcache_page(page);
+}
+
 /**
  * uprobe_get_swbp_addr - compute address of swbp given post-swbp regs
  * @regs: Reflects the saved state of the task after it has hit a breakpoint
-- 
cgit 


From d25a2a16f0889de4a1cd8639896f35dc9465f6f5 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Wed, 2 Apr 2014 12:47:37 +0200
Subject: iommu: Add driver for Renesas VMSA-compatible IPMMU

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig                    |   12 +
 drivers/iommu/Makefile                   |    1 +
 drivers/iommu/ipmmu-vmsa.c               | 1070 ++++++++++++++++++++++++++++++
 include/linux/platform_data/ipmmu-vmsa.h |   24 +
 4 files changed, 1107 insertions(+)
 create mode 100644 drivers/iommu/ipmmu-vmsa.c
 create mode 100644 include/linux/platform_data/ipmmu-vmsa.h

(limited to 'include/linux')

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index df56e4c74a7e..a22b537caacd 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -272,6 +272,18 @@ config SHMOBILE_IOMMU_L1SIZE
 	default 256 if SHMOBILE_IOMMU_ADDRSIZE_64MB
 	default 128 if SHMOBILE_IOMMU_ADDRSIZE_32MB
 
+config IPMMU_VMSA
+	bool "Renesas VMSA-compatible IPMMU"
+	depends on ARM_LPAE
+	depends on ARCH_SHMOBILE || COMPILE_TEST
+	select IOMMU_API
+	select ARM_DMA_USE_IOMMU
+	help
+	  Support for the Renesas VMSA-compatible IPMMU found in the
+	  R-Mobile APE6 and R-Car H2/M2 SoCs.
+
+	  If unsure, say N.
+
 config SPAPR_TCE_IOMMU
 	bool "sPAPR TCE IOMMU Support"
 	depends on PPC_POWERNV || PPC_PSERIES
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 5d58bf16e9e3..8893bad048e0 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
 obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o
+obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
 obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
 obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
 obj-$(CONFIG_OMAP_IOMMU) += omap-iommu2.o
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
new file mode 100644
index 000000000000..b084530babf4
--- /dev/null
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -0,0 +1,1070 @@
+/*
+ * IPMMU VMSA
+ *
+ * Copyright (C) 2014 Renesas Electronics Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/platform_data/ipmmu-vmsa.h>
+#include <linux/platform_device.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+#include <asm/dma-iommu.h>
+#include <asm/pgalloc.h>
+
+/* Driver state for one IPMMU instance. */
+struct ipmmu_vmsa_device {
+	/* Platform device backing this IPMMU (set in ipmmu_probe()). */
+	struct device *dev;
+	/* Base of the ioremapped register window. */
+	void __iomem *base;
+	/* Node in the global ipmmu_devices list. */
+	struct list_head list;
+
+	/* Platform data holding the table of bus masters. */
+	const struct ipmmu_vmsa_platform_data *pdata;
+	/* Upper bound for master microTLB indices; ipmmu_probe() sets it to 32. */
+	unsigned int num_utlbs;
+
+	/* ARM DMA mapping, created on first ipmmu_add_device() call. */
+	struct dma_iommu_mapping *mapping;
+};
+
+/* Driver-private data attached to an iommu_domain through its priv field. */
+struct ipmmu_vmsa_domain {
+	/* IPMMU serving this domain; NULL until the first device is attached. */
+	struct ipmmu_vmsa_device *mmu;
+	/* Back-pointer to the generic IOMMU domain. */
+	struct iommu_domain *io_domain;
+
+	/* Hardware context index, used to compute context register offsets. */
+	unsigned int context_id;
+	spinlock_t lock;			/* Protects mappings */
+	/* Top-level page table, allocated in ipmmu_domain_init(). */
+	pgd_t *pgd;
+};
+
+static DEFINE_SPINLOCK(ipmmu_devices_lock);
+static LIST_HEAD(ipmmu_devices);
+
+#define TLB_LOOP_TIMEOUT		100	/* 100us */
+
+/* -----------------------------------------------------------------------------
+ * Registers Definition
+ */
+
+#define IM_CTX_SIZE			0x40
+
+#define IMCTR				0x0000
+#define IMCTR_TRE			(1 << 17)
+#define IMCTR_AFE			(1 << 16)
+#define IMCTR_RTSEL_MASK		(3 << 4)
+#define IMCTR_RTSEL_SHIFT		4
+#define IMCTR_TREN			(1 << 3)
+#define IMCTR_INTEN			(1 << 2)
+#define IMCTR_FLUSH			(1 << 1)
+#define IMCTR_MMUEN			(1 << 0)
+
+#define IMCAAR				0x0004
+
+#define IMTTBCR				0x0008
+#define IMTTBCR_EAE			(1 << 31)
+#define IMTTBCR_PMB			(1 << 30)
+#define IMTTBCR_SH1_NON_SHAREABLE	(0 << 28)
+#define IMTTBCR_SH1_OUTER_SHAREABLE	(2 << 28)
+#define IMTTBCR_SH1_INNER_SHAREABLE	(3 << 28)
+#define IMTTBCR_SH1_MASK		(3 << 28)
+#define IMTTBCR_ORGN1_NC		(0 << 26)
+#define IMTTBCR_ORGN1_WB_WA		(1 << 26)
+#define IMTTBCR_ORGN1_WT		(2 << 26)
+#define IMTTBCR_ORGN1_WB		(3 << 26)
+#define IMTTBCR_ORGN1_MASK		(3 << 26)
+#define IMTTBCR_IRGN1_NC		(0 << 24)
+#define IMTTBCR_IRGN1_WB_WA		(1 << 24)
+#define IMTTBCR_IRGN1_WT		(2 << 24)
+#define IMTTBCR_IRGN1_WB		(3 << 24)
+#define IMTTBCR_IRGN1_MASK		(3 << 24)
+#define IMTTBCR_TSZ1_MASK		(7 << 16)
+#define IMTTBCR_TSZ1_SHIFT		16
+#define IMTTBCR_SH0_NON_SHAREABLE	(0 << 12)
+#define IMTTBCR_SH0_OUTER_SHAREABLE	(2 << 12)
+#define IMTTBCR_SH0_INNER_SHAREABLE	(3 << 12)
+#define IMTTBCR_SH0_MASK		(3 << 12)
+#define IMTTBCR_ORGN0_NC		(0 << 10)
+#define IMTTBCR_ORGN0_WB_WA		(1 << 10)
+#define IMTTBCR_ORGN0_WT		(2 << 10)
+#define IMTTBCR_ORGN0_WB		(3 << 10)
+#define IMTTBCR_ORGN0_MASK		(3 << 10)
+#define IMTTBCR_IRGN0_NC		(0 << 8)
+#define IMTTBCR_IRGN0_WB_WA		(1 << 8)
+#define IMTTBCR_IRGN0_WT		(2 << 8)
+#define IMTTBCR_IRGN0_WB		(3 << 8)
+#define IMTTBCR_IRGN0_MASK		(3 << 8)
+#define IMTTBCR_SL0_LVL_2		(0 << 4)
+#define IMTTBCR_SL0_LVL_1		(1 << 4)
+#define IMTTBCR_TSZ0_MASK		(7 << 0)
+#define IMTTBCR_TSZ0_SHIFT		0
+
+#define IMBUSCR				0x000c
+#define IMBUSCR_DVM			(1 << 2)
+#define IMBUSCR_BUSSEL_SYS		(0 << 0)
+#define IMBUSCR_BUSSEL_CCI		(1 << 0)
+#define IMBUSCR_BUSSEL_IMCAAR		(2 << 0)
+#define IMBUSCR_BUSSEL_CCI_IMCAAR	(3 << 0)
+#define IMBUSCR_BUSSEL_MASK		(3 << 0)
+
+#define IMTTLBR0			0x0010
+#define IMTTUBR0			0x0014
+#define IMTTLBR1			0x0018
+#define IMTTUBR1			0x001c
+
+#define IMSTR				0x0020
+#define IMSTR_ERRLVL_MASK		(3 << 12)
+#define IMSTR_ERRLVL_SHIFT		12
+#define IMSTR_ERRCODE_TLB_FORMAT	(1 << 8)
+#define IMSTR_ERRCODE_ACCESS_PERM	(4 << 8)
+#define IMSTR_ERRCODE_SECURE_ACCESS	(5 << 8)
+#define IMSTR_ERRCODE_MASK		(7 << 8)
+#define IMSTR_MHIT			(1 << 4)
+#define IMSTR_ABORT			(1 << 2)
+#define IMSTR_PF			(1 << 1)
+#define IMSTR_TF			(1 << 0)
+
+#define IMMAIR0				0x0028
+#define IMMAIR1				0x002c
+#define IMMAIR_ATTR_MASK		0xff
+#define IMMAIR_ATTR_DEVICE		0x04
+#define IMMAIR_ATTR_NC			0x44
+#define IMMAIR_ATTR_WBRWA		0xff
+#define IMMAIR_ATTR_SHIFT(n)		((n) << 3)
+#define IMMAIR_ATTR_IDX_NC		0
+#define IMMAIR_ATTR_IDX_WBRWA		1
+#define IMMAIR_ATTR_IDX_DEV		2
+
+#define IMEAR				0x0030
+
+#define IMPCTR				0x0200
+#define IMPSTR				0x0208
+#define IMPEAR				0x020c
+#define IMPMBA(n)			(0x0280 + ((n) * 4))
+#define IMPMBD(n)			(0x02c0 + ((n) * 4))
+
+#define IMUCTR(n)			(0x0300 + ((n) * 16))
+#define IMUCTR_FIXADDEN			(1 << 31)
+#define IMUCTR_FIXADD_MASK		(0xff << 16)
+#define IMUCTR_FIXADD_SHIFT		16
+#define IMUCTR_TTSEL_MMU(n)		((n) << 4)
+#define IMUCTR_TTSEL_PMB		(8 << 4)
+#define IMUCTR_TTSEL_MASK		(15 << 4)
+#define IMUCTR_FLUSH			(1 << 1)
+#define IMUCTR_MMUEN			(1 << 0)
+
+#define IMUASID(n)			(0x0308 + ((n) * 16))
+#define IMUASID_ASID8_MASK		(0xff << 8)
+#define IMUASID_ASID8_SHIFT		8
+#define IMUASID_ASID0_MASK		(0xff << 0)
+#define IMUASID_ASID0_SHIFT		0
+
+/* -----------------------------------------------------------------------------
+ * Page Table Bits
+ */
+
+/*
+ * VMSA states in section B3.6.3 "Control of Secure or Non-secure memory access,
+ * Long-descriptor format" that the NStable bit being set in a table descriptor
+ * will result in the NStable and NS bits of all child entries being ignored and
+ * considered as being set. The IPMMU seems not to comply with this, as it
+ * generates a secure access page fault if any of the NStable and NS bits isn't
+ * set when running in non-secure mode.
+ */
+#ifndef PMD_NSTABLE
+#define PMD_NSTABLE			(_AT(pmdval_t, 1) << 63)
+#endif
+
+#define ARM_VMSA_PTE_XN			(((pteval_t)3) << 53)
+#define ARM_VMSA_PTE_CONT		(((pteval_t)1) << 52)
+#define ARM_VMSA_PTE_AF			(((pteval_t)1) << 10)
+#define ARM_VMSA_PTE_SH_NS		(((pteval_t)0) << 8)
+#define ARM_VMSA_PTE_SH_OS		(((pteval_t)2) << 8)
+#define ARM_VMSA_PTE_SH_IS		(((pteval_t)3) << 8)
+#define ARM_VMSA_PTE_NS			(((pteval_t)1) << 5)
+#define ARM_VMSA_PTE_PAGE		(((pteval_t)3) << 0)
+
+/* Stage-1 PTE */
+#define ARM_VMSA_PTE_AP_UNPRIV		(((pteval_t)1) << 6)
+#define ARM_VMSA_PTE_AP_RDONLY		(((pteval_t)2) << 6)
+#define ARM_VMSA_PTE_ATTRINDX_SHIFT	2
+#define ARM_VMSA_PTE_nG			(((pteval_t)1) << 11)
+
+/* Stage-2 PTE */
+#define ARM_VMSA_PTE_HAP_FAULT		(((pteval_t)0) << 6)
+#define ARM_VMSA_PTE_HAP_READ		(((pteval_t)1) << 6)
+#define ARM_VMSA_PTE_HAP_WRITE		(((pteval_t)2) << 6)
+#define ARM_VMSA_PTE_MEMATTR_OIWB	(((pteval_t)0xf) << 2)
+#define ARM_VMSA_PTE_MEMATTR_NC		(((pteval_t)0x5) << 2)
+#define ARM_VMSA_PTE_MEMATTR_DEV	(((pteval_t)0x1) << 2)
+
+/* -----------------------------------------------------------------------------
+ * Read/Write Access
+ */
+
+/* Read the 32-bit register located @offset bytes from the IPMMU MMIO base. */
+static u32 ipmmu_read(struct ipmmu_vmsa_device *mmu, unsigned int offset)
+{
+	void __iomem *reg_addr = mmu->base + offset;
+
+	return ioread32(reg_addr);
+}
+
+/* Write @data to the 32-bit register @offset bytes from the IPMMU MMIO base. */
+static void ipmmu_write(struct ipmmu_vmsa_device *mmu, unsigned int offset,
+			u32 data)
+{
+	void __iomem *reg_addr = mmu->base + offset;
+
+	iowrite32(data, reg_addr);
+}
+
+/*
+ * Read context register @reg of the hardware context used by @domain. Each
+ * context occupies IM_CTX_SIZE bytes of register space.
+ */
+static u32 ipmmu_ctx_read(struct ipmmu_vmsa_domain *domain, unsigned int reg)
+{
+	unsigned int ctx_offset = domain->context_id * IM_CTX_SIZE + reg;
+
+	return ipmmu_read(domain->mmu, ctx_offset);
+}
+
+/*
+ * Write @data to context register @reg of the hardware context used by
+ * @domain. Each context occupies IM_CTX_SIZE bytes of register space.
+ */
+static void ipmmu_ctx_write(struct ipmmu_vmsa_domain *domain, unsigned int reg,
+			    u32 data)
+{
+	unsigned int ctx_offset = domain->context_id * IM_CTX_SIZE + reg;
+
+	ipmmu_write(domain->mmu, ctx_offset, data);
+}
+
+/* -----------------------------------------------------------------------------
+ * TLB and microTLB Management
+ */
+
+/*
+ * Poll IMCTR until the FLUSH bit reads back as clear, i.e. until pending TLB
+ * invalidations have completed. Gives up with a rate-limited error after
+ * TLB_LOOP_TIMEOUT polls, delaying 1us between polls.
+ */
+static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain)
+{
+	unsigned int polls;
+
+	for (polls = 1; ipmmu_ctx_read(domain, IMCTR) & IMCTR_FLUSH; ++polls) {
+		cpu_relax();
+
+		if (polls == TLB_LOOP_TIMEOUT) {
+			dev_err_ratelimited(domain->mmu->dev,
+					    "TLB sync timed out -- MMU may be deadlocked\n");
+			return;
+		}
+
+		udelay(1);
+	}
+}
+
+/*
+ * Request a TLB flush for the domain's context by setting IMCTR.FLUSH, then
+ * wait for the flush to complete.
+ */
+static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain)
+{
+	u32 imctr = ipmmu_ctx_read(domain, IMCTR);
+
+	ipmmu_ctx_write(domain, IMCTR, imctr | IMCTR_FLUSH);
+	ipmmu_tlb_sync(domain);
+}
+
+/*
+ * Point the master's microTLB at the domain's context and enable MMU
+ * translation for it. The microTLB ASID register is written with 0.
+ */
+static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain,
+			      const struct ipmmu_vmsa_master *master)
+{
+	struct ipmmu_vmsa_device *mmu = domain->mmu;
+	const unsigned int utlb = master->utlb;
+	u32 imuctr;
+
+	/* TODO: What should we set the ASID to ? */
+	ipmmu_write(mmu, IMUASID(utlb), 0);
+
+	/* TODO: Do we need to flush the microTLB ? */
+	imuctr = IMUCTR_TTSEL_MMU(domain->context_id) | IMUCTR_FLUSH |
+		 IMUCTR_MMUEN;
+	ipmmu_write(mmu, IMUCTR(utlb), imuctr);
+}
+
+/*
+ * Turn MMU translation off for the master's microTLB by clearing its
+ * control register.
+ */
+static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain,
+			       const struct ipmmu_vmsa_master *master)
+{
+	const unsigned int utlb = master->utlb;
+
+	ipmmu_write(domain->mmu, IMUCTR(utlb), 0);
+}
+
+/*
+ * Make @size bytes of page table memory at kernel virtual address @addr
+ * visible to the IPMMU by DMA-mapping them towards the device. The mapping
+ * handle returned by dma_map_page() is not kept.
+ */
+static void ipmmu_flush_pgtable(struct ipmmu_vmsa_device *mmu, void *addr,
+				size_t size)
+{
+	struct page *table_page = virt_to_page(addr);
+	unsigned long in_page = (unsigned long)addr & ~PAGE_MASK;
+
+	/*
+	 * TODO: Add support for coherent walk through CCI with DVM and remove
+	 * cache handling.
+	 */
+	dma_map_page(mmu->dev, table_page, in_page, size, DMA_TO_DEVICE);
+}
+
+/* -----------------------------------------------------------------------------
+ * Domain/Context Management
+ */
+
+/*
+ * Program the hardware context registers for @domain and enable translation.
+ *
+ * The domain is always bound to context 0. The registers are written in the
+ * order TTBR0, TTBCR, MAIR0, IMBUSCR, IMSTR, and finally IMCTR, which enables
+ * the MMU. Always returns 0.
+ */
+static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain)
+{
+	phys_addr_t ttbr;
+	u32 reg;
+
+	/*
+	 * TODO: When adding support for multiple contexts, find an unused
+	 * context.
+	 */
+	domain->context_id = 0;
+
+	/* TTBR0 */
+	ipmmu_flush_pgtable(domain->mmu, domain->pgd,
+			    PTRS_PER_PGD * sizeof(*domain->pgd));
+	ttbr = __pa(domain->pgd);
+	ipmmu_ctx_write(domain, IMTTLBR0, ttbr);
+	ipmmu_ctx_write(domain, IMTTUBR0, ttbr >> 32);
+
+	/*
+	 * TTBCR
+	 * We use long descriptors with inner-shareable WBWA tables and allocate
+	 * the whole 32-bit VA space to TTBR0.
+	 */
+	ipmmu_ctx_write(domain, IMTTBCR, IMTTBCR_EAE |
+			IMTTBCR_SH0_INNER_SHAREABLE | IMTTBCR_ORGN0_WB_WA |
+			IMTTBCR_IRGN0_WB_WA | IMTTBCR_SL0_LVL_1);
+
+	/*
+	 * MAIR0
+	 * We need three attributes only, non-cacheable, write-back read/write
+	 * allocate and device memory.
+	 */
+	reg = (IMMAIR_ATTR_NC << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_NC))
+	    | (IMMAIR_ATTR_WBRWA << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_WBRWA))
+	    | (IMMAIR_ATTR_DEVICE << IMMAIR_ATTR_SHIFT(IMMAIR_ATTR_IDX_DEV));
+	ipmmu_ctx_write(domain, IMMAIR0, reg);
+
+	/* IMBUSCR */
+	ipmmu_ctx_write(domain, IMBUSCR,
+			ipmmu_ctx_read(domain, IMBUSCR) &
+			~(IMBUSCR_DVM | IMBUSCR_BUSSEL_MASK));
+
+	/*
+	 * IMSTR
+	 * Clear all interrupt flags.
+	 */
+	ipmmu_ctx_write(domain, IMSTR, ipmmu_ctx_read(domain, IMSTR));
+
+	/*
+	 * IMCTR
+	 * Enable the MMU and interrupt generation. The long-descriptor
+	 * translation table format doesn't use TEX remapping. Don't enable AF
+	 * software management as we have no use for it. Flush the TLB as
+	 * required when modifying the context registers.
+	 */
+	ipmmu_ctx_write(domain, IMCTR, IMCTR_INTEN | IMCTR_FLUSH | IMCTR_MMUEN);
+
+	return 0;
+}
+
+/*
+ * Disable the hardware context used by @domain and wait for the TLB flush
+ * to complete.
+ *
+ * domain->mmu is only assigned when the first device is attached, so a
+ * domain that never had a device attached has no IPMMU to talk to. Return
+ * early in that case instead of dereferencing a NULL IPMMU pointer in the
+ * register accessors.
+ */
+static void ipmmu_domain_destroy_context(struct ipmmu_vmsa_domain *domain)
+{
+	if (!domain->mmu)
+		return;
+
+	/*
+	 * Disable the context. Flush the TLB as required when modifying the
+	 * context registers.
+	 *
+	 * TODO: Is TLB flush really needed ?
+	 */
+	ipmmu_ctx_write(domain, IMCTR, IMCTR_FLUSH);
+	ipmmu_tlb_sync(domain);
+}
+
+/* -----------------------------------------------------------------------------
+ * Fault Handling
+ */
+
+/*
+ * Handle a fault interrupt for the context used by @domain.
+ *
+ * Returns IRQ_NONE when none of the MHIT/ABORT/PF/TF status bits is set, and
+ * also when only MHIT and/or ABORT are set (those are logged, the status is
+ * cleared, but nothing else is done). Returns IRQ_HANDLED when a page fault
+ * or translation fault was present, whether or not report_iommu_fault()
+ * handled it.
+ */
+static irqreturn_t ipmmu_domain_irq(struct ipmmu_vmsa_domain *domain)
+{
+	const u32 err_mask = IMSTR_MHIT | IMSTR_ABORT | IMSTR_PF | IMSTR_TF;
+	struct ipmmu_vmsa_device *mmu = domain->mmu;
+	u32 status;
+	u32 iova;
+
+	status = ipmmu_ctx_read(domain, IMSTR);
+	if (!(status & err_mask))
+		return IRQ_NONE;
+
+	iova = ipmmu_ctx_read(domain, IMEAR);
+
+	/*
+	 * Clear the error status flags. Unlike traditional interrupt flag
+	 * registers that must be cleared by writing 1, this status register
+	 * seems to require 0. The error address register must be read before,
+	 * otherwise its value will be 0.
+	 */
+	ipmmu_ctx_write(domain, IMSTR, 0);
+
+	/* Log fatal errors. */
+	if (status & IMSTR_MHIT)
+		dev_err_ratelimited(mmu->dev, "Multiple TLB hits @0x%08x\n",
+				    iova);
+	if (status & IMSTR_ABORT)
+		dev_err_ratelimited(mmu->dev, "Page Table Walk Abort @0x%08x\n",
+				    iova);
+
+	/* Nothing more to do unless a page or translation fault is pending. */
+	if (!(status & (IMSTR_PF | IMSTR_TF)))
+		return IRQ_NONE;
+
+	/*
+	 * Try to handle page faults and translation faults.
+	 *
+	 * TODO: We need to look up the faulty device based on the I/O VA. Use
+	 * the IOMMU device for now.
+	 */
+	if (!report_iommu_fault(domain->io_domain, mmu->dev, iova, 0))
+		return IRQ_HANDLED;
+
+	dev_err_ratelimited(mmu->dev,
+			    "Unhandled fault: status 0x%08x iova 0x%08x\n",
+			    status, iova);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Top-level interrupt handler. @dev is the ipmmu_vmsa_device registered with
+ * the IRQ. Without an ARM mapping there is no domain to report faults
+ * against, so the interrupt is declined; otherwise the mapping's domain
+ * handles it.
+ */
+static irqreturn_t ipmmu_irq(int irq, void *dev)
+{
+	struct ipmmu_vmsa_device *mmu = dev;
+	struct ipmmu_vmsa_domain *domain;
+
+	if (mmu->mapping == NULL)
+		return IRQ_NONE;
+
+	domain = mmu->mapping->domain->priv;
+
+	return ipmmu_domain_irq(domain);
+}
+
+/* -----------------------------------------------------------------------------
+ * Page Table Management
+ */
+
+/* Free the page holding the PTE table that @pmd points to. */
+static void ipmmu_free_ptes(pmd_t *pmd)
+{
+	__free_page(pmd_pgtable(*pmd));
+}
+
+/*
+ * Free every PTE table referenced from the PMD table under @pud, then free
+ * the PMD table itself.
+ */
+static void ipmmu_free_pmds(pud_t *pud)
+{
+	pmd_t *pmd_base = pmd_offset(pud, 0);
+	pmd_t *pmd = pmd_base;
+	unsigned int i;
+
+	/*
+	 * Advance the entry cursor in the loop header so that an empty entry
+	 * is skipped rather than re-examined on every remaining iteration.
+	 */
+	for (i = 0; i < PTRS_PER_PMD; ++i, ++pmd) {
+		if (pmd_none(*pmd))
+			continue;
+
+		ipmmu_free_ptes(pmd);
+	}
+
+	pmd_free(NULL, pmd_base);
+}
+
+/*
+ * Free every PMD table referenced from the PUD table under @pgd, then free
+ * the PUD table itself.
+ */
+static void ipmmu_free_puds(pgd_t *pgd)
+{
+	pud_t *pud_base = pud_offset(pgd, 0);
+	pud_t *pud = pud_base;
+	unsigned int i;
+
+	/*
+	 * Advance the entry cursor in the loop header so that an empty entry
+	 * is skipped rather than re-examined on every remaining iteration.
+	 */
+	for (i = 0; i < PTRS_PER_PUD; ++i, ++pud) {
+		if (pud_none(*pud))
+			continue;
+
+		ipmmu_free_pmds(pud);
+	}
+
+	pud_free(NULL, pud_base);
+}
+
+/*
+ * Free all page tables of @domain, including the top-level PGD that was
+ * allocated with kzalloc() in ipmmu_domain_init().
+ */
+static void ipmmu_free_pgtables(struct ipmmu_vmsa_domain *domain)
+{
+	pgd_t *pgd_base = domain->pgd;
+	pgd_t *pgd = pgd_base;
+	unsigned int i;
+
+	/*
+	 * Recursively free the page tables for this domain. We don't care about
+	 * speculative TLB filling, because the TLB will be nuked next time this
+	 * context bank is re-allocated and no devices currently map to these
+	 * tables.
+	 *
+	 * The entry cursor is advanced in the loop header so that an empty
+	 * entry is skipped rather than re-examined on every iteration.
+	 */
+	for (i = 0; i < PTRS_PER_PGD; ++i, ++pgd) {
+		if (pgd_none(*pgd))
+			continue;
+
+		ipmmu_free_puds(pgd);
+	}
+
+	kfree(pgd_base);
+}
+
+/*
+ * We can't use the (pgd|pud|pmd|pte)_populate or the set_(pgd|pud|pmd|pte)
+ * functions as they would flush the CPU TLB.
+ */
+
+/*
+ * Fill the PTEs covering [@addr, @end) under @pmd so that they map
+ * consecutive page frames starting at @phys, allocating the PTE table first
+ * if @pmd is empty.
+ *
+ * The loop terminates on addr == end exactly, so @end must be reachable from
+ * @addr in PAGE_SIZE steps. Returns 0 on success or -ENOMEM if the PTE table
+ * cannot be allocated.
+ */
+static int ipmmu_alloc_init_pte(struct ipmmu_vmsa_device *mmu, pmd_t *pmd,
+				unsigned long addr, unsigned long end,
+				phys_addr_t phys, int prot)
+{
+	unsigned long pfn = __phys_to_pfn(phys);
+	pteval_t pteval = ARM_VMSA_PTE_PAGE | ARM_VMSA_PTE_NS | ARM_VMSA_PTE_AF
+			| ARM_VMSA_PTE_XN;
+	pte_t *pte, *start;
+
+	if (pmd_none(*pmd)) {
+		/* Allocate a new set of tables */
+		pte = (pte_t *)get_zeroed_page(GFP_ATOMIC);
+		if (!pte)
+			return -ENOMEM;
+
+		/* Flush the zeroed table before the entry that points at it. */
+		ipmmu_flush_pgtable(mmu, pte, PAGE_SIZE);
+		*pmd = __pmd(__pa(pte) | PMD_NSTABLE | PMD_TYPE_TABLE);
+		ipmmu_flush_pgtable(mmu, pmd, sizeof(*pmd));
+
+		pte += pte_index(addr);
+	} else
+		pte = pte_offset_kernel(pmd, addr);
+
+	pteval |= ARM_VMSA_PTE_AP_UNPRIV | ARM_VMSA_PTE_nG;
+	/* Read-only only when read is requested without write. */
+	if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
+		pteval |= ARM_VMSA_PTE_AP_RDONLY;
+
+	if (prot & IOMMU_CACHE)
+		pteval |= (IMMAIR_ATTR_IDX_WBRWA <<
+			   ARM_VMSA_PTE_ATTRINDX_SHIFT);
+
+	/* If no access, create a faulting entry to avoid TLB fills */
+	if (prot & IOMMU_EXEC)
+		pteval &= ~ARM_VMSA_PTE_XN;
+	else if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
+		pteval &= ~ARM_VMSA_PTE_PAGE;
+
+	pteval |= ARM_VMSA_PTE_SH_IS;
+	start = pte;
+
+	/* Install the page table entries. */
+	do {
+		*pte++ = pfn_pte(pfn++, __pgprot(pteval));
+		addr += PAGE_SIZE;
+	} while (addr != end);
+
+	/* Flush only the range of entries that was just written. */
+	ipmmu_flush_pgtable(mmu, start, sizeof(*pte) * (pte - start));
+	return 0;
+}
+
+/*
+ * Populate the PMD entries covering [@addr, @end) under @pud, allocating the
+ * PMD table first if @pud is empty (when the PMD level is not folded), and
+ * fill the PTEs below each entry.
+ *
+ * Returns 0 on success or the first error reported while allocating or
+ * filling a lower-level table.
+ */
+static int ipmmu_alloc_init_pmd(struct ipmmu_vmsa_device *mmu, pud_t *pud,
+				unsigned long addr, unsigned long end,
+				phys_addr_t phys, int prot)
+{
+	unsigned long next;
+	pmd_t *pmd;
+	int ret;
+
+#ifndef __PAGETABLE_PMD_FOLDED
+	if (pud_none(*pud)) {
+		pmd = (pmd_t *)get_zeroed_page(GFP_ATOMIC);
+		if (!pmd)
+			return -ENOMEM;
+
+		/* Flush the zeroed table before the entry that points at it. */
+		ipmmu_flush_pgtable(mmu, pmd, PAGE_SIZE);
+		*pud = __pud(__pa(pmd) | PMD_NSTABLE | PMD_TYPE_TABLE);
+		ipmmu_flush_pgtable(mmu, pud, sizeof(*pud));
+
+		pmd += pmd_index(addr);
+	} else
+#endif
+		pmd = pmd_offset(pud, addr);
+
+	do {
+		next = pmd_addr_end(addr, end);
+		/*
+		 * Each PMD entry owns a single PTE table, so the PTE fill must
+		 * stop at the end of this PMD's range (next), not at the end of
+		 * the whole request.
+		 */
+		ret = ipmmu_alloc_init_pte(mmu, pmd, addr, next, phys, prot);
+		if (ret)
+			break;	/* don't let a later success mask the error */
+		phys += next - addr;
+	} while (pmd++, addr = next, addr < end);
+
+	return ret;
+}
+
+/*
+ * Populate the PUD entries covering [@addr, @end) under @pgd, allocating the
+ * PUD table first if @pgd is empty (when the PUD level is not folded), and
+ * fill the lower levels below each entry.
+ *
+ * Returns 0 on success or the first error reported while allocating or
+ * filling a lower-level table.
+ */
+static int ipmmu_alloc_init_pud(struct ipmmu_vmsa_device *mmu, pgd_t *pgd,
+				unsigned long addr, unsigned long end,
+				phys_addr_t phys, int prot)
+{
+	unsigned long next;
+	pud_t *pud;
+	int ret;
+
+#ifndef __PAGETABLE_PUD_FOLDED
+	if (pgd_none(*pgd)) {
+		pud = (pud_t *)get_zeroed_page(GFP_ATOMIC);
+		if (!pud)
+			return -ENOMEM;
+
+		/* Flush the zeroed table before the entry that points at it. */
+		ipmmu_flush_pgtable(mmu, pud, PAGE_SIZE);
+		*pgd = __pgd(__pa(pud) | PMD_NSTABLE | PMD_TYPE_TABLE);
+		ipmmu_flush_pgtable(mmu, pgd, sizeof(*pgd));
+
+		pud += pud_index(addr);
+	} else
+#endif
+		pud = pud_offset(pgd, addr);
+
+	do {
+		next = pud_addr_end(addr, end);
+		ret = ipmmu_alloc_init_pmd(mmu, pud, addr, next, phys, prot);
+		if (ret)
+			break;	/* don't let a later success mask the error */
+		phys += next - addr;
+	} while (pud++, addr = next, addr < end);
+
+	return ret;
+}
+
+/*
+ * Install page table entries covering [@iova, @iova + @size) that point at
+ * @paddr with protection @prot, then invalidate the context TLB.
+ *
+ * ipmmu_unmap() reuses this function with @paddr and @prot both 0.
+ *
+ * Returns 0 on success, -EINVAL if the domain has no PGD or @size is not a
+ * multiple of the page size, -ERANGE if @paddr does not fit in 40 bits, or
+ * the error returned by ipmmu_alloc_init_pud().
+ */
+static int ipmmu_handle_mapping(struct ipmmu_vmsa_domain *domain,
+				unsigned long iova, phys_addr_t paddr,
+				size_t size, int prot)
+{
+	struct ipmmu_vmsa_device *mmu = domain->mmu;
+	pgd_t *pgd = domain->pgd;
+	unsigned long flags;
+	unsigned long end;
+	int ret;
+
+	if (!pgd)
+		return -EINVAL;
+
+	if (size & ~PAGE_MASK)
+		return -EINVAL;
+
+	if (paddr & ~((1ULL << 40) - 1))
+		return -ERANGE;
+
+	spin_lock_irqsave(&domain->lock, flags);
+
+	pgd += pgd_index(iova);
+	end = iova + size;
+
+	/* Walk the range one PGD entry at a time. */
+	do {
+		unsigned long next = pgd_addr_end(iova, end);
+
+		ret = ipmmu_alloc_init_pud(mmu, pgd, iova, next, paddr, prot);
+		if (ret)
+			break;
+
+		paddr += next - iova;
+		iova = next;
+	} while (pgd++, iova != end);
+
+	spin_unlock_irqrestore(&domain->lock, flags);
+
+	/* The TLB is invalidated even if the walk above failed part-way. */
+	ipmmu_tlb_invalidate(domain);
+
+	return ret;
+}
+
+/* -----------------------------------------------------------------------------
+ * IOMMU Operations
+ */
+
+/*
+ * Look up the platform-data master entry whose name equals dev_name(@dev).
+ * Returns the entry, or NULL if the IPMMU does not list this device.
+ */
+static const struct ipmmu_vmsa_master *
+ipmmu_find_master(struct ipmmu_vmsa_device *ipmmu, struct device *dev)
+{
+	const struct ipmmu_vmsa_platform_data *pdata = ipmmu->pdata;
+	const char *wanted = dev_name(dev);
+	unsigned int idx;
+
+	for (idx = 0; idx < pdata->num_masters; ++idx) {
+		const struct ipmmu_vmsa_master *candidate = &pdata->masters[idx];
+
+		if (!strcmp(candidate->name, wanted))
+			return candidate;
+	}
+
+	return NULL;
+}
+
+/*
+ * Allocate the driver-private domain data and its zeroed top-level page
+ * table, and link them to @io_domain. Returns 0 or -ENOMEM.
+ */
+static int ipmmu_domain_init(struct iommu_domain *io_domain)
+{
+	struct ipmmu_vmsa_domain *domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (domain == NULL)
+		return -ENOMEM;
+
+	spin_lock_init(&domain->lock);
+
+	domain->pgd = kzalloc(PTRS_PER_PGD * sizeof(pgd_t), GFP_KERNEL);
+	if (domain->pgd == NULL)
+		goto err_free_domain;
+
+	io_domain->priv = domain;
+	domain->io_domain = io_domain;
+
+	return 0;
+
+err_free_domain:
+	kfree(domain);
+	return -ENOMEM;
+}
+
+/*
+ * Tear down @io_domain: disable its hardware context, free its page tables
+ * and free the driver-private domain data.
+ */
+static void ipmmu_domain_destroy(struct iommu_domain *io_domain)
+{
+	struct ipmmu_vmsa_domain *domain = io_domain->priv;
+
+	/*
+	 * Free the domain resources. We assume that all devices have already
+	 * been detached.
+	 */
+	ipmmu_domain_destroy_context(domain);
+	ipmmu_free_pgtables(domain);
+	kfree(domain);
+}
+
+/*
+ * Attach @dev to @io_domain.
+ *
+ * The first attach binds the domain to the device's IPMMU and initializes
+ * the hardware context; later attaches must come from the same IPMMU. On
+ * success the device's microTLB is enabled.
+ *
+ * Returns 0 on success, -ENXIO if the device has no IPMMU assigned, or
+ * -EINVAL if the device belongs to a different IPMMU or is not listed as a
+ * master of its IPMMU.
+ */
+static int ipmmu_attach_device(struct iommu_domain *io_domain,
+			       struct device *dev)
+{
+	struct ipmmu_vmsa_device *mmu = dev->archdata.iommu;
+	struct ipmmu_vmsa_domain *domain = io_domain->priv;
+	const struct ipmmu_vmsa_master *master;
+	unsigned long flags;
+	int ret = 0;
+
+	if (!mmu) {
+		dev_err(dev, "Cannot attach to IPMMU\n");
+		return -ENXIO;
+	}
+
+	spin_lock_irqsave(&domain->lock, flags);
+
+	if (!domain->mmu) {
+		/* The domain hasn't been used yet, initialize it. */
+		domain->mmu = mmu;
+		ret = ipmmu_domain_init_context(domain);
+	} else if (domain->mmu != mmu) {
+		/*
+		 * Something is wrong, we can't attach two devices using
+		 * different IOMMUs to the same domain.
+		 */
+		dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n",
+			dev_name(mmu->dev), dev_name(domain->mmu->dev));
+		ret = -EINVAL;
+	}
+
+	spin_unlock_irqrestore(&domain->lock, flags);
+
+	if (ret < 0)
+		return ret;
+
+	/* NOTE(review): on this failure path domain->mmu stays bound. */
+	master = ipmmu_find_master(mmu, dev);
+	if (!master)
+		return -EINVAL;
+
+	ipmmu_utlb_enable(domain, master);
+
+	return 0;
+}
+
+/*
+ * Detach @dev from @io_domain by disabling translation on its microTLB.
+ * Devices that are not listed as masters of the domain's IPMMU are ignored.
+ */
+static void ipmmu_detach_device(struct iommu_domain *io_domain,
+				struct device *dev)
+{
+	struct ipmmu_vmsa_domain *domain = io_domain->priv;
+	const struct ipmmu_vmsa_master *master;
+
+	master = ipmmu_find_master(domain->mmu, dev);
+	if (master != NULL)
+		ipmmu_utlb_disable(domain, master);
+
+	/*
+	 * TODO: Optimize by disabling the context when no device is attached.
+	 */
+}
+
+/*
+ * IOMMU .map callback: map [@iova, @iova + @size) to @paddr with @prot.
+ * Returns -ENODEV if the domain has no driver-private data, otherwise the
+ * result of ipmmu_handle_mapping().
+ */
+static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova,
+		     phys_addr_t paddr, size_t size, int prot)
+{
+	struct ipmmu_vmsa_domain *domain = io_domain->priv;
+
+	return domain ? ipmmu_handle_mapping(domain, iova, paddr, size, prot)
+		      : -ENODEV;
+}
+
+/*
+ * IOMMU .unmap callback: rewrite the entries covering [@iova, @iova + @size)
+ * with physical address 0 and protection 0. Returns @size on success and 0
+ * on failure.
+ */
+static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova,
+			  size_t size)
+{
+	struct ipmmu_vmsa_domain *domain = io_domain->priv;
+
+	if (ipmmu_handle_mapping(domain, iova, 0, size, 0))
+		return 0;
+
+	return size;
+}
+
+/*
+ * IOMMU .iova_to_phys callback: walk the domain's page tables in software
+ * and return the physical address that @iova maps to.
+ *
+ * Returns 0 if the domain has no PGD or if any level of the walk finds an
+ * empty entry. No lock is taken during the walk.
+ */
+static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain,
+				      dma_addr_t iova)
+{
+	struct ipmmu_vmsa_domain *domain = io_domain->priv;
+	pgd_t pgd;
+	pud_t pud;
+	pmd_t pmd;
+	pte_t pte;
+
+	/* TODO: Is locking needed ? */
+
+	if (!domain->pgd)
+		return 0;
+
+	/* Each level is copied to a local before descending. */
+	pgd = *(domain->pgd + pgd_index(iova));
+	if (pgd_none(pgd))
+		return 0;
+
+	pud = *pud_offset(&pgd, iova);
+	if (pud_none(pud))
+		return 0;
+
+	pmd = *pmd_offset(&pud, iova);
+	if (pmd_none(pmd))
+		return 0;
+
+	pte = *(pmd_page_vaddr(pmd) + pte_index(iova));
+	if (pte_none(pte))
+		return 0;
+
+	/* Page frame address plus the offset of @iova within its page. */
+	return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK);
+}
+
+/*
+ * IOMMU .add_device callback.
+ *
+ * Finds the IPMMU that lists @dev as a master, puts the device in a new
+ * IOMMU group, creates the IPMMU's ARM DMA mapping on first use and attaches
+ * the device to it.
+ *
+ * Returns 0 on success, -EINVAL if the device already has an IOMMU assigned
+ * or its microTLB index is out of range, -ENODEV if no registered IPMMU
+ * lists the device, or the error from group allocation, mapping creation or
+ * attachment. On failure after the device joined the group, the group
+ * membership and the archdata pointer are undone.
+ */
+static int ipmmu_add_device(struct device *dev)
+{
+	const struct ipmmu_vmsa_master *master = NULL;
+	struct ipmmu_vmsa_device *mmu;
+	struct iommu_group *group;
+	int ret;
+
+	if (dev->archdata.iommu) {
+		dev_warn(dev, "IOMMU driver already assigned to device %s\n",
+			 dev_name(dev));
+		return -EINVAL;
+	}
+
+	/* Find the master corresponding to the device. */
+	spin_lock(&ipmmu_devices_lock);
+
+	list_for_each_entry(mmu, &ipmmu_devices, list) {
+		master = ipmmu_find_master(mmu, dev);
+		if (master) {
+			/*
+			 * TODO Take a reference to the master to protect
+			 * against device removal.
+			 */
+			break;
+		}
+	}
+
+	spin_unlock(&ipmmu_devices_lock);
+
+	/* mmu is only meaningful below when a master was found. */
+	if (!master)
+		return -ENODEV;
+
+	/* Reject microTLB indices the IPMMU doesn't have. */
+	if (master->utlb >= mmu->num_utlbs)
+		return -EINVAL;
+
+	/* Create a device group and add the device to it. */
+	group = iommu_group_alloc();
+	if (IS_ERR(group)) {
+		dev_err(dev, "Failed to allocate IOMMU group\n");
+		return PTR_ERR(group);
+	}
+
+	ret = iommu_group_add_device(group, dev);
+	iommu_group_put(group);
+
+	if (ret < 0) {
+		dev_err(dev, "Failed to add device to IPMMU group\n");
+		return ret;
+	}
+
+	dev->archdata.iommu = mmu;
+
+	/*
+	 * Create the ARM mapping, used by the ARM DMA mapping core to allocate
+	 * VAs. This will allocate a corresponding IOMMU domain.
+	 *
+	 * TODO:
+	 * - Create one mapping per context (TLB).
+	 * - Make the mapping size configurable ? We currently use a 2GB mapping
+	 *   at a 1GB offset to ensure that NULL VAs will fault.
+	 */
+	if (!mmu->mapping) {
+		struct dma_iommu_mapping *mapping;
+
+		mapping = arm_iommu_create_mapping(&platform_bus_type,
+							SZ_1G, SZ_2G, 0);
+		if (IS_ERR(mapping)) {
+			dev_err(mmu->dev, "failed to create ARM IOMMU mapping\n");
+			/* Undo the group membership and archdata assignment. */
+			ret = PTR_ERR(mapping);
+			goto error;
+		}
+
+		mmu->mapping = mapping;
+	}
+
+	/* Attach the ARM VA mapping to the device. */
+	ret = arm_iommu_attach_device(dev, mmu->mapping);
+	if (ret < 0) {
+		dev_err(dev, "Failed to attach device to VA mapping\n");
+		goto error;
+	}
+
+	return 0;
+
+error:
+	dev->archdata.iommu = NULL;
+	iommu_group_remove_device(dev);
+	return ret;
+}
+
+/*
+ * IOMMU .remove_device callback: detach @dev from its ARM mapping, remove it
+ * from its IOMMU group and clear its archdata IOMMU pointer.
+ */
+static void ipmmu_remove_device(struct device *dev)
+{
+	arm_iommu_detach_device(dev);
+	iommu_group_remove_device(dev);
+	dev->archdata.iommu = NULL;
+}
+
+/* IOMMU operations installed on the platform bus by ipmmu_init(). */
+static struct iommu_ops ipmmu_ops = {
+	.domain_init = ipmmu_domain_init,
+	.domain_destroy = ipmmu_domain_destroy,
+	.attach_dev = ipmmu_attach_device,
+	.detach_dev = ipmmu_detach_device,
+	.map = ipmmu_map,
+	.unmap = ipmmu_unmap,
+	.iova_to_phys = ipmmu_iova_to_phys,
+	.add_device = ipmmu_add_device,
+	.remove_device = ipmmu_remove_device,
+	/* Advertised mapping sizes: 1MB, 64KB and 4KB. */
+	.pgsize_bitmap = SZ_1M | SZ_64K | SZ_4K,
+};
+
+/* -----------------------------------------------------------------------------
+ * Probe/remove and init
+ */
+
+/* Disable translation by clearing IMCTR in each of the four contexts. */
+static void ipmmu_device_reset(struct ipmmu_vmsa_device *mmu)
+{
+	const unsigned int last = IMCTR + 4 * IM_CTX_SIZE;
+	unsigned int imctr_offset;
+
+	/* Disable all contexts. */
+	for (imctr_offset = IMCTR; imctr_offset != last;
+	     imctr_offset += IM_CTX_SIZE)
+		ipmmu_write(mmu, imctr_offset, 0);
+}
+
+/*
+ * Platform driver probe: allocate the device state, map the registers,
+ * request the interrupt, reset the hardware and register the IPMMU in the
+ * global device list.
+ *
+ * Returns 0 on success, -EINVAL if platform data is missing, -ENOMEM if the
+ * device state cannot be allocated, or the error from the register mapping,
+ * IRQ lookup or IRQ request.
+ */
+static int ipmmu_probe(struct platform_device *pdev)
+{
+	struct ipmmu_vmsa_device *mmu;
+	struct resource *res;
+	int irq;
+	int ret;
+
+	if (!pdev->dev.platform_data) {
+		dev_err(&pdev->dev, "missing platform data\n");
+		return -EINVAL;
+	}
+
+	mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL);
+	if (!mmu) {
+		dev_err(&pdev->dev, "cannot allocate device data\n");
+		return -ENOMEM;
+	}
+
+	mmu->dev = &pdev->dev;
+	mmu->pdata = pdev->dev.platform_data;
+	mmu->num_utlbs = 32;
+
+	/* Map I/O memory and request IRQ. */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	mmu->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(mmu->base))
+		return PTR_ERR(mmu->base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "no IRQ found\n");
+		return irq;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, ipmmu_irq, 0,
+			       dev_name(&pdev->dev), mmu);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to request IRQ %d\n", irq);
+		/* Return the error code, not the (valid) IRQ number. */
+		return ret;
+	}
+
+	ipmmu_device_reset(mmu);
+
+	/*
+	 * We can't create the ARM mapping here as it requires the bus to have
+	 * an IOMMU, which only happens when bus_set_iommu() is called in
+	 * ipmmu_init() after the probe function returns.
+	 */
+
+	spin_lock(&ipmmu_devices_lock);
+	list_add(&mmu->list, &ipmmu_devices);
+	spin_unlock(&ipmmu_devices_lock);
+
+	platform_set_drvdata(pdev, mmu);
+
+	return 0;
+}
+
+/*
+ * Platform driver remove: take the IPMMU off the global device list, release
+ * its ARM mapping and disable all hardware contexts. Always returns 0.
+ */
+static int ipmmu_remove(struct platform_device *pdev)
+{
+	struct ipmmu_vmsa_device *mmu = platform_get_drvdata(pdev);
+
+	spin_lock(&ipmmu_devices_lock);
+	list_del(&mmu->list);
+	spin_unlock(&ipmmu_devices_lock);
+
+	/* NOTE(review): mmu->mapping is NULL if no device was ever added. */
+	arm_iommu_release_mapping(mmu->mapping);
+
+	ipmmu_device_reset(mmu);
+
+	return 0;
+}
+
+/* Platform driver definition; registered under the name "ipmmu-vmsa". */
+static struct platform_driver ipmmu_driver = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "ipmmu-vmsa",
+	},
+	.probe = ipmmu_probe,
+	.remove	= ipmmu_remove,
+};
+
+/*
+ * Register the platform driver, then install the IPMMU operations on the
+ * platform bus unless the bus already has an IOMMU. Returns 0 on success or
+ * the driver registration error.
+ */
+static int __init ipmmu_init(void)
+{
+	int err = platform_driver_register(&ipmmu_driver);
+
+	if (err < 0)
+		return err;
+
+	if (iommu_present(&platform_bus_type))
+		return 0;
+
+	bus_set_iommu(&platform_bus_type, &ipmmu_ops);
+
+	return 0;
+}
+
+/* Module exit: unregister the platform driver. */
+static void __exit ipmmu_exit(void)
+{
+	/* A void function must not return an expression. */
+	platform_driver_unregister(&ipmmu_driver);
+}
+
+subsys_initcall(ipmmu_init);
+module_exit(ipmmu_exit);
+
+MODULE_DESCRIPTION("IOMMU API for Renesas VMSA-compatible IPMMU");
+MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/ipmmu-vmsa.h b/include/linux/platform_data/ipmmu-vmsa.h
new file mode 100644
index 000000000000..5275b3ac6d37
--- /dev/null
+++ b/include/linux/platform_data/ipmmu-vmsa.h
@@ -0,0 +1,24 @@
+/*
+ * IPMMU VMSA Platform Data
+ *
+ * Copyright (C) 2014 Renesas Electronics Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ */
+
+#ifndef __IPMMU_VMSA_H__
+#define __IPMMU_VMSA_H__
+
+/* Describes one bus master served by an IPMMU. */
+struct ipmmu_vmsa_master {
+	/* Compared by the driver against dev_name() of the master device. */
+	const char *name;
+	/* Index of the microTLB used by this master. */
+	unsigned int utlb;
+};
+
+/* Platform data for an IPMMU instance: the table of masters it serves. */
+struct ipmmu_vmsa_platform_data {
+	/* Array of @num_masters master descriptions. */
+	const struct ipmmu_vmsa_master *masters;
+	/* Number of entries in @masters. */
+	unsigned int num_masters;
+};
+
+#endif /* __IPMMU_VMSA_H__ */
-- 
cgit 


From 9b1ee0b2cb8bffdbb3003b1d5205f3ae0592c15a Mon Sep 17 00:00:00 2001
From: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Date: Fri, 25 Apr 2014 22:45:30 +0900
Subject: ALSA: firewire/bebob: Add a workaround for M-Audio special Firewire
 series

In post commit, a quirk of this firmware about transactions is reported.
This commit apply a workaround for this quirk.

They often fail transactions due to gap_count mismatch. This state is changed
by generating bus reset.

The fw_schedule_bus_reset() is an exported symbol in firewire-core. But there
are no header for public. This commit moves its prototype from
drivers/firewire/core.h to include/linux/firewire.h.

This mismatch still affects bus management before generating this bus reset.
It still takes a time to call driver's probe() because transactions are still
often failed.

Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/firewire/core.h      |  1 -
 include/linux/firewire.h     |  3 +++
 sound/firewire/bebob/bebob.c | 32 ++++++++++++++++++++++++++++----
 sound/firewire/bebob/bebob.h |  1 +
 4 files changed, 32 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h
index c98764aeeec6..870044e82316 100644
--- a/drivers/firewire/core.h
+++ b/drivers/firewire/core.h
@@ -118,7 +118,6 @@ int fw_card_add(struct fw_card *card,
 		u32 max_receive, u32 link_speed, u64 guid);
 void fw_core_remove_card(struct fw_card *card);
 int fw_compute_block_crc(__be32 *block);
-void fw_schedule_bus_reset(struct fw_card *card, bool delayed, bool short_reset);
 void fw_schedule_bm_work(struct fw_card *card, unsigned long delay);
 
 /* -cdev */
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index c3683bdf28fe..d4b7683c722d 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -367,6 +367,9 @@ static inline int fw_stream_packet_destination_id(int tag, int channel, int sy)
 	return tag << 14 | channel << 8 | sy;
 }
 
+void fw_schedule_bus_reset(struct fw_card *card, bool delayed,
+			   bool short_reset);
+
 struct fw_descriptor {
 	struct list_head link;
 	size_t length;
diff --git a/sound/firewire/bebob/bebob.c b/sound/firewire/bebob/bebob.c
index e1dd4219ea6c..31b96b7264cf 100644
--- a/sound/firewire/bebob/bebob.c
+++ b/sound/firewire/bebob/bebob.c
@@ -247,10 +247,26 @@ bebob_probe(struct fw_unit *unit,
 	if (err < 0)
 		goto error;
 
-	err = snd_card_register(card);
-	if (err < 0) {
-		snd_bebob_stream_destroy_duplex(bebob);
-		goto error;
+	if (!bebob->maudio_special_quirk) {
+		err = snd_card_register(card);
+		if (err < 0) {
+			snd_bebob_stream_destroy_duplex(bebob);
+			goto error;
+		}
+	} else {
+		/*
+		 * This is a workaround. This bus reset seems to make the
+		 * devices handle transactions correctly. Without it, the
+		 * devices have a gap_count mismatch, which causes many
+		 * transactions to fail.
+		 *
+		 * Just after registration, user-land applications receive
+		 * signals from dbus and start I/Os. To avoid I/Os until the
+		 * future bus reset, registration is done in the next update().
+		 */
+		bebob->deferred_registration = true;
+		fw_schedule_bus_reset(fw_parent_device(bebob->unit)->card,
+				      false, true);
 	}
 
 	dev_set_drvdata(&unit->device, bebob);
@@ -273,6 +289,14 @@ bebob_update(struct fw_unit *unit)
 
 	fcp_bus_reset(bebob->unit);
 	snd_bebob_stream_update_duplex(bebob);
+
+	if (bebob->deferred_registration) {
+		if (snd_card_register(bebob->card) < 0) {
+			snd_bebob_stream_destroy_duplex(bebob);
+			snd_card_free(bebob->card);
+		}
+		bebob->deferred_registration = false;
+	}
 }
 
 static void bebob_remove(struct fw_unit *unit)
diff --git a/sound/firewire/bebob/bebob.h b/sound/firewire/bebob/bebob.h
index 4a54e746c5c6..91b26b0c649a 100644
--- a/sound/firewire/bebob/bebob.h
+++ b/sound/firewire/bebob/bebob.h
@@ -109,6 +109,7 @@ struct snd_bebob {
 
 	/* for M-Audio special devices */
 	void *maudio_special_quirk;
+	bool deferred_registration;
 };
 
 static inline int
-- 
cgit 


From 45fef5b88d1f2f47ecdefae6354372d440ca5c84 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Thu, 22 May 2014 12:47:47 +0200
Subject: ACPI: add dynamic_debug support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 1a699476e258 ("ACPI / hotplug / PCI: Hotplug notifications
from acpi_bus_notify()") added debug messages for a few common
events. These debug messages are unconditionally enabled if
CONFIG_DYNAMIC_DEBUG is defined, contrary to the documented
meaning, making the ACPI system spew lots of unwanted noise on
any kernel with dynamic debugging.

The bug was introduced by commit fbfddae69657 ("ACPI: Add
acpi_handle_<level>() interfaces"), which added the
CONFIG_DYNAMIC_DEBUG dependency without respecting its meaning.

Fix by adding real support for dynamic_debug.

Fixes: fbfddae69657 ("ACPI: Add acpi_handle_<level>() interfaces")
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/utils.c | 64 ++++++++++++++++++++++++++++++++++++++++++----------
 include/linux/acpi.h | 22 ++++++++++++++++--
 2 files changed, 72 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index bba526148583..07c8c5a5ee95 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c
@@ -30,6 +30,7 @@
 #include <linux/types.h>
 #include <linux/hardirq.h>
 #include <linux/acpi.h>
+#include <linux/dynamic_debug.h>
 
 #include "internal.h"
 
@@ -456,6 +457,24 @@ acpi_evaluate_ost(acpi_handle handle, u32 source_event, u32 status_code,
 }
 EXPORT_SYMBOL(acpi_evaluate_ost);
 
+/**
+ * acpi_handle_path: Return the object path of handle
+ *
+ * Caller must free the returned buffer
+ */
+static char *acpi_handle_path(acpi_handle handle)
+{
+	struct acpi_buffer buffer = {
+		.length = ACPI_ALLOCATE_BUFFER,
+		.pointer = NULL
+	};
+
+	if (in_interrupt() ||
+	    acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer) != AE_OK)
+		return NULL;
+	return buffer.pointer;
+}
+
 /**
  * acpi_handle_printk: Print message with ACPI prefix and object path
  *
@@ -469,29 +488,50 @@ acpi_handle_printk(const char *level, acpi_handle handle, const char *fmt, ...)
 {
 	struct va_format vaf;
 	va_list args;
-	struct acpi_buffer buffer = {
-		.length = ACPI_ALLOCATE_BUFFER,
-		.pointer = NULL
-	};
 	const char *path;
 
 	va_start(args, fmt);
 	vaf.fmt = fmt;
 	vaf.va = &args;
 
-	if (in_interrupt() ||
-	    acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer) != AE_OK)
-		path = "<n/a>";
-	else
-		path = buffer.pointer;
-
-	printk("%sACPI: %s: %pV", level, path, &vaf);
+	path = acpi_handle_path(handle);
+	printk("%sACPI: %s: %pV", level, path ? path : "<n/a>" , &vaf);
 
 	va_end(args);
-	kfree(buffer.pointer);
+	kfree(path);
 }
 EXPORT_SYMBOL(acpi_handle_printk);
 
+#if defined(CONFIG_DYNAMIC_DEBUG)
+/**
+ * __acpi_handle_debug: pr_debug with ACPI prefix and object path
+ *
+ * This function is called through acpi_handle_debug macro and debug
+ * prints a message with ACPI prefix and object path. This function
+ * acquires the global namespace mutex to obtain an object path.  In
+ * interrupt context, it shows the object path as <n/a>.
+ */
+void
+__acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle,
+		    const char *fmt, ...)
+{
+	struct va_format vaf;
+	va_list args;
+	const char *path;
+
+	va_start(args, fmt);
+	vaf.fmt = fmt;
+	vaf.va = &args;
+
+	path = acpi_handle_path(handle);
+	__dynamic_pr_debug(descriptor, "ACPI: %s: %pV", path ? path : "<n/a>", &vaf);
+
+	va_end(args);
+	kfree(path);
+}
+EXPORT_SYMBOL(__acpi_handle_debug);
+#endif
+
 /**
  * acpi_has_method: Check whether @handle has a method named @name
  * @handle: ACPI device handle
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 7a8f2cd66c8b..0e2569031a6f 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -37,6 +37,7 @@
 
 #include <linux/list.h>
 #include <linux/mod_devicetable.h>
+#include <linux/dynamic_debug.h>
 
 #include <acpi/acpi.h>
 #include <acpi/acpi_bus.h>
@@ -589,6 +590,14 @@ static inline __printf(3, 4) void
 acpi_handle_printk(const char *level, void *handle, const char *fmt, ...) {}
 #endif	/* !CONFIG_ACPI */
 
+#if defined(CONFIG_ACPI) && defined(CONFIG_DYNAMIC_DEBUG)
+__printf(3, 4)
+void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const char *fmt, ...);
+#else
+#define __acpi_handle_debug(descriptor, handle, fmt, ...)		\
+	acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__);
+#endif
+
 /*
  * acpi_handle_<level>: Print message with ACPI prefix and object path
  *
@@ -610,11 +619,19 @@ acpi_handle_printk(const char *level, void *handle, const char *fmt, ...) {}
 #define acpi_handle_info(handle, fmt, ...)				\
 	acpi_handle_printk(KERN_INFO, handle, fmt, ##__VA_ARGS__)
 
-/* REVISIT: Support CONFIG_DYNAMIC_DEBUG when necessary */
-#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(DEBUG)
 #define acpi_handle_debug(handle, fmt, ...)				\
 	acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__)
 #else
+#if defined(CONFIG_DYNAMIC_DEBUG)
+#define acpi_handle_debug(handle, fmt, ...)				\
+do {									\
+	DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt);			\
+	if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT))		\
+		__acpi_handle_debug(&descriptor, handle, pr_fmt(fmt),	\
+				##__VA_ARGS__);				\
+} while (0)
+#else
 #define acpi_handle_debug(handle, fmt, ...)				\
 ({									\
 	if (0)								\
@@ -622,5 +639,6 @@ acpi_handle_printk(const char *level, void *handle, const char *fmt, ...) {}
 	0;								\
 })
 #endif
+#endif
 
 #endif	/*_LINUX_ACPI_H*/
-- 
cgit 


From 6c46ccc8bb0660c1805f6662d4646eb5405dcb2d Mon Sep 17 00:00:00 2001
From: Alban Bedel <alban.bedel@avionic-design.de>
Date: Tue, 20 May 2014 12:14:03 +0200
Subject: regulator: tps6586x: Add support for the TPS658640

The TPS658640 has a different set of output voltages for most LDOs, and
the RTC LDO isn't settable. This chip also reports 2 different version
IDs; as the datasheet doesn't list the possible values, the second ID
has simply been named TPS658640v2.

Signed-off-by: Alban Bedel <alban.bedel@avionic-design.de>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/mfd/tps6586x.c                 |  4 ++++
 drivers/regulator/tps6586x-regulator.c | 36 ++++++++++++++++++++++++++++++++++
 include/linux/mfd/tps6586x.h           |  2 ++
 3 files changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps6586x.c b/drivers/mfd/tps6586x.c
index bbd54414a75d..835e5549ecdd 100644
--- a/drivers/mfd/tps6586x.c
+++ b/drivers/mfd/tps6586x.c
@@ -495,6 +495,10 @@ static void tps6586x_print_version(struct i2c_client *client, int version)
 	case TPS658623:
 		name = "TPS658623";
 		break;
+	case TPS658640:
+	case TPS658640v2:
+		name = "TPS658640";
+		break;
 	case TPS658643:
 		name = "TPS658643";
 		break;
diff --git a/drivers/regulator/tps6586x-regulator.c b/drivers/regulator/tps6586x-regulator.c
index da8ee0217573..e045b7fe5572 100644
--- a/drivers/regulator/tps6586x-regulator.c
+++ b/drivers/regulator/tps6586x-regulator.c
@@ -116,6 +116,13 @@ static const unsigned int tps6586x_sm2_voltages[] = {
 	4200000, 4250000, 4300000, 4350000, 4400000, 4450000, 4500000, 4550000,
 };
 
+static int tps658640_sm2_voltages[] = {
+	2150000, 2200000, 2250000, 2300000, 2350000, 2400000, 2450000, 2500000,
+	2550000, 2600000, 2650000, 2700000, 2750000, 2800000, 2850000, 2900000,
+	2950000, 3000000, 3050000, 3100000, 3150000, 3200000, 3250000, 3300000,
+	3350000, 3400000, 3450000, 3500000, 3550000, 3600000, 3650000, 3700000,
+};
+
 static const unsigned int tps658643_sm2_voltages[] = {
 	1025000, 1050000, 1075000, 1100000, 1125000, 1150000, 1175000, 1200000,
 	1225000, 1250000, 1275000, 1300000, 1325000, 1350000, 1375000, 1400000,
@@ -130,6 +137,10 @@ static const unsigned int tps6586x_dvm_voltages[] = {
 	1325000, 1350000, 1375000, 1400000, 1425000, 1450000, 1475000, 1500000,
 };
 
+static int tps658640_rtc_voltages[] = {
+	2500000, 2850000, 3100000, 3300000,
+};
+
 #define TPS6586X_REGULATOR(_id, _ops, _pin_name, vdata, vreg, shift, nbits, \
 			   ereg0, ebit0, ereg1, ebit1, goreg, gobit)	\
 	.desc	= {							\
@@ -224,6 +235,26 @@ static struct tps6586x_regulator tps658623_regulator[] = {
 					END, 7),
 };
 
+static struct tps6586x_regulator tps658640_regulator[] = {
+	TPS6586X_LDO(LDO_3, "vinldo23", tps6586x_ldo0, SUPPLYV4, 0, 3,
+					ENC, 2, END, 2),
+	TPS6586X_LDO(LDO_5, "REG-SYS", tps6586x_ldo0, SUPPLYV6, 0, 3,
+					ENE, 6, ENE, 6),
+	TPS6586X_LDO(LDO_6, "vinldo678", tps6586x_ldo0, SUPPLYV3, 0, 3,
+					ENC, 4, END, 4),
+	TPS6586X_LDO(LDO_7, "vinldo678", tps6586x_ldo0, SUPPLYV3, 3, 3,
+					ENC, 5, END, 5),
+	TPS6586X_LDO(LDO_8, "vinldo678", tps6586x_ldo0, SUPPLYV2, 5, 3,
+					ENC, 6, END, 6),
+	TPS6586X_LDO(LDO_9, "vinldo9", tps6586x_ldo0, SUPPLYV6, 3, 3,
+					ENE, 7, ENE, 7),
+	TPS6586X_LDO(SM_2, "vin-sm2", tps658640_sm2, SUPPLYV2, 0, 5,
+					ENC, 7, END, 7),
+
+	TPS6586X_FIXED_LDO(LDO_RTC, "REG-SYS", tps658640_rtc, SUPPLYV4, 3, 2,
+					V4, 7, V4, 7),
+};
+
 static struct tps6586x_regulator tps658643_regulator[] = {
 	TPS6586X_LDO(SM_2, "vin-sm2", tps658643_sm2, SUPPLYV2, 0, 5, ENC, 7,
 					END, 7),
@@ -312,6 +343,11 @@ static struct tps6586x_regulator *find_regulator_info(int id, int version)
 		table = tps658623_regulator;
 		num = ARRAY_SIZE(tps658623_regulator);
 		break;
+	case TPS658640:
+	case TPS658640v2:
+		table = tps658640_regulator;
+		num = ARRAY_SIZE(tps658640_regulator);
+		break;
 	case TPS658643:
 		table = tps658643_regulator;
 		num = ARRAY_SIZE(tps658643_regulator);
diff --git a/include/linux/mfd/tps6586x.h b/include/linux/mfd/tps6586x.h
index cbecec2e353a..96187ed9f9bb 100644
--- a/include/linux/mfd/tps6586x.h
+++ b/include/linux/mfd/tps6586x.h
@@ -17,6 +17,8 @@
 #define TPS658621A	0x15
 #define TPS658621CD	0x2c
 #define TPS658623	0x1b
+#define TPS658640	0x01
+#define TPS658640v2	0x02
 #define TPS658643	0x03
 
 enum {
-- 
cgit 


From 3cc6919bd61315ea60baf95f3f9868aacfd1ace4 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 21 May 2014 15:39:54 +0200
Subject: backlight: Add backlight device (un)registration notification

Some firmware drivers, e.g. acpi-video, want to get themselves out of the
way (in some cases) when there is also a raw backlight device available.

Due to module loading ordering being unknown, acpi-video cannot be certain
that the backlight_device_registered(BACKLIGHT_RAW) it does for this is
the final verdict wrt there being a BACKLIGHT_RAW device.

By adding notification acpi-video can listen for backlight devices showing
up after it has loaded, and unregister its backlight device if desired.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Jingoo Han <jg1.han@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/video/backlight/backlight.c | 40 +++++++++++++++++++++++++++++++++++++
 include/linux/backlight.h           |  7 +++++++
 2 files changed, 47 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c
index bd2172c2d650..428089009cd5 100644
--- a/drivers/video/backlight/backlight.c
+++ b/drivers/video/backlight/backlight.c
@@ -23,6 +23,7 @@
 
 static struct list_head backlight_dev_list;
 static struct mutex backlight_dev_list_mutex;
+static struct blocking_notifier_head backlight_notifier;
 
 static const char *const backlight_types[] = {
 	[BACKLIGHT_RAW] = "raw",
@@ -370,6 +371,9 @@ struct backlight_device *backlight_device_register(const char *name,
 	list_add(&new_bd->entry, &backlight_dev_list);
 	mutex_unlock(&backlight_dev_list_mutex);
 
+	blocking_notifier_call_chain(&backlight_notifier,
+				     BACKLIGHT_REGISTERED, new_bd);
+
 	return new_bd;
 }
 EXPORT_SYMBOL(backlight_device_register);
@@ -413,6 +417,10 @@ void backlight_device_unregister(struct backlight_device *bd)
 		pmac_backlight = NULL;
 	mutex_unlock(&pmac_backlight_mutex);
 #endif
+
+	blocking_notifier_call_chain(&backlight_notifier,
+				     BACKLIGHT_UNREGISTERED, bd);
+
 	mutex_lock(&bd->ops_lock);
 	bd->ops = NULL;
 	mutex_unlock(&bd->ops_lock);
@@ -437,6 +445,36 @@ static int devm_backlight_device_match(struct device *dev, void *res,
 	return *r == data;
 }
 
+/**
+ * backlight_register_notifier - get notified of backlight (un)registration
+ * @nb: notifier block with the notifier to call on backlight (un)registration
+ *
+ * @return 0 on success, otherwise a negative error code
+ *
+ * Register a notifier to get notified when backlight devices get registered
+ * or unregistered.
+ */
+int backlight_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&backlight_notifier, nb);
+}
+EXPORT_SYMBOL(backlight_register_notifier);
+
+/**
+ * backlight_unregister_notifier - unregister a backlight notifier
+ * @nb: notifier block to unregister
+ *
+ * @return 0 on success, otherwise a negative error code
+ *
+ * Unregister a notifier that was previously registered with
+ * backlight_register_notifier().
+ */
+int backlight_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&backlight_notifier, nb);
+}
+EXPORT_SYMBOL(backlight_unregister_notifier);
+
 /**
  * devm_backlight_device_register - resource managed backlight_device_register()
  * @dev: the device to register
@@ -544,6 +582,8 @@ static int __init backlight_class_init(void)
 	backlight_class->pm = &backlight_class_dev_pm_ops;
 	INIT_LIST_HEAD(&backlight_dev_list);
 	mutex_init(&backlight_dev_list_mutex);
+	BLOCKING_INIT_NOTIFIER_HEAD(&backlight_notifier);
+
 	return 0;
 }
 
diff --git a/include/linux/backlight.h b/include/linux/backlight.h
index 72647429adf6..adb14a8616df 100644
--- a/include/linux/backlight.h
+++ b/include/linux/backlight.h
@@ -40,6 +40,11 @@ enum backlight_type {
 	BACKLIGHT_TYPE_MAX,
 };
 
+enum backlight_notification {
+	BACKLIGHT_REGISTERED,
+	BACKLIGHT_UNREGISTERED,
+};
+
 struct backlight_device;
 struct fb_info;
 
@@ -133,6 +138,8 @@ extern void devm_backlight_device_unregister(struct device *dev,
 extern void backlight_force_update(struct backlight_device *bd,
 				   enum backlight_update_reason reason);
 extern bool backlight_device_registered(enum backlight_type type);
+extern int backlight_register_notifier(struct notifier_block *nb);
+extern int backlight_unregister_notifier(struct notifier_block *nb);
 
 #define to_backlight_device(obj) container_of(obj, struct backlight_device, dev)
 
-- 
cgit 


From 61dc1abae64854c7cef543598b9e6f04886c4ebd Mon Sep 17 00:00:00 2001
From: Nick Dyer <nick.dyer@itdev.co.uk>
Date: Sun, 18 May 2014 23:16:49 -0700
Subject: Input: atmel_mxt_ts - read screen config from chip

By reading the touchscreen configuration from the settings that the
maXTouch chip is actually using, we can remove some platform data.

The matrix size is not used for anything, and results in some rather
confusing code to re-read it because it may change when configuration
is downloaded, so don't print it out.

Signed-off-by: Nick Dyer <nick.dyer@itdev.co.uk>
Acked-by: Benson Leung <bleung@chromium.org>
Acked-by: Yufeng Shen <miletus@chromium.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 arch/arm/mach-s5pv210/mach-goni.c         |   3 -
 drivers/input/touchscreen/atmel_mxt_ts.c  | 136 ++++++++++++++----------------
 drivers/platform/chrome/chromeos_laptop.c |   6 --
 include/linux/i2c/atmel_mxt_ts.h          |  15 ----
 4 files changed, 65 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-s5pv210/mach-goni.c b/arch/arm/mach-s5pv210/mach-goni.c
index e549ecf0e5dc..bb9354f45e27 100644
--- a/arch/arm/mach-s5pv210/mach-goni.c
+++ b/arch/arm/mach-s5pv210/mach-goni.c
@@ -239,9 +239,6 @@ static void __init goni_radio_init(void)
 
 /* TSP */
 static struct mxt_platform_data qt602240_platform_data = {
-	.x_size		= 800,
-	.y_size		= 480,
-	.orient		= MXT_DIAGONAL,
 	.irqflags	= IRQF_TRIGGER_FALLING,
 };
 
diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c
index 7a9197a19f67..75493ca8e784 100644
--- a/drivers/input/touchscreen/atmel_mxt_ts.c
+++ b/drivers/input/touchscreen/atmel_mxt_ts.c
@@ -100,33 +100,16 @@
 
 /* MXT_TOUCH_MULTI_T9 field */
 #define MXT_TOUCH_CTRL		0
-#define MXT_TOUCH_XORIGIN	1
-#define MXT_TOUCH_YORIGIN	2
-#define MXT_TOUCH_XSIZE		3
-#define MXT_TOUCH_YSIZE		4
-#define MXT_TOUCH_BLEN		6
-#define MXT_TOUCH_TCHTHR	7
-#define MXT_TOUCH_TCHDI		8
-#define MXT_TOUCH_ORIENT	9
-#define MXT_TOUCH_MOVHYSTI	11
-#define MXT_TOUCH_MOVHYSTN	12
-#define MXT_TOUCH_NUMTOUCH	14
-#define MXT_TOUCH_MRGHYST	15
-#define MXT_TOUCH_MRGTHR	16
-#define MXT_TOUCH_AMPHYST	17
-#define MXT_TOUCH_XRANGE_LSB	18
-#define MXT_TOUCH_XRANGE_MSB	19
-#define MXT_TOUCH_YRANGE_LSB	20
-#define MXT_TOUCH_YRANGE_MSB	21
-#define MXT_TOUCH_XLOCLIP	22
-#define MXT_TOUCH_XHICLIP	23
-#define MXT_TOUCH_YLOCLIP	24
-#define MXT_TOUCH_YHICLIP	25
-#define MXT_TOUCH_XEDGECTRL	26
-#define MXT_TOUCH_XEDGEDIST	27
-#define MXT_TOUCH_YEDGECTRL	28
-#define MXT_TOUCH_YEDGEDIST	29
-#define MXT_TOUCH_JUMPLIMIT	30
+#define MXT_T9_ORIENT		9
+#define MXT_T9_RANGE		18
+
+struct t9_range {
+	u16 x;
+	u16 y;
+} __packed;
+
+/* Touch orient bits */
+#define MXT_XY_SWITCH		(1 << 0)
 
 /* MXT_PROCI_GRIPFACE_T20 field */
 #define MXT_GRIPFACE_CTRL	0
@@ -211,11 +194,6 @@
 #define MXT_PRESS		(1 << 6)
 #define MXT_DETECT		(1 << 7)
 
-/* Touch orient bits */
-#define MXT_XY_SWITCH		(1 << 0)
-#define MXT_X_INVERT		(1 << 1)
-#define MXT_Y_INVERT		(1 << 2)
-
 /* Touchscreen absolute values */
 #define MXT_MAX_AREA		0xff
 
@@ -580,11 +558,6 @@ static int __mxt_read_reg(struct i2c_client *client,
 	return ret;
 }
 
-static int mxt_read_reg(struct i2c_client *client, u16 reg, u8 *val)
-{
-	return __mxt_read_reg(client, reg, 1, val);
-}
-
 static int __mxt_write_reg(struct i2c_client *client, u16 reg, u16 len,
 			   const void *val)
 {
@@ -1029,12 +1002,59 @@ static void mxt_free_object_table(struct mxt_data *data)
 	data->T19_reportid = 0;
 }
 
+static int mxt_read_t9_resolution(struct mxt_data *data)
+{
+	struct i2c_client *client = data->client;
+	int error;
+	struct t9_range range;
+	unsigned char orient;
+	struct mxt_object *object;
+
+	object = mxt_get_object(data, MXT_TOUCH_MULTI_T9);
+	if (!object)
+		return -EINVAL;
+
+	error = __mxt_read_reg(client,
+			       object->start_address + MXT_T9_RANGE,
+			       sizeof(range), &range);
+	if (error)
+		return error;
+
+	le16_to_cpus(&range.x);
+	le16_to_cpus(&range.y);
+
+	error =  __mxt_read_reg(client,
+				object->start_address + MXT_T9_ORIENT,
+				1, &orient);
+	if (error)
+		return error;
+
+	/* Handle default values */
+	if (range.x == 0)
+		range.x = 1023;
+
+	if (range.y == 0)
+		range.y = 1023;
+
+	if (orient & MXT_XY_SWITCH) {
+		data->max_x = range.y;
+		data->max_y = range.x;
+	} else {
+		data->max_x = range.x;
+		data->max_y = range.y;
+	}
+
+	dev_dbg(&client->dev,
+		"Touchscreen size X%uY%u\n", data->max_x, data->max_y);
+
+	return 0;
+}
+
 static int mxt_initialize(struct mxt_data *data)
 {
 	struct i2c_client *client = data->client;
 	struct mxt_info *info = &data->info;
 	int error;
-	u8 val;
 
 	error = mxt_get_info(data);
 	if (error)
@@ -1063,26 +1083,16 @@ static int mxt_initialize(struct mxt_data *data)
 		goto err_free_object_table;
 	}
 
-	/* Update matrix size at info struct */
-	error = mxt_read_reg(client, MXT_MATRIX_X_SIZE, &val);
-	if (error)
-		goto err_free_object_table;
-	info->matrix_xsize = val;
-
-	error = mxt_read_reg(client, MXT_MATRIX_Y_SIZE, &val);
-	if (error)
+	error = mxt_read_t9_resolution(data);
+	if (error) {
+		dev_err(&client->dev, "Failed to initialize T9 resolution\n");
 		goto err_free_object_table;
-	info->matrix_ysize = val;
-
-	dev_info(&client->dev,
-			"Family: %u Variant: %u Firmware V%u.%u.%02X\n",
-			info->family_id, info->variant_id, info->version >> 4,
-			info->version & 0xf, info->build);
+	}
 
 	dev_info(&client->dev,
-			"Matrix X Size: %u Matrix Y Size: %u Objects: %u\n",
-			info->matrix_xsize, info->matrix_ysize,
-			info->object_num);
+		 "Family: %u Variant: %u Firmware V%u.%u.%02X Objects: %u\n",
+		 info->family_id, info->variant_id, info->version >> 4,
+		 info->version & 0xf, info->build, info->object_num);
 
 	return 0;
 
@@ -1091,20 +1101,6 @@ err_free_object_table:
 	return error;
 }
 
-static void mxt_calc_resolution(struct mxt_data *data)
-{
-	unsigned int max_x = data->pdata->x_size - 1;
-	unsigned int max_y = data->pdata->y_size - 1;
-
-	if (data->pdata->orient & MXT_XY_SWITCH) {
-		data->max_x = max_y;
-		data->max_y = max_x;
-	} else {
-		data->max_x = max_x;
-		data->max_y = max_y;
-	}
-}
-
 /* Firmware Version is returned as Major.Minor.Build */
 static ssize_t mxt_fw_version_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
@@ -1430,8 +1426,6 @@ static int mxt_probe(struct i2c_client *client,
 	init_completion(&data->reset_completion);
 	init_completion(&data->crc_completion);
 
-	mxt_calc_resolution(data);
-
 	error = mxt_initialize(data);
 	if (error)
 		goto err_free_mem;
diff --git a/drivers/platform/chrome/chromeos_laptop.c b/drivers/platform/chrome/chromeos_laptop.c
index 8b7523ab62e5..7f1a2e2711bd 100644
--- a/drivers/platform/chrome/chromeos_laptop.c
+++ b/drivers/platform/chrome/chromeos_laptop.c
@@ -94,9 +94,6 @@ static int mxt_t19_keys[] = {
 };
 
 static struct mxt_platform_data atmel_224s_tp_platform_data = {
-	.x_size			= 102*20,
-	.y_size			= 68*20,
-	.orient			= MXT_VERTICAL_FLIP,
 	.irqflags		= IRQF_TRIGGER_FALLING,
 	.t19_num_keys		= ARRAY_SIZE(mxt_t19_keys),
 	.t19_keymap		= mxt_t19_keys,
@@ -111,9 +108,6 @@ static struct i2c_board_info atmel_224s_tp_device = {
 };
 
 static struct mxt_platform_data atmel_1664s_platform_data = {
-	.x_size			= 1700,
-	.y_size			= 2560,
-	.orient			= MXT_ROTATED_90_COUNTER,
 	.irqflags		= IRQF_TRIGGER_FALLING,
 	.config			= NULL,
 	.config_length		= 0,
diff --git a/include/linux/i2c/atmel_mxt_ts.h b/include/linux/i2c/atmel_mxt_ts.h
index 9f92135b6620..3891dc1de21c 100644
--- a/include/linux/i2c/atmel_mxt_ts.h
+++ b/include/linux/i2c/atmel_mxt_ts.h
@@ -15,26 +15,11 @@
 
 #include <linux/types.h>
 
-/* Orient */
-#define MXT_NORMAL		0x0
-#define MXT_DIAGONAL		0x1
-#define MXT_HORIZONTAL_FLIP	0x2
-#define MXT_ROTATED_90_COUNTER	0x3
-#define MXT_VERTICAL_FLIP	0x4
-#define MXT_ROTATED_90		0x5
-#define MXT_ROTATED_180		0x6
-#define MXT_DIAGONAL_COUNTER	0x7
-
 /* The platform data for the Atmel maXTouch touchscreen driver */
 struct mxt_platform_data {
 	const u8 *config;
 	size_t config_length;
 	u32 config_crc;
-
-	unsigned int x_size;
-	unsigned int y_size;
-	unsigned char orient;
-
 	unsigned long irqflags;
 	u8 t19_num_keys;
 	const unsigned int *t19_keymap;
-- 
cgit 


From 3ebe7f9f7e4a4fd1f6461ecd01ff2961317a483a Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 2 May 2014 10:40:42 -0600
Subject: PCI: Notify driver before and after device reset

Notify a PCI device driver when its device's access is about to be disabled
for an impending reset attempt, then after the attempt completes and device
access is restored.  The notification is via the pci_error_handlers
interface.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.c   | 21 +++++++++++++++++++++
 include/linux/pci.h |  3 +++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 7325d43bf030..43d87b26ec5b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -3305,8 +3305,27 @@ static void pci_dev_unlock(struct pci_dev *dev)
 	pci_cfg_access_unlock(dev);
 }
 
+/**
+ * pci_reset_notify - notify device driver of reset
+ * @dev: device to be notified of reset
+ * @prepare: 'true' if device is about to be reset; 'false' if reset attempt
+ *           completed
+ *
+ * Must be called prior to device access being disabled and after device
+ * access is restored.
+ */
+static void pci_reset_notify(struct pci_dev *dev, bool prepare)
+{
+	const struct pci_error_handlers *err_handler =
+			dev->driver ? dev->driver->err_handler : NULL;
+	if (err_handler && err_handler->reset_notify)
+		err_handler->reset_notify(dev, prepare);
+}
+
 static void pci_dev_save_and_disable(struct pci_dev *dev)
 {
+	pci_reset_notify(dev, true);
+
 	/*
 	 * Wake-up device prior to save.  PM registers default to D0 after
 	 * reset and a simple register restore doesn't reliably return
@@ -3328,6 +3347,7 @@ static void pci_dev_save_and_disable(struct pci_dev *dev)
 static void pci_dev_restore(struct pci_dev *dev)
 {
 	pci_restore_state(dev);
+	pci_reset_notify(dev, false);
 }
 
 static int pci_dev_reset(struct pci_dev *dev, int probe)
@@ -3344,6 +3364,7 @@ static int pci_dev_reset(struct pci_dev *dev, int probe)
 
 	return rc;
 }
+
 /**
  * __pci_reset_function - reset a PCI device function
  * @dev: PCI device to reset
diff --git a/include/linux/pci.h b/include/linux/pci.h
index aab57b4abe7f..31c43093e538 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -603,6 +603,9 @@ struct pci_error_handlers {
 	/* PCI slot has been reset */
 	pci_ers_result_t (*slot_reset)(struct pci_dev *dev);
 
+	/* PCI function reset prepare or completed */
+	void (*reset_notify)(struct pci_dev *dev, bool prepare);
+
 	/* Device driver may resume normal operations */
 	void (*resume)(struct pci_dev *dev);
 };
-- 
cgit 


From f14bbe77a96bb979dc539d8308ee18a9363a544f Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 27 May 2014 12:06:53 -0600
Subject: blk-mq: pass in suggested NUMA node to ->alloc_hctx()

Drivers currently have to figure this out on their own, and they
are missing information to do it properly. The ones that did
attempt to do it, do it wrong.

So just pass in the suggested node directly to the alloc
function.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-cpumap.c    | 16 ++++++++++++++++
 block/blk-mq.c           | 26 +++++++++++++++-----------
 block/blk-mq.h           |  1 +
 drivers/block/null_blk.c | 35 +++--------------------------------
 include/linux/blk-mq.h   |  4 ++--
 5 files changed, 37 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 5d0f93cf358c..0daacb927be1 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -96,3 +96,19 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
 	kfree(map);
 	return NULL;
 }
+
+/*
+ * We have no quick way of doing reverse lookups. This is only used at
+ * queue init time, so runtime isn't important.
+ */
+int blk_mq_hw_queue_to_node(unsigned int *mq_map, unsigned int index)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		if (index == mq_map[i])
+			return cpu_to_node(i);
+	}
+
+	return NUMA_NO_NODE;
+}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e8b5f74dc1a1..30bad930e661 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1297,10 +1297,10 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu)
 EXPORT_SYMBOL(blk_mq_map_queue);
 
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *set,
-						   unsigned int hctx_index)
+						   unsigned int hctx_index,
+						   int node)
 {
-	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL,
-				set->numa_node);
+	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, node);
 }
 EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue);
 
@@ -1752,6 +1752,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	struct blk_mq_hw_ctx **hctxs;
 	struct blk_mq_ctx *ctx;
 	struct request_queue *q;
+	unsigned int *map;
 	int i;
 
 	ctx = alloc_percpu(struct blk_mq_ctx);
@@ -1764,8 +1765,14 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	if (!hctxs)
 		goto err_percpu;
 
+	map = blk_mq_make_queue_map(set);
+	if (!map)
+		goto err_map;
+
 	for (i = 0; i < set->nr_hw_queues; i++) {
-		hctxs[i] = set->ops->alloc_hctx(set, i);
+		int node = blk_mq_hw_queue_to_node(map, i);
+
+		hctxs[i] = set->ops->alloc_hctx(set, i, node);
 		if (!hctxs[i])
 			goto err_hctxs;
 
@@ -1773,7 +1780,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 			goto err_hctxs;
 
 		atomic_set(&hctxs[i]->nr_active, 0);
-		hctxs[i]->numa_node = NUMA_NO_NODE;
+		hctxs[i]->numa_node = node;
 		hctxs[i]->queue_num = i;
 	}
 
@@ -1784,15 +1791,12 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	if (percpu_counter_init(&q->mq_usage_counter, 0))
 		goto err_map;
 
-	q->mq_map = blk_mq_make_queue_map(set);
-	if (!q->mq_map)
-		goto err_map;
-
 	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
 	blk_queue_rq_timeout(q, 30000);
 
 	q->nr_queues = nr_cpu_ids;
 	q->nr_hw_queues = set->nr_hw_queues;
+	q->mq_map = map;
 
 	q->queue_ctx = ctx;
 	q->queue_hw_ctx = hctxs;
@@ -1844,16 +1848,16 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 err_flush_rq:
 	kfree(q->flush_rq);
 err_hw:
-	kfree(q->mq_map);
-err_map:
 	blk_cleanup_queue(q);
 err_hctxs:
+	kfree(map);
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		if (!hctxs[i])
 			break;
 		free_cpumask_var(hctxs[i]->cpumask);
 		set->ops->free_hctx(hctxs[i], i);
 	}
+err_map:
 	kfree(hctxs);
 err_percpu:
 	free_percpu(ctx);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 491dbd4e93f5..ff5e6bf0f691 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -52,6 +52,7 @@ void blk_mq_disable_hotplug(void);
  */
 extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
 extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
+extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
 
 /*
  * Basic implementation of sparser bitmap, allowing the user to spread
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 8e7e3a0b0d24..4d33c8c25fbf 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -322,39 +322,10 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
 }
 
 static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_tag_set *set,
-		unsigned int hctx_index)
+					     unsigned int hctx_index,
+					     int node)
 {
-	int b_size = DIV_ROUND_UP(set->nr_hw_queues, nr_online_nodes);
-	int tip = (set->nr_hw_queues % nr_online_nodes);
-	int node = 0, i, n;
-
-	/*
-	 * Split submit queues evenly wrt to the number of nodes. If uneven,
-	 * fill the first buckets with one extra, until the rest is filled with
-	 * no extra.
-	 */
-	for (i = 0, n = 1; i < hctx_index; i++, n++) {
-		if (n % b_size == 0) {
-			n = 0;
-			node++;
-
-			tip--;
-			if (!tip)
-				b_size = set->nr_hw_queues / nr_online_nodes;
-		}
-	}
-
-	/*
-	 * A node might not be online, therefore map the relative node id to the
-	 * real node id.
-	 */
-	for_each_online_node(n) {
-		if (!node)
-			break;
-		node--;
-	}
-
-	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n);
+	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, node);
 }
 
 static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index f76bb18350af..afeb93496907 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -80,7 +80,7 @@ struct blk_mq_tag_set {
 typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *);
 typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
 typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_tag_set *,
-		unsigned int);
+		unsigned int, int);
 typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@ -165,7 +165,7 @@ struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, g
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
-struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int);
+struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
 void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int);
 
 void blk_mq_end_io(struct request *rq, int error);
-- 
cgit 


From 1c86438c9423a26cc9f7f74a8950d9cf9c93bc23 Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Sun, 4 May 2014 12:23:37 +0800
Subject: PCI: Add new pci_is_bridge() interface

Add a helper function to check a device's header type for PCI bridge or
CardBus bridge.

Requires: 326c1cdae741 PCI: Rename pci_is_bridge() to pci_has_subordinate()
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index aab57b4abe7f..f2a5946ea0bf 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -477,6 +477,19 @@ static inline bool pci_is_root_bus(struct pci_bus *pbus)
 	return !(pbus->parent);
 }
 
+/**
+ * pci_is_bridge - check if the PCI device is a bridge
+ * @dev: PCI device
+ *
+ * Return true if the PCI device is a bridge, whether or not it has a
+ * subordinate bus.
+ */
+static inline bool pci_is_bridge(struct pci_dev *dev)
+{
+	return dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
+		dev->hdr_type == PCI_HEADER_TYPE_CARDBUS;
+}
+
 static inline struct pci_dev *pci_upstream_bridge(struct pci_dev *dev)
 {
 	dev = pci_physfn(dev);
-- 
cgit 


From b475598aec63f2efbc78f0ff1895d917d2370846 Mon Sep 17 00:00:00 2001
From: Haggai Eran <haggaie@mellanox.com>
Date: Thu, 22 May 2014 14:50:10 +0300
Subject: mlx5_core: Store MR attributes in mlx5_mr_core during creation and
 after UMR

The patch stores iova, pd and size during mr creation and after UMRs
that modify them.  It removes the unused access flags field.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx5/mr.c              | 4 ++++
 drivers/net/ethernet/mellanox/mlx5/core/mr.c | 4 ++++
 include/linux/mlx5/driver.h                  | 1 -
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 9d932a2aa9f4..f472ab246d94 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -794,6 +794,10 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 		err = -EFAULT;
 	}
 
+	mr->mmr.iova = virt_addr;
+	mr->mmr.size = len;
+	mr->mmr.pd = to_mpd(pd)->pdn;
+
 unmap_dma:
 	up(&umrc->sem);
 	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 4cc927649404..ac52a0fe2d3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -82,7 +82,11 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mr *mr,
 		return mlx5_cmd_status_to_err(&lout.hdr);
 	}
 
+	mr->iova = be64_to_cpu(in->seg.start_addr);
+	mr->size = be64_to_cpu(in->seg.len);
 	mr->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key;
+	mr->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff;
+
 	mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n",
 		      be32_to_cpu(lout.mkey), key, mr->key);
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 93cef6313e72..2bce4aad2570 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -427,7 +427,6 @@ struct mlx5_core_mr {
 	u64			size;
 	u32			key;
 	u32			pd;
-	u32			access;
 };
 
 struct mlx5_core_srq {
-- 
cgit 


From 1bb6c08abfb653ce6e65d8ab4ddef403227afedf Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Mon, 14 Apr 2014 12:54:47 +0200
Subject: driver core: Move driver_data back to struct device

Having to allocate memory as part of dev_set_drvdata() is a problem
because that memory may never get freed if the device itself is not
created. So move driver_data back to struct device.

This is a partial revert of commit b4028437.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/base.h    |  3 ---
 drivers/base/dd.c      | 13 +++----------
 include/linux/device.h |  3 +++
 3 files changed, 6 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/base.h b/drivers/base/base.h
index 24f424249d9b..251c5d30f963 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -63,8 +63,6 @@ struct driver_private {
  *	binding of drivers which were unable to get all the resources needed by
  *	the device; typically because it depends on another driver getting
  *	probed first.
- * @driver_data - private pointer for driver specific info.  Will turn into a
- * list soon.
  * @device - pointer back to the struct class that this structure is
  * associated with.
  *
@@ -76,7 +74,6 @@ struct device_private {
 	struct klist_node knode_driver;
 	struct klist_node knode_bus;
 	struct list_head deferred_probe;
-	void *driver_data;
 	struct device *device;
 };
 #define to_device_private_parent(obj)	\
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 62ec61e8f84a..d14b6e895896 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -594,22 +594,15 @@ void driver_detach(struct device_driver *drv)
  */
 void *dev_get_drvdata(const struct device *dev)
 {
-	if (dev && dev->p)
-		return dev->p->driver_data;
+	if (dev)
+		return dev->driver_data;
 	return NULL;
 }
 EXPORT_SYMBOL(dev_get_drvdata);
 
 int dev_set_drvdata(struct device *dev, void *data)
 {
-	int error;
-
-	if (!dev->p) {
-		error = device_private_init(dev);
-		if (error)
-			return error;
-	}
-	dev->p->driver_data = data;
+	dev->driver_data = data;
 	return 0;
 }
 EXPORT_SYMBOL(dev_set_drvdata);
diff --git a/include/linux/device.h b/include/linux/device.h
index d1d1c055b48e..5c94ac3e7972 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -673,6 +673,7 @@ struct acpi_dev_node {
  * 		variants, which GPIO pins act in what additional roles, and so
  * 		on.  This shrinks the "Board Support Packages" (BSPs) and
  * 		minimizes board-specific #ifdefs in drivers.
+ * @driver_data: Private pointer for driver specific info.
  * @power:	For device power management.
  * 		See Documentation/power/devices.txt for details.
  * @pm_domain:	Provide callbacks that are executed during system suspend,
@@ -734,6 +735,8 @@ struct device {
 					   device */
 	void		*platform_data;	/* Platform specific data, device
 					   core doesn't touch it */
+	void		*driver_data;	/* Driver data, set and get with
+					   dev_set/get_drvdata */
 	struct dev_pm_info	power;
 	struct dev_pm_domain	*pm_domain;
 
-- 
cgit 


From 4101866c743a3695666e8562b5713b4d7f341cbf Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 27 May 2014 10:53:17 +0200
Subject: ASoC: Add ADAU1X61 and ADAU1X81 CODECs common code

The ADAU1X61 and ADAU1X81 are very similar in the digital domain, but are quite
different in the analog domain. This patch adds support for the common parts of
the ADAU1X61 and ADAU1X81 CODECs.

The patch also restores some of the alphabetical order in the Makefile and
Kconfig.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/platform_data/adau17x1.h |  23 +
 sound/soc/codecs/Kconfig               |   6 +-
 sound/soc/codecs/Makefile              |   6 +-
 sound/soc/codecs/adau17x1.c            | 866 +++++++++++++++++++++++++++++++++
 sound/soc/codecs/adau17x1.h            | 124 +++++
 5 files changed, 1022 insertions(+), 3 deletions(-)
 create mode 100644 include/linux/platform_data/adau17x1.h
 create mode 100644 sound/soc/codecs/adau17x1.c
 create mode 100644 sound/soc/codecs/adau17x1.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/adau17x1.h b/include/linux/platform_data/adau17x1.h
new file mode 100644
index 000000000000..f90bd9286f31
--- /dev/null
+++ b/include/linux/platform_data/adau17x1.h
@@ -0,0 +1,23 @@
+/*
+ * Driver for ADAU1361/ADAU1461/ADAU1761/ADAU1961/ADAU1381/ADAU1781 codecs
+ *
+ * Copyright 2011-2014 Analog Devices Inc.
+ * Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef __LINUX_PLATFORM_DATA_ADAU17X1_H__
+#define __LINUX_PLATFORM_DATA_ADAU17X1_H__
+
+/**
+ * enum adau17x1_micbias_voltage - Microphone bias voltage
+ * @ADAU17X1_MICBIAS_0_90_AVDD: 0.9 * AVDD
+ * @ADAU17X1_MICBIAS_0_65_AVDD: 0.65 * AVDD
+ */
+enum adau17x1_micbias_voltage {
+	ADAU17X1_MICBIAS_0_90_AVDD = 0,
+	ADAU17X1_MICBIAS_0_65_AVDD = 1,
+};
+
+#endif
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index f0e840137887..182a39751a91 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -210,13 +210,17 @@ config SND_SOC_AD1980
 config SND_SOC_AD73311
 	tristate
 
+config SND_SOC_ADAU1373
+	tristate
+
 config SND_SOC_ADAU1701
 	tristate "Analog Devices ADAU1701 CODEC"
 	depends on I2C
 	select SND_SOC_SIGMADSP
 
-config SND_SOC_ADAU1373
+config SND_SOC_ADAU17X1
 	tristate
+	select SND_SOC_SIGMADSP
 
 config SND_SOC_ADAU1977
 	tristate
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index 3c4d275d064b..a8cba3086830 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -7,8 +7,9 @@ snd-soc-ad193x-spi-objs := ad193x-spi.o
 snd-soc-ad193x-i2c-objs := ad193x-i2c.o
 snd-soc-ad1980-objs := ad1980.o
 snd-soc-ad73311-objs := ad73311.o
-snd-soc-adau1701-objs := adau1701.o
 snd-soc-adau1373-objs := adau1373.o
+snd-soc-adau1701-objs := adau1701.o
+snd-soc-adau17x1-objs := adau17x1.o
 snd-soc-adau1977-objs := adau1977.o
 snd-soc-adau1977-spi-objs := adau1977-spi.o
 snd-soc-adau1977-i2c-objs := adau1977-i2c.o
@@ -157,10 +158,11 @@ obj-$(CONFIG_SND_SOC_AD193X_I2C)	+= snd-soc-ad193x-i2c.o
 obj-$(CONFIG_SND_SOC_AD1980)	+= snd-soc-ad1980.o
 obj-$(CONFIG_SND_SOC_AD73311) += snd-soc-ad73311.o
 obj-$(CONFIG_SND_SOC_ADAU1373)	+= snd-soc-adau1373.o
+obj-$(CONFIG_SND_SOC_ADAU1701)		+= snd-soc-adau1701.o
+obj-$(CONFIG_SND_SOC_ADAU17X1)		+= snd-soc-adau17x1.o
 obj-$(CONFIG_SND_SOC_ADAU1977)		+= snd-soc-adau1977.o
 obj-$(CONFIG_SND_SOC_ADAU1977_SPI)	+= snd-soc-adau1977-spi.o
 obj-$(CONFIG_SND_SOC_ADAU1977_I2C)	+= snd-soc-adau1977-i2c.o
-obj-$(CONFIG_SND_SOC_ADAU1701)  += snd-soc-adau1701.o
 obj-$(CONFIG_SND_SOC_ADAV80X)  += snd-soc-adav80x.o
 obj-$(CONFIG_SND_SOC_ADAV801)  += snd-soc-adav801.o
 obj-$(CONFIG_SND_SOC_ADAV803)  += snd-soc-adav803.o
diff --git a/sound/soc/codecs/adau17x1.c b/sound/soc/codecs/adau17x1.c
new file mode 100644
index 000000000000..2961fae9670a
--- /dev/null
+++ b/sound/soc/codecs/adau17x1.c
@@ -0,0 +1,866 @@
+/*
+ * Common code for ADAU1X61 and ADAU1X81 codecs
+ *
+ * Copyright 2011-2014 Analog Devices Inc.
+ * Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/tlv.h>
+#include <linux/gcd.h>
+#include <linux/i2c.h>
+#include <linux/spi/spi.h>
+#include <linux/regmap.h>
+
+#include "sigmadsp.h"
+#include "adau17x1.h"
+
+static const char * const adau17x1_capture_mixer_boost_text[] = {
+	"Normal operation", "Boost Level 1", "Boost Level 2", "Boost Level 3",
+};
+
+static SOC_ENUM_SINGLE_DECL(adau17x1_capture_boost_enum,
+	ADAU17X1_REC_POWER_MGMT, 5, adau17x1_capture_mixer_boost_text);
+
+static const char * const adau17x1_mic_bias_mode_text[] = {
+	"Normal operation", "High performance",
+};
+
+static SOC_ENUM_SINGLE_DECL(adau17x1_mic_bias_mode_enum,
+	ADAU17X1_MICBIAS, 3, adau17x1_mic_bias_mode_text);
+
+static const DECLARE_TLV_DB_MINMAX(adau17x1_digital_tlv, -9563, 0);
+
+static const struct snd_kcontrol_new adau17x1_controls[] = {
+	SOC_DOUBLE_R_TLV("Digital Capture Volume",
+		ADAU17X1_LEFT_INPUT_DIGITAL_VOL,
+		ADAU17X1_RIGHT_INPUT_DIGITAL_VOL,
+		0, 0xff, 1, adau17x1_digital_tlv),
+	SOC_DOUBLE_R_TLV("Digital Playback Volume", ADAU17X1_DAC_CONTROL1,
+		ADAU17X1_DAC_CONTROL2, 0, 0xff, 1, adau17x1_digital_tlv),
+
+	SOC_SINGLE("ADC High Pass Filter Switch", ADAU17X1_ADC_CONTROL,
+		5, 1, 0),
+	SOC_SINGLE("Playback De-emphasis Switch", ADAU17X1_DAC_CONTROL0,
+		2, 1, 0),
+
+	SOC_ENUM("Capture Boost", adau17x1_capture_boost_enum),
+
+	SOC_ENUM("Mic Bias Mode", adau17x1_mic_bias_mode_enum),
+};
+
+static int adau17x1_pll_event(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct adau *adau = snd_soc_codec_get_drvdata(w->codec);
+	int ret;
+
+	if (SND_SOC_DAPM_EVENT_ON(event)) {
+		adau->pll_regs[5] = 1;
+	} else {
+		adau->pll_regs[5] = 0;
+		/* Bypass the PLL when disabled, otherwise registers will become
+		 * inaccessible. */
+		regmap_update_bits(adau->regmap, ADAU17X1_CLOCK_CONTROL,
+			ADAU17X1_CLOCK_CONTROL_CORECLK_SRC_PLL, 0);
+	}
+
+	/* The PLL register is 6 bytes long and can only be written at once. */
+	ret = regmap_raw_write(adau->regmap, ADAU17X1_PLL_CONTROL,
+			adau->pll_regs, ARRAY_SIZE(adau->pll_regs));
+
+	if (SND_SOC_DAPM_EVENT_ON(event)) {
+		mdelay(5);
+		regmap_update_bits(adau->regmap, ADAU17X1_CLOCK_CONTROL,
+			ADAU17X1_CLOCK_CONTROL_CORECLK_SRC_PLL,
+			ADAU17X1_CLOCK_CONTROL_CORECLK_SRC_PLL);
+	}
+
+	return 0;
+}
+
+static const char * const adau17x1_mono_stereo_text[] = {
+	"Stereo",
+	"Mono Left Channel (L+R)",
+	"Mono Right Channel (L+R)",
+	"Mono (L+R)",
+};
+
+static SOC_ENUM_SINGLE_DECL(adau17x1_dac_mode_enum,
+	ADAU17X1_DAC_CONTROL0, 6, adau17x1_mono_stereo_text);
+
+static const struct snd_kcontrol_new adau17x1_dac_mode_mux =
+	SOC_DAPM_ENUM("DAC Mono-Stereo-Mode", adau17x1_dac_mode_enum);
+
+static const struct snd_soc_dapm_widget adau17x1_dapm_widgets[] = {
+	SND_SOC_DAPM_SUPPLY_S("PLL", 3, SND_SOC_NOPM, 0, 0, adau17x1_pll_event,
+		SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
+
+	SND_SOC_DAPM_SUPPLY("AIFCLK", SND_SOC_NOPM, 0, 0, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY("MICBIAS", ADAU17X1_MICBIAS, 0, 0, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY("Left Playback Enable", ADAU17X1_PLAY_POWER_MGMT,
+		0, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Right Playback Enable", ADAU17X1_PLAY_POWER_MGMT,
+		1, 0, NULL, 0),
+
+	SND_SOC_DAPM_MUX("Left DAC Mode Mux", SND_SOC_NOPM, 0, 0,
+		&adau17x1_dac_mode_mux),
+	SND_SOC_DAPM_MUX("Right DAC Mode Mux", SND_SOC_NOPM, 0, 0,
+		&adau17x1_dac_mode_mux),
+
+	SND_SOC_DAPM_ADC("Left Decimator", NULL, ADAU17X1_ADC_CONTROL, 0, 0),
+	SND_SOC_DAPM_ADC("Right Decimator", NULL, ADAU17X1_ADC_CONTROL, 1, 0),
+	SND_SOC_DAPM_DAC("Left DAC", NULL, ADAU17X1_DAC_CONTROL0, 0, 0),
+	SND_SOC_DAPM_DAC("Right DAC", NULL, ADAU17X1_DAC_CONTROL0, 1, 0),
+};
+
+static const struct snd_soc_dapm_route adau17x1_dapm_routes[] = {
+	{ "Left Decimator", NULL, "SYSCLK" },
+	{ "Right Decimator", NULL, "SYSCLK" },
+	{ "Left DAC", NULL, "SYSCLK" },
+	{ "Right DAC", NULL, "SYSCLK" },
+	{ "Capture", NULL, "SYSCLK" },
+	{ "Playback", NULL, "SYSCLK" },
+
+	{ "Left DAC", NULL, "Left DAC Mode Mux" },
+	{ "Right DAC", NULL, "Right DAC Mode Mux" },
+
+	{ "Capture", NULL, "AIFCLK" },
+	{ "Playback", NULL, "AIFCLK" },
+};
+
+static const struct snd_soc_dapm_route adau17x1_dapm_pll_route = {
+	"SYSCLK", NULL, "PLL",
+};
+
+/*
+ * The MUX register for the Capture and Playback MUXs selects either DSP as
+ * source/destination or one of the TDM slots. The TDM slot is selected via
+ * snd_soc_dai_set_tdm_slot(), so we only expose whether to go to the DSP or
+ * directly to the DAI interface with this control.
+ */
+static int adau17x1_dsp_mux_enum_put(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_soc_dapm_kcontrol_codec(kcontrol);
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+	struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+	struct snd_soc_dapm_update update;
+	unsigned int stream = e->shift_l;
+	unsigned int val, change;
+	int reg;
+
+	if (ucontrol->value.enumerated.item[0] >= e->items)
+		return -EINVAL;
+
+	switch (ucontrol->value.enumerated.item[0]) {
+	case 0:
+		val = 0;
+		adau->dsp_bypass[stream] = false;
+		break;
+	default:
+		val = (adau->tdm_slot[stream] * 2) + 1;
+		adau->dsp_bypass[stream] = true;
+		break;
+	}
+
+	if (stream == SNDRV_PCM_STREAM_PLAYBACK)
+		reg = ADAU17X1_SERIAL_INPUT_ROUTE;
+	else
+		reg = ADAU17X1_SERIAL_OUTPUT_ROUTE;
+
+	change = snd_soc_test_bits(codec, reg, 0xff, val);
+	if (change) {
+		update.kcontrol = kcontrol;
+		update.mask = 0xff;
+		update.reg = reg;
+		update.val = val;
+
+		snd_soc_dapm_mux_update_power(&codec->dapm, kcontrol,
+				ucontrol->value.enumerated.item[0], e, &update);
+	}
+
+	return change;
+}
+
+static int adau17x1_dsp_mux_enum_get(struct snd_kcontrol *kcontrol,
+	struct snd_ctl_elem_value *ucontrol)
+{
+	struct snd_soc_codec *codec = snd_soc_dapm_kcontrol_codec(kcontrol);
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+	struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
+	unsigned int stream = e->shift_l;
+	unsigned int reg, val;
+	int ret;
+
+	if (stream == SNDRV_PCM_STREAM_PLAYBACK)
+		reg = ADAU17X1_SERIAL_INPUT_ROUTE;
+	else
+		reg = ADAU17X1_SERIAL_OUTPUT_ROUTE;
+
+	ret = regmap_read(adau->regmap, reg, &val);
+	if (ret)
+		return ret;
+
+	if (val != 0)
+		val = 1;
+	ucontrol->value.enumerated.item[0] = val;
+
+	return 0;
+}
+
+#define DECLARE_ADAU17X1_DSP_MUX_CTRL(_name, _label, _stream, _text) \
+	const struct snd_kcontrol_new _name = \
+		SOC_DAPM_ENUM_EXT(_label, (const struct soc_enum)\
+			SOC_ENUM_SINGLE(SND_SOC_NOPM, _stream, \
+				ARRAY_SIZE(_text), _text), \
+			adau17x1_dsp_mux_enum_get, adau17x1_dsp_mux_enum_put)
+
+static const char * const adau17x1_dac_mux_text[] = {
+	"DSP",
+	"AIFIN",
+};
+
+static const char * const adau17x1_capture_mux_text[] = {
+	"DSP",
+	"Decimator",
+};
+
+static DECLARE_ADAU17X1_DSP_MUX_CTRL(adau17x1_dac_mux, "DAC Playback Mux",
+	SNDRV_PCM_STREAM_PLAYBACK, adau17x1_dac_mux_text);
+
+static DECLARE_ADAU17X1_DSP_MUX_CTRL(adau17x1_capture_mux, "Capture Mux",
+	SNDRV_PCM_STREAM_CAPTURE, adau17x1_capture_mux_text);
+
+static const struct snd_soc_dapm_widget adau17x1_dsp_dapm_widgets[] = {
+	SND_SOC_DAPM_PGA("DSP", ADAU17X1_DSP_RUN, 0, 0, NULL, 0),
+	SND_SOC_DAPM_SIGGEN("DSP Siggen"),
+
+	SND_SOC_DAPM_MUX("DAC Playback Mux", SND_SOC_NOPM, 0, 0,
+		&adau17x1_dac_mux),
+	SND_SOC_DAPM_MUX("Capture Mux", SND_SOC_NOPM, 0, 0,
+		&adau17x1_capture_mux),
+};
+
+static const struct snd_soc_dapm_route adau17x1_dsp_dapm_routes[] = {
+	{ "DAC Playback Mux", "DSP", "DSP" },
+	{ "DAC Playback Mux", "AIFIN", "Playback" },
+
+	{ "Left DAC Mode Mux", "Stereo", "DAC Playback Mux" },
+	{ "Left DAC Mode Mux", "Mono (L+R)", "DAC Playback Mux" },
+	{ "Left DAC Mode Mux", "Mono Left Channel (L+R)", "DAC Playback Mux" },
+	{ "Right DAC Mode Mux", "Stereo", "DAC Playback Mux" },
+	{ "Right DAC Mode Mux", "Mono (L+R)", "DAC Playback Mux" },
+	{ "Right DAC Mode Mux", "Mono Right Channel (L+R)", "DAC Playback Mux" },
+
+	{ "Capture Mux", "DSP", "DSP" },
+	{ "Capture Mux", "Decimator", "Left Decimator" },
+	{ "Capture Mux", "Decimator", "Right Decimator" },
+
+	{ "Capture", NULL, "Capture Mux" },
+
+	{ "DSP", NULL, "DSP Siggen" },
+
+	{ "DSP", NULL, "Left Decimator" },
+	{ "DSP", NULL, "Right Decimator" },
+};
+
+static const struct snd_soc_dapm_route adau17x1_no_dsp_dapm_routes[] = {
+	{ "Left DAC Mode Mux", "Stereo", "Playback" },
+	{ "Left DAC Mode Mux", "Mono (L+R)", "Playback" },
+	{ "Left DAC Mode Mux", "Mono Left Channel (L+R)", "Playback" },
+	{ "Right DAC Mode Mux", "Stereo", "Playback" },
+	{ "Right DAC Mode Mux", "Mono (L+R)", "Playback" },
+	{ "Right DAC Mode Mux", "Mono Right Channel (L+R)", "Playback" },
+	{ "Capture", NULL, "Left Decimator" },
+	{ "Capture", NULL, "Right Decimator" },
+};
+
+bool adau17x1_has_dsp(struct adau *adau)
+{
+	switch (adau->type) {
+	case ADAU1761:
+	case ADAU1381:
+	case ADAU1781:
+		return true;
+	default:
+		return false;
+	}
+}
+EXPORT_SYMBOL_GPL(adau17x1_has_dsp);
+
+static int adau17x1_hw_params(struct snd_pcm_substream *substream,
+	struct snd_pcm_hw_params *params, struct snd_soc_dai *dai)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+	unsigned int val, div, dsp_div;
+	unsigned int freq;
+
+	if (adau->clk_src == ADAU17X1_CLK_SRC_PLL)
+		freq = adau->pll_freq;
+	else
+		freq = adau->sysclk;
+
+	if (freq % params_rate(params) != 0)
+		return -EINVAL;
+
+	switch (freq / params_rate(params)) {
+	case 1024: /* fs */
+		div = 0;
+		dsp_div = 1;
+		break;
+	case 6144: /* fs / 6 */
+		div = 1;
+		dsp_div = 6;
+		break;
+	case 4096: /* fs / 4 */
+		div = 2;
+		dsp_div = 5;
+		break;
+	case 3072: /* fs / 3 */
+		div = 3;
+		dsp_div = 4;
+		break;
+	case 2048: /* fs / 2 */
+		div = 4;
+		dsp_div = 3;
+		break;
+	case 1536: /* fs / 1.5 */
+		div = 5;
+		dsp_div = 2;
+		break;
+	case 512: /* fs / 0.5 */
+		div = 6;
+		dsp_div = 0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	regmap_update_bits(adau->regmap, ADAU17X1_CONVERTER0,
+		ADAU17X1_CONVERTER0_CONVSR_MASK, div);
+	if (adau17x1_has_dsp(adau)) {
+		regmap_write(adau->regmap, ADAU17X1_SERIAL_SAMPLING_RATE, div);
+		regmap_write(adau->regmap, ADAU17X1_DSP_SAMPLING_RATE, dsp_div);
+	}
+
+	if (adau->dai_fmt != SND_SOC_DAIFMT_RIGHT_J)
+		return 0;
+
+	switch (params_format(params)) {
+	case SNDRV_PCM_FORMAT_S16_LE:
+		val = ADAU17X1_SERIAL_PORT1_DELAY16;
+		break;
+	case SNDRV_PCM_FORMAT_S24_LE:
+		val = ADAU17X1_SERIAL_PORT1_DELAY8;
+		break;
+	case SNDRV_PCM_FORMAT_S32_LE:
+		val = ADAU17X1_SERIAL_PORT1_DELAY0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return regmap_update_bits(adau->regmap, ADAU17X1_SERIAL_PORT1,
+			ADAU17X1_SERIAL_PORT1_DELAY_MASK, val);
+}
+
+static int adau17x1_set_dai_pll(struct snd_soc_dai *dai, int pll_id,
+	int source, unsigned int freq_in, unsigned int freq_out)
+{
+	struct snd_soc_codec *codec = dai->codec;
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+	unsigned int r, n, m, i, j;
+	unsigned int div;
+	int ret;
+
+	if (freq_in < 8000000 || freq_in > 27000000)
+		return -EINVAL;
+
+	if (!freq_out) {
+		r = 0;
+		n = 0;
+		m = 0;
+		div = 0;
+	} else {
+		if (freq_out % freq_in != 0) {
+			div = DIV_ROUND_UP(freq_in, 13500000);
+			freq_in /= div;
+			r = freq_out / freq_in;
+			i = freq_out % freq_in;
+			j = gcd(i, freq_in);
+			n = i / j;
+			m = freq_in / j;
+			div--;
+		} else {
+			r = freq_out / freq_in;
+			n = 0;
+			m = 0;
+			div = 0;
+		}
+		if (n > 0xffff || m > 0xffff || div > 3 || r > 8 || r < 2)
+			return -EINVAL;
+	}
+
+	adau->pll_regs[0] = m >> 8;
+	adau->pll_regs[1] = m & 0xff;
+	adau->pll_regs[2] = n >> 8;
+	adau->pll_regs[3] = n & 0xff;
+	adau->pll_regs[4] = (r << 3) | (div << 1);
+	if (m != 0)
+		adau->pll_regs[4] |= 1; /* Fractional mode */
+
+	/* The PLL register is 6 bytes long and can only be written at once. */
+	ret = regmap_raw_write(adau->regmap, ADAU17X1_PLL_CONTROL,
+			adau->pll_regs, ARRAY_SIZE(adau->pll_regs));
+	if (ret)
+		return ret;
+
+	adau->pll_freq = freq_out;
+
+	return 0;
+}
+
+static int adau17x1_set_dai_sysclk(struct snd_soc_dai *dai,
+		int clk_id, unsigned int freq, int dir)
+{
+	struct adau *adau = snd_soc_codec_get_drvdata(dai->codec);
+	struct snd_soc_dapm_context *dapm = &dai->codec->dapm;
+
+	switch (clk_id) {
+	case ADAU17X1_CLK_SRC_MCLK:
+	case ADAU17X1_CLK_SRC_PLL:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	adau->sysclk = freq;
+
+	if (adau->clk_src != clk_id) {
+		if (clk_id == ADAU17X1_CLK_SRC_PLL) {
+			snd_soc_dapm_add_routes(dapm,
+				&adau17x1_dapm_pll_route, 1);
+		} else {
+			snd_soc_dapm_del_routes(dapm,
+				&adau17x1_dapm_pll_route, 1);
+		}
+	}
+
+	adau->clk_src = clk_id;
+
+	return 0;
+}
+
+static int adau17x1_set_dai_fmt(struct snd_soc_dai *dai,
+		unsigned int fmt)
+{
+	struct adau *adau = snd_soc_codec_get_drvdata(dai->codec);
+	unsigned int ctrl0, ctrl1;
+	int lrclk_pol;
+
+	switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) {
+	case SND_SOC_DAIFMT_CBM_CFM:
+		ctrl0 = ADAU17X1_SERIAL_PORT0_MASTER;
+		adau->master = true;
+		break;
+	case SND_SOC_DAIFMT_CBS_CFS:
+		ctrl0 = 0;
+		adau->master = false;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
+	case SND_SOC_DAIFMT_I2S:
+		lrclk_pol = 0;
+		ctrl1 = ADAU17X1_SERIAL_PORT1_DELAY1;
+		break;
+	case SND_SOC_DAIFMT_LEFT_J:
+	case SND_SOC_DAIFMT_RIGHT_J:
+		lrclk_pol = 1;
+		ctrl1 = ADAU17X1_SERIAL_PORT1_DELAY0;
+		break;
+	case SND_SOC_DAIFMT_DSP_A:
+		lrclk_pol = 1;
+		ctrl0 |= ADAU17X1_SERIAL_PORT0_PULSE_MODE;
+		ctrl1 = ADAU17X1_SERIAL_PORT1_DELAY1;
+		break;
+	case SND_SOC_DAIFMT_DSP_B:
+		lrclk_pol = 1;
+		ctrl0 |= ADAU17X1_SERIAL_PORT0_PULSE_MODE;
+		ctrl1 = ADAU17X1_SERIAL_PORT1_DELAY0;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+	case SND_SOC_DAIFMT_NB_NF:
+		break;
+	case SND_SOC_DAIFMT_IB_NF:
+		ctrl0 |= ADAU17X1_SERIAL_PORT0_BCLK_POL;
+		break;
+	case SND_SOC_DAIFMT_NB_IF:
+		lrclk_pol = !lrclk_pol;
+		break;
+	case SND_SOC_DAIFMT_IB_IF:
+		ctrl0 |= ADAU17X1_SERIAL_PORT0_BCLK_POL;
+		lrclk_pol = !lrclk_pol;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (lrclk_pol)
+		ctrl0 |= ADAU17X1_SERIAL_PORT0_LRCLK_POL;
+
+	regmap_write(adau->regmap, ADAU17X1_SERIAL_PORT0, ctrl0);
+	regmap_write(adau->regmap, ADAU17X1_SERIAL_PORT1, ctrl1);
+
+	adau->dai_fmt = fmt & SND_SOC_DAIFMT_FORMAT_MASK;
+
+	return 0;
+}
+
+static int adau17x1_set_dai_tdm_slot(struct snd_soc_dai *dai,
+	unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width)
+{
+	struct adau *adau = snd_soc_codec_get_drvdata(dai->codec);
+	unsigned int ser_ctrl0, ser_ctrl1;
+	unsigned int conv_ctrl0, conv_ctrl1;
+
+	/* I2S mode */
+	if (slots == 0) {
+		slots = 2;
+		rx_mask = 3;
+		tx_mask = 3;
+		slot_width = 32;
+	}
+
+	switch (slots) {
+	case 2:
+		ser_ctrl0 = ADAU17X1_SERIAL_PORT0_STEREO;
+		break;
+	case 4:
+		ser_ctrl0 = ADAU17X1_SERIAL_PORT0_TDM4;
+		break;
+	case 8:
+		if (adau->type == ADAU1361)
+			return -EINVAL;
+
+		ser_ctrl0 = ADAU17X1_SERIAL_PORT0_TDM8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (slot_width * slots) {
+	case 32:
+		if (adau->type == ADAU1761)
+			return -EINVAL;
+
+		ser_ctrl1 = ADAU17X1_SERIAL_PORT1_BCLK32;
+		break;
+	case 64:
+		ser_ctrl1 = ADAU17X1_SERIAL_PORT1_BCLK64;
+		break;
+	case 48:
+		ser_ctrl1 = ADAU17X1_SERIAL_PORT1_BCLK48;
+		break;
+	case 128:
+		ser_ctrl1 = ADAU17X1_SERIAL_PORT1_BCLK128;
+		break;
+	case 256:
+		if (adau->type == ADAU1361)
+			return -EINVAL;
+
+		ser_ctrl1 = ADAU17X1_SERIAL_PORT1_BCLK256;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (rx_mask) {
+	case 0x03:
+		conv_ctrl1 = ADAU17X1_CONVERTER1_ADC_PAIR(1);
+		adau->tdm_slot[SNDRV_PCM_STREAM_CAPTURE] = 0;
+		break;
+	case 0x0c:
+		conv_ctrl1 = ADAU17X1_CONVERTER1_ADC_PAIR(2);
+		adau->tdm_slot[SNDRV_PCM_STREAM_CAPTURE] = 1;
+		break;
+	case 0x30:
+		conv_ctrl1 = ADAU17X1_CONVERTER1_ADC_PAIR(3);
+		adau->tdm_slot[SNDRV_PCM_STREAM_CAPTURE] = 2;
+		break;
+	case 0xc0:
+		conv_ctrl1 = ADAU17X1_CONVERTER1_ADC_PAIR(4);
+		adau->tdm_slot[SNDRV_PCM_STREAM_CAPTURE] = 3;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (tx_mask) {
+	case 0x03:
+		conv_ctrl0 = ADAU17X1_CONVERTER0_DAC_PAIR(1);
+		adau->tdm_slot[SNDRV_PCM_STREAM_PLAYBACK] = 0;
+		break;
+	case 0x0c:
+		conv_ctrl0 = ADAU17X1_CONVERTER0_DAC_PAIR(2);
+		adau->tdm_slot[SNDRV_PCM_STREAM_PLAYBACK] = 1;
+		break;
+	case 0x30:
+		conv_ctrl0 = ADAU17X1_CONVERTER0_DAC_PAIR(3);
+		adau->tdm_slot[SNDRV_PCM_STREAM_PLAYBACK] = 2;
+		break;
+	case 0xc0:
+		conv_ctrl0 = ADAU17X1_CONVERTER0_DAC_PAIR(4);
+		adau->tdm_slot[SNDRV_PCM_STREAM_PLAYBACK] = 3;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	regmap_update_bits(adau->regmap, ADAU17X1_CONVERTER0,
+		ADAU17X1_CONVERTER0_DAC_PAIR_MASK, conv_ctrl0);
+	regmap_update_bits(adau->regmap, ADAU17X1_CONVERTER1,
+		ADAU17X1_CONVERTER1_ADC_PAIR_MASK, conv_ctrl1);
+	regmap_update_bits(adau->regmap, ADAU17X1_SERIAL_PORT0,
+		ADAU17X1_SERIAL_PORT0_TDM_MASK, ser_ctrl0);
+	regmap_update_bits(adau->regmap, ADAU17X1_SERIAL_PORT1,
+		ADAU17X1_SERIAL_PORT1_BCLK_MASK, ser_ctrl1);
+
+	if (!adau17x1_has_dsp(adau))
+		return 0;
+
+	if (adau->dsp_bypass[SNDRV_PCM_STREAM_PLAYBACK]) {
+		regmap_write(adau->regmap, ADAU17X1_SERIAL_INPUT_ROUTE,
+			(adau->tdm_slot[SNDRV_PCM_STREAM_PLAYBACK] * 2) + 1);
+	}
+
+	if (adau->dsp_bypass[SNDRV_PCM_STREAM_CAPTURE]) {
+		regmap_write(adau->regmap, ADAU17X1_SERIAL_OUTPUT_ROUTE,
+			(adau->tdm_slot[SNDRV_PCM_STREAM_CAPTURE] * 2) + 1);
+	}
+
+	return 0;
+}
+
+const struct snd_soc_dai_ops adau17x1_dai_ops = {
+	.hw_params	= adau17x1_hw_params,
+	.set_sysclk	= adau17x1_set_dai_sysclk,
+	.set_fmt	= adau17x1_set_dai_fmt,
+	.set_pll	= adau17x1_set_dai_pll,
+	.set_tdm_slot	= adau17x1_set_dai_tdm_slot,
+};
+EXPORT_SYMBOL_GPL(adau17x1_dai_ops);
+
+int adau17x1_set_micbias_voltage(struct snd_soc_codec *codec,
+	enum adau17x1_micbias_voltage micbias)
+{
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+
+	switch (micbias) {
+	case ADAU17X1_MICBIAS_0_90_AVDD:
+	case ADAU17X1_MICBIAS_0_65_AVDD:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return regmap_write(adau->regmap, ADAU17X1_MICBIAS, micbias << 2);
+}
+EXPORT_SYMBOL_GPL(adau17x1_set_micbias_voltage);
+
+bool adau17x1_readable_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case ADAU17X1_CLOCK_CONTROL:
+	case ADAU17X1_PLL_CONTROL:
+	case ADAU17X1_REC_POWER_MGMT:
+	case ADAU17X1_MICBIAS:
+	case ADAU17X1_SERIAL_PORT0:
+	case ADAU17X1_SERIAL_PORT1:
+	case ADAU17X1_CONVERTER0:
+	case ADAU17X1_CONVERTER1:
+	case ADAU17X1_LEFT_INPUT_DIGITAL_VOL:
+	case ADAU17X1_RIGHT_INPUT_DIGITAL_VOL:
+	case ADAU17X1_ADC_CONTROL:
+	case ADAU17X1_PLAY_POWER_MGMT:
+	case ADAU17X1_DAC_CONTROL0:
+	case ADAU17X1_DAC_CONTROL1:
+	case ADAU17X1_DAC_CONTROL2:
+	case ADAU17X1_SERIAL_PORT_PAD:
+	case ADAU17X1_CONTROL_PORT_PAD0:
+	case ADAU17X1_CONTROL_PORT_PAD1:
+	case ADAU17X1_DSP_SAMPLING_RATE:
+	case ADAU17X1_SERIAL_INPUT_ROUTE:
+	case ADAU17X1_SERIAL_OUTPUT_ROUTE:
+	case ADAU17X1_DSP_ENABLE:
+	case ADAU17X1_DSP_RUN:
+	case ADAU17X1_SERIAL_SAMPLING_RATE:
+		return true;
+	default:
+		break;
+	}
+	return false;
+}
+EXPORT_SYMBOL_GPL(adau17x1_readable_register);
+
+bool adau17x1_volatile_register(struct device *dev, unsigned int reg)
+{
+	/* SigmaDSP parameter and program memory */
+	if (reg < 0x4000)
+		return true;
+
+	switch (reg) {
+	/* The PLL register is 6 bytes long */
+	case ADAU17X1_PLL_CONTROL:
+	case ADAU17X1_PLL_CONTROL + 1:
+	case ADAU17X1_PLL_CONTROL + 2:
+	case ADAU17X1_PLL_CONTROL + 3:
+	case ADAU17X1_PLL_CONTROL + 4:
+	case ADAU17X1_PLL_CONTROL + 5:
+		return true;
+	default:
+		break;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(adau17x1_volatile_register);
+
+int adau17x1_load_firmware(struct adau *adau, struct device *dev,
+	const char *firmware)
+{
+	int ret;
+	int dspsr;
+
+	ret = regmap_read(adau->regmap, ADAU17X1_DSP_SAMPLING_RATE, &dspsr);
+	if (ret)
+		return ret;
+
+	regmap_write(adau->regmap, ADAU17X1_DSP_ENABLE, 1);
+	regmap_write(adau->regmap, ADAU17X1_DSP_SAMPLING_RATE, 0xf);
+
+	ret = process_sigma_firmware_regmap(dev, adau->regmap, firmware);
+	if (ret) {
+		regmap_write(adau->regmap, ADAU17X1_DSP_ENABLE, 0);
+		return ret;
+	}
+	regmap_write(adau->regmap, ADAU17X1_DSP_SAMPLING_RATE, dspsr);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adau17x1_load_firmware);
+
+int adau17x1_add_widgets(struct snd_soc_codec *codec)
+{
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+	int ret;
+
+	ret = snd_soc_add_codec_controls(codec, adau17x1_controls,
+		ARRAY_SIZE(adau17x1_controls));
+	if (ret)
+		return ret;
+	ret = snd_soc_dapm_new_controls(&codec->dapm, adau17x1_dapm_widgets,
+		ARRAY_SIZE(adau17x1_dapm_widgets));
+	if (ret)
+		return ret;
+
+	if (adau17x1_has_dsp(adau)) {
+		ret = snd_soc_dapm_new_controls(&codec->dapm,
+			adau17x1_dsp_dapm_widgets,
+			ARRAY_SIZE(adau17x1_dsp_dapm_widgets));
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(adau17x1_add_widgets);
+
+int adau17x1_add_routes(struct snd_soc_codec *codec)
+{
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+	int ret;
+
+	ret = snd_soc_dapm_add_routes(&codec->dapm, adau17x1_dapm_routes,
+		ARRAY_SIZE(adau17x1_dapm_routes));
+	if (ret)
+		return ret;
+
+	if (adau17x1_has_dsp(adau)) {
+		ret = snd_soc_dapm_add_routes(&codec->dapm,
+			adau17x1_dsp_dapm_routes,
+			ARRAY_SIZE(adau17x1_dsp_dapm_routes));
+	} else {
+		ret = snd_soc_dapm_add_routes(&codec->dapm,
+			adau17x1_no_dsp_dapm_routes,
+			ARRAY_SIZE(adau17x1_no_dsp_dapm_routes));
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(adau17x1_add_routes);
+
+int adau17x1_suspend(struct snd_soc_codec *codec)
+{
+	codec->driver->set_bias_level(codec, SND_SOC_BIAS_OFF);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adau17x1_suspend);
+
+int adau17x1_resume(struct snd_soc_codec *codec)
+{
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+
+	if (adau->switch_mode)
+		adau->switch_mode(codec->dev);
+
+	codec->driver->set_bias_level(codec, SND_SOC_BIAS_STANDBY);
+	regcache_sync(adau->regmap);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adau17x1_resume);
+
+int adau17x1_probe(struct device *dev, struct regmap *regmap,
+	enum adau17x1_type type, void (*switch_mode)(struct device *dev))
+{
+	struct adau *adau;
+
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	adau = devm_kzalloc(dev, sizeof(*adau), GFP_KERNEL);
+	if (!adau)
+		return -ENOMEM;
+
+	adau->regmap = regmap;
+	adau->switch_mode = switch_mode;
+	adau->type = type;
+
+	dev_set_drvdata(dev, adau);
+
+	if (switch_mode)
+		switch_mode(dev);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adau17x1_probe);
+
+MODULE_DESCRIPTION("ASoC ADAU1X61/ADAU1X81 common code");
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/adau17x1.h b/sound/soc/codecs/adau17x1.h
new file mode 100644
index 000000000000..3ffabaf4c7a8
--- /dev/null
+++ b/sound/soc/codecs/adau17x1.h
@@ -0,0 +1,124 @@
+#ifndef __ADAU17X1_H__
+#define __ADAU17X1_H__
+
+#include <linux/regmap.h>
+#include <linux/platform_data/adau17x1.h>
+
+enum adau17x1_type {
+	ADAU1361,
+	ADAU1761,
+	ADAU1381,
+	ADAU1781,
+};
+
+enum adau17x1_pll {
+	ADAU17X1_PLL,
+};
+
+enum adau17x1_pll_src {
+	ADAU17X1_PLL_SRC_MCLK,
+};
+
+enum adau17x1_clk_src {
+	ADAU17X1_CLK_SRC_MCLK,
+	ADAU17X1_CLK_SRC_PLL,
+};
+
+struct adau {
+	unsigned int sysclk;
+	unsigned int pll_freq;
+
+	enum adau17x1_clk_src clk_src;
+	enum adau17x1_type type;
+	void (*switch_mode)(struct device *dev);
+
+	unsigned int dai_fmt;
+
+	uint8_t pll_regs[6];
+
+	bool master;
+
+	unsigned int tdm_slot[2];
+	bool dsp_bypass[2];
+
+	struct regmap *regmap;
+};
+
+int adau17x1_add_widgets(struct snd_soc_codec *codec);
+int adau17x1_add_routes(struct snd_soc_codec *codec);
+int adau17x1_probe(struct device *dev, struct regmap *regmap,
+	enum adau17x1_type type, void (*switch_mode)(struct device *dev));
+int adau17x1_set_micbias_voltage(struct snd_soc_codec *codec,
+	enum adau17x1_micbias_voltage micbias);
+bool adau17x1_readable_register(struct device *dev, unsigned int reg);
+bool adau17x1_volatile_register(struct device *dev, unsigned int reg);
+int adau17x1_suspend(struct snd_soc_codec *codec);
+int adau17x1_resume(struct snd_soc_codec *codec);
+
+extern const struct snd_soc_dai_ops adau17x1_dai_ops;
+
+int adau17x1_load_firmware(struct adau *adau, struct device *dev,
+	const char *firmware);
+bool adau17x1_has_dsp(struct adau *adau);
+
+#define ADAU17X1_CLOCK_CONTROL			0x4000
+#define ADAU17X1_PLL_CONTROL			0x4002
+#define ADAU17X1_REC_POWER_MGMT			0x4009
+#define ADAU17X1_MICBIAS			0x4010
+#define ADAU17X1_SERIAL_PORT0			0x4015
+#define ADAU17X1_SERIAL_PORT1			0x4016
+#define ADAU17X1_CONVERTER0			0x4017
+#define ADAU17X1_CONVERTER1			0x4018
+#define ADAU17X1_LEFT_INPUT_DIGITAL_VOL		0x401a
+#define ADAU17X1_RIGHT_INPUT_DIGITAL_VOL	0x401b
+#define ADAU17X1_ADC_CONTROL			0x4019
+#define ADAU17X1_PLAY_POWER_MGMT		0x4029
+#define ADAU17X1_DAC_CONTROL0			0x402a
+#define ADAU17X1_DAC_CONTROL1			0x402b
+#define ADAU17X1_DAC_CONTROL2			0x402c
+#define ADAU17X1_SERIAL_PORT_PAD		0x402d
+#define ADAU17X1_CONTROL_PORT_PAD0		0x402f
+#define ADAU17X1_CONTROL_PORT_PAD1		0x4030
+#define ADAU17X1_DSP_SAMPLING_RATE		0x40eb
+#define ADAU17X1_SERIAL_INPUT_ROUTE		0x40f2
+#define ADAU17X1_SERIAL_OUTPUT_ROUTE		0x40f3
+#define ADAU17X1_DSP_ENABLE			0x40f5
+#define ADAU17X1_DSP_RUN			0x40f6
+#define ADAU17X1_SERIAL_SAMPLING_RATE		0x40f8
+
+#define ADAU17X1_SERIAL_PORT0_BCLK_POL		BIT(4)
+#define ADAU17X1_SERIAL_PORT0_LRCLK_POL		BIT(3)
+#define ADAU17X1_SERIAL_PORT0_MASTER		BIT(0)
+
+#define ADAU17X1_SERIAL_PORT1_DELAY1		0x00
+#define ADAU17X1_SERIAL_PORT1_DELAY0		0x01
+#define ADAU17X1_SERIAL_PORT1_DELAY8		0x02
+#define ADAU17X1_SERIAL_PORT1_DELAY16		0x03
+#define ADAU17X1_SERIAL_PORT1_DELAY_MASK	0x03
+
+#define ADAU17X1_CLOCK_CONTROL_INFREQ_MASK	0x6
+#define ADAU17X1_CLOCK_CONTROL_CORECLK_SRC_PLL	BIT(3)
+#define ADAU17X1_CLOCK_CONTROL_SYSCLK_EN	BIT(0)
+
+#define ADAU17X1_SERIAL_PORT1_BCLK32		(0x0 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK48		(0x1 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK64		(0x2 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK128		(0x3 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK256		(0x4 << 5)
+#define ADAU17X1_SERIAL_PORT1_BCLK_MASK		(0x7 << 5)
+
+#define ADAU17X1_SERIAL_PORT0_STEREO		(0x0 << 1)
+#define ADAU17X1_SERIAL_PORT0_TDM4		(0x1 << 1)
+#define ADAU17X1_SERIAL_PORT0_TDM8		(0x2 << 1)
+#define ADAU17X1_SERIAL_PORT0_TDM_MASK		(0x3 << 1)
+#define ADAU17X1_SERIAL_PORT0_PULSE_MODE	BIT(5)
+
+#define ADAU17X1_CONVERTER0_DAC_PAIR(x)		(((x) - 1) << 5)
+#define ADAU17X1_CONVERTER0_DAC_PAIR_MASK	(0x3 << 5)
+#define ADAU17X1_CONVERTER1_ADC_PAIR(x)		((x) - 1)
+#define ADAU17X1_CONVERTER1_ADC_PAIR_MASK	0x3
+
+#define ADAU17X1_CONVERTER0_CONVSR_MASK		0x7
+
+
+#endif
-- 
cgit 


From dab464b60b2435a2aaae3630266db8ad130b7fad Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 27 May 2014 10:53:18 +0200
Subject: ASoC: Add ADAU1361/ADAU1761 audio CODEC support

This patch adds support for the Analog Devices ADAU1361 and ADAU1761 CODECs.
The device is a low-power, 24-bit stereo audio CODEC with multiple analog
inputs and outputs, one digital microphone input and an I2S interface. The device
can be controlled either via I2C or SPI. The main difference between the two
variants is that the ADAU1761 has a built-in SigmaDSP, while the ADAU1361 has
not.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/platform_data/adau17x1.h |  67 +++
 sound/soc/codecs/Kconfig               |  16 +
 sound/soc/codecs/Makefile              |   6 +
 sound/soc/codecs/adau1761-i2c.c        |  60 +++
 sound/soc/codecs/adau1761-spi.c        |  77 ++++
 sound/soc/codecs/adau1761.c            | 803 +++++++++++++++++++++++++++++++++
 sound/soc/codecs/adau1761.h            |  23 +
 7 files changed, 1052 insertions(+)
 create mode 100644 sound/soc/codecs/adau1761-i2c.c
 create mode 100644 sound/soc/codecs/adau1761-spi.c
 create mode 100644 sound/soc/codecs/adau1761.c
 create mode 100644 sound/soc/codecs/adau1761.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/adau17x1.h b/include/linux/platform_data/adau17x1.h
index f90bd9286f31..d234d9e46fd6 100644
--- a/include/linux/platform_data/adau17x1.h
+++ b/include/linux/platform_data/adau17x1.h
@@ -20,4 +20,71 @@ enum adau17x1_micbias_voltage {
 	ADAU17X1_MICBIAS_0_65_AVDD = 1,
 };
 
+/**
+ * enum adau1761_digmic_jackdet_pin_mode - Configuration of the JACKDET/MICIN pin
+ * @ADAU1761_DIGMIC_JACKDET_PIN_MODE_NONE: Disable the pin
+ * @ADAU1761_DIGMIC_JACKDET_PIN_MODE_DIGMIC: Configure the pin for usage as
+ *   digital microphone input.
+ * @ADAU1761_DIGMIC_JACKDET_PIN_MODE_JACKDETECT: Configure the pin for jack
+ *  insertion detection.
+ */
+enum adau1761_digmic_jackdet_pin_mode {
+	ADAU1761_DIGMIC_JACKDET_PIN_MODE_NONE,
+	ADAU1761_DIGMIC_JACKDET_PIN_MODE_DIGMIC,
+	ADAU1761_DIGMIC_JACKDET_PIN_MODE_JACKDETECT,
+};
+
+/**
+ * enum adau1761_jackdetect_debounce_time - Jack insertion detection debounce time
+ * @ADAU1761_JACKDETECT_DEBOUNCE_5MS: 5 milliseconds
+ * @ADAU1761_JACKDETECT_DEBOUNCE_10MS: 10 milliseconds
+ * @ADAU1761_JACKDETECT_DEBOUNCE_20MS: 20 milliseconds
+ * @ADAU1761_JACKDETECT_DEBOUNCE_40MS: 40 milliseconds
+ */
+enum adau1761_jackdetect_debounce_time {
+	ADAU1761_JACKDETECT_DEBOUNCE_5MS = 0,
+	ADAU1761_JACKDETECT_DEBOUNCE_10MS = 1,
+	ADAU1761_JACKDETECT_DEBOUNCE_20MS = 2,
+	ADAU1761_JACKDETECT_DEBOUNCE_40MS = 3,
+};
+
+/**
+ * enum adau1761_output_mode - Output mode configuration
+ * @ADAU1761_OUTPUT_MODE_HEADPHONE: Headphone output
+ * @ADAU1761_OUTPUT_MODE_HEADPHONE_CAPLESS: Capless headphone output
+ * @ADAU1761_OUTPUT_MODE_LINE: Line output
+ */
+enum adau1761_output_mode {
+	ADAU1761_OUTPUT_MODE_HEADPHONE,
+	ADAU1761_OUTPUT_MODE_HEADPHONE_CAPLESS,
+	ADAU1761_OUTPUT_MODE_LINE,
+};
+
+/**
+ * struct adau1761_platform_data - ADAU1761 Codec driver platform data
+ * @input_differential: If true the input pins will be configured in
+ *  differential mode.
+ * @lineout_mode: Output mode for the LOUT/ROUT pins
+ * @headphone_mode: Output mode for the LHP/RHP pins
+ * @digmic_jackdetect_pin_mode: JACKDET/MICIN pin configuration
+ * @jackdetect_debounce_time: Jack insertion detection debounce time.
+ *  Note: This value will only be used, if the JACKDET/MICIN pin is configured
+ *  for jack insertion detection.
+ * @jackdetect_active_low: If true the jack insertion detection is active low.
+ *  Otherwise it will be active high.
+ * @micbias_voltage: Microphone voltage bias
+ */
+struct adau1761_platform_data {
+	bool input_differential;
+	enum adau1761_output_mode lineout_mode;
+	enum adau1761_output_mode headphone_mode;
+
+	enum adau1761_digmic_jackdet_pin_mode digmic_jackdetect_pin_mode;
+
+	enum adau1761_jackdetect_debounce_time jackdetect_debounce_time;
+	bool jackdetect_active_low;
+
+	enum adau17x1_micbias_voltage micbias_voltage;
+};
+
 #endif
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 182a39751a91..4233ed118c48 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -23,6 +23,8 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_AD1980 if SND_SOC_AC97_BUS
 	select SND_SOC_AD73311
 	select SND_SOC_ADAU1373 if I2C
+	select SND_SOC_ADAU1761_I2C if I2C
+	select SND_SOC_ADAU1761_SPI if SPI
 	select SND_SOC_ADAV801 if SPI_MASTER
 	select SND_SOC_ADAV803 if I2C
 	select SND_SOC_ADAU1977_SPI if SPI_MASTER
@@ -222,6 +224,20 @@ config SND_SOC_ADAU17X1
 	tristate
 	select SND_SOC_SIGMADSP
 
+config SND_SOC_ADAU1761
+	tristate
+	select SND_SOC_ADAU17X1
+
+config SND_SOC_ADAU1761_I2C
+	tristate
+	select SND_SOC_ADAU1761
+	select REGMAP_I2C
+
+config SND_SOC_ADAU1761_SPI
+	tristate
+	select SND_SOC_ADAU1761
+	select REGMAP_SPI
+
 config SND_SOC_ADAU1977
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index a8cba3086830..e96499ca76bd 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -10,6 +10,9 @@ snd-soc-ad73311-objs := ad73311.o
 snd-soc-adau1373-objs := adau1373.o
 snd-soc-adau1701-objs := adau1701.o
 snd-soc-adau17x1-objs := adau17x1.o
+snd-soc-adau1761-objs := adau1761.o
+snd-soc-adau1761-i2c-objs := adau1761-i2c.o
+snd-soc-adau1761-spi-objs := adau1761-spi.o
 snd-soc-adau1977-objs := adau1977.o
 snd-soc-adau1977-spi-objs := adau1977-spi.o
 snd-soc-adau1977-i2c-objs := adau1977-i2c.o
@@ -160,6 +163,9 @@ obj-$(CONFIG_SND_SOC_AD73311) += snd-soc-ad73311.o
 obj-$(CONFIG_SND_SOC_ADAU1373)	+= snd-soc-adau1373.o
 obj-$(CONFIG_SND_SOC_ADAU1701)		+= snd-soc-adau1701.o
 obj-$(CONFIG_SND_SOC_ADAU17X1)		+= snd-soc-adau17x1.o
+obj-$(CONFIG_SND_SOC_ADAU1761)		+= snd-soc-adau1761.o
+obj-$(CONFIG_SND_SOC_ADAU1761_I2C)	+= snd-soc-adau1761-i2c.o
+obj-$(CONFIG_SND_SOC_ADAU1761_SPI)	+= snd-soc-adau1761-spi.o
 obj-$(CONFIG_SND_SOC_ADAU1977)		+= snd-soc-adau1977.o
 obj-$(CONFIG_SND_SOC_ADAU1977_SPI)	+= snd-soc-adau1977-spi.o
 obj-$(CONFIG_SND_SOC_ADAU1977_I2C)	+= snd-soc-adau1977-i2c.o
diff --git a/sound/soc/codecs/adau1761-i2c.c b/sound/soc/codecs/adau1761-i2c.c
new file mode 100644
index 000000000000..862796dec693
--- /dev/null
+++ b/sound/soc/codecs/adau1761-i2c.c
@@ -0,0 +1,60 @@
+/*
+ * Driver for ADAU1361/ADAU1461/ADAU1761/ADAU1961 codec
+ *
+ * Copyright 2014 Analog Devices Inc.
+ *  Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/i2c.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <sound/soc.h>
+
+#include "adau1761.h"
+
+static int adau1761_i2c_probe(struct i2c_client *client,
+	const struct i2c_device_id *id)
+{
+	struct regmap_config config;
+
+	config = adau1761_regmap_config;
+	config.val_bits = 8;
+	config.reg_bits = 16;
+
+	return adau1761_probe(&client->dev,
+		devm_regmap_init_i2c(client, &config),
+		id->driver_data, NULL);
+}
+
+static int adau1761_i2c_remove(struct i2c_client *client)
+{
+	snd_soc_unregister_codec(&client->dev);
+	return 0;
+}
+
+static const struct i2c_device_id adau1761_i2c_ids[] = {
+	{ "adau1361", ADAU1361 },
+	{ "adau1461", ADAU1761 },
+	{ "adau1761", ADAU1761 },
+	{ "adau1961", ADAU1361 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, adau1761_i2c_ids);
+
+static struct i2c_driver adau1761_i2c_driver = {
+	.driver = {
+		.name = "adau1761",
+		.owner = THIS_MODULE,
+	},
+	.probe = adau1761_i2c_probe,
+	.remove = adau1761_i2c_remove,
+	.id_table = adau1761_i2c_ids,
+};
+module_i2c_driver(adau1761_i2c_driver);
+
+MODULE_DESCRIPTION("ASoC ADAU1361/ADAU1461/ADAU1761/ADAU1961 CODEC I2C driver");
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/adau1761-spi.c b/sound/soc/codecs/adau1761-spi.c
new file mode 100644
index 000000000000..cce2f11f1ffb
--- /dev/null
+++ b/sound/soc/codecs/adau1761-spi.c
@@ -0,0 +1,77 @@
+/*
+ * Driver for ADAU1361/ADAU1461/ADAU1761/ADAU1961 codec
+ *
+ * Copyright 2014 Analog Devices Inc.
+ *  Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/spi/spi.h>
+#include <sound/soc.h>
+
+#include "adau1761.h"
+
+static void adau1761_spi_switch_mode(struct device *dev)
+{
+	struct spi_device *spi = to_spi_device(dev);
+
+	/*
+	 * To get the device into SPI mode CLATCH has to be pulled low three
+	 * times.  Do this by issuing three dummy reads.
+	 */
+	spi_w8r8(spi, 0x00);
+	spi_w8r8(spi, 0x00);
+	spi_w8r8(spi, 0x00);
+}
+
+static int adau1761_spi_probe(struct spi_device *spi)
+{
+	const struct spi_device_id *id = spi_get_device_id(spi);
+	struct regmap_config config;
+
+	if (!id)
+		return -EINVAL;
+
+	config = adau1761_regmap_config;
+	config.val_bits = 8;
+	config.reg_bits = 24;
+	config.read_flag_mask = 0x1;
+
+	return adau1761_probe(&spi->dev,
+		devm_regmap_init_spi(spi, &config),
+		id->driver_data, adau1761_spi_switch_mode);
+}
+
+static int adau1761_spi_remove(struct spi_device *spi)
+{
+	snd_soc_unregister_codec(&spi->dev);
+	return 0;
+}
+
+static const struct spi_device_id adau1761_spi_id[] = {
+	{ "adau1361", ADAU1361 },
+	{ "adau1461", ADAU1761 },
+	{ "adau1761", ADAU1761 },
+	{ "adau1961", ADAU1361 },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, adau1761_spi_id);
+
+static struct spi_driver adau1761_spi_driver = {
+	.driver = {
+		.name = "adau1761",
+		.owner = THIS_MODULE,
+	},
+	.probe = adau1761_spi_probe,
+	.remove = adau1761_spi_remove,
+	.id_table = adau1761_spi_id,
+};
+module_spi_driver(adau1761_spi_driver);
+
+MODULE_DESCRIPTION("ASoC ADAU1361/ADAU1461/ADAU1761/ADAU1961 CODEC SPI driver");
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/adau1761.c b/sound/soc/codecs/adau1761.c
new file mode 100644
index 000000000000..848cab839553
--- /dev/null
+++ b/sound/soc/codecs/adau1761.c
@@ -0,0 +1,803 @@
+/*
+ * Driver for ADAU1361/ADAU1461/ADAU1761/ADAU1961 codec
+ *
+ * Copyright 2011-2013 Analog Devices Inc.
+ * Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/spi/spi.h>
+#include <linux/slab.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/tlv.h>
+#include <linux/platform_data/adau17x1.h>
+
+#include "adau17x1.h"
+#include "adau1761.h"
+
+#define ADAU1761_DIGMIC_JACKDETECT	0x4008
+#define ADAU1761_REC_MIXER_LEFT0	0x400a
+#define ADAU1761_REC_MIXER_LEFT1	0x400b
+#define ADAU1761_REC_MIXER_RIGHT0	0x400c
+#define ADAU1761_REC_MIXER_RIGHT1	0x400d
+#define ADAU1761_LEFT_DIFF_INPUT_VOL	0x400e
+#define ADAU1761_RIGHT_DIFF_INPUT_VOL	0x400f
+#define ADAU1761_PLAY_LR_MIXER_LEFT	0x4020
+#define ADAU1761_PLAY_MIXER_LEFT0	0x401c
+#define ADAU1761_PLAY_MIXER_LEFT1	0x401d
+#define ADAU1761_PLAY_MIXER_RIGHT0	0x401e
+#define ADAU1761_PLAY_MIXER_RIGHT1	0x401f
+#define ADAU1761_PLAY_LR_MIXER_RIGHT	0x4021
+#define ADAU1761_PLAY_MIXER_MONO	0x4022
+#define ADAU1761_PLAY_HP_LEFT_VOL	0x4023
+#define ADAU1761_PLAY_HP_RIGHT_VOL	0x4024
+#define ADAU1761_PLAY_LINE_LEFT_VOL	0x4025
+#define ADAU1761_PLAY_LINE_RIGHT_VOL	0x4026
+#define ADAU1761_PLAY_MONO_OUTPUT_VOL	0x4027
+#define ADAU1761_POP_CLICK_SUPPRESS	0x4028
+#define ADAU1761_JACK_DETECT_PIN	0x4031
+#define ADAU1761_DEJITTER		0x4036
+#define ADAU1761_CLK_ENABLE0		0x40f9
+#define ADAU1761_CLK_ENABLE1		0x40fa
+
+#define ADAU1761_DIGMIC_JACKDETECT_ACTIVE_LOW	BIT(0)
+#define ADAU1761_DIGMIC_JACKDETECT_DIGMIC	BIT(5)
+
+#define ADAU1761_DIFF_INPUT_VOL_LDEN		BIT(0)
+
+#define ADAU1761_PLAY_MONO_OUTPUT_VOL_MODE_HP	BIT(0)
+#define ADAU1761_PLAY_MONO_OUTPUT_VOL_UNMUTE	BIT(1)
+
+#define ADAU1761_PLAY_HP_RIGHT_VOL_MODE_HP	BIT(0)
+
+#define ADAU1761_PLAY_LINE_LEFT_VOL_MODE_HP	BIT(0)
+
+#define ADAU1761_PLAY_LINE_RIGHT_VOL_MODE_HP	BIT(0)
+
+
+#define ADAU1761_FIRMWARE "adau1761.bin"
+
+static const struct reg_default adau1761_reg_defaults[] = {
+	{ ADAU1761_DEJITTER,			0x03 },
+	{ ADAU1761_DIGMIC_JACKDETECT,		0x00 },
+	{ ADAU1761_REC_MIXER_LEFT0,		0x00 },
+	{ ADAU1761_REC_MIXER_LEFT1,		0x00 },
+	{ ADAU1761_REC_MIXER_RIGHT0,		0x00 },
+	{ ADAU1761_REC_MIXER_RIGHT1,		0x00 },
+	{ ADAU1761_LEFT_DIFF_INPUT_VOL,		0x00 },
+	{ ADAU1761_RIGHT_DIFF_INPUT_VOL,	0x00 },
+	{ ADAU1761_PLAY_LR_MIXER_LEFT,		0x00 },
+	{ ADAU1761_PLAY_MIXER_LEFT0,		0x00 },
+	{ ADAU1761_PLAY_MIXER_LEFT1,		0x00 },
+	{ ADAU1761_PLAY_MIXER_RIGHT0,		0x00 },
+	{ ADAU1761_PLAY_MIXER_RIGHT1,		0x00 },
+	{ ADAU1761_PLAY_LR_MIXER_RIGHT,		0x00 },
+	{ ADAU1761_PLAY_MIXER_MONO,		0x00 },
+	{ ADAU1761_PLAY_HP_LEFT_VOL,		0x00 },
+	{ ADAU1761_PLAY_HP_RIGHT_VOL,		0x00 },
+	{ ADAU1761_PLAY_LINE_LEFT_VOL,		0x00 },
+	{ ADAU1761_PLAY_LINE_RIGHT_VOL,		0x00 },
+	{ ADAU1761_PLAY_MONO_OUTPUT_VOL,	0x00 },
+	{ ADAU1761_POP_CLICK_SUPPRESS,		0x00 },
+	{ ADAU1761_JACK_DETECT_PIN,		0x00 },
+	{ ADAU1761_CLK_ENABLE0,			0x00 },
+	{ ADAU1761_CLK_ENABLE1,			0x00 },
+	{ ADAU17X1_CLOCK_CONTROL,		0x00 },
+	{ ADAU17X1_PLL_CONTROL,			0x00 },
+	{ ADAU17X1_REC_POWER_MGMT,		0x00 },
+	{ ADAU17X1_MICBIAS,			0x00 },
+	{ ADAU17X1_SERIAL_PORT0,		0x00 },
+	{ ADAU17X1_SERIAL_PORT1,		0x00 },
+	{ ADAU17X1_CONVERTER0,			0x00 },
+	{ ADAU17X1_CONVERTER1,			0x00 },
+	{ ADAU17X1_LEFT_INPUT_DIGITAL_VOL,	0x00 },
+	{ ADAU17X1_RIGHT_INPUT_DIGITAL_VOL,	0x00 },
+	{ ADAU17X1_ADC_CONTROL,			0x00 },
+	{ ADAU17X1_PLAY_POWER_MGMT,		0x00 },
+	{ ADAU17X1_DAC_CONTROL0,		0x00 },
+	{ ADAU17X1_DAC_CONTROL1,		0x00 },
+	{ ADAU17X1_DAC_CONTROL2,		0x00 },
+	{ ADAU17X1_SERIAL_PORT_PAD,		0xaa },
+	{ ADAU17X1_CONTROL_PORT_PAD0,		0xaa },
+	{ ADAU17X1_CONTROL_PORT_PAD1,		0x00 },
+	{ ADAU17X1_DSP_SAMPLING_RATE,		0x01 },
+	{ ADAU17X1_SERIAL_INPUT_ROUTE,		0x00 },
+	{ ADAU17X1_SERIAL_OUTPUT_ROUTE,		0x00 },
+	{ ADAU17X1_DSP_ENABLE,			0x00 },
+	{ ADAU17X1_DSP_RUN,			0x00 },
+	{ ADAU17X1_SERIAL_SAMPLING_RATE,	0x00 },
+};
+
+static const DECLARE_TLV_DB_SCALE(adau1761_sing_in_tlv, -1500, 300, 1);
+static const DECLARE_TLV_DB_SCALE(adau1761_diff_in_tlv, -1200, 75, 0);
+static const DECLARE_TLV_DB_SCALE(adau1761_out_tlv, -5700, 100, 0);
+static const DECLARE_TLV_DB_SCALE(adau1761_sidetone_tlv, -1800, 300, 1);
+static const DECLARE_TLV_DB_SCALE(adau1761_boost_tlv, -600, 600, 1);
+static const DECLARE_TLV_DB_SCALE(adau1761_pga_boost_tlv, -2000, 2000, 1);
+
+static const unsigned int adau1761_bias_select_values[] = {
+	0, 2, 3,
+};
+
+static const char * const adau1761_bias_select_text[] = {
+	"Normal operation", "Enhanced performance", "Power saving",
+};
+
+static const char * const adau1761_bias_select_extreme_text[] = {
+	"Normal operation", "Extreme power saving", "Enhanced performance",
+	"Power saving",
+};
+
+static SOC_ENUM_SINGLE_DECL(adau1761_adc_bias_enum,
+		ADAU17X1_REC_POWER_MGMT, 3, adau1761_bias_select_extreme_text);
+static SOC_ENUM_SINGLE_DECL(adau1761_hp_bias_enum,
+		ADAU17X1_PLAY_POWER_MGMT, 6, adau1761_bias_select_extreme_text);
+static SOC_ENUM_SINGLE_DECL(adau1761_dac_bias_enum,
+		ADAU17X1_PLAY_POWER_MGMT, 4, adau1761_bias_select_extreme_text);
+static SOC_VALUE_ENUM_SINGLE_DECL(adau1761_playback_bias_enum,
+		ADAU17X1_PLAY_POWER_MGMT, 2, 0x3, adau1761_bias_select_text,
+		adau1761_bias_select_values);
+static SOC_VALUE_ENUM_SINGLE_DECL(adau1761_capture_bias_enum,
+		ADAU17X1_REC_POWER_MGMT, 1, 0x3, adau1761_bias_select_text,
+		adau1761_bias_select_values);
+
+static const struct snd_kcontrol_new adau1761_jack_detect_controls[] = {
+	SOC_SINGLE("Speaker Auto-mute Switch", ADAU1761_DIGMIC_JACKDETECT,
+		4, 1, 0),
+};
+
+static const struct snd_kcontrol_new adau1761_differential_mode_controls[] = {
+	SOC_DOUBLE_R_TLV("Capture Volume", ADAU1761_LEFT_DIFF_INPUT_VOL,
+		ADAU1761_RIGHT_DIFF_INPUT_VOL, 2, 0x3f, 0,
+		adau1761_diff_in_tlv),
+	SOC_DOUBLE_R("Capture Switch", ADAU1761_LEFT_DIFF_INPUT_VOL,
+		ADAU1761_RIGHT_DIFF_INPUT_VOL, 1, 1, 0),
+
+	SOC_DOUBLE_R_TLV("PGA Boost Capture Volume", ADAU1761_REC_MIXER_LEFT1,
+		ADAU1761_REC_MIXER_RIGHT1, 3, 2, 0, adau1761_pga_boost_tlv),
+};
+
+static const struct snd_kcontrol_new adau1761_single_mode_controls[] = {
+	SOC_SINGLE_TLV("Input 1 Capture Volume", ADAU1761_REC_MIXER_LEFT0,
+		4, 7, 0, adau1761_sing_in_tlv),
+	SOC_SINGLE_TLV("Input 2 Capture Volume", ADAU1761_REC_MIXER_LEFT0,
+		1, 7, 0, adau1761_sing_in_tlv),
+	SOC_SINGLE_TLV("Input 3 Capture Volume", ADAU1761_REC_MIXER_RIGHT0,
+		4, 7, 0, adau1761_sing_in_tlv),
+	SOC_SINGLE_TLV("Input 4 Capture Volume", ADAU1761_REC_MIXER_RIGHT0,
+		1, 7, 0, adau1761_sing_in_tlv),
+};
+
+static const struct snd_kcontrol_new adau1761_controls[] = {
+	SOC_DOUBLE_R_TLV("Aux Capture Volume", ADAU1761_REC_MIXER_LEFT1,
+		ADAU1761_REC_MIXER_RIGHT1, 0, 7, 0, adau1761_sing_in_tlv),
+
+	SOC_DOUBLE_R_TLV("Headphone Playback Volume", ADAU1761_PLAY_HP_LEFT_VOL,
+		ADAU1761_PLAY_HP_RIGHT_VOL, 2, 0x3f, 0, adau1761_out_tlv),
+	SOC_DOUBLE_R("Headphone Playback Switch", ADAU1761_PLAY_HP_LEFT_VOL,
+		ADAU1761_PLAY_HP_RIGHT_VOL, 1, 1, 0),
+	SOC_DOUBLE_R_TLV("Lineout Playback Volume", ADAU1761_PLAY_LINE_LEFT_VOL,
+		ADAU1761_PLAY_LINE_RIGHT_VOL, 2, 0x3f, 0, adau1761_out_tlv),
+	SOC_DOUBLE_R("Lineout Playback Switch", ADAU1761_PLAY_LINE_LEFT_VOL,
+		ADAU1761_PLAY_LINE_RIGHT_VOL, 1, 1, 0),
+
+	SOC_ENUM("ADC Bias", adau1761_adc_bias_enum),
+	SOC_ENUM("DAC Bias", adau1761_dac_bias_enum),
+	SOC_ENUM("Capture Bias", adau1761_capture_bias_enum),
+	SOC_ENUM("Playback Bias", adau1761_playback_bias_enum),
+	SOC_ENUM("Headphone Bias", adau1761_hp_bias_enum),
+};
+
+static const struct snd_kcontrol_new adau1761_mono_controls[] = {
+	SOC_SINGLE_TLV("Mono Playback Volume", ADAU1761_PLAY_MONO_OUTPUT_VOL,
+		2, 0x3f, 0, adau1761_out_tlv),
+	SOC_SINGLE("Mono Playback Switch", ADAU1761_PLAY_MONO_OUTPUT_VOL,
+		1, 1, 0),
+};
+
+static const struct snd_kcontrol_new adau1761_left_mixer_controls[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("Left DAC Switch",
+		ADAU1761_PLAY_MIXER_LEFT0, 5, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("Right DAC Switch",
+		ADAU1761_PLAY_MIXER_LEFT0, 6, 1, 0),
+	SOC_DAPM_SINGLE_TLV("Aux Bypass Volume",
+		ADAU1761_PLAY_MIXER_LEFT0, 1, 8, 0, adau1761_sidetone_tlv),
+	SOC_DAPM_SINGLE_TLV("Right Bypass Volume",
+		ADAU1761_PLAY_MIXER_LEFT1, 4, 8, 0, adau1761_sidetone_tlv),
+	SOC_DAPM_SINGLE_TLV("Left Bypass Volume",
+		ADAU1761_PLAY_MIXER_LEFT1, 0, 8, 0, adau1761_sidetone_tlv),
+};
+
+static const struct snd_kcontrol_new adau1761_right_mixer_controls[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("Left DAC Switch",
+		ADAU1761_PLAY_MIXER_RIGHT0, 5, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("Right DAC Switch",
+		ADAU1761_PLAY_MIXER_RIGHT0, 6, 1, 0),
+	SOC_DAPM_SINGLE_TLV("Aux Bypass Volume",
+		ADAU1761_PLAY_MIXER_RIGHT0, 1, 8, 0, adau1761_sidetone_tlv),
+	SOC_DAPM_SINGLE_TLV("Right Bypass Volume",
+		ADAU1761_PLAY_MIXER_RIGHT1, 4, 8, 0, adau1761_sidetone_tlv),
+	SOC_DAPM_SINGLE_TLV("Left Bypass Volume",
+		ADAU1761_PLAY_MIXER_RIGHT1, 0, 8, 0, adau1761_sidetone_tlv),
+};
+
+static const struct snd_kcontrol_new adau1761_left_lr_mixer_controls[] = {
+	SOC_DAPM_SINGLE_TLV("Left Volume",
+		ADAU1761_PLAY_LR_MIXER_LEFT, 1, 2, 0, adau1761_boost_tlv),
+	SOC_DAPM_SINGLE_TLV("Right Volume",
+		ADAU1761_PLAY_LR_MIXER_LEFT, 3, 2, 0, adau1761_boost_tlv),
+};
+
+static const struct snd_kcontrol_new adau1761_right_lr_mixer_controls[] = {
+	SOC_DAPM_SINGLE_TLV("Left Volume",
+		ADAU1761_PLAY_LR_MIXER_RIGHT, 1, 2, 0, adau1761_boost_tlv),
+	SOC_DAPM_SINGLE_TLV("Right Volume",
+		ADAU1761_PLAY_LR_MIXER_RIGHT, 3, 2, 0, adau1761_boost_tlv),
+};
+
+static const char * const adau1761_input_mux_text[] = {
+	"ADC", "DMIC",
+};
+
+static SOC_ENUM_SINGLE_DECL(adau1761_input_mux_enum,
+	ADAU17X1_ADC_CONTROL, 2, adau1761_input_mux_text);
+
+static const struct snd_kcontrol_new adau1761_input_mux_control =
+	SOC_DAPM_ENUM("Input Select", adau1761_input_mux_enum);
+
+static int adau1761_dejitter_fixup(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct adau *adau = snd_soc_codec_get_drvdata(w->codec);
+
+	/* After any power changes have been made the dejitter circuit
+	 * has to be reinitialized. */
+	regmap_write(adau->regmap, ADAU1761_DEJITTER, 0);
+	if (!adau->master)
+		regmap_write(adau->regmap, ADAU1761_DEJITTER, 3);
+
+	return 0;
+}
+
+static const struct snd_soc_dapm_widget adau1x61_dapm_widgets[] = {
+	SND_SOC_DAPM_MIXER("Left Input Mixer", ADAU1761_REC_MIXER_LEFT0, 0, 0,
+		NULL, 0),
+	SND_SOC_DAPM_MIXER("Right Input Mixer", ADAU1761_REC_MIXER_RIGHT0, 0, 0,
+		NULL, 0),
+
+	SOC_MIXER_ARRAY("Left Playback Mixer", ADAU1761_PLAY_MIXER_LEFT0,
+		0, 0, adau1761_left_mixer_controls),
+	SOC_MIXER_ARRAY("Right Playback Mixer", ADAU1761_PLAY_MIXER_RIGHT0,
+		0, 0, adau1761_right_mixer_controls),
+	SOC_MIXER_ARRAY("Left LR Playback Mixer", ADAU1761_PLAY_LR_MIXER_LEFT,
+		0, 0, adau1761_left_lr_mixer_controls),
+	SOC_MIXER_ARRAY("Right LR Playback Mixer", ADAU1761_PLAY_LR_MIXER_RIGHT,
+		0, 0, adau1761_right_lr_mixer_controls),
+
+	SND_SOC_DAPM_SUPPLY("Headphone", ADAU1761_PLAY_HP_LEFT_VOL,
+		0, 0, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY_S("SYSCLK", 2, SND_SOC_NOPM, 0, 0, NULL, 0),
+
+	SND_SOC_DAPM_POST("Dejitter fixup", adau1761_dejitter_fixup),
+
+	SND_SOC_DAPM_INPUT("LAUX"),
+	SND_SOC_DAPM_INPUT("RAUX"),
+	SND_SOC_DAPM_INPUT("LINP"),
+	SND_SOC_DAPM_INPUT("LINN"),
+	SND_SOC_DAPM_INPUT("RINP"),
+	SND_SOC_DAPM_INPUT("RINN"),
+
+	SND_SOC_DAPM_OUTPUT("LOUT"),
+	SND_SOC_DAPM_OUTPUT("ROUT"),
+	SND_SOC_DAPM_OUTPUT("LHP"),
+	SND_SOC_DAPM_OUTPUT("RHP"),
+};
+
+static const struct snd_soc_dapm_widget adau1761_mono_dapm_widgets[] = {
+	SND_SOC_DAPM_MIXER("Mono Playback Mixer", ADAU1761_PLAY_MIXER_MONO,
+		0, 0, NULL, 0),
+
+	SND_SOC_DAPM_OUTPUT("MONOOUT"),
+};
+
+static const struct snd_soc_dapm_widget adau1761_capless_dapm_widgets[] = {
+	SND_SOC_DAPM_SUPPLY_S("Headphone VGND", 1, ADAU1761_PLAY_MIXER_MONO,
+		0, 0, NULL, 0),
+};
+
+static const struct snd_soc_dapm_route adau1x61_dapm_routes[] = {
+	{ "Left Input Mixer", NULL, "LINP" },
+	{ "Left Input Mixer", NULL, "LINN" },
+	{ "Left Input Mixer", NULL, "LAUX" },
+
+	{ "Right Input Mixer", NULL, "RINP" },
+	{ "Right Input Mixer", NULL, "RINN" },
+	{ "Right Input Mixer", NULL, "RAUX" },
+
+	{ "Left Playback Mixer", NULL, "Left Playback Enable"},
+	{ "Right Playback Mixer", NULL, "Right Playback Enable"},
+	{ "Left LR Playback Mixer", NULL, "Left Playback Enable"},
+	{ "Right LR Playback Mixer", NULL, "Right Playback Enable"},
+
+	{ "Left Playback Mixer", "Left DAC Switch", "Left DAC" },
+	{ "Left Playback Mixer", "Right DAC Switch", "Right DAC" },
+
+	{ "Right Playback Mixer", "Left DAC Switch", "Left DAC" },
+	{ "Right Playback Mixer", "Right DAC Switch", "Right DAC" },
+
+	{ "Left LR Playback Mixer", "Left Volume", "Left Playback Mixer" },
+	{ "Left LR Playback Mixer", "Right Volume", "Right Playback Mixer" },
+
+	{ "Right LR Playback Mixer", "Left Volume", "Left Playback Mixer" },
+	{ "Right LR Playback Mixer", "Right Volume", "Right Playback Mixer" },
+
+	{ "LHP", NULL, "Left Playback Mixer" },
+	{ "RHP", NULL, "Right Playback Mixer" },
+
+	{ "LHP", NULL, "Headphone" },
+	{ "RHP", NULL, "Headphone" },
+
+	{ "LOUT", NULL, "Left LR Playback Mixer" },
+	{ "ROUT", NULL, "Right LR Playback Mixer" },
+
+	{ "Left Playback Mixer", "Aux Bypass Volume", "LAUX" },
+	{ "Left Playback Mixer", "Left Bypass Volume", "Left Input Mixer" },
+	{ "Left Playback Mixer", "Right Bypass Volume", "Right Input Mixer" },
+	{ "Right Playback Mixer", "Aux Bypass Volume", "RAUX" },
+	{ "Right Playback Mixer", "Left Bypass Volume", "Left Input Mixer" },
+	{ "Right Playback Mixer", "Right Bypass Volume", "Right Input Mixer" },
+};
+
+static const struct snd_soc_dapm_route adau1761_mono_dapm_routes[] = {
+	{ "Mono Playback Mixer", NULL, "Left Playback Mixer" },
+	{ "Mono Playback Mixer", NULL, "Right Playback Mixer" },
+
+	{ "MONOOUT", NULL, "Mono Playback Mixer" },
+};
+
+static const struct snd_soc_dapm_route adau1761_capless_dapm_routes[] = {
+	{ "Headphone", NULL, "Headphone VGND" },
+};
+
+static const struct snd_soc_dapm_widget adau1761_dmic_widgets[] = {
+	SND_SOC_DAPM_MUX("Left Decimator Mux", SND_SOC_NOPM, 0, 0,
+		&adau1761_input_mux_control),
+	SND_SOC_DAPM_MUX("Right Decimator Mux", SND_SOC_NOPM, 0, 0,
+		&adau1761_input_mux_control),
+
+	SND_SOC_DAPM_INPUT("DMIC"),
+};
+
+static const struct snd_soc_dapm_route adau1761_dmic_routes[] = {
+	{ "Left Decimator Mux", "ADC", "Left Input Mixer" },
+	{ "Left Decimator Mux", "DMIC", "DMIC" },
+	{ "Right Decimator Mux", "ADC", "Right Input Mixer" },
+	{ "Right Decimator Mux", "DMIC", "DMIC" },
+
+	{ "Left Decimator", NULL, "Left Decimator Mux" },
+	{ "Right Decimator", NULL, "Right Decimator Mux" },
+};
+
+static const struct snd_soc_dapm_route adau1761_no_dmic_routes[] = {
+	{ "Left Decimator", NULL, "Left Input Mixer" },
+	{ "Right Decimator", NULL, "Right Input Mixer" },
+};
+
+static const struct snd_soc_dapm_widget adau1761_dapm_widgets[] = {
+	SND_SOC_DAPM_SUPPLY("Serial Port Clock", ADAU1761_CLK_ENABLE0,
+		0, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Serial Input Routing Clock", ADAU1761_CLK_ENABLE0,
+		1, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Serial Output Routing Clock", ADAU1761_CLK_ENABLE0,
+		3, 0, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY("Decimator Resync Clock", ADAU1761_CLK_ENABLE0,
+		4, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Interpolator Resync Clock", ADAU1761_CLK_ENABLE0,
+		2, 0, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY("Slew Clock", ADAU1761_CLK_ENABLE0, 6, 0, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY_S("Digital Clock 0", 1, ADAU1761_CLK_ENABLE1,
+		0, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("Digital Clock 1", 1, ADAU1761_CLK_ENABLE1,
+		1, 0, NULL, 0),
+};
+
+static const struct snd_soc_dapm_route adau1761_dapm_routes[] = {
+	{ "Left Decimator", NULL, "Digital Clock 0", },
+	{ "Right Decimator", NULL, "Digital Clock 0", },
+	{ "Left DAC", NULL, "Digital Clock 0", },
+	{ "Right DAC", NULL, "Digital Clock 0", },
+
+	{ "AIFCLK", NULL, "Digital Clock 1" },
+
+	{ "Playback", NULL, "Serial Port Clock" },
+	{ "Capture", NULL, "Serial Port Clock" },
+	{ "Playback", NULL, "Serial Input Routing Clock" },
+	{ "Capture", NULL, "Serial Output Routing Clock" },
+
+	{ "Left Decimator", NULL, "Decimator Resync Clock" },
+	{ "Right Decimator", NULL, "Decimator Resync Clock" },
+	{ "Left DAC", NULL, "Interpolator Resync Clock" },
+	{ "Right DAC", NULL, "Interpolator Resync Clock" },
+
+	{ "DSP", NULL, "Digital Clock 0" },
+
+	{ "Slew Clock", NULL, "Digital Clock 0" },
+	{ "Right Playback Mixer", NULL, "Slew Clock" },
+	{ "Left Playback Mixer", NULL, "Slew Clock" },
+
+	{ "Digital Clock 0", NULL, "SYSCLK" },
+	{ "Digital Clock 1", NULL, "SYSCLK" },
+};
+
+static int adau1761_set_bias_level(struct snd_soc_codec *codec,
+				 enum snd_soc_bias_level level)
+{
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		break;
+	case SND_SOC_BIAS_PREPARE:
+		break;
+	case SND_SOC_BIAS_STANDBY:
+		regmap_update_bits(adau->regmap, ADAU17X1_CLOCK_CONTROL,
+			ADAU17X1_CLOCK_CONTROL_SYSCLK_EN,
+			ADAU17X1_CLOCK_CONTROL_SYSCLK_EN);
+		break;
+	case SND_SOC_BIAS_OFF:
+		regmap_update_bits(adau->regmap, ADAU17X1_CLOCK_CONTROL,
+			ADAU17X1_CLOCK_CONTROL_SYSCLK_EN, 0);
+		break;
+
+	}
+	codec->dapm.bias_level = level;
+	return 0;
+}
+
+static enum adau1761_output_mode adau1761_get_lineout_mode(
+	struct snd_soc_codec *codec)
+{
+	struct adau1761_platform_data *pdata = codec->dev->platform_data;
+
+	if (pdata)
+		return pdata->lineout_mode;
+
+	return ADAU1761_OUTPUT_MODE_LINE;
+}
+
+static int adau1761_setup_digmic_jackdetect(struct snd_soc_codec *codec)
+{
+	struct adau1761_platform_data *pdata = codec->dev->platform_data;
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+	enum adau1761_digmic_jackdet_pin_mode mode;
+	unsigned int val = 0;
+	int ret;
+
+	if (pdata)
+		mode = pdata->digmic_jackdetect_pin_mode;
+	else
+		mode = ADAU1761_DIGMIC_JACKDET_PIN_MODE_NONE;
+
+	switch (mode) {
+	case ADAU1761_DIGMIC_JACKDET_PIN_MODE_JACKDETECT:
+		switch (pdata->jackdetect_debounce_time) {
+		case ADAU1761_JACKDETECT_DEBOUNCE_5MS:
+		case ADAU1761_JACKDETECT_DEBOUNCE_10MS:
+		case ADAU1761_JACKDETECT_DEBOUNCE_20MS:
+		case ADAU1761_JACKDETECT_DEBOUNCE_40MS:
+			val |= pdata->jackdetect_debounce_time << 6;
+			break;
+		default:
+			return -EINVAL;
+		}
+		if (pdata->jackdetect_active_low)
+			val |= ADAU1761_DIGMIC_JACKDETECT_ACTIVE_LOW;
+
+		ret = snd_soc_add_codec_controls(codec,
+			adau1761_jack_detect_controls,
+			ARRAY_SIZE(adau1761_jack_detect_controls));
+		if (ret)
+			return ret;
+	case ADAU1761_DIGMIC_JACKDET_PIN_MODE_NONE: /* fallthrough */
+		ret = snd_soc_dapm_add_routes(&codec->dapm,
+			adau1761_no_dmic_routes,
+			ARRAY_SIZE(adau1761_no_dmic_routes));
+		if (ret)
+			return ret;
+		break;
+	case ADAU1761_DIGMIC_JACKDET_PIN_MODE_DIGMIC:
+		ret = snd_soc_dapm_new_controls(&codec->dapm,
+			adau1761_dmic_widgets,
+			ARRAY_SIZE(adau1761_dmic_widgets));
+		if (ret)
+			return ret;
+
+		ret = snd_soc_dapm_add_routes(&codec->dapm,
+			adau1761_dmic_routes,
+			ARRAY_SIZE(adau1761_dmic_routes));
+		if (ret)
+			return ret;
+
+		val |= ADAU1761_DIGMIC_JACKDETECT_DIGMIC;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	regmap_write(adau->regmap, ADAU1761_DIGMIC_JACKDETECT, val);
+
+	return 0;
+}
+
+static int adau1761_setup_headphone_mode(struct snd_soc_codec *codec)
+{
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+	struct adau1761_platform_data *pdata = codec->dev->platform_data;
+	enum adau1761_output_mode mode;
+	int ret;
+
+	if (pdata)
+		mode = pdata->headphone_mode;
+	else
+		mode = ADAU1761_OUTPUT_MODE_HEADPHONE;
+
+	switch (mode) {
+	case ADAU1761_OUTPUT_MODE_LINE:
+		break;
+	case ADAU1761_OUTPUT_MODE_HEADPHONE_CAPLESS:
+		regmap_update_bits(adau->regmap, ADAU1761_PLAY_MONO_OUTPUT_VOL,
+			ADAU1761_PLAY_MONO_OUTPUT_VOL_MODE_HP |
+			ADAU1761_PLAY_MONO_OUTPUT_VOL_UNMUTE,
+			ADAU1761_PLAY_MONO_OUTPUT_VOL_MODE_HP |
+			ADAU1761_PLAY_MONO_OUTPUT_VOL_UNMUTE);
+		/* fallthrough */
+	case ADAU1761_OUTPUT_MODE_HEADPHONE:
+		regmap_update_bits(adau->regmap, ADAU1761_PLAY_HP_RIGHT_VOL,
+			ADAU1761_PLAY_HP_RIGHT_VOL_MODE_HP,
+			ADAU1761_PLAY_HP_RIGHT_VOL_MODE_HP);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (mode == ADAU1761_OUTPUT_MODE_HEADPHONE_CAPLESS) {
+		ret = snd_soc_dapm_new_controls(&codec->dapm,
+			adau1761_capless_dapm_widgets,
+			ARRAY_SIZE(adau1761_capless_dapm_widgets));
+		if (ret)
+			return ret;
+		ret = snd_soc_dapm_add_routes(&codec->dapm,
+			adau1761_capless_dapm_routes,
+			ARRAY_SIZE(adau1761_capless_dapm_routes));
+	} else {
+		ret = snd_soc_add_codec_controls(codec, adau1761_mono_controls,
+			ARRAY_SIZE(adau1761_mono_controls));
+		if (ret)
+			return ret;
+		ret = snd_soc_dapm_new_controls(&codec->dapm,
+			adau1761_mono_dapm_widgets,
+			ARRAY_SIZE(adau1761_mono_dapm_widgets));
+		if (ret)
+			return ret;
+		ret = snd_soc_dapm_add_routes(&codec->dapm,
+			adau1761_mono_dapm_routes,
+			ARRAY_SIZE(adau1761_mono_dapm_routes));
+	}
+
+	return ret;
+}
+
+static bool adau1761_readable_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case ADAU1761_DIGMIC_JACKDETECT:
+	case ADAU1761_REC_MIXER_LEFT0:
+	case ADAU1761_REC_MIXER_LEFT1:
+	case ADAU1761_REC_MIXER_RIGHT0:
+	case ADAU1761_REC_MIXER_RIGHT1:
+	case ADAU1761_LEFT_DIFF_INPUT_VOL:
+	case ADAU1761_RIGHT_DIFF_INPUT_VOL:
+	case ADAU1761_PLAY_LR_MIXER_LEFT:
+	case ADAU1761_PLAY_MIXER_LEFT0:
+	case ADAU1761_PLAY_MIXER_LEFT1:
+	case ADAU1761_PLAY_MIXER_RIGHT0:
+	case ADAU1761_PLAY_MIXER_RIGHT1:
+	case ADAU1761_PLAY_LR_MIXER_RIGHT:
+	case ADAU1761_PLAY_MIXER_MONO:
+	case ADAU1761_PLAY_HP_LEFT_VOL:
+	case ADAU1761_PLAY_HP_RIGHT_VOL:
+	case ADAU1761_PLAY_LINE_LEFT_VOL:
+	case ADAU1761_PLAY_LINE_RIGHT_VOL:
+	case ADAU1761_PLAY_MONO_OUTPUT_VOL:
+	case ADAU1761_POP_CLICK_SUPPRESS:
+	case ADAU1761_JACK_DETECT_PIN:
+	case ADAU1761_DEJITTER:
+	case ADAU1761_CLK_ENABLE0:
+	case ADAU1761_CLK_ENABLE1:
+		return true;
+	default:
+		break;
+	}
+
+	return adau17x1_readable_register(dev, reg);
+}
+
+static int adau1761_codec_probe(struct snd_soc_codec *codec)
+{
+	struct adau1761_platform_data *pdata = codec->dev->platform_data;
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+	int ret;
+
+	ret = adau17x1_add_widgets(codec);
+	if (ret < 0)
+		return ret;
+
+	if (pdata && pdata->input_differential) {
+		regmap_update_bits(adau->regmap, ADAU1761_LEFT_DIFF_INPUT_VOL,
+			ADAU1761_DIFF_INPUT_VOL_LDEN,
+			ADAU1761_DIFF_INPUT_VOL_LDEN);
+		regmap_update_bits(adau->regmap, ADAU1761_RIGHT_DIFF_INPUT_VOL,
+			ADAU1761_DIFF_INPUT_VOL_LDEN,
+			ADAU1761_DIFF_INPUT_VOL_LDEN);
+		ret = snd_soc_add_codec_controls(codec,
+			adau1761_differential_mode_controls,
+			ARRAY_SIZE(adau1761_differential_mode_controls));
+		if (ret)
+			return ret;
+	} else {
+		ret = snd_soc_add_codec_controls(codec,
+			adau1761_single_mode_controls,
+			ARRAY_SIZE(adau1761_single_mode_controls));
+		if (ret)
+			return ret;
+	}
+
+	switch (adau1761_get_lineout_mode(codec)) {
+	case ADAU1761_OUTPUT_MODE_LINE:
+		break;
+	case ADAU1761_OUTPUT_MODE_HEADPHONE:
+		regmap_update_bits(adau->regmap, ADAU1761_PLAY_LINE_LEFT_VOL,
+			ADAU1761_PLAY_LINE_LEFT_VOL_MODE_HP,
+			ADAU1761_PLAY_LINE_LEFT_VOL_MODE_HP);
+		regmap_update_bits(adau->regmap, ADAU1761_PLAY_LINE_RIGHT_VOL,
+			ADAU1761_PLAY_LINE_RIGHT_VOL_MODE_HP,
+			ADAU1761_PLAY_LINE_RIGHT_VOL_MODE_HP);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = adau1761_setup_headphone_mode(codec);
+	if (ret)
+		return ret;
+
+	ret = adau1761_setup_digmic_jackdetect(codec);
+	if (ret)
+		return ret;
+
+	if (adau->type == ADAU1761) {
+		ret = snd_soc_dapm_new_controls(&codec->dapm,
+			adau1761_dapm_widgets,
+			ARRAY_SIZE(adau1761_dapm_widgets));
+		if (ret)
+			return ret;
+
+		ret = snd_soc_dapm_add_routes(&codec->dapm,
+			adau1761_dapm_routes,
+			ARRAY_SIZE(adau1761_dapm_routes));
+		if (ret)
+			return ret;
+		/* Not fatal if the firmware fails to load: only warn. */
+		ret = adau17x1_load_firmware(adau, codec->dev,
+			ADAU1761_FIRMWARE);
+		if (ret)
+			dev_warn(codec->dev, "Failed to load firmware\n");
+	}
+
+	ret = adau17x1_add_routes(codec);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static const struct snd_soc_codec_driver adau1761_codec_driver = {
+	.probe = adau1761_codec_probe,
+	.suspend = adau17x1_suspend,
+	.resume	= adau17x1_resume,
+	.set_bias_level	= adau1761_set_bias_level,
+
+	.controls = adau1761_controls,
+	.num_controls = ARRAY_SIZE(adau1761_controls),
+	.dapm_widgets = adau1x61_dapm_widgets,
+	.num_dapm_widgets = ARRAY_SIZE(adau1x61_dapm_widgets),
+	.dapm_routes = adau1x61_dapm_routes,
+	.num_dapm_routes = ARRAY_SIZE(adau1x61_dapm_routes),
+};
+
+#define ADAU1761_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE | \
+	SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_driver adau1361_dai_driver = {
+	.name = "adau-hifi",
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 2,
+		.channels_max = 4,
+		.rates = SNDRV_PCM_RATE_8000_96000,
+		.formats = ADAU1761_FORMATS,
+	},
+	.capture = {
+		.stream_name = "Capture",
+		.channels_min = 2,
+		.channels_max = 4,
+		.rates = SNDRV_PCM_RATE_8000_96000,
+		.formats = ADAU1761_FORMATS,
+	},
+	.ops = &adau17x1_dai_ops,
+};
+
+static struct snd_soc_dai_driver adau1761_dai_driver = {
+	.name = "adau-hifi",
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 2,
+		.channels_max = 8,
+		.rates = SNDRV_PCM_RATE_8000_96000,
+		.formats = ADAU1761_FORMATS,
+	},
+	.capture = {
+		.stream_name = "Capture",
+		.channels_min = 2,
+		.channels_max = 8,
+		.rates = SNDRV_PCM_RATE_8000_96000,
+		.formats = ADAU1761_FORMATS,
+	},
+	.ops = &adau17x1_dai_ops,
+};
+
+int adau1761_probe(struct device *dev, struct regmap *regmap,
+	enum adau17x1_type type, void (*switch_mode)(struct device *dev))
+{
+	struct snd_soc_dai_driver *dai_drv;
+	int ret;
+
+	ret = adau17x1_probe(dev, regmap, type, switch_mode);
+	if (ret)
+		return ret;
+
+	if (type == ADAU1361)
+		dai_drv = &adau1361_dai_driver;
+	else
+		dai_drv = &adau1761_dai_driver;
+
+	return snd_soc_register_codec(dev, &adau1761_codec_driver, dai_drv, 1);
+}
+EXPORT_SYMBOL_GPL(adau1761_probe);
+
+const struct regmap_config adau1761_regmap_config = {
+	.val_bits = 8,
+	.reg_bits = 16,
+	.max_register = 0x40fa,
+	.reg_defaults = adau1761_reg_defaults,
+	.num_reg_defaults = ARRAY_SIZE(adau1761_reg_defaults),
+	.readable_reg = adau1761_readable_register,
+	.volatile_reg = adau17x1_volatile_register,
+	.cache_type = REGCACHE_RBTREE,
+};
+EXPORT_SYMBOL_GPL(adau1761_regmap_config);
+
+MODULE_DESCRIPTION("ASoC ADAU1361/ADAU1461/ADAU1761/ADAU1961 CODEC driver");
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/adau1761.h b/sound/soc/codecs/adau1761.h
new file mode 100644
index 000000000000..a9e0d288301e
--- /dev/null
+++ b/sound/soc/codecs/adau1761.h
@@ -0,0 +1,23 @@
+/*
+ * ADAU1361/ADAU1461/ADAU1761/ADAU1961 driver
+ *
+ * Copyright 2014 Analog Devices Inc.
+ *  Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#ifndef __SOUND_SOC_CODECS_ADAU1761_H__
+#define __SOUND_SOC_CODECS_ADAU1761_H__
+
+#include <linux/regmap.h>
+#include "adau17x1.h"
+
+struct device;
+
+int adau1761_probe(struct device *dev, struct regmap *regmap,
+	enum adau17x1_type type, void (*switch_mode)(struct device *dev));
+
+extern const struct regmap_config adau1761_regmap_config;
+
+#endif
-- 
cgit 


From 2923af024681508132881c9e5ddd65cd51b0d8e3 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 27 May 2014 10:53:19 +0200
Subject: ASoC: Add ADAU1381/ADAU1781 audio CODEC support

This patch adds support for the Analog Devices ADAU1381 and ADAU1781 audio
CODECs. The device is a low-power, 24-bit stereo audio CODEC with multiple
analog inputs and outputs, two digital microphone inputs and an I2S interface.
The device can be controlled either using I2C or SPI. The main difference
between the two variants is that the ADAU1781 has a freely programmable SigmaDSP
processor, while the ADAU1381 has a fixed function wind noise reduction filter.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/platform_data/adau17x1.h |  19 ++
 sound/soc/codecs/Kconfig               |  16 ++
 sound/soc/codecs/Makefile              |   6 +
 sound/soc/codecs/adau1781-i2c.c        |  58 ++++
 sound/soc/codecs/adau1781-spi.c        |  75 +++++
 sound/soc/codecs/adau1781.c            | 511 +++++++++++++++++++++++++++++++++
 sound/soc/codecs/adau1781.h            |  23 ++
 7 files changed, 708 insertions(+)
 create mode 100644 sound/soc/codecs/adau1781-i2c.c
 create mode 100644 sound/soc/codecs/adau1781-spi.c
 create mode 100644 sound/soc/codecs/adau1781.c
 create mode 100644 sound/soc/codecs/adau1781.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/adau17x1.h b/include/linux/platform_data/adau17x1.h
index d234d9e46fd6..a81766cae230 100644
--- a/include/linux/platform_data/adau17x1.h
+++ b/include/linux/platform_data/adau17x1.h
@@ -87,4 +87,23 @@ struct adau1761_platform_data {
 	enum adau17x1_micbias_voltage micbias_voltage;
 };
 
+/**
+ * struct adau1781_platform_data - ADAU1781 Codec driver platform data
+ * @left_input_differential: If true configure the left input as
+ * differential input.
+ * @right_input_differential: If true configure the right input as differential
+ *  input.
+ * @use_dmic: If true configure the MIC pins as digital microphone pins instead
+ *  of analog microphone pins.
+ * @micbias_voltage: Microphone voltage bias
+ */
+struct adau1781_platform_data {
+	bool left_input_differential;
+	bool right_input_differential;
+
+	bool use_dmic;
+
+	enum adau17x1_micbias_voltage micbias_voltage;
+};
+
 #endif
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 4233ed118c48..dd5deea9221a 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -25,6 +25,8 @@ config SND_SOC_ALL_CODECS
 	select SND_SOC_ADAU1373 if I2C
 	select SND_SOC_ADAU1761_I2C if I2C
 	select SND_SOC_ADAU1761_SPI if SPI
+	select SND_SOC_ADAU1781_I2C if I2C
+	select SND_SOC_ADAU1781_SPI if SPI
 	select SND_SOC_ADAV801 if SPI_MASTER
 	select SND_SOC_ADAV803 if I2C
 	select SND_SOC_ADAU1977_SPI if SPI_MASTER
@@ -238,6 +240,20 @@ config SND_SOC_ADAU1761_SPI
 	select SND_SOC_ADAU1761
 	select REGMAP_SPI
 
+config SND_SOC_ADAU1781
+	select SND_SOC_ADAU17X1
+	tristate
+
+config SND_SOC_ADAU1781_I2C
+	tristate
+	select SND_SOC_ADAU1781
+	select REGMAP_I2C
+
+config SND_SOC_ADAU1781_SPI
+	tristate
+	select SND_SOC_ADAU1781
+	select REGMAP_SPI
+
 config SND_SOC_ADAU1977
 	tristate
 
diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile
index e96499ca76bd..4ef1a1ed5f92 100644
--- a/sound/soc/codecs/Makefile
+++ b/sound/soc/codecs/Makefile
@@ -13,6 +13,9 @@ snd-soc-adau17x1-objs := adau17x1.o
 snd-soc-adau1761-objs := adau1761.o
 snd-soc-adau1761-i2c-objs := adau1761-i2c.o
 snd-soc-adau1761-spi-objs := adau1761-spi.o
+snd-soc-adau1781-objs := adau1781.o
+snd-soc-adau1781-i2c-objs := adau1781-i2c.o
+snd-soc-adau1781-spi-objs := adau1781-spi.o
 snd-soc-adau1977-objs := adau1977.o
 snd-soc-adau1977-spi-objs := adau1977-spi.o
 snd-soc-adau1977-i2c-objs := adau1977-i2c.o
@@ -166,6 +169,9 @@ obj-$(CONFIG_SND_SOC_ADAU17X1)		+= snd-soc-adau17x1.o
 obj-$(CONFIG_SND_SOC_ADAU1761)		+= snd-soc-adau1761.o
 obj-$(CONFIG_SND_SOC_ADAU1761_I2C)	+= snd-soc-adau1761-i2c.o
 obj-$(CONFIG_SND_SOC_ADAU1761_SPI)	+= snd-soc-adau1761-spi.o
+obj-$(CONFIG_SND_SOC_ADAU1781)		+= snd-soc-adau1781.o
+obj-$(CONFIG_SND_SOC_ADAU1781_I2C)	+= snd-soc-adau1781-i2c.o
+obj-$(CONFIG_SND_SOC_ADAU1781_SPI)	+= snd-soc-adau1781-spi.o
 obj-$(CONFIG_SND_SOC_ADAU1977)		+= snd-soc-adau1977.o
 obj-$(CONFIG_SND_SOC_ADAU1977_SPI)	+= snd-soc-adau1977-spi.o
 obj-$(CONFIG_SND_SOC_ADAU1977_I2C)	+= snd-soc-adau1977-i2c.o
diff --git a/sound/soc/codecs/adau1781-i2c.c b/sound/soc/codecs/adau1781-i2c.c
new file mode 100644
index 000000000000..2ce4362ccec1
--- /dev/null
+++ b/sound/soc/codecs/adau1781-i2c.c
@@ -0,0 +1,58 @@
+/*
+ * Driver for ADAU1381/ADAU1781 CODEC
+ *
+ * Copyright 2014 Analog Devices Inc.
+ *  Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/i2c.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <sound/soc.h>
+
+#include "adau1781.h"
+
+static int adau1781_i2c_probe(struct i2c_client *client,
+	const struct i2c_device_id *id)
+{
+	struct regmap_config config;
+
+	config = adau1781_regmap_config;
+	config.val_bits = 8;
+	config.reg_bits = 16;
+
+	return adau1781_probe(&client->dev,
+		devm_regmap_init_i2c(client, &config),
+		id->driver_data, NULL);
+}
+
+static int adau1781_i2c_remove(struct i2c_client *client)
+{
+	snd_soc_unregister_codec(&client->dev);
+	return 0;
+}
+
+static const struct i2c_device_id adau1781_i2c_ids[] = {
+	{ "adau1381", ADAU1381 },
+	{ "adau1781", ADAU1781 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, adau1781_i2c_ids);
+
+static struct i2c_driver adau1781_i2c_driver = {
+	.driver = {
+		.name = "adau1781",
+		.owner = THIS_MODULE,
+	},
+	.probe = adau1781_i2c_probe,
+	.remove = adau1781_i2c_remove,
+	.id_table = adau1781_i2c_ids,
+};
+module_i2c_driver(adau1781_i2c_driver);
+
+MODULE_DESCRIPTION("ASoC ADAU1381/ADAU1781 CODEC I2C driver");
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/adau1781-spi.c b/sound/soc/codecs/adau1781-spi.c
new file mode 100644
index 000000000000..194686716bbe
--- /dev/null
+++ b/sound/soc/codecs/adau1781-spi.c
@@ -0,0 +1,75 @@
+/*
+ * Driver for ADAU1381/ADAU1781 CODEC
+ *
+ * Copyright 2014 Analog Devices Inc.
+ *  Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/spi/spi.h>
+#include <sound/soc.h>
+
+#include "adau1781.h"
+
+static void adau1781_spi_switch_mode(struct device *dev)
+{
+	struct spi_device *spi = to_spi_device(dev);
+
+	/*
+	 * To get the device into SPI mode CLATCH has to be pulled low three
+	 * times.  Do this by issuing three dummy reads.
+	 */
+	spi_w8r8(spi, 0x00);
+	spi_w8r8(spi, 0x00);
+	spi_w8r8(spi, 0x00);
+}
+
+static int adau1781_spi_probe(struct spi_device *spi)
+{
+	const struct spi_device_id *id = spi_get_device_id(spi);
+	struct regmap_config config;
+
+	if (!id)
+		return -EINVAL;
+
+	config = adau1781_regmap_config;
+	config.val_bits = 8;
+	config.reg_bits = 24;
+	config.read_flag_mask = 0x1;
+
+	return adau1781_probe(&spi->dev,
+		devm_regmap_init_spi(spi, &config),
+		id->driver_data, adau1781_spi_switch_mode);
+}
+
+static int adau1781_spi_remove(struct spi_device *spi)
+{
+	snd_soc_unregister_codec(&spi->dev);
+	return 0;
+}
+
+static const struct spi_device_id adau1781_spi_id[] = {
+	{ "adau1381", ADAU1381 },
+	{ "adau1781", ADAU1781 },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, adau1781_spi_id);
+
+static struct spi_driver adau1781_spi_driver = {
+	.driver = {
+		.name = "adau1781",
+		.owner = THIS_MODULE,
+	},
+	.probe = adau1781_spi_probe,
+	.remove = adau1781_spi_remove,
+	.id_table = adau1781_spi_id,
+};
+module_spi_driver(adau1781_spi_driver);
+
+MODULE_DESCRIPTION("ASoC ADAU1381/ADAU1781 CODEC SPI driver");
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/adau1781.c b/sound/soc/codecs/adau1781.c
new file mode 100644
index 000000000000..045a61413840
--- /dev/null
+++ b/sound/soc/codecs/adau1781.c
@@ -0,0 +1,511 @@
+/*
+ * Driver for ADAU1381/ADAU1781 codec
+ *
+ * Copyright 2011-2013 Analog Devices Inc.
+ * Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/i2c.h>
+#include <linux/spi/spi.h>
+#include <linux/slab.h>
+#include <sound/core.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/soc.h>
+#include <sound/tlv.h>
+#include <linux/platform_data/adau17x1.h>
+
+#include "adau17x1.h"
+#include "adau1781.h"
+
+#define ADAU1781_DMIC_BEEP_CTRL		0x4008
+#define ADAU1781_LEFT_PGA		0x400e
+#define ADAU1781_RIGHT_PGA		0x400f
+#define ADAU1781_LEFT_PLAYBACK_MIXER	0x401c
+#define ADAU1781_RIGHT_PLAYBACK_MIXER	0x401e
+#define ADAU1781_MONO_PLAYBACK_MIXER	0x401f
+#define ADAU1781_LEFT_LINEOUT		0x4025
+#define ADAU1781_RIGHT_LINEOUT		0x4026
+#define ADAU1781_SPEAKER		0x4027
+#define ADAU1781_BEEP_ZC		0x4028
+#define ADAU1781_DEJITTER		0x4032
+#define ADAU1781_DIG_PWDN0		0x4080
+#define ADAU1781_DIG_PWDN1		0x4081
+
+#define ADAU1781_INPUT_DIFFERNTIAL BIT(3)
+
+#define ADAU1381_FIRMWARE "adau1381.bin"
+#define ADAU1781_FIRMWARE "adau1781.bin"
+
+static const struct reg_default adau1781_reg_defaults[] = {
+	{ ADAU1781_DMIC_BEEP_CTRL,		0x00 },
+	{ ADAU1781_LEFT_PGA,			0xc7 },
+	{ ADAU1781_RIGHT_PGA,			0xc7 },
+	{ ADAU1781_LEFT_PLAYBACK_MIXER,		0x00 },
+	{ ADAU1781_RIGHT_PLAYBACK_MIXER,	0x00 },
+	{ ADAU1781_MONO_PLAYBACK_MIXER,		0x00 },
+	{ ADAU1781_LEFT_LINEOUT,		0x00 },
+	{ ADAU1781_RIGHT_LINEOUT,		0x00 },
+	{ ADAU1781_SPEAKER,			0x00 },
+	{ ADAU1781_BEEP_ZC,			0x19 },
+	{ ADAU1781_DEJITTER,			0x60 },
+	{ ADAU1781_DIG_PWDN1,			0x0c },
+	{ ADAU1781_DIG_PWDN1,			0x00 },
+	{ ADAU17X1_CLOCK_CONTROL,		0x00 },
+	{ ADAU17X1_PLL_CONTROL,			0x00 },
+	{ ADAU17X1_REC_POWER_MGMT,		0x00 },
+	{ ADAU17X1_MICBIAS,			0x04 },
+	{ ADAU17X1_SERIAL_PORT0,		0x00 },
+	{ ADAU17X1_SERIAL_PORT1,		0x00 },
+	{ ADAU17X1_CONVERTER0,			0x00 },
+	{ ADAU17X1_CONVERTER1,			0x00 },
+	{ ADAU17X1_LEFT_INPUT_DIGITAL_VOL,	0x00 },
+	{ ADAU17X1_RIGHT_INPUT_DIGITAL_VOL,	0x00 },
+	{ ADAU17X1_ADC_CONTROL,			0x00 },
+	{ ADAU17X1_PLAY_POWER_MGMT,		0x00 },
+	{ ADAU17X1_DAC_CONTROL0,		0x00 },
+	{ ADAU17X1_DAC_CONTROL1,		0x00 },
+	{ ADAU17X1_DAC_CONTROL2,		0x00 },
+	{ ADAU17X1_SERIAL_PORT_PAD,		0x00 },
+	{ ADAU17X1_CONTROL_PORT_PAD0,		0x00 },
+	{ ADAU17X1_CONTROL_PORT_PAD1,		0x00 },
+	{ ADAU17X1_DSP_SAMPLING_RATE,		0x01 },
+	{ ADAU17X1_SERIAL_INPUT_ROUTE,		0x00 },
+	{ ADAU17X1_SERIAL_OUTPUT_ROUTE,		0x00 },
+	{ ADAU17X1_DSP_ENABLE,			0x00 },
+	{ ADAU17X1_DSP_RUN,			0x00 },
+	{ ADAU17X1_SERIAL_SAMPLING_RATE,	0x00 },
+};
+
+static const DECLARE_TLV_DB_SCALE(adau1781_speaker_tlv, 0, 200, 0);
+
+static const DECLARE_TLV_DB_RANGE(adau1781_pga_tlv,
+	0, 1, TLV_DB_SCALE_ITEM(0, 600, 0),
+	2, 3, TLV_DB_SCALE_ITEM(1000, 400, 0),
+	4, 4, TLV_DB_SCALE_ITEM(1700, 0, 0),
+	5, 7, TLV_DB_SCALE_ITEM(2000, 600, 0)
+);
+
+static const DECLARE_TLV_DB_RANGE(adau1781_beep_tlv,
+	0, 1, TLV_DB_SCALE_ITEM(0, 600, 0),
+	2, 3, TLV_DB_SCALE_ITEM(1000, 400, 0),
+	4, 4, TLV_DB_SCALE_ITEM(-2300, 0, 0),
+	5, 7, TLV_DB_SCALE_ITEM(2000, 600, 0)
+);
+
+static const DECLARE_TLV_DB_SCALE(adau1781_sidetone_tlv, -1800, 300, 1);
+
+static const char * const adau1781_speaker_bias_select_text[] = {
+	"Normal operation", "Power saving", "Enhanced performance",
+};
+
+static const char * const adau1781_bias_select_text[] = {
+	"Normal operation", "Extreme power saving", "Power saving",
+	"Enhanced performance",
+};
+
+static SOC_ENUM_SINGLE_DECL(adau1781_adc_bias_enum,
+		ADAU17X1_REC_POWER_MGMT, 3, adau1781_bias_select_text);
+static SOC_ENUM_SINGLE_DECL(adau1781_speaker_bias_enum,
+		ADAU17X1_PLAY_POWER_MGMT, 6, adau1781_speaker_bias_select_text);
+static SOC_ENUM_SINGLE_DECL(adau1781_dac_bias_enum,
+		ADAU17X1_PLAY_POWER_MGMT, 4, adau1781_bias_select_text);
+static SOC_ENUM_SINGLE_DECL(adau1781_playback_bias_enum,
+		ADAU17X1_PLAY_POWER_MGMT, 2, adau1781_bias_select_text);
+static SOC_ENUM_SINGLE_DECL(adau1781_capture_bias_enum,
+		ADAU17X1_REC_POWER_MGMT, 1, adau1781_bias_select_text);
+
+static const struct snd_kcontrol_new adau1781_controls[] = {
+	SOC_SINGLE_TLV("Beep Capture Volume", ADAU1781_DMIC_BEEP_CTRL, 0, 7, 0,
+		adau1781_beep_tlv),
+	SOC_DOUBLE_R_TLV("PGA Capture Volume", ADAU1781_LEFT_PGA,
+		ADAU1781_RIGHT_PGA, 5, 7, 0, adau1781_pga_tlv),
+	SOC_DOUBLE_R("PGA Capture Switch", ADAU1781_LEFT_PGA,
+		ADAU1781_RIGHT_PGA, 1, 1, 0),
+
+	SOC_DOUBLE_R("Lineout Playback Switch", ADAU1781_LEFT_LINEOUT,
+		ADAU1781_RIGHT_LINEOUT, 1, 1, 0),
+	SOC_SINGLE("Beep ZC Switch", ADAU1781_BEEP_ZC, 0, 1, 0),
+
+	SOC_SINGLE("Mono Playback Switch", ADAU1781_MONO_PLAYBACK_MIXER,
+		0, 1, 0),
+	SOC_SINGLE_TLV("Mono Playback Volume", ADAU1781_SPEAKER, 6, 3, 0,
+		adau1781_speaker_tlv),
+
+	SOC_ENUM("ADC Bias", adau1781_adc_bias_enum),
+	SOC_ENUM("DAC Bias", adau1781_dac_bias_enum),
+	SOC_ENUM("Capture Bias", adau1781_capture_bias_enum),
+	SOC_ENUM("Playback Bias", adau1781_playback_bias_enum),
+	SOC_ENUM("Speaker Bias", adau1781_speaker_bias_enum),
+};
+
+static const struct snd_kcontrol_new adau1781_beep_mixer_controls[] = {
+	SOC_DAPM_SINGLE("Beep Capture Switch", ADAU1781_DMIC_BEEP_CTRL,
+		3, 1, 0),
+};
+
+static const struct snd_kcontrol_new adau1781_left_mixer_controls[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("Switch",
+		ADAU1781_LEFT_PLAYBACK_MIXER, 5, 1, 0),
+	SOC_DAPM_SINGLE_TLV("Beep Playback Volume",
+		ADAU1781_LEFT_PLAYBACK_MIXER, 1, 8, 0, adau1781_sidetone_tlv),
+};
+
+static const struct snd_kcontrol_new adau1781_right_mixer_controls[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("Switch",
+		ADAU1781_RIGHT_PLAYBACK_MIXER, 6, 1, 0),
+	SOC_DAPM_SINGLE_TLV("Beep Playback Volume",
+		ADAU1781_RIGHT_PLAYBACK_MIXER, 1, 8, 0, adau1781_sidetone_tlv),
+};
+
+static const struct snd_kcontrol_new adau1781_mono_mixer_controls[] = {
+	SOC_DAPM_SINGLE_AUTODISABLE("Left Switch",
+		ADAU1781_MONO_PLAYBACK_MIXER, 7, 1, 0),
+	SOC_DAPM_SINGLE_AUTODISABLE("Right Switch",
+		 ADAU1781_MONO_PLAYBACK_MIXER, 6, 1, 0),
+	SOC_DAPM_SINGLE_TLV("Beep Playback Volume",
+		ADAU1781_MONO_PLAYBACK_MIXER, 2, 8, 0, adau1781_sidetone_tlv),
+};
+
+static int adau1781_dejitter_fixup(struct snd_soc_dapm_widget *w,
+	struct snd_kcontrol *kcontrol, int event)
+{
+	struct snd_soc_codec *codec = w->codec;
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+
+	/* After any power changes have been made the dejitter circuit
+	 * has to be reinitialized. */
+	regmap_write(adau->regmap, ADAU1781_DEJITTER, 0);
+	if (!adau->master)
+		regmap_write(adau->regmap, ADAU1781_DEJITTER, 5);
+
+	return 0;
+}
+
+static const struct snd_soc_dapm_widget adau1781_dapm_widgets[] = {
+	SND_SOC_DAPM_PGA("Left PGA", ADAU1781_LEFT_PGA, 0, 0, NULL, 0),
+	SND_SOC_DAPM_PGA("Right PGA", ADAU1781_RIGHT_PGA, 0, 0, NULL, 0),
+
+	SND_SOC_DAPM_OUT_DRV("Speaker", ADAU1781_SPEAKER, 0, 0, NULL, 0),
+
+	SOC_MIXER_NAMED_CTL_ARRAY("Beep Mixer", ADAU17X1_MICBIAS, 4, 0,
+		adau1781_beep_mixer_controls),
+
+	SOC_MIXER_ARRAY("Left Lineout Mixer", SND_SOC_NOPM, 0, 0,
+		adau1781_left_mixer_controls),
+	SOC_MIXER_ARRAY("Right Lineout Mixer", SND_SOC_NOPM, 0, 0,
+		adau1781_right_mixer_controls),
+	SOC_MIXER_ARRAY("Mono Mixer", SND_SOC_NOPM, 0, 0,
+		adau1781_mono_mixer_controls),
+
+	SND_SOC_DAPM_SUPPLY("Serial Input Routing", ADAU1781_DIG_PWDN0,
+		2, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Serial Output Routing", ADAU1781_DIG_PWDN0,
+		3, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Clock Domain Transfer", ADAU1781_DIG_PWDN0,
+		5, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Serial Ports", ADAU1781_DIG_PWDN0, 4, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("ADC Engine", ADAU1781_DIG_PWDN0, 7, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("DAC Engine", ADAU1781_DIG_PWDN1, 0, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY("Digital Mic", ADAU1781_DIG_PWDN1, 1, 0, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY("Sound Engine", ADAU1781_DIG_PWDN0, 0, 0, NULL, 0),
+	SND_SOC_DAPM_SUPPLY_S("SYSCLK", 1, ADAU1781_DIG_PWDN0, 1, 0, NULL, 0),
+
+	SND_SOC_DAPM_SUPPLY("Zero Crossing Detector", ADAU1781_DIG_PWDN1, 2, 0,
+		NULL, 0),
+
+	SND_SOC_DAPM_POST("Dejitter fixup", adau1781_dejitter_fixup),
+
+	SND_SOC_DAPM_INPUT("BEEP"),
+
+	SND_SOC_DAPM_OUTPUT("AOUTL"),
+	SND_SOC_DAPM_OUTPUT("AOUTR"),
+	SND_SOC_DAPM_OUTPUT("SP"),
+	SND_SOC_DAPM_INPUT("LMIC"),
+	SND_SOC_DAPM_INPUT("RMIC"),
+};
+
+static const struct snd_soc_dapm_route adau1781_dapm_routes[] = {
+	{ "Left Lineout Mixer", NULL, "Left Playback Enable" },
+	{ "Right Lineout Mixer", NULL, "Right Playback Enable" },
+
+	{ "Left Lineout Mixer", "Beep Playback Volume", "Beep Mixer" },
+	{ "Left Lineout Mixer", "Switch", "Left DAC" },
+
+	{ "Right Lineout Mixer", "Beep Playback Volume", "Beep Mixer" },
+	{ "Right Lineout Mixer", "Switch", "Right DAC" },
+
+	{ "Mono Mixer", "Beep Playback Volume", "Beep Mixer" },
+	{ "Mono Mixer", "Right Switch", "Right DAC" },
+	{ "Mono Mixer", "Left Switch", "Left DAC" },
+	{ "Speaker", NULL, "Mono Mixer" },
+
+	{ "Mono Mixer", NULL, "SYSCLK" },
+	{ "Left Lineout Mixer", NULL, "SYSCLK" },
+	{ "Right Lineout Mixer", NULL, "SYSCLK" },
+
+	{ "Beep Mixer", "Beep Capture Switch", "BEEP" },
+	{ "Beep Mixer", NULL, "Zero Crossing Detector" },
+
+	{ "Left DAC", NULL, "DAC Engine" },
+	{ "Right DAC", NULL, "DAC Engine" },
+
+	{ "Sound Engine", NULL, "SYSCLK" },
+	{ "DSP", NULL, "Sound Engine" },
+
+	{ "Left Decimator", NULL, "ADC Engine" },
+	{ "Right Decimator", NULL, "ADC Engine" },
+
+	{ "AIFCLK", NULL, "SYSCLK" },
+
+	{ "Playback", NULL, "Serial Input Routing" },
+	{ "Playback", NULL, "Serial Ports" },
+	{ "Playback", NULL, "Clock Domain Transfer" },
+	{ "Capture", NULL, "Serial Output Routing" },
+	{ "Capture", NULL, "Serial Ports" },
+	{ "Capture", NULL, "Clock Domain Transfer" },
+
+	{ "AOUTL", NULL, "Left Lineout Mixer" },
+	{ "AOUTR", NULL, "Right Lineout Mixer" },
+	{ "SP", NULL, "Speaker" },
+};
+
+static const struct snd_soc_dapm_route adau1781_adc_dapm_routes[] = {
+	{ "Left PGA", NULL, "LMIC" },
+	{ "Right PGA", NULL, "RMIC" },
+
+	{ "Left Decimator", NULL, "Left PGA" },
+	{ "Right Decimator", NULL, "Right PGA" },
+};
+
+static const char * const adau1781_dmic_select_text[] = {
+	"DMIC1", "DMIC2",
+};
+
+static SOC_ENUM_SINGLE_VIRT_DECL(adau1781_dmic_select_enum,
+	adau1781_dmic_select_text);
+
+static const struct snd_kcontrol_new adau1781_dmic_mux =
+	SOC_DAPM_ENUM("DMIC Select", adau1781_dmic_select_enum);
+
+static const struct snd_soc_dapm_widget adau1781_dmic_dapm_widgets[] = {
+	SND_SOC_DAPM_MUX("DMIC Select", SND_SOC_NOPM, 0, 0, &adau1781_dmic_mux),
+
+	SND_SOC_DAPM_ADC("DMIC1", NULL, ADAU1781_DMIC_BEEP_CTRL, 4, 0),
+	SND_SOC_DAPM_ADC("DMIC2", NULL, ADAU1781_DMIC_BEEP_CTRL, 5, 0),
+};
+
+static const struct snd_soc_dapm_route adau1781_dmic_dapm_routes[] = {
+	{ "DMIC1", NULL, "LMIC" },
+	{ "DMIC2", NULL, "RMIC" },
+
+	{ "DMIC1", NULL, "Digital Mic" },
+	{ "DMIC2", NULL, "Digital Mic" },
+
+	{ "DMIC Select", "DMIC1", "DMIC1" },
+	{ "DMIC Select", "DMIC2", "DMIC2" },
+
+	{ "Left Decimator", NULL, "DMIC Select" },
+	{ "Right Decimator", NULL, "DMIC Select" },
+};
+
+static int adau1781_set_bias_level(struct snd_soc_codec *codec,
+		enum snd_soc_bias_level level)
+{
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+
+	switch (level) {
+	case SND_SOC_BIAS_ON:
+		break;
+	case SND_SOC_BIAS_PREPARE:
+		break;
+	case SND_SOC_BIAS_STANDBY:
+		regmap_update_bits(adau->regmap, ADAU17X1_CLOCK_CONTROL,
+			ADAU17X1_CLOCK_CONTROL_SYSCLK_EN,
+			ADAU17X1_CLOCK_CONTROL_SYSCLK_EN);
+
+		/* Precharge */
+		regmap_update_bits(adau->regmap, ADAU1781_DIG_PWDN1, 0x8, 0x8);
+		break;
+	case SND_SOC_BIAS_OFF:
+		regmap_update_bits(adau->regmap, ADAU1781_DIG_PWDN1, 0xc, 0x0);
+		regmap_update_bits(adau->regmap, ADAU17X1_CLOCK_CONTROL,
+			ADAU17X1_CLOCK_CONTROL_SYSCLK_EN, 0);
+		break;
+	}
+
+	codec->dapm.bias_level = level;
+	return 0;
+}
+
+static bool adau1781_readable_register(struct device *dev, unsigned int reg)
+{
+	switch (reg) {
+	case ADAU1781_DMIC_BEEP_CTRL:
+	case ADAU1781_LEFT_PGA:
+	case ADAU1781_RIGHT_PGA:
+	case ADAU1781_LEFT_PLAYBACK_MIXER:
+	case ADAU1781_RIGHT_PLAYBACK_MIXER:
+	case ADAU1781_MONO_PLAYBACK_MIXER:
+	case ADAU1781_LEFT_LINEOUT:
+	case ADAU1781_RIGHT_LINEOUT:
+	case ADAU1781_SPEAKER:
+	case ADAU1781_BEEP_ZC:
+	case ADAU1781_DEJITTER:
+	case ADAU1781_DIG_PWDN0:
+	case ADAU1781_DIG_PWDN1:
+		return true;
+	default:
+		break;
+	}
+
+	return adau17x1_readable_register(dev, reg);
+}
+
+static int adau1781_set_input_mode(struct adau *adau, unsigned int reg,
+	bool differential)
+{
+	unsigned int val;
+
+	if (differential)
+		val = ADAU1781_INPUT_DIFFERNTIAL;
+	else
+		val = 0;
+
+	return regmap_update_bits(adau->regmap, reg,
+		ADAU1781_INPUT_DIFFERNTIAL, val);
+}
+
+static int adau1781_codec_probe(struct snd_soc_codec *codec)
+{
+	struct adau1781_platform_data *pdata = dev_get_platdata(codec->dev);
+	struct adau *adau = snd_soc_codec_get_drvdata(codec);
+	const char *firmware;
+	int ret;
+
+	ret = adau17x1_add_widgets(codec);
+	if (ret)
+		return ret;
+
+	if (pdata) {
+		ret = adau1781_set_input_mode(adau, ADAU1781_LEFT_PGA,
+			pdata->left_input_differential);
+		if (ret)
+			return ret;
+		ret = adau1781_set_input_mode(adau, ADAU1781_RIGHT_PGA,
+			pdata->right_input_differential);
+		if (ret)
+			return ret;
+	}
+
+	if (pdata && pdata->use_dmic) {
+		ret = snd_soc_dapm_new_controls(&codec->dapm,
+			adau1781_dmic_dapm_widgets,
+			ARRAY_SIZE(adau1781_dmic_dapm_widgets));
+		if (ret)
+			return ret;
+		ret = snd_soc_dapm_add_routes(&codec->dapm,
+			adau1781_dmic_dapm_routes,
+			ARRAY_SIZE(adau1781_dmic_dapm_routes));
+		if (ret)
+			return ret;
+	} else {
+		ret = snd_soc_dapm_add_routes(&codec->dapm,
+			adau1781_adc_dapm_routes,
+			ARRAY_SIZE(adau1781_adc_dapm_routes));
+		if (ret)
+			return ret;
+	}
+
+	switch (adau->type) {
+	case ADAU1381:
+		firmware = ADAU1381_FIRMWARE;
+		break;
+	case ADAU1781:
+		firmware = ADAU1781_FIRMWARE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = adau17x1_add_routes(codec);
+	if (ret < 0)
+		return ret;
+
+	ret = adau17x1_load_firmware(adau, codec->dev, firmware);
+	if (ret)
+		dev_warn(codec->dev, "Failed to load firmware\n");
+
+	return 0;
+}
+
+static const struct snd_soc_codec_driver adau1781_codec_driver = {
+	.probe = adau1781_codec_probe,
+	.suspend = adau17x1_suspend,
+	.resume = adau17x1_resume,
+	.set_bias_level = adau1781_set_bias_level,
+
+	.controls = adau1781_controls,
+	.num_controls = ARRAY_SIZE(adau1781_controls),
+	.dapm_widgets = adau1781_dapm_widgets,
+	.num_dapm_widgets = ARRAY_SIZE(adau1781_dapm_widgets),
+	.dapm_routes = adau1781_dapm_routes,
+	.num_dapm_routes = ARRAY_SIZE(adau1781_dapm_routes),
+};
+
+#define ADAU1781_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE | \
+	SNDRV_PCM_FMTBIT_S32_LE)
+
+static struct snd_soc_dai_driver adau1781_dai_driver = {
+	.name = "adau-hifi",
+	.playback = {
+		.stream_name = "Playback",
+		.channels_min = 2,
+		.channels_max = 8,
+		.rates = SNDRV_PCM_RATE_8000_96000,
+		.formats = ADAU1781_FORMATS,
+	},
+	.capture = {
+		.stream_name = "Capture",
+		.channels_min = 2,
+		.channels_max = 8,
+		.rates = SNDRV_PCM_RATE_8000_96000,
+		.formats = ADAU1781_FORMATS,
+	},
+	.ops = &adau17x1_dai_ops,
+};
+
+const struct regmap_config adau1781_regmap_config = {
+	.val_bits		= 8,
+	.reg_bits		= 16,
+	.max_register		= 0x40f8,
+	.reg_defaults		= adau1781_reg_defaults,
+	.num_reg_defaults	= ARRAY_SIZE(adau1781_reg_defaults),
+	.readable_reg		= adau1781_readable_register,
+	.volatile_reg		= adau17x1_volatile_register,
+	.cache_type		= REGCACHE_RBTREE,
+};
+EXPORT_SYMBOL_GPL(adau1781_regmap_config);
+
+int adau1781_probe(struct device *dev, struct regmap *regmap,
+	enum adau17x1_type type, void (*switch_mode)(struct device *dev))
+{
+	int ret;
+
+	ret = adau17x1_probe(dev, regmap, type, switch_mode);
+	if (ret)
+		return ret;
+
+	return snd_soc_register_codec(dev, &adau1781_codec_driver,
+		&adau1781_dai_driver, 1);
+}
+EXPORT_SYMBOL_GPL(adau1781_probe);
+
+MODULE_DESCRIPTION("ASoC ADAU1381/ADAU1781 driver");
+MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>");
+MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/adau1781.h b/sound/soc/codecs/adau1781.h
new file mode 100644
index 000000000000..2b96e0a9ff2e
--- /dev/null
+++ b/sound/soc/codecs/adau1781.h
@@ -0,0 +1,23 @@
+/*
+ * ADAU1381/ADAU1781 driver
+ *
+ * Copyright 2014 Analog Devices Inc.
+ *  Author: Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * Licensed under the GPL-2.
+ */
+
+#ifndef __SOUND_SOC_CODECS_ADAU1781_H__
+#define __SOUND_SOC_CODECS_ADAU1781_H__
+
+#include <linux/regmap.h>
+#include "adau17x1.h"
+
+struct device;
+
+int adau1781_probe(struct device *dev, struct regmap *regmap,
+	enum adau17x1_type type, void (*switch_mode)(struct device *dev));
+
+extern const struct regmap_config adau1781_regmap_config;
+
+#endif
-- 
cgit 


From 2c1f1ff0f0d9e0df8c9b6d3697ac250900091541 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Mon, 14 Apr 2014 12:56:34 +0200
Subject: driver core: dev_set_drvdata returns void

dev_set_drvdata can no longer fail, so it could return void.

All callers have hopefully been updated to no longer check for the
return value.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/dd.c      | 3 +--
 include/linux/device.h | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index d14b6e895896..d21f4b8dc37b 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -600,9 +600,8 @@ void *dev_get_drvdata(const struct device *dev)
 }
 EXPORT_SYMBOL(dev_get_drvdata);
 
-int dev_set_drvdata(struct device *dev, void *data)
+void dev_set_drvdata(struct device *dev, void *data)
 {
 	dev->driver_data = data;
-	return 0;
 }
 EXPORT_SYMBOL(dev_set_drvdata);
diff --git a/include/linux/device.h b/include/linux/device.h
index 5c94ac3e7972..6d3a75773cd4 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -911,7 +911,7 @@ extern const char *device_get_devnode(struct device *dev,
 				      umode_t *mode, kuid_t *uid, kgid_t *gid,
 				      const char **tmp);
 extern void *dev_get_drvdata(const struct device *dev);
-extern int dev_set_drvdata(struct device *dev, void *data);
+extern void dev_set_drvdata(struct device *dev, void *data);
 
 static inline bool device_supports_offline(struct device *dev)
 {
-- 
cgit 


From a996d010b648788b615938f6a26be6cf08d96aaf Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Mon, 14 Apr 2014 12:58:53 +0200
Subject: driver core: Inline dev_set/get_drvdata

dev_set_drvdata and dev_get_drvdata are now simple enough again that
we can inline them as they used to be before commit b40284378.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/dd.c      | 16 ----------------
 include/linux/device.h | 12 ++++++++++--
 2 files changed, 10 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index ba03353ff243..e4ffbcf2f519 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -587,19 +587,3 @@ void driver_detach(struct device_driver *drv)
 		put_device(dev);
 	}
 }
-
-/*
- * These exports can't be _GPL due to .h files using this within them, and it
- * might break something that was previously working...
- */
-void *dev_get_drvdata(const struct device *dev)
-{
-	return dev->driver_data;
-}
-EXPORT_SYMBOL(dev_get_drvdata);
-
-void dev_set_drvdata(struct device *dev, void *data)
-{
-	dev->driver_data = data;
-}
-EXPORT_SYMBOL(dev_set_drvdata);
diff --git a/include/linux/device.h b/include/linux/device.h
index 6d3a75773cd4..1b18c886445c 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -826,6 +826,16 @@ static inline void set_dev_node(struct device *dev, int node)
 }
 #endif
 
+static inline void *dev_get_drvdata(const struct device *dev)
+{
+	return dev->driver_data;
+}
+
+static inline void dev_set_drvdata(struct device *dev, void *data)
+{
+	dev->driver_data = data;
+}
+
 static inline struct pm_subsys_data *dev_to_psd(struct device *dev)
 {
 	return dev ? dev->power.subsys_data : NULL;
@@ -910,8 +920,6 @@ extern int device_move(struct device *dev, struct device *new_parent,
 extern const char *device_get_devnode(struct device *dev,
 				      umode_t *mode, kuid_t *uid, kgid_t *gid,
 				      const char **tmp);
-extern void *dev_get_drvdata(const struct device *dev);
-extern void dev_set_drvdata(struct device *dev, void *data);
 
 static inline bool device_supports_offline(struct device *dev)
 {
-- 
cgit 


From 34470e0bfae223e3f22bd2bd6e0e1dac366c9290 Mon Sep 17 00:00:00 2001
From: David Fries <David@Fries.net>
Date: Tue, 8 Apr 2014 22:37:08 -0500
Subject: connector: allow multiple messages to be sent in one packet

This increases the amount of bundling to reduce the number of packets
sent.  For the one wire use there can be multiple struct
w1_netlink_cmd in a struct w1_netlink_msg and multiple of those in
struct cn_msg, and with this change multiple of those in a struct
nlmsghdr, and at each level the len identifies there being multiple of
the next.

Signed-off-by: David Fries <David@Fries.net>
Acked-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/connector/connector.txt | 13 ++++++++++---
 drivers/connector/connector.c         | 17 +++++++++++++++--
 include/linux/connector.h             |  1 +
 3 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/connector/connector.txt b/Documentation/connector/connector.txt
index e5c5f5e6ab70..e56abdb21975 100644
--- a/Documentation/connector/connector.txt
+++ b/Documentation/connector/connector.txt
@@ -24,7 +24,8 @@ netlink based networking for inter-process communication in a significantly
 easier way:
 
 int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
-void cn_netlink_send(struct cn_msg *msg, u32 __group, int gfp_mask);
+int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group, int gfp_mask);
+int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int gfp_mask);
 
 struct cb_id
 {
@@ -71,15 +72,21 @@ void cn_del_callback(struct cb_id *id);
  struct cb_id *id		- unique connector's user identifier.
 
 
-int cn_netlink_send(struct cn_msg *msg, u32 __groups, int gfp_mask);
+int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __groups, int gfp_mask);
+int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __groups, int gfp_mask);
 
  Sends message to the specified groups.  It can be safely called from
  softirq context, but may silently fail under strong memory pressure.
  If there are no listeners for given group -ESRCH can be returned.
 
  struct cn_msg *		- message header(with attached data).
+ u16 len			- for *_mult multiple cn_msg messages can be sent
+ u32 portid			- destination port.
+ 				  If non-zero the message will be sent to the
+				  given port, which should be set to the
+				  original sender.
  u32 __group			- destination group.
-				  If __group is zero, then appropriate group will
+				  If portid and __group are zero, then appropriate group will
 				  be searched through all registered connector users,
 				  and message will be delivered to the group which was
 				  created for user with the same ID as in msg.
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index b14f1d36f897..f612d68629dc 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -43,6 +43,8 @@ static struct cn_dev cdev;
 static int cn_already_initialized;
 
 /*
+ * Sends mult (multiple) cn_msg at a time.
+ *
  * msg->seq and msg->ack are used to determine message genealogy.
  * When someone sends message it puts there locally unique sequence
  * and random acknowledge numbers.  Sequence number may be copied into
@@ -62,10 +64,13 @@ static int cn_already_initialized;
  * the acknowledgement number in the original message + 1, then it is
  * a new message.
  *
+ * If msg->len != len, then additional cn_msg messages are expected following
+ * the first msg.
+ *
  * The message is sent to, the portid if given, the group if given, both if
  * both, or if both are zero then the group is looked up and sent there.
  */
-int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group,
+int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 __group,
 	gfp_t gfp_mask)
 {
 	struct cn_callback_entry *__cbq;
@@ -98,7 +103,7 @@ int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group,
 	if (!portid && !netlink_has_listeners(dev->nls, group))
 		return -ESRCH;
 
-	size = sizeof(*msg) + msg->len;
+	size = sizeof(*msg) + len;
 
 	skb = nlmsg_new(size, gfp_mask);
 	if (!skb)
@@ -121,6 +126,14 @@ int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group,
 					 gfp_mask);
 	return netlink_unicast(dev->nls, skb, portid, !(gfp_mask&__GFP_WAIT));
 }
+EXPORT_SYMBOL_GPL(cn_netlink_send_mult);
+
+/* same as cn_netlink_send_mult except msg->len is used for len */
+int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group,
+	gfp_t gfp_mask)
+{
+	return cn_netlink_send_mult(msg, msg->len, portid, __group, gfp_mask);
+}
 EXPORT_SYMBOL_GPL(cn_netlink_send);
 
 /*
diff --git a/include/linux/connector.h b/include/linux/connector.h
index be9c4747d511..f8fe8637d771 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -71,6 +71,7 @@ struct cn_dev {
 int cn_add_callback(struct cb_id *id, const char *name,
 		    void (*callback)(struct cn_msg *, struct netlink_skb_parms *));
 void cn_del_callback(struct cb_id *);
+int cn_netlink_send_mult(struct cn_msg *msg, u16 len, u32 portid, u32 group, gfp_t gfp_mask);
 int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 group, gfp_t gfp_mask);
 
 int cn_queue_add_callback(struct cn_queue_dev *dev, const char *name,
-- 
cgit 


From 451ef1caa8698511bb7766344ccec9f08d5d294b Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 13 May 2014 09:05:26 -0700
Subject: init.h: Update initcall_sync variants to fix build errors

We are getting randconfig build errors on device drivers with
tristate Kconfig option if they are using custom initcall
levels. Rather than add ifdeffery into the drivers, let's add
the missing initcall_sync variants.

As the comment in init.h has kept people from updating the
list of initcalls that can be just module_init when the driver
is loaded as a loadable module, let's also update the comment
a bit to describe valid use cases for custom initcall levels.

While most drivers should nowadays use just regular module_init
because of the deferred probe, we do have quite a few custom
initcall levels left that we cannot remove until tested properly.

There are also still a few valid cases where a custom initcall
level might make sense that I'm aware of.

For example a bus snooping driver can provide information about
invalid bus access and is handy to load early when built in. But
there's no hard dependency to have it necessarily built in and
a loadable module is a valid option.

Another example is a driver implementing a Linux framework like
pinctrl framework. That driver may be needed early on some
platforms because of legacy reasons, while it can be just a
regular module_init on most platforms.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/init.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/init.h b/include/linux/init.h
index a3ba27076342..2df8e8dd10a4 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -297,16 +297,28 @@ void __init parse_early_options(char *cmdline);
 
 #else /* MODULE */
 
-/* Don't use these in loadable modules, but some people do... */
+/*
+ * In most cases loadable modules do not need custom
+ * initcall levels. There are still some valid cases where
+ * a driver may be needed early if built in, and does not
+ * matter when built as a loadable module. Like bus
+ * snooping debug drivers.
+ */
 #define early_initcall(fn)		module_init(fn)
 #define core_initcall(fn)		module_init(fn)
+#define core_initcall_sync(fn)		module_init(fn)
 #define postcore_initcall(fn)		module_init(fn)
+#define postcore_initcall_sync(fn)	module_init(fn)
 #define arch_initcall(fn)		module_init(fn)
 #define subsys_initcall(fn)		module_init(fn)
+#define subsys_initcall_sync(fn)	module_init(fn)
 #define fs_initcall(fn)			module_init(fn)
+#define fs_initcall_sync(fn)		module_init(fn)
 #define rootfs_initcall(fn)		module_init(fn)
 #define device_initcall(fn)		module_init(fn)
+#define device_initcall_sync(fn)	module_init(fn)
 #define late_initcall(fn)		module_init(fn)
+#define late_initcall_sync(fn)		module_init(fn)
 
 #define console_initcall(fn)		module_init(fn)
 #define security_initcall(fn)		module_init(fn)
-- 
cgit 


From 78e1da627040ca49c41b456db707342ef210ae0f Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <sw@simonwunderlich.de>
Date: Wed, 16 Apr 2014 11:56:45 +0200
Subject: sysfs.h: don't return a void-valued expression in sysfs_remove_file

Sparse was complaining about that:
include/linux/sysfs.h:432:9: warning: returning void-valued expression

Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/sysfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 5ffaa3443712..f97d0dbb59fa 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -437,7 +437,7 @@ static inline int __must_check sysfs_create_file(struct kobject *kobj,
 static inline void sysfs_remove_file(struct kobject *kobj,
 				     const struct attribute *attr)
 {
-	return sysfs_remove_file_ns(kobj, attr, NULL);
+	sysfs_remove_file_ns(kobj, attr, NULL);
 }
 
 static inline int sysfs_rename_link(struct kobject *kobj, struct kobject *target,
-- 
cgit 


From 26fc9cd200ec839e0b3095e05ae018f27314e7aa Mon Sep 17 00:00:00 2001
From: Jianyu Zhan <nasa4836@gmail.com>
Date: Sat, 26 Apr 2014 15:40:28 +0800
Subject: kernfs: move the last knowledge of sysfs out from kernfs

There is still one residue of sysfs remaining: the sb_magic
SYSFS_MAGIC. However this should be kernfs user specific,
so this patch moves it out. Kernfs users should specify their
magic number while mounting.

Signed-off-by: Jianyu Zhan <nasa4836@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/kernfs/mount.c      | 11 ++++++-----
 fs/sysfs/mount.c       |  4 +++-
 include/linux/kernfs.h | 13 ++++++++-----
 kernel/cgroup.c        |  4 +++-
 4 files changed, 20 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index f25a7c0c3cdc..d171b98a6cdd 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -62,7 +62,7 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
 	return NULL;
 }
 
-static int kernfs_fill_super(struct super_block *sb)
+static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
 {
 	struct kernfs_super_info *info = kernfs_info(sb);
 	struct inode *inode;
@@ -71,7 +71,7 @@ static int kernfs_fill_super(struct super_block *sb)
 	info->sb = sb;
 	sb->s_blocksize = PAGE_CACHE_SIZE;
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
-	sb->s_magic = SYSFS_MAGIC;
+	sb->s_magic = magic;
 	sb->s_op = &kernfs_sops;
 	sb->s_time_gran = 1;
 
@@ -132,6 +132,7 @@ const void *kernfs_super_ns(struct super_block *sb)
  * @fs_type: file_system_type of the fs being mounted
  * @flags: mount flags specified for the mount
  * @root: kernfs_root of the hierarchy being mounted
+ * @magic: file system specific magic number
  * @new_sb_created: tell the caller if we allocated a new superblock
  * @ns: optional namespace tag of the mount
  *
@@ -143,8 +144,8 @@ const void *kernfs_super_ns(struct super_block *sb)
  * The return value can be passed to the vfs layer verbatim.
  */
 struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
-			       struct kernfs_root *root, bool *new_sb_created,
-			       const void *ns)
+				struct kernfs_root *root, unsigned long magic,
+				bool *new_sb_created, const void *ns)
 {
 	struct super_block *sb;
 	struct kernfs_super_info *info;
@@ -169,7 +170,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
 	if (!sb->s_root) {
 		struct kernfs_super_info *info = kernfs_info(sb);
 
-		error = kernfs_fill_super(sb);
+		error = kernfs_fill_super(sb, magic);
 		if (error) {
 			deactivate_locked_super(sb);
 			return ERR_PTR(error);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 8794423f7efb..8a49486bf30c 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -13,6 +13,7 @@
 #define DEBUG
 
 #include <linux/fs.h>
+#include <linux/magic.h>
 #include <linux/mount.h>
 #include <linux/init.h>
 #include <linux/user_namespace.h>
@@ -38,7 +39,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
 	}
 
 	ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
-	root = kernfs_mount_ns(fs_type, flags, sysfs_root, &new_sb, ns);
+	root = kernfs_mount_ns(fs_type, flags, sysfs_root,
+				SYSFS_MAGIC, &new_sb, ns);
 	if (IS_ERR(root) || !new_sb)
 		kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
 	return root;
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index c841688a78a3..17aa1cce6f8e 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -301,8 +301,8 @@ void kernfs_notify(struct kernfs_node *kn);
 
 const void *kernfs_super_ns(struct super_block *sb);
 struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
-			       struct kernfs_root *root, bool *new_sb_created,
-			       const void *ns);
+			       struct kernfs_root *root, unsigned long magic,
+			       bool *new_sb_created, const void *ns);
 void kernfs_kill_sb(struct super_block *sb);
 
 void kernfs_init(void);
@@ -395,7 +395,8 @@ static inline const void *kernfs_super_ns(struct super_block *sb)
 
 static inline struct dentry *
 kernfs_mount_ns(struct file_system_type *fs_type, int flags,
-		struct kernfs_root *root, bool *new_sb_created, const void *ns)
+		struct kernfs_root *root, unsigned long magic,
+		bool *new_sb_created, const void *ns)
 { return ERR_PTR(-ENOSYS); }
 
 static inline void kernfs_kill_sb(struct super_block *sb) { }
@@ -453,9 +454,11 @@ static inline int kernfs_rename(struct kernfs_node *kn,
 
 static inline struct dentry *
 kernfs_mount(struct file_system_type *fs_type, int flags,
-	     struct kernfs_root *root, bool *new_sb_created)
+		struct kernfs_root *root, unsigned long magic,
+		bool *new_sb_created)
 {
-	return kernfs_mount_ns(fs_type, flags, root, new_sb_created, NULL);
+	return kernfs_mount_ns(fs_type, flags, root,
+				magic, new_sb_created, NULL);
 }
 
 #endif	/* __LINUX_KERNFS_H */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3f1ca934a237..ceee0c54c6a4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -33,6 +33,7 @@
 #include <linux/init_task.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/magic.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
 #include <linux/mount.h>
@@ -1604,7 +1605,8 @@ out_unlock:
 	if (ret)
 		return ERR_PTR(ret);
 
-	dentry = kernfs_mount(fs_type, flags, root->kf_root, &new_sb);
+	dentry = kernfs_mount(fs_type, flags, root->kf_root,
+				CGROUP_SUPER_MAGIC, &new_sb);
 	if (IS_ERR(dentry) || !new_sb)
 		cgroup_put(&root->cgrp);
 	return dentry;
-- 
cgit 


From 2d53139f31626bad6f8983d8e519ddde2cbba921 Mon Sep 17 00:00:00 2001
From: David Mosberger <davidm@egauge.net>
Date: Mon, 28 Apr 2014 22:14:07 -0600
Subject: Add support for using a MAX3421E chip as a host driver.

Signed-off-by: David Mosberger <davidm@egauge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/Makefile                      |    1 +
 drivers/usb/host/Kconfig                  |   11 +
 drivers/usb/host/Makefile                 |    1 +
 drivers/usb/host/max3421-hcd.c            | 1937 +++++++++++++++++++++++++++++
 include/linux/platform_data/max3421-hcd.h |   23 +
 5 files changed, 1973 insertions(+)
 create mode 100644 drivers/usb/host/max3421-hcd.c
 create mode 100644 include/linux/platform_data/max3421-hcd.h

(limited to 'include/linux')

diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
index 1ae2bf39d84b..9bb672199703 100644
--- a/drivers/usb/Makefile
+++ b/drivers/usb/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_USB_IMX21_HCD)	+= host/
 obj-$(CONFIG_USB_FSL_MPH_DR_OF)	+= host/
 obj-$(CONFIG_USB_FUSBH200_HCD)	+= host/
 obj-$(CONFIG_USB_FOTG210_HCD)	+= host/
+obj-$(CONFIG_USB_MAX3421_HCD)	+= host/
 
 obj-$(CONFIG_USB_C67X00_HCD)	+= c67x00/
 
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 7a39ae86d5ce..52144c720a1d 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -342,6 +342,17 @@ config USB_FOTG210_HCD
 	  To compile this driver as a module, choose M here: the
 	  module will be called fotg210-hcd.
 
+config USB_MAX3421_HCD
+	tristate "MAX3421 HCD (USB-over-SPI) support"
+	depends on USB && SPI
+	---help---
+	  The Maxim MAX3421E chip supports standard USB 2.0-compliant
+	  full-speed devices either in host or peripheral mode.  This
+	  driver supports the host-mode of the MAX3421E only.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called max3421-hcd.
+
 config USB_OHCI_HCD
 	tristate "OHCI HCD (USB 1.1) support"
 	select ISP1301_OMAP if MACH_OMAP_H2 || MACH_OMAP_H3
diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile
index 7530468c9a4f..ea2bec52a4fb 100644
--- a/drivers/usb/host/Makefile
+++ b/drivers/usb/host/Makefile
@@ -70,3 +70,4 @@ obj-$(CONFIG_USB_HCD_BCMA)	+= bcma-hcd.o
 obj-$(CONFIG_USB_HCD_SSB)	+= ssb-hcd.o
 obj-$(CONFIG_USB_FUSBH200_HCD)	+= fusbh200-hcd.o
 obj-$(CONFIG_USB_FOTG210_HCD)	+= fotg210-hcd.o
+obj-$(CONFIG_USB_MAX3421_HCD)	+= max3421-hcd.o
diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c
new file mode 100644
index 000000000000..dfc74d6738db
--- /dev/null
+++ b/drivers/usb/host/max3421-hcd.c
@@ -0,0 +1,1937 @@
+/*
+ * MAX3421 Host Controller driver for USB.
+ *
+ * Author: David Mosberger-Tang <davidm@egauge.net>
+ *
+ * (C) Copyright 2014 David Mosberger-Tang <davidm@egauge.net>
+ *
+ * MAX3421 is a chip implementing a USB 2.0 Full-/Low-Speed host
+ * controller on a SPI bus.
+ *
+ * Based on:
+ *	o MAX3421E datasheet
+ *		http://datasheets.maximintegrated.com/en/ds/MAX3421E.pdf
+ *	o MAX3421E Programming Guide
+ *		http://www.hdl.co.jp/ftpdata/utl-001/AN3785.pdf
+ *	o gadget/dummy_hcd.c
+ *		For USB HCD implementation.
+ *	o Arduino MAX3421 driver
+ *	     https://github.com/felis/USB_Host_Shield_2.0/blob/master/Usb.cpp
+ *
+ * This file is licenced under the GPL v2.
+ *
+ * Important note on worst-case (full-speed) packet size constraints
+ * (See USB 2.0 Section 5.6.3 and following):
+ *
+ *	- control:	  64 bytes
+ *	- isochronous:	1023 bytes
+ *	- interrupt:	  64 bytes
+ *	- bulk:		  64 bytes
+ *
+ * Since the MAX3421 FIFO size is 64 bytes, we do not have to worry about
+ * multi-FIFO writes/reads for a single USB packet *except* for isochronous
+ * transfers.  We don't support isochronous transfers at this time, so we
+ * just assume that a USB packet always fits into a single FIFO buffer.
+ *
+ * NOTE: The June 2006 version of "MAX3421E Programming Guide"
+ * (AN3785) has conflicting info for the RCVDAVIRQ bit:
+ *
+ *	The description of RCVDAVIRQ says "The CPU *must* clear
+ *	this IRQ bit (by writing a 1 to it) before reading the
+ *	RCVFIFO data."
+ *
+ * However, the earlier section on "Programming BULK-IN
+ * Transfers" says that:
+ *
+ *	After the CPU retrieves the data, it clears the
+ *	RCVDAVIRQ bit.
+ *
+ * The December 2006 version has been corrected and it consistently
+ * states the second behavior is the correct one.
+ *
+ * Synchronous SPI transactions sleep so we can't perform any such
+ * transactions while holding a spin-lock (and/or while interrupts are
+ * masked).  To achieve this, all SPI transactions are issued from a
+ * single thread (max3421_spi_thread).
+ */
+
+#include <linux/module.h>
+#include <linux/spi/spi.h>
+#include <linux/usb.h>
+#include <linux/usb/hcd.h>
+
+#include <linux/platform_data/max3421-hcd.h>
+
+/* Human-readable strings identifying this driver and its version. */
+#define DRIVER_DESC	"MAX3421 USB Host-Controller Driver"
+#define DRIVER_VERSION	"1.0"
+
+/* 11-bit counter that wraps around (USB 2.0 Section 8.3.3): */
+#define USB_MAX_FRAME_NUMBER	0x7ff
+#define USB_MAX_RETRIES		3 /* # of retries before error is reported */
+
+/*
+ * Max. # of times we're willing to retransmit a request immediately in
+ * response to a NAK.  Afterwards, we fall back on trying once a frame.
+ */
+#define NAK_MAX_FAST_RETRANSMITS	2
+
+#define POWER_BUDGET	500	/* in mA; use 8 for low-power port testing */
+
+/*
+ * Port-change mask: the USB_PORT_STAT_C_* bits shifted into the upper
+ * 16 bits, which is where max3421_hcd.port_status keeps its change
+ * mask.
+ */
+#define PORT_C_MASK	((USB_PORT_STAT_C_CONNECTION |	\
+			  USB_PORT_STAT_C_ENABLE |	\
+			  USB_PORT_STAT_C_SUSPEND |	\
+			  USB_PORT_STAT_C_OVERCURRENT | \
+			  USB_PORT_STAT_C_RESET) << 16)
+
+/*
+ * Values stored in max3421_hcd.rh_state ("rh" presumably stands for
+ * root hub; the code driving the transitions is not in this part of
+ * the file).
+ */
+enum max3421_rh_state {
+	MAX3421_RH_RESET,
+	MAX3421_RH_SUSPENDED,
+	MAX3421_RH_RUNNING
+};
+
+/*
+ * Stage of the transfer in progress on an endpoint (kept in
+ * max3421_ep.pkt_state).  Control transfers step through all three
+ * stages; other transfers start directly in PKT_STATE_TRANSFER.
+ */
+enum pkt_state {
+	PKT_STATE_SETUP,	/* waiting to send setup packet to ctrl pipe */
+	PKT_STATE_TRANSFER,	/* waiting to xfer transfer_buffer */
+	PKT_STATE_TERMINATE	/* waiting to terminate control transfer */
+};
+
+/*
+ * Scheduler pass (kept in max3421_hcd.sched_pass).
+ * max3421_select_and_start_urb() only considers interrupt and
+ * isochronous endpoints during SCHED_PASS_PERIODIC, only control and
+ * bulk endpoints during SCHED_PASS_NON_PERIODIC, and stops looking
+ * once SCHED_PASS_DONE is reached.
+ */
+enum scheduling_pass {
+	SCHED_PASS_PERIODIC,
+	SCHED_PASS_NON_PERIODIC,
+	SCHED_PASS_DONE
+};
+
+/*
+ * Per-controller driver state.  It lives in the hcd_priv area of the
+ * struct usb_hcd (see hcd_to_max3421() / max3421_to_hcd()).
+ */
+struct max3421_hcd {
+	/* taken around ep_list walks and port_status updates */
+	spinlock_t lock;
+
+	/* thread woken by the interrupt handler */
+	struct task_struct *spi_thread;
+
+	/* presumably the link used by max3421_hcd_list -- TODO confirm */
+	struct max3421_hcd *next;
+
+	enum max3421_rh_state rh_state;
+	/* lower 16 bits contain port status, upper 16 bits the change mask: */
+	u32 port_status;
+
+	unsigned active:1;
+
+	struct list_head ep_list;	/* list of EP's with work */
+
+	/*
+	 * The following are owned by spi_thread (may be accessed by
+	 * SPI-thread without acquiring the HCD lock):
+	 */
+	u8 rev;				/* chip revision */
+	u16 frame_number;
+	/*
+	 * URB we're currently processing.  Must not be reset to NULL
+	 * unless MAX3421E chip is idle:
+	 */
+	struct urb *curr_urb;
+	enum scheduling_pass sched_pass;
+	struct usb_device *loaded_dev;	/* dev that's loaded into the chip */
+	int loaded_epnum;		/* epnum whose toggles are loaded */
+	int urb_done;			/* > 0 -> no errors, < 0: errno */
+	/* size in bytes of the packet being sent / most recently received */
+	size_t curr_len;
+	/*
+	 * Host interrupts (BIT(MAX3421_HI_*_BIT)) the driver currently
+	 * wants enabled; the code writing it to the chip is not in this
+	 * part of the file.
+	 */
+	u8 hien;
+	/* copy of the value last written to the MODE register */
+	u8 mode;
+	u8 iopins[2];
+	unsigned int do_enable_irq:1;
+	unsigned int do_reset_hcd:1;
+	unsigned int do_reset_port:1;
+	unsigned int do_check_unlink:1;
+	unsigned int do_iopin_update:1;
+#ifdef DEBUG
+	/* per-result-code counters, indexed by the HRSL result code */
+	unsigned long err_stat[16];
+#endif
+};
+
+/*
+ * Per-endpoint driver state, reached through usb_host_endpoint.hcpriv.
+ */
+struct max3421_ep {
+	struct usb_host_endpoint *ep;	/* the endpoint this state is for */
+	struct list_head ep_list;	/* entry in max3421_hcd.ep_list */
+	u32 naks;			/* NAKs seen since last good transfer */
+	u16 last_active;		/* frame # this ep was last active */
+	enum pkt_state pkt_state;
+	u8 retries;			/* error retries since last good transfer */
+	u8 retransmit;			/* packet needs retransmission */
+};
+
+/*
+ * Head of a list of controller instances (presumably chained through
+ * max3421_hcd.next; the list is maintained outside this part of the
+ * file).
+ */
+static struct max3421_hcd *max3421_hcd_list;
+
+/* Size in bytes of one chip FIFO; limits the supported packet size. */
+#define MAX3421_FIFO_SIZE	64
+
+#define MAX3421_SPI_DIR_RD	0	/* read register from MAX3421 */
+#define MAX3421_SPI_DIR_WR	1	/* write register to MAX3421 */
+
+/*
+ * SPI commands: the first byte of every SPI transaction holds the
+ * register number shifted left by MAX3421_SPI_REG_SHIFT and the
+ * direction shifted left by MAX3421_SPI_DIR_SHIFT.
+ */
+#define MAX3421_SPI_DIR_SHIFT	1
+#define MAX3421_SPI_REG_SHIFT	3
+
+/* Register numbers, as used in the SPI command byte: */
+#define MAX3421_REG_RCVFIFO	1
+#define MAX3421_REG_SNDFIFO	2
+#define MAX3421_REG_SUDFIFO	4
+#define MAX3421_REG_RCVBC	6
+#define MAX3421_REG_SNDBC	7
+#define MAX3421_REG_USBIRQ	13
+#define MAX3421_REG_USBIEN	14
+#define MAX3421_REG_USBCTL	15
+#define MAX3421_REG_CPUCTL	16
+#define MAX3421_REG_PINCTL	17
+#define MAX3421_REG_REVISION	18
+#define MAX3421_REG_IOPINS1	20
+#define MAX3421_REG_IOPINS2	21
+#define MAX3421_REG_GPINIRQ	22
+#define MAX3421_REG_GPINIEN	23
+#define MAX3421_REG_GPINPOL	24
+#define MAX3421_REG_HIRQ	25
+#define MAX3421_REG_HIEN	26
+#define MAX3421_REG_MODE	27
+#define MAX3421_REG_PERADDR	28
+#define MAX3421_REG_HCTL	29
+#define MAX3421_REG_HXFR	30
+#define MAX3421_REG_HRSL	31
+
+/* Bit numbers named after the USBIRQ register: */
+enum {
+	MAX3421_USBIRQ_OSCOKIRQ_BIT = 0,
+	MAX3421_USBIRQ_NOVBUSIRQ_BIT = 5,
+	MAX3421_USBIRQ_VBUSIRQ_BIT
+};
+
+/* Bit numbers named after the CPUCTL register: */
+enum {
+	MAX3421_CPUCTL_IE_BIT = 0,
+	MAX3421_CPUCTL_PULSEWID0_BIT = 6,
+	MAX3421_CPUCTL_PULSEWID1_BIT
+};
+
+/* Bit numbers named after the USBCTL register: */
+enum {
+	MAX3421_USBCTL_PWRDOWN_BIT = 4,
+	MAX3421_USBCTL_CHIPRES_BIT
+};
+
+/* Bit numbers named after the PINCTL register: */
+enum {
+	MAX3421_PINCTL_GPXA_BIT	= 0,
+	MAX3421_PINCTL_GPXB_BIT,
+	MAX3421_PINCTL_POSINT_BIT,
+	MAX3421_PINCTL_INTLEVEL_BIT,
+	MAX3421_PINCTL_FDUPSPI_BIT,
+	MAX3421_PINCTL_EP0INAK_BIT,
+	MAX3421_PINCTL_EP2INAK_BIT,
+	MAX3421_PINCTL_EP3INAK_BIT,
+};
+
+/*
+ * Host interrupt bit numbers; used both with the HIRQ register and
+ * with the max3421_hcd.hien enable mask:
+ */
+enum {
+	MAX3421_HI_BUSEVENT_BIT = 0,	/* bus-reset/-resume */
+	MAX3421_HI_RWU_BIT,		/* remote wakeup */
+	MAX3421_HI_RCVDAV_BIT,		/* receive FIFO data available */
+	MAX3421_HI_SNDBAV_BIT,		/* send buffer available */
+	MAX3421_HI_SUSDN_BIT,		/* suspend operation done */
+	MAX3421_HI_CONDET_BIT,		/* peripheral connect/disconnect */
+	MAX3421_HI_FRAME_BIT,		/* frame generator */
+	MAX3421_HI_HXFRDN_BIT,		/* host transfer done */
+};
+
+/*
+ * Bit numbers in the HCTL register.  The toggle bits come in pairs:
+ * the driver sets bit (RCVTOG0 + toggle) resp. (SNDTOG0 + toggle) to
+ * load a receive resp. send toggle value into the chip.
+ */
+enum {
+	MAX3421_HCTL_BUSRST_BIT = 0,
+	MAX3421_HCTL_FRMRST_BIT,
+	MAX3421_HCTL_SAMPLEBUS_BIT,
+	MAX3421_HCTL_SIGRSM_BIT,
+	MAX3421_HCTL_RCVTOG0_BIT,
+	MAX3421_HCTL_RCVTOG1_BIT,
+	MAX3421_HCTL_SNDTOG0_BIT,
+	MAX3421_HCTL_SNDTOG1_BIT
+};
+
+/* Bit numbers in the MODE register (shadowed in max3421_hcd.mode): */
+enum {
+	MAX3421_MODE_HOST_BIT = 0,
+	MAX3421_MODE_LOWSPEED_BIT,
+	MAX3421_MODE_HUBPRE_BIT,
+	MAX3421_MODE_SOFKAENAB_BIT,
+	MAX3421_MODE_SEPIRQ_BIT,
+	MAX3421_MODE_DELAYISO_BIT,
+	MAX3421_MODE_DMPULLDN_BIT,
+	MAX3421_MODE_DPPULLDN_BIT
+};
+
+/*
+ * Layout of the HRSL register: the low four bits
+ * (MAX3421_HRSL_RESULT_MASK) hold one of the result codes
+ * MAX3421_HRSL_OK..MAX3421_HRSL_BABBLE, the upper four bits are
+ * individual status bits.
+ */
+enum {
+	MAX3421_HRSL_OK = 0,
+	MAX3421_HRSL_BUSY,
+	MAX3421_HRSL_BADREQ,
+	MAX3421_HRSL_UNDEF,
+	MAX3421_HRSL_NAK,
+	MAX3421_HRSL_STALL,
+	MAX3421_HRSL_TOGERR,
+	MAX3421_HRSL_WRONGPID,
+	MAX3421_HRSL_BADBC,
+	MAX3421_HRSL_PIDERR,
+	MAX3421_HRSL_PKTERR,
+	MAX3421_HRSL_CRCERR,
+	MAX3421_HRSL_KERR,
+	MAX3421_HRSL_JERR,
+	MAX3421_HRSL_TIMEOUT,
+	MAX3421_HRSL_BABBLE,
+	MAX3421_HRSL_RESULT_MASK = 0xf,
+	MAX3421_HRSL_RCVTOGRD_BIT = 4,
+	MAX3421_HRSL_SNDTOGRD_BIT,
+	MAX3421_HRSL_KSTATUS_BIT,
+	MAX3421_HRSL_JSTATUS_BIT
+};
+
+/*
+ * Map an HRSL result code (HRSL register value masked with
+ * MAX3421_HRSL_RESULT_MASK) to the errno reported for the URB.
+ * Return same error-codes as ohci.h:cc_to_error:
+ */
+static const int hrsl_to_error[] = {
+	[MAX3421_HRSL_OK] =		0,
+	[MAX3421_HRSL_BUSY] =		-EINVAL,
+	[MAX3421_HRSL_BADREQ] =		-EINVAL,
+	[MAX3421_HRSL_UNDEF] =		-EINVAL,
+	[MAX3421_HRSL_NAK] =		-EAGAIN,
+	[MAX3421_HRSL_STALL] =		-EPIPE,
+	[MAX3421_HRSL_TOGERR] =		-EILSEQ,
+	[MAX3421_HRSL_WRONGPID] =	-EPROTO,
+	[MAX3421_HRSL_BADBC] =		-EREMOTEIO,
+	[MAX3421_HRSL_PIDERR] =		-EPROTO,
+	[MAX3421_HRSL_PKTERR] =		-EPROTO,
+	[MAX3421_HRSL_CRCERR] =		-EILSEQ,
+	[MAX3421_HRSL_KERR] =		-EIO,
+	[MAX3421_HRSL_JERR] =		-EIO,
+	[MAX3421_HRSL_TIMEOUT] =	-ETIME,
+	[MAX3421_HRSL_BABBLE] =		-EOVERFLOW
+};
+
+/*
+ * Command values written to the HXFR register to start a host
+ * transfer.
+ *
+ * See http://www.beyondlogic.org/usbnutshell/usb4.shtml#Control for a
+ * reasonable overview of how control transfers use the IN/OUT
+ * tokens.
+ */
+#define MAX3421_HXFR_BULK_IN(ep)	(0x00 | (ep))	/* bulk or interrupt */
+#define MAX3421_HXFR_SETUP		 0x10
+#define MAX3421_HXFR_BULK_OUT(ep)	(0x20 | (ep))	/* bulk or interrupt */
+#define MAX3421_HXFR_ISO_IN(ep)		(0x40 | (ep))
+#define MAX3421_HXFR_ISO_OUT(ep)	(0x60 | (ep))
+#define MAX3421_HXFR_HS_IN		 0x80		/* handshake in */
+#define MAX3421_HXFR_HS_OUT		 0xa0		/* handshake out */
+
+/* Place value @val at bit position @bit. */
+#define field(val, bit)	((val) << (bit))
+
+/*
+ * Distance from frame number @right to frame number @left, counted
+ * modulo the size of the frame counter (USB_MAX_FRAME_NUMBER + 1).
+ * The result is never negative.
+ */
+static inline s16
+frame_diff(u16 left, u16 right)
+{
+	unsigned int delta = (unsigned int) (left - right);
+
+	return delta % (USB_MAX_FRAME_NUMBER + 1);
+}
+
+/* Return the driver-private state stored in @hcd's hcd_priv area. */
+static inline struct max3421_hcd *
+hcd_to_max3421(struct usb_hcd *hcd)
+{
+	void *priv = hcd->hcd_priv;
+
+	return priv;
+}
+
+/* Inverse of hcd_to_max3421(): return the usb_hcd embedding @max3421_hcd. */
+static inline struct usb_hcd *
+max3421_to_hcd(struct max3421_hcd *max3421_hcd)
+{
+	void *priv = max3421_hcd;
+
+	return container_of(priv, struct usb_hcd, hcd_priv);
+}
+
+/*
+ * Read the 8-bit register @reg of the MAX3421 and return its value.
+ *
+ * A single two-byte full-duplex transfer is used: the first byte sent
+ * is the command (register number and read direction), the register
+ * value arrives in the second byte received.  The result of spi_sync()
+ * is not checked, so a failed transfer is not reported to the caller.
+ *
+ * spi_sync() may sleep; must not be called in atomic context.
+ */
+static u8
+spi_rd8(struct usb_hcd *hcd, unsigned int reg)
+{
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct spi_transfer transfer;
+	/*
+	 * The transfer is two bytes long, so two bytes are fetched
+	 * from the TX buffer.  It must therefore hold two bytes, too;
+	 * the second one is only a filler.
+	 */
+	u8 tx_data[2];
+	/*
+	 * RX data must be in its own cache-line so it stays flushed
+	 * from the cache until the transfer is complete.  Otherwise,
+	 * we get stale data from the cache.
+	 */
+	u8 rx_data[SMP_CACHE_BYTES] ____cacheline_aligned;
+	struct spi_message msg;
+
+	memset(&transfer, 0, sizeof(transfer));
+
+	spi_message_init(&msg);
+
+	tx_data[0] = (field(reg, MAX3421_SPI_REG_SHIFT) |
+		      field(MAX3421_SPI_DIR_RD, MAX3421_SPI_DIR_SHIFT));
+	tx_data[1] = 0;
+
+	transfer.tx_buf = tx_data;
+	transfer.rx_buf = rx_data;
+	transfer.len = 2;
+
+	spi_message_add_tail(&transfer, &msg);
+	spi_sync(spi, &msg);
+
+	return rx_data[1];
+}
+
+/*
+ * Write the 8-bit value @val to register @reg of the MAX3421, using a
+ * single two-byte transfer (command byte followed by the value).  The
+ * result of spi_sync() is not checked.
+ */
+static void
+spi_wr8(struct usb_hcd *hcd, unsigned int reg, u8 val)
+{
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct spi_transfer xfer;
+	struct spi_message msg;
+	u8 bytes[2];
+
+	bytes[0] = (field(reg, MAX3421_SPI_REG_SHIFT) |
+		    field(MAX3421_SPI_DIR_WR, MAX3421_SPI_DIR_SHIFT));
+	bytes[1] = val;
+
+	memset(&xfer, 0, sizeof(xfer));
+	xfer.tx_buf = bytes;
+	xfer.len = 2;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer, &msg);
+	spi_sync(spi, &msg);
+}
+
+/*
+ * Read @len bytes from register @reg of the MAX3421 into @buf.
+ *
+ * One SPI message made of two transfers is used: a one-byte command
+ * (register number and read direction) followed by the @len data
+ * bytes.  The result of spi_sync() is not checked.
+ */
+static void
+spi_rd_buf(struct usb_hcd *hcd, unsigned int reg, void *buf, size_t len)
+{
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct spi_transfer xfers[2];
+	struct spi_transfer *hdr = &xfers[0], *payload = &xfers[1];
+	struct spi_message msg;
+	u8 cmd = (field(reg, MAX3421_SPI_REG_SHIFT) |
+		  field(MAX3421_SPI_DIR_RD, MAX3421_SPI_DIR_SHIFT));
+
+	memset(xfers, 0, sizeof(xfers));
+
+	/* command byte: */
+	hdr->tx_buf = &cmd;
+	hdr->len = 1;
+
+	/* data coming back from the chip: */
+	payload->rx_buf = buf;
+	payload->len = len;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(hdr, &msg);
+	spi_message_add_tail(payload, &msg);
+	spi_sync(spi, &msg);
+}
+
+/*
+ * Write the @len bytes at @buf to register @reg of the MAX3421.
+ *
+ * One SPI message made of two transfers is used: a one-byte command
+ * (register number and write direction) followed by the @len data
+ * bytes.  The result of spi_sync() is not checked.
+ */
+static void
+spi_wr_buf(struct usb_hcd *hcd, unsigned int reg, void *buf, size_t len)
+{
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct spi_transfer xfers[2];
+	struct spi_transfer *hdr = &xfers[0], *payload = &xfers[1];
+	struct spi_message msg;
+	u8 cmd = (field(reg, MAX3421_SPI_REG_SHIFT) |
+		  field(MAX3421_SPI_DIR_WR, MAX3421_SPI_DIR_SHIFT));
+
+	memset(xfers, 0, sizeof(xfers));
+
+	/* command byte: */
+	hdr->tx_buf = &cmd;
+	hdr->len = 1;
+
+	/* data going to the chip: */
+	payload->tx_buf = buf;
+	payload->len = len;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(hdr, &msg);
+	spi_message_add_tail(payload, &msg);
+	spi_sync(spi, &msg);
+}
+
+/*
+ * Select the LOWSPEED and HUBPRE mode bits needed to talk to @dev and
+ * write the MODE register if the resulting value differs from the
+ * shadow copy in max3421_hcd->mode.
+ *
+ * HUBPRE has to be set when the MAX3421E runs at full speed but
+ * addresses a low-speed device (i.e., one behind a hub), so that each
+ * low-speed packet is preceded by a full-speed PRE PID.  Possible
+ * configurations:
+ *
+ * Hub speed:	Device speed:	=>	LOWSPEED bit:	HUBPRE bit:
+ *	FULL	FULL		=>	0		0
+ *	FULL	LOW		=>	1		1
+ *	LOW	LOW		=>	1		0
+ *	LOW	FULL		=>	1		0
+ */
+static void
+max3421_set_speed(struct usb_hcd *hcd, struct usb_device *dev)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	const u8 lowspeed = BIT(MAX3421_MODE_LOWSPEED_BIT);
+	const u8 speed_bits = lowspeed | BIT(MAX3421_MODE_HUBPRE_BIT);
+	/* start out with both bits cleared (full-speed, no PRE): */
+	u8 new_mode = max3421_hcd->mode & ~speed_bits;
+
+	if (max3421_hcd->port_status & USB_PORT_STAT_LOW_SPEED)
+		/* the port itself runs at low speed */
+		new_mode |= lowspeed;
+	else if (dev->speed == USB_SPEED_LOW)
+		/* low-speed device on a full-speed port */
+		new_mode |= speed_bits;
+
+	if (new_mode == max3421_hcd->mode)
+		return;		/* chip is already set up correctly */
+
+	max3421_hcd->mode = new_mode;
+	spi_wr8(hcd, MAX3421_REG_MODE, max3421_hcd->mode);
+}
+
+/*
+ * Make endpoint @epnum of @dev the one whose data toggles and device
+ * address are loaded into the chip.
+ *
+ * Nothing is done if that endpoint is already loaded, unless
+ * @force_toggles is non-zero.  When switching away from another
+ * endpoint, that endpoint's toggles are first read back from the
+ * HRSL register and saved with usb_settoggle().
+ *
+ * Caller must NOT hold HCD spinlock.
+ */
+static void
+max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum,
+		    int force_toggles)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	int old_epnum, same_ep, rcvtog, sndtog;
+	struct usb_device *old_dev;
+	u8 hctl;
+
+	old_dev = max3421_hcd->loaded_dev;
+	old_epnum = max3421_hcd->loaded_epnum;
+
+	same_ep = (dev == old_dev && epnum == old_epnum);
+	if (same_ep && !force_toggles)
+		return;
+
+	if (old_dev && !same_ep) {
+		/* save the old end-points toggles: */
+		u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL);
+
+		rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1;
+		sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1;
+
+		/* no locking: HCD (i.e., we) own toggles, don't we? */
+		usb_settoggle(old_dev, old_epnum, 0, rcvtog);
+		usb_settoggle(old_dev, old_epnum, 1, sndtog);
+	}
+	/* setup new endpoint's toggle bits: */
+	rcvtog = usb_gettoggle(dev, epnum, 0);
+	sndtog = usb_gettoggle(dev, epnum, 1);
+	/* toggle value 0/1 selects the ...TOG0/...TOG1 bit of each pair: */
+	hctl = (BIT(rcvtog + MAX3421_HCTL_RCVTOG0_BIT) |
+		BIT(sndtog + MAX3421_HCTL_SNDTOG0_BIT));
+
+	max3421_hcd->loaded_epnum = epnum;
+	spi_wr8(hcd, MAX3421_REG_HCTL, hctl);
+
+	/*
+	 * Note: devnum for one and the same device can change during
+	 * address-assignment so it's best to just always load the
+	 * address whenever the end-point changed/was forced.
+	 */
+	max3421_hcd->loaded_dev = dev;
+	spi_wr8(hcd, MAX3421_REG_PERADDR, dev->devnum);
+}
+
+/*
+ * Load the 8-byte setup packet of @urb into the chip's SUDFIFO and
+ * return the HXFR command that sends it.
+ */
+static int
+max3421_ctrl_setup(struct usb_hcd *hcd, struct urb *urb)
+{
+	void *setup = urb->setup_packet;
+
+	spi_wr_buf(hcd, MAX3421_REG_SUDFIFO, setup, 8);
+
+	return MAX3421_HXFR_SETUP;
+}
+
+/*
+ * Prepare to receive the next IN packet for @urb: reset the length of
+ * the current packet, request the receive-data-available interrupt
+ * and return the HXFR command for an IN on the URB's endpoint.
+ */
+static int
+max3421_transfer_in(struct usb_hcd *hcd, struct urb *urb)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+
+	/* nothing received for this packet yet: */
+	max3421_hcd->curr_len = 0;
+	max3421_hcd->hien |= BIT(MAX3421_HI_RCVDAV_BIT);
+
+	return MAX3421_HXFR_BULK_IN(usb_pipeendpoint(urb->pipe));
+}
+
+/*
+ * Prepare the next OUT packet for @urb and return the HXFR command
+ * that sends it, or a negative errno.
+ *
+ * Normally the next chunk of the transfer buffer (at most one max.
+ * packet) is written to the SNDFIFO and its size to SNDBC.  If
+ * @fast_retransmit is non-zero, the packet written earlier is sent
+ * again: the FIFO is not reloaded, except for a workaround needed by
+ * chip revision 0x12.
+ *
+ * If the endpoint's max. packet size exceeds the FIFO size,
+ * max3421_hcd->urb_done is set to -EMSGSIZE and -EMSGSIZE is
+ * returned.
+ */
+static int
+max3421_transfer_out(struct usb_hcd *hcd, struct urb *urb, int fast_retransmit)
+{
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	int epnum = usb_pipeendpoint(urb->pipe);
+	u32 max_packet;
+	void *src;
+
+	/* data that has not been sent successfully yet starts here: */
+	src = urb->transfer_buffer + urb->actual_length;
+
+	if (fast_retransmit) {
+		if (max3421_hcd->rev == 0x12) {
+			/* work around rev 0x12 bug: */
+			spi_wr8(hcd, MAX3421_REG_SNDBC, 0);
+			spi_wr8(hcd, MAX3421_REG_SNDFIFO, ((u8 *) src)[0]);
+			spi_wr8(hcd, MAX3421_REG_SNDBC, max3421_hcd->curr_len);
+		}
+		return MAX3421_HXFR_BULK_OUT(epnum);
+	}
+
+	max_packet = usb_maxpacket(urb->dev, urb->pipe, 1);
+
+	if (max_packet > MAX3421_FIFO_SIZE) {
+		/*
+		 * We do not support isochronous transfers at this
+		 * time.
+		 */
+		dev_err(&spi->dev,
+			"%s: packet-size of %u too big (limit is %u bytes)",
+			__func__, max_packet, MAX3421_FIFO_SIZE);
+		max3421_hcd->urb_done = -EMSGSIZE;
+		return -EMSGSIZE;
+	}
+	/* send what is left of the buffer, but at most one packet: */
+	max3421_hcd->curr_len = min((urb->transfer_buffer_length -
+				     urb->actual_length), max_packet);
+
+	spi_wr_buf(hcd, MAX3421_REG_SNDFIFO, src, max3421_hcd->curr_len);
+	spi_wr8(hcd, MAX3421_REG_SNDBC, max3421_hcd->curr_len);
+	return MAX3421_HXFR_BULK_OUT(epnum);
+}
+
+/*
+ * Issue the next host-transfer command for the URB in
+ * max3421_hcd->curr_urb, depending on the packet state of the URB's
+ * endpoint:
+ *
+ *	PKT_STATE_SETUP:	load the setup packet, send SETUP
+ *	PKT_STATE_TRANSFER:	start the next IN or OUT data packet
+ *	PKT_STATE_TERMINATE:	send the handshake token
+ *
+ * @fast_retransmit is handed on to max3421_transfer_out().
+ *
+ * Nothing is written to the chip if there is no current URB or if
+ * preparing the transfer failed.  Otherwise the command is written
+ * to the HXFR register and the host-transfer-done interrupt is added
+ * to max3421_hcd->hien.
+ *
+ * Caller must NOT hold HCD spinlock.
+ */
+static void
+max3421_next_transfer(struct usb_hcd *hcd, int fast_retransmit)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	struct urb *urb = max3421_hcd->curr_urb;
+	struct max3421_ep *max3421_ep;
+	int cmd = -EINVAL;
+
+	if (!urb)
+		return;	/* nothing to do */
+
+	/* only safe to look at the endpoint once we know urb is valid: */
+	max3421_ep = urb->ep->hcpriv;
+
+	switch (max3421_ep->pkt_state) {
+	case PKT_STATE_SETUP:
+		cmd = max3421_ctrl_setup(hcd, urb);
+		break;
+
+	case PKT_STATE_TRANSFER:
+		if (usb_urb_dir_in(urb))
+			cmd = max3421_transfer_in(hcd, urb);
+		else
+			cmd = max3421_transfer_out(hcd, urb, fast_retransmit);
+		break;
+
+	case PKT_STATE_TERMINATE:
+		/*
+		 * IN transfers are terminated with HS_OUT token,
+		 * OUT transfers with HS_IN:
+		 */
+		if (usb_urb_dir_in(urb))
+			cmd = MAX3421_HXFR_HS_OUT;
+		else
+			cmd = MAX3421_HXFR_HS_IN;
+		break;
+	}
+
+	if (cmd < 0)
+		return;
+
+	/* issue the command and wait for host-xfer-done interrupt: */
+
+	spi_wr8(hcd, MAX3421_REG_HXFR, cmd);
+	max3421_hcd->hien |= BIT(MAX3421_HI_HXFRDN_BIT);
+}
+
+/*
+ * Find the next URB to process and start its execution.
+ *
+ * At this time, we do not anticipate ever connecting a USB hub to the
+ * MAX3421 chip, so at most one USB device can be connected and we can
+ * use a simplistic scheduler: at the start of a frame, schedule all
+ * periodic transfers.  Once that is done, use the remainder of the
+ * frame to process non-periodic (bulk & control) transfers.
+ *
+ * Returns 1 if an URB was made the current URB (either because its
+ * first/next transfer was started or because it was found to be
+ * unlinked and now needs to be completed), 0 if there is nothing to
+ * do right now.
+ *
+ * Preconditions:
+ * o Caller must NOT hold HCD spinlock.
+ * o max3421_hcd->curr_urb MUST BE NULL.
+ * o MAX3421E chip must be idle.
+ */
+static int
+max3421_select_and_start_urb(struct usb_hcd *hcd)
+{
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	struct urb *urb, *curr_urb = NULL;
+	struct max3421_ep *max3421_ep;
+	int epnum, force_toggles = 0;
+	struct usb_host_endpoint *ep;
+	struct list_head *pos;
+	unsigned long flags;
+
+	spin_lock_irqsave(&max3421_hcd->lock, flags);
+
+	/*
+	 * Continue with the pass we stopped at last time; sched_pass
+	 * is not reset in this function.
+	 */
+	for (;
+	     max3421_hcd->sched_pass < SCHED_PASS_DONE;
+	     ++max3421_hcd->sched_pass)
+		list_for_each(pos, &max3421_hcd->ep_list) {
+			urb = NULL;
+			max3421_ep = container_of(pos, struct max3421_ep,
+						  ep_list);
+			ep = max3421_ep->ep;
+
+			/* skip endpoints that don't belong to this pass: */
+			switch (usb_endpoint_type(&ep->desc)) {
+			case USB_ENDPOINT_XFER_ISOC:
+			case USB_ENDPOINT_XFER_INT:
+				if (max3421_hcd->sched_pass !=
+				    SCHED_PASS_PERIODIC)
+					continue;
+				break;
+
+			case USB_ENDPOINT_XFER_CONTROL:
+			case USB_ENDPOINT_XFER_BULK:
+				if (max3421_hcd->sched_pass !=
+				    SCHED_PASS_NON_PERIODIC)
+					continue;
+				break;
+			}
+
+			if (list_empty(&ep->urb_list))
+				continue;	/* nothing to do */
+			urb = list_first_entry(&ep->urb_list, struct urb,
+					       urb_list);
+			if (urb->unlinked) {
+				/*
+				 * Don't start a transfer; make the URB
+				 * current and flag it as done.
+				 */
+				dev_dbg(&spi->dev, "%s: URB %p unlinked=%d",
+					__func__, urb, urb->unlinked);
+				max3421_hcd->curr_urb = urb;
+				max3421_hcd->urb_done = 1;
+				spin_unlock_irqrestore(&max3421_hcd->lock,
+						       flags);
+				return 1;
+			}
+
+			/* per-type limits on how often an EP gets served: */
+			switch (usb_endpoint_type(&ep->desc)) {
+			case USB_ENDPOINT_XFER_CONTROL:
+				/*
+				 * Allow one control transaction per
+				 * frame per endpoint:
+				 */
+				if (frame_diff(max3421_ep->last_active,
+					       max3421_hcd->frame_number) == 0)
+					continue;
+				break;
+
+			case USB_ENDPOINT_XFER_BULK:
+				if (max3421_ep->retransmit
+				    && (frame_diff(max3421_ep->last_active,
+						   max3421_hcd->frame_number)
+					== 0))
+					/*
+					 * We already tried this EP
+					 * during this frame and got a
+					 * NAK or error; wait for next frame
+					 */
+					continue;
+				break;
+
+			case USB_ENDPOINT_XFER_ISOC:
+			case USB_ENDPOINT_XFER_INT:
+				if (frame_diff(max3421_hcd->frame_number,
+					       max3421_ep->last_active)
+				    < urb->interval)
+					/*
+					 * We already processed this
+					 * end-point in the current
+					 * frame
+					 */
+					continue;
+				break;
+			}
+
+			/* move current ep to tail: */
+			list_move_tail(pos, &max3421_hcd->ep_list);
+			curr_urb = urb;
+			goto done;
+		}
+done:
+	if (!curr_urb) {
+		spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+		return 0;
+	}
+
+	/*
+	 * An URB was picked; max3421_ep and ep still refer to its
+	 * endpoint.
+	 */
+	urb = max3421_hcd->curr_urb = curr_urb;
+	epnum = usb_endpoint_num(&urb->ep->desc);
+	if (max3421_ep->retransmit)
+		/* restart (part of) a USB transaction: */
+		max3421_ep->retransmit = 0;
+	else {
+		/* start USB transaction: */
+		if (usb_endpoint_xfer_control(&ep->desc)) {
+			/*
+			 * See USB 2.0 spec section 8.6.1
+			 * Initialization via SETUP Token:
+			 */
+			usb_settoggle(urb->dev, epnum, 0, 1);
+			usb_settoggle(urb->dev, epnum, 1, 1);
+			max3421_ep->pkt_state = PKT_STATE_SETUP;
+			force_toggles = 1;
+		} else
+			max3421_ep->pkt_state = PKT_STATE_TRANSFER;
+	}
+
+	spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+
+	/* the calls below issue SPI transactions, hence lock is dropped */
+	max3421_ep->last_active = max3421_hcd->frame_number;
+	max3421_set_address(hcd, urb->dev, epnum, force_toggles);
+	max3421_set_speed(hcd, urb->dev);
+	max3421_next_transfer(hcd, 0);
+	return 1;
+}
+
+/*
+ * Check all endpoints for URBs that got unlinked.
+ *
+ * Every URB on the ep_list endpoints that has urb->unlinked set is
+ * removed from its endpoint and given back (with status argument 0).
+ * Returns 1 if at least one such URB was found, 0 otherwise.
+ *
+ * Caller must NOT hold HCD spinlock.
+ */
+static int
+max3421_check_unlink(struct usb_hcd *hcd)
+{
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	struct list_head *pos, *upos, *next_upos;
+	struct max3421_ep *max3421_ep;
+	struct usb_host_endpoint *ep;
+	struct urb *urb;
+	unsigned long flags;
+	int retval = 0;
+
+	spin_lock_irqsave(&max3421_hcd->lock, flags);
+	list_for_each(pos, &max3421_hcd->ep_list) {
+		max3421_ep = container_of(pos, struct max3421_ep, ep_list);
+		ep = max3421_ep->ep;
+		list_for_each_safe(upos, next_upos, &ep->urb_list) {
+			urb = container_of(upos, struct urb, urb_list);
+			if (urb->unlinked) {
+				retval = 1;
+				dev_dbg(&spi->dev, "%s: URB %p unlinked=%d",
+					__func__, urb, urb->unlinked);
+				usb_hcd_unlink_urb_from_ep(hcd, urb);
+				/*
+				 * The lock is dropped while the URB is
+				 * given back.
+				 * NOTE(review): both lists are walked
+				 * on after re-acquiring the lock;
+				 * confirm nothing can modify them in
+				 * between.
+				 */
+				spin_unlock_irqrestore(&max3421_hcd->lock,
+						       flags);
+				usb_hcd_giveback_urb(hcd, urb, 0);
+				spin_lock_irqsave(&max3421_hcd->lock, flags);
+			}
+		}
+	}
+	spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+	return retval;
+}
+
+/*
+ * Give up on the current URB for now: flag its endpoint as needing a
+ * retransmission and clear max3421_hcd->curr_urb so that the
+ * scheduler can pick the URB up again later.
+ *
+ * Caller must NOT hold HCD spinlock.
+ */
+static void
+max3421_slow_retransmit(struct usb_hcd *hcd)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	struct max3421_ep *max3421_ep = max3421_hcd->curr_urb->ep->hcpriv;
+
+	max3421_ep->retransmit = 1;
+	max3421_hcd->curr_urb = NULL;
+}
+
+/*
+ * Handle a receive-data-available interrupt: copy the received bytes
+ * from the RCVFIFO to the transfer buffer of the current URB and
+ * acknowledge the interrupt.
+ *
+ * The number of bytes copied is the byte count reported by the chip,
+ * limited to the FIFO size and to the room left in the transfer
+ * buffer.  If that number is zero, the URB and
+ * max3421_hcd->curr_len are left untouched.
+ *
+ * Caller must NOT hold HCD spinlock.
+ */
+static void
+max3421_recv_data_available(struct usb_hcd *hcd)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	struct urb *urb = max3421_hcd->curr_urb;
+	size_t room = 0, count;
+
+	count = spi_rd8(hcd, MAX3421_REG_RCVBC);
+	if (count > MAX3421_FIFO_SIZE)
+		count = MAX3421_FIFO_SIZE;
+
+	/* never copy more than what still fits into the buffer: */
+	if (urb->actual_length < urb->transfer_buffer_length)
+		room = urb->transfer_buffer_length - urb->actual_length;
+	if (count > room)
+		count = room;
+
+	if (count > 0) {
+		spi_rd_buf(hcd, MAX3421_REG_RCVFIFO,
+			   urb->transfer_buffer + urb->actual_length, count);
+		urb->actual_length += count;
+		max3421_hcd->curr_len = count;
+	}
+
+	/* the FIFO has been read, so the RCVDAV irq can be acked now: */
+	spi_wr8(hcd, MAX3421_REG_HIRQ, BIT(MAX3421_HI_RCVDAV_BIT));
+}
+
+/*
+ * Handle a host transfer of the current URB that finished with a
+ * result code other than MAX3421_HRSL_OK.  @hrsl is the value read
+ * from the HRSL register.
+ *
+ * Depending on the result code, the packet is retried right away
+ * (NAK, a limited number of times), the URB is set aside to be
+ * retried later (max3421_slow_retransmit()), or the URB is flagged
+ * as failed by storing a negative errno in max3421_hcd->urb_done.
+ *
+ * This function writes to chip registers via SPI.
+ */
+static void
+max3421_handle_error(struct usb_hcd *hcd, u8 hrsl)
+{
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	u8 result_code = hrsl & MAX3421_HRSL_RESULT_MASK;
+	struct urb *urb = max3421_hcd->curr_urb;
+	struct max3421_ep *max3421_ep = urb->ep->hcpriv;
+	int switch_sndfifo;
+
+	/*
+	 * If an OUT command results in any response other than OK
+	 * (i.e., error or NAK), we have to perform a dummy-write to
+	 * SNDBC so the FIFO gets switched back to us.  Otherwise, we
+	 * get out of sync with the SNDFIFO double buffer.
+	 */
+	switch_sndfifo = (max3421_ep->pkt_state == PKT_STATE_TRANSFER &&
+			  usb_urb_dir_out(urb));
+
+	switch (result_code) {
+	case MAX3421_HRSL_OK:
+		return;			/* this shouldn't happen */
+
+	case MAX3421_HRSL_WRONGPID:	/* received wrong PID */
+	case MAX3421_HRSL_BUSY:		/* SIE busy */
+	case MAX3421_HRSL_BADREQ:	/* bad val in HXFR */
+	case MAX3421_HRSL_UNDEF:	/* reserved */
+	case MAX3421_HRSL_KERR:		/* K-state instead of response */
+	case MAX3421_HRSL_JERR:		/* J-state instead of response */
+		/*
+		 * packet experienced an error that we cannot recover
+		 * from; report error
+		 */
+		max3421_hcd->urb_done = hrsl_to_error[result_code];
+		dev_dbg(&spi->dev, "%s: unexpected error HRSL=0x%02x",
+			__func__, hrsl);
+		break;
+
+	case MAX3421_HRSL_TOGERR:
+		if (usb_urb_dir_in(urb))
+			; /* don't do anything (device will switch toggle) */
+		else {
+			/* flip the send toggle bit: */
+			int sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1;
+
+			sndtog ^= 1;
+			spi_wr8(hcd, MAX3421_REG_HCTL,
+				BIT(sndtog + MAX3421_HCTL_SNDTOG0_BIT));
+		}
+		/* FALL THROUGH */
+	case MAX3421_HRSL_BADBC:	/* bad byte count */
+	case MAX3421_HRSL_PIDERR:	/* received PID is corrupted */
+	case MAX3421_HRSL_PKTERR:	/* packet error (stuff, EOP) */
+	case MAX3421_HRSL_CRCERR:	/* CRC error */
+	case MAX3421_HRSL_BABBLE:	/* device talked too long */
+	case MAX3421_HRSL_TIMEOUT:
+		/* retries is reset to 0 after every successful transfer */
+		if (max3421_ep->retries++ < USB_MAX_RETRIES)
+			/* retry the packet again in the next frame */
+			max3421_slow_retransmit(hcd);
+		else {
+			/* Based on ohci.h cc_to_err[]: */
+			max3421_hcd->urb_done = hrsl_to_error[result_code];
+			dev_dbg(&spi->dev, "%s: unexpected error HRSL=0x%02x",
+				__func__, hrsl);
+		}
+		break;
+
+	case MAX3421_HRSL_STALL:
+		dev_dbg(&spi->dev, "%s: unexpected error HRSL=0x%02x",
+			__func__, hrsl);
+		max3421_hcd->urb_done = hrsl_to_error[result_code];
+		break;
+
+	case MAX3421_HRSL_NAK:
+		/*
+		 * Device wasn't ready for data or has no data
+		 * available: retry the packet again.
+		 */
+		if (max3421_ep->naks++ < NAK_MAX_FAST_RETRANSMITS) {
+			max3421_next_transfer(hcd, 1);
+			/* packet was re-issued; no dummy-write wanted */
+			switch_sndfifo = 0;
+		} else
+			max3421_slow_retransmit(hcd);
+		break;
+	}
+	if (switch_sndfifo)
+		spi_wr8(hcd, MAX3421_REG_SNDBC, 0);
+}
+
+/*
+ * Decide whether the IN transfer of @urb is complete after a packet
+ * has been received successfully.
+ *
+ * Returns 1 if the transfer is complete (buffer full, or short packet
+ * received and short reads are acceptable), 0 if more packets are to
+ * be received, or a negative errno: -EINVAL if the endpoint's max.
+ * packet size exceeds the FIFO size, -EREMOTEIO on a short packet
+ * when URB_SHORT_NOT_OK is set.
+ *
+ * Caller must NOT hold HCD spinlock.
+ */
+static int
+max3421_transfer_in_done(struct usb_hcd *hcd, struct urb *urb)
+{
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	u32 max_packet;
+
+	/* buffer completely filled -> read is complete: */
+	if (urb->actual_length >= urb->transfer_buffer_length)
+		return 1;
+
+	max_packet = usb_maxpacket(urb->dev, urb->pipe, 0);
+	if (max_packet > MAX3421_FIFO_SIZE) {
+		/* only isochronous EPs, which are unsupported, get here */
+		dev_err(&spi->dev,
+			"%s: packet-size of %u too big (limit is %u bytes)",
+			__func__, max_packet, MAX3421_FIFO_SIZE);
+		return -EINVAL;
+	}
+
+	/*
+	 * USB 2.0 Section 5.3.2 Pipes: all packets but the last one
+	 * must be full size.  Hence, a full-size packet means that
+	 * more data may follow:
+	 */
+	if (max3421_hcd->curr_len >= max_packet)
+		return 0;
+
+	/* Short packet with room left in the buffer: error or done? */
+	if (urb->transfer_flags & URB_SHORT_NOT_OK)
+		return -EREMOTEIO;
+	return 1;
+}
+
+/*
+ * Account for the OUT packet that has just been sent successfully and
+ * decide whether the OUT transfer of @urb is complete.
+ *
+ * Returns 1 if the transfer is complete, 0 if another packet has to
+ * be sent.  The latter includes the case where all data has been sent
+ * but URB_ZERO_PACKET is set and the last packet was a full-sized
+ * one, so that a terminating zero-size packet is still due.
+ *
+ * Caller must NOT hold HCD spinlock.
+ */
+static int
+max3421_transfer_out_done(struct usb_hcd *hcd, struct urb *urb)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	size_t sent = max3421_hcd->curr_len;
+
+	urb->actual_length += sent;
+	if (urb->actual_length < urb->transfer_buffer_length)
+		return 0;	/* more data waiting to be sent */
+
+	if (!(urb->transfer_flags & URB_ZERO_PACKET))
+		return 1;
+
+	/*
+	 * Some hardware takes a packet smaller than max_packet as the
+	 * end-of-transfer indicator; if the last packet was a
+	 * full-sized one, we're only done after sending a zero-size
+	 * packet.
+	 */
+	return sent != usb_maxpacket(urb->dev, urb->pipe, 1);
+}
+
+/*
+ * Handle a host-transfer-done interrupt for the current URB.
+ *
+ * Reads the transfer result from the HRSL register.  On anything but
+ * MAX3421_HRSL_OK, max3421_handle_error() takes over.  Otherwise the
+ * endpoint's packet state is advanced and either the URB is flagged
+ * as finished (max3421_hcd->urb_done set to a non-zero value) or the
+ * next transfer is issued.
+ *
+ * NOTE(review): max3421_hcd->curr_urb is dereferenced without a NULL
+ * check; callers presumably guarantee it is set -- confirm.
+ *
+ * Caller must NOT hold HCD spinlock.
+ */
+static void
+max3421_host_transfer_done(struct usb_hcd *hcd)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	struct urb *urb = max3421_hcd->curr_urb;
+	struct max3421_ep *max3421_ep;
+	u8 result_code, hrsl;
+	int urb_done = 0;
+
+	/* the transfer is over; these interrupts are not wanted anymore: */
+	max3421_hcd->hien &= ~(BIT(MAX3421_HI_HXFRDN_BIT) |
+			       BIT(MAX3421_HI_RCVDAV_BIT));
+
+	hrsl = spi_rd8(hcd, MAX3421_REG_HRSL);
+	result_code = hrsl & MAX3421_HRSL_RESULT_MASK;
+
+#ifdef DEBUG
+	++max3421_hcd->err_stat[result_code];
+#endif
+
+	max3421_ep = urb->ep->hcpriv;
+
+	if (unlikely(result_code != MAX3421_HRSL_OK)) {
+		max3421_handle_error(hcd, hrsl);
+		return;
+	}
+
+	/* success: start counting NAKs and retries from scratch */
+	max3421_ep->naks = 0;
+	max3421_ep->retries = 0;
+	switch (max3421_ep->pkt_state) {
+
+	case PKT_STATE_SETUP:
+		/* without data to transfer, go to the handshake directly */
+		if (urb->transfer_buffer_length > 0)
+			max3421_ep->pkt_state = PKT_STATE_TRANSFER;
+		else
+			max3421_ep->pkt_state = PKT_STATE_TERMINATE;
+		break;
+
+	case PKT_STATE_TRANSFER:
+		if (usb_urb_dir_in(urb))
+			urb_done = max3421_transfer_in_done(hcd, urb);
+		else
+			urb_done = max3421_transfer_out_done(hcd, urb);
+		if (urb_done > 0 && usb_pipetype(urb->pipe) == PIPE_CONTROL) {
+			/*
+			 * We aren't really done - we still need to
+			 * terminate the control transfer:
+			 */
+			max3421_hcd->urb_done = urb_done = 0;
+			max3421_ep->pkt_state = PKT_STATE_TERMINATE;
+		}
+		break;
+
+	case PKT_STATE_TERMINATE:
+		urb_done = 1;
+		break;
+	}
+
+	/* urb_done: > 0 finished OK, < 0 failed, 0 more transfers needed */
+	if (urb_done)
+		max3421_hcd->urb_done = urb_done;
+	else
+		max3421_next_transfer(hcd, 0);
+}
+
+/*
+ * Sample the J/K bus status bits from the HRSL register to find out
+ * whether a device is connected and whether the LOWSPEED mode bit has
+ * to be flipped.  The MODE register (and its shadow copy) as well as
+ * the connection and low-speed bits of port_status are updated
+ * accordingly; port_status bits that changed are also recorded in the
+ * upper 16 bits (change mask) of port_status.
+ *
+ * Caller must NOT hold HCD spinlock.
+ */
+static void
+max3421_detect_conn(struct usb_hcd *hcd)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	unsigned int jk, have_conn = 0;
+	u32 old_port_status, chg;
+	unsigned long flags;
+	u8 hrsl, mode;
+
+	hrsl = spi_rd8(hcd, MAX3421_REG_HRSL);
+
+	/* bit 0 of jk is the J status, bit 1 the K status: */
+	jk = ((((hrsl >> MAX3421_HRSL_JSTATUS_BIT) & 1) << 0) |
+	      (((hrsl >> MAX3421_HRSL_KSTATUS_BIT) & 1) << 1));
+
+	mode = max3421_hcd->mode;
+
+	switch (jk) {
+	case 0x0: /* SE0: disconnect */
+		/*
+		 * Turn off SOFKAENAB bit to avoid getting interrupt
+		 * every milli-second:
+		 */
+		mode &= ~BIT(MAX3421_MODE_SOFKAENAB_BIT);
+		break;
+
+	case 0x1: /* J=1,K=0: connected; LOWSPEED bit is left as it is */
+	case 0x2: /* J=0,K=1: connected; LOWSPEED bit needs to be flipped */
+		if (jk == 0x2)
+			/* need to switch to the other speed: */
+			mode ^= BIT(MAX3421_MODE_LOWSPEED_BIT);
+		/* turn on SOFKAENAB bit: */
+		mode |= BIT(MAX3421_MODE_SOFKAENAB_BIT);
+		have_conn = 1;
+		break;
+
+	case 0x3: /* illegal */
+		break;
+	}
+
+	max3421_hcd->mode = mode;
+	spi_wr8(hcd, MAX3421_REG_MODE, max3421_hcd->mode);
+
+	spin_lock_irqsave(&max3421_hcd->lock, flags);
+	old_port_status = max3421_hcd->port_status;
+	if (have_conn)
+		max3421_hcd->port_status |=  USB_PORT_STAT_CONNECTION;
+	else
+		max3421_hcd->port_status &= ~USB_PORT_STAT_CONNECTION;
+	if (mode & BIT(MAX3421_MODE_LOWSPEED_BIT))
+		max3421_hcd->port_status |=  USB_PORT_STAT_LOW_SPEED;
+	else
+		max3421_hcd->port_status &= ~USB_PORT_STAT_LOW_SPEED;
+	/* record the status bits that changed in the change mask: */
+	chg = (old_port_status ^ max3421_hcd->port_status);
+	max3421_hcd->port_status |= chg << 16;
+	spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+}
+
+/*
+ * Interrupt handler; @dev_id is the struct usb_hcd of the controller.
+ *
+ * No chip register is accessed here.  The handler merely wakes the
+ * SPI thread (if there is one and it is not running already) and
+ * disables the interrupt line.  do_enable_irq records that the line
+ * has been disabled; presumably the SPI thread re-enables it once it
+ * has dealt with the chip (not visible in this part of the file).
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t
+max3421_irq_handler(int irq, void *dev_id)
+{
+	struct usb_hcd *hcd = dev_id;
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+
+	if (max3421_hcd->spi_thread &&
+	    max3421_hcd->spi_thread->state != TASK_RUNNING)
+		wake_up_process(max3421_hcd->spi_thread);
+	/* disable the line only once, so enable/disable stay balanced: */
+	if (!max3421_hcd->do_enable_irq) {
+		max3421_hcd->do_enable_irq = 1;
+		disable_irq_nosync(spi->irq);
+	}
+	return IRQ_HANDLED;
+}
+
+#ifdef DEBUG
+
+/*
+ * Debug helper: for every endpoint on the HCD's ep_list, log the
+ * endpoint's scheduling counters together with a one-line summary of
+ * the URBs queued on it.  The HCD spinlock is held for the whole dump.
+ */
+static void
+dump_eps(struct usb_hcd *hcd)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	struct max3421_ep *max3421_ep;
+	struct usb_host_endpoint *ep;
+	struct list_head *pos, *upos;
+	char ubuf[512], *dp, *end;
+	unsigned long flags;
+	struct urb *urb;
+	int epnum, ret;
+
+	spin_lock_irqsave(&max3421_hcd->lock, flags);
+	list_for_each(pos, &max3421_hcd->ep_list) {
+		max3421_ep = container_of(pos, struct max3421_ep, ep_list);
+		ep = max3421_ep->ep;
+
+		/* build the per-URB summary string for this endpoint in ubuf: */
+		dp = ubuf;
+		end = dp + sizeof(ubuf);
+		*dp = '\0';
+		list_for_each(upos, &ep->urb_list) {
+			urb = container_of(upos, struct urb, urb_list);
+			ret = snprintf(dp, end - dp, " %p(%d.%s %d/%d)", urb,
+				       usb_pipetype(urb->pipe),
+				       usb_urb_dir_in(urb) ? "IN" : "OUT",
+				       urb->actual_length,
+				       urb->transfer_buffer_length);
+			if (ret < 0 || ret >= end - dp)
+				break;	/* error or buffer full */
+			dp += ret;
+		}
+
+		epnum = usb_endpoint_num(&ep->desc);
+		pr_info("EP%0u %u lst %04u rtr %u nak %6u rxmt %u: %s\n",
+			epnum, max3421_ep->pkt_state, max3421_ep->last_active,
+			max3421_ep->retries, max3421_ep->naks,
+			max3421_ep->retransmit, ubuf);
+	}
+	spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+}
+
+#endif /* DEBUG */
+
+/*
+ * Read the pending host interrupts (restricted to those enabled in
+ * hien), acknowledge them and dispatch to the individual handlers.
+ * Port-status updates caused by bus events are made under the HCD
+ * spinlock; the SPI accesses are made without it.
+ *
+ * Return zero if no work was performed, 1 otherwise.
+ */
+static int
+max3421_handle_irqs(struct usb_hcd *hcd)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	u32 chg, old_port_status;
+	unsigned long flags;
+	u8 hirq;
+
+	/*
+	 * Read and ack pending interrupts (CPU must never
+	 * clear SNDBAV directly and RCVDAV must be cleared by
+	 * max3421_recv_data_available()!):
+	 */
+	hirq = spi_rd8(hcd, MAX3421_REG_HIRQ);
+	hirq &= max3421_hcd->hien;
+	if (!hirq)
+		return 0;
+
+	spi_wr8(hcd, MAX3421_REG_HIRQ,
+		hirq & ~(BIT(MAX3421_HI_SNDBAV_BIT) |
+			 BIT(MAX3421_HI_RCVDAV_BIT)));
+
+	if (hirq & BIT(MAX3421_HI_FRAME_BIT)) {
+		/* new frame: advance the frame counter and restart scheduling */
+		max3421_hcd->frame_number = ((max3421_hcd->frame_number + 1)
+					     & USB_MAX_FRAME_NUMBER);
+		max3421_hcd->sched_pass = SCHED_PASS_PERIODIC;
+	}
+
+	if (hirq & BIT(MAX3421_HI_RCVDAV_BIT))
+		max3421_recv_data_available(hcd);
+
+	if (hirq & BIT(MAX3421_HI_HXFRDN_BIT))
+		max3421_host_transfer_done(hcd);
+
+	if (hirq & BIT(MAX3421_HI_CONDET_BIT))
+		max3421_detect_conn(hcd);
+
+	/*
+	 * Now process interrupts that may affect HCD state
+	 * other than the end-points:
+	 */
+	spin_lock_irqsave(&max3421_hcd->lock, flags);
+
+	old_port_status = max3421_hcd->port_status;
+	if (hirq & BIT(MAX3421_HI_BUSEVENT_BIT)) {
+		if (max3421_hcd->port_status & USB_PORT_STAT_RESET) {
+			/* BUSEVENT due to completion of Bus Reset */
+			max3421_hcd->port_status &= ~USB_PORT_STAT_RESET;
+			max3421_hcd->port_status |=  USB_PORT_STAT_ENABLE;
+		} else {
+			/* BUSEVENT due to completion of Bus Resume */
+			pr_info("%s: BUSEVENT Bus Resume Done\n", __func__);
+		}
+	}
+	if (hirq & BIT(MAX3421_HI_RWU_BIT))
+		pr_info("%s: RWU\n", __func__);
+	if (hirq & BIT(MAX3421_HI_SUSDN_BIT))
+		pr_info("%s: SUSDN\n", __func__);
+
+	/* every status bit that flipped gets its change bit (upper half) set: */
+	chg = (old_port_status ^ max3421_hcd->port_status);
+	max3421_hcd->port_status |= chg << 16;
+
+	spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+
+#ifdef DEBUG
+	{
+		/* at most once every 5 seconds: log and clear the error stats */
+		static unsigned long last_time;
+		char sbuf[16 * 16], *dp, *end;
+		int i;
+
+		if (jiffies - last_time > 5*HZ) {
+			dp = sbuf;
+			end = sbuf + sizeof(sbuf);
+			*dp = '\0';
+			for (i = 0; i < 16; ++i) {
+				int ret = snprintf(dp, end - dp, " %lu",
+						   max3421_hcd->err_stat[i]);
+				if (ret < 0 || ret >= end - dp)
+					break;	/* error or buffer full */
+				dp += ret;
+			}
+			pr_info("%s: hrsl_stats %s\n", __func__, sbuf);
+			memset(max3421_hcd->err_stat, 0,
+			       sizeof(max3421_hcd->err_stat));
+			last_time = jiffies;
+
+			dump_eps(hcd);
+		}
+	}
+#endif
+	return 1;
+}
+
+/*
+ * Reset the MAX3421 chip and bring it up in host mode: wait for the
+ * oscillator to stabilize, program the MODE register, reset the frame
+ * counter, sample the bus for an attached device and enable the frame,
+ * connect-detect and bus-event interrupts.
+ *
+ * Always returns 1 ("work was performed"), even when waiting for the
+ * oscillator times out; in that case the chip is left unconfigured and
+ * an error is logged.
+ */
+static int
+max3421_reset_hcd(struct usb_hcd *hcd)
+{
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	int timeout;
+
+	/* perform a chip reset and wait for OSCIRQ signal to appear: */
+	spi_wr8(hcd, MAX3421_REG_USBCTL, BIT(MAX3421_USBCTL_CHIPRES_BIT));
+	/* clear reset: */
+	spi_wr8(hcd, MAX3421_REG_USBCTL, 0);
+	timeout = 1000;
+	while (1) {
+		if (spi_rd8(hcd, MAX3421_REG_USBIRQ)
+		    & BIT(MAX3421_USBIRQ_OSCOKIRQ_BIT))
+			break;
+		if (--timeout < 0) {
+			dev_err(&spi->dev,
+				"timed out waiting for oscillator OK signal\n");
+			return 1;
+		}
+		cond_resched();
+	}
+
+	/*
+	 * Turn on host mode, automatic generation of SOF packets, and
+	 * enable pull-down registers on DM/DP:
+	 */
+	max3421_hcd->mode = (BIT(MAX3421_MODE_HOST_BIT) |
+			     BIT(MAX3421_MODE_SOFKAENAB_BIT) |
+			     BIT(MAX3421_MODE_DMPULLDN_BIT) |
+			     BIT(MAX3421_MODE_DPPULLDN_BIT));
+	spi_wr8(hcd, MAX3421_REG_MODE, max3421_hcd->mode);
+
+	/* reset frame-number: */
+	max3421_hcd->frame_number = USB_MAX_FRAME_NUMBER;
+	spi_wr8(hcd, MAX3421_REG_HCTL, BIT(MAX3421_HCTL_FRMRST_BIT));
+
+	/* sample the state of the D+ and D- lines */
+	spi_wr8(hcd, MAX3421_REG_HCTL, BIT(MAX3421_HCTL_SAMPLEBUS_BIT));
+	max3421_detect_conn(hcd);
+
+	/* enable frame, connection-detected, and bus-event interrupts: */
+	max3421_hcd->hien = (BIT(MAX3421_HI_FRAME_BIT) |
+			     BIT(MAX3421_HI_CONDET_BIT) |
+			     BIT(MAX3421_HI_BUSEVENT_BIT));
+	spi_wr8(hcd, MAX3421_REG_HIEN, max3421_hcd->hien);
+
+	/* enable interrupts: */
+	spi_wr8(hcd, MAX3421_REG_CPUCTL, BIT(MAX3421_CPUCTL_IE_BIT));
+	return 1;
+}
+
+/*
+ * Complete the current URB (if any): consume the pending urb_done
+ * value, unlink the URB from its endpoint under the HCD spinlock and
+ * give it back to the USB core.  A positive urb_done value is reported
+ * to the core as status 0; other values are passed through unchanged.
+ *
+ * Always returns 1.
+ */
+static int
+max3421_urb_done(struct usb_hcd *hcd)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	unsigned long flags;
+	struct urb *urb;
+	int status;
+
+	status = max3421_hcd->urb_done;
+	max3421_hcd->urb_done = 0;
+	if (status > 0)
+		status = 0;
+	urb = max3421_hcd->curr_urb;
+	if (urb) {
+		max3421_hcd->curr_urb = NULL;
+		spin_lock_irqsave(&max3421_hcd->lock, flags);
+		usb_hcd_unlink_urb_from_ep(hcd, urb);
+		spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+
+		/* must be called without the HCD spinlock: */
+		usb_hcd_giveback_urb(hcd, urb, status);
+	}
+	return 1;
+}
+
+/*
+ * Kernel thread that performs all SPI traffic with the MAX3421.
+ *
+ * After configuring the pin control register and waiting for a
+ * supported chip revision to be read back, the thread loops until asked
+ * to stop.  Each pass handles, in priority order, URB completion,
+ * pending chip interrupts, or the start of a new transfer, and then any
+ * work requested through the do_* flags (HCD reset, port reset, unlink
+ * check, GPIO output update).  When a pass did no work the thread
+ * re-enables the interrupt line (if the hard-irq handler disabled it)
+ * and sleeps until woken.
+ */
+static int
+max3421_spi_thread(void *dev_id)
+{
+	struct usb_hcd *hcd = dev_id;
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	int i, i_worked = 1;
+
+	/* set full-duplex SPI mode, low-active interrupt pin: */
+	spi_wr8(hcd, MAX3421_REG_PINCTL,
+		(BIT(MAX3421_PINCTL_FDUPSPI_BIT) |	/* full-duplex */
+		 BIT(MAX3421_PINCTL_INTLEVEL_BIT)));	/* low-active irq */
+
+	/* keep probing until a known chip revision answers: */
+	while (!kthread_should_stop()) {
+		max3421_hcd->rev = spi_rd8(hcd, MAX3421_REG_REVISION);
+		if (max3421_hcd->rev == 0x12 || max3421_hcd->rev == 0x13)
+			break;
+		dev_err(&spi->dev, "bad rev 0x%02x\n", max3421_hcd->rev);
+		msleep(10000);
+	}
+	dev_info(&spi->dev, "rev 0x%x, SPI clk %dHz, bpw %u, irq %d\n",
+		 max3421_hcd->rev, spi->max_speed_hz, spi->bits_per_word,
+		 spi->irq);
+
+	while (!kthread_should_stop()) {
+		if (!i_worked) {
+			/*
+			 * We'll be waiting for wakeups from the hard
+			 * interrupt handler, so now is a good time to
+			 * sync our hien with the chip:
+			 */
+			spi_wr8(hcd, MAX3421_REG_HIEN, max3421_hcd->hien);
+
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (max3421_hcd->do_enable_irq) {
+				max3421_hcd->do_enable_irq = 0;
+				enable_irq(spi->irq);
+			}
+			schedule();
+			__set_current_state(TASK_RUNNING);
+		}
+
+		i_worked = 0;
+
+		if (max3421_hcd->urb_done)
+			i_worked |= max3421_urb_done(hcd);
+		else if (max3421_handle_irqs(hcd))
+			i_worked = 1;
+		else if (!max3421_hcd->curr_urb)
+			i_worked |= max3421_select_and_start_urb(hcd);
+
+		if (max3421_hcd->do_reset_hcd) {
+			/* reset the HCD: */
+			max3421_hcd->do_reset_hcd = 0;
+			i_worked |= max3421_reset_hcd(hcd);
+		}
+		if (max3421_hcd->do_reset_port) {
+			/* perform a USB bus reset: */
+			max3421_hcd->do_reset_port = 0;
+			spi_wr8(hcd, MAX3421_REG_HCTL,
+				BIT(MAX3421_HCTL_BUSRST_BIT));
+			i_worked = 1;
+		}
+		if (max3421_hcd->do_check_unlink) {
+			max3421_hcd->do_check_unlink = 0;
+			i_worked |= max3421_check_unlink(hcd);
+		}
+		if (max3421_hcd->do_iopin_update) {
+			/*
+			 * IOPINS1/IOPINS2 do not auto-increment, so we can't
+			 * use spi_wr_buf().
+			 */
+			for (i = 0; i < ARRAY_SIZE(max3421_hcd->iopins); ++i) {
+				/*
+				 * Read-modify-write the same register we are
+				 * about to write, so that the preserved upper
+				 * nibble belongs to that register:
+				 */
+				u8 val = spi_rd8(hcd, MAX3421_REG_IOPINS1 + i);
+
+				val = ((val & 0xf0) |
+				       (max3421_hcd->iopins[i] & 0x0f));
+				spi_wr8(hcd, MAX3421_REG_IOPINS1 + i, val);
+				max3421_hcd->iopins[i] = val;
+			}
+			max3421_hcd->do_iopin_update = 0;
+			i_worked = 1;
+		}
+	}
+	set_current_state(TASK_RUNNING);
+	dev_info(&spi->dev, "SPI thread exiting\n");
+	return 0;
+}
+
+/*
+ * Request a USB bus reset: clear the port's ENABLE and LOW_SPEED status
+ * bits and ask the SPI thread (via do_reset_port) to issue the reset.
+ * Always returns 0.
+ */
+static int
+max3421_reset_port(struct usb_hcd *hcd)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+
+	max3421_hcd->port_status &= ~(USB_PORT_STAT_ENABLE |
+				      USB_PORT_STAT_LOW_SPEED);
+	max3421_hcd->do_reset_port = 1;
+	wake_up_process(max3421_hcd->spi_thread);
+	return 0;
+}
+
+/*
+ * hc_driver .reset callback: set up the basic HCD parameters and ask
+ * the SPI thread (via do_reset_hcd) to reset the chip.  The chip reset
+ * itself happens asynchronously in the SPI thread.  Always returns 0.
+ */
+static int
+max3421_reset(struct usb_hcd *hcd)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+
+	hcd->self.sg_tablesize = 0;
+	hcd->speed = HCD_USB2;
+	hcd->self.root_hub->speed = USB_SPEED_FULL;
+	max3421_hcd->do_reset_hcd = 1;
+	wake_up_process(max3421_hcd->spi_thread);
+	return 0;
+}
+
+/*
+ * hc_driver .start callback: initialize the HCD spinlock and endpoint
+ * list and mark the root hub and HCD as running.  Always returns 0.
+ */
+static int
+max3421_start(struct usb_hcd *hcd)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+
+	spin_lock_init(&max3421_hcd->lock);
+	max3421_hcd->rh_state = MAX3421_RH_RUNNING;
+
+	INIT_LIST_HEAD(&max3421_hcd->ep_list);
+
+	hcd->power_budget = POWER_BUDGET;
+	hcd->state = HC_STATE_RUNNING;
+	hcd->uses_new_polling = 1;
+	return 0;
+}
+
+/* hc_driver .stop callback: intentionally empty, nothing is done here. */
+static void
+max3421_stop(struct usb_hcd *hcd)
+{
+}
+
+/*
+ * hc_driver .urb_enqueue callback: attach the per-endpoint driver state
+ * to the URB's endpoint on first use, link the URB to the endpoint and
+ * wake the SPI thread so that scheduling restarts.
+ *
+ * Returns 0 on success, -EINVAL for an interrupt/isochronous URB with a
+ * negative interval, -ENOMEM if the endpoint state cannot be allocated,
+ * or the error returned by usb_hcd_link_urb_to_ep().
+ */
+static int
+max3421_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
+{
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	struct max3421_ep *max3421_ep;
+	unsigned long flags;
+	int retval;
+
+	switch (usb_pipetype(urb->pipe)) {
+	case PIPE_INTERRUPT:
+	case PIPE_ISOCHRONOUS:
+		if (urb->interval < 0) {
+			dev_err(&spi->dev,
+			  "%s: interval=%d for intr-/iso-pipe; expected > 0\n",
+				__func__, urb->interval);
+			return -EINVAL;
+		}
+		break;
+	default:
+		break;
+	}
+
+	spin_lock_irqsave(&max3421_hcd->lock, flags);
+
+	max3421_ep = urb->ep->hcpriv;
+	if (!max3421_ep) {
+		/*
+		 * Gets freed in max3421_endpoint_disable.  We hold a
+		 * spinlock with interrupts disabled here, so the
+		 * allocation must not sleep no matter what mem_flags
+		 * would allow:
+		 */
+		max3421_ep = kzalloc(sizeof(*max3421_ep), GFP_ATOMIC);
+		if (!max3421_ep) {
+			/* leave through the common exit to drop the lock */
+			retval = -ENOMEM;
+			goto out;
+		}
+		max3421_ep->ep = urb->ep;
+		max3421_ep->last_active = max3421_hcd->frame_number;
+		urb->ep->hcpriv = max3421_ep;
+
+		list_add_tail(&max3421_ep->ep_list, &max3421_hcd->ep_list);
+	}
+
+	retval = usb_hcd_link_urb_to_ep(hcd, urb);
+	if (retval == 0) {
+		/* Since we added to the queue, restart scheduling: */
+		max3421_hcd->sched_pass = SCHED_PASS_PERIODIC;
+		wake_up_process(max3421_hcd->spi_thread);
+	}
+
+out:
+	spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+	return retval;
+}
+
+/*
+ * hc_driver .urb_dequeue callback: mark the URB as unlinked and ask the
+ * SPI thread (via do_check_unlink) to drop it.  Returns the result of
+ * usb_hcd_check_unlink_urb().
+ */
+static int
+max3421_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	unsigned long flags;
+	int retval;
+
+	spin_lock_irqsave(&max3421_hcd->lock, flags);
+
+	/*
+	 * This will set urb->unlinked which in turn causes the entry
+	 * to be dropped at the next opportunity.
+	 */
+	retval = usb_hcd_check_unlink_urb(hcd, urb, status);
+	if (retval == 0) {
+		max3421_hcd->do_check_unlink = 1;
+		wake_up_process(max3421_hcd->spi_thread);
+	}
+	spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+	return retval;
+}
+
+/*
+ * hc_driver .endpoint_disable callback: under the HCD spinlock, remove
+ * the endpoint's driver state (allocated in max3421_urb_enqueue) from
+ * the HCD's ep_list, free it and clear ep->hcpriv.
+ */
+static void
+max3421_endpoint_disable(struct usb_hcd *hcd, struct usb_host_endpoint *ep)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	unsigned long flags;
+
+	spin_lock_irqsave(&max3421_hcd->lock, flags);
+
+	if (ep->hcpriv) {
+		struct max3421_ep *max3421_ep = ep->hcpriv;
+
+		/* remove myself from the ep_list: */
+		if (!list_empty(&max3421_ep->ep_list))
+			list_del(&max3421_ep->ep_list);
+		kfree(max3421_ep);
+		ep->hcpriv = NULL;
+	}
+
+	spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+}
+
+/*
+ * hc_driver .get_frame_number callback: return the driver's cached
+ * frame counter.  The value is read without taking the HCD spinlock.
+ */
+static int
+max3421_get_frame_number(struct usb_hcd *hcd)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	return max3421_hcd->frame_number;
+}
+
+/*
+ * Should return a non-zero value when any port is undergoing a resume
+ * transition while the root hub is suspended.
+ *
+ * Reports through *buf whether the single root-hub port has pending
+ * status changes, and resumes the root hub if it is suspended while
+ * changes are pending.  Returns 1 if there are changes, 0 otherwise
+ * (including when the hardware is not accessible, in which case *buf is
+ * left untouched).
+ */
+static int
+max3421_hub_status_data(struct usb_hcd *hcd, char *buf)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	unsigned long flags;
+	int retval = 0;
+
+	spin_lock_irqsave(&max3421_hcd->lock, flags);
+	if (!HCD_HW_ACCESSIBLE(hcd))
+		goto done;
+
+	*buf = 0;
+	if ((max3421_hcd->port_status & PORT_C_MASK) != 0) {
+		*buf = (1 << 1); /* bit 1: status of port 1 has changed */
+		dev_dbg(hcd->self.controller,
+			"port status 0x%08x has changes\n",
+			max3421_hcd->port_status);
+		retval = 1;
+		if (max3421_hcd->rh_state == MAX3421_RH_SUSPENDED)
+			usb_hcd_resume_root_hub(hcd);
+	}
+done:
+	spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+	return retval;
+}
+
+/*
+ * Fill in the descriptor of the single-port root hub.  All fields not
+ * set explicitly below are left zeroed.
+ */
+static inline void
+hub_descriptor(struct usb_hub_descriptor *desc)
+{
+	memset(desc, 0, sizeof(*desc));
+	/*
+	 * See Table 11-13: Hub Descriptor in USB 2.0 spec.
+	 */
+	desc->bDescriptorType = 0x29;	/* hub descriptor */
+	desc->bDescLength = 9;
+	/* NOTE(review): 0x0001 presumably selects per-port power switching; confirm against Table 11-13 */
+	desc->wHubCharacteristics = cpu_to_le16(0x0001);
+	desc->bNbrPorts = 1;
+}
+
+/*
+ * Set the MAX3421E general-purpose output with number PIN_NUMBER to
+ * VALUE (0 or 1).  PIN_NUMBER may be in the range from 1-8.  For
+ * any other value, this function acts as a no-op.
+ *
+ * Only the cached iopins[] values are changed here; the SPI thread is
+ * woken up to write them to the chip.
+ */
+static void
+max3421_gpout_set_value(struct usb_hcd *hcd, u8 pin_number, u8 value)
+{
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	u8 mask, idx;
+
+	/* make it zero-based; pin 0 ("unused") wraps to 255 and is rejected */
+	--pin_number;
+	if (pin_number > 7)
+		return;
+
+	/*
+	 * Each iopins[] entry carries four outputs in its low nibble (the
+	 * SPI thread only transfers bits 0-3), so the bit position is the
+	 * pin's index within its group of four, not the absolute pin number:
+	 */
+	mask = 1u << (pin_number % 4);
+	idx = pin_number / 4;
+
+	if (value)
+		max3421_hcd->iopins[idx] |=  mask;
+	else
+		max3421_hcd->iopins[idx] &= ~mask;
+	max3421_hcd->do_iopin_update = 1;
+	wake_up_process(max3421_hcd->spi_thread);
+}
+
+/*
+ * hc_driver .hub_control callback: emulate the control requests of the
+ * single-port root hub.  The whole request is processed under the HCD
+ * spinlock.  Returns 0 on success or -EPIPE ("protocol stall") for
+ * unsupported or invalid requests.
+ *
+ * NOTE(review): pdata is dereferenced for the POWER port feature
+ * without a NULL check; this assumes the SPI device always carries
+ * platform data - confirm against the board setup code.
+ */
+static int
+max3421_hub_control(struct usb_hcd *hcd, u16 type_req, u16 value, u16 index,
+		    char *buf, u16 length)
+{
+	struct spi_device *spi = to_spi_device(hcd->self.controller);
+	struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd);
+	struct max3421_hcd_platform_data *pdata;
+	unsigned long flags;
+	int retval = 0;
+
+	spin_lock_irqsave(&max3421_hcd->lock, flags);
+
+	pdata = spi->dev.platform_data;
+
+	switch (type_req) {
+	case ClearHubFeature:
+		break;
+	case ClearPortFeature:
+		switch (value) {
+		case USB_PORT_FEAT_SUSPEND:
+			break;
+		case USB_PORT_FEAT_POWER:
+			dev_dbg(hcd->self.controller, "power-off\n");
+			max3421_gpout_set_value(hcd, pdata->vbus_gpout, 0);
+			/* FALLS THROUGH */
+		default:
+			/* the feature number doubles as the port_status bit index */
+			max3421_hcd->port_status &= ~(1 << value);
+		}
+		break;
+	case GetHubDescriptor:
+		hub_descriptor((struct usb_hub_descriptor *) buf);
+		break;
+
+	case DeviceRequest | USB_REQ_GET_DESCRIPTOR:
+	case GetPortErrorCount:
+	case SetHubDepth:
+		/* USB3 only */
+		goto error;
+
+	case GetHubStatus:
+		*(__le32 *) buf = cpu_to_le32(0);
+		break;
+
+	case GetPortStatus:
+		if (index != 1) {
+			retval = -EPIPE;
+			goto error;
+		}
+		/* low half: current status; high half: change bits */
+		((__le16 *) buf)[0] = cpu_to_le16(max3421_hcd->port_status);
+		((__le16 *) buf)[1] =
+			cpu_to_le16(max3421_hcd->port_status >> 16);
+		break;
+
+	case SetHubFeature:
+		retval = -EPIPE;
+		break;
+
+	case SetPortFeature:
+		switch (value) {
+		case USB_PORT_FEAT_LINK_STATE:
+		case USB_PORT_FEAT_U1_TIMEOUT:
+		case USB_PORT_FEAT_U2_TIMEOUT:
+		case USB_PORT_FEAT_BH_PORT_RESET:
+			goto error;
+		case USB_PORT_FEAT_SUSPEND:
+			if (max3421_hcd->active)
+				max3421_hcd->port_status |=
+					USB_PORT_STAT_SUSPEND;
+			break;
+		case USB_PORT_FEAT_POWER:
+			dev_dbg(hcd->self.controller, "power-on\n");
+			max3421_hcd->port_status |= USB_PORT_STAT_POWER;
+			max3421_gpout_set_value(hcd, pdata->vbus_gpout, 1);
+			break;
+		case USB_PORT_FEAT_RESET:
+			max3421_reset_port(hcd);
+			/* FALLS THROUGH */
+		default:
+			/* features can only be set while the port is powered */
+			if ((max3421_hcd->port_status & USB_PORT_STAT_POWER)
+			    != 0)
+				max3421_hcd->port_status |= (1 << value);
+		}
+		break;
+
+	default:
+		dev_dbg(hcd->self.controller,
+			"hub control req%04x v%04x i%04x l%d\n",
+			type_req, value, index, length);
+error:		/* "protocol stall" on error */
+		retval = -EPIPE;
+	}
+
+	spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+	return retval;
+}
+
+/* hc_driver .bus_suspend callback: not implemented, always fails with -1. */
+static int
+max3421_bus_suspend(struct usb_hcd *hcd)
+{
+	return -1;
+}
+
+/* hc_driver .bus_resume callback: not implemented, always fails with -1. */
+static int
+max3421_bus_resume(struct usb_hcd *hcd)
+{
+	return -1;
+}
+
+/*
+ * The SPI driver already takes care of DMA-mapping/unmapping, so no
+ * reason to do it twice.  This callback therefore maps nothing and
+ * always reports success (0).
+ */
+static int
+max3421_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags)
+{
+	return 0;
+}
+
+/* Counterpart of max3421_map_urb_for_dma: nothing was mapped, so nothing to undo. */
+static void
+max3421_unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb)
+{
+}
+
+/*
+ * Host-controller driver operations handed to the USB core via
+ * usb_create_hcd() in max3421_probe().
+ */
+static struct hc_driver max3421_hcd_desc = {
+	.description =		"max3421",
+	.product_desc =		DRIVER_DESC,
+	.hcd_priv_size =	sizeof(struct max3421_hcd),
+	.flags =		HCD_USB11,
+	.reset =		max3421_reset,
+	.start =		max3421_start,
+	.stop =			max3421_stop,
+	.get_frame_number =	max3421_get_frame_number,
+	.urb_enqueue =		max3421_urb_enqueue,
+	.urb_dequeue =		max3421_urb_dequeue,
+	.map_urb_for_dma =	max3421_map_urb_for_dma,
+	.unmap_urb_for_dma =	max3421_unmap_urb_for_dma,
+	.endpoint_disable =	max3421_endpoint_disable,
+	.hub_status_data =	max3421_hub_status_data,
+	.hub_control =		max3421_hub_control,
+	.bus_suspend =		max3421_bus_suspend,
+	.bus_resume =		max3421_bus_resume,
+};
+
+/*
+ * SPI probe callback: set up the SPI bus, create the HCD, start the SPI
+ * thread, register the HCD with the USB core and request the interrupt.
+ *
+ * The SPI thread must exist before usb_add_hcd() is called because the
+ * HCD callbacks wake it up.  On any failure everything set up so far is
+ * torn down again, in reverse order.  The controller is only added to
+ * the global max3421_hcd_list once it is fully operational, so a failed
+ * probe never leaves a stale entry behind.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int
+max3421_probe(struct spi_device *spi)
+{
+	struct max3421_hcd *max3421_hcd;
+	struct usb_hcd *hcd;
+	int retval;
+
+	if (spi_setup(spi) < 0) {
+		dev_err(&spi->dev, "Unable to setup SPI bus\n");
+		return -EFAULT;
+	}
+
+	hcd = usb_create_hcd(&max3421_hcd_desc, &spi->dev,
+			     dev_name(&spi->dev));
+	if (!hcd) {
+		dev_err(&spi->dev, "failed to create HCD structure\n");
+		return -ENOMEM;
+	}
+	set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
+	max3421_hcd = hcd_to_max3421(hcd);
+	INIT_LIST_HEAD(&max3421_hcd->ep_list);
+
+	max3421_hcd->spi_thread = kthread_run(max3421_spi_thread, hcd,
+					      "max3421_spi_thread");
+	if (IS_ERR(max3421_hcd->spi_thread)) {
+		/* kthread_run() can fail with errors other than -ENOMEM */
+		retval = PTR_ERR(max3421_hcd->spi_thread);
+		max3421_hcd->spi_thread = NULL;
+		dev_err(&spi->dev,
+			"failed to create SPI thread (error %d)\n", retval);
+		goto err_put_hcd;
+	}
+
+	retval = usb_add_hcd(hcd, 0, 0);
+	if (retval) {
+		dev_err(&spi->dev, "failed to add HCD\n");
+		goto err_stop_thread;
+	}
+
+	retval = request_irq(spi->irq, max3421_irq_handler,
+			     IRQF_TRIGGER_LOW, "max3421", hcd);
+	if (retval < 0) {
+		dev_err(&spi->dev, "failed to request irq %d\n", spi->irq);
+		goto err_remove_hcd;
+	}
+
+	/* fully set up: make the controller findable by max3421_remove() */
+	max3421_hcd->next = max3421_hcd_list;
+	max3421_hcd_list = max3421_hcd;
+	return 0;
+
+err_remove_hcd:
+	usb_remove_hcd(hcd);
+err_stop_thread:
+	kthread_stop(max3421_hcd->spi_thread);
+err_put_hcd:
+	usb_put_hcd(hcd);
+	return retval;
+}
+
+/*
+ * SPI remove callback: look up the HCD that belongs to this SPI device
+ * in the global max3421_hcd_list, unregister it from the USB core, stop
+ * the SPI thread, unlink the controller from the list and release the
+ * interrupt and the HCD.
+ *
+ * Returns 0 on success or -ENODEV if no controller is registered for
+ * this SPI device.
+ */
+static int
+max3421_remove(struct spi_device *spi)
+{
+	struct max3421_hcd *max3421_hcd = NULL, **prev;
+	struct usb_hcd *hcd = NULL;
+	unsigned long flags;
+
+	for (prev = &max3421_hcd_list; *prev; prev = &(*prev)->next) {
+		max3421_hcd = *prev;
+		hcd = max3421_to_hcd(max3421_hcd);
+		if (hcd->self.controller == &spi->dev)
+			break;
+	}
+	/*
+	 * The loop leaves *prev == NULL when it ran off the end of the
+	 * list; max3421_hcd itself would still point at the last entry
+	 * that was examined, so it cannot be used to detect "not found".
+	 */
+	if (!*prev) {
+		dev_err(&spi->dev, "no MAX3421 HCD found for SPI device %p\n",
+			spi);
+		return -ENODEV;
+	}
+
+	usb_remove_hcd(hcd);
+
+	/* kthread_stop() sleeps, so it must run without the spinlock held: */
+	kthread_stop(max3421_hcd->spi_thread);
+
+	spin_lock_irqsave(&max3421_hcd->lock, flags);
+	*prev = max3421_hcd->next;
+	spin_unlock_irqrestore(&max3421_hcd->lock, flags);
+
+	free_irq(spi->irq, hcd);
+
+	usb_put_hcd(hcd);
+	return 0;
+}
+
+/* SPI driver glue: binds to SPI devices named "max3421-hcd". */
+static struct spi_driver max3421_driver = {
+	.probe		= max3421_probe,
+	.remove		= max3421_remove,
+	.driver		= {
+		.name	= "max3421-hcd",
+		.owner	= THIS_MODULE,
+	},
+};
+
+/* Module entry point: register the SPI driver. */
+static int __init
+max3421_mod_init(void)
+{
+	return spi_register_driver(&max3421_driver);
+}
+
+/* Module exit point: unregister the SPI driver. */
+static void __exit
+max3421_mod_exit(void)
+{
+	spi_unregister_driver(&max3421_driver);
+}
+
+module_init(max3421_mod_init);
+module_exit(max3421_mod_exit);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_AUTHOR("David Mosberger <davidm@egauge.net>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/max3421-hcd.h b/include/linux/platform_data/max3421-hcd.h
new file mode 100644
index 000000000000..4ad459605d87
--- /dev/null
+++ b/include/linux/platform_data/max3421-hcd.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2014 eGauge Systems LLC
+ *	Contributed by David Mosberger-Tang <davidm@egauge.net>
+ *
+ * Platform-data structure for MAX3421 USB HCD driver.
+ *
+ */
+#ifndef MAX3421_HCD_PLAT_H_INCLUDED
+#define MAX3421_HCD_PLAT_H_INCLUDED
+
+/*
+ * This structure defines the mapping of certain auxiliary functions to the
+ * MAX3421E GPIO pins.  The chip has eight GP inputs and eight GP outputs.
+ * Pins are numbered starting from 1; a value of 0 indicates that the pin
+ * is not used/wired to anything.
+ *
+ * At this point, the only control the max3421-hcd driver cares about is
+ * to control Vbus (5V to the peripheral).
+ */
+struct max3421_hcd_platform_data {
+	u8 vbus_gpout;			/* GP output (1-8) controlling Vbus; 0 = none */
+};
+
+#endif /* MAX3421_HCD_PLAT_H_INCLUDED */
-- 
cgit 


From a43ae58c848cfbadaba81c8d63202b4487f922a0 Mon Sep 17 00:00:00 2001
From: Hanjun Guo <hanjun.guo@linaro.org>
Date: Tue, 6 May 2014 11:29:52 +0800
Subject: PCI: Turn pcibios_penalize_isa_irq() into a weak function

pcibios_penalize_isa_irq() is only implemented by x86 now, and legacy ISA
is not used by some architectures.  Make pcibios_penalize_isa_irq() a
__weak function to simplify the code.  This removes the need for new
platforms to add stub implementations of pcibios_penalize_isa_irq().

[bhelgaas: changelog, comments]
Signed-off-by: Hanjun Guo <hanjun.guo@linaro.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/alpha/include/asm/pci.h        |  5 -----
 arch/arm/include/asm/pci.h          |  5 -----
 arch/blackfin/include/asm/pci.h     |  5 -----
 arch/cris/include/asm/pci.h         |  1 -
 arch/frv/include/asm/pci.h          |  2 --
 arch/frv/mb93090-mb00/pci-irq.c     |  4 ----
 arch/ia64/include/asm/pci.h         |  6 ------
 arch/microblaze/include/asm/pci.h   |  5 -----
 arch/mips/include/asm/pci.h         |  5 -----
 arch/mn10300/include/asm/pci.h      |  1 -
 arch/mn10300/unit-asb2305/pci-irq.c |  4 ----
 arch/parisc/include/asm/pci.h       |  5 -----
 arch/powerpc/include/asm/pci.h      |  5 -----
 arch/sh/include/asm/pci.h           |  5 -----
 arch/sparc/include/asm/pci_32.h     |  5 -----
 arch/sparc/include/asm/pci_64.h     |  5 -----
 arch/unicore32/include/asm/pci.h    |  5 -----
 arch/x86/include/asm/pci.h          |  1 -
 arch/xtensa/include/asm/pci.h       |  5 -----
 drivers/pci/pci.c                   | 11 +++++++++++
 include/linux/pci.h                 |  1 +
 21 files changed, 12 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h
index d01afb78919c..f7f680f7457d 100644
--- a/arch/alpha/include/asm/pci.h
+++ b/arch/alpha/include/asm/pci.h
@@ -59,11 +59,6 @@ struct pci_controller {
 
 extern void pcibios_set_master(struct pci_dev *dev);
 
-extern inline void pcibios_penalize_isa_irq(int irq, int active)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
 /* IOMMU controls.  */
 
 /* The PCI address space does not equal the physical memory address space.
diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h
index 680a83e94467..7e95d8535e24 100644
--- a/arch/arm/include/asm/pci.h
+++ b/arch/arm/include/asm/pci.h
@@ -31,11 +31,6 @@ static inline int pci_proc_domain(struct pci_bus *bus)
 }
 #endif /* CONFIG_PCI_DOMAINS */
 
-static inline void pcibios_penalize_isa_irq(int irq, int active)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
 /*
  * The PCI address space does equal the physical memory address space.
  * The networking and block device layers use this boolean for bounce
diff --git a/arch/blackfin/include/asm/pci.h b/arch/blackfin/include/asm/pci.h
index 74352c4597d9..c737909fba47 100644
--- a/arch/blackfin/include/asm/pci.h
+++ b/arch/blackfin/include/asm/pci.h
@@ -10,9 +10,4 @@
 #define PCIBIOS_MIN_IO 0x00001000
 #define PCIBIOS_MIN_MEM 0x10000000
 
-static inline void pcibios_penalize_isa_irq(int irq)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
 #endif				/* _ASM_BFIN_PCI_H */
diff --git a/arch/cris/include/asm/pci.h b/arch/cris/include/asm/pci.h
index f666734926d5..cc2399c175e9 100644
--- a/arch/cris/include/asm/pci.h
+++ b/arch/cris/include/asm/pci.h
@@ -20,7 +20,6 @@ void pcibios_config_init(void);
 struct pci_bus * pcibios_scan_root(int bus);
 
 void pcibios_set_master(struct pci_dev *dev);
-void pcibios_penalize_isa_irq(int irq);
 struct irq_routing_table *pcibios_get_irq_routing_table(void);
 int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
 
diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h
index ef03baf5d89d..2035a4d3f9b9 100644
--- a/arch/frv/include/asm/pci.h
+++ b/arch/frv/include/asm/pci.h
@@ -24,8 +24,6 @@ struct pci_dev;
 
 extern void pcibios_set_master(struct pci_dev *dev);
 
-extern void pcibios_penalize_isa_irq(int irq);
-
 #ifdef CONFIG_MMU
 extern void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle);
 extern void consistent_free(void *vaddr);
diff --git a/arch/frv/mb93090-mb00/pci-irq.c b/arch/frv/mb93090-mb00/pci-irq.c
index c677b9d81d30..1c35c93f942b 100644
--- a/arch/frv/mb93090-mb00/pci-irq.c
+++ b/arch/frv/mb93090-mb00/pci-irq.c
@@ -55,10 +55,6 @@ void __init pcibios_fixup_irqs(void)
 	}
 }
 
-void __init pcibios_penalize_isa_irq(int irq)
-{
-}
-
 void pcibios_enable_irq(struct pci_dev *dev)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h
index 7d41cc089822..52af5ed9f60b 100644
--- a/arch/ia64/include/asm/pci.h
+++ b/arch/ia64/include/asm/pci.h
@@ -50,12 +50,6 @@ struct pci_dev;
 extern unsigned long ia64_max_iommu_merge_mask;
 #define PCI_DMA_BUS_IS_PHYS	(ia64_max_iommu_merge_mask == ~0UL)
 
-static inline void
-pcibios_penalize_isa_irq (int irq, int active)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
 #include <asm-generic/pci-dma-compat.h>
 
 #ifdef CONFIG_PCI
diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h
index 935f9bec414a..335524040fff 100644
--- a/arch/microblaze/include/asm/pci.h
+++ b/arch/microblaze/include/asm/pci.h
@@ -44,11 +44,6 @@ struct pci_dev;
  */
 #define pcibios_assign_all_busses()	0
 
-static inline void pcibios_penalize_isa_irq(int irq, int active)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
 #ifdef CONFIG_PCI
 extern void set_pci_dma_ops(struct dma_map_ops *dma_ops);
 extern struct dma_map_ops *get_pci_dma_ops(void);
diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h
index 12d6842962be..974b0e308963 100644
--- a/arch/mips/include/asm/pci.h
+++ b/arch/mips/include/asm/pci.h
@@ -73,11 +73,6 @@ extern unsigned long PCIBIOS_MIN_MEM;
 
 extern void pcibios_set_master(struct pci_dev *dev);
 
-static inline void pcibios_penalize_isa_irq(int irq, int active)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
 #define HAVE_PCI_MMAP
 
 extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
diff --git a/arch/mn10300/include/asm/pci.h b/arch/mn10300/include/asm/pci.h
index 166323824683..5f70af25c7d0 100644
--- a/arch/mn10300/include/asm/pci.h
+++ b/arch/mn10300/include/asm/pci.h
@@ -48,7 +48,6 @@ extern void unit_pci_init(void);
 #define PCIBIOS_MIN_MEM		0xB8000000
 
 void pcibios_set_master(struct pci_dev *dev);
-void pcibios_penalize_isa_irq(int irq);
 
 /* Dynamic DMA mapping stuff.
  * i386 has everything mapped statically.
diff --git a/arch/mn10300/unit-asb2305/pci-irq.c b/arch/mn10300/unit-asb2305/pci-irq.c
index 77439da04671..fcb28ceb824d 100644
--- a/arch/mn10300/unit-asb2305/pci-irq.c
+++ b/arch/mn10300/unit-asb2305/pci-irq.c
@@ -40,10 +40,6 @@ void __init pcibios_fixup_irqs(void)
 	}
 }
 
-void __init pcibios_penalize_isa_irq(int irq)
-{
-}
-
 void pcibios_enable_irq(struct pci_dev *dev)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h
index 465154076d23..20df2b04fc09 100644
--- a/arch/parisc/include/asm/pci.h
+++ b/arch/parisc/include/asm/pci.h
@@ -215,11 +215,6 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 }
 #endif
 
-static inline void pcibios_penalize_isa_irq(int irq, int active)
-{
-	/* We don't need to penalize isa irq's */
-}
-
 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 {
 	return channel ? 15 : 14;
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 95145a15c708..1b0739bc14b5 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -46,11 +46,6 @@ struct pci_dev;
 #define pcibios_assign_all_busses() \
 	(pci_has_flag(PCI_REASSIGN_ALL_BUS))
 
-static inline void pcibios_penalize_isa_irq(int irq, int active)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
 #define HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
 static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
 {
diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h
index bff96c2e7d25..5b4511552998 100644
--- a/arch/sh/include/asm/pci.h
+++ b/arch/sh/include/asm/pci.h
@@ -70,11 +70,6 @@ extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
 	enum pci_mmap_state mmap_state, int write_combine);
 extern void pcibios_set_master(struct pci_dev *dev);
 
-static inline void pcibios_penalize_isa_irq(int irq, int active)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
 /* Dynamic DMA mapping stuff.
  * SuperH has everything mapped statically like x86.
  */
diff --git a/arch/sparc/include/asm/pci_32.h b/arch/sparc/include/asm/pci_32.h
index dc503297481f..53e9b4987db0 100644
--- a/arch/sparc/include/asm/pci_32.h
+++ b/arch/sparc/include/asm/pci_32.h
@@ -16,11 +16,6 @@
 
 #define PCI_IRQ_NONE		0xffffffff
 
-static inline void pcibios_penalize_isa_irq(int irq, int active)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
 /* Dynamic DMA mapping stuff.
  */
 #define PCI_DMA_BUS_IS_PHYS	(0)
diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h
index 1633b718d3bc..c6c7396e7627 100644
--- a/arch/sparc/include/asm/pci_64.h
+++ b/arch/sparc/include/asm/pci_64.h
@@ -16,11 +16,6 @@
 
 #define PCI_IRQ_NONE		0xffffffff
 
-static inline void pcibios_penalize_isa_irq(int irq, int active)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
 /* The PCI address space does not equal the physical memory
  * address space.  The networking and block device layers use
  * this boolean for bounce buffer decisions.
diff --git a/arch/unicore32/include/asm/pci.h b/arch/unicore32/include/asm/pci.h
index f5e108f4a151..654407e98619 100644
--- a/arch/unicore32/include/asm/pci.h
+++ b/arch/unicore32/include/asm/pci.h
@@ -18,11 +18,6 @@
 #include <asm-generic/pci.h>
 #include <mach/hardware.h> /* for PCIBIOS_MIN_* */
 
-static inline void pcibios_penalize_isa_irq(int irq, int active)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
 #ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
 					enum pci_dma_burst_strategy *strat,
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 96ae4f4040bb..0892ea0e683f 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -68,7 +68,6 @@ void pcibios_config_init(void);
 void pcibios_scan_root(int bus);
 
 void pcibios_set_master(struct pci_dev *dev);
-void pcibios_penalize_isa_irq(int irq, int active);
 struct irq_routing_table *pcibios_get_irq_routing_table(void);
 int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
 
diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h
index 614be031a79a..5d52dc43dfe7 100644
--- a/arch/xtensa/include/asm/pci.h
+++ b/arch/xtensa/include/asm/pci.h
@@ -22,11 +22,6 @@
 
 extern struct pci_controller* pcibios_alloc_controller(void);
 
-static inline void pcibios_penalize_isa_irq(int irq)
-{
-	/* We don't do dynamic PCI IRQ allocation */
-}
-
 /* Assume some values. (We should revise them, if necessary) */
 
 #define PCIBIOS_MIN_IO		0x2000
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 39012831867e..11f24912523c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1468,6 +1468,17 @@ void __weak pcibios_release_device(struct pci_dev *dev) {}
  */
 void __weak pcibios_disable_device (struct pci_dev *dev) {}
 
+/**
+ * pcibios_penalize_isa_irq - penalize an ISA IRQ
+ * @irq: ISA IRQ to penalize
+ * @active: IRQ active or not
+ *
+ * Permits the platform to provide architecture-specific functionality when
+ * penalizing ISA IRQs. This is the default implementation. Architecture
+ * implementations can override this.
+ */
+void __weak pcibios_penalize_isa_irq(int irq, int active) {}
+
 static void do_pci_disable_device(struct pci_dev *dev)
 {
 	u16 pci_command;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 84182b153b21..018877b8b4e8 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1578,6 +1578,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev,
 				 enum pcie_reset_state state);
 int pcibios_add_device(struct pci_dev *dev);
 void pcibios_release_device(struct pci_dev *dev);
+void pcibios_penalize_isa_irq(int irq, int active);
 
 #ifdef CONFIG_HIBERNATE_CALLBACKS
 extern struct dev_pm_ops pcibios_pm_ops;
-- 
cgit 


From c4128cac3557ddd5fa972cb6511c426cd94a7ccd Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Date: Thu, 15 May 2014 14:28:46 +0200
Subject: usb: gadget: net2280: Add support for PLX USB338X

This patch adds support for the PLX USB3380 and USB3382.

This driver is based on the driver from the manufacturer.

Since USB338X is register compatible with NET2280, I thought that it
would be better to include this hardware in the net2280 driver.

Manufacturer's driver only supported the USB33X, did not follow the
Kernel Style and contained some trivial errors. This patch has tried to
address these issues.

This patch has only been tested on USB338x hardware, but the merge has
been done trying to not affect the behaviour of NET2280.

Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Tested-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/Kconfig   |   10 +-
 drivers/usb/gadget/net2280.c | 1115 ++++++++++++++++++++++++++++++++++++++----
 drivers/usb/gadget/net2280.h |   97 +++-
 include/linux/usb/usb338x.h  |  199 ++++++++
 4 files changed, 1330 insertions(+), 91 deletions(-)
 create mode 100644 include/linux/usb/usb338x.h

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index ba18e9c110cc..49e434ec527d 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -409,7 +409,7 @@ config USB_NET2272_DMA
 	  If unsure, say "N" here.  The driver works fine in PIO mode.
 
 config USB_NET2280
-	tristate "NetChip 228x"
+	tristate "NetChip 228x / PLX USB338x"
 	depends on PCI
 	help
 	   NetChip 2280 / 2282 is a PCI based USB peripheral controller which
@@ -419,6 +419,14 @@ config USB_NET2280
 	   (for control transfers) and several endpoints with dedicated
 	   functions.
 
+	   PLX 3380 / 3382 is a PCIe based USB peripheral controller which
+	   supports full, high speed USB 2.0 and super speed USB 3.0
+	   data transfers.
+
+	   It has eight configurable endpoints, as well as endpoint zero
+	   (for control transfers) and several endpoints with dedicated
+	   functions.
+
 	   Say "y" to link the driver statically, or "m" to build a
 	   dynamically linked module called "net2280" and force all
 	   gadget drivers to also be dynamically linked.
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c
index 300b3a71383b..87789c9bf7fe 100644
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -18,6 +18,9 @@
  * hint to completely eliminate some IRQs, if a later IRQ is guaranteed
  * and DMA chaining is enabled.
  *
+ * MSI is enabled by default.  The legacy IRQ is used if MSI couldn't
+ * be enabled.
+ *
  * Note that almost all the errata workarounds here are only needed for
  * rev1 chips.  Rev1a silicon (0110) fixes almost all of them.
  */
@@ -25,10 +28,14 @@
 /*
  * Copyright (C) 2003 David Brownell
  * Copyright (C) 2003-2005 PLX Technology, Inc.
+ * Copyright (C) 2014 Ricardo Ribalda - Qtechnology/AS
  *
  * Modified Seth Levy 2005 PLX Technology, Inc. to provide compatibility
  *	with 2282 chip
  *
+ * Modified Ricardo Ribalda Qtechnology AS  to provide compatibility
+ *	with usb 338x chip. Based on PLX driver
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -61,9 +68,8 @@
 #include <asm/irq.h>
 #include <asm/unaligned.h>
 
-
-#define	DRIVER_DESC		"PLX NET228x USB Peripheral Controller"
-#define	DRIVER_VERSION		"2005 Sept 27"
+#define	DRIVER_DESC		"PLX NET228x/USB338x USB Peripheral Controller"
+#define	DRIVER_VERSION		"2005 Sept 27/v3.0"
 
 #define	EP_DONTUSE		13	/* nonzero */
 
@@ -73,11 +79,12 @@
 static const char driver_name [] = "net2280";
 static const char driver_desc [] = DRIVER_DESC;
 
+static const u32 ep_bit[9] = { 0, 17, 2, 19, 4, 1, 18, 3, 20 };
 static const char ep0name [] = "ep0";
 static const char *const ep_name [] = {
 	ep0name,
 	"ep-a", "ep-b", "ep-c", "ep-d",
-	"ep-e", "ep-f",
+	"ep-e", "ep-f", "ep-g", "ep-h",
 };
 
 /* use_dma -- general goodness, fewer interrupts, less cpu load (vs PIO)
@@ -90,11 +97,12 @@ static const char *const ep_name [] = {
  */
 static bool use_dma = 1;
 static bool use_dma_chaining = 0;
+static bool use_msi = 1;
 
 /* "modprobe net2280 use_dma=n" etc */
 module_param (use_dma, bool, S_IRUGO);
 module_param (use_dma_chaining, bool, S_IRUGO);
-
+module_param(use_msi, bool, S_IRUGO);
 
 /* mode 0 == ep-{a,b,c,d} 1K fifo each
  * mode 1 == ep-{a,b} 2K fifo each, ep-{c,d} unavailable
@@ -148,6 +156,9 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 	struct net2280_ep	*ep;
 	u32			max, tmp;
 	unsigned long		flags;
+	static const u32 ep_key[9] = { 1, 0, 1, 0, 1, 1, 0, 1, 0 };
+	static const u32 ep_enhanced[9] = { 0x10, 0x60, 0x30, 0x80,
+					  0x50, 0x20, 0x70, 0x40, 0x90 };
 
 	ep = container_of (_ep, struct net2280_ep, ep);
 	if (!_ep || !desc || ep->desc || _ep->name == ep0name
@@ -161,11 +172,20 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 	if ((desc->bEndpointAddress & 0x0f) == EP_DONTUSE)
 		return -EDOM;
 
+	if (dev->pdev->vendor == 0x10b5) {
+		if ((desc->bEndpointAddress & 0x0f) >= 0x0c)
+			return -EDOM;
+		ep->is_in = !!usb_endpoint_dir_in(desc);
+		if (dev->enhanced_mode && ep->is_in && ep_key[ep->num])
+			return -EINVAL;
+	}
+
 	/* sanity check ep-e/ep-f since their fifos are small */
 	max = usb_endpoint_maxp (desc) & 0x1fff;
-	if (ep->num > 4 && max > 64)
+	if (ep->num > 4 && max > 64 && (dev->pdev->vendor == 0x17cc))
 		return -ERANGE;
 
+
 	spin_lock_irqsave (&dev->lock, flags);
 	_ep->maxpacket = max & 0x7ff;
 	ep->desc = desc;
@@ -176,7 +196,8 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 	ep->out_overflow = 0;
 
 	/* set speed-dependent max packet; may kick in high bandwidth */
-	set_idx_reg (dev->regs, REG_EP_MAXPKT (dev, ep->num), max);
+	set_idx_reg(dev->regs, (dev->enhanced_mode) ? ep_enhanced[ep->num]
+					: REG_EP_MAXPKT(dev, ep->num), max);
 
 	/* FIFO lines can't go to different packets.  PIO is ok, so
 	 * use it instead of troublesome (non-bulk) multi-packet DMA.
@@ -199,23 +220,43 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 				&ep->regs->ep_rsp);
 	} else if (tmp == USB_ENDPOINT_XFER_BULK) {
 		/* catch some particularly blatant driver bugs */
-		if ((dev->gadget.speed == USB_SPEED_HIGH
-					&& max != 512)
-				|| (dev->gadget.speed == USB_SPEED_FULL
-					&& max > 64)) {
-			spin_unlock_irqrestore (&dev->lock, flags);
+		if ((dev->gadget.speed == USB_SPEED_SUPER && max != 1024) ||
+		    (dev->gadget.speed == USB_SPEED_HIGH && max != 512) ||
+		    (dev->gadget.speed == USB_SPEED_FULL && max > 64)) {
+			spin_unlock_irqrestore(&dev->lock, flags);
 			return -ERANGE;
 		}
 	}
 	ep->is_iso = (tmp == USB_ENDPOINT_XFER_ISOC) ? 1 : 0;
-	tmp <<= ENDPOINT_TYPE;
-	tmp |= desc->bEndpointAddress;
-	tmp |= (4 << ENDPOINT_BYTE_COUNT);	/* default full fifo lines */
-	tmp |= 1 << ENDPOINT_ENABLE;
-	wmb ();
+	/* Enable this endpoint */
+	if (dev->pdev->vendor == 0x17cc) {
+		tmp <<= ENDPOINT_TYPE;
+		tmp |= desc->bEndpointAddress;
+		/* default full fifo lines */
+		tmp |= (4 << ENDPOINT_BYTE_COUNT);
+		tmp |= 1 << ENDPOINT_ENABLE;
+		ep->is_in = (tmp & USB_DIR_IN) != 0;
+	} else {
+		/* In Legacy mode, only OUT endpoints are used */
+		if (dev->enhanced_mode && ep->is_in) {
+			tmp <<= IN_ENDPOINT_TYPE;
+			tmp |= (1 << IN_ENDPOINT_ENABLE);
+			/* Not applicable to Legacy */
+			tmp |= (1 << ENDPOINT_DIRECTION);
+		} else {
+			tmp <<= OUT_ENDPOINT_TYPE;
+			tmp |= (1 << OUT_ENDPOINT_ENABLE);
+			tmp |= (ep->is_in << ENDPOINT_DIRECTION);
+		}
+
+		tmp |= usb_endpoint_num(desc);
+		tmp |= (ep->ep.maxburst << MAX_BURST_SIZE);
+	}
+
+	/* Make sure all the registers are written before ep_rsp*/
+	wmb();
 
 	/* for OUT transfers, block the rx fifo until a read is posted */
-	ep->is_in = (tmp & USB_DIR_IN) != 0;
 	if (!ep->is_in)
 		writel ((1 << SET_NAK_OUT_PACKETS), &ep->regs->ep_rsp);
 	else if (dev->pdev->device != 0x2280) {
@@ -226,11 +267,13 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 			| (1 << CLEAR_NAK_OUT_PACKETS_MODE), &ep->regs->ep_rsp);
 	}
 
-	writel (tmp, &ep->regs->ep_cfg);
+	writel(tmp, &ep->cfg->ep_cfg);
 
 	/* enable irqs */
 	if (!ep->dma) {				/* pio, per-packet */
-		tmp = (1 << ep->num) | readl (&dev->regs->pciirqenb0);
+		tmp = (dev->pdev->vendor == 0x17cc)?(1 << ep->num)
+						   : (1 << ep_bit[ep->num]);
+		tmp |= readl(&dev->regs->pciirqenb0);
 		writel (tmp, &dev->regs->pciirqenb0);
 
 		tmp = (1 << DATA_PACKET_RECEIVED_INTERRUPT_ENABLE)
@@ -251,8 +294,10 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 			tmp = (1 << SHORT_PACKET_TRANSFERRED_INTERRUPT_ENABLE);
 			writel (tmp, &ep->regs->ep_irqenb);
 
-			tmp = (1 << ep->num) | readl (&dev->regs->pciirqenb0);
-			writel (tmp, &dev->regs->pciirqenb0);
+			tmp = (dev->pdev->vendor == 0x17cc)?(1 << ep->num)
+						: (1 << ep_bit[ep->num]);
+			tmp |= readl(&dev->regs->pciirqenb0);
+			writel(tmp, &dev->regs->pciirqenb0);
 		}
 	}
 
@@ -286,7 +331,8 @@ static int handshake (u32 __iomem *ptr, u32 mask, u32 done, int usec)
 
 static const struct usb_ep_ops net2280_ep_ops;
 
-static void ep_reset (struct net2280_regs __iomem *regs, struct net2280_ep *ep)
+static void ep_reset_228x(struct net2280_regs __iomem *regs,
+			  struct net2280_ep *ep)
 {
 	u32		tmp;
 
@@ -361,6 +407,55 @@ static void ep_reset (struct net2280_regs __iomem *regs, struct net2280_ep *ep)
 	/* fifo size is handled separately */
 }
 
+static void ep_reset_338x(struct net2280_regs __iomem *regs,
+					struct net2280_ep *ep)
+{
+	u32 tmp, dmastat;
+
+	ep->desc = NULL;
+	INIT_LIST_HEAD(&ep->queue);
+
+	usb_ep_set_maxpacket_limit(&ep->ep, ~0);
+	ep->ep.ops = &net2280_ep_ops;
+
+	/* disable the dma, irqs, endpoint... */
+	if (ep->dma) {
+		writel(0, &ep->dma->dmactl);
+		writel((1 << DMA_ABORT_DONE_INTERRUPT) |
+		       (1 << DMA_PAUSE_DONE_INTERRUPT) |
+		       (1 << DMA_SCATTER_GATHER_DONE_INTERRUPT) |
+		       (1 << DMA_TRANSACTION_DONE_INTERRUPT)
+		       /* | (1 << DMA_ABORT) */
+		       , &ep->dma->dmastat);
+
+		dmastat = readl(&ep->dma->dmastat);
+		if (dmastat == 0x5002) {
+			WARNING(ep->dev, "The dmastat return = %x!!\n",
+			       dmastat);
+			writel(0x5a, &ep->dma->dmastat);
+		}
+
+		tmp = readl(&regs->pciirqenb0);
+		tmp &= ~(1 << ep_bit[ep->num]);
+		writel(tmp, &regs->pciirqenb0);
+	} else {
+		if (ep->num < 5) {
+			tmp = readl(&regs->pciirqenb1);
+			tmp &= ~(1 << (8 + ep->num));	/* completion */
+			writel(tmp, &regs->pciirqenb1);
+		}
+	}
+	writel(0, &ep->regs->ep_irqenb);
+
+	writel((1 << SHORT_PACKET_OUT_DONE_INTERRUPT) |
+	       (1 << SHORT_PACKET_TRANSFERRED_INTERRUPT) |
+	       (1 << FIFO_OVERFLOW) |
+	       (1 << DATA_PACKET_RECEIVED_INTERRUPT) |
+	       (1 << DATA_PACKET_TRANSMITTED_INTERRUPT) |
+	       (1 << DATA_OUT_PING_TOKEN_INTERRUPT) |
+	       (1 << DATA_IN_TOKEN_INTERRUPT), &ep->regs->ep_stat);
+}
+
 static void nuke (struct net2280_ep *);
 
 static int net2280_disable (struct usb_ep *_ep)
@@ -374,13 +469,17 @@ static int net2280_disable (struct usb_ep *_ep)
 
 	spin_lock_irqsave (&ep->dev->lock, flags);
 	nuke (ep);
-	ep_reset (ep->dev->regs, ep);
+
+	if (ep->dev->pdev->vendor == 0x10b5)
+		ep_reset_338x(ep->dev->regs, ep);
+	else
+		ep_reset_228x(ep->dev->regs, ep);
 
 	VDEBUG (ep->dev, "disabled %s %s\n",
 			ep->dma ? "dma" : "pio", _ep->name);
 
 	/* synch memory views with the device */
-	(void) readl (&ep->regs->ep_cfg);
+	(void)readl(&ep->cfg->ep_cfg);
 
 	if (use_dma && !ep->dma && ep->num >= 1 && ep->num <= 4)
 		ep->dma = &ep->dev->dma [ep->num - 1];
@@ -698,6 +797,8 @@ static void start_queue (struct net2280_ep *ep, u32 dmactl, u32 td_dma)
 	writel (readl (&dma->dmastat), &dma->dmastat);
 
 	writel (td_dma, &dma->dmadesc);
+	if (ep->dev->pdev->vendor == 0x10b5)
+		dmactl |= (0x01 << DMA_REQUEST_OUTSTANDING);
 	writel (dmactl, &dma->dmactl);
 
 	/* erratum 0116 workaround part 3:  pci arbiter away from net2280 */
@@ -772,6 +873,21 @@ static void start_dma (struct net2280_ep *ep, struct net2280_request *req)
 	start_queue (ep, tmp, req->td_dma);
 }
 
+static inline void resume_dma(struct net2280_ep *ep)
+{
+	writel(readl(&ep->dma->dmactl) | (1 << DMA_ENABLE), &ep->dma->dmactl);
+
+	ep->dma_started = true;
+}
+
+static inline void ep_stop_dma(struct net2280_ep *ep)
+{
+	writel(readl(&ep->dma->dmactl) & ~(1 << DMA_ENABLE), &ep->dma->dmactl);
+	spin_stop_dma(ep->dma);
+
+	ep->dma_started = false;
+}
+
 static inline void
 queue_dma (struct net2280_ep *ep, struct net2280_request *req, int valid)
 {
@@ -874,8 +990,23 @@ net2280_queue (struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags)
 
 	/* kickstart this i/o queue? */
 	if (list_empty (&ep->queue) && !ep->stopped) {
+		/* DMA request while EP halted */
+		if (ep->dma &&
+		    (readl(&ep->regs->ep_rsp) & (1 << CLEAR_ENDPOINT_HALT)) &&
+			(dev->pdev->vendor == 0x10b5)) {
+			int valid = 1;
+			if (ep->is_in) {
+				int expect;
+				expect = likely(req->req.zero ||
+						((req->req.length %
+						  ep->ep.maxpacket) != 0));
+				if (expect != ep->in_fifo_validate)
+					valid = 0;
+			}
+			queue_dma(ep, req, valid);
+		}
 		/* use DMA if the endpoint supports it, else pio */
-		if (ep->dma)
+		else if (ep->dma)
 			start_dma (ep, req);
 		else {
 			/* maybe there's no control data, just status ack */
@@ -993,6 +1124,8 @@ static void scan_dma_completions (struct net2280_ep *ep)
 		} else if (!ep->is_in
 				&& (req->req.length % ep->ep.maxpacket) != 0) {
 			tmp = readl (&ep->regs->ep_stat);
+			if (ep->dev->pdev->vendor == 0x10b5)
+				return dma_done(ep, req, tmp, 0);
 
 			/* AVOID TROUBLE HERE by not issuing short reads from
 			 * your gadget driver.  That helps avoids errata 0121,
@@ -1079,7 +1212,7 @@ static void restart_dma (struct net2280_ep *ep)
 	start_queue (ep, dmactl, req->td_dma);
 }
 
-static void abort_dma (struct net2280_ep *ep)
+static void abort_dma_228x(struct net2280_ep *ep)
 {
 	/* abort the current transfer */
 	if (likely (!list_empty (&ep->queue))) {
@@ -1091,6 +1224,19 @@ static void abort_dma (struct net2280_ep *ep)
 	scan_dma_completions (ep);
 }
 
+static void abort_dma_338x(struct net2280_ep *ep)
+{
+	writel((1 << DMA_ABORT), &ep->dma->dmastat);
+	spin_stop_dma(ep->dma);
+}
+
+static void abort_dma(struct net2280_ep *ep)
+{
+	if (ep->dev->pdev->vendor == 0x17cc)
+		return abort_dma_228x(ep);
+	return abort_dma_338x(ep);
+}
+
 /* dequeue ALL requests */
 static void nuke (struct net2280_ep *ep)
 {
@@ -1244,6 +1390,9 @@ net2280_set_halt_and_wedge(struct usb_ep *_ep, int value, int wedged)
 				ep->wedged = 1;
 		} else {
 			clear_halt (ep);
+			if (ep->dev->pdev->vendor == 0x10b5 &&
+				!list_empty(&ep->queue) && ep->td_dma)
+					restart_dma(ep);
 			ep->wedged = 0;
 		}
 		(void) readl (&ep->regs->ep_rsp);
@@ -1367,10 +1516,13 @@ static int net2280_set_selfpowered (struct usb_gadget *_gadget, int value)
 
 	spin_lock_irqsave (&dev->lock, flags);
 	tmp = readl (&dev->usb->usbctl);
-	if (value)
+	if (value) {
 		tmp |= (1 << SELF_POWERED_STATUS);
-	else
+		dev->selfpowered = 1;
+	} else {
 		tmp &= ~(1 << SELF_POWERED_STATUS);
+		dev->selfpowered = 0;
+	}
 	writel (tmp, &dev->usb->usbctl);
 	spin_unlock_irqrestore (&dev->lock, flags);
 
@@ -1504,14 +1656,14 @@ static ssize_t registers_show(struct device *_dev,
 	/* DMA Control Registers */
 
 	/* Configurable EP Control Registers */
-	for (i = 0; i < 7; i++) {
+	for (i = 0; i < dev->n_ep; i++) {
 		struct net2280_ep	*ep;
 
 		ep = &dev->ep [i];
 		if (i && !ep->desc)
 			continue;
 
-		t1 = readl (&ep->regs->ep_cfg);
+		t1 = readl(&ep->cfg->ep_cfg);
 		t2 = readl (&ep->regs->ep_rsp) & 0xff;
 		t = scnprintf (next, size,
 				"\n%s\tcfg %05x rsp (%02x) %s%s%s%s%s%s%s%s"
@@ -1571,7 +1723,7 @@ static ssize_t registers_show(struct device *_dev,
 	t = scnprintf (next, size, "\nirqs:  ");
 	size -= t;
 	next += t;
-	for (i = 0; i < 7; i++) {
+	for (i = 0; i < dev->n_ep; i++) {
 		struct net2280_ep	*ep;
 
 		ep = &dev->ep [i];
@@ -1606,7 +1758,7 @@ static ssize_t queues_show(struct device *_dev, struct device_attribute *attr,
 	size = PAGE_SIZE;
 	spin_lock_irqsave (&dev->lock, flags);
 
-	for (i = 0; i < 7; i++) {
+	for (i = 0; i < dev->n_ep; i++) {
 		struct net2280_ep		*ep = &dev->ep [i];
 		struct net2280_request		*req;
 		int				t;
@@ -1735,6 +1887,121 @@ static void set_fifo_mode (struct net2280 *dev, int mode)
 	list_add_tail (&dev->ep [6].ep.ep_list, &dev->gadget.ep_list);
 }
 
+static void defect7374_disable_data_eps(struct net2280 *dev)
+{
+	/*
+	 * For Defect 7374, disable data EPs (and more):
+	 *  - This phase undoes the earlier phase of the Defect 7374 workaround,
+	 *    returning ep regs back to normal.
+	 */
+	struct net2280_ep *ep;
+	int i;
+	unsigned char ep_sel;
+	u32 tmp_reg;
+
+	for (i = 1; i < 5; i++) {
+		ep = &dev->ep[i];
+		writel(0, &ep->cfg->ep_cfg);
+	}
+
+	/* CSROUT, CSRIN, PCIOUT, PCIIN, STATIN, RCIN */
+	for (i = 0; i < 6; i++)
+		writel(0, &dev->dep[i].dep_cfg);
+
+	for (ep_sel = 0; ep_sel <= 21; ep_sel++) {
+		/* Select an endpoint for subsequent operations: */
+		tmp_reg = readl(&dev->plregs->pl_ep_ctrl);
+		writel(((tmp_reg & ~0x1f) | ep_sel), &dev->plregs->pl_ep_ctrl);
+
+		if (ep_sel < 2 || (ep_sel > 9 && ep_sel < 14) ||
+					ep_sel == 18 || ep_sel == 20)
+			continue;
+
+		/* Change settings on some selected endpoints */
+		tmp_reg = readl(&dev->plregs->pl_ep_cfg_4);
+		tmp_reg &= ~(1 << NON_CTRL_IN_TOLERATE_BAD_DIR);
+		writel(tmp_reg, &dev->plregs->pl_ep_cfg_4);
+		tmp_reg = readl(&dev->plregs->pl_ep_ctrl);
+		tmp_reg |= (1 << EP_INITIALIZED);
+		writel(tmp_reg, &dev->plregs->pl_ep_ctrl);
+	}
+}
+
+static void defect7374_enable_data_eps_zero(struct net2280 *dev)
+{
+	u32 tmp = 0, tmp_reg;
+	u32 fsmvalue, scratch;
+	int i;
+	unsigned char ep_sel;
+
+	scratch = get_idx_reg(dev->regs, SCRATCH);
+	fsmvalue = scratch & (0xf << DEFECT7374_FSM_FIELD);
+	scratch &= ~(0xf << DEFECT7374_FSM_FIELD);
+
+	/*See if firmware needs to set up for workaround*/
+	if (fsmvalue != DEFECT7374_FSM_SS_CONTROL_READ) {
+		WARNING(dev, "Operate Defect 7374 workaround soft this time");
+		WARNING(dev, "It will operate on cold-reboot and SS connect");
+
+		/*GPEPs:*/
+		tmp = ((0 << ENDPOINT_NUMBER) | (1 << ENDPOINT_DIRECTION) |
+		       (2 << OUT_ENDPOINT_TYPE) | (2 << IN_ENDPOINT_TYPE) |
+		       ((dev->enhanced_mode) ?
+			1 << OUT_ENDPOINT_ENABLE : 1 << ENDPOINT_ENABLE) |
+		       (1 << IN_ENDPOINT_ENABLE));
+
+		for (i = 1; i < 5; i++)
+			writel(tmp, &dev->ep[i].cfg->ep_cfg);
+
+		/* CSRIN, PCIIN, STATIN, RCIN*/
+		tmp = ((0 << ENDPOINT_NUMBER) | (1 << ENDPOINT_ENABLE));
+		writel(tmp, &dev->dep[1].dep_cfg);
+		writel(tmp, &dev->dep[3].dep_cfg);
+		writel(tmp, &dev->dep[4].dep_cfg);
+		writel(tmp, &dev->dep[5].dep_cfg);
+
+		/*Implemented for development and debug.
+		 * Can be refined/tuned later.*/
+		for (ep_sel = 0; ep_sel <= 21; ep_sel++) {
+			/* Select an endpoint for subsequent operations: */
+			tmp_reg = readl(&dev->plregs->pl_ep_ctrl);
+			writel(((tmp_reg & ~0x1f) | ep_sel),
+			       &dev->plregs->pl_ep_ctrl);
+
+			if (ep_sel == 1) {
+				tmp =
+				    (readl(&dev->plregs->pl_ep_ctrl) |
+				     (1 << CLEAR_ACK_ERROR_CODE) | 0);
+				writel(tmp, &dev->plregs->pl_ep_ctrl);
+				continue;
+			}
+
+			if (ep_sel == 0 || (ep_sel > 9 && ep_sel < 14) ||
+					ep_sel == 18  || ep_sel == 20)
+				continue;
+
+			tmp = (readl(&dev->plregs->pl_ep_cfg_4) |
+				 (1 << NON_CTRL_IN_TOLERATE_BAD_DIR) | 0);
+			writel(tmp, &dev->plregs->pl_ep_cfg_4);
+
+			tmp = readl(&dev->plregs->pl_ep_ctrl) &
+				~(1 << EP_INITIALIZED);
+			writel(tmp, &dev->plregs->pl_ep_ctrl);
+
+		}
+
+		/* Set FSM to focus on the first Control Read:
+		 * - Tip: Connection speed is known upon the first
+		 * setup request.*/
+		scratch |= DEFECT7374_FSM_WAITING_FOR_CONTROL_READ;
+		set_idx_reg(dev->regs, SCRATCH, scratch);
+
+	} else{
+		WARNING(dev, "Defect 7374 workaround soft will NOT operate");
+		WARNING(dev, "It will operate on cold-reboot and SS connect");
+	}
+}
+
 /* keeping it simple:
  * - one bus driver, initted first;
  * - one function driver, initted second
@@ -1744,7 +2011,7 @@ static void set_fifo_mode (struct net2280 *dev, int mode)
  * perhaps to bind specific drivers to specific devices.
  */
 
-static void usb_reset (struct net2280 *dev)
+static void usb_reset_228x(struct net2280 *dev)
 {
 	u32	tmp;
 
@@ -1760,11 +2027,11 @@ static void usb_reset (struct net2280 *dev)
 
 	/* clear old dma and irq state */
 	for (tmp = 0; tmp < 4; tmp++) {
-		struct net2280_ep	*ep = &dev->ep [tmp + 1];
-
+		struct net2280_ep       *ep = &dev->ep[tmp + 1];
 		if (ep->dma)
-			abort_dma (ep);
+			abort_dma(ep);
 	}
+
 	writel (~0, &dev->regs->irqstat0),
 	writel (~(1 << SUSPEND_REQUEST_INTERRUPT), &dev->regs->irqstat1),
 
@@ -1780,7 +2047,67 @@ static void usb_reset (struct net2280 *dev)
 	set_fifo_mode (dev, (fifo_mode <= 2) ? fifo_mode : 0);
 }
 
-static void usb_reinit (struct net2280 *dev)
+static void usb_reset_338x(struct net2280 *dev)
+{
+	u32 tmp;
+	u32 fsmvalue;
+
+	dev->gadget.speed = USB_SPEED_UNKNOWN;
+	(void)readl(&dev->usb->usbctl);
+
+	net2280_led_init(dev);
+
+	fsmvalue = get_idx_reg(dev->regs, SCRATCH) &
+			(0xf << DEFECT7374_FSM_FIELD);
+
+	/* See if firmware needs to set up for workaround: */
+	if (fsmvalue != DEFECT7374_FSM_SS_CONTROL_READ) {
+		INFO(dev, "%s: Defect 7374 FsmValue 0x%08X\n", __func__,
+		     fsmvalue);
+	} else {
+		/* disable automatic responses, and irqs */
+		writel(0, &dev->usb->stdrsp);
+		writel(0, &dev->regs->pciirqenb0);
+		writel(0, &dev->regs->pciirqenb1);
+	}
+
+	/* clear old dma and irq state */
+	for (tmp = 0; tmp < 4; tmp++) {
+		struct net2280_ep *ep = &dev->ep[tmp + 1];
+
+		if (ep->dma)
+			abort_dma(ep);
+	}
+
+	writel(~0, &dev->regs->irqstat0), writel(~0, &dev->regs->irqstat1);
+
+	if (fsmvalue == DEFECT7374_FSM_SS_CONTROL_READ) {
+		/* reset, and enable pci */
+		tmp = readl(&dev->regs->devinit) |
+		    (1 << PCI_ENABLE) |
+		    (1 << FIFO_SOFT_RESET) |
+		    (1 << USB_SOFT_RESET) |
+		    (1 << M8051_RESET);
+
+		writel(tmp, &dev->regs->devinit);
+	}
+
+	/* always ep-{1,2,3,4} ... maybe not ep-3 or ep-4 */
+	INIT_LIST_HEAD(&dev->gadget.ep_list);
+
+	for (tmp = 1; tmp < dev->n_ep; tmp++)
+		list_add_tail(&dev->ep[tmp].ep.ep_list, &dev->gadget.ep_list);
+
+}
+
+static void usb_reset(struct net2280 *dev)
+{
+	if (dev->pdev->vendor == 0x17cc)
+		return usb_reset_228x(dev);
+	return usb_reset_338x(dev);
+}
+
+static void usb_reinit_228x(struct net2280 *dev)
 {
 	u32	tmp;
 	int	init_dma;
@@ -1803,7 +2130,8 @@ static void usb_reinit (struct net2280 *dev)
 		} else
 			ep->fifo_size = 64;
 		ep->regs = &dev->epregs [tmp];
-		ep_reset (dev->regs, ep);
+		ep->cfg = &dev->epregs[tmp];
+		ep_reset_228x(dev->regs, ep);
 	}
 	usb_ep_set_maxpacket_limit(&dev->ep [0].ep, 64);
 	usb_ep_set_maxpacket_limit(&dev->ep [5].ep, 64);
@@ -1820,7 +2148,122 @@ static void usb_reinit (struct net2280 *dev)
 		writel (EP_DONTUSE, &dev->dep [tmp].dep_cfg);
 }
 
-static void ep0_start (struct net2280 *dev)
+static void usb_reinit_338x(struct net2280 *dev)
+{
+	int init_dma;
+	int i;
+	u32 tmp, val;
+	u32 fsmvalue;
+	static const u32 ne[9] = { 0, 1, 2, 3, 4, 1, 2, 3, 4 };
+	static const u32 ep_reg_addr[9] = { 0x00, 0xC0, 0x00, 0xC0, 0x00,
+						0x00, 0xC0, 0x00, 0xC0 };
+
+	/* use_dma changes are ignored till next device re-init */
+	init_dma = use_dma;
+
+	/* basic endpoint init */
+	for (i = 0; i < dev->n_ep; i++) {
+		struct net2280_ep *ep = &dev->ep[i];
+
+		ep->ep.name = ep_name[i];
+		ep->dev = dev;
+		ep->num = i;
+
+		if (i > 0 && i <= 4 && init_dma)
+			ep->dma = &dev->dma[i - 1];
+
+		if (dev->enhanced_mode) {
+			ep->cfg = &dev->epregs[ne[i]];
+			ep->regs = (struct net2280_ep_regs __iomem *)
+				(((void *)&dev->epregs[ne[i]]) +
+				ep_reg_addr[i]);
+			ep->fiforegs = &dev->fiforegs[i];
+		} else {
+			ep->cfg = &dev->epregs[i];
+			ep->regs = &dev->epregs[i];
+			ep->fiforegs = &dev->fiforegs[i];
+		}
+
+		ep->fifo_size = (i != 0) ? 2048 : 512;
+
+		ep_reset_338x(dev->regs, ep);
+	}
+	usb_ep_set_maxpacket_limit(&dev->ep[0].ep, 512);
+
+	dev->gadget.ep0 = &dev->ep[0].ep;
+	dev->ep[0].stopped = 0;
+
+	/* Link layer set up */
+	fsmvalue = get_idx_reg(dev->regs, SCRATCH) &
+				(0xf << DEFECT7374_FSM_FIELD);
+
+	/* See if driver needs to set up for workaround: */
+	if (fsmvalue != DEFECT7374_FSM_SS_CONTROL_READ)
+		INFO(dev, "%s: Defect 7374 FsmValue %08x\n",
+						__func__, fsmvalue);
+	else {
+		tmp = readl(&dev->usb_ext->usbctl2) &
+		    ~((1 << U1_ENABLE) | (1 << U2_ENABLE) | (1 << LTM_ENABLE));
+		writel(tmp, &dev->usb_ext->usbctl2);
+	}
+
+	/* Hardware Defect and Workaround */
+	val = readl(&dev->ll_lfps_regs->ll_lfps_5);
+	val &= ~(0xf << TIMER_LFPS_6US);
+	val |= 0x5 << TIMER_LFPS_6US;
+	writel(val, &dev->ll_lfps_regs->ll_lfps_5);
+
+	val = readl(&dev->ll_lfps_regs->ll_lfps_6);
+	val &= ~(0xffff << TIMER_LFPS_80US);
+	val |= 0x0100 << TIMER_LFPS_80US;
+	writel(val, &dev->ll_lfps_regs->ll_lfps_6);
+
+	/*
+	 * AA_AB Errata. Issue 4. Workaround for SuperSpeed USB
+	 * Hot Reset Exit Handshake may Fail in Specific Case using
+	 * Default Register Settings. Workaround for Enumeration test.
+	 */
+	val = readl(&dev->ll_tsn_regs->ll_tsn_counters_2);
+	val &= ~(0x1f << HOT_TX_NORESET_TS2);
+	val |= 0x10 << HOT_TX_NORESET_TS2;
+	writel(val, &dev->ll_tsn_regs->ll_tsn_counters_2);
+
+	val = readl(&dev->ll_tsn_regs->ll_tsn_counters_3);
+	val &= ~(0x1f << HOT_RX_RESET_TS2);
+	val |= 0x3 << HOT_RX_RESET_TS2;
+	writel(val, &dev->ll_tsn_regs->ll_tsn_counters_3);
+
+	/*
+	 * Set Recovery Idle to Recover bit:
+	 * - On SS connections, setting Recovery Idle to Recover Fmw improves
+	 *   link robustness with various hosts and hubs.
+	 * - It is safe to set for all connection speeds; all chip revisions.
+	 * - R-M-W to leave other bits undisturbed.
+	 * - Reference PLX TT-7372
+	*/
+	val = readl(&dev->ll_chicken_reg->ll_tsn_chicken_bit);
+	val |= (1 << RECOVERY_IDLE_TO_RECOVER_FMW);
+	writel(val, &dev->ll_chicken_reg->ll_tsn_chicken_bit);
+
+	INIT_LIST_HEAD(&dev->gadget.ep0->ep_list);
+
+	/* disable dedicated endpoints */
+	writel(0x0D, &dev->dep[0].dep_cfg);
+	writel(0x0D, &dev->dep[1].dep_cfg);
+	writel(0x0E, &dev->dep[2].dep_cfg);
+	writel(0x0E, &dev->dep[3].dep_cfg);
+	writel(0x0F, &dev->dep[4].dep_cfg);
+	writel(0x0C, &dev->dep[5].dep_cfg);
+}
+
+static void usb_reinit(struct net2280 *dev)
+{
+	if (dev->pdev->vendor == 0x17cc)
+		return usb_reinit_228x(dev);
+	return usb_reinit_338x(dev);
+}
+
+static void ep0_start_228x(struct net2280 *dev)
 {
 	writel (  (1 << CLEAR_EP_HIDE_STATUS_PHASE)
 		| (1 << CLEAR_NAK_OUT_PACKETS)
@@ -1863,6 +2306,61 @@ static void ep0_start (struct net2280 *dev)
 	(void) readl (&dev->usb->usbctl);
 }
 
+static void ep0_start_338x(struct net2280 *dev)
+{
+	u32 fsmvalue;
+
+	fsmvalue = get_idx_reg(dev->regs, SCRATCH) &
+			(0xf << DEFECT7374_FSM_FIELD);
+
+	if (fsmvalue != DEFECT7374_FSM_SS_CONTROL_READ)
+		INFO(dev, "%s: Defect 7374 FsmValue %08x\n", __func__,
+		     fsmvalue);
+	else
+		writel((1 << CLEAR_NAK_OUT_PACKETS_MODE) |
+		       (1 << SET_EP_HIDE_STATUS_PHASE),
+		       &dev->epregs[0].ep_rsp);
+
+	/*
+	 * hardware optionally handles a bunch of standard requests
+	 * that the API hides from drivers anyway.  have it do so.
+	 * endpoint status/features are handled in software, to
+	 * help pass tests for some dubious behavior.
+	 */
+	writel((1 << SET_ISOCHRONOUS_DELAY) |
+	       (1 << SET_SEL) |
+	       (1 << SET_TEST_MODE) |
+	       (1 << SET_ADDRESS) |
+	       (1 << GET_INTERFACE_STATUS) |
+	       (1 << GET_DEVICE_STATUS),
+		&dev->usb->stdrsp);
+	dev->wakeup_enable = 1;
+	writel((1 << USB_ROOT_PORT_WAKEUP_ENABLE) |
+	       (dev->softconnect << USB_DETECT_ENABLE) |
+	       (1 << DEVICE_REMOTE_WAKEUP_ENABLE),
+	       &dev->usb->usbctl);
+
+	/* enable irqs so we can see ep0 and general operation  */
+	writel((1 << SETUP_PACKET_INTERRUPT_ENABLE) |
+	       (1 << ENDPOINT_0_INTERRUPT_ENABLE)
+	       , &dev->regs->pciirqenb0);
+	writel((1 << PCI_INTERRUPT_ENABLE) |
+	       (1 << ROOT_PORT_RESET_INTERRUPT_ENABLE) |
+	       (1 << SUSPEND_REQUEST_CHANGE_INTERRUPT_ENABLE) |
+	       (1 << VBUS_INTERRUPT_ENABLE),
+	       &dev->regs->pciirqenb1);
+
+	/* don't leave any writes posted */
+	(void)readl(&dev->usb->usbctl);
+}
+
+static void ep0_start(struct net2280 *dev)
+{
+	if (dev->pdev->vendor == 0x17cc)
+		return ep0_start_228x(dev);
+	return ep0_start_338x(dev);
+}
+
 /* when a driver is successfully registered, it will receive
  * control requests including set_configuration(), which enables
  * non-control requests.  then usb traffic follows until a
@@ -1886,7 +2384,7 @@ static int net2280_start(struct usb_gadget *_gadget,
 
 	dev = container_of (_gadget, struct net2280, gadget);
 
-	for (i = 0; i < 7; i++)
+	for (i = 0; i < dev->n_ep; i++)
 		dev->ep [i].irqs = 0;
 
 	/* hook up the driver ... */
@@ -1900,13 +2398,17 @@ static int net2280_start(struct usb_gadget *_gadget,
 	if (retval) goto err_func;
 
 	/* Enable force-full-speed testing mode, if desired */
-	if (full_speed)
+	if (full_speed && dev->pdev->vendor == 0x17cc)
 		writel(1 << FORCE_FULL_SPEED_MODE, &dev->usb->xcvrdiag);
 
 	/* ... then enable host detection and ep0; and we're ready
 	 * for set_configuration as well as eventual disconnect.
 	 */
 	net2280_led_active (dev, 1);
+
+	if (dev->pdev->vendor == 0x10b5)
+		defect7374_enable_data_eps_zero(dev);
+
 	ep0_start (dev);
 
 	DEBUG (dev, "%s ready, usbctl %08x stdrsp %08x\n",
@@ -1937,7 +2439,7 @@ stop_activity (struct net2280 *dev, struct usb_gadget_driver *driver)
 	 * and kill any outstanding requests.
 	 */
 	usb_reset (dev);
-	for (i = 0; i < 7; i++)
+	for (i = 0; i < dev->n_ep; i++)
 		nuke (&dev->ep [i]);
 
 	/* report disconnect; the driver is already quiesced */
@@ -1967,7 +2469,8 @@ static int net2280_stop(struct usb_gadget *_gadget,
 	net2280_led_active (dev, 0);
 
 	/* Disable full-speed test mode */
-	writel(0, &dev->usb->xcvrdiag);
+	if (dev->pdev->vendor == 0x17cc)
+		writel(0, &dev->usb->xcvrdiag);
 
 	device_remove_file (&dev->pdev->dev, &dev_attr_function);
 	device_remove_file (&dev->pdev->dev, &dev_attr_queues);
@@ -2219,6 +2722,350 @@ get_ep_by_addr (struct net2280 *dev, u16 wIndex)
 	return NULL;
 }
 
+static void defect7374_workaround(struct net2280 *dev, struct usb_ctrlrequest r)
+{
+	u32 scratch, fsmvalue;
+	u32 ack_wait_timeout, state;
+
+	/* Defect 7374 (U1/U2 erroneously rejected): fetch FSM from SCRATCH */
+	scratch = get_idx_reg(dev->regs, SCRATCH);
+	fsmvalue = scratch & (0xf << DEFECT7374_FSM_FIELD);
+	scratch &= ~(0xf << DEFECT7374_FSM_FIELD);
+
+	if (!((fsmvalue == DEFECT7374_FSM_WAITING_FOR_CONTROL_READ) &&
+				(r.bRequestType & USB_DIR_IN)))
+		return;
+
+	/* This is the first Control Read for this connection: */
+	if (!(readl(&dev->usb->usbstat) & (1 << SUPER_SPEED_MODE))) {
+		/*
+		 * Connection is NOT SS:
+		 * - Connection must be FS or HS.
+		 * - This FSM state should allow workaround software to
+		 * run after the next USB connection.
+		 */
+		scratch |= DEFECT7374_FSM_NON_SS_CONTROL_READ;
+		goto restore_data_eps;
+	}
+
+	/* Connection is SS: */
+	for (ack_wait_timeout = 0;
+			ack_wait_timeout < DEFECT_7374_NUMBEROF_MAX_WAIT_LOOPS;
+			ack_wait_timeout++) {
+
+		state =	readl(&dev->plregs->pl_ep_status_1)
+			& (0xff << STATE);
+		if ((state >= (ACK_GOOD_NORMAL << STATE)) &&
+			(state <= (ACK_GOOD_MORE_ACKS_TO_COME << STATE))) {
+			scratch |= DEFECT7374_FSM_SS_CONTROL_READ;
+			break;
+		}
+
+		/*
+		 * We have not yet received host's Data Phase ACK
+		 * - Wait and try again.
+		 */
+		udelay(DEFECT_7374_PROCESSOR_WAIT_TIME);
+
+		continue;
+	}
+
+	/* Log the outcome: poll limit reached, or ACK seen after N polls. */
+	if (ack_wait_timeout >= DEFECT_7374_NUMBEROF_MAX_WAIT_LOOPS) {
+		ERROR(dev, "FAIL: Defect 7374 workaround waited but failed");
+		ERROR(dev, "to detect SS host's data phase ACK.");
+		ERROR(dev, "PL_EP_STATUS_1(23:16):.Expected from 0x11 to 0x16");
+		ERROR(dev, "got 0x%2.2x.\n", state >> STATE);
+	} else {
+		WARNING(dev, "INFO: Defect 7374 workaround waited about\n");
+		WARNING(dev, "%duSec for Control Read Data Phase ACK\n",
+			DEFECT_7374_PROCESSOR_WAIT_TIME * ack_wait_timeout);
+	}
+
+restore_data_eps:
+	/*
+	 * Restore data EPs to their pre-workaround settings (disabled,
+	 * initialized, and other details).
+	 */
+	defect7374_disable_data_eps(dev);
+
+	set_idx_reg(dev->regs, SCRATCH, scratch);	/* save FSM state */
+
+	return;
+}
+
+static void ep_stall(struct net2280_ep *ep, int stall)
+{
+	struct net2280 *dev = ep->dev;
+	u32 val;
+	static const u32 ep_pl[9] = { 0, 3, 4, 7, 8, 2, 5, 6, 9 };
+
+	if (stall) {
+		writel((1 << SET_ENDPOINT_HALT) |
+		       /* (1 << SET_NAK_PACKETS) | */
+		       (1 << CLEAR_CONTROL_STATUS_PHASE_HANDSHAKE),
+		       &ep->regs->ep_rsp);
+		ep->is_halt = 1;
+	} else {
+		if (dev->gadget.speed == USB_SPEED_SUPER) {
+			/*
+			 * Workaround: SS SeqNum is not cleared by Endpoint Halt
+			 * (Clear); select the endpoint, then reset its SeqNum.
+			 */
+			val = readl(&dev->plregs->pl_ep_ctrl);
+			val = (val & ~0x1f) | ep_pl[ep->num];
+			writel(val, &dev->plregs->pl_ep_ctrl);
+
+			val |= (1 << SEQUENCE_NUMBER_RESET);
+			writel(val, &dev->plregs->pl_ep_ctrl);
+		}
+		val = readl(&ep->regs->ep_rsp);
+		val |= (1 << CLEAR_ENDPOINT_HALT) |
+			(1 << CLEAR_ENDPOINT_TOGGLE);
+		writel(val
+		       /* | (1 << CLEAR_NAK_PACKETS)*/
+		       , &ep->regs->ep_rsp);
+		ep->is_halt = 0;
+		val = readl(&ep->regs->ep_rsp);	/* value unused */
+	}
+}
+
+static void ep_stdrsp(struct net2280_ep *ep, int value, int wedged)
+{
+	/* nonzero value halts the ep (ep0: protocol stall); zero un-halts it */
+	if (value) {
+		ep->stopped = 1;
+		if (ep->num == 0)
+			ep->dev->protocol_stall = 1;
+		else {
+			if (ep->dma)
+				ep_stop_dma(ep);
+			ep_stall(ep, true);
+		}
+
+		if (wedged)
+			ep->wedged = 1;
+	} else {
+		ep->stopped = 0;
+		ep->wedged = 0;
+
+		ep_stall(ep, false);
+
+		/* Restart the request at the head of the queue, if any */
+		if (!list_empty(&ep->queue)) {
+			struct net2280_request *req =
+			    list_entry(ep->queue.next, struct net2280_request,
+				       queue);
+			if (ep->dma)
+				resume_dma(ep);
+			else {
+				if (ep->is_in)
+					write_fifo(ep, &req->req);
+				else {
+					if (read_fifo(ep, req))
+						done(ep, req, 0);
+				}
+			}
+		}
+	}
+}
+
+static void handle_stat0_irqs_superspeed(struct net2280 *dev,
+		struct net2280_ep *ep, struct usb_ctrlrequest r)
+{
+	int tmp = 0;	/* setup() result; negative means STALL */
+
+#define	w_value		le16_to_cpu(r.wValue)
+#define	w_index		le16_to_cpu(r.wIndex)
+#define	w_length	le16_to_cpu(r.wLength)
+
+	switch (r.bRequest) {
+		struct net2280_ep *e;
+		u16 status;
+
+	case USB_REQ_SET_CONFIGURATION:
+		dev->addressed_state = !w_value;
+		goto usb3_delegate;
+
+	case USB_REQ_GET_STATUS:
+		switch (r.bRequestType) {
+		case (USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE):
+			status = dev->wakeup_enable ? 0x02 : 0x00;
+			if (dev->selfpowered)
+				status |= 1 << 0;
+			status |= (dev->u1_enable << 2 | dev->u2_enable << 3 |
+							dev->ltm_enable << 4);
+			writel(0, &dev->epregs[0].ep_irqenb);
+			set_fifo_bytecount(ep, sizeof(status));
+			writel((__force u32) status, &dev->epregs[0].ep_data);
+			allow_status_338x(ep);
+			break;
+
+		case (USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_ENDPOINT):
+			e = get_ep_by_addr(dev, w_index);
+			if (!e)
+				goto do_stall3;
+			status = readl(&e->regs->ep_rsp) &
+						(1 << CLEAR_ENDPOINT_HALT);
+			writel(0, &dev->epregs[0].ep_irqenb);
+			set_fifo_bytecount(ep, sizeof(status));
+			writel((__force u32) status, &dev->epregs[0].ep_data);
+			allow_status_338x(ep);
+			break;
+
+		default:
+			goto usb3_delegate;
+		}
+		break;
+
+	case USB_REQ_CLEAR_FEATURE:
+		switch (r.bRequestType) {
+		case (USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE):
+			if (!dev->addressed_state) {
+				switch (w_value) {
+				case USB_DEVICE_U1_ENABLE:
+					dev->u1_enable = 0;
+					writel(readl(&dev->usb_ext->usbctl2) &
+						~(1 << U1_ENABLE),
+						&dev->usb_ext->usbctl2);
+					allow_status_338x(ep);
+					goto next_endpoints3;
+
+				case USB_DEVICE_U2_ENABLE:
+					dev->u2_enable = 0;
+					writel(readl(&dev->usb_ext->usbctl2) &
+						~(1 << U2_ENABLE),
+						&dev->usb_ext->usbctl2);
+					allow_status_338x(ep);
+					goto next_endpoints3;
+
+				case USB_DEVICE_LTM_ENABLE:
+					dev->ltm_enable = 0;
+					writel(readl(&dev->usb_ext->usbctl2) &
+						~(1 << LTM_ENABLE),
+						&dev->usb_ext->usbctl2);
+					allow_status_338x(ep);
+					goto next_endpoints3;
+
+				default:
+					break;
+				}
+			}
+			if (w_value == USB_DEVICE_REMOTE_WAKEUP) {
+				dev->wakeup_enable = 0;
+				writel(readl(&dev->usb->usbctl) &
+					~(1 << DEVICE_REMOTE_WAKEUP_ENABLE),
+					&dev->usb->usbctl);
+				allow_status_338x(ep);
+				break;
+			}
+			goto usb3_delegate;
+
+		case (USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_ENDPOINT):
+			e = get_ep_by_addr(dev,	w_index);
+			if (!e)
+				goto do_stall3;
+			if (w_value != USB_ENDPOINT_HALT)
+				goto do_stall3;
+			VDEBUG(dev, "%s clear halt\n", e->ep.name);
+			ep_stall(e, false);
+			if (!list_empty(&e->queue) && e->td_dma)
+				restart_dma(e);
+			allow_status(ep);
+			ep->stopped = 1;
+			break;
+
+		default:
+			goto usb3_delegate;
+		}
+		break;
+	case USB_REQ_SET_FEATURE:
+		switch (r.bRequestType) {
+		case (USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE):
+			if (!dev->addressed_state) {
+				switch (w_value) {
+				case USB_DEVICE_U1_ENABLE:
+					dev->u1_enable = 1;
+					writel(readl(&dev->usb_ext->usbctl2) |
+						(1 << U1_ENABLE),
+						&dev->usb_ext->usbctl2);
+					allow_status_338x(ep);
+					goto next_endpoints3;
+
+				case USB_DEVICE_U2_ENABLE:
+					dev->u2_enable = 1;
+					writel(readl(&dev->usb_ext->usbctl2) |
+						(1 << U2_ENABLE),
+						&dev->usb_ext->usbctl2);
+					allow_status_338x(ep);
+					goto next_endpoints3;
+
+				case USB_DEVICE_LTM_ENABLE:
+					dev->ltm_enable = 1;
+					writel(readl(&dev->usb_ext->usbctl2) |
+						(1 << LTM_ENABLE),
+						&dev->usb_ext->usbctl2);
+					allow_status_338x(ep);
+					goto next_endpoints3;
+				default:
+					break;
+				}
+			}
+
+			if (w_value == USB_DEVICE_REMOTE_WAKEUP) {
+				dev->wakeup_enable = 1;
+				writel(readl(&dev->usb->usbctl) |
+					(1 << DEVICE_REMOTE_WAKEUP_ENABLE),
+					&dev->usb->usbctl);
+				allow_status_338x(ep);
+				break;
+			}
+			goto usb3_delegate;
+
+		case (USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_ENDPOINT):
+			e = get_ep_by_addr(dev,	w_index);
+			if (!e || (w_value != USB_ENDPOINT_HALT))
+				goto do_stall3;
+			ep_stdrsp(e, true, false);
+			allow_status_338x(ep);
+			break;
+
+		default:
+			goto usb3_delegate;
+		}
+
+		break;
+	default:
+
+usb3_delegate:
+		VDEBUG(dev, "setup %02x.%02x v%04x i%04x l%04x ep_cfg %08x\n",
+				r.bRequestType, r.bRequest,
+				w_value, w_index, w_length,
+				readl(&ep->cfg->ep_cfg));
+
+		ep->responded = 0;
+		spin_unlock(&dev->lock);
+		tmp = dev->driver->setup(&dev->gadget, &r);
+		spin_lock(&dev->lock);
+	}
+	if (tmp < 0) {
+do_stall3:	/* also reached by goto with tmp == 0: must still STALL */
+		VDEBUG(dev, "req %02x.%02x protocol STALL; stat %d\n",
+				r.bRequestType, r.bRequest, tmp);
+		dev->protocol_stall = 1;
+		/* TD 9.9 Halt Endpoint test. TD 9.22 Set feature test */
+		ep_stall(ep, true);
+	}
+
+next_endpoints3:
+
+#undef	w_value
+#undef	w_index
+#undef	w_length
+
+	return;
+}
+
 static void handle_stat0_irqs (struct net2280 *dev, u32 stat)
 {
 	struct net2280_ep	*ep;
@@ -2240,10 +3087,20 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat)
 		struct net2280_request		*req;
 
 		if (dev->gadget.speed == USB_SPEED_UNKNOWN) {
-			if (readl (&dev->usb->usbstat) & (1 << HIGH_SPEED))
+			u32 val = readl(&dev->usb->usbstat);
+			if (val & (1 << SUPER_SPEED)) {
+				dev->gadget.speed = USB_SPEED_SUPER;
+				usb_ep_set_maxpacket_limit(&dev->ep[0].ep,
+						EP0_SS_MAX_PACKET_SIZE);
+			} else if (val & (1 << HIGH_SPEED)) {
 				dev->gadget.speed = USB_SPEED_HIGH;
-			else
+				usb_ep_set_maxpacket_limit(&dev->ep[0].ep,
+						EP0_HS_MAX_PACKET_SIZE);
+			} else {
 				dev->gadget.speed = USB_SPEED_FULL;
+				usb_ep_set_maxpacket_limit(&dev->ep[0].ep,
+						EP0_HS_MAX_PACKET_SIZE);
+			}
 			net2280_led_speed (dev, dev->gadget.speed);
 			DEBUG(dev, "%s\n", usb_speed_string(dev->gadget.speed));
 		}
@@ -2261,32 +3118,38 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat)
 		}
 		ep->stopped = 0;
 		dev->protocol_stall = 0;
-
-		if (ep->dev->pdev->device == 0x2280)
-			tmp = (1 << FIFO_OVERFLOW)
-				| (1 << FIFO_UNDERFLOW);
-		else
-			tmp = 0;
-
-		writel (tmp | (1 << TIMEOUT)
-			| (1 << USB_STALL_SENT)
-			| (1 << USB_IN_NAK_SENT)
-			| (1 << USB_IN_ACK_RCVD)
-			| (1 << USB_OUT_PING_NAK_SENT)
-			| (1 << USB_OUT_ACK_SENT)
-			| (1 << SHORT_PACKET_OUT_DONE_INTERRUPT)
-			| (1 << SHORT_PACKET_TRANSFERRED_INTERRUPT)
-			| (1 << DATA_PACKET_RECEIVED_INTERRUPT)
-			| (1 << DATA_PACKET_TRANSMITTED_INTERRUPT)
-			| (1 << DATA_OUT_PING_TOKEN_INTERRUPT)
-			| (1 << DATA_IN_TOKEN_INTERRUPT)
-			, &ep->regs->ep_stat);
-		u.raw [0] = readl (&dev->usb->setup0123);
-		u.raw [1] = readl (&dev->usb->setup4567);
+		if (dev->pdev->vendor == 0x10b5)
+			ep->is_halt = 0;
+		else{
+			if (ep->dev->pdev->device == 0x2280)
+				tmp = (1 << FIFO_OVERFLOW) |
+				    (1 << FIFO_UNDERFLOW);
+			else
+				tmp = 0;
+
+			writel(tmp | (1 << TIMEOUT) |
+				   (1 << USB_STALL_SENT) |
+				   (1 << USB_IN_NAK_SENT) |
+				   (1 << USB_IN_ACK_RCVD) |
+				   (1 << USB_OUT_PING_NAK_SENT) |
+				   (1 << USB_OUT_ACK_SENT) |
+				   (1 << SHORT_PACKET_OUT_DONE_INTERRUPT) |
+				   (1 << SHORT_PACKET_TRANSFERRED_INTERRUPT) |
+				   (1 << DATA_PACKET_RECEIVED_INTERRUPT) |
+				   (1 << DATA_PACKET_TRANSMITTED_INTERRUPT) |
+				   (1 << DATA_OUT_PING_TOKEN_INTERRUPT) |
+				   (1 << DATA_IN_TOKEN_INTERRUPT)
+				   , &ep->regs->ep_stat);
+		}
+		u.raw[0] = readl(&dev->usb->setup0123);
+		u.raw[1] = readl(&dev->usb->setup4567);
 
 		cpu_to_le32s (&u.raw [0]);
 		cpu_to_le32s (&u.raw [1]);
 
+		if (dev->pdev->vendor == 0x10b5)
+			defect7374_workaround(dev, u.r);
+
 		tmp = 0;
 
 #define	w_value		le16_to_cpu(u.r.wValue)
@@ -2318,6 +3181,12 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat)
 		 * everything else goes uplevel to the gadget code.
 		 */
 		ep->responded = 1;
+
+		if (dev->gadget.speed == USB_SPEED_SUPER) {
+			handle_stat0_irqs_superspeed(dev, ep, u.r);
+			goto next_endpoints;
+		}
+
 		switch (u.r.bRequest) {
 		case USB_REQ_GET_STATUS: {
 			struct net2280_ep	*e;
@@ -2360,8 +3229,11 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat)
 				VDEBUG(dev, "%s wedged, halt not cleared\n",
 						ep->ep.name);
 			} else {
-				VDEBUG(dev, "%s clear halt\n", ep->ep.name);
+				VDEBUG(dev, "%s clear halt\n", e->ep.name);
 				clear_halt(e);
+				if (ep->dev->pdev->vendor == 0x10b5 &&
+					!list_empty(&e->queue) && e->td_dma)
+						restart_dma(e);
 			}
 			allow_status (ep);
 			goto next_endpoints;
@@ -2381,6 +3253,8 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat)
 			if (e->ep.name == ep0name)
 				goto do_stall;
 			set_halt (e);
+			if (dev->pdev->vendor == 0x10b5 && e->dma)
+				abort_dma(e);
 			allow_status (ep);
 			VDEBUG (dev, "%s set halt\n", ep->ep.name);
 			goto next_endpoints;
@@ -2392,7 +3266,7 @@ delegate:
 				"ep_cfg %08x\n",
 				u.r.bRequestType, u.r.bRequest,
 				w_value, w_index, w_length,
-				readl (&ep->regs->ep_cfg));
+				readl(&ep->cfg->ep_cfg));
 			ep->responded = 0;
 			spin_unlock (&dev->lock);
 			tmp = dev->driver->setup (&dev->gadget, &u.r);
@@ -2455,7 +3329,7 @@ static void handle_stat1_irqs (struct net2280 *dev, u32 stat)
 
 	/* after disconnect there's nothing else to do! */
 	tmp = (1 << VBUS_INTERRUPT) | (1 << ROOT_PORT_RESET_INTERRUPT);
-	mask = (1 << HIGH_SPEED) | (1 << FULL_SPEED);
+	mask = (1 << SUPER_SPEED) | (1 << HIGH_SPEED) | (1 << FULL_SPEED);
 
 	/* VBUS disconnect is indicated by VBUS_PIN and VBUS_INTERRUPT set.
 	 * Root Port Reset is indicated by ROOT_PORT_RESET_INTERRUPT set and
@@ -2546,12 +3420,19 @@ static void handle_stat1_irqs (struct net2280 *dev, u32 stat)
 		tmp = readl (&dma->dmastat);
 		writel (tmp, &dma->dmastat);
 
+		/* dma sync: skip OUT ep when done but dmacount[23:0] != 0 */
+		if (dev->pdev->vendor == 0x10b5) {
+			u32 r_dmacount = readl(&dma->dmacount);
+			if (!ep->is_in &&  (r_dmacount & 0x00FFFFFF) &&
+			    (tmp & (1 << DMA_TRANSACTION_DONE_INTERRUPT)))
+				continue;
+		}
+
 		/* chaining should stop on abort, short OUT from fifo,
 		 * or (stat0 codepath) short OUT transfer.
 		 */
 		if (!use_dma_chaining) {
-			if ((tmp & (1 << DMA_TRANSACTION_DONE_INTERRUPT))
-					== 0) {
+			if (!(tmp & (1 << DMA_TRANSACTION_DONE_INTERRUPT))) {
 				DEBUG (ep->dev, "%s no xact done? %08x\n",
 					ep->ep.name, tmp);
 				continue;
@@ -2625,7 +3506,8 @@ static irqreturn_t net2280_irq (int irq, void *_dev)
 	struct net2280		*dev = _dev;
 
 	/* shared interrupt, not ours */
-	if (!(readl(&dev->regs->irqstat0) & (1 << INTA_ASSERTED)))
+	if (dev->pdev->vendor == 0x17cc &&
+		(!(readl(&dev->regs->irqstat0) & (1 << INTA_ASSERTED))))
 		return IRQ_NONE;
 
 	spin_lock (&dev->lock);
@@ -2636,6 +3518,13 @@ static irqreturn_t net2280_irq (int irq, void *_dev)
 	/* control requests and PIO */
 	handle_stat0_irqs (dev, readl (&dev->regs->irqstat0));
 
+	if (dev->pdev->vendor == 0x10b5) {
+		/* toggle pciirqenb1 bit 31 to trigger any new interrupt */
+		u32 pciirqenb1 = readl(&dev->regs->pciirqenb1);
+		writel(pciirqenb1 & 0x7FFFFFFF, &dev->regs->pciirqenb1);
+		writel(pciirqenb1, &dev->regs->pciirqenb1);
+	}
+
 	spin_unlock (&dev->lock);
 
 	return IRQ_HANDLED;
@@ -2674,6 +3563,8 @@ static void net2280_remove (struct pci_dev *pdev)
 	}
 	if (dev->got_irq)
 		free_irq (pdev->irq, dev);
+	if (use_msi && dev->pdev->vendor == 0x10b5)
+		pci_disable_msi(pdev);
 	if (dev->regs)
 		iounmap (dev->regs);
 	if (dev->region)
@@ -2708,7 +3599,8 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id)
 	spin_lock_init (&dev->lock);
 	dev->pdev = pdev;
 	dev->gadget.ops = &net2280_ops;
-	dev->gadget.max_speed = USB_SPEED_HIGH;
+	dev->gadget.max_speed = (dev->pdev->vendor == 0x10b5) ?
+				USB_SPEED_SUPER : USB_SPEED_HIGH;
 
 	/* the "gadget" abstracts/virtualizes the controller */
 	dev->gadget.name = driver_name;
@@ -2750,8 +3642,39 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id)
 	dev->dep = (struct net2280_dep_regs __iomem *) (base + 0x0200);
 	dev->epregs = (struct net2280_ep_regs __iomem *) (base + 0x0300);
 
-	/* put into initial config, link up all endpoints */
-	writel (0, &dev->usb->usbctl);
+	if (dev->pdev->vendor == 0x10b5) {
+		u32 fsmvalue;
+		u32 usbstat;
+		dev->usb_ext = (struct usb338x_usb_ext_regs __iomem *)
+							(base + 0x00b4);
+		dev->fiforegs = (struct usb338x_fifo_regs __iomem *)
+							(base + 0x0500);
+		dev->llregs = (struct usb338x_ll_regs __iomem *)
+							(base + 0x0700);
+		dev->ll_lfps_regs = (struct usb338x_ll_lfps_regs __iomem *)
+							(base + 0x0748);
+		dev->ll_tsn_regs = (struct usb338x_ll_tsn_regs __iomem *)
+							(base + 0x077c);
+		dev->ll_chicken_reg = (struct usb338x_ll_chi_regs __iomem *)
+							(base + 0x079c);
+		dev->plregs = (struct usb338x_pl_regs __iomem *)
+							(base + 0x0800);
+		usbstat = readl(&dev->usb->usbstat);
+		dev->enhanced_mode = (usbstat & (1 << 11)) ? 1 : 0;
+		dev->n_ep = (dev->enhanced_mode) ? 9 : 5;
+		/* put into initial config, link up all endpoints */
+		fsmvalue = get_idx_reg(dev->regs, SCRATCH) &
+					(0xf << DEFECT7374_FSM_FIELD);
+		/* FSM in final (workaround done) state: clear usbctl */
+		if (fsmvalue == DEFECT7374_FSM_SS_CONTROL_READ)
+			writel(0, &dev->usb->usbctl);
+	} else{
+		dev->enhanced_mode = 0;
+		dev->n_ep = 7;
+		/* put into initial config, link up all endpoints */
+		writel(0, &dev->usb->usbctl);
+	}
+
 	usb_reset (dev);
 	usb_reinit (dev);
 
@@ -2762,6 +3685,10 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id)
 		goto done;
 	}
 
+	if (use_msi && dev->pdev->vendor == 0x10b5)
+		if (pci_enable_msi(pdev))
+			ERROR(dev, "Failed to enable MSI mode\n");
+
 	if (request_irq (pdev->irq, net2280_irq, IRQF_SHARED, driver_name, dev)
 			!= 0) {
 		ERROR (dev, "request interrupt %d failed\n", pdev->irq);
@@ -2797,7 +3724,8 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	/* enable lower-overhead pci memory bursts during DMA */
-	writel ( (1 << DMA_MEMORY_WRITE_AND_INVALIDATE_ENABLE)
+	if (dev->pdev->vendor == 0x17cc)
+		writel((1 << DMA_MEMORY_WRITE_AND_INVALIDATE_ENABLE)
 			// 256 write retries may not be enough...
 			// | (1 << PCI_RETRY_ABORT_ENABLE)
 			| (1 << DMA_READ_MULTIPLE_ENABLE)
@@ -2814,10 +3742,10 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id)
 	INFO (dev, "%s\n", driver_desc);
 	INFO (dev, "irq %d, pci mem %p, chip rev %04x\n",
 			pdev->irq, base, dev->chiprev);
-	INFO (dev, "version: " DRIVER_VERSION "; dma %s\n",
-			use_dma
-				? (use_dma_chaining ? "chaining" : "enabled")
-				: "disabled");
+	INFO(dev, "version: " DRIVER_VERSION "; dma %s %s\n",
+		use_dma	? (use_dma_chaining ? "chaining" : "enabled")
+			: "disabled",
+		dev->enhanced_mode ? "enhanced mode" : "legacy mode");
 	retval = device_create_file (&pdev->dev, &dev_attr_registers);
 	if (retval) goto done;
 
@@ -2849,7 +3777,8 @@ static void net2280_shutdown (struct pci_dev *pdev)
 	writel (0, &dev->usb->usbctl);
 
 	/* Disable full-speed test mode */
-	writel(0, &dev->usb->xcvrdiag);
+	if (dev->pdev->vendor == 0x17cc)
+		writel(0, &dev->usb->xcvrdiag);
 }
 
 
@@ -2869,8 +3798,24 @@ static const struct pci_device_id pci_ids [] = { {
 	.device =	0x2282,
 	.subvendor =	PCI_ANY_ID,
 	.subdevice =	PCI_ANY_ID,
-
-}, { /* end: all zeroes */ }
+},
+	{
+	 .class = ((PCI_CLASS_SERIAL_USB << 8) | 0xfe),
+	 .class_mask = ~0,
+	 .vendor = 0x10b5,
+	 .device = 0x3380,
+	 .subvendor = PCI_ANY_ID,
+	 .subdevice = PCI_ANY_ID,
+	 },
+	{
+	 .class = ((PCI_CLASS_SERIAL_USB << 8) | 0xfe),
+	 .class_mask = ~0,
+	 .vendor = 0x10b5,
+	 .device = 0x3382,
+	 .subvendor = PCI_ANY_ID,
+	 .subdevice = PCI_ANY_ID,
+	 },
+{ /* end: all zeroes */ }
 };
 MODULE_DEVICE_TABLE (pci, pci_ids);
 
diff --git a/drivers/usb/gadget/net2280.h b/drivers/usb/gadget/net2280.h
index a844be0d683a..f32c2746b6ae 100644
--- a/drivers/usb/gadget/net2280.h
+++ b/drivers/usb/gadget/net2280.h
@@ -6,6 +6,7 @@
 /*
  * Copyright (C) 2002 NetChip Technology, Inc. (http://www.netchip.com)
  * Copyright (C) 2003 David Brownell
+ * Copyright (C) 2014 Ricardo Ribalda - Qtechnology/AS
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -14,6 +15,7 @@
  */
 
 #include <linux/usb/net2280.h>
+#include <linux/usb/usb338x.h>
 
 /*-------------------------------------------------------------------------*/
 
@@ -59,6 +61,13 @@ set_idx_reg (struct net2280_regs __iomem *regs, u32 index, u32 value)
 #define	CHIPREV_1	0x0100
 #define	CHIPREV_1A	0x0110
 
+/* DEFECT 7374: max polls for the data-phase ACK, udelay() per poll (us) */
+#define DEFECT_7374_NUMBEROF_MAX_WAIT_LOOPS         200
+#define DEFECT_7374_PROCESSOR_WAIT_TIME             10
+
+/* ep0 max packet size */
+#define EP0_SS_MAX_PACKET_SIZE  0x200
+#define EP0_HS_MAX_PACKET_SIZE  0x40
 #ifdef	__KERNEL__
 
 /* ep a-f highspeed and fullspeed maxpacket, addresses
@@ -85,12 +94,15 @@ struct net2280_dma {
 
 struct net2280_ep {
 	struct usb_ep				ep;
+	struct net2280_ep_regs __iomem *cfg;
 	struct net2280_ep_regs			__iomem *regs;
 	struct net2280_dma_regs			__iomem *dma;
 	struct net2280_dma			*dummy;
+	struct usb338x_fifo_regs __iomem *fiforegs;
 	dma_addr_t				td_dma;	/* of dummy */
 	struct net2280				*dev;
 	unsigned long				irqs;
+	unsigned is_halt:1, dma_started:1;
 
 	/* analogous to a host-side qh */
 	struct list_head			queue;
@@ -116,10 +128,19 @@ static inline void allow_status (struct net2280_ep *ep)
 	ep->stopped = 1;
 }
 
-/* count (<= 4) bytes in the next fifo write will be valid */
-static inline void set_fifo_bytecount (struct net2280_ep *ep, unsigned count)
+static inline void allow_status_338x(struct net2280_ep *ep)
 {
-	writeb (count, 2 + (u8 __iomem *) &ep->regs->ep_cfg);
+	/*
+	 * Control Status Phase Handshake was set by the chip when the setup
+	 * packet arrived. While set, the chip automatically NAKs the host's
+	 * Status Phase tokens.
+	 */
+	writel(1 << CLEAR_CONTROL_STATUS_PHASE_HANDSHAKE, &ep->regs->ep_rsp);
+
+	ep->stopped = 1;
+
+	/* TD 9.9 Halt Endpoint test.  TD 9.22 set feature test. */
+	ep->responded = 0;
 }
 
 struct net2280_request {
@@ -135,23 +156,38 @@ struct net2280 {
 	/* each pci device provides one gadget, several endpoints */
 	struct usb_gadget		gadget;
 	spinlock_t			lock;
-	struct net2280_ep		ep [7];
+	struct net2280_ep		ep[9];
 	struct usb_gadget_driver 	*driver;
 	unsigned			enabled : 1,
 					protocol_stall : 1,
 					softconnect : 1,
 					got_irq : 1,
-					region : 1;
+					region:1,
+					u1_enable:1,
+					u2_enable:1,
+					ltm_enable:1,
+					wakeup_enable:1,
+					selfpowered:1,
+					addressed_state:1;
 	u16				chiprev;
+	int enhanced_mode;
+	int n_ep;
 
 	/* pci state used to access those endpoints */
 	struct pci_dev			*pdev;
 	struct net2280_regs		__iomem *regs;
 	struct net2280_usb_regs		__iomem *usb;
+	struct usb338x_usb_ext_regs	__iomem *usb_ext;
 	struct net2280_pci_regs		__iomem *pci;
 	struct net2280_dma_regs		__iomem *dma;
 	struct net2280_dep_regs		__iomem *dep;
 	struct net2280_ep_regs		__iomem *epregs;
+	struct usb338x_fifo_regs	__iomem *fiforegs;
+	struct usb338x_ll_regs		__iomem *llregs;
+	struct usb338x_ll_lfps_regs	__iomem *ll_lfps_regs;
+	struct usb338x_ll_tsn_regs	__iomem *ll_tsn_regs;
+	struct usb338x_ll_chi_regs	__iomem *ll_chicken_reg;
+	struct usb338x_pl_regs		__iomem *plregs;
 
 	struct pci_pool			*requests;
 	// statistics...
@@ -179,6 +215,43 @@ static inline void clear_halt (struct net2280_ep *ep)
 		, &ep->regs->ep_rsp);
 }
 
+/*
+ * FSM value for Defect 7374 (U1U2 Test) is managed in
+ * chip's SCRATCH register:
+ */
+#define DEFECT7374_FSM_FIELD    28
+
+/* Waiting for Control Read:
+ *  - A transition to this state indicates a fresh USB connection,
+ *    before the first Setup Packet. The connection speed is not
+ *    known. Firmware is waiting for the first Control Read.
+ *  - Starting state: This state can be thought of as the FSM's typical
+ *    starting state.
+ *  - Tip: Upon the first SS Control Read the FSM never
+ *    returns to this state.
+ */
+#define DEFECT7374_FSM_WAITING_FOR_CONTROL_READ (1 << DEFECT7374_FSM_FIELD)
+
+/* Non-SS Control Read:
+ *  - A transition to this state indicates detection of the first HS
+ *    or FS Control Read.
+ *  - Tip: Upon the first SS Control Read the FSM never
+ *    returns to this state.
+ */
+#define	DEFECT7374_FSM_NON_SS_CONTROL_READ (2 << DEFECT7374_FSM_FIELD)
+
+/* SS Control Read:
+ *  - A transition to this state indicates detection of the
+ *    first SS Control Read.
+ *  - This state indicates workaround completion. Workarounds no longer
+ *    need to be applied (as long as the chip remains powered up).
+ *  - Tip: Once in this state the FSM state does not change (until
+ *    the chip's power is lost and restored).
+ *  - This can be thought of as the final state of the FSM;
+ *    the FSM 'locks-up' in this state until the chip loses power.
+ */
+#define DEFECT7374_FSM_SS_CONTROL_READ (3 << DEFECT7374_FSM_FIELD)
+
 #ifdef USE_RDK_LEDS
 
 static inline void net2280_led_init (struct net2280 *dev)
@@ -198,6 +271,9 @@ void net2280_led_speed (struct net2280 *dev, enum usb_device_speed speed)
 {
 	u32	val = readl (&dev->regs->gpioctl);
 	switch (speed) {
+	case USB_SPEED_SUPER:		/* green + red */
+		val |= (1 << GPIO0_DATA) | (1 << GPIO1_DATA);
+		break;
 	case USB_SPEED_HIGH:		/* green */
 		val &= ~(1 << GPIO0_DATA);
 		val |= (1 << GPIO1_DATA);
@@ -271,6 +347,17 @@ static inline void net2280_led_shutdown (struct net2280 *dev)
 
 /*-------------------------------------------------------------------------*/
 
+static inline void set_fifo_bytecount(struct net2280_ep *ep, unsigned count)
+{
+	if (ep->dev->pdev->vendor == 0x17cc)
+		writeb(count, 2 + (u8 __iomem *) &ep->regs->ep_cfg);
+	else{
+		u32 tmp = readl(&ep->cfg->ep_cfg) &
+					(~(0x07 << EP_FIFO_BYTE_COUNT));
+		writel(tmp | (count << EP_FIFO_BYTE_COUNT), &ep->cfg->ep_cfg);
+	}
+}
+
 static inline void start_out_naking (struct net2280_ep *ep)
 {
 	/* NOTE:  hardware races lurk here, and PING protocol issues */
diff --git a/include/linux/usb/usb338x.h b/include/linux/usb/usb338x.h
new file mode 100644
index 000000000000..f92eb635b9d3
--- /dev/null
+++ b/include/linux/usb/usb338x.h
@@ -0,0 +1,199 @@
+/*
+ * USB 338x super/high/full speed USB device controller.
+ * Unlike many such controllers, this one talks PCI.
+ *
+ * Copyright (C) 2002 NetChip Technology, Inc. (http://www.netchip.com)
+ * Copyright (C) 2003 David Brownell
+ * Copyright (C) 2014 Ricardo Ribalda - Qtechnology/AS
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __LINUX_USB_USB338X_H
+#define __LINUX_USB_USB338X_H
+
+#include <linux/usb/net2280.h>
+
+/*
+ * Extra defined bits for net2280 registers
+ */
+#define     SCRATCH			      0x0b
+
+#define     DEFECT7374_FSM_FIELD                28
+#define     SUPER_SPEED				 8
+#define     DMA_REQUEST_OUTSTANDING              5
+#define     DMA_PAUSE_DONE_INTERRUPT            26
+#define     SET_ISOCHRONOUS_DELAY               24
+#define     SET_SEL                             22
+#define     SUPER_SPEED_MODE                     8
+
+/* ep_cfg bit positions */
+#define     MAX_BURST_SIZE                      24
+#define     EP_FIFO_BYTE_COUNT                  16
+#define     IN_ENDPOINT_ENABLE                  14
+#define     IN_ENDPOINT_TYPE                    12
+#define     OUT_ENDPOINT_ENABLE                 10
+#define     OUT_ENDPOINT_TYPE                    8
+
+struct usb338x_usb_ext_regs {
+	u32     usbclass;
+#define     DEVICE_PROTOCOL                     16
+#define     DEVICE_SUB_CLASS                     8
+#define     DEVICE_CLASS                         0
+	u32     ss_sel;
+#define     U2_SYSTEM_EXIT_LATENCY               8
+#define     U1_SYSTEM_EXIT_LATENCY               0
+	u32     ss_del;
+#define     U2_DEVICE_EXIT_LATENCY               8
+#define     U1_DEVICE_EXIT_LATENCY               0
+	u32     usb2lpm;
+#define     USB_L1_LPM_HIRD                      2
+#define     USB_L1_LPM_REMOTE_WAKE               1
+#define     USB_L1_LPM_SUPPORT                   0
+	u32     usb3belt;
+#define     BELT_MULTIPLIER                     10
+#define     BEST_EFFORT_LATENCY_TOLERANCE        0
+	u32     usbctl2;
+#define     LTM_ENABLE                           7
+#define     U2_ENABLE                            6
+#define     U1_ENABLE                            5
+#define     FUNCTION_SUSPEND                     4
+#define     USB3_CORE_ENABLE                     3
+#define     USB2_CORE_ENABLE                     2
+#define     SERIAL_NUMBER_STRING_ENABLE          0
+	u32     in_timeout;
+#define     GPEP3_TIMEOUT                       19
+#define     GPEP2_TIMEOUT                       18
+#define     GPEP1_TIMEOUT                       17
+#define     GPEP0_TIMEOUT                       16
+#define     GPEP3_TIMEOUT_VALUE                 13
+#define     GPEP3_TIMEOUT_ENABLE                12
+#define     GPEP2_TIMEOUT_VALUE                  9
+#define     GPEP2_TIMEOUT_ENABLE                 8
+#define     GPEP1_TIMEOUT_VALUE                  5
+#define     GPEP1_TIMEOUT_ENABLE                 4
+#define     GPEP0_TIMEOUT_VALUE                  1
+#define     GPEP0_TIMEOUT_ENABLE                 0
+	u32     isodelay;
+#define     ISOCHRONOUS_DELAY                    0
+} __packed;
+
+struct usb338x_fifo_regs {
+	/* offset 0x0500, 0x0520, 0x0540, 0x0560, 0x0580 */
+	u32     ep_fifo_size_base;
+#define     IN_FIFO_BASE_ADDRESS                                22
+#define     IN_FIFO_SIZE                                        16
+#define     OUT_FIFO_BASE_ADDRESS                               6
+#define     OUT_FIFO_SIZE                                       0
+	u32     ep_fifo_out_wrptr;
+	u32     ep_fifo_out_rdptr;
+	u32     ep_fifo_in_wrptr;
+	u32     ep_fifo_in_rdptr;
+	u32     unused[3];
+} __packed;
+
+
+/* Link layer */
+struct usb338x_ll_regs {
+	/* offset 0x700 */
+	u32   ll_ltssm_ctrl1;
+	u32   ll_ltssm_ctrl2;
+	u32   ll_ltssm_ctrl3;
+	u32   unused[2];
+	u32   ll_general_ctrl0;
+	u32   ll_general_ctrl1;
+#define     PM_U3_AUTO_EXIT                                     29
+#define     PM_U2_AUTO_EXIT                                     28
+#define     PM_U1_AUTO_EXIT                                     27
+#define     PM_FORCE_U2_ENTRY                                   26
+#define     PM_FORCE_U1_ENTRY                                   25
+#define     PM_LGO_COLLISION_SEND_LAU                           24
+#define     PM_DIR_LINK_REJECT                                  23
+#define     PM_FORCE_LINK_ACCEPT                                22
+#define     PM_DIR_ENTRY_U3                                     20
+#define     PM_DIR_ENTRY_U2                                     19
+#define     PM_DIR_ENTRY_U1                                     18
+#define     PM_U2_ENABLE                                        17
+#define     PM_U1_ENABLE                                        16
+#define     SKP_THRESHOLD_ADJUST_FMW                            8
+#define     RESEND_DPP_ON_LRTY_FMW                              7
+#define     DL_BIT_VALUE_FMW                                    6
+#define     FORCE_DL_BIT                                        5
+	u32   ll_general_ctrl2;
+#define     SELECT_INVERT_LANE_POLARITY                         7
+#define     FORCE_INVERT_LANE_POLARITY                          6
+	u32   ll_general_ctrl3;
+	u32   ll_general_ctrl4;
+	u32   ll_error_gen;
+} __packed;
+
+struct usb338x_ll_lfps_regs {
+	/* offset 0x748 */
+	u32   ll_lfps_5;
+#define     TIMER_LFPS_6US                                      16
+	u32   ll_lfps_6;
+#define     TIMER_LFPS_80US                                     0
+} __packed;
+
+struct usb338x_ll_tsn_regs {
+	/* offset 0x77C */
+	u32   ll_tsn_counters_2;
+#define     HOT_TX_NORESET_TS2                                  24
+	u32   ll_tsn_counters_3;
+#define     HOT_RX_RESET_TS2                                    0
+} __packed;
+
+struct usb338x_ll_chi_regs {
+	/* offset 0x79C */
+	u32   ll_tsn_chicken_bit;
+#define     RECOVERY_IDLE_TO_RECOVER_FMW                        3
+} __packed;
+
+/* protocol layer */
+struct usb338x_pl_regs {
+	/* offset 0x800 */
+	u32   pl_reg_1;
+	u32   pl_reg_2;
+	u32   pl_reg_3;
+	u32   pl_reg_4;
+	u32   pl_ep_ctrl;
+	/* Protocol Layer Endpoint Control*/
+#define     PL_EP_CTRL                                  0x810
+#define     ENDPOINT_SELECT                             0
+	/* [4:0] */
+#define     EP_INITIALIZED                              16
+#define     SEQUENCE_NUMBER_RESET                       17
+#define     CLEAR_ACK_ERROR_CODE                        20
+	u32   pl_reg_6;
+	u32   pl_reg_7;
+	u32   pl_reg_8;
+	u32   pl_ep_status_1;
+	/* Protocol Layer Endpoint Status 1*/
+#define     PL_EP_STATUS_1                              0x820
+#define     STATE                                       16
+#define     ACK_GOOD_NORMAL                             0x11
+#define     ACK_GOOD_MORE_ACKS_TO_COME                  0x16
+	u32   pl_ep_status_2;
+	u32   pl_ep_status_3;
+	/* Protocol Layer Endpoint Status 3*/
+#define     PL_EP_STATUS_3                              0x828
+#define     SEQUENCE_NUMBER                             0
+	u32   pl_ep_status_4;
+	/* Protocol Layer Endpoint Status 4*/
+#define     PL_EP_STATUS_4                              0x82c
+	u32   pl_ep_cfg_4;
+	/* Protocol Layer Endpoint Configuration 4*/
+#define     PL_EP_CFG_4                                 0x830
+#define     NON_CTRL_IN_TOLERATE_BAD_DIR                6
+} __packed;
+
+#endif /* __LINUX_USB_USB338X_H */
-- 
cgit 


From d97ffe236894856d08146390ef3fbe6448a8ac2b Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Wed, 21 May 2014 15:23:30 +1000
Subject: PCI: Fix return value from pci_user_{read,write}_config_*()

The PCI user-space config accessors pci_user_{read,write}_config_*() return
negative error numbers, which were introduced by commit 34e3207205ef
("PCI: handle positive error codes").  That patch converted all positive
error numbers from platform-specific PCI config accessors to -EINVAL, which
means the callers don't know anything about the specific cause of the
failure.

The patch fixes the issue by converting the positive PCIBIOS_* error values
to generic negative error numbers with pcibios_err_to_errno().

[bhelgaas: changelog]
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Greg Thelen <gthelen@google.com>
---
 drivers/pci/access.c | 12 ++++--------
 include/linux/pci.h  |  4 ++--
 2 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 7f8b78c08879..8c148f39e8d7 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -148,7 +148,7 @@ static noinline void pci_wait_cfg(struct pci_dev *dev)
 int pci_user_read_config_##size						\
 	(struct pci_dev *dev, int pos, type *val)			\
 {									\
-	int ret = 0;							\
+	int ret = PCIBIOS_SUCCESSFUL;					\
 	u32 data = -1;							\
 	if (PCI_##size##_BAD)						\
 		return -EINVAL;						\
@@ -159,9 +159,7 @@ int pci_user_read_config_##size						\
 					pos, sizeof(type), &data);	\
 	raw_spin_unlock_irq(&pci_lock);				\
 	*val = (type)data;						\
-	if (ret > 0)							\
-		ret = -EINVAL;						\
-	return ret;							\
+	return pcibios_err_to_errno(ret);				\
 }									\
 EXPORT_SYMBOL_GPL(pci_user_read_config_##size);
 
@@ -170,7 +168,7 @@ EXPORT_SYMBOL_GPL(pci_user_read_config_##size);
 int pci_user_write_config_##size					\
 	(struct pci_dev *dev, int pos, type val)			\
 {									\
-	int ret = -EIO;							\
+	int ret = PCIBIOS_SUCCESSFUL;					\
 	if (PCI_##size##_BAD)						\
 		return -EINVAL;						\
 	raw_spin_lock_irq(&pci_lock);				\
@@ -179,9 +177,7 @@ int pci_user_write_config_##size					\
 	ret = dev->bus->ops->write(dev->bus, dev->devfn,		\
 					pos, sizeof(type), val);	\
 	raw_spin_unlock_irq(&pci_lock);				\
-	if (ret > 0)							\
-		ret = -EINVAL;						\
-	return ret;							\
+	return pcibios_err_to_errno(ret);				\
 }									\
 EXPORT_SYMBOL_GPL(pci_user_write_config_##size);
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 018877b8b4e8..322335aaa7e1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -518,7 +518,7 @@ static inline int pcibios_err_to_errno(int err)
 	case PCIBIOS_FUNC_NOT_SUPPORTED:
 		return -ENOENT;
 	case PCIBIOS_BAD_VENDOR_ID:
-		return -EINVAL;
+		return -ENOTTY;
 	case PCIBIOS_DEVICE_NOT_FOUND:
 		return -ENODEV;
 	case PCIBIOS_BAD_REGISTER_NUMBER:
@@ -529,7 +529,7 @@ static inline int pcibios_err_to_errno(int err)
 		return -ENOSPC;
 	}
 
-	return -ENOTTY;
+	return -ERANGE;
 }
 
 /* Low-level architecture-dependent routines */
-- 
cgit 


From 6fecd4f2a58c60028b1a75deefcf111516d3f836 Mon Sep 17 00:00:00 2001
From: Todd E Brandt <todd.e.brandt@linux.intel.com>
Date: Mon, 19 May 2014 10:55:32 -0700
Subject: USB: separate usb_address0 mutexes for each bus

This patch creates a separate instance of the usb_address0 mutex for each USB
bus, and attaches it to the usb_bus device struct. This allows devices on
separate buses to be enumerated in parallel, saving time.

In the current code, there is a single, global instance of the usb_address0
mutex which is used for all devices on all buses. This isn't completely
necessary, as this mutex is only needed to prevent address0 collisions for
devices on the *same* bus (usb 2.0 spec, sec 4.6.1). This superfluous coverage
can cause additional delay in system resume on systems with multiple hosts
(up to several seconds depending on what devices are attached).

Signed-off-by: Todd Brandt <todd.e.brandt@linux.intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c | 1 +
 drivers/usb/core/hub.c | 6 ++----
 include/linux/usb.h    | 2 ++
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index adddc66c9e8d..174eb857a6b4 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -918,6 +918,7 @@ static void usb_bus_init (struct usb_bus *bus)
 	bus->bandwidth_allocated = 0;
 	bus->bandwidth_int_reqs  = 0;
 	bus->bandwidth_isoc_reqs = 0;
+	mutex_init(&bus->usb_address0_mutex);
 
 	INIT_LIST_HEAD (&bus->bus_list);
 }
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 090469ebfcff..726fa072c3fe 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -4016,8 +4016,6 @@ static int
 hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
 		int retry_counter)
 {
-	static DEFINE_MUTEX(usb_address0_mutex);
-
 	struct usb_device	*hdev = hub->hdev;
 	struct usb_hcd		*hcd = bus_to_hcd(hdev->bus);
 	int			i, j, retval;
@@ -4040,7 +4038,7 @@ hub_port_init (struct usb_hub *hub, struct usb_device *udev, int port1,
 	if (oldspeed == USB_SPEED_LOW)
 		delay = HUB_LONG_RESET_TIME;
 
-	mutex_lock(&usb_address0_mutex);
+	mutex_lock(&hdev->bus->usb_address0_mutex);
 
 	/* Reset the device; full speed may morph to high speed */
 	/* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */
@@ -4317,7 +4315,7 @@ fail:
 		hub_port_disable(hub, port1, 0);
 		update_devnum(udev, devnum);	/* for disconnect processing */
 	}
-	mutex_unlock(&usb_address0_mutex);
+	mutex_unlock(&hdev->bus->usb_address0_mutex);
 	return retval;
 }
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 6b7ec376fb4d..d2465bc0e73c 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -352,6 +352,8 @@ struct usb_bus {
 	struct usb_bus *hs_companion;	/* Companion EHCI bus, if any */
 	struct list_head bus_list;	/* list of busses */
 
+	struct mutex usb_address0_mutex; /* unaddressed device mutex */
+
 	int bandwidth_allocated;	/* on this bus: how much of the time
 					 * reserved for periodic (intr/iso)
 					 * requests is used, on average?
-- 
cgit 


From 95f096849932fe5eaa7bfec887530cf556744a76 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 27 May 2014 17:46:48 -0600
Subject: blk-mq: allow non-softirq completions

Right now we export two ways of completing a request:

1) blk_mq_complete_request(). This uses an IPI (if needed) and
   completes through q->softirq_done_fn(). It also works with
   timeouts.

2) blk_mq_end_io(). This completes inline, and ignores any timeout
   state of the request.

Let blk_mq_complete_request() handle non-softirq_done_fn completions
as well, by just completing inline. If a driver has enough completion
ports to place completions correctly, it need not define a
mq_ops->complete() and we can avoid an indirect function call by
doing the completion inline.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 12 +++++++++---
 include/linux/blk-mq.h |  4 ++++
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 30bad930e661..010b878d53b3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -434,10 +434,16 @@ void __blk_mq_complete_request(struct request *rq)
  **/
 void blk_mq_complete_request(struct request *rq)
 {
-	if (unlikely(blk_should_fake_timeout(rq->q)))
+	struct request_queue *q = rq->q;
+
+	if (unlikely(blk_should_fake_timeout(q)))
 		return;
-	if (!blk_mark_rq_complete(rq))
-		__blk_mq_complete_request(rq);
+	if (!blk_mark_rq_complete(rq)) {
+		if (q->softirq_done_fn)
+			__blk_mq_complete_request(rq);
+		else
+			blk_mq_end_io(rq, rq->errors);
+	}
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index afeb93496907..1dfeb1529a61 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -173,6 +173,10 @@ void __blk_mq_end_io(struct request *rq, int error);
 
 void blk_mq_requeue_request(struct request *rq);
 
+/*
+ * Complete request through potential IPI for right placement. Driver must
+ * have defined a mq_ops->complete() hook for this.
+ */
 void blk_mq_complete_request(struct request *rq);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
-- 
cgit 


From 4ec65b77c64504e178d75aaba6ac96f68837416c Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@men.de>
Date: Thu, 24 Apr 2014 14:35:25 +0200
Subject: mcb: Add support for shared PCI IRQs

Add support for shared PCI IRQs to mcb and mcb-pci.

Signed-off-by: Johannes Thumshirn <johannes.thumshirn@men.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mcb/mcb-core.c | 20 +++++++++++++++-----
 drivers/mcb/mcb-pci.c  | 17 ++++++++++++++++-
 include/linux/mcb.h    |  6 +++++-
 3 files changed, 36 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c
index bbe12932d404..9018ab83517a 100644
--- a/drivers/mcb/mcb-core.c
+++ b/drivers/mcb/mcb-core.c
@@ -183,14 +183,14 @@ EXPORT_SYMBOL_GPL(mcb_device_register);
  *
  * Allocate a new @mcb_bus.
  */
-struct mcb_bus *mcb_alloc_bus(void)
+struct mcb_bus *mcb_alloc_bus(struct device *carrier)
 {
 	struct mcb_bus *bus;
 	int bus_nr;
 
 	bus = kzalloc(sizeof(struct mcb_bus), GFP_KERNEL);
 	if (!bus)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	bus_nr = ida_simple_get(&mcb_ida, 0, 0, GFP_KERNEL);
 	if (bus_nr < 0) {
@@ -200,7 +200,7 @@ struct mcb_bus *mcb_alloc_bus(void)
 
 	INIT_LIST_HEAD(&bus->children);
 	bus->bus_nr = bus_nr;
-
+	bus->carrier = carrier;
 	return bus;
 }
 EXPORT_SYMBOL_GPL(mcb_alloc_bus);
@@ -378,6 +378,13 @@ void mcb_release_mem(struct resource *mem)
 }
 EXPORT_SYMBOL_GPL(mcb_release_mem);
 
+static int __mcb_get_irq(struct mcb_device *dev)
+{
+	struct resource *irq = &dev->irq;
+
+	return irq->start;
+}
+
 /**
  * mcb_get_irq() - Get device's IRQ number
  * @dev: The @mcb_device the IRQ is for
@@ -386,9 +393,12 @@ EXPORT_SYMBOL_GPL(mcb_release_mem);
  */
 int mcb_get_irq(struct mcb_device *dev)
 {
-	struct resource *irq = &dev->irq;
+	struct mcb_bus *bus = dev->bus;
 
-	return irq->start;
+	if (bus->get_irq)
+		return bus->get_irq(dev);
+
+	return __mcb_get_irq(dev);
 }
 EXPORT_SYMBOL_GPL(mcb_get_irq);
 
diff --git a/drivers/mcb/mcb-pci.c b/drivers/mcb/mcb-pci.c
index 99c742cbfb5b..b59181965643 100644
--- a/drivers/mcb/mcb-pci.c
+++ b/drivers/mcb/mcb-pci.c
@@ -20,6 +20,15 @@ struct priv {
 	void __iomem *base;
 };
 
+static int mcb_pci_get_irq(struct mcb_device *mdev)
+{
+	struct mcb_bus *mbus = mdev->bus;
+	struct device *dev = mbus->carrier;
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return pdev->irq;
+}
+
 static int mcb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct priv *priv;
@@ -67,7 +76,13 @@ static int mcb_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	pci_set_drvdata(pdev, priv);
 
-	priv->bus = mcb_alloc_bus();
+	priv->bus = mcb_alloc_bus(&pdev->dev);
+	if (IS_ERR(priv->bus)) {
+		ret = PTR_ERR(priv->bus);
+		goto err_drvdata;
+	}
+
+	priv->bus->get_irq = mcb_pci_get_irq;
 
 	ret = chameleon_parse_cells(priv->bus, mapbase, priv->base);
 	if (ret < 0)
diff --git a/include/linux/mcb.h b/include/linux/mcb.h
index 2db284d14064..ed06e15a36aa 100644
--- a/include/linux/mcb.h
+++ b/include/linux/mcb.h
@@ -16,6 +16,7 @@
 #include <linux/irqreturn.h>
 
 struct mcb_driver;
+struct mcb_device;
 
 /**
  * struct mcb_bus - MEN Chameleon Bus
@@ -23,11 +24,14 @@ struct mcb_driver;
  * @dev: pointer to carrier device
  * @children: the child busses
  * @bus_nr: mcb bus number
+ * @get_irq: callback to get IRQ number
  */
 struct mcb_bus {
 	struct list_head children;
 	struct device dev;
+	struct device *carrier;
 	int bus_nr;
+	int (*get_irq)(struct mcb_device *dev);
 };
 #define to_mcb_bus(b) container_of((b), struct mcb_bus, dev)
 
@@ -105,7 +109,7 @@ extern void mcb_unregister_driver(struct mcb_driver *driver);
 	module_driver(__mcb_driver, mcb_register_driver, mcb_unregister_driver);
 extern void mcb_bus_add_devices(const struct mcb_bus *bus);
 extern int mcb_device_register(struct mcb_bus *bus, struct mcb_device *dev);
-extern struct mcb_bus *mcb_alloc_bus(void);
+extern struct mcb_bus *mcb_alloc_bus(struct device *carrier);
 extern struct mcb_bus *mcb_bus_get(struct mcb_bus *bus);
 extern void mcb_bus_put(struct mcb_bus *bus);
 extern struct mcb_device *mcb_alloc_dev(struct mcb_bus *bus);
-- 
cgit 


From f82dd4b093ead1161770de70515cb11602ac664c Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Fri, 16 May 2014 04:36:13 -0400
Subject: miscdevice.h: Simple syntax fix to make pointers consistent.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/miscdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h
index 51e26f3cd3b3..ee80dd7d9f60 100644
--- a/include/linux/miscdevice.h
+++ b/include/linux/miscdevice.h
@@ -64,7 +64,7 @@ struct miscdevice  {
 	umode_t mode;
 };
 
-extern int misc_register(struct miscdevice * misc);
+extern int misc_register(struct miscdevice *misc);
 extern int misc_deregister(struct miscdevice *misc);
 
 #define MODULE_ALIAS_MISCDEV(minor)				\
-- 
cgit 


From 29fe5732bef2ba3c785fcb4aa5ba7160ad8faba5 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Thu, 6 Mar 2014 09:44:18 -0800
Subject: mtd: pfow: remove unused variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes warning:

In file included from drivers/mtd/lpddr/qinfo_probe.c:31:0:
include/linux/mtd/pfow.h: In function ‘send_pfow_command’:
include/linux/mtd/pfow.h:104:6: warning: variable ‘chipnum’ set but not used [-Wunused-but-set-variable]
  int chipnum;
      ^

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
---
 include/linux/mtd/pfow.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h
index b730d4f84655..42ff7ff09bf5 100644
--- a/include/linux/mtd/pfow.h
+++ b/include/linux/mtd/pfow.h
@@ -101,9 +101,6 @@ static inline void send_pfow_command(struct map_info *map,
 				unsigned long len, map_word *datum)
 {
 	int bits_per_chip = map_bankwidth(map) * 8;
-	int chipnum;
-	struct lpddr_private *lpddr = map->fldrv_priv;
-	chipnum = adr >> lpddr->chipshift;
 
 	map_write(map, CMD(cmd_code), map->pfow_base + PFOW_COMMAND_CODE);
 	map_write(map, CMD(adr & ((1<<bits_per_chip) - 1)),
-- 
cgit 


From e7cd1d1eb16fcdf53001b926187a82f1f3e1a7e6 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 20 May 2014 11:17:54 -0700
Subject: mfd: twl4030-power: Add generic reset configuration

The twl4030 PMIC needs to be configured properly for things like
warm reset and deeper idle states so the PMIC manages the regulators
properly based on the hardware triggers from the SoC. Earlier
we have configured twl4030 using platform data, but we want to
do it for device tree based booting also.

In some cases configuring twl4030 is needed for things to work.
For example, when rebooting an OMAP3530 at 125 MHz, it hangs.
With this patch, TWL4030 will be reset when a warm reset occurs,
and OMAP3530 does not hang on reboot.

Let's add device tree support and configure things for warm reset
as the default when compatible = "ti,twl4030-power". More
complicated configurations can be added to the driver based on
other compatible flags.

Note we now also make the pdata const like it should be.
This allows using it for match->data with the device tree
related functions.

Based on earlier patch by Matthias Brugger <matthias.bgg@gmail.com>
and Lesly A M <leslyam@ti.com>.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 .../devicetree/bindings/mfd/twl4030-power.txt      |   7 +-
 drivers/mfd/twl4030-power.c                        | 109 ++++++++++++++++++---
 include/linux/i2c/twl.h                            |   3 +
 3 files changed, 105 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mfd/twl4030-power.txt b/Documentation/devicetree/bindings/mfd/twl4030-power.txt
index 8e15ec35ac99..b90611650cd9 100644
--- a/Documentation/devicetree/bindings/mfd/twl4030-power.txt
+++ b/Documentation/devicetree/bindings/mfd/twl4030-power.txt
@@ -5,7 +5,12 @@ to control the power resources, including power scripts. For now, the
 binding only supports the complete shutdown of the system after poweroff.
 
 Required properties:
-- compatible : must be "ti,twl4030-power"
+- compatible : must be one of the following
+	"ti,twl4030-power"
+	"ti,twl4030-power-reset"
+
+The use of ti,twl4030-power-reset is recommended at least on
+3530 that needs a special configuration for warm reset to work.
 
 Optional properties:
 - ti,use_poweroff: With this flag, the chip will initiates an ACTIVE-to-OFF or
diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 0b037dca46a8..cb5b0cb8f933 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -29,6 +29,7 @@
 #include <linux/i2c/twl.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 
 #include <asm/mach-types.h>
 
@@ -128,6 +129,40 @@ static u8 res_config_addrs[] = {
 	[RES_MAIN_REF]	= 0x94,
 };
 
+/*
+ * Usable values for .remap_sleep and .remap_off
+ * Based on table "5.3.3 Resource Operating modes"
+ */
+enum {
+	TWL_REMAP_OFF = 0,
+	TWL_REMAP_SLEEP = 8,
+	TWL_REMAP_ACTIVE = 9,
+};
+
+/*
+ * Macros to configure the PM register states for various resources.
+ * Note that we can make MSG_SINGULAR etc private to this driver once
+ * omap3 has been made DT only.
+ */
+#define TWL_DFLT_DELAY		2	/* typically 2 32 KiHz cycles */
+#define TWL_RESOURCE_SET(res, state)					\
+	{ MSG_SINGULAR(DEV_GRP_NULL, (res), (state)), TWL_DFLT_DELAY }
+#define TWL_RESOURCE_ON(res)	TWL_RESOURCE_SET(res, RES_STATE_ACTIVE)
+#define TWL_RESOURCE_OFF(res)	TWL_RESOURCE_SET(res, RES_STATE_OFF)
+#define TWL_RESOURCE_RESET(res)	TWL_RESOURCE_SET(res, RES_STATE_WRST)
+/*
+ * It seems that type1 and type2 is just the resource init order
+ * number for the type1 and type2 group.
+ */
+#define TWL_RESOURCE_GROUP_RESET(group, type1, type2)			\
+	{ MSG_BROADCAST(DEV_GRP_NULL, (group), (type1), (type2),	\
+		RES_STATE_WRST), TWL_DFLT_DELAY }
+#define TWL_REMAP_SLEEP(res, devgrp, typ, typ2)				\
+	{ .resource = (res), .devgroup = (devgrp),			\
+	  .type = (typ), .type2 = (typ2),				\
+	  .remap_off = TWL_REMAP_OFF,					\
+	  .remap_sleep = TWL_REMAP_SLEEP, }
+
 static int twl4030_write_script_byte(u8 address, u8 byte)
 {
 	int err;
@@ -502,7 +537,8 @@ int twl4030_remove_script(u8 flags)
 	return err;
 }
 
-static int twl4030_power_configure_scripts(struct twl4030_power_data *pdata)
+static int
+twl4030_power_configure_scripts(const struct twl4030_power_data *pdata)
 {
 	int err;
 	int i;
@@ -518,7 +554,8 @@ static int twl4030_power_configure_scripts(struct twl4030_power_data *pdata)
 	return 0;
 }
 
-static int twl4030_power_configure_resources(struct twl4030_power_data *pdata)
+static int
+twl4030_power_configure_resources(const struct twl4030_power_data *pdata)
 {
 	struct twl4030_resconfig *resconfig = pdata->resource_config;
 	int err;
@@ -550,7 +587,7 @@ void twl4030_power_off(void)
 		pr_err("TWL4030 Unable to power off\n");
 }
 
-static bool twl4030_power_use_poweroff(struct twl4030_power_data *pdata,
+static bool twl4030_power_use_poweroff(const struct twl4030_power_data *pdata,
 					struct device_node *node)
 {
 	if (pdata && pdata->use_poweroff)
@@ -562,10 +599,60 @@ static bool twl4030_power_use_poweroff(struct twl4030_power_data *pdata,
 	return false;
 }
 
+#ifdef CONFIG_OF
+
+/* Generic warm reset configuration for omap3 */
+
+static struct twl4030_ins omap3_wrst_seq[] = {
+	TWL_RESOURCE_OFF(RES_NRES_PWRON),
+	TWL_RESOURCE_OFF(RES_RESET),
+	TWL_RESOURCE_RESET(RES_MAIN_REF),
+	TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R2),
+	TWL_RESOURCE_RESET(RES_VUSB_3V1),
+	TWL_RESOURCE_GROUP_RESET(RES_GRP_ALL, RES_TYPE_R0, RES_TYPE2_R1),
+	TWL_RESOURCE_GROUP_RESET(RES_GRP_RC, RES_TYPE_ALL, RES_TYPE2_R0),
+	TWL_RESOURCE_ON(RES_RESET),
+	TWL_RESOURCE_ON(RES_NRES_PWRON),
+};
+
+static struct twl4030_script omap3_wrst_script = {
+	.script	= omap3_wrst_seq,
+	.size	= ARRAY_SIZE(omap3_wrst_seq),
+	.flags	= TWL4030_WRST_SCRIPT,
+};
+
+static struct twl4030_script *omap3_reset_scripts[] = {
+	&omap3_wrst_script,
+};
+
+static struct twl4030_resconfig omap3_rconfig[] = {
+	TWL_REMAP_SLEEP(RES_HFCLKOUT, DEV_GRP_P3, -1, -1),
+	TWL_REMAP_SLEEP(RES_VDD1, DEV_GRP_P1, -1, -1),
+	TWL_REMAP_SLEEP(RES_VDD2, DEV_GRP_P1, -1, -1),
+	{ 0, 0 },
+};
+
+static struct twl4030_power_data omap3_reset = {
+	.scripts		= omap3_reset_scripts,
+	.num			= ARRAY_SIZE(omap3_reset_scripts),
+	.resource_config	= omap3_rconfig,
+};
+
+static struct of_device_id twl4030_power_of_match[] = {
+	{
+		.compatible = "ti,twl4030-power-reset",
+		.data = &omap3_reset,
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, twl4030_power_of_match);
+#endif	/* CONFIG_OF */
+
 static int twl4030_power_probe(struct platform_device *pdev)
 {
-	struct twl4030_power_data *pdata = dev_get_platdata(&pdev->dev);
+	const struct twl4030_power_data *pdata = dev_get_platdata(&pdev->dev);
 	struct device_node *node = pdev->dev.of_node;
+	const struct of_device_id *match;
 	int err = 0;
 	int err2 = 0;
 	u8 val;
@@ -586,8 +673,12 @@ static int twl4030_power_probe(struct platform_device *pdev)
 		return err;
 	}
 
+	match = of_match_device(of_match_ptr(twl4030_power_of_match),
+				&pdev->dev);
+	if (match && match->data)
+		pdata = match->data;
+
 	if (pdata) {
-		/* TODO: convert to device tree */
 		err = twl4030_power_configure_scripts(pdata);
 		if (err) {
 			pr_err("TWL4030 failed to load scripts\n");
@@ -637,14 +728,6 @@ static int twl4030_power_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_OF
-static const struct of_device_id twl4030_power_of_match[] = {
-	{.compatible = "ti,twl4030-power", },
-	{ },
-};
-MODULE_DEVICE_TABLE(of, twl4030_power_of_match);
-#endif
-
 static struct platform_driver twl4030_power_driver = {
 	.driver = {
 		.name	= "twl4030_power",
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index ade1c06d4ceb..5fe031375ed4 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -486,7 +486,10 @@ static inline int twl6030_mmc_card_detect(struct device *dev, int slot)
 #define RES_GRP_ALL		0x7	/* All resource groups */
 
 #define RES_TYPE2_R0		0x0
+#define RES_TYPE2_R1		0x1
+#define RES_TYPE2_R2		0x2
 
+#define RES_TYPE_R0		0x0
 #define RES_TYPE_ALL		0x7
 
 /* Resource states */
-- 
cgit 


From 482e7db160df713a2d1d4c7ee9fffad92008283f Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 20 May 2014 11:17:54 -0700
Subject: mfd: twl4030-power: Add support for board specific configuration

With the recommended twl4030 configuration added, we can now add
board specific changes as modifications to the recommended
configuration.

Note that the data is private to this driver, and the data must
always have a NULL resource in the sentinel.

Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/twl4030-power.c | 21 +++++++++++++++++++++
 include/linux/i2c/twl.h     |  1 +
 2 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl4030-power.c b/drivers/mfd/twl4030-power.c
index 2bfbb40ca9d2..4846c7b48ebb 100644
--- a/drivers/mfd/twl4030-power.c
+++ b/drivers/mfd/twl4030-power.c
@@ -567,13 +567,34 @@ twl4030_power_configure_scripts(const struct twl4030_power_data *pdata)
 	return 0;
 }
 
+static void twl4030_patch_rconfig(struct twl4030_resconfig *common,
+				  struct twl4030_resconfig *board)
+{
+	while (common->resource) {
+		struct twl4030_resconfig *b = board;
+
+		while (b->resource) {
+			if (b->resource == common->resource) {
+				*common = *b;
+				break;
+			}
+			b++;
+		}
+		common++;
+	}
+}
+
 static int
 twl4030_power_configure_resources(const struct twl4030_power_data *pdata)
 {
 	struct twl4030_resconfig *resconfig = pdata->resource_config;
+	struct twl4030_resconfig *boardconf = pdata->board_config;
 	int err;
 
 	if (resconfig) {
+		if (boardconf)
+			twl4030_patch_rconfig(resconfig, boardconf);
+
 		while (resconfig->resource) {
 			err = twl4030_configure_resource(resconfig);
 			if (err)
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index 5fe031375ed4..57fe782bf031 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -662,6 +662,7 @@ struct twl4030_power_data {
 	struct twl4030_script **scripts;
 	unsigned num;
 	struct twl4030_resconfig *resource_config;
+	struct twl4030_resconfig *board_config;
 #define TWL4030_RESCONFIG_UNDEF	((u8)-1)
 	bool use_poweroff;	/* Board is wired for TWL poweroff */
 };
-- 
cgit 


From aa76fcf473f6bfa839f37f77b6fdb71f0fb88d8f Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Fri, 21 Feb 2014 17:36:21 +0200
Subject: CLK: TI: DPLL: add support for omap2 core dpll

OMAP2 has slightly different DPLL compared to later OMAP generations.
This patch adds support for the ti,omap2-dpll-core-clock and also adds
the bindings documentation.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 .../devicetree/bindings/clock/ti/dpll.txt          |  9 +++
 arch/arm/mach-omap2/clock.h                        |  1 -
 arch/arm/mach-omap2/clock2xxx.h                    |  4 --
 drivers/clk/ti/dpll.c                              | 78 +++++++++++++++++++---
 include/linux/clk/ti.h                             |  6 ++
 5 files changed, 82 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/clock/ti/dpll.txt b/Documentation/devicetree/bindings/clock/ti/dpll.txt
index 30bfdb7c9f18..50a1a427608f 100644
--- a/Documentation/devicetree/bindings/clock/ti/dpll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/dpll.txt
@@ -30,6 +30,7 @@ Required properties:
 		"ti,am3-dpll-clock",
 		"ti,am3-dpll-core-clock",
 		"ti,am3-dpll-x2-clock",
+		"ti,omap2-dpll-core-clock",
 
 - #clock-cells : from common clock binding; shall be set to 0.
 - clocks : link phandles of parent clocks, first entry lists reference clock
@@ -41,6 +42,7 @@ Required properties:
 	"mult-div1" - contains the multiplier / divider register base address
 	"autoidle" - contains the autoidle register base address (optional)
   ti,am3-* dpll types do not have autoidle register
+  ti,omap2-* dpll type does not support idlest / autoidle registers
 
 Optional properties:
 - DPLL mode setting - defining any one or more of the following overrides
@@ -73,3 +75,10 @@ Examples:
 		clocks = <&sys_clkin_ck>, <&sys_clkin_ck>;
 		reg = <0x90>, <0x5c>, <0x68>;
 	};
+
+	dpll_ck: dpll_ck {
+		#clock-cells = <0>;
+		compatible = "ti,omap2-dpll-core-clock";
+		clocks = <&sys_ck>, <&sys_ck>;
+		reg = <0x0500>, <0x0540>;
+	};
diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h
index bda767a9dea8..f6e9904d7a75 100644
--- a/arch/arm/mach-omap2/clock.h
+++ b/arch/arm/mach-omap2/clock.h
@@ -279,7 +279,6 @@ extern const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait;
 extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait;
 extern const struct clk_hw_omap_ops clkhwops_apll54;
 extern const struct clk_hw_omap_ops clkhwops_apll96;
-extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll;
 extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait;
 
 /* clksel_rate blocks shared between OMAP44xx and AM33xx */
diff --git a/arch/arm/mach-omap2/clock2xxx.h b/arch/arm/mach-omap2/clock2xxx.h
index 539dc08afbba..45f41a411603 100644
--- a/arch/arm/mach-omap2/clock2xxx.h
+++ b/arch/arm/mach-omap2/clock2xxx.h
@@ -21,10 +21,6 @@ unsigned long omap2xxx_sys_clk_recalc(struct clk_hw *clk,
 				      unsigned long parent_rate);
 unsigned long omap2_osc_clk_recalc(struct clk_hw *clk,
 				   unsigned long parent_rate);
-unsigned long omap2_dpllcore_recalc(struct clk_hw *hw,
-				    unsigned long parent_rate);
-int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate,
-			     unsigned long parent_rate);
 void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw);
 unsigned long omap2_clk_apll54_recalc(struct clk_hw *hw,
 				      unsigned long parent_rate);
diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c
index dda262db42ea..34e233990212 100644
--- a/drivers/clk/ti/dpll.c
+++ b/drivers/clk/ti/dpll.c
@@ -35,21 +35,18 @@ static const struct clk_ops dpll_m4xen_ck_ops = {
 	.set_rate	= &omap3_noncore_dpll_set_rate,
 	.get_parent	= &omap2_init_dpll_parent,
 };
+#else
+static const struct clk_ops dpll_m4xen_ck_ops = {};
 #endif
 
+#if defined(CONFIG_ARCH_OMAP3) || defined(CONFIG_ARCH_OMAP4) || \
+	defined(CONFIG_SOC_OMAP5) || defined(CONFIG_SOC_DRA7XX) || \
+	defined(CONFIG_SOC_AM33XX) || defined(CONFIG_SOC_AM43XX)
 static const struct clk_ops dpll_core_ck_ops = {
 	.recalc_rate	= &omap3_dpll_recalc,
 	.get_parent	= &omap2_init_dpll_parent,
 };
 
-#ifdef CONFIG_ARCH_OMAP3
-static const struct clk_ops omap3_dpll_core_ck_ops = {
-	.get_parent	= &omap2_init_dpll_parent,
-	.recalc_rate	= &omap3_dpll_recalc,
-	.round_rate	= &omap2_dpll_round_rate,
-};
-#endif
-
 static const struct clk_ops dpll_ck_ops = {
 	.enable		= &omap3_noncore_dpll_enable,
 	.disable	= &omap3_noncore_dpll_disable,
@@ -65,6 +62,33 @@ static const struct clk_ops dpll_no_gate_ck_ops = {
 	.round_rate	= &omap2_dpll_round_rate,
 	.set_rate	= &omap3_noncore_dpll_set_rate,
 };
+#else
+static const struct clk_ops dpll_core_ck_ops = {};
+static const struct clk_ops dpll_ck_ops = {};
+static const struct clk_ops dpll_no_gate_ck_ops = {};
+const struct clk_hw_omap_ops clkhwops_omap3_dpll = {};
+#endif
+
+#ifdef CONFIG_ARCH_OMAP2
+static const struct clk_ops omap2_dpll_core_ck_ops = {
+	.get_parent	= &omap2_init_dpll_parent,
+	.recalc_rate	= &omap2_dpllcore_recalc,
+	.round_rate	= &omap2_dpll_round_rate,
+	.set_rate	= &omap2_reprogram_dpllcore,
+};
+#else
+static const struct clk_ops omap2_dpll_core_ck_ops = {};
+#endif
+
+#ifdef CONFIG_ARCH_OMAP3
+static const struct clk_ops omap3_dpll_core_ck_ops = {
+	.get_parent	= &omap2_init_dpll_parent,
+	.recalc_rate	= &omap3_dpll_recalc,
+	.round_rate	= &omap2_dpll_round_rate,
+};
+#else
+static const struct clk_ops omap3_dpll_core_ck_ops = {};
+#endif
 
 #ifdef CONFIG_ARCH_OMAP3
 static const struct clk_ops omap3_dpll_ck_ops = {
@@ -237,10 +261,27 @@ static void __init of_ti_dpll_setup(struct device_node *node,
 	init->parent_names = parent_names;
 
 	dd->control_reg = ti_clk_get_reg_addr(node, 0);
-	dd->idlest_reg = ti_clk_get_reg_addr(node, 1);
-	dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2);
 
-	if (!dd->control_reg || !dd->idlest_reg || !dd->mult_div1_reg)
+	/*
+	 * Special case for OMAP2 DPLL, register order is different due to
+	 * missing idlest_reg, also clkhwops is different. Detected from
+	 * missing idlest_mask.
+	 */
+	if (!dd->idlest_mask) {
+		dd->mult_div1_reg = ti_clk_get_reg_addr(node, 1);
+#ifdef CONFIG_ARCH_OMAP2
+		clk_hw->ops = &clkhwops_omap2xxx_dpll;
+		omap2xxx_clkt_dpllcore_init(&clk_hw->hw);
+#endif
+	} else {
+		dd->idlest_reg = ti_clk_get_reg_addr(node, 1);
+		if (!dd->idlest_reg)
+			goto cleanup;
+
+		dd->mult_div1_reg = ti_clk_get_reg_addr(node, 2);
+	}
+
+	if (!dd->control_reg || !dd->mult_div1_reg)
 		goto cleanup;
 
 	if (dd->autoidle_mask) {
@@ -547,3 +588,18 @@ static void __init of_ti_am3_core_dpll_setup(struct device_node *node)
 }
 CLK_OF_DECLARE(ti_am3_core_dpll_clock, "ti,am3-dpll-core-clock",
 	       of_ti_am3_core_dpll_setup);
+
+static void __init of_ti_omap2_core_dpll_setup(struct device_node *node)
+{
+	const struct dpll_data dd = {
+		.enable_mask = 0x3,
+		.mult_mask = 0x3ff << 12,
+		.div1_mask = 0xf << 8,
+		.max_divider = 16,
+		.min_divider = 1,
+	};
+
+	of_ti_dpll_setup(node, &omap2_dpll_core_ck_ops, &dd);
+}
+CLK_OF_DECLARE(ti_omap2_core_dpll_clock, "ti,omap2-dpll-core-clock",
+	       of_ti_omap2_core_dpll_setup);
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 4a21a872dbbd..753878c6fa52 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -259,6 +259,11 @@ int omap2_dflt_clk_enable(struct clk_hw *hw);
 void omap2_dflt_clk_disable(struct clk_hw *hw);
 int omap2_dflt_clk_is_enabled(struct clk_hw *hw);
 void omap3_clk_lock_dpll5(void);
+unsigned long omap2_dpllcore_recalc(struct clk_hw *hw,
+				    unsigned long parent_rate);
+int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate,
+			     unsigned long parent_rate);
+void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw);
 
 void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index);
 void ti_dt_clocks_register(struct ti_dt_clk *oclks);
@@ -287,6 +292,7 @@ static inline void of_ti_clk_allow_autoidle_all(void) { }
 static inline void of_ti_clk_deny_autoidle_all(void) { }
 #endif
 
+extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll;
 extern const struct clk_hw_omap_ops clkhwops_omap3_dpll;
 extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx;
 extern const struct clk_hw_omap_ops clkhwops_wait;
-- 
cgit 


From 4d008589e271e28eae728eef7f5fb1f658f12b9f Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Mon, 24 Feb 2014 16:06:34 +0200
Subject: CLK: TI: APLL: add support for omap2 aplls

This patch adds support for omap2 type aplls, which have gating and
autoidle functionality.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 .../devicetree/bindings/clock/ti/apll.txt          |  24 ++-
 arch/arm/mach-omap2/clock.h                        |  11 --
 drivers/clk/ti/apll.c                              | 181 +++++++++++++++++++++
 include/linux/clk/ti.h                             |  21 ++-
 4 files changed, 220 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/clock/ti/apll.txt b/Documentation/devicetree/bindings/clock/ti/apll.txt
index 7faf5a68b3be..ade4dd4c30f0 100644
--- a/Documentation/devicetree/bindings/clock/ti/apll.txt
+++ b/Documentation/devicetree/bindings/clock/ti/apll.txt
@@ -14,18 +14,32 @@ a subtype of a DPLL [2], although a simplified one at that.
 [2] Documentation/devicetree/bindings/clock/ti/dpll.txt
 
 Required properties:
-- compatible : shall be "ti,dra7-apll-clock"
+- compatible : shall be "ti,dra7-apll-clock" or "ti,omap2-apll-clock"
 - #clock-cells : from common clock binding; shall be set to 0.
 - clocks : link phandles of parent clocks (clk-ref and clk-bypass)
 - reg : address and length of the register set for controlling the APLL.
   It contains the information of registers in the following order:
-	"control" - contains the control register base address
-	"idlest" - contains the idlest register base address
+	"control" - contains the control register offset
+	"autoidle" - contains the autoidle register offset (OMAP2 only)
+	"idlest" - contains the idlest register offset
+- ti,clock-frequency : static clock frequency for the clock (OMAP2 only)
+- ti,idlest-shift : bit-shift for the idlest field (OMAP2 only)
+- ti,bit-shift : bit-shift for enable and autoidle fields (OMAP2 only)
 
 Examples:
-	apll_pcie_ck: apll_pcie_ck@4a008200 {
+	apll_pcie_ck: apll_pcie_ck {
 		#clock-cells = <0>;
 		clocks = <&apll_pcie_in_clk_mux>, <&dpll_pcie_ref_ck>;
-		reg = <0x4a00821c 0x4>, <0x4a008220 0x4>;
+		reg = <0x021c>, <0x0220>;
 		compatible = "ti,dra7-apll-clock";
 	};
+
+	apll96_ck: apll96_ck {
+		#clock-cells = <0>;
+		compatible = "ti,omap2-apll-clock";
+		clocks = <&sys_ck>;
+		ti,bit-shift = <2>;
+		ti,idlest-shift = <8>;
+		ti,clock-frequency = <96000000>;
+		reg = <0x0500>, <0x0530>, <0x0520>;
+	};
diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h
index f6e9904d7a75..eb441d137843 100644
--- a/arch/arm/mach-omap2/clock.h
+++ b/arch/arm/mach-omap2/clock.h
@@ -178,17 +178,6 @@ struct clksel {
 	const struct clksel_rate *rates;
 };
 
-struct clk_hw_omap_ops {
-	void			(*find_idlest)(struct clk_hw_omap *oclk,
-					void __iomem **idlest_reg,
-					u8 *idlest_bit, u8 *idlest_val);
-	void			(*find_companion)(struct clk_hw_omap *oclk,
-					void __iomem **other_reg,
-					u8 *other_bit);
-	void			(*allow_idle)(struct clk_hw_omap *oclk);
-	void			(*deny_idle)(struct clk_hw_omap *oclk);
-};
-
 unsigned long omap_fixed_divisor_recalc(struct clk_hw *hw,
 					unsigned long parent_rate);
 
diff --git a/drivers/clk/ti/apll.c b/drivers/clk/ti/apll.c
index b986f61f5a77..5428c9c547cd 100644
--- a/drivers/clk/ti/apll.c
+++ b/drivers/clk/ti/apll.c
@@ -221,3 +221,184 @@ cleanup:
 	kfree(init);
 }
 CLK_OF_DECLARE(dra7_apll_clock, "ti,dra7-apll-clock", of_dra7_apll_setup);
+
+#define OMAP2_EN_APLL_LOCKED	0x3
+#define OMAP2_EN_APLL_STOPPED	0x0
+
+static int omap2_apll_is_enabled(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct dpll_data *ad = clk->dpll_data;
+	u32 v;
+
+	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v &= ad->enable_mask;
+
+	v >>= __ffs(ad->enable_mask);
+
+	return v == OMAP2_EN_APLL_LOCKED ? 1 : 0;
+}
+
+static unsigned long omap2_apll_recalc(struct clk_hw *hw,
+				       unsigned long parent_rate)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+
+	if (omap2_apll_is_enabled(hw))
+		return clk->fixed_rate;
+
+	return 0;
+}
+
+static int omap2_apll_enable(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct dpll_data *ad = clk->dpll_data;
+	u32 v;
+	int i = 0;
+
+	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v &= ~ad->enable_mask;
+	v |= OMAP2_EN_APLL_LOCKED << __ffs(ad->enable_mask);
+	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+
+	while (1) {
+		v = ti_clk_ll_ops->clk_readl(ad->idlest_reg);
+		if (v & ad->idlest_mask)
+			break;
+		if (i > MAX_APLL_WAIT_TRIES)
+			break;
+		i++;
+		udelay(1);
+	}
+	/* v is the last idlest read; a clear lock bit means we timed out */
+	if (!(v & ad->idlest_mask)) {
+		pr_warn("%s failed to transition to locked\n",
+			__clk_get_name(clk->hw.clk));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static void omap2_apll_disable(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct dpll_data *ad = clk->dpll_data;
+	u32 v;
+
+	v = ti_clk_ll_ops->clk_readl(ad->control_reg);
+	v &= ~ad->enable_mask;
+	v |= OMAP2_EN_APLL_STOPPED << __ffs(ad->enable_mask);
+	ti_clk_ll_ops->clk_writel(v, ad->control_reg);
+}
+
+static struct clk_ops omap2_apll_ops = {
+	.enable		= &omap2_apll_enable,
+	.disable	= &omap2_apll_disable,
+	.is_enabled	= &omap2_apll_is_enabled,
+	.recalc_rate	= &omap2_apll_recalc,
+};
+
+static void omap2_apll_set_autoidle(struct clk_hw_omap *clk, u32 val)
+{
+	struct dpll_data *ad = clk->dpll_data;
+	u32 v;
+
+	v = ti_clk_ll_ops->clk_readl(ad->autoidle_reg);
+	v &= ~ad->autoidle_mask;
+	v |= val << __ffs(ad->autoidle_mask);
+	ti_clk_ll_ops->clk_writel(v, ad->autoidle_reg); /* write back where read */
+}
+
+#define OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP	0x3
+#define OMAP2_APLL_AUTOIDLE_DISABLE		0x0
+
+static void omap2_apll_allow_idle(struct clk_hw_omap *clk)
+{
+	omap2_apll_set_autoidle(clk, OMAP2_APLL_AUTOIDLE_LOW_POWER_STOP);
+}
+
+static void omap2_apll_deny_idle(struct clk_hw_omap *clk)
+{
+	omap2_apll_set_autoidle(clk, OMAP2_APLL_AUTOIDLE_DISABLE);
+}
+
+static struct clk_hw_omap_ops omap2_apll_hwops = {
+	.allow_idle	= &omap2_apll_allow_idle,
+	.deny_idle	= &omap2_apll_deny_idle,
+};
+
+static void __init of_omap2_apll_setup(struct device_node *node)
+{
+	struct dpll_data *ad = NULL;
+	struct clk_hw_omap *clk_hw = NULL;
+	struct clk_init_data *init = NULL;
+	struct clk *clk;
+	const char *parent_name;
+	u32 val;
+
+	ad = kzalloc(sizeof(*ad), GFP_KERNEL);
+	clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL);
+	init = kzalloc(sizeof(*init), GFP_KERNEL);
+
+	if (!ad || !clk_hw || !init)
+		goto cleanup;
+
+	clk_hw->dpll_data = ad;
+	clk_hw->hw.init = init;
+	init->ops = &omap2_apll_ops;
+	init->name = node->name;
+	clk_hw->ops = &omap2_apll_hwops;
+
+	init->num_parents = of_clk_get_parent_count(node);
+	if (init->num_parents != 1) {
+		pr_err("%s must have one parent\n", node->name);
+		goto cleanup;
+	}
+
+	parent_name = of_clk_get_parent_name(node, 0);
+	init->parent_names = &parent_name;
+
+	if (of_property_read_u32(node, "ti,clock-frequency", &val)) {
+		pr_err("%s missing clock-frequency\n", node->name);
+		goto cleanup;
+	}
+	clk_hw->fixed_rate = val;
+
+	if (of_property_read_u32(node, "ti,bit-shift", &val)) {
+		pr_err("%s missing bit-shift\n", node->name);
+		goto cleanup;
+	}
+
+	clk_hw->enable_bit = val;
+	ad->enable_mask = 0x3 << val;
+	ad->autoidle_mask = 0x3 << val;
+
+	if (of_property_read_u32(node, "ti,idlest-shift", &val)) {
+		pr_err("%s missing idlest-shift\n", node->name);
+		goto cleanup;
+	}
+
+	ad->idlest_mask = 1 << val;
+	/* "reg" entries are read as: 0 = control, 1 = autoidle, 2 = idlest */
+	ad->control_reg = ti_clk_get_reg_addr(node, 0);
+	ad->autoidle_reg = ti_clk_get_reg_addr(node, 1);
+	ad->idlest_reg = ti_clk_get_reg_addr(node, 2);
+
+	if (!ad->control_reg || !ad->autoidle_reg || !ad->idlest_reg)
+		goto cleanup;
+
+	clk = clk_register(NULL, &clk_hw->hw);
+	if (!IS_ERR(clk)) {
+		of_clk_add_provider(node, of_clk_src_simple_get, clk);
+		kfree(init);
+		return;
+	}
+cleanup:
+	kfree(ad);
+	kfree(clk_hw);
+	kfree(init);
+}
+CLK_OF_DECLARE(omap2_apll_clock, "ti,omap2-apll-clock",
+	       of_omap2_apll_setup);
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 753878c6fa52..44bf84002a34 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -94,7 +94,26 @@ struct dpll_data {
 	u8			flags;
 };
 
-struct clk_hw_omap_ops;
+struct clk_hw_omap;
+
+/**
+ * struct clk_hw_omap_ops - OMAP clk ops
+ * @find_idlest: find idlest register information for a clock
+ * @find_companion: find companion clock register information for a clock,
+ *		    basically converts CM_ICLKEN* <-> CM_FCLKEN*
+ * @allow_idle: enables autoidle hardware functionality for a clock
+ * @deny_idle: prevent autoidle hardware functionality for a clock
+ */
+struct clk_hw_omap_ops {
+	void	(*find_idlest)(struct clk_hw_omap *oclk,
+			       void __iomem **idlest_reg,
+			       u8 *idlest_bit, u8 *idlest_val);
+	void	(*find_companion)(struct clk_hw_omap *oclk,
+				  void __iomem **other_reg,
+				  u8 *other_bit);
+	void	(*allow_idle)(struct clk_hw_omap *oclk);
+	void	(*deny_idle)(struct clk_hw_omap *oclk);
+};
 
 /**
  * struct clk_hw_omap - OMAP struct clk
-- 
cgit 


From de742570745e12b53c70130ace958f2a60044000 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Tue, 25 Feb 2014 19:16:07 +0200
Subject: CLK: TI: interface: add support for omap2430 specific interface clock

OMAP2430 I2CHS modules require specific hardware ops to be used, so added
a new compatible string for this.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 Documentation/devicetree/bindings/clock/ti/interface.txt |  2 ++
 arch/arm/mach-omap2/clock.h                              |  1 -
 drivers/clk/ti/interface.c                               | 11 +++++++++++
 include/linux/clk/ti.h                                   |  1 +
 4 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/clock/ti/interface.txt b/Documentation/devicetree/bindings/clock/ti/interface.txt
index 064e8caccac3..3111a409fea6 100644
--- a/Documentation/devicetree/bindings/clock/ti/interface.txt
+++ b/Documentation/devicetree/bindings/clock/ti/interface.txt
@@ -21,6 +21,8 @@ Required properties:
   "ti,omap3-dss-interface-clock" - interface clock with DSS specific HW handling
   "ti,omap3-ssi-interface-clock" - interface clock with SSI specific HW handling
   "ti,am35xx-interface-clock" - interface clock with AM35xx specific HW handling
+  "ti,omap2430-interface-clock" - interface clock with OMAP2430 specific HW
+				  handling
 - #clock-cells : from common clock binding; shall be set to 0
 - clocks : link to phandle of parent clock
 - reg : base address for the control register
diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h
index eb441d137843..12f54d428d7c 100644
--- a/arch/arm/mach-omap2/clock.h
+++ b/arch/arm/mach-omap2/clock.h
@@ -268,7 +268,6 @@ extern const struct clk_hw_omap_ops clkhwops_omap3430es2_hsotgusb_wait;
 extern const struct clk_hw_omap_ops clkhwops_am35xx_ipss_module_wait;
 extern const struct clk_hw_omap_ops clkhwops_apll54;
 extern const struct clk_hw_omap_ops clkhwops_apll96;
-extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait;
 
 /* clksel_rate blocks shared between OMAP44xx and AM33xx */
 extern const struct clksel_rate div_1_0_rates[];
diff --git a/drivers/clk/ti/interface.c b/drivers/clk/ti/interface.c
index 320a2b168bb2..9c3e8c4aaa40 100644
--- a/drivers/clk/ti/interface.c
+++ b/drivers/clk/ti/interface.c
@@ -94,6 +94,7 @@ static void __init of_ti_no_wait_interface_clk_setup(struct device_node *node)
 CLK_OF_DECLARE(ti_no_wait_interface_clk, "ti,omap3-no-wait-interface-clock",
 	       of_ti_no_wait_interface_clk_setup);
 
+#ifdef CONFIG_ARCH_OMAP3
 static void __init of_ti_hsotgusb_interface_clk_setup(struct device_node *node)
 {
 	_of_ti_interface_clk_setup(node,
@@ -123,3 +124,13 @@ static void __init of_ti_am35xx_interface_clk_setup(struct device_node *node)
 }
 CLK_OF_DECLARE(ti_am35xx_interface_clk, "ti,am35xx-interface-clock",
 	       of_ti_am35xx_interface_clk_setup);
+#endif
+
+#ifdef CONFIG_SOC_OMAP2430
+static void __init of_ti_omap2430_interface_clk_setup(struct device_node *node)
+{
+	_of_ti_interface_clk_setup(node, &clkhwops_omap2430_i2chs_wait);
+}
+CLK_OF_DECLARE(ti_omap2430_interface_clk, "ti,omap2430-interface-clock",
+	       of_ti_omap2430_interface_clk_setup);
+#endif
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 44bf84002a34..a8390d478528 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -312,6 +312,7 @@ static inline void of_ti_clk_deny_autoidle_all(void) { }
 #endif
 
 extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll;
+extern const struct clk_hw_omap_ops clkhwops_omap2430_i2chs_wait;
 extern const struct clk_hw_omap_ops clkhwops_omap3_dpll;
 extern const struct clk_hw_omap_ops clkhwops_omap4_dpllmx;
 extern const struct clk_hw_omap_ops clkhwops_wait;
-- 
cgit 


From be67c3bf382c591d8267e0ef12d80041854731d9 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Mon, 24 Feb 2014 17:52:57 +0200
Subject: CLK: TI: OMAP2: add clock init support

Adds support for registering the alias clocks, boot time clock-enable list
and disabling autoidle of clocks.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 drivers/clk/ti/Makefile   |   1 +
 drivers/clk/ti/clk-2xxx.c | 254 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/clk/ti.h    |   2 +
 3 files changed, 257 insertions(+)
 create mode 100644 drivers/clk/ti/clk-2xxx.c

(limited to 'include/linux')

diff --git a/drivers/clk/ti/Makefile b/drivers/clk/ti/Makefile
index 4319d4031aa3..4afeaed9e9ba 100644
--- a/drivers/clk/ti/Makefile
+++ b/drivers/clk/ti/Makefile
@@ -3,6 +3,7 @@ obj-y					+= clk.o autoidle.o clockdomain.o
 clk-common				= dpll.o composite.o divider.o gate.o \
 					  fixed-factor.o mux.o apll.o
 obj-$(CONFIG_SOC_AM33XX)		+= $(clk-common) clk-33xx.o
+obj-$(CONFIG_ARCH_OMAP2)		+= $(clk-common) interface.o clk-2xxx.o
 obj-$(CONFIG_ARCH_OMAP3)		+= $(clk-common) interface.o clk-3xxx.o
 obj-$(CONFIG_ARCH_OMAP4)		+= $(clk-common) clk-44xx.o
 obj-$(CONFIG_SOC_OMAP5)			+= $(clk-common) clk-54xx.o
diff --git a/drivers/clk/ti/clk-2xxx.c b/drivers/clk/ti/clk-2xxx.c
new file mode 100644
index 000000000000..f6400fb5ee3e
--- /dev/null
+++ b/drivers/clk/ti/clk-2xxx.c
@@ -0,0 +1,254 @@
+/*
+ * OMAP2 Clock init
+ *
+ * Copyright (C) 2013 Texas Instruments, Inc
+ *     Tero Kristo (t-kristo@ti.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation version 2.
+ *
+ * This program is distributed "as is" WITHOUT ANY WARRANTY of any
+ * kind, whether express or implied; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/clk-provider.h>
+#include <linux/clk/ti.h>
+
+static struct ti_dt_clk omap2xxx_clks[] = {
+	DT_CLK(NULL, "func_32k_ck", "func_32k_ck"),
+	DT_CLK(NULL, "secure_32k_ck", "secure_32k_ck"),
+	DT_CLK(NULL, "virt_12m_ck", "virt_12m_ck"),
+	DT_CLK(NULL, "virt_13m_ck", "virt_13m_ck"),
+	DT_CLK(NULL, "virt_19200000_ck", "virt_19200000_ck"),
+	DT_CLK(NULL, "virt_26m_ck", "virt_26m_ck"),
+	DT_CLK(NULL, "aplls_clkin_ck", "aplls_clkin_ck"),
+	DT_CLK(NULL, "aplls_clkin_x2_ck", "aplls_clkin_x2_ck"),
+	DT_CLK(NULL, "osc_ck", "osc_ck"),
+	DT_CLK(NULL, "sys_ck", "sys_ck"),
+	DT_CLK(NULL, "alt_ck", "alt_ck"),
+	DT_CLK(NULL, "mcbsp_clks", "mcbsp_clks"),
+	DT_CLK(NULL, "dpll_ck", "dpll_ck"),
+	DT_CLK(NULL, "apll96_ck", "apll96_ck"),
+	DT_CLK(NULL, "apll54_ck", "apll54_ck"),
+	DT_CLK(NULL, "func_54m_ck", "func_54m_ck"),
+	DT_CLK(NULL, "core_ck", "core_ck"),
+	DT_CLK(NULL, "func_96m_ck", "func_96m_ck"),
+	DT_CLK(NULL, "func_48m_ck", "func_48m_ck"),
+	DT_CLK(NULL, "func_12m_ck", "func_12m_ck"),
+	DT_CLK(NULL, "sys_clkout_src", "sys_clkout_src"),
+	DT_CLK(NULL, "sys_clkout", "sys_clkout"),
+	DT_CLK(NULL, "emul_ck", "emul_ck"),
+	DT_CLK(NULL, "mpu_ck", "mpu_ck"),
+	DT_CLK(NULL, "dsp_fck", "dsp_fck"),
+	DT_CLK(NULL, "gfx_3d_fck", "gfx_3d_fck"),
+	DT_CLK(NULL, "gfx_2d_fck", "gfx_2d_fck"),
+	DT_CLK(NULL, "gfx_ick", "gfx_ick"),
+	DT_CLK("omapdss_dss", "ick", "dss_ick"),
+	DT_CLK(NULL, "dss_ick", "dss_ick"),
+	DT_CLK(NULL, "dss1_fck", "dss1_fck"),
+	DT_CLK(NULL, "dss2_fck", "dss2_fck"),
+	DT_CLK(NULL, "dss_54m_fck", "dss_54m_fck"),
+	DT_CLK(NULL, "core_l3_ck", "core_l3_ck"),
+	DT_CLK(NULL, "ssi_fck", "ssi_ssr_sst_fck"),
+	DT_CLK(NULL, "usb_l4_ick", "usb_l4_ick"),
+	DT_CLK(NULL, "l4_ck", "l4_ck"),
+	DT_CLK(NULL, "ssi_l4_ick", "ssi_l4_ick"),
+	DT_CLK(NULL, "gpt1_ick", "gpt1_ick"),
+	DT_CLK(NULL, "gpt1_fck", "gpt1_fck"),
+	DT_CLK(NULL, "gpt2_ick", "gpt2_ick"),
+	DT_CLK(NULL, "gpt2_fck", "gpt2_fck"),
+	DT_CLK(NULL, "gpt3_ick", "gpt3_ick"),
+	DT_CLK(NULL, "gpt3_fck", "gpt3_fck"),
+	DT_CLK(NULL, "gpt4_ick", "gpt4_ick"),
+	DT_CLK(NULL, "gpt4_fck", "gpt4_fck"),
+	DT_CLK(NULL, "gpt5_ick", "gpt5_ick"),
+	DT_CLK(NULL, "gpt5_fck", "gpt5_fck"),
+	DT_CLK(NULL, "gpt6_ick", "gpt6_ick"),
+	DT_CLK(NULL, "gpt6_fck", "gpt6_fck"),
+	DT_CLK(NULL, "gpt7_ick", "gpt7_ick"),
+	DT_CLK(NULL, "gpt7_fck", "gpt7_fck"),
+	DT_CLK(NULL, "gpt8_ick", "gpt8_ick"),
+	DT_CLK(NULL, "gpt8_fck", "gpt8_fck"),
+	DT_CLK(NULL, "gpt9_ick", "gpt9_ick"),
+	DT_CLK(NULL, "gpt9_fck", "gpt9_fck"),
+	DT_CLK(NULL, "gpt10_ick", "gpt10_ick"),
+	DT_CLK(NULL, "gpt10_fck", "gpt10_fck"),
+	DT_CLK(NULL, "gpt11_ick", "gpt11_ick"),
+	DT_CLK(NULL, "gpt11_fck", "gpt11_fck"),
+	DT_CLK(NULL, "gpt12_ick", "gpt12_ick"),
+	DT_CLK(NULL, "gpt12_fck", "gpt12_fck"),
+	DT_CLK("omap-mcbsp.1", "ick", "mcbsp1_ick"),
+	DT_CLK(NULL, "mcbsp1_ick", "mcbsp1_ick"),
+	DT_CLK(NULL, "mcbsp1_fck", "mcbsp1_fck"),
+	DT_CLK("omap-mcbsp.2", "ick", "mcbsp2_ick"),
+	DT_CLK(NULL, "mcbsp2_ick", "mcbsp2_ick"),
+	DT_CLK(NULL, "mcbsp2_fck", "mcbsp2_fck"),
+	DT_CLK("omap2_mcspi.1", "ick", "mcspi1_ick"),
+	DT_CLK(NULL, "mcspi1_ick", "mcspi1_ick"),
+	DT_CLK(NULL, "mcspi1_fck", "mcspi1_fck"),
+	DT_CLK("omap2_mcspi.2", "ick", "mcspi2_ick"),
+	DT_CLK(NULL, "mcspi2_ick", "mcspi2_ick"),
+	DT_CLK(NULL, "mcspi2_fck", "mcspi2_fck"),
+	DT_CLK(NULL, "uart1_ick", "uart1_ick"),
+	DT_CLK(NULL, "uart1_fck", "uart1_fck"),
+	DT_CLK(NULL, "uart2_ick", "uart2_ick"),
+	DT_CLK(NULL, "uart2_fck", "uart2_fck"),
+	DT_CLK(NULL, "uart3_ick", "uart3_ick"),
+	DT_CLK(NULL, "uart3_fck", "uart3_fck"),
+	DT_CLK(NULL, "gpios_ick", "gpios_ick"),
+	DT_CLK(NULL, "gpios_fck", "gpios_fck"),
+	DT_CLK("omap_wdt", "ick", "mpu_wdt_ick"),
+	DT_CLK(NULL, "mpu_wdt_ick", "mpu_wdt_ick"),
+	DT_CLK(NULL, "mpu_wdt_fck", "mpu_wdt_fck"),
+	DT_CLK(NULL, "sync_32k_ick", "sync_32k_ick"),
+	DT_CLK(NULL, "wdt1_ick", "wdt1_ick"),
+	DT_CLK(NULL, "omapctrl_ick", "omapctrl_ick"),
+	DT_CLK("omap24xxcam", "fck", "cam_fck"),
+	DT_CLK(NULL, "cam_fck", "cam_fck"),
+	DT_CLK("omap24xxcam", "ick", "cam_ick"),
+	DT_CLK(NULL, "cam_ick", "cam_ick"),
+	DT_CLK(NULL, "mailboxes_ick", "mailboxes_ick"),
+	DT_CLK(NULL, "wdt4_ick", "wdt4_ick"),
+	DT_CLK(NULL, "wdt4_fck", "wdt4_fck"),
+	DT_CLK(NULL, "mspro_ick", "mspro_ick"),
+	DT_CLK(NULL, "mspro_fck", "mspro_fck"),
+	DT_CLK(NULL, "fac_ick", "fac_ick"),
+	DT_CLK(NULL, "fac_fck", "fac_fck"),
+	DT_CLK("omap_hdq.0", "ick", "hdq_ick"),
+	DT_CLK(NULL, "hdq_ick", "hdq_ick"),
+	DT_CLK("omap_hdq.0", "fck", "hdq_fck"),
+	DT_CLK(NULL, "hdq_fck", "hdq_fck"),
+	DT_CLK("omap_i2c.1", "ick", "i2c1_ick"),
+	DT_CLK(NULL, "i2c1_ick", "i2c1_ick"),
+	DT_CLK("omap_i2c.2", "ick", "i2c2_ick"),
+	DT_CLK(NULL, "i2c2_ick", "i2c2_ick"),
+	DT_CLK(NULL, "gpmc_fck", "gpmc_fck"),
+	DT_CLK(NULL, "sdma_fck", "sdma_fck"),
+	DT_CLK(NULL, "sdma_ick", "sdma_ick"),
+	DT_CLK(NULL, "sdrc_ick", "sdrc_ick"),
+	DT_CLK(NULL, "des_ick", "des_ick"),
+	DT_CLK("omap-sham", "ick", "sha_ick"),
+	DT_CLK(NULL, "sha_ick", "sha_ick"),
+	DT_CLK("omap_rng", "ick", "rng_ick"),
+	DT_CLK(NULL, "rng_ick", "rng_ick"),
+	DT_CLK("omap-aes", "ick", "aes_ick"),
+	DT_CLK(NULL, "aes_ick", "aes_ick"),
+	DT_CLK(NULL, "pka_ick", "pka_ick"),
+	DT_CLK(NULL, "usb_fck", "usb_fck"),
+	DT_CLK(NULL, "timer_32k_ck", "func_32k_ck"),
+	DT_CLK(NULL, "timer_sys_ck", "sys_ck"),
+	DT_CLK(NULL, "timer_ext_ck", "alt_ck"),
+	{ .node_name = NULL },
+};
+
+static struct ti_dt_clk omap2420_clks[] = {
+	DT_CLK(NULL, "sys_clkout2_src", "sys_clkout2_src"),
+	DT_CLK(NULL, "sys_clkout2", "sys_clkout2"),
+	DT_CLK(NULL, "dsp_ick", "dsp_ick"),
+	DT_CLK(NULL, "iva1_ifck", "iva1_ifck"),
+	DT_CLK(NULL, "iva1_mpu_int_ifck", "iva1_mpu_int_ifck"),
+	DT_CLK(NULL, "wdt3_ick", "wdt3_ick"),
+	DT_CLK(NULL, "wdt3_fck", "wdt3_fck"),
+	DT_CLK("mmci-omap.0", "ick", "mmc_ick"),
+	DT_CLK(NULL, "mmc_ick", "mmc_ick"),
+	DT_CLK("mmci-omap.0", "fck", "mmc_fck"),
+	DT_CLK(NULL, "mmc_fck", "mmc_fck"),
+	DT_CLK(NULL, "eac_ick", "eac_ick"),
+	DT_CLK(NULL, "eac_fck", "eac_fck"),
+	DT_CLK(NULL, "i2c1_fck", "i2c1_fck"),
+	DT_CLK(NULL, "i2c2_fck", "i2c2_fck"),
+	DT_CLK(NULL, "vlynq_ick", "vlynq_ick"),
+	DT_CLK(NULL, "vlynq_fck", "vlynq_fck"),
+	DT_CLK("musb-hdrc", "fck", "osc_ck"),
+	{ .node_name = NULL },
+};
+
+static struct ti_dt_clk omap2430_clks[] = {
+	DT_CLK("twl", "fck", "osc_ck"),
+	DT_CLK(NULL, "iva2_1_ick", "iva2_1_ick"),
+	DT_CLK(NULL, "mdm_ick", "mdm_ick"),
+	DT_CLK(NULL, "mdm_osc_ck", "mdm_osc_ck"),
+	DT_CLK("omap-mcbsp.3", "ick", "mcbsp3_ick"),
+	DT_CLK(NULL, "mcbsp3_ick", "mcbsp3_ick"),
+	DT_CLK(NULL, "mcbsp3_fck", "mcbsp3_fck"),
+	DT_CLK("omap-mcbsp.4", "ick", "mcbsp4_ick"),
+	DT_CLK(NULL, "mcbsp4_ick", "mcbsp4_ick"),
+	DT_CLK(NULL, "mcbsp4_fck", "mcbsp4_fck"),
+	DT_CLK("omap-mcbsp.5", "ick", "mcbsp5_ick"),
+	DT_CLK(NULL, "mcbsp5_ick", "mcbsp5_ick"),
+	DT_CLK(NULL, "mcbsp5_fck", "mcbsp5_fck"),
+	DT_CLK("omap2_mcspi.3", "ick", "mcspi3_ick"),
+	DT_CLK(NULL, "mcspi3_ick", "mcspi3_ick"),
+	DT_CLK(NULL, "mcspi3_fck", "mcspi3_fck"),
+	DT_CLK(NULL, "icr_ick", "icr_ick"),
+	DT_CLK(NULL, "i2chs1_fck", "i2chs1_fck"),
+	DT_CLK(NULL, "i2chs2_fck", "i2chs2_fck"),
+	DT_CLK("musb-omap2430", "ick", "usbhs_ick"),
+	DT_CLK(NULL, "usbhs_ick", "usbhs_ick"),
+	DT_CLK("omap_hsmmc.0", "ick", "mmchs1_ick"),
+	DT_CLK(NULL, "mmchs1_ick", "mmchs1_ick"),
+	DT_CLK(NULL, "mmchs1_fck", "mmchs1_fck"),
+	DT_CLK("omap_hsmmc.1", "ick", "mmchs2_ick"),
+	DT_CLK(NULL, "mmchs2_ick", "mmchs2_ick"),
+	DT_CLK(NULL, "mmchs2_fck", "mmchs2_fck"),
+	DT_CLK(NULL, "gpio5_ick", "gpio5_ick"),
+	DT_CLK(NULL, "gpio5_fck", "gpio5_fck"),
+	DT_CLK(NULL, "mdm_intc_ick", "mdm_intc_ick"),
+	DT_CLK("omap_hsmmc.0", "mmchsdb_fck", "mmchsdb1_fck"),
+	DT_CLK(NULL, "mmchsdb1_fck", "mmchsdb1_fck"),
+	DT_CLK("omap_hsmmc.1", "mmchsdb_fck", "mmchsdb2_fck"),
+	DT_CLK(NULL, "mmchsdb2_fck", "mmchsdb2_fck"),
+	{ .node_name = NULL },
+};
+
+static const char *enable_init_clks[] = {
+	"apll96_ck",
+	"apll54_ck",
+	"sync_32k_ick",
+	"omapctrl_ick",
+	"gpmc_fck",
+	"sdrc_ick",
+};
+
+enum {
+	OMAP2_SOC_OMAP2420,
+	OMAP2_SOC_OMAP2430,
+};
+
+static int __init omap2xxx_dt_clk_init(int soc_type)
+{
+	ti_dt_clocks_register(omap2xxx_clks);
+
+	if (soc_type == OMAP2_SOC_OMAP2420)
+		ti_dt_clocks_register(omap2420_clks);
+	else
+		ti_dt_clocks_register(omap2430_clks);
+
+	omap2_clk_disable_autoidle_all();
+
+	omap2_clk_enable_init_clocks(enable_init_clks,
+				     ARRAY_SIZE(enable_init_clks));
+
+	pr_info("Clocking rate (Crystal/DPLL/MPU): %ld.%01ld/%ld/%ld MHz\n",
+		(clk_get_rate(clk_get_sys(NULL, "sys_ck")) / 1000000),
+		(clk_get_rate(clk_get_sys(NULL, "sys_ck")) / 100000) % 10,
+		(clk_get_rate(clk_get_sys(NULL, "dpll_ck")) / 1000000),
+		(clk_get_rate(clk_get_sys(NULL, "mpu_ck")) / 1000000));
+
+	return 0;
+}
+
+int __init omap2420_dt_clk_init(void)
+{
+	return omap2xxx_dt_clk_init(OMAP2_SOC_OMAP2420);
+}
+
+int __init omap2430_dt_clk_init(void)
+{
+	return omap2xxx_dt_clk_init(OMAP2_SOC_OMAP2430);
+}
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index a8390d478528..188f0cbb26c2 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -302,6 +302,8 @@ int omap5xxx_dt_clk_init(void);
 int dra7xx_dt_clk_init(void);
 int am33xx_dt_clk_init(void);
 int am43xx_dt_clk_init(void);
+int omap2420_dt_clk_init(void);
+int omap2430_dt_clk_init(void);
 
 #ifdef CONFIG_OF
 void of_ti_clk_allow_autoidle_all(void);
-- 
cgit 


From 61f25ca76ccc7b63371a7a6b0b8b9a8a46745b79 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Mon, 24 Feb 2014 18:49:35 +0200
Subject: ARM: OMAP2: clock: add DT boot support for cpufreq_ck

The clock and clkdev for this are added manually.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c | 53 ++++++++++++++++++++++++++++
 drivers/clk/ti/clk-2xxx.c                    |  2 ++
 include/linux/clk/ti.h                       |  1 +
 3 files changed, 56 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
index b935ed2922d8..85e0b0c06718 100644
--- a/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
+++ b/arch/arm/mach-omap2/clkt2xxx_virt_prcm_set.c
@@ -208,3 +208,56 @@ void omap2xxx_clkt_vps_late_init(void)
 		clk_put(c);
 	}
 }
+
+#ifdef CONFIG_OF
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+
+static const struct clk_ops virt_prcm_set_ops = {
+	.recalc_rate	= &omap2_table_mpu_recalc,
+	.set_rate	= &omap2_select_table_rate,
+	.round_rate	= &omap2_round_to_table_rate,
+};
+
+/**
+ * omap2xxx_clkt_vps_init - initialize virt_prcm_set clock
+ *
+ * Does a manual init for the virtual prcm DVFS clock for OMAP2. This
+ * function is called only from omap2 DT clock init, as the virtual
+ * node is not modelled in the DT clock data.
+ */
+void omap2xxx_clkt_vps_init(void)
+{
+	struct clk_init_data init = { NULL };
+	struct clk_hw_omap *hw = NULL;
+	struct clk *clk;
+	const char *parent_name = "mpu_ck";
+	struct clk_lookup *lookup = NULL;
+
+	omap2xxx_clkt_vps_late_init();
+	omap2xxx_clkt_vps_check_bootloader_rates();
+
+	hw = kzalloc(sizeof(*hw), GFP_KERNEL);
+	lookup = kzalloc(sizeof(*lookup), GFP_KERNEL);
+	if (!hw || !lookup)
+		goto cleanup;
+	init.name = "virt_prcm_set";
+	init.ops = &virt_prcm_set_ops;
+	init.parent_names = &parent_name;
+	init.num_parents = 1;
+
+	hw->hw.init = &init;
+
+	clk = clk_register(NULL, &hw->hw);
+	if (IS_ERR(clk))
+		goto cleanup;
+	/* lookup->dev_id is already NULL courtesy of kzalloc() */
+	lookup->con_id = "cpufreq_ck";
+	lookup->clk = clk;
+	clkdev_add(lookup);
+	return;
+cleanup:
+	kfree(hw);
+	kfree(lookup);
+}
+#endif
diff --git a/drivers/clk/ti/clk-2xxx.c b/drivers/clk/ti/clk-2xxx.c
index f6400fb5ee3e..c808ab3d2bb2 100644
--- a/drivers/clk/ti/clk-2xxx.c
+++ b/drivers/clk/ti/clk-2xxx.c
@@ -229,6 +229,8 @@ static int __init omap2xxx_dt_clk_init(int soc_type)
 	else
 		ti_dt_clocks_register(omap2430_clks);
 
+	omap2xxx_clkt_vps_init();
+
 	omap2_clk_disable_autoidle_all();
 
 	omap2_clk_enable_init_clocks(enable_init_clks,
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 188f0cbb26c2..4231c41bed51 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -283,6 +283,7 @@ unsigned long omap2_dpllcore_recalc(struct clk_hw *hw,
 int omap2_reprogram_dpllcore(struct clk_hw *clk, unsigned long rate,
 			     unsigned long parent_rate);
 void omap2xxx_clkt_dpllcore_init(struct clk_hw *hw);
+void omap2xxx_clkt_vps_init(void);
 
 void __iomem *ti_clk_get_reg_addr(struct device_node *node, int index);
 void ti_dt_clocks_register(struct ti_dt_clk *oclks);
-- 
cgit 


From 7738dac4f697ffbd0ed4c4aeb69a714ef9d876da Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 28 May 2014 08:06:34 -0600
Subject: blk-mq: remove stale comment for blk_mq_complete_request()

It works for both IPI and local completions as of commit
95f096849932.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blk-mq.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1dfeb1529a61..5b171fbe95c5 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -172,11 +172,6 @@ void blk_mq_end_io(struct request *rq, int error);
 void __blk_mq_end_io(struct request *rq, int error);
 
 void blk_mq_requeue_request(struct request *rq);
-
-/*
- * Complete request through potential IPI for right placement. Driver must
- * have defined a mq_ops->complete() hook for this.
- */
 void blk_mq_complete_request(struct request *rq);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
-- 
cgit 


From 6fca6a611c27f1f0d90fbe1cc3c229dbf8c09e48 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 May 2014 08:08:02 -0600
Subject: blk-mq: add helper to insert requests from irq context

Both the cache flush state machine and the SCSI midlayer want to submit
requests from irq context, and the current per-request requeue_work
unfortunately causes corruption due to sharing with the csd field for
flushes.  Replace them with a per-request_queue list of requests to
be requeued.

Based on an earlier test by Ming Lei.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reported-by: Ming Lei <tom.leiming@gmail.com>
Tested-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-flush.c      | 16 ++++---------
 block/blk-mq.c         | 64 +++++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/blk-mq.h |  2 ++
 include/linux/blkdev.h |  5 +++-
 4 files changed, 73 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index ec7a224d6733..ef608b35d9be 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -130,21 +130,13 @@ static void blk_flush_restore_request(struct request *rq)
 	blk_clear_rq_complete(rq);
 }
 
-static void mq_flush_run(struct work_struct *work)
-{
-	struct request *rq;
-
-	rq = container_of(work, struct request, requeue_work);
-
-	memset(&rq->csd, 0, sizeof(rq->csd));
-	blk_mq_insert_request(rq, false, true, false);
-}
-
 static bool blk_flush_queue_rq(struct request *rq, bool add_front)
 {
 	if (rq->q->mq_ops) {
-		INIT_WORK(&rq->requeue_work, mq_flush_run);
-		kblockd_schedule_work(&rq->requeue_work);
+		struct request_queue *q = rq->q;
+
+		blk_mq_add_to_requeue_list(rq, add_front);
+		blk_mq_kick_requeue_list(q);
 		return false;
 	} else {
 		if (add_front)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 010b878d53b3..67066ecc79c0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -516,10 +516,68 @@ void blk_mq_requeue_request(struct request *rq)
 	blk_clear_rq_complete(rq);
 
 	BUG_ON(blk_queued_rq(rq));
-	blk_mq_insert_request(rq, true, true, false);
+	blk_mq_add_to_requeue_list(rq, true);
 }
 EXPORT_SYMBOL(blk_mq_requeue_request);
 
+static void blk_mq_requeue_work(struct work_struct *work)
+{
+	struct request_queue *q =
+		container_of(work, struct request_queue, requeue_work);
+	LIST_HEAD(rq_list);
+	struct request *rq, *next;
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->requeue_lock, flags);
+	list_splice_init(&q->requeue_list, &rq_list);
+	spin_unlock_irqrestore(&q->requeue_lock, flags);
+
+	list_for_each_entry_safe(rq, next, &rq_list, queuelist) {
+		if (!(rq->cmd_flags & REQ_SOFTBARRIER))
+			continue;
+
+		rq->cmd_flags &= ~REQ_SOFTBARRIER;
+		list_del_init(&rq->queuelist);
+		blk_mq_insert_request(rq, true, false, false);
+	}
+
+	while (!list_empty(&rq_list)) {
+		rq = list_entry(rq_list.next, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		blk_mq_insert_request(rq, false, false, false);
+	}
+
+	blk_mq_run_queues(q, false);
+}
+
+void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
+{
+	struct request_queue *q = rq->q;
+	unsigned long flags;
+
+	/*
+	 * We abuse this flag that is otherwise used by the I/O scheduler to
+	 * request head insertion from the workqueue.
+	 */
+	BUG_ON(rq->cmd_flags & REQ_SOFTBARRIER);
+
+	spin_lock_irqsave(&q->requeue_lock, flags);
+	if (at_head) {
+		rq->cmd_flags |= REQ_SOFTBARRIER;
+		list_add(&rq->queuelist, &q->requeue_list);
+	} else {
+		list_add_tail(&rq->queuelist, &q->requeue_list);
+	}
+	spin_unlock_irqrestore(&q->requeue_lock, flags);
+}
+EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
+
+void blk_mq_kick_requeue_list(struct request_queue *q)
+{
+	kblockd_schedule_work(&q->requeue_work);
+}
+EXPORT_SYMBOL(blk_mq_kick_requeue_list);
+
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 {
 	return tags->rqs[tag];
@@ -1812,6 +1870,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 
 	q->sg_reserved_size = INT_MAX;
 
+	INIT_WORK(&q->requeue_work, blk_mq_requeue_work);
+	INIT_LIST_HEAD(&q->requeue_list);
+	spin_lock_init(&q->requeue_lock);
+
 	if (q->nr_hw_queues > 1)
 		blk_queue_make_request(q, blk_mq_make_request);
 	else
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 5b171fbe95c5..b9a74a386dbc 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -172,6 +172,8 @@ void blk_mq_end_io(struct request *rq, int error);
 void __blk_mq_end_io(struct request *rq, int error);
 
 void blk_mq_requeue_request(struct request *rq);
+void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
+void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6bc011a09e82..913f1c2d3be0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -99,7 +99,6 @@ struct request {
 	struct list_head queuelist;
 	union {
 		struct call_single_data csd;
-		struct work_struct requeue_work;
 		unsigned long fifo_time;
 	};
 
@@ -463,6 +462,10 @@ struct request_queue {
 	struct request		*flush_rq;
 	spinlock_t		mq_flush_lock;
 
+	struct list_head	requeue_list;
+	spinlock_t		requeue_lock;
+	struct work_struct	requeue_work;
+
 	struct mutex		sysfs_lock;
 
 	int			bypass_depth;
-- 
cgit 


From 494b6590043b4cd73ceb3f58e1c012a2c6c98d85 Mon Sep 17 00:00:00 2001
From: Jes Sorensen <Jes.Sorensen@redhat.com>
Date: Mon, 26 May 2014 18:06:34 +0200
Subject: wireless: add missing WLAN_EID_BSS_INTOLERANT_CHL_REPORT

Signed-off-by: Jes Sorensen <Jes.Sorensen@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index f194ccb8539c..6bff13f74050 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1711,6 +1711,7 @@ enum ieee80211_eid {
 	WLAN_EID_RRM_ENABLED_CAPABILITIES = 70,
 	WLAN_EID_MULTIPLE_BSSID = 71,
 	WLAN_EID_BSS_COEX_2040 = 72,
+	WLAN_EID_BSS_INTOLERANT_CHL_REPORT = 73,
 	WLAN_EID_OVERLAP_BSS_SCAN_PARAM = 74,
 	WLAN_EID_RIC_DESCRIPTOR = 75,
 	WLAN_EID_MMIE = 76,
-- 
cgit 


From f08dbf8a61462aa122b9b5077849a3f4bd84702a Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@imgtec.com>
Date: Wed, 8 Jan 2014 11:23:35 +0000
Subject: cpuidle: declare cpuidle_dev in cpuidle.h

Declaring this allows drivers which need to initialise each struct
cpuidle_device at initialisation time to make use of the structures
already defined in cpuidle.c, rather than having to wastefully define
their own.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
---
 include/linux/cpuidle.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index b0238cba440b..99cbd7a74e9f 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -84,6 +84,7 @@ struct cpuidle_device {
 };
 
 DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
 
 /**
  * cpuidle_get_last_residency - retrieves the last state's residency time
-- 
cgit 


From 4ce01dd1a07d9cf3eaf44fbf4ea9a61b11badccc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 27 May 2014 20:59:46 +0200
Subject: blk-mq: merge blk_mq_alloc_reserved_request into blk_mq_alloc_request

Instead of having two almost identical copies of the same code just let
the callers pass in the reserved flag directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       |  2 +-
 block/blk-mq.c         | 20 +++-----------------
 include/linux/blk-mq.h |  4 ++--
 3 files changed, 6 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 29d5fbafd94a..d87be5b4e554 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1173,7 +1173,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 {
 	if (q->mq_ops)
-		return blk_mq_alloc_request(q, rw, gfp_mask);
+		return blk_mq_alloc_request(q, rw, gfp_mask, false);
 	else
 		return blk_old_get_request(q, rw, gfp_mask);
 }
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 67066ecc79c0..63d581d72a70 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -294,35 +294,21 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 	return rq;
 }
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp)
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
+		bool reserved)
 {
 	struct request *rq;
 
 	if (blk_mq_queue_enter(q))
 		return NULL;
 
-	rq = blk_mq_alloc_request_pinned(q, rw, gfp, false);
+	rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved);
 	if (rq)
 		blk_mq_put_ctx(rq->mq_ctx);
 	return rq;
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
 
-struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw,
-					      gfp_t gfp)
-{
-	struct request *rq;
-
-	if (blk_mq_queue_enter(q))
-		return NULL;
-
-	rq = blk_mq_alloc_request_pinned(q, rw, gfp, true);
-	if (rq)
-		blk_mq_put_ctx(rq->mq_ctx);
-	return rq;
-}
-EXPORT_SYMBOL(blk_mq_alloc_reserved_request);
-
 static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 				  struct blk_mq_ctx *ctx, struct request *rq)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b9a74a386dbc..2bd82f399128 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -160,8 +160,8 @@ void blk_mq_insert_request(struct request *, bool, bool, bool);
 void blk_mq_run_queues(struct request_queue *q, bool async);
 void blk_mq_free_request(struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp);
-struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp);
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
+		gfp_t gfp, bool reserved);
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
-- 
cgit 


From cdef54dd85ad66e77262ea57796a3e81683dd5d6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 28 May 2014 18:11:06 +0200
Subject: blk-mq: remove alloc_hctx and free_hctx methods

There is no need for drivers to control hardware context allocation
now that we do the context to node mapping in common code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c             | 26 +++++---------------------
 drivers/block/null_blk.c   | 28 +---------------------------
 drivers/block/virtio_blk.c |  2 --
 include/linux/blk-mq.h     | 10 ----------
 4 files changed, 6 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5cc4b871cb11..f27fe44230c2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1335,21 +1335,6 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu)
 }
 EXPORT_SYMBOL(blk_mq_map_queue);
 
-struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *set,
-						   unsigned int hctx_index,
-						   int node)
-{
-	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, node);
-}
-EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue);
-
-void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *hctx,
-				 unsigned int hctx_index)
-{
-	kfree(hctx);
-}
-EXPORT_SYMBOL(blk_mq_free_single_hw_queue);
-
 static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
 		struct blk_mq_tags *tags, unsigned int hctx_idx)
 {
@@ -1590,7 +1575,7 @@ static void blk_mq_free_hw_queues(struct request_queue *q,
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		free_cpumask_var(hctx->cpumask);
-		set->ops->free_hctx(hctx, i);
+		kfree(hctx);
 	}
 }
 
@@ -1811,7 +1796,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		int node = blk_mq_hw_queue_to_node(map, i);
 
-		hctxs[i] = set->ops->alloc_hctx(set, i, node);
+		hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx),
+					GFP_KERNEL, node);
 		if (!hctxs[i])
 			goto err_hctxs;
 
@@ -1898,7 +1884,7 @@ err_hctxs:
 		if (!hctxs[i])
 			break;
 		free_cpumask_var(hctxs[i]->cpumask);
-		set->ops->free_hctx(hctxs[i], i);
+		kfree(hctxs[i]);
 	}
 err_map:
 	kfree(hctxs);
@@ -1983,9 +1969,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
 		return -EINVAL;
 
-	if (!set->nr_hw_queues ||
-	    !set->ops->queue_rq || !set->ops->map_queue ||
-	    !set->ops->alloc_hctx || !set->ops->free_hctx)
+	if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
 		return -EINVAL;
 
 
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 4d33c8c25fbf..b40af63a5476 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -321,18 +321,6 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	return BLK_MQ_RQ_QUEUE_OK;
 }
 
-static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_tag_set *set,
-					     unsigned int hctx_index,
-					     int node)
-{
-	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, node);
-}
-
-static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index)
-{
-	kfree(hctx);
-}
-
 static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
 {
 	BUG_ON(!nullb);
@@ -360,17 +348,6 @@ static struct blk_mq_ops null_mq_ops = {
 	.map_queue      = blk_mq_map_queue,
 	.init_hctx	= null_init_hctx,
 	.complete	= null_softirq_done_fn,
-	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
-	.free_hctx	= blk_mq_free_single_hw_queue,
-};
-
-static struct blk_mq_ops null_mq_ops_pernode = {
-	.queue_rq       = null_queue_rq,
-	.map_queue      = blk_mq_map_queue,
-	.init_hctx	= null_init_hctx,
-	.complete	= null_softirq_done_fn,
-	.alloc_hctx	= null_alloc_hctx,
-	.free_hctx	= null_free_hctx,
 };
 
 static void null_del_dev(struct nullb *nullb)
@@ -496,10 +473,7 @@ static int null_add_dev(void)
 		goto out_free_nullb;
 
 	if (queue_mode == NULL_Q_MQ) {
-		if (use_per_node_hctx)
-			nullb->tag_set.ops = &null_mq_ops_pernode;
-		else
-			nullb->tag_set.ops = &null_mq_ops;
+		nullb->tag_set.ops = &null_mq_ops;
 		nullb->tag_set.nr_hw_queues = submit_queues;
 		nullb->tag_set.queue_depth = hw_queue_depth;
 		nullb->tag_set.numa_node = home_node;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 7a51f065edcd..16c21c0cb14d 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -497,8 +497,6 @@ static int virtblk_init_request(void *data, struct request *rq,
 static struct blk_mq_ops virtio_mq_ops = {
 	.queue_rq	= virtio_queue_rq,
 	.map_queue	= blk_mq_map_queue,
-	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
-	.free_hctx	= blk_mq_free_single_hw_queue,
 	.complete	= virtblk_request_done,
 	.init_request	= virtblk_init_request,
 };
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2bd82f399128..91dfb75ce39f 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -79,9 +79,6 @@ struct blk_mq_tag_set {
 
 typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *);
 typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
-typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_tag_set *,
-		unsigned int, int);
-typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
 typedef int (init_request_fn)(void *, struct request *, unsigned int,
@@ -107,12 +104,6 @@ struct blk_mq_ops {
 
 	softirq_done_fn		*complete;
 
-	/*
-	 * Override for hctx allocations (should probably go)
-	 */
-	alloc_hctx_fn		*alloc_hctx;
-	free_hctx_fn		*free_hctx;
-
 	/*
 	 * Called when the block layer side of a hardware queue has been
 	 * set up, allowing the driver to allocate/init matching structures.
@@ -166,7 +157,6 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
-void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int);
 
 void blk_mq_end_io(struct request *rq, int error);
 void __blk_mq_end_io(struct request *rq, int error);
-- 
cgit 


From c25dc82899e67a32fdcfb20dd72a37fc236fde2e Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 22 May 2014 17:07:30 -0600
Subject: PCI: Add DMA alias iterator

In a mixed PCI/PCI-X/PCIe topology, bridges can take ownership of
transactions, replacing the original requester ID with their own.
Sometimes we just want to know the resulting device or resulting alias;
other times we want each step in the chain.  This iterator allows either
usage.  When an endpoint is connected via an unbroken chain of PCIe
switches and root ports, it has no alias and its requester ID is visible to
the root bus.  When PCI/X get in the way, we pick up aliases for bridges.

The reason why we potentially care about each step in the path is because
of PCI-X.  PCI-X has the concept of a requester ID, but bridges may or may
not take ownership of various types of transactions.  We therefore leave it
to the consumer of this function to prune out what they don't care about
rather than attempt to flatten the alias ourselves.

Tested-by: George Spelvin <linux@horizon.com>
Tested-by: Pat Erley <pat-lkml@erley.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/search.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci.h  |  4 +++
 2 files changed, 74 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 4a1b972efe7f..5601cdb8bbb3 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -17,6 +17,76 @@
 DECLARE_RWSEM(pci_bus_sem);
 EXPORT_SYMBOL_GPL(pci_bus_sem);
 
+/*
+ * pci_for_each_dma_alias - Iterate over DMA aliases for a device
+ * @pdev: starting downstream device
+ * @fn: function to call for each alias
+ * @data: opaque data to pass to @fn
+ *
+ * Starting at @pdev, walk up the bus calling @fn for each possible alias
+ * of @pdev at the root bus.
+ */
+int pci_for_each_dma_alias(struct pci_dev *pdev,
+			   int (*fn)(struct pci_dev *pdev,
+				     u16 alias, void *data), void *data)
+{
+	struct pci_bus *bus;
+	int ret;
+
+	ret = fn(pdev, PCI_DEVID(pdev->bus->number, pdev->devfn), data);
+	if (ret)
+		return ret;
+
+	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
+		struct pci_dev *tmp;
+
+		/* Skip virtual buses */
+		if (!bus->self)
+			continue;
+
+		tmp = bus->self;
+
+		/*
+		 * PCIe-to-PCI/X bridges alias transactions from downstream
+		 * devices using the subordinate bus number (PCI Express to
+		 * PCI/PCI-X Bridge Spec, rev 1.0, sec 2.3).  For all cases
+		 * where the upstream bus is PCI/X we alias to the bridge
+		 * (there are various conditions in the previous reference
+		 * where the bridge may take ownership of transactions, even
+		 * when the secondary interface is PCI-X).
+		 */
+		if (pci_is_pcie(tmp)) {
+			switch (pci_pcie_type(tmp)) {
+			case PCI_EXP_TYPE_ROOT_PORT:
+			case PCI_EXP_TYPE_UPSTREAM:
+			case PCI_EXP_TYPE_DOWNSTREAM:
+				continue;
+			case PCI_EXP_TYPE_PCI_BRIDGE:
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->subordinate->number,
+						   PCI_DEVFN(0, 0)), data);
+				if (ret)
+					return ret;
+				continue;
+			case PCI_EXP_TYPE_PCIE_BRIDGE:
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->bus->number,
+						   tmp->devfn), data);
+				if (ret)
+					return ret;
+				continue;
+			}
+		} else {
+			ret = fn(tmp, PCI_DEVID(tmp->bus->number, tmp->devfn),
+				 data);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return ret;
+}
+
 /*
  * find the upstream PCIe-to-PCI bridge of a PCI device
  * if the device is PCIE, return NULL
diff --git a/include/linux/pci.h b/include/linux/pci.h
index aab57b4abe7f..14b074bbc841 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1795,6 +1795,10 @@ static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev)
 }
 #endif
 
+int pci_for_each_dma_alias(struct pci_dev *pdev,
+			   int (*fn)(struct pci_dev *pdev,
+				     u16 alias, void *data), void *data);
+
 /**
  * pci_find_upstream_pcie_bridge - find upstream PCIe-to-PCI bridge of a device
  * @pdev: the PCI device
-- 
cgit 


From d660e92a97aac08aa33cd41e00a325066e00f1ef Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 27 May 2014 17:37:29 +0530
Subject: regulators: Add definition of regulator_set_voltage_time() for
 !CONFIG_REGULATOR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We already have dummy implementations for most of the regulator APIs for the
!CONFIG_REGULATOR case, but were missing one for regulator_set_voltage_time().

Found this issue while compiling cpufreq-cpu0 driver without regulators support
in kernel.

drivers/cpufreq/cpufreq-cpu0.c: In function ‘cpu0_cpufreq_probe’:
drivers/cpufreq/cpufreq-cpu0.c:186:3: error: implicit declaration of function ‘regulator_set_voltage_time’ [-Werror=implicit-function-declaration]

Fix this by adding dummy definition for regulator_set_voltage_time().

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/consumer.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index e530681bea70..94719e8dce04 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -397,6 +397,12 @@ static inline int regulator_set_voltage(struct regulator *regulator,
 	return 0;
 }
 
+static inline int regulator_set_voltage_time(struct regulator *regulator,
+					     int old_uV, int new_uV)
+{
+	return 0;
+}
+
 static inline int regulator_get_voltage(struct regulator *regulator)
 {
 	return -EINVAL;
-- 
cgit 


From 357d596ea7bea5abf1479cc72ae5888c738717dd Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 28 May 2014 11:35:41 -0700
Subject: Revert "usb: gadget: net2280: Add support for PLX USB338X"

This reverts commit c4128cac3557ddd5fa972cb6511c426cd94a7ccd.

This should come through Felipe's tree first, and there was a bunch of
other patches that are needed after this one as well that I didn't have.

Cc: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/Kconfig   |   10 +-
 drivers/usb/gadget/net2280.c | 1115 ++++--------------------------------------
 drivers/usb/gadget/net2280.h |   97 +---
 include/linux/usb/usb338x.h  |  199 --------
 4 files changed, 91 insertions(+), 1330 deletions(-)
 delete mode 100644 include/linux/usb/usb338x.h

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 49e434ec527d..ba18e9c110cc 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -409,7 +409,7 @@ config USB_NET2272_DMA
 	  If unsure, say "N" here.  The driver works fine in PIO mode.
 
 config USB_NET2280
-	tristate "NetChip 228x / PLX USB338x"
+	tristate "NetChip 228x"
 	depends on PCI
 	help
 	   NetChip 2280 / 2282 is a PCI based USB peripheral controller which
@@ -419,14 +419,6 @@ config USB_NET2280
 	   (for control transfers) and several endpoints with dedicated
 	   functions.
 
-	   PLX 3380 / 3382 is a PCIe based USB peripheral controller which
-	   supports full, high speed USB 2.0 and super speed USB 3.0
-	   data transfers.
-
-	   It has eight configurable endpoints, as well as endpoint zero
-	   (for control transfers) and several endpoints with dedicated
-	   functions.
-
 	   Say "y" to link the driver statically, or "m" to build a
 	   dynamically linked module called "net2280" and force all
 	   gadget drivers to also be dynamically linked.
diff --git a/drivers/usb/gadget/net2280.c b/drivers/usb/gadget/net2280.c
index 87789c9bf7fe..300b3a71383b 100644
--- a/drivers/usb/gadget/net2280.c
+++ b/drivers/usb/gadget/net2280.c
@@ -18,9 +18,6 @@
  * hint to completely eliminate some IRQs, if a later IRQ is guaranteed
  * and DMA chaining is enabled.
  *
- * MSI is enabled by default.  The legacy IRQ is used if MSI couldn't
- * be enabled.
- *
  * Note that almost all the errata workarounds here are only needed for
  * rev1 chips.  Rev1a silicon (0110) fixes almost all of them.
  */
@@ -28,14 +25,10 @@
 /*
  * Copyright (C) 2003 David Brownell
  * Copyright (C) 2003-2005 PLX Technology, Inc.
- * Copyright (C) 2014 Ricardo Ribalda - Qtechnology/AS
  *
  * Modified Seth Levy 2005 PLX Technology, Inc. to provide compatibility
  *	with 2282 chip
  *
- * Modified Ricardo Ribalda Qtechnology AS  to provide compatibility
- *	with usb 338x chip. Based on PLX driver
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -68,8 +61,9 @@
 #include <asm/irq.h>
 #include <asm/unaligned.h>
 
-#define	DRIVER_DESC		"PLX NET228x/USB338x USB Peripheral Controller"
-#define	DRIVER_VERSION		"2005 Sept 27/v3.0"
+
+#define	DRIVER_DESC		"PLX NET228x USB Peripheral Controller"
+#define	DRIVER_VERSION		"2005 Sept 27"
 
 #define	EP_DONTUSE		13	/* nonzero */
 
@@ -79,12 +73,11 @@
 static const char driver_name [] = "net2280";
 static const char driver_desc [] = DRIVER_DESC;
 
-static const u32 ep_bit[9] = { 0, 17, 2, 19, 4, 1, 18, 3, 20 };
 static const char ep0name [] = "ep0";
 static const char *const ep_name [] = {
 	ep0name,
 	"ep-a", "ep-b", "ep-c", "ep-d",
-	"ep-e", "ep-f", "ep-g", "ep-h",
+	"ep-e", "ep-f",
 };
 
 /* use_dma -- general goodness, fewer interrupts, less cpu load (vs PIO)
@@ -97,12 +90,11 @@ static const char *const ep_name [] = {
  */
 static bool use_dma = 1;
 static bool use_dma_chaining = 0;
-static bool use_msi = 1;
 
 /* "modprobe net2280 use_dma=n" etc */
 module_param (use_dma, bool, S_IRUGO);
 module_param (use_dma_chaining, bool, S_IRUGO);
-module_param(use_msi, bool, S_IRUGO);
+
 
 /* mode 0 == ep-{a,b,c,d} 1K fifo each
  * mode 1 == ep-{a,b} 2K fifo each, ep-{c,d} unavailable
@@ -156,9 +148,6 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 	struct net2280_ep	*ep;
 	u32			max, tmp;
 	unsigned long		flags;
-	static const u32 ep_key[9] = { 1, 0, 1, 0, 1, 1, 0, 1, 0 };
-	static const u32 ep_enhanced[9] = { 0x10, 0x60, 0x30, 0x80,
-					  0x50, 0x20, 0x70, 0x40, 0x90 };
 
 	ep = container_of (_ep, struct net2280_ep, ep);
 	if (!_ep || !desc || ep->desc || _ep->name == ep0name
@@ -172,20 +161,11 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 	if ((desc->bEndpointAddress & 0x0f) == EP_DONTUSE)
 		return -EDOM;
 
-	if (dev->pdev->vendor == 0x10b5) {
-		if ((desc->bEndpointAddress & 0x0f) >= 0x0c)
-			return -EDOM;
-		ep->is_in = !!usb_endpoint_dir_in(desc);
-		if (dev->enhanced_mode && ep->is_in && ep_key[ep->num])
-			return -EINVAL;
-	}
-
 	/* sanity check ep-e/ep-f since their fifos are small */
 	max = usb_endpoint_maxp (desc) & 0x1fff;
-	if (ep->num > 4 && max > 64 && (dev->pdev->vendor == 0x17cc))
+	if (ep->num > 4 && max > 64)
 		return -ERANGE;
 
-
 	spin_lock_irqsave (&dev->lock, flags);
 	_ep->maxpacket = max & 0x7ff;
 	ep->desc = desc;
@@ -196,8 +176,7 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 	ep->out_overflow = 0;
 
 	/* set speed-dependent max packet; may kick in high bandwidth */
-	set_idx_reg(dev->regs, (dev->enhanced_mode) ? ep_enhanced[ep->num]
-					: REG_EP_MAXPKT(dev, ep->num), max);
+	set_idx_reg (dev->regs, REG_EP_MAXPKT (dev, ep->num), max);
 
 	/* FIFO lines can't go to different packets.  PIO is ok, so
 	 * use it instead of troublesome (non-bulk) multi-packet DMA.
@@ -220,43 +199,23 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 				&ep->regs->ep_rsp);
 	} else if (tmp == USB_ENDPOINT_XFER_BULK) {
 		/* catch some particularly blatant driver bugs */
-		if ((dev->gadget.speed == USB_SPEED_SUPER && max != 1024) ||
-		    (dev->gadget.speed == USB_SPEED_HIGH && max != 512) ||
-		    (dev->gadget.speed == USB_SPEED_FULL && max > 64)) {
-			spin_unlock_irqrestore(&dev->lock, flags);
+		if ((dev->gadget.speed == USB_SPEED_HIGH
+					&& max != 512)
+				|| (dev->gadget.speed == USB_SPEED_FULL
+					&& max > 64)) {
+			spin_unlock_irqrestore (&dev->lock, flags);
 			return -ERANGE;
 		}
 	}
 	ep->is_iso = (tmp == USB_ENDPOINT_XFER_ISOC) ? 1 : 0;
-	/* Enable this endpoint */
-	if (dev->pdev->vendor == 0x17cc) {
-		tmp <<= ENDPOINT_TYPE;
-		tmp |= desc->bEndpointAddress;
-		/* default full fifo lines */
-		tmp |= (4 << ENDPOINT_BYTE_COUNT);
-		tmp |= 1 << ENDPOINT_ENABLE;
-		ep->is_in = (tmp & USB_DIR_IN) != 0;
-	} else {
-		/* In Legacy mode, only OUT endpoints are used */
-		if (dev->enhanced_mode && ep->is_in) {
-			tmp <<= IN_ENDPOINT_TYPE;
-			tmp |= (1 << IN_ENDPOINT_ENABLE);
-			/* Not applicable to Legacy */
-			tmp |= (1 << ENDPOINT_DIRECTION);
-		} else {
-			tmp <<= OUT_ENDPOINT_TYPE;
-			tmp |= (1 << OUT_ENDPOINT_ENABLE);
-			tmp |= (ep->is_in << ENDPOINT_DIRECTION);
-		}
-
-		tmp |= usb_endpoint_num(desc);
-		tmp |= (ep->ep.maxburst << MAX_BURST_SIZE);
-	}
-
-	/* Make sure all the registers are written before ep_rsp*/
-	wmb();
+	tmp <<= ENDPOINT_TYPE;
+	tmp |= desc->bEndpointAddress;
+	tmp |= (4 << ENDPOINT_BYTE_COUNT);	/* default full fifo lines */
+	tmp |= 1 << ENDPOINT_ENABLE;
+	wmb ();
 
 	/* for OUT transfers, block the rx fifo until a read is posted */
+	ep->is_in = (tmp & USB_DIR_IN) != 0;
 	if (!ep->is_in)
 		writel ((1 << SET_NAK_OUT_PACKETS), &ep->regs->ep_rsp);
 	else if (dev->pdev->device != 0x2280) {
@@ -267,13 +226,11 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 			| (1 << CLEAR_NAK_OUT_PACKETS_MODE), &ep->regs->ep_rsp);
 	}
 
-	writel(tmp, &ep->cfg->ep_cfg);
+	writel (tmp, &ep->regs->ep_cfg);
 
 	/* enable irqs */
 	if (!ep->dma) {				/* pio, per-packet */
-		tmp = (dev->pdev->vendor == 0x17cc)?(1 << ep->num)
-						   : (1 << ep_bit[ep->num]);
-		tmp |= readl(&dev->regs->pciirqenb0);
+		tmp = (1 << ep->num) | readl (&dev->regs->pciirqenb0);
 		writel (tmp, &dev->regs->pciirqenb0);
 
 		tmp = (1 << DATA_PACKET_RECEIVED_INTERRUPT_ENABLE)
@@ -294,10 +251,8 @@ net2280_enable (struct usb_ep *_ep, const struct usb_endpoint_descriptor *desc)
 			tmp = (1 << SHORT_PACKET_TRANSFERRED_INTERRUPT_ENABLE);
 			writel (tmp, &ep->regs->ep_irqenb);
 
-			tmp = (dev->pdev->vendor == 0x17cc)?(1 << ep->num)
-						: (1 << ep_bit[ep->num]);
-			tmp |= readl(&dev->regs->pciirqenb0);
-			writel(tmp, &dev->regs->pciirqenb0);
+			tmp = (1 << ep->num) | readl (&dev->regs->pciirqenb0);
+			writel (tmp, &dev->regs->pciirqenb0);
 		}
 	}
 
@@ -331,8 +286,7 @@ static int handshake (u32 __iomem *ptr, u32 mask, u32 done, int usec)
 
 static const struct usb_ep_ops net2280_ep_ops;
 
-static void ep_reset_228x(struct net2280_regs __iomem *regs,
-			  struct net2280_ep *ep)
+static void ep_reset (struct net2280_regs __iomem *regs, struct net2280_ep *ep)
 {
 	u32		tmp;
 
@@ -407,55 +361,6 @@ static void ep_reset_228x(struct net2280_regs __iomem *regs,
 	/* fifo size is handled separately */
 }
 
-static void ep_reset_338x(struct net2280_regs __iomem *regs,
-					struct net2280_ep *ep)
-{
-	u32 tmp, dmastat;
-
-	ep->desc = NULL;
-	INIT_LIST_HEAD(&ep->queue);
-
-	usb_ep_set_maxpacket_limit(&ep->ep, ~0);
-	ep->ep.ops = &net2280_ep_ops;
-
-	/* disable the dma, irqs, endpoint... */
-	if (ep->dma) {
-		writel(0, &ep->dma->dmactl);
-		writel((1 << DMA_ABORT_DONE_INTERRUPT) |
-		       (1 << DMA_PAUSE_DONE_INTERRUPT) |
-		       (1 << DMA_SCATTER_GATHER_DONE_INTERRUPT) |
-		       (1 << DMA_TRANSACTION_DONE_INTERRUPT)
-		       /* | (1 << DMA_ABORT) */
-		       , &ep->dma->dmastat);
-
-		dmastat = readl(&ep->dma->dmastat);
-		if (dmastat == 0x5002) {
-			WARNING(ep->dev, "The dmastat return = %x!!\n",
-			       dmastat);
-			writel(0x5a, &ep->dma->dmastat);
-		}
-
-		tmp = readl(&regs->pciirqenb0);
-		tmp &= ~(1 << ep_bit[ep->num]);
-		writel(tmp, &regs->pciirqenb0);
-	} else {
-		if (ep->num < 5) {
-			tmp = readl(&regs->pciirqenb1);
-			tmp &= ~(1 << (8 + ep->num));	/* completion */
-			writel(tmp, &regs->pciirqenb1);
-		}
-	}
-	writel(0, &ep->regs->ep_irqenb);
-
-	writel((1 << SHORT_PACKET_OUT_DONE_INTERRUPT) |
-	       (1 << SHORT_PACKET_TRANSFERRED_INTERRUPT) |
-	       (1 << FIFO_OVERFLOW) |
-	       (1 << DATA_PACKET_RECEIVED_INTERRUPT) |
-	       (1 << DATA_PACKET_TRANSMITTED_INTERRUPT) |
-	       (1 << DATA_OUT_PING_TOKEN_INTERRUPT) |
-	       (1 << DATA_IN_TOKEN_INTERRUPT), &ep->regs->ep_stat);
-}
-
 static void nuke (struct net2280_ep *);
 
 static int net2280_disable (struct usb_ep *_ep)
@@ -469,17 +374,13 @@ static int net2280_disable (struct usb_ep *_ep)
 
 	spin_lock_irqsave (&ep->dev->lock, flags);
 	nuke (ep);
-
-	if (ep->dev->pdev->vendor == 0x10b5)
-		ep_reset_338x(ep->dev->regs, ep);
-	else
-		ep_reset_228x(ep->dev->regs, ep);
+	ep_reset (ep->dev->regs, ep);
 
 	VDEBUG (ep->dev, "disabled %s %s\n",
 			ep->dma ? "dma" : "pio", _ep->name);
 
 	/* synch memory views with the device */
-	(void)readl(&ep->cfg->ep_cfg);
+	(void) readl (&ep->regs->ep_cfg);
 
 	if (use_dma && !ep->dma && ep->num >= 1 && ep->num <= 4)
 		ep->dma = &ep->dev->dma [ep->num - 1];
@@ -797,8 +698,6 @@ static void start_queue (struct net2280_ep *ep, u32 dmactl, u32 td_dma)
 	writel (readl (&dma->dmastat), &dma->dmastat);
 
 	writel (td_dma, &dma->dmadesc);
-	if (ep->dev->pdev->vendor == 0x10b5)
-		dmactl |= (0x01 << DMA_REQUEST_OUTSTANDING);
 	writel (dmactl, &dma->dmactl);
 
 	/* erratum 0116 workaround part 3:  pci arbiter away from net2280 */
@@ -873,21 +772,6 @@ static void start_dma (struct net2280_ep *ep, struct net2280_request *req)
 	start_queue (ep, tmp, req->td_dma);
 }
 
-static inline void resume_dma(struct net2280_ep *ep)
-{
-	writel(readl(&ep->dma->dmactl) | (1 << DMA_ENABLE), &ep->dma->dmactl);
-
-	ep->dma_started = true;
-}
-
-static inline void ep_stop_dma(struct net2280_ep *ep)
-{
-	writel(readl(&ep->dma->dmactl) & ~(1 << DMA_ENABLE), &ep->dma->dmactl);
-	spin_stop_dma(ep->dma);
-
-	ep->dma_started = false;
-}
-
 static inline void
 queue_dma (struct net2280_ep *ep, struct net2280_request *req, int valid)
 {
@@ -990,23 +874,8 @@ net2280_queue (struct usb_ep *_ep, struct usb_request *_req, gfp_t gfp_flags)
 
 	/* kickstart this i/o queue? */
 	if (list_empty (&ep->queue) && !ep->stopped) {
-		/* DMA request while EP halted */
-		if (ep->dma &&
-		    (readl(&ep->regs->ep_rsp) & (1 << CLEAR_ENDPOINT_HALT)) &&
-			(dev->pdev->vendor == 0x10b5)) {
-			int valid = 1;
-			if (ep->is_in) {
-				int expect;
-				expect = likely(req->req.zero ||
-						((req->req.length %
-						  ep->ep.maxpacket) != 0));
-				if (expect != ep->in_fifo_validate)
-					valid = 0;
-			}
-			queue_dma(ep, req, valid);
-		}
 		/* use DMA if the endpoint supports it, else pio */
-		else if (ep->dma)
+		if (ep->dma)
 			start_dma (ep, req);
 		else {
 			/* maybe there's no control data, just status ack */
@@ -1124,8 +993,6 @@ static void scan_dma_completions (struct net2280_ep *ep)
 		} else if (!ep->is_in
 				&& (req->req.length % ep->ep.maxpacket) != 0) {
 			tmp = readl (&ep->regs->ep_stat);
-			if (ep->dev->pdev->vendor == 0x10b5)
-				return dma_done(ep, req, tmp, 0);
 
 			/* AVOID TROUBLE HERE by not issuing short reads from
 			 * your gadget driver.  That helps avoids errata 0121,
@@ -1212,7 +1079,7 @@ static void restart_dma (struct net2280_ep *ep)
 	start_queue (ep, dmactl, req->td_dma);
 }
 
-static void abort_dma_228x(struct net2280_ep *ep)
+static void abort_dma (struct net2280_ep *ep)
 {
 	/* abort the current transfer */
 	if (likely (!list_empty (&ep->queue))) {
@@ -1224,19 +1091,6 @@ static void abort_dma_228x(struct net2280_ep *ep)
 	scan_dma_completions (ep);
 }
 
-static void abort_dma_338x(struct net2280_ep *ep)
-{
-	writel((1 << DMA_ABORT), &ep->dma->dmastat);
-	spin_stop_dma(ep->dma);
-}
-
-static void abort_dma(struct net2280_ep *ep)
-{
-	if (ep->dev->pdev->vendor == 0x17cc)
-		return abort_dma_228x(ep);
-	return abort_dma_338x(ep);
-}
-
 /* dequeue ALL requests */
 static void nuke (struct net2280_ep *ep)
 {
@@ -1390,9 +1244,6 @@ net2280_set_halt_and_wedge(struct usb_ep *_ep, int value, int wedged)
 				ep->wedged = 1;
 		} else {
 			clear_halt (ep);
-			if (ep->dev->pdev->vendor == 0x10b5 &&
-				!list_empty(&ep->queue) && ep->td_dma)
-					restart_dma(ep);
 			ep->wedged = 0;
 		}
 		(void) readl (&ep->regs->ep_rsp);
@@ -1516,13 +1367,10 @@ static int net2280_set_selfpowered (struct usb_gadget *_gadget, int value)
 
 	spin_lock_irqsave (&dev->lock, flags);
 	tmp = readl (&dev->usb->usbctl);
-	if (value) {
+	if (value)
 		tmp |= (1 << SELF_POWERED_STATUS);
-		dev->selfpowered = 1;
-	} else {
+	else
 		tmp &= ~(1 << SELF_POWERED_STATUS);
-		dev->selfpowered = 0;
-	}
 	writel (tmp, &dev->usb->usbctl);
 	spin_unlock_irqrestore (&dev->lock, flags);
 
@@ -1656,14 +1504,14 @@ static ssize_t registers_show(struct device *_dev,
 	/* DMA Control Registers */
 
 	/* Configurable EP Control Registers */
-	for (i = 0; i < dev->n_ep; i++) {
+	for (i = 0; i < 7; i++) {
 		struct net2280_ep	*ep;
 
 		ep = &dev->ep [i];
 		if (i && !ep->desc)
 			continue;
 
-		t1 = readl(&ep->cfg->ep_cfg);
+		t1 = readl (&ep->regs->ep_cfg);
 		t2 = readl (&ep->regs->ep_rsp) & 0xff;
 		t = scnprintf (next, size,
 				"\n%s\tcfg %05x rsp (%02x) %s%s%s%s%s%s%s%s"
@@ -1723,7 +1571,7 @@ static ssize_t registers_show(struct device *_dev,
 	t = scnprintf (next, size, "\nirqs:  ");
 	size -= t;
 	next += t;
-	for (i = 0; i < dev->n_ep; i++) {
+	for (i = 0; i < 7; i++) {
 		struct net2280_ep	*ep;
 
 		ep = &dev->ep [i];
@@ -1758,7 +1606,7 @@ static ssize_t queues_show(struct device *_dev, struct device_attribute *attr,
 	size = PAGE_SIZE;
 	spin_lock_irqsave (&dev->lock, flags);
 
-	for (i = 0; i < dev->n_ep; i++) {
+	for (i = 0; i < 7; i++) {
 		struct net2280_ep		*ep = &dev->ep [i];
 		struct net2280_request		*req;
 		int				t;
@@ -1887,121 +1735,6 @@ static void set_fifo_mode (struct net2280 *dev, int mode)
 	list_add_tail (&dev->ep [6].ep.ep_list, &dev->gadget.ep_list);
 }
 
-static void defect7374_disable_data_eps(struct net2280 *dev)
-{
-	/*
-	 * For Defect 7374, disable data EPs (and more):
-	 *  - This phase undoes the earlier phase of the Defect 7374 workaround,
-	 *    returing ep regs back to normal.
-	 */
-	struct net2280_ep *ep;
-	int i;
-	unsigned char ep_sel;
-	u32 tmp_reg;
-
-	for (i = 1; i < 5; i++) {
-		ep = &dev->ep[i];
-		writel(0, &ep->cfg->ep_cfg);
-	}
-
-	/* CSROUT, CSRIN, PCIOUT, PCIIN, STATIN, RCIN */
-	for (i = 0; i < 6; i++)
-		writel(0, &dev->dep[i].dep_cfg);
-
-	for (ep_sel = 0; ep_sel <= 21; ep_sel++) {
-		/* Select an endpoint for subsequent operations: */
-		tmp_reg = readl(&dev->plregs->pl_ep_ctrl);
-		writel(((tmp_reg & ~0x1f) | ep_sel), &dev->plregs->pl_ep_ctrl);
-
-		if (ep_sel < 2 || (ep_sel > 9 && ep_sel < 14) ||
-					ep_sel == 18 || ep_sel == 20)
-			continue;
-
-		/* Change settings on some selected endpoints */
-		tmp_reg = readl(&dev->plregs->pl_ep_cfg_4);
-		tmp_reg &= ~(1 << NON_CTRL_IN_TOLERATE_BAD_DIR);
-		writel(tmp_reg, &dev->plregs->pl_ep_cfg_4);
-		tmp_reg = readl(&dev->plregs->pl_ep_ctrl);
-		tmp_reg |= (1 << EP_INITIALIZED);
-		writel(tmp_reg, &dev->plregs->pl_ep_ctrl);
-	}
-}
-
-static void defect7374_enable_data_eps_zero(struct net2280 *dev)
-{
-	u32 tmp = 0, tmp_reg;
-	u32 fsmvalue, scratch;
-	int i;
-	unsigned char ep_sel;
-
-	scratch = get_idx_reg(dev->regs, SCRATCH);
-	fsmvalue = scratch & (0xf << DEFECT7374_FSM_FIELD);
-	scratch &= ~(0xf << DEFECT7374_FSM_FIELD);
-
-	/*See if firmware needs to set up for workaround*/
-	if (fsmvalue != DEFECT7374_FSM_SS_CONTROL_READ) {
-		WARNING(dev, "Operate Defect 7374 workaround soft this time");
-		WARNING(dev, "It will operate on cold-reboot and SS connect");
-
-		/*GPEPs:*/
-		tmp = ((0 << ENDPOINT_NUMBER) | (1 << ENDPOINT_DIRECTION) |
-		       (2 << OUT_ENDPOINT_TYPE) | (2 << IN_ENDPOINT_TYPE) |
-		       ((dev->enhanced_mode) ?
-			1 << OUT_ENDPOINT_ENABLE : 1 << ENDPOINT_ENABLE) |
-		       (1 << IN_ENDPOINT_ENABLE));
-
-		for (i = 1; i < 5; i++)
-			writel(tmp, &dev->ep[i].cfg->ep_cfg);
-
-		/* CSRIN, PCIIN, STATIN, RCIN*/
-		tmp = ((0 << ENDPOINT_NUMBER) | (1 << ENDPOINT_ENABLE));
-		writel(tmp, &dev->dep[1].dep_cfg);
-		writel(tmp, &dev->dep[3].dep_cfg);
-		writel(tmp, &dev->dep[4].dep_cfg);
-		writel(tmp, &dev->dep[5].dep_cfg);
-
-		/*Implemented for development and debug.
-		 * Can be refined/tuned later.*/
-		for (ep_sel = 0; ep_sel <= 21; ep_sel++) {
-			/* Select an endpoint for subsequent operations: */
-			tmp_reg = readl(&dev->plregs->pl_ep_ctrl);
-			writel(((tmp_reg & ~0x1f) | ep_sel),
-			       &dev->plregs->pl_ep_ctrl);
-
-			if (ep_sel == 1) {
-				tmp =
-				    (readl(&dev->plregs->pl_ep_ctrl) |
-				     (1 << CLEAR_ACK_ERROR_CODE) | 0);
-				writel(tmp, &dev->plregs->pl_ep_ctrl);
-				continue;
-			}
-
-			if (ep_sel == 0 || (ep_sel > 9 && ep_sel < 14) ||
-					ep_sel == 18  || ep_sel == 20)
-				continue;
-
-			tmp = (readl(&dev->plregs->pl_ep_cfg_4) |
-				 (1 << NON_CTRL_IN_TOLERATE_BAD_DIR) | 0);
-			writel(tmp, &dev->plregs->pl_ep_cfg_4);
-
-			tmp = readl(&dev->plregs->pl_ep_ctrl) &
-				~(1 << EP_INITIALIZED);
-			writel(tmp, &dev->plregs->pl_ep_ctrl);
-
-		}
-
-		/* Set FSM to focus on the first Control Read:
-		 * - Tip: Connection speed is known upon the first
-		 * setup request.*/
-		scratch |= DEFECT7374_FSM_WAITING_FOR_CONTROL_READ;
-		set_idx_reg(dev->regs, SCRATCH, scratch);
-
-	} else{
-		WARNING(dev, "Defect 7374 workaround soft will NOT operate");
-		WARNING(dev, "It will operate on cold-reboot and SS connect");
-	}
-}
-
 /* keeping it simple:
  * - one bus driver, initted first;
  * - one function driver, initted second
@@ -2011,7 +1744,7 @@ static void defect7374_enable_data_eps_zero(struct net2280 *dev)
  * perhaps to bind specific drivers to specific devices.
  */
 
-static void usb_reset_228x(struct net2280 *dev)
+static void usb_reset (struct net2280 *dev)
 {
 	u32	tmp;
 
@@ -2027,11 +1760,11 @@ static void usb_reset_228x(struct net2280 *dev)
 
 	/* clear old dma and irq state */
 	for (tmp = 0; tmp < 4; tmp++) {
-		struct net2280_ep       *ep = &dev->ep[tmp + 1];
+		struct net2280_ep	*ep = &dev->ep [tmp + 1];
+
 		if (ep->dma)
-			abort_dma(ep);
+			abort_dma (ep);
 	}
-
 	writel (~0, &dev->regs->irqstat0),
 	writel (~(1 << SUSPEND_REQUEST_INTERRUPT), &dev->regs->irqstat1),
 
@@ -2047,67 +1780,7 @@ static void usb_reset_228x(struct net2280 *dev)
 	set_fifo_mode (dev, (fifo_mode <= 2) ? fifo_mode : 0);
 }
 
-static void usb_reset_338x(struct net2280 *dev)
-{
-	u32 tmp;
-	u32 fsmvalue;
-
-	dev->gadget.speed = USB_SPEED_UNKNOWN;
-	(void)readl(&dev->usb->usbctl);
-
-	net2280_led_init(dev);
-
-	fsmvalue = get_idx_reg(dev->regs, SCRATCH) &
-			(0xf << DEFECT7374_FSM_FIELD);
-
-	/* See if firmware needs to set up for workaround: */
-	if (fsmvalue != DEFECT7374_FSM_SS_CONTROL_READ) {
-		INFO(dev, "%s: Defect 7374 FsmValue 0x%08X\n", __func__,
-		     fsmvalue);
-	} else {
-		/* disable automatic responses, and irqs */
-		writel(0, &dev->usb->stdrsp);
-		writel(0, &dev->regs->pciirqenb0);
-		writel(0, &dev->regs->pciirqenb1);
-	}
-
-	/* clear old dma and irq state */
-	for (tmp = 0; tmp < 4; tmp++) {
-		struct net2280_ep *ep = &dev->ep[tmp + 1];
-
-		if (ep->dma)
-			abort_dma(ep);
-	}
-
-	writel(~0, &dev->regs->irqstat0), writel(~0, &dev->regs->irqstat1);
-
-	if (fsmvalue == DEFECT7374_FSM_SS_CONTROL_READ) {
-		/* reset, and enable pci */
-		tmp = readl(&dev->regs->devinit) |
-		    (1 << PCI_ENABLE) |
-		    (1 << FIFO_SOFT_RESET) |
-		    (1 << USB_SOFT_RESET) |
-		    (1 << M8051_RESET);
-
-		writel(tmp, &dev->regs->devinit);
-	}
-
-	/* always ep-{1,2,3,4} ... maybe not ep-3 or ep-4 */
-	INIT_LIST_HEAD(&dev->gadget.ep_list);
-
-	for (tmp = 1; tmp < dev->n_ep; tmp++)
-		list_add_tail(&dev->ep[tmp].ep.ep_list, &dev->gadget.ep_list);
-
-}
-
-static void usb_reset(struct net2280 *dev)
-{
-	if (dev->pdev->vendor == 0x17cc)
-		return usb_reset_228x(dev);
-	return usb_reset_338x(dev);
-}
-
-static void usb_reinit_228x(struct net2280 *dev)
+static void usb_reinit (struct net2280 *dev)
 {
 	u32	tmp;
 	int	init_dma;
@@ -2130,8 +1803,7 @@ static void usb_reinit_228x(struct net2280 *dev)
 		} else
 			ep->fifo_size = 64;
 		ep->regs = &dev->epregs [tmp];
-		ep->cfg = &dev->epregs[tmp];
-		ep_reset_228x(dev->regs, ep);
+		ep_reset (dev->regs, ep);
 	}
 	usb_ep_set_maxpacket_limit(&dev->ep [0].ep, 64);
 	usb_ep_set_maxpacket_limit(&dev->ep [5].ep, 64);
@@ -2148,122 +1820,7 @@ static void usb_reinit_228x(struct net2280 *dev)
 		writel (EP_DONTUSE, &dev->dep [tmp].dep_cfg);
 }
 
-static void usb_reinit_338x(struct net2280 *dev)
-{
-	int init_dma;
-	int i;
-	u32 tmp, val;
-	u32 fsmvalue;
-	static const u32 ne[9] = { 0, 1, 2, 3, 4, 1, 2, 3, 4 };
-	static const u32 ep_reg_addr[9] = { 0x00, 0xC0, 0x00, 0xC0, 0x00,
-						0x00, 0xC0, 0x00, 0xC0 };
-
-	/* use_dma changes are ignored till next device re-init */
-	init_dma = use_dma;
-
-	/* basic endpoint init */
-	for (i = 0; i < dev->n_ep; i++) {
-		struct net2280_ep *ep = &dev->ep[i];
-
-		ep->ep.name = ep_name[i];
-		ep->dev = dev;
-		ep->num = i;
-
-		if (i > 0 && i <= 4 && init_dma)
-			ep->dma = &dev->dma[i - 1];
-
-		if (dev->enhanced_mode) {
-			ep->cfg = &dev->epregs[ne[i]];
-			ep->regs = (struct net2280_ep_regs __iomem *)
-				(((void *)&dev->epregs[ne[i]]) +
-				ep_reg_addr[i]);
-			ep->fiforegs = &dev->fiforegs[i];
-		} else {
-			ep->cfg = &dev->epregs[i];
-			ep->regs = &dev->epregs[i];
-			ep->fiforegs = &dev->fiforegs[i];
-		}
-
-		ep->fifo_size = (i != 0) ? 2048 : 512;
-
-		ep_reset_338x(dev->regs, ep);
-	}
-	usb_ep_set_maxpacket_limit(&dev->ep[0].ep, 512);
-
-	dev->gadget.ep0 = &dev->ep[0].ep;
-	dev->ep[0].stopped = 0;
-
-	/* Link layer set up */
-	fsmvalue = get_idx_reg(dev->regs, SCRATCH) &
-				(0xf << DEFECT7374_FSM_FIELD);
-
-	/* See if driver needs to set up for workaround: */
-	if (fsmvalue != DEFECT7374_FSM_SS_CONTROL_READ)
-		INFO(dev, "%s: Defect 7374 FsmValue %08x\n",
-						__func__, fsmvalue);
-	else {
-		tmp = readl(&dev->usb_ext->usbctl2) &
-		    ~((1 << U1_ENABLE) | (1 << U2_ENABLE) | (1 << LTM_ENABLE));
-		writel(tmp, &dev->usb_ext->usbctl2);
-	}
-
-	/* Hardware Defect and Workaround */
-	val = readl(&dev->ll_lfps_regs->ll_lfps_5);
-	val &= ~(0xf << TIMER_LFPS_6US);
-	val |= 0x5 << TIMER_LFPS_6US;
-	writel(val, &dev->ll_lfps_regs->ll_lfps_5);
-
-	val = readl(&dev->ll_lfps_regs->ll_lfps_6);
-	val &= ~(0xffff << TIMER_LFPS_80US);
-	val |= 0x0100 << TIMER_LFPS_80US;
-	writel(val, &dev->ll_lfps_regs->ll_lfps_6);
-
-	/*
-	 * AA_AB Errata. Issue 4. Workaround for SuperSpeed USB
-	 * Hot Reset Exit Handshake may Fail in Specific Case using
-	 * Default Register Settings. Workaround for Enumeration test.
-	 */
-	val = readl(&dev->ll_tsn_regs->ll_tsn_counters_2);
-	val &= ~(0x1f << HOT_TX_NORESET_TS2);
-	val |= 0x10 << HOT_TX_NORESET_TS2;
-	writel(val, &dev->ll_tsn_regs->ll_tsn_counters_2);
-
-	val = readl(&dev->ll_tsn_regs->ll_tsn_counters_3);
-	val &= ~(0x1f << HOT_RX_RESET_TS2);
-	val |= 0x3 << HOT_RX_RESET_TS2;
-	writel(val, &dev->ll_tsn_regs->ll_tsn_counters_3);
-
-	/*
-	 * Set Recovery Idle to Recover bit:
-	 * - On SS connections, setting Recovery Idle to Recover Fmw improves
-	 *   link robustness with various hosts and hubs.
-	 * - It is safe to set for all connection speeds; all chip revisions.
-	 * - R-M-W to leave other bits undisturbed.
-	 * - Reference PLX TT-7372
-	*/
-	val = readl(&dev->ll_chicken_reg->ll_tsn_chicken_bit);
-	val |= (1 << RECOVERY_IDLE_TO_RECOVER_FMW);
-	writel(val, &dev->ll_chicken_reg->ll_tsn_chicken_bit);
-
-	INIT_LIST_HEAD(&dev->gadget.ep0->ep_list);
-
-	/* disable dedicated endpoints */
-	writel(0x0D, &dev->dep[0].dep_cfg);
-	writel(0x0D, &dev->dep[1].dep_cfg);
-	writel(0x0E, &dev->dep[2].dep_cfg);
-	writel(0x0E, &dev->dep[3].dep_cfg);
-	writel(0x0F, &dev->dep[4].dep_cfg);
-	writel(0x0C, &dev->dep[5].dep_cfg);
-}
-
-static void usb_reinit(struct net2280 *dev)
-{
-	if (dev->pdev->vendor == 0x17cc)
-		return usb_reinit_228x(dev);
-	return usb_reinit_338x(dev);
-}
-
-static void ep0_start_228x(struct net2280 *dev)
+static void ep0_start (struct net2280 *dev)
 {
 	writel (  (1 << CLEAR_EP_HIDE_STATUS_PHASE)
 		| (1 << CLEAR_NAK_OUT_PACKETS)
@@ -2306,61 +1863,6 @@ static void ep0_start_228x(struct net2280 *dev)
 	(void) readl (&dev->usb->usbctl);
 }
 
-static void ep0_start_338x(struct net2280 *dev)
-{
-	u32 fsmvalue;
-
-	fsmvalue = get_idx_reg(dev->regs, SCRATCH) &
-			(0xf << DEFECT7374_FSM_FIELD);
-
-	if (fsmvalue != DEFECT7374_FSM_SS_CONTROL_READ)
-		INFO(dev, "%s: Defect 7374 FsmValue %08x\n", __func__,
-		     fsmvalue);
-	else
-		writel((1 << CLEAR_NAK_OUT_PACKETS_MODE) |
-		       (1 << SET_EP_HIDE_STATUS_PHASE),
-		       &dev->epregs[0].ep_rsp);
-
-	/*
-	 * hardware optionally handles a bunch of standard requests
-	 * that the API hides from drivers anyway.  have it do so.
-	 * endpoint status/features are handled in software, to
-	 * help pass tests for some dubious behavior.
-	 */
-	writel((1 << SET_ISOCHRONOUS_DELAY) |
-	       (1 << SET_SEL) |
-	       (1 << SET_TEST_MODE) |
-	       (1 << SET_ADDRESS) |
-	       (1 << GET_INTERFACE_STATUS) |
-	       (1 << GET_DEVICE_STATUS),
-		&dev->usb->stdrsp);
-	dev->wakeup_enable = 1;
-	writel((1 << USB_ROOT_PORT_WAKEUP_ENABLE) |
-	       (dev->softconnect << USB_DETECT_ENABLE) |
-	       (1 << DEVICE_REMOTE_WAKEUP_ENABLE),
-	       &dev->usb->usbctl);
-
-	/* enable irqs so we can see ep0 and general operation  */
-	writel((1 << SETUP_PACKET_INTERRUPT_ENABLE) |
-	       (1 << ENDPOINT_0_INTERRUPT_ENABLE)
-	       , &dev->regs->pciirqenb0);
-	writel((1 << PCI_INTERRUPT_ENABLE) |
-	       (1 << ROOT_PORT_RESET_INTERRUPT_ENABLE) |
-	       (1 << SUSPEND_REQUEST_CHANGE_INTERRUPT_ENABLE) |
-	       (1 << VBUS_INTERRUPT_ENABLE),
-	       &dev->regs->pciirqenb1);
-
-	/* don't leave any writes posted */
-	(void)readl(&dev->usb->usbctl);
-}
-
-static void ep0_start(struct net2280 *dev)
-{
-	if (dev->pdev->vendor == 0x17cc)
-		return ep0_start_228x(dev);
-	return ep0_start_338x(dev);
-}
-
 /* when a driver is successfully registered, it will receive
  * control requests including set_configuration(), which enables
  * non-control requests.  then usb traffic follows until a
@@ -2384,7 +1886,7 @@ static int net2280_start(struct usb_gadget *_gadget,
 
 	dev = container_of (_gadget, struct net2280, gadget);
 
-	for (i = 0; i < dev->n_ep; i++)
+	for (i = 0; i < 7; i++)
 		dev->ep [i].irqs = 0;
 
 	/* hook up the driver ... */
@@ -2398,17 +1900,13 @@ static int net2280_start(struct usb_gadget *_gadget,
 	if (retval) goto err_func;
 
 	/* Enable force-full-speed testing mode, if desired */
-	if (full_speed && dev->pdev->vendor == 0x17cc)
+	if (full_speed)
 		writel(1 << FORCE_FULL_SPEED_MODE, &dev->usb->xcvrdiag);
 
 	/* ... then enable host detection and ep0; and we're ready
 	 * for set_configuration as well as eventual disconnect.
 	 */
 	net2280_led_active (dev, 1);
-
-	if (dev->pdev->vendor == 0x10b5)
-		defect7374_enable_data_eps_zero(dev);
-
 	ep0_start (dev);
 
 	DEBUG (dev, "%s ready, usbctl %08x stdrsp %08x\n",
@@ -2439,7 +1937,7 @@ stop_activity (struct net2280 *dev, struct usb_gadget_driver *driver)
 	 * and kill any outstanding requests.
 	 */
 	usb_reset (dev);
-	for (i = 0; i < dev->n_ep; i++)
+	for (i = 0; i < 7; i++)
 		nuke (&dev->ep [i]);
 
 	/* report disconnect; the driver is already quiesced */
@@ -2469,8 +1967,7 @@ static int net2280_stop(struct usb_gadget *_gadget,
 	net2280_led_active (dev, 0);
 
 	/* Disable full-speed test mode */
-	if (dev->pdev->vendor == 0x17cc)
-		writel(0, &dev->usb->xcvrdiag);
+	writel(0, &dev->usb->xcvrdiag);
 
 	device_remove_file (&dev->pdev->dev, &dev_attr_function);
 	device_remove_file (&dev->pdev->dev, &dev_attr_queues);
@@ -2722,350 +2219,6 @@ get_ep_by_addr (struct net2280 *dev, u16 wIndex)
 	return NULL;
 }
 
-static void defect7374_workaround(struct net2280 *dev, struct usb_ctrlrequest r)
-{
-	u32 scratch, fsmvalue;
-	u32 ack_wait_timeout, state;
-
-	/* Workaround for Defect 7374 (U1/U2 erroneously rejected): */
-	scratch = get_idx_reg(dev->regs, SCRATCH);
-	fsmvalue = scratch & (0xf << DEFECT7374_FSM_FIELD);
-	scratch &= ~(0xf << DEFECT7374_FSM_FIELD);
-
-	if (!((fsmvalue == DEFECT7374_FSM_WAITING_FOR_CONTROL_READ) &&
-				(r.bRequestType & USB_DIR_IN)))
-		return;
-
-	/* This is the first Control Read for this connection: */
-	if (!(readl(&dev->usb->usbstat) & (1 << SUPER_SPEED_MODE))) {
-		/*
-		 * Connection is NOT SS:
-		 * - Connection must be FS or HS.
-		 * - This FSM state should allow workaround software to
-		 * run after the next USB connection.
-		 */
-		scratch |= DEFECT7374_FSM_NON_SS_CONTROL_READ;
-		goto restore_data_eps;
-	}
-
-	/* Connection is SS: */
-	for (ack_wait_timeout = 0;
-			ack_wait_timeout < DEFECT_7374_NUMBEROF_MAX_WAIT_LOOPS;
-			ack_wait_timeout++) {
-
-		state =	readl(&dev->plregs->pl_ep_status_1)
-			& (0xff << STATE);
-		if ((state >= (ACK_GOOD_NORMAL << STATE)) &&
-			(state <= (ACK_GOOD_MORE_ACKS_TO_COME << STATE))) {
-			scratch |= DEFECT7374_FSM_SS_CONTROL_READ;
-			break;
-		}
-
-		/*
-		 * We have not yet received host's Data Phase ACK
-		 * - Wait and try again.
-		 */
-		udelay(DEFECT_7374_PROCESSOR_WAIT_TIME);
-
-		continue;
-	}
-
-
-	if (ack_wait_timeout >= DEFECT_7374_NUMBEROF_MAX_WAIT_LOOPS) {
-		ERROR(dev, "FAIL: Defect 7374 workaround waited but failed");
-		ERROR(dev, "to detect SS host's data phase ACK.");
-		ERROR(dev, "PL_EP_STATUS_1(23:16):.Expected from 0x11 to 0x16");
-		ERROR(dev, "got 0x%2.2x.\n", state >> STATE);
-	} else {
-		WARNING(dev, "INFO: Defect 7374 workaround waited about\n");
-		WARNING(dev, "%duSec for Control Read Data Phase ACK\n",
-			DEFECT_7374_PROCESSOR_WAIT_TIME * ack_wait_timeout);
-	}
-
-restore_data_eps:
-	/*
-	 * Restore data EPs to their pre-workaround settings (disabled,
-	 * initialized, and other details).
-	 */
-	defect7374_disable_data_eps(dev);
-
-	set_idx_reg(dev->regs, SCRATCH, scratch);
-
-	return;
-}
-
-static void ep_stall(struct net2280_ep *ep, int stall)
-{
-	struct net2280 *dev = ep->dev;
-	u32 val;
-	static const u32 ep_pl[9] = { 0, 3, 4, 7, 8, 2, 5, 6, 9 };
-
-	if (stall) {
-		writel((1 << SET_ENDPOINT_HALT) |
-		       /* (1 << SET_NAK_PACKETS) | */
-		       (1 << CLEAR_CONTROL_STATUS_PHASE_HANDSHAKE),
-		       &ep->regs->ep_rsp);
-		ep->is_halt = 1;
-	} else {
-		if (dev->gadget.speed == USB_SPEED_SUPER) {
-			/*
-			 * Workaround for SS SeqNum not cleared via
-			 * Endpoint Halt (Clear) bit. select endpoint
-			 */
-			val = readl(&dev->plregs->pl_ep_ctrl);
-			val = (val & ~0x1f) | ep_pl[ep->num];
-			writel(val, &dev->plregs->pl_ep_ctrl);
-
-			val |= (1 << SEQUENCE_NUMBER_RESET);
-			writel(val, &dev->plregs->pl_ep_ctrl);
-		}
-		val = readl(&ep->regs->ep_rsp);
-		val |= (1 << CLEAR_ENDPOINT_HALT) |
-			(1 << CLEAR_ENDPOINT_TOGGLE);
-		writel(val
-		       /* | (1 << CLEAR_NAK_PACKETS)*/
-		       , &ep->regs->ep_rsp);
-		ep->is_halt = 0;
-		val = readl(&ep->regs->ep_rsp);
-	}
-}
-
-static void ep_stdrsp(struct net2280_ep *ep, int value, int wedged)
-{
-	/* set/clear, then synch memory views with the device */
-	if (value) {
-		ep->stopped = 1;
-		if (ep->num == 0)
-			ep->dev->protocol_stall = 1;
-		else {
-			if (ep->dma)
-				ep_stop_dma(ep);
-			ep_stall(ep, true);
-		}
-
-		if (wedged)
-			ep->wedged = 1;
-	} else {
-		ep->stopped = 0;
-		ep->wedged = 0;
-
-		ep_stall(ep, false);
-
-		/* Flush the queue */
-		if (!list_empty(&ep->queue)) {
-			struct net2280_request *req =
-			    list_entry(ep->queue.next, struct net2280_request,
-				       queue);
-			if (ep->dma)
-				resume_dma(ep);
-			else {
-				if (ep->is_in)
-					write_fifo(ep, &req->req);
-				else {
-					if (read_fifo(ep, req))
-						done(ep, req, 0);
-				}
-			}
-		}
-	}
-}
-
-static void handle_stat0_irqs_superspeed(struct net2280 *dev,
-		struct net2280_ep *ep, struct usb_ctrlrequest r)
-{
-	int tmp = 0;
-
-#define	w_value		le16_to_cpu(r.wValue)
-#define	w_index		le16_to_cpu(r.wIndex)
-#define	w_length	le16_to_cpu(r.wLength)
-
-	switch (r.bRequest) {
-		struct net2280_ep *e;
-		u16 status;
-
-	case USB_REQ_SET_CONFIGURATION:
-		dev->addressed_state = !w_value;
-		goto usb3_delegate;
-
-	case USB_REQ_GET_STATUS:
-		switch (r.bRequestType) {
-		case (USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE):
-			status = dev->wakeup_enable ? 0x02 : 0x00;
-			if (dev->selfpowered)
-				status |= 1 << 0;
-			status |= (dev->u1_enable << 2 | dev->u2_enable << 3 |
-							dev->ltm_enable << 4);
-			writel(0, &dev->epregs[0].ep_irqenb);
-			set_fifo_bytecount(ep, sizeof(status));
-			writel((__force u32) status, &dev->epregs[0].ep_data);
-			allow_status_338x(ep);
-			break;
-
-		case (USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_ENDPOINT):
-			e = get_ep_by_addr(dev, w_index);
-			if (!e)
-				goto do_stall3;
-			status = readl(&e->regs->ep_rsp) &
-						(1 << CLEAR_ENDPOINT_HALT);
-			writel(0, &dev->epregs[0].ep_irqenb);
-			set_fifo_bytecount(ep, sizeof(status));
-			writel((__force u32) status, &dev->epregs[0].ep_data);
-			allow_status_338x(ep);
-			break;
-
-		default:
-			goto usb3_delegate;
-		}
-		break;
-
-	case USB_REQ_CLEAR_FEATURE:
-		switch (r.bRequestType) {
-		case (USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE):
-			if (!dev->addressed_state) {
-				switch (w_value) {
-				case USB_DEVICE_U1_ENABLE:
-					dev->u1_enable = 0;
-					writel(readl(&dev->usb_ext->usbctl2) &
-						~(1 << U1_ENABLE),
-						&dev->usb_ext->usbctl2);
-					allow_status_338x(ep);
-					goto next_endpoints3;
-
-				case USB_DEVICE_U2_ENABLE:
-					dev->u2_enable = 0;
-					writel(readl(&dev->usb_ext->usbctl2) &
-						~(1 << U2_ENABLE),
-						&dev->usb_ext->usbctl2);
-					allow_status_338x(ep);
-					goto next_endpoints3;
-
-				case USB_DEVICE_LTM_ENABLE:
-					dev->ltm_enable = 0;
-					writel(readl(&dev->usb_ext->usbctl2) &
-						~(1 << LTM_ENABLE),
-						&dev->usb_ext->usbctl2);
-					allow_status_338x(ep);
-					goto next_endpoints3;
-
-				default:
-					break;
-				}
-			}
-			if (w_value == USB_DEVICE_REMOTE_WAKEUP) {
-				dev->wakeup_enable = 0;
-				writel(readl(&dev->usb->usbctl) &
-					~(1 << DEVICE_REMOTE_WAKEUP_ENABLE),
-					&dev->usb->usbctl);
-				allow_status_338x(ep);
-				break;
-			}
-			goto usb3_delegate;
-
-		case (USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_ENDPOINT):
-			e = get_ep_by_addr(dev,	w_index);
-			if (!e)
-				goto do_stall3;
-			if (w_value != USB_ENDPOINT_HALT)
-				goto do_stall3;
-			VDEBUG(dev, "%s clear halt\n", e->ep.name);
-			ep_stall(e, false);
-			if (!list_empty(&e->queue) && e->td_dma)
-				restart_dma(e);
-			allow_status(ep);
-			ep->stopped = 1;
-			break;
-
-		default:
-			goto usb3_delegate;
-		}
-		break;
-	case USB_REQ_SET_FEATURE:
-		switch (r.bRequestType) {
-		case (USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE):
-			if (!dev->addressed_state) {
-				switch (w_value) {
-				case USB_DEVICE_U1_ENABLE:
-					dev->u1_enable = 1;
-					writel(readl(&dev->usb_ext->usbctl2) |
-						(1 << U1_ENABLE),
-						&dev->usb_ext->usbctl2);
-					allow_status_338x(ep);
-					goto next_endpoints3;
-
-				case USB_DEVICE_U2_ENABLE:
-					dev->u2_enable = 1;
-					writel(readl(&dev->usb_ext->usbctl2) |
-						(1 << U2_ENABLE),
-						&dev->usb_ext->usbctl2);
-					allow_status_338x(ep);
-					goto next_endpoints3;
-
-				case USB_DEVICE_LTM_ENABLE:
-					dev->ltm_enable = 1;
-					writel(readl(&dev->usb_ext->usbctl2) |
-						(1 << LTM_ENABLE),
-						&dev->usb_ext->usbctl2);
-					allow_status_338x(ep);
-					goto next_endpoints3;
-				default:
-					break;
-				}
-			}
-
-			if (w_value == USB_DEVICE_REMOTE_WAKEUP) {
-				dev->wakeup_enable = 1;
-				writel(readl(&dev->usb->usbctl) |
-					(1 << DEVICE_REMOTE_WAKEUP_ENABLE),
-					&dev->usb->usbctl);
-				allow_status_338x(ep);
-				break;
-			}
-			goto usb3_delegate;
-
-		case (USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_ENDPOINT):
-			e = get_ep_by_addr(dev,	w_index);
-			if (!e || (w_value != USB_ENDPOINT_HALT))
-				goto do_stall3;
-			ep_stdrsp(e, true, false);
-			allow_status_338x(ep);
-			break;
-
-		default:
-			goto usb3_delegate;
-		}
-
-		break;
-	default:
-
-usb3_delegate:
-		VDEBUG(dev, "setup %02x.%02x v%04x i%04x l%04x ep_cfg %08x\n",
-				r.bRequestType, r.bRequest,
-				w_value, w_index, w_length,
-				readl(&ep->cfg->ep_cfg));
-
-		ep->responded = 0;
-		spin_unlock(&dev->lock);
-		tmp = dev->driver->setup(&dev->gadget, &r);
-		spin_lock(&dev->lock);
-	}
-do_stall3:
-	if (tmp < 0) {
-		VDEBUG(dev, "req %02x.%02x protocol STALL; stat %d\n",
-				r.bRequestType, r.bRequest, tmp);
-		dev->protocol_stall = 1;
-		/* TD 9.9 Halt Endpoint test. TD 9.22 Set feature test */
-		ep_stall(ep, true);
-	}
-
-next_endpoints3:
-
-#undef	w_value
-#undef	w_index
-#undef	w_length
-
-	return;
-}
-
 static void handle_stat0_irqs (struct net2280 *dev, u32 stat)
 {
 	struct net2280_ep	*ep;
@@ -3087,20 +2240,10 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat)
 		struct net2280_request		*req;
 
 		if (dev->gadget.speed == USB_SPEED_UNKNOWN) {
-			u32 val = readl(&dev->usb->usbstat);
-			if (val & (1 << SUPER_SPEED)) {
-				dev->gadget.speed = USB_SPEED_SUPER;
-				usb_ep_set_maxpacket_limit(&dev->ep[0].ep,
-						EP0_SS_MAX_PACKET_SIZE);
-			} else if (val & (1 << HIGH_SPEED)) {
+			if (readl (&dev->usb->usbstat) & (1 << HIGH_SPEED))
 				dev->gadget.speed = USB_SPEED_HIGH;
-				usb_ep_set_maxpacket_limit(&dev->ep[0].ep,
-						EP0_HS_MAX_PACKET_SIZE);
-			} else {
+			else
 				dev->gadget.speed = USB_SPEED_FULL;
-				usb_ep_set_maxpacket_limit(&dev->ep[0].ep,
-						EP0_HS_MAX_PACKET_SIZE);
-			}
 			net2280_led_speed (dev, dev->gadget.speed);
 			DEBUG(dev, "%s\n", usb_speed_string(dev->gadget.speed));
 		}
@@ -3118,38 +2261,32 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat)
 		}
 		ep->stopped = 0;
 		dev->protocol_stall = 0;
-		if (dev->pdev->vendor == 0x10b5)
-			ep->is_halt = 0;
-		else{
-			if (ep->dev->pdev->device == 0x2280)
-				tmp = (1 << FIFO_OVERFLOW) |
-				    (1 << FIFO_UNDERFLOW);
-			else
-				tmp = 0;
-
-			writel(tmp | (1 << TIMEOUT) |
-				   (1 << USB_STALL_SENT) |
-				   (1 << USB_IN_NAK_SENT) |
-				   (1 << USB_IN_ACK_RCVD) |
-				   (1 << USB_OUT_PING_NAK_SENT) |
-				   (1 << USB_OUT_ACK_SENT) |
-				   (1 << SHORT_PACKET_OUT_DONE_INTERRUPT) |
-				   (1 << SHORT_PACKET_TRANSFERRED_INTERRUPT) |
-				   (1 << DATA_PACKET_RECEIVED_INTERRUPT) |
-				   (1 << DATA_PACKET_TRANSMITTED_INTERRUPT) |
-				   (1 << DATA_OUT_PING_TOKEN_INTERRUPT) |
-				   (1 << DATA_IN_TOKEN_INTERRUPT)
-				   , &ep->regs->ep_stat);
-		}
-		u.raw[0] = readl(&dev->usb->setup0123);
-		u.raw[1] = readl(&dev->usb->setup4567);
+
+		if (ep->dev->pdev->device == 0x2280)
+			tmp = (1 << FIFO_OVERFLOW)
+				| (1 << FIFO_UNDERFLOW);
+		else
+			tmp = 0;
+
+		writel (tmp | (1 << TIMEOUT)
+			| (1 << USB_STALL_SENT)
+			| (1 << USB_IN_NAK_SENT)
+			| (1 << USB_IN_ACK_RCVD)
+			| (1 << USB_OUT_PING_NAK_SENT)
+			| (1 << USB_OUT_ACK_SENT)
+			| (1 << SHORT_PACKET_OUT_DONE_INTERRUPT)
+			| (1 << SHORT_PACKET_TRANSFERRED_INTERRUPT)
+			| (1 << DATA_PACKET_RECEIVED_INTERRUPT)
+			| (1 << DATA_PACKET_TRANSMITTED_INTERRUPT)
+			| (1 << DATA_OUT_PING_TOKEN_INTERRUPT)
+			| (1 << DATA_IN_TOKEN_INTERRUPT)
+			, &ep->regs->ep_stat);
+		u.raw [0] = readl (&dev->usb->setup0123);
+		u.raw [1] = readl (&dev->usb->setup4567);
 
 		cpu_to_le32s (&u.raw [0]);
 		cpu_to_le32s (&u.raw [1]);
 
-		if (dev->pdev->vendor == 0x10b5)
-			defect7374_workaround(dev, u.r);
-
 		tmp = 0;
 
 #define	w_value		le16_to_cpu(u.r.wValue)
@@ -3181,12 +2318,6 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat)
 		 * everything else goes uplevel to the gadget code.
 		 */
 		ep->responded = 1;
-
-		if (dev->gadget.speed == USB_SPEED_SUPER) {
-			handle_stat0_irqs_superspeed(dev, ep, u.r);
-			goto next_endpoints;
-		}
-
 		switch (u.r.bRequest) {
 		case USB_REQ_GET_STATUS: {
 			struct net2280_ep	*e;
@@ -3229,11 +2360,8 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat)
 				VDEBUG(dev, "%s wedged, halt not cleared\n",
 						ep->ep.name);
 			} else {
-				VDEBUG(dev, "%s clear halt\n", e->ep.name);
+				VDEBUG(dev, "%s clear halt\n", ep->ep.name);
 				clear_halt(e);
-				if (ep->dev->pdev->vendor == 0x10b5 &&
-					!list_empty(&e->queue) && e->td_dma)
-						restart_dma(e);
 			}
 			allow_status (ep);
 			goto next_endpoints;
@@ -3253,8 +2381,6 @@ static void handle_stat0_irqs (struct net2280 *dev, u32 stat)
 			if (e->ep.name == ep0name)
 				goto do_stall;
 			set_halt (e);
-			if (dev->pdev->vendor == 0x10b5 && e->dma)
-				abort_dma(e);
 			allow_status (ep);
 			VDEBUG (dev, "%s set halt\n", ep->ep.name);
 			goto next_endpoints;
@@ -3266,7 +2392,7 @@ delegate:
 				"ep_cfg %08x\n",
 				u.r.bRequestType, u.r.bRequest,
 				w_value, w_index, w_length,
-				readl(&ep->cfg->ep_cfg));
+				readl (&ep->regs->ep_cfg));
 			ep->responded = 0;
 			spin_unlock (&dev->lock);
 			tmp = dev->driver->setup (&dev->gadget, &u.r);
@@ -3329,7 +2455,7 @@ static void handle_stat1_irqs (struct net2280 *dev, u32 stat)
 
 	/* after disconnect there's nothing else to do! */
 	tmp = (1 << VBUS_INTERRUPT) | (1 << ROOT_PORT_RESET_INTERRUPT);
-	mask = (1 << SUPER_SPEED) | (1 << HIGH_SPEED) | (1 << FULL_SPEED);
+	mask = (1 << HIGH_SPEED) | (1 << FULL_SPEED);
 
 	/* VBUS disconnect is indicated by VBUS_PIN and VBUS_INTERRUPT set.
 	 * Root Port Reset is indicated by ROOT_PORT_RESET_INTERRUPT set and
@@ -3420,19 +2546,12 @@ static void handle_stat1_irqs (struct net2280 *dev, u32 stat)
 		tmp = readl (&dma->dmastat);
 		writel (tmp, &dma->dmastat);
 
-		/* dma sync*/
-		if (dev->pdev->vendor == 0x10b5) {
-			u32 r_dmacount = readl(&dma->dmacount);
-			if (!ep->is_in &&  (r_dmacount & 0x00FFFFFF) &&
-			    (tmp & (1 << DMA_TRANSACTION_DONE_INTERRUPT)))
-				continue;
-		}
-
 		/* chaining should stop on abort, short OUT from fifo,
 		 * or (stat0 codepath) short OUT transfer.
 		 */
 		if (!use_dma_chaining) {
-			if (!(tmp & (1 << DMA_TRANSACTION_DONE_INTERRUPT))) {
+			if ((tmp & (1 << DMA_TRANSACTION_DONE_INTERRUPT))
+					== 0) {
 				DEBUG (ep->dev, "%s no xact done? %08x\n",
 					ep->ep.name, tmp);
 				continue;
@@ -3506,8 +2625,7 @@ static irqreturn_t net2280_irq (int irq, void *_dev)
 	struct net2280		*dev = _dev;
 
 	/* shared interrupt, not ours */
-	if (dev->pdev->vendor == 0x17cc &&
-		(!(readl(&dev->regs->irqstat0) & (1 << INTA_ASSERTED))))
+	if (!(readl(&dev->regs->irqstat0) & (1 << INTA_ASSERTED)))
 		return IRQ_NONE;
 
 	spin_lock (&dev->lock);
@@ -3518,13 +2636,6 @@ static irqreturn_t net2280_irq (int irq, void *_dev)
 	/* control requests and PIO */
 	handle_stat0_irqs (dev, readl (&dev->regs->irqstat0));
 
-	if (dev->pdev->vendor == 0x10b5) {
-		/* re-enable interrupt to trigger any possible new interrupt */
-		u32 pciirqenb1 = readl(&dev->regs->pciirqenb1);
-		writel(pciirqenb1 & 0x7FFFFFFF, &dev->regs->pciirqenb1);
-		writel(pciirqenb1, &dev->regs->pciirqenb1);
-	}
-
 	spin_unlock (&dev->lock);
 
 	return IRQ_HANDLED;
@@ -3563,8 +2674,6 @@ static void net2280_remove (struct pci_dev *pdev)
 	}
 	if (dev->got_irq)
 		free_irq (pdev->irq, dev);
-	if (use_msi && dev->pdev->vendor == 0x10b5)
-		pci_disable_msi(pdev);
 	if (dev->regs)
 		iounmap (dev->regs);
 	if (dev->region)
@@ -3599,8 +2708,7 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id)
 	spin_lock_init (&dev->lock);
 	dev->pdev = pdev;
 	dev->gadget.ops = &net2280_ops;
-	dev->gadget.max_speed = (dev->pdev->vendor == 0x10b5) ?
-				USB_SPEED_SUPER : USB_SPEED_HIGH;
+	dev->gadget.max_speed = USB_SPEED_HIGH;
 
 	/* the "gadget" abstracts/virtualizes the controller */
 	dev->gadget.name = driver_name;
@@ -3642,39 +2750,8 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id)
 	dev->dep = (struct net2280_dep_regs __iomem *) (base + 0x0200);
 	dev->epregs = (struct net2280_ep_regs __iomem *) (base + 0x0300);
 
-	if (dev->pdev->vendor == 0x10b5) {
-		u32 fsmvalue;
-		u32 usbstat;
-		dev->usb_ext = (struct usb338x_usb_ext_regs __iomem *)
-							(base + 0x00b4);
-		dev->fiforegs = (struct usb338x_fifo_regs __iomem *)
-							(base + 0x0500);
-		dev->llregs = (struct usb338x_ll_regs __iomem *)
-							(base + 0x0700);
-		dev->ll_lfps_regs = (struct usb338x_ll_lfps_regs __iomem *)
-							(base + 0x0748);
-		dev->ll_tsn_regs = (struct usb338x_ll_tsn_regs __iomem *)
-							(base + 0x077c);
-		dev->ll_chicken_reg = (struct usb338x_ll_chi_regs __iomem *)
-							(base + 0x079c);
-		dev->plregs = (struct usb338x_pl_regs __iomem *)
-							(base + 0x0800);
-		usbstat = readl(&dev->usb->usbstat);
-		dev->enhanced_mode = (usbstat & (1 << 11)) ? 1 : 0;
-		dev->n_ep = (dev->enhanced_mode) ? 9 : 5;
-		/* put into initial config, link up all endpoints */
-		fsmvalue = get_idx_reg(dev->regs, SCRATCH) &
-					(0xf << DEFECT7374_FSM_FIELD);
-		/* See if firmware needs to set up for workaround: */
-		if (fsmvalue == DEFECT7374_FSM_SS_CONTROL_READ)
-			writel(0, &dev->usb->usbctl);
-	} else{
-		dev->enhanced_mode = 0;
-		dev->n_ep = 7;
-		/* put into initial config, link up all endpoints */
-		writel(0, &dev->usb->usbctl);
-	}
-
+	/* put into initial config, link up all endpoints */
+	writel (0, &dev->usb->usbctl);
 	usb_reset (dev);
 	usb_reinit (dev);
 
@@ -3685,10 +2762,6 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id)
 		goto done;
 	}
 
-	if (use_msi && dev->pdev->vendor == 0x10b5)
-		if (pci_enable_msi(pdev))
-			ERROR(dev, "Failed to enable MSI mode\n");
-
 	if (request_irq (pdev->irq, net2280_irq, IRQF_SHARED, driver_name, dev)
 			!= 0) {
 		ERROR (dev, "request interrupt %d failed\n", pdev->irq);
@@ -3724,8 +2797,7 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	/* enable lower-overhead pci memory bursts during DMA */
-	if (dev->pdev->vendor == 0x17cc)
-		writel((1 << DMA_MEMORY_WRITE_AND_INVALIDATE_ENABLE)
+	writel ( (1 << DMA_MEMORY_WRITE_AND_INVALIDATE_ENABLE)
 			// 256 write retries may not be enough...
 			// | (1 << PCI_RETRY_ABORT_ENABLE)
 			| (1 << DMA_READ_MULTIPLE_ENABLE)
@@ -3742,10 +2814,10 @@ static int net2280_probe (struct pci_dev *pdev, const struct pci_device_id *id)
 	INFO (dev, "%s\n", driver_desc);
 	INFO (dev, "irq %d, pci mem %p, chip rev %04x\n",
 			pdev->irq, base, dev->chiprev);
-	INFO(dev, "version: " DRIVER_VERSION "; dma %s %s\n",
-		use_dma	? (use_dma_chaining ? "chaining" : "enabled")
-			: "disabled",
-		dev->enhanced_mode ? "enhanced mode" : "legacy mode");
+	INFO (dev, "version: " DRIVER_VERSION "; dma %s\n",
+			use_dma
+				? (use_dma_chaining ? "chaining" : "enabled")
+				: "disabled");
 	retval = device_create_file (&pdev->dev, &dev_attr_registers);
 	if (retval) goto done;
 
@@ -3777,8 +2849,7 @@ static void net2280_shutdown (struct pci_dev *pdev)
 	writel (0, &dev->usb->usbctl);
 
 	/* Disable full-speed test mode */
-	if (dev->pdev->vendor == 0x17cc)
-		writel(0, &dev->usb->xcvrdiag);
+	writel(0, &dev->usb->xcvrdiag);
 }
 
 
@@ -3798,24 +2869,8 @@ static const struct pci_device_id pci_ids [] = { {
 	.device =	0x2282,
 	.subvendor =	PCI_ANY_ID,
 	.subdevice =	PCI_ANY_ID,
-},
-	{
-	 .class = ((PCI_CLASS_SERIAL_USB << 8) | 0xfe),
-	 .class_mask = ~0,
-	 .vendor = 0x10b5,
-	 .device = 0x3380,
-	 .subvendor = PCI_ANY_ID,
-	 .subdevice = PCI_ANY_ID,
-	 },
-	{
-	 .class = ((PCI_CLASS_SERIAL_USB << 8) | 0xfe),
-	 .class_mask = ~0,
-	 .vendor = 0x10b5,
-	 .device = 0x3382,
-	 .subvendor = PCI_ANY_ID,
-	 .subdevice = PCI_ANY_ID,
-	 },
-{ /* end: all zeroes */ }
+
+}, { /* end: all zeroes */ }
 };
 MODULE_DEVICE_TABLE (pci, pci_ids);
 
diff --git a/drivers/usb/gadget/net2280.h b/drivers/usb/gadget/net2280.h
index f32c2746b6ae..a844be0d683a 100644
--- a/drivers/usb/gadget/net2280.h
+++ b/drivers/usb/gadget/net2280.h
@@ -6,7 +6,6 @@
 /*
  * Copyright (C) 2002 NetChip Technology, Inc. (http://www.netchip.com)
  * Copyright (C) 2003 David Brownell
- * Copyright (C) 2014 Ricardo Ribalda - Qtechnology/AS
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -15,7 +14,6 @@
  */
 
 #include <linux/usb/net2280.h>
-#include <linux/usb/usb338x.h>
 
 /*-------------------------------------------------------------------------*/
 
@@ -61,13 +59,6 @@ set_idx_reg (struct net2280_regs __iomem *regs, u32 index, u32 value)
 #define	CHIPREV_1	0x0100
 #define	CHIPREV_1A	0x0110
 
-/* DEFECT 7374 */
-#define DEFECT_7374_NUMBEROF_MAX_WAIT_LOOPS         200
-#define DEFECT_7374_PROCESSOR_WAIT_TIME             10
-
-/* ep0 max packet size */
-#define EP0_SS_MAX_PACKET_SIZE  0x200
-#define EP0_HS_MAX_PACKET_SIZE  0x40
 #ifdef	__KERNEL__
 
 /* ep a-f highspeed and fullspeed maxpacket, addresses
@@ -94,15 +85,12 @@ struct net2280_dma {
 
 struct net2280_ep {
 	struct usb_ep				ep;
-	struct net2280_ep_regs __iomem *cfg;
 	struct net2280_ep_regs			__iomem *regs;
 	struct net2280_dma_regs			__iomem *dma;
 	struct net2280_dma			*dummy;
-	struct usb338x_fifo_regs __iomem *fiforegs;
 	dma_addr_t				td_dma;	/* of dummy */
 	struct net2280				*dev;
 	unsigned long				irqs;
-	unsigned is_halt:1, dma_started:1;
 
 	/* analogous to a host-side qh */
 	struct list_head			queue;
@@ -128,19 +116,10 @@ static inline void allow_status (struct net2280_ep *ep)
 	ep->stopped = 1;
 }
 
-static void allow_status_338x(struct net2280_ep *ep)
+/* count (<= 4) bytes in the next fifo write will be valid */
+static inline void set_fifo_bytecount (struct net2280_ep *ep, unsigned count)
 {
-	/*
-	 * Control Status Phase Handshake was set by the chip when the setup
-	 * packet arrived. While set, the chip automatically NAKs the host's
-	 * Status Phase tokens.
-	 */
-	writel(1 << CLEAR_CONTROL_STATUS_PHASE_HANDSHAKE, &ep->regs->ep_rsp);
-
-	ep->stopped = 1;
-
-	/* TD 9.9 Halt Endpoint test.  TD 9.22 set feature test. */
-	ep->responded = 0;
+	writeb (count, 2 + (u8 __iomem *) &ep->regs->ep_cfg);
 }
 
 struct net2280_request {
@@ -156,38 +135,23 @@ struct net2280 {
 	/* each pci device provides one gadget, several endpoints */
 	struct usb_gadget		gadget;
 	spinlock_t			lock;
-	struct net2280_ep		ep[9];
+	struct net2280_ep		ep [7];
 	struct usb_gadget_driver 	*driver;
 	unsigned			enabled : 1,
 					protocol_stall : 1,
 					softconnect : 1,
 					got_irq : 1,
-					region:1,
-					u1_enable:1,
-					u2_enable:1,
-					ltm_enable:1,
-					wakeup_enable:1,
-					selfpowered:1,
-					addressed_state:1;
+					region : 1;
 	u16				chiprev;
-	int enhanced_mode;
-	int n_ep;
 
 	/* pci state used to access those endpoints */
 	struct pci_dev			*pdev;
 	struct net2280_regs		__iomem *regs;
 	struct net2280_usb_regs		__iomem *usb;
-	struct usb338x_usb_ext_regs	__iomem *usb_ext;
 	struct net2280_pci_regs		__iomem *pci;
 	struct net2280_dma_regs		__iomem *dma;
 	struct net2280_dep_regs		__iomem *dep;
 	struct net2280_ep_regs		__iomem *epregs;
-	struct usb338x_fifo_regs	__iomem *fiforegs;
-	struct usb338x_ll_regs		__iomem *llregs;
-	struct usb338x_ll_lfps_regs	__iomem *ll_lfps_regs;
-	struct usb338x_ll_tsn_regs	__iomem *ll_tsn_regs;
-	struct usb338x_ll_chi_regs	__iomem *ll_chicken_reg;
-	struct usb338x_pl_regs		__iomem *plregs;
 
 	struct pci_pool			*requests;
 	// statistics...
@@ -215,43 +179,6 @@ static inline void clear_halt (struct net2280_ep *ep)
 		, &ep->regs->ep_rsp);
 }
 
-/*
- * FSM value for Defect 7374 (U1U2 Test) is managed in
- * chip's SCRATCH register:
- */
-#define DEFECT7374_FSM_FIELD    28
-
-/* Waiting for Control Read:
- *  - A transition to this state indicates a fresh USB connection,
- *    before the first Setup Packet. The connection speed is not
- *    known. Firmware is waiting for the first Control Read.
- *  - Starting state: This state can be thought of as the FSM's typical
- *    starting state.
- *  - Tip: Upon the first SS Control Read the FSM never
- *    returns to this state.
- */
-#define DEFECT7374_FSM_WAITING_FOR_CONTROL_READ (1 << DEFECT7374_FSM_FIELD)
-
-/* Non-SS Control Read:
- *  - A transition to this state indicates detection of the first HS
- *    or FS Control Read.
- *  - Tip: Upon the first SS Control Read the FSM never
- *    returns to this state.
- */
-#define	DEFECT7374_FSM_NON_SS_CONTROL_READ (2 << DEFECT7374_FSM_FIELD)
-
-/* SS Control Read:
- *  - A transition to this state indicates detection of the
- *    first SS Control Read.
- *  - This state indicates workaround completion. Workarounds no longer
- *    need to be applied (as long as the chip remains powered up).
- *  - Tip: Once in this state the FSM state does not change (until
- *    the chip's power is lost and restored).
- *  - This can be thought of as the final state of the FSM;
- *    the FSM 'locks-up' in this state until the chip loses power.
- */
-#define DEFECT7374_FSM_SS_CONTROL_READ (3 << DEFECT7374_FSM_FIELD)
-
 #ifdef USE_RDK_LEDS
 
 static inline void net2280_led_init (struct net2280 *dev)
@@ -271,9 +198,6 @@ void net2280_led_speed (struct net2280 *dev, enum usb_device_speed speed)
 {
 	u32	val = readl (&dev->regs->gpioctl);
 	switch (speed) {
-	case USB_SPEED_SUPER:		/* green + red */
-		val |= (1 << GPIO0_DATA) | (1 << GPIO1_DATA);
-		break;
 	case USB_SPEED_HIGH:		/* green */
 		val &= ~(1 << GPIO0_DATA);
 		val |= (1 << GPIO1_DATA);
@@ -347,17 +271,6 @@ static inline void net2280_led_shutdown (struct net2280 *dev)
 
 /*-------------------------------------------------------------------------*/
 
-static inline void set_fifo_bytecount(struct net2280_ep *ep, unsigned count)
-{
-	if (ep->dev->pdev->vendor == 0x17cc)
-		writeb(count, 2 + (u8 __iomem *) &ep->regs->ep_cfg);
-	else{
-		u32 tmp = readl(&ep->cfg->ep_cfg) &
-					(~(0x07 << EP_FIFO_BYTE_COUNT));
-		writel(tmp | (count << EP_FIFO_BYTE_COUNT), &ep->cfg->ep_cfg);
-	}
-}
-
 static inline void start_out_naking (struct net2280_ep *ep)
 {
 	/* NOTE:  hardware races lurk here, and PING protocol issues */
diff --git a/include/linux/usb/usb338x.h b/include/linux/usb/usb338x.h
deleted file mode 100644
index f92eb635b9d3..000000000000
--- a/include/linux/usb/usb338x.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * USB 338x super/high/full speed USB device controller.
- * Unlike many such controllers, this one talks PCI.
- *
- * Copyright (C) 2002 NetChip Technology, Inc. (http://www.netchip.com)
- * Copyright (C) 2003 David Brownell
- * Copyright (C) 2014 Ricardo Ribalda - Qtechnology/AS
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef __LINUX_USB_USB338X_H
-#define __LINUX_USB_USB338X_H
-
-#include <linux/usb/net2280.h>
-
-/*
- * Extra defined bits for net2280 registers
- */
-#define     SCRATCH			      0x0b
-
-#define     DEFECT7374_FSM_FIELD                28
-#define     SUPER_SPEED				 8
-#define     DMA_REQUEST_OUTSTANDING              5
-#define     DMA_PAUSE_DONE_INTERRUPT            26
-#define     SET_ISOCHRONOUS_DELAY               24
-#define     SET_SEL                             22
-#define     SUPER_SPEED_MODE                     8
-
-/*ep_cfg*/
-#define     MAX_BURST_SIZE                      24
-#define     EP_FIFO_BYTE_COUNT                  16
-#define     IN_ENDPOINT_ENABLE                  14
-#define     IN_ENDPOINT_TYPE                    12
-#define     OUT_ENDPOINT_ENABLE                 10
-#define     OUT_ENDPOINT_TYPE                    8
-
-struct usb338x_usb_ext_regs {
-	u32     usbclass;
-#define     DEVICE_PROTOCOL                     16
-#define     DEVICE_SUB_CLASS                     8
-#define     DEVICE_CLASS                         0
-	u32     ss_sel;
-#define     U2_SYSTEM_EXIT_LATENCY               8
-#define     U1_SYSTEM_EXIT_LATENCY               0
-	u32     ss_del;
-#define     U2_DEVICE_EXIT_LATENCY               8
-#define     U1_DEVICE_EXIT_LATENCY               0
-	u32     usb2lpm;
-#define     USB_L1_LPM_HIRD                      2
-#define     USB_L1_LPM_REMOTE_WAKE               1
-#define     USB_L1_LPM_SUPPORT                   0
-	u32     usb3belt;
-#define     BELT_MULTIPLIER                     10
-#define     BEST_EFFORT_LATENCY_TOLERANCE        0
-	u32     usbctl2;
-#define     LTM_ENABLE                           7
-#define     U2_ENABLE                            6
-#define     U1_ENABLE                            5
-#define     FUNCTION_SUSPEND                     4
-#define     USB3_CORE_ENABLE                     3
-#define     USB2_CORE_ENABLE                     2
-#define     SERIAL_NUMBER_STRING_ENABLE          0
-	u32     in_timeout;
-#define     GPEP3_TIMEOUT                       19
-#define     GPEP2_TIMEOUT                       18
-#define     GPEP1_TIMEOUT                       17
-#define     GPEP0_TIMEOUT                       16
-#define     GPEP3_TIMEOUT_VALUE                 13
-#define     GPEP3_TIMEOUT_ENABLE                12
-#define     GPEP2_TIMEOUT_VALUE                  9
-#define     GPEP2_TIMEOUT_ENABLE                 8
-#define     GPEP1_TIMEOUT_VALUE                  5
-#define     GPEP1_TIMEOUT_ENABLE                 4
-#define     GPEP0_TIMEOUT_VALUE                  1
-#define     GPEP0_TIMEOUT_ENABLE                 0
-	u32     isodelay;
-#define     ISOCHRONOUS_DELAY                    0
-} __packed;
-
-struct usb338x_fifo_regs {
-	/* offset 0x0500, 0x0520, 0x0540, 0x0560, 0x0580 */
-	u32     ep_fifo_size_base;
-#define     IN_FIFO_BASE_ADDRESS                                22
-#define     IN_FIFO_SIZE                                        16
-#define     OUT_FIFO_BASE_ADDRESS                               6
-#define     OUT_FIFO_SIZE                                       0
-	u32     ep_fifo_out_wrptr;
-	u32     ep_fifo_out_rdptr;
-	u32     ep_fifo_in_wrptr;
-	u32     ep_fifo_in_rdptr;
-	u32     unused[3];
-} __packed;
-
-
-/* Link layer */
-struct usb338x_ll_regs {
-	/* offset 0x700 */
-	u32   ll_ltssm_ctrl1;
-	u32   ll_ltssm_ctrl2;
-	u32   ll_ltssm_ctrl3;
-	u32   unused[2];
-	u32   ll_general_ctrl0;
-	u32   ll_general_ctrl1;
-#define     PM_U3_AUTO_EXIT                                     29
-#define     PM_U2_AUTO_EXIT                                     28
-#define     PM_U1_AUTO_EXIT                                     27
-#define     PM_FORCE_U2_ENTRY                                   26
-#define     PM_FORCE_U1_ENTRY                                   25
-#define     PM_LGO_COLLISION_SEND_LAU                           24
-#define     PM_DIR_LINK_REJECT                                  23
-#define     PM_FORCE_LINK_ACCEPT                                22
-#define     PM_DIR_ENTRY_U3                                     20
-#define     PM_DIR_ENTRY_U2                                     19
-#define     PM_DIR_ENTRY_U1                                     18
-#define     PM_U2_ENABLE                                        17
-#define     PM_U1_ENABLE                                        16
-#define     SKP_THRESHOLD_ADJUST_FMW                            8
-#define     RESEND_DPP_ON_LRTY_FMW                              7
-#define     DL_BIT_VALUE_FMW                                    6
-#define     FORCE_DL_BIT                                        5
-	u32   ll_general_ctrl2;
-#define     SELECT_INVERT_LANE_POLARITY                         7
-#define     FORCE_INVERT_LANE_POLARITY                          6
-	u32   ll_general_ctrl3;
-	u32   ll_general_ctrl4;
-	u32   ll_error_gen;
-} __packed;
-
-struct usb338x_ll_lfps_regs {
-	/* offset 0x748 */
-	u32   ll_lfps_5;
-#define     TIMER_LFPS_6US                                      16
-	u32   ll_lfps_6;
-#define     TIMER_LFPS_80US                                     0
-} __packed;
-
-struct usb338x_ll_tsn_regs {
-	/* offset 0x77C */
-	u32   ll_tsn_counters_2;
-#define     HOT_TX_NORESET_TS2                                  24
-	u32   ll_tsn_counters_3;
-#define     HOT_RX_RESET_TS2                                    0
-} __packed;
-
-struct usb338x_ll_chi_regs {
-	/* offset 0x79C */
-	u32   ll_tsn_chicken_bit;
-#define     RECOVERY_IDLE_TO_RECOVER_FMW                        3
-} __packed;
-
-/* protocol layer */
-struct usb338x_pl_regs {
-	/* offset 0x800 */
-	u32   pl_reg_1;
-	u32   pl_reg_2;
-	u32   pl_reg_3;
-	u32   pl_reg_4;
-	u32   pl_ep_ctrl;
-	/* Protocol Layer Endpoint Control*/
-#define     PL_EP_CTRL                                  0x810
-#define     ENDPOINT_SELECT                             0
-	/* [4:0] */
-#define     EP_INITIALIZED                              16
-#define     SEQUENCE_NUMBER_RESET                       17
-#define     CLEAR_ACK_ERROR_CODE                        20
-	u32   pl_reg_6;
-	u32   pl_reg_7;
-	u32   pl_reg_8;
-	u32   pl_ep_status_1;
-	/* Protocol Layer Endpoint Status 1*/
-#define     PL_EP_STATUS_1                              0x820
-#define     STATE                                       16
-#define     ACK_GOOD_NORMAL                             0x11
-#define     ACK_GOOD_MORE_ACKS_TO_COME                  0x16
-	u32   pl_ep_status_2;
-	u32   pl_ep_status_3;
-	/* Protocol Layer Endpoint Status 3*/
-#define     PL_EP_STATUS_3                              0x828
-#define     SEQUENCE_NUMBER                             0
-	u32   pl_ep_status_4;
-	/* Protocol Layer Endpoint Status 4*/
-#define     PL_EP_STATUS_4                              0x82c
-	u32   pl_ep_cfg_4;
-	/* Protocol Layer Endpoint Configuration 4*/
-#define     PL_EP_CFG_4                                 0x830
-#define     NON_CTRL_IN_TOLERATE_BAD_DIR                6
-} __packed;
-
-#endif /* __LINUX_USB_USB338X_H */
-- 
cgit 


From e0b0baadb7a4509bdcd5ba37d0be61e2c4bb0d48 Mon Sep 17 00:00:00 2001
From: Richard Genoud <richard.genoud@gmail.com>
Date: Tue, 13 May 2014 20:20:44 +0200
Subject: tty/serial: at91: use mctrl_gpio helpers

On sam9x5, dedicated CTS (and RTS) pins are unusable together with the
LCDC, the EMAC, or the MMC because they share the same line.

Moreover, the USART controller doesn't handle DTR/DSR/DCD/RI signals,
so we have to control them via GPIO.

This patch allows GPIOs to be used to control the CTS/RTS/DTR/DSR/DCD/RI
signals.

Signed-off-by: Richard Genoud <richard.genoud@gmail.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/serial/atmel-usart.txt     |  10 +-
 arch/arm/mach-at91/at91rm9200_devices.c            |  16 ++--
 arch/arm/mach-at91/at91sam9260_devices.c           |   7 --
 arch/arm/mach-at91/at91sam9261_devices.c           |   4 -
 arch/arm/mach-at91/at91sam9263_devices.c           |   4 -
 arch/arm/mach-at91/at91sam9g45_devices.c           |   5 -
 arch/arm/mach-at91/at91sam9rl_devices.c            |   5 -
 drivers/tty/serial/Kconfig                         |   1 +
 drivers/tty/serial/atmel_serial.c                  | 105 ++++++++++++++-------
 include/linux/platform_data/atmel.h                |   1 -
 10 files changed, 89 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/serial/atmel-usart.txt b/Documentation/devicetree/bindings/serial/atmel-usart.txt
index 2f7aad71b3c9..a6391e70a8fd 100644
--- a/Documentation/devicetree/bindings/serial/atmel-usart.txt
+++ b/Documentation/devicetree/bindings/serial/atmel-usart.txt
@@ -13,8 +13,9 @@ Required properties:
 Optional properties:
 - atmel,use-dma-rx: use of PDC or DMA for receiving data
 - atmel,use-dma-tx: use of PDC or DMA for transmitting data
-- rts-gpios: specify a GPIO for RTS line. It will use specified PIO instead of the peripheral
-  function pin for the USART RTS feature. If unsure, don't specify this property.
+- {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD line respectively.
+  It will use specified PIO instead of the peripheral function pin for the USART feature.
+  If unsure, don't specify this property.
 - add dma bindings for dma transfer:
 	- dmas: DMA specifier, consisting of a phandle to DMA controller node,
 		memory peripheral interface and USART DMA channel ID, FIFO configuration.
@@ -36,6 +37,11 @@ Example:
 		atmel,use-dma-rx;
 		atmel,use-dma-tx;
 		rts-gpios = <&pioD 15 GPIO_ACTIVE_LOW>;
+		cts-gpios = <&pioD 16 GPIO_ACTIVE_LOW>;
+		dtr-gpios = <&pioD 17 GPIO_ACTIVE_LOW>;
+		dsr-gpios = <&pioD 18 GPIO_ACTIVE_LOW>;
+		dcd-gpios = <&pioD 20 GPIO_ACTIVE_LOW>;
+		rng-gpios = <&pioD 19 GPIO_ACTIVE_LOW>;
 	};
 
 - use DMA:
diff --git a/arch/arm/mach-at91/at91rm9200_devices.c b/arch/arm/mach-at91/at91rm9200_devices.c
index f3f19f21352a..291a90a5b1d4 100644
--- a/arch/arm/mach-at91/at91rm9200_devices.c
+++ b/arch/arm/mach-at91/at91rm9200_devices.c
@@ -15,6 +15,7 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/gpio.h>
+#include <linux/gpio/driver.h>
 #include <linux/platform_device.h>
 #include <linux/i2c-gpio.h>
 
@@ -923,7 +924,6 @@ static struct resource dbgu_resources[] = {
 static struct atmel_uart_data dbgu_data = {
 	.use_dma_tx	= 0,
 	.use_dma_rx	= 0,		/* DBGU not capable of receive DMA */
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 dbgu_dmamask = DMA_BIT_MASK(32);
@@ -962,7 +962,14 @@ static struct resource uart0_resources[] = {
 static struct atmel_uart_data uart0_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
+};
+
+static struct gpiod_lookup_table uart0_gpios_table = {
+	.dev_id = "atmel_usart",
+	.table = {
+		GPIO_LOOKUP("pioA", 21, "rts", GPIO_ACTIVE_LOW),
+		{ },
+	},
 };
 
 static u64 uart0_dmamask = DMA_BIT_MASK(32);
@@ -993,7 +1000,7 @@ static inline void configure_usart0_pins(unsigned pins)
 		 * We need to drive the pin manually. The serial driver will driver
 		 * this to high when initializing.
 		 */
-		uart0_data.rts_gpio = AT91_PIN_PA21;
+		gpiod_add_lookup_table(&uart0_gpios_table);
 	}
 }
 
@@ -1013,7 +1020,6 @@ static struct resource uart1_resources[] = {
 static struct atmel_uart_data uart1_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart1_dmamask = DMA_BIT_MASK(32);
@@ -1065,7 +1071,6 @@ static struct resource uart2_resources[] = {
 static struct atmel_uart_data uart2_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart2_dmamask = DMA_BIT_MASK(32);
@@ -1109,7 +1114,6 @@ static struct resource uart3_resources[] = {
 static struct atmel_uart_data uart3_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart3_dmamask = DMA_BIT_MASK(32);
diff --git a/arch/arm/mach-at91/at91sam9260_devices.c b/arch/arm/mach-at91/at91sam9260_devices.c
index a0282928e9c1..526453ecdaff 100644
--- a/arch/arm/mach-at91/at91sam9260_devices.c
+++ b/arch/arm/mach-at91/at91sam9260_devices.c
@@ -820,7 +820,6 @@ static struct resource dbgu_resources[] = {
 static struct atmel_uart_data dbgu_data = {
 	.use_dma_tx	= 0,
 	.use_dma_rx	= 0,		/* DBGU not capable of receive DMA */
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 dbgu_dmamask = DMA_BIT_MASK(32);
@@ -859,7 +858,6 @@ static struct resource uart0_resources[] = {
 static struct atmel_uart_data uart0_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart0_dmamask = DMA_BIT_MASK(32);
@@ -911,7 +909,6 @@ static struct resource uart1_resources[] = {
 static struct atmel_uart_data uart1_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart1_dmamask = DMA_BIT_MASK(32);
@@ -955,7 +952,6 @@ static struct resource uart2_resources[] = {
 static struct atmel_uart_data uart2_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart2_dmamask = DMA_BIT_MASK(32);
@@ -999,7 +995,6 @@ static struct resource uart3_resources[] = {
 static struct atmel_uart_data uart3_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart3_dmamask = DMA_BIT_MASK(32);
@@ -1043,7 +1038,6 @@ static struct resource uart4_resources[] = {
 static struct atmel_uart_data uart4_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart4_dmamask = DMA_BIT_MASK(32);
@@ -1082,7 +1076,6 @@ static struct resource uart5_resources[] = {
 static struct atmel_uart_data uart5_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart5_dmamask = DMA_BIT_MASK(32);
diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c
index 80e35895d28f..b5f7a7226ff8 100644
--- a/arch/arm/mach-at91/at91sam9261_devices.c
+++ b/arch/arm/mach-at91/at91sam9261_devices.c
@@ -881,7 +881,6 @@ static struct resource dbgu_resources[] = {
 static struct atmel_uart_data dbgu_data = {
 	.use_dma_tx	= 0,
 	.use_dma_rx	= 0,		/* DBGU not capable of receive DMA */
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 dbgu_dmamask = DMA_BIT_MASK(32);
@@ -920,7 +919,6 @@ static struct resource uart0_resources[] = {
 static struct atmel_uart_data uart0_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart0_dmamask = DMA_BIT_MASK(32);
@@ -964,7 +962,6 @@ static struct resource uart1_resources[] = {
 static struct atmel_uart_data uart1_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart1_dmamask = DMA_BIT_MASK(32);
@@ -1008,7 +1005,6 @@ static struct resource uart2_resources[] = {
 static struct atmel_uart_data uart2_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart2_dmamask = DMA_BIT_MASK(32);
diff --git a/arch/arm/mach-at91/at91sam9263_devices.c b/arch/arm/mach-at91/at91sam9263_devices.c
index 43d53d6156dd..39803c3296b2 100644
--- a/arch/arm/mach-at91/at91sam9263_devices.c
+++ b/arch/arm/mach-at91/at91sam9263_devices.c
@@ -1325,7 +1325,6 @@ static struct resource dbgu_resources[] = {
 static struct atmel_uart_data dbgu_data = {
 	.use_dma_tx	= 0,
 	.use_dma_rx	= 0,		/* DBGU not capable of receive DMA */
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 dbgu_dmamask = DMA_BIT_MASK(32);
@@ -1364,7 +1363,6 @@ static struct resource uart0_resources[] = {
 static struct atmel_uart_data uart0_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart0_dmamask = DMA_BIT_MASK(32);
@@ -1408,7 +1406,6 @@ static struct resource uart1_resources[] = {
 static struct atmel_uart_data uart1_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart1_dmamask = DMA_BIT_MASK(32);
@@ -1452,7 +1449,6 @@ static struct resource uart2_resources[] = {
 static struct atmel_uart_data uart2_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart2_dmamask = DMA_BIT_MASK(32);
diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c
index dab362c06487..4dfedd3f2e15 100644
--- a/arch/arm/mach-at91/at91sam9g45_devices.c
+++ b/arch/arm/mach-at91/at91sam9g45_devices.c
@@ -1588,7 +1588,6 @@ static struct resource dbgu_resources[] = {
 static struct atmel_uart_data dbgu_data = {
 	.use_dma_tx	= 0,
 	.use_dma_rx	= 0,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 dbgu_dmamask = DMA_BIT_MASK(32);
@@ -1627,7 +1626,6 @@ static struct resource uart0_resources[] = {
 static struct atmel_uart_data uart0_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart0_dmamask = DMA_BIT_MASK(32);
@@ -1671,7 +1669,6 @@ static struct resource uart1_resources[] = {
 static struct atmel_uart_data uart1_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart1_dmamask = DMA_BIT_MASK(32);
@@ -1715,7 +1712,6 @@ static struct resource uart2_resources[] = {
 static struct atmel_uart_data uart2_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart2_dmamask = DMA_BIT_MASK(32);
@@ -1759,7 +1755,6 @@ static struct resource uart3_resources[] = {
 static struct atmel_uart_data uart3_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart3_dmamask = DMA_BIT_MASK(32);
diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c
index 428fc412aaf1..f75985062913 100644
--- a/arch/arm/mach-at91/at91sam9rl_devices.c
+++ b/arch/arm/mach-at91/at91sam9rl_devices.c
@@ -957,7 +957,6 @@ static struct resource dbgu_resources[] = {
 static struct atmel_uart_data dbgu_data = {
 	.use_dma_tx	= 0,
 	.use_dma_rx	= 0,		/* DBGU not capable of receive DMA */
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 dbgu_dmamask = DMA_BIT_MASK(32);
@@ -996,7 +995,6 @@ static struct resource uart0_resources[] = {
 static struct atmel_uart_data uart0_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart0_dmamask = DMA_BIT_MASK(32);
@@ -1048,7 +1046,6 @@ static struct resource uart1_resources[] = {
 static struct atmel_uart_data uart1_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart1_dmamask = DMA_BIT_MASK(32);
@@ -1092,7 +1089,6 @@ static struct resource uart2_resources[] = {
 static struct atmel_uart_data uart2_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart2_dmamask = DMA_BIT_MASK(32);
@@ -1136,7 +1132,6 @@ static struct resource uart3_resources[] = {
 static struct atmel_uart_data uart3_data = {
 	.use_dma_tx	= 1,
 	.use_dma_rx	= 1,
-	.rts_gpio	= -EINVAL,
 };
 
 static u64 uart3_dmamask = DMA_BIT_MASK(32);
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 4bf6d220357b..fb57159bad3a 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -117,6 +117,7 @@ config SERIAL_ATMEL
 	bool "AT91 / AT32 on-chip serial port support"
 	depends on ARCH_AT91 || AVR32
 	select SERIAL_CORE
+	select SERIAL_MCTRL_GPIO
 	help
 	  This enables the driver for the on-chip UARTs of the Atmel
 	  AT91 and AT32 processors.
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 53eeea13ff16..43ca659c1d4b 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -43,6 +43,8 @@
 #include <linux/platform_data/atmel.h>
 #include <linux/timer.h>
 #include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/err.h>
 
 #include <asm/io.h>
 #include <asm/ioctls.h>
@@ -57,6 +59,8 @@
 
 #include <linux/serial_core.h>
 
+#include "serial_mctrl_gpio.h"
+
 static void atmel_start_rx(struct uart_port *port);
 static void atmel_stop_rx(struct uart_port *port);
 
@@ -162,7 +166,7 @@ struct atmel_uart_port {
 	struct circ_buf		rx_ring;
 
 	struct serial_rs485	rs485;		/* rs485 settings */
-	int			rts_gpio;	/* optional RTS GPIO */
+	struct mctrl_gpios	*gpios;
 	unsigned int		tx_done_mask;
 	bool			is_usart;	/* usart or uart */
 	struct timer_list	uart_timer;	/* uart timer */
@@ -237,6 +241,50 @@ static bool atmel_use_dma_rx(struct uart_port *port)
 	return atmel_port->use_dma_rx;
 }
 
+static unsigned int atmel_get_lines_status(struct uart_port *port)
+{
+	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
+	unsigned int status, ret = 0;
+
+	status = UART_GET_CSR(port);
+
+	mctrl_gpio_get(atmel_port->gpios, &ret);
+
+	if (!IS_ERR_OR_NULL(mctrl_gpio_to_gpiod(atmel_port->gpios,
+						UART_GPIO_CTS))) {
+		if (ret & TIOCM_CTS)
+			status &= ~ATMEL_US_CTS;
+		else
+			status |= ATMEL_US_CTS;
+	}
+
+	if (!IS_ERR_OR_NULL(mctrl_gpio_to_gpiod(atmel_port->gpios,
+						UART_GPIO_DSR))) {
+		if (ret & TIOCM_DSR)
+			status &= ~ATMEL_US_DSR;
+		else
+			status |= ATMEL_US_DSR;
+	}
+
+	if (!IS_ERR_OR_NULL(mctrl_gpio_to_gpiod(atmel_port->gpios,
+						UART_GPIO_RI))) {
+		if (ret & TIOCM_RI)
+			status &= ~ATMEL_US_RI;
+		else
+			status |= ATMEL_US_RI;
+	}
+
+	if (!IS_ERR_OR_NULL(mctrl_gpio_to_gpiod(atmel_port->gpios,
+						UART_GPIO_DCD))) {
+		if (ret & TIOCM_CD)
+			status &= ~ATMEL_US_DCD;
+		else
+			status |= ATMEL_US_DCD;
+	}
+
+	return status;
+}
+
 /* Enable or disable the rs485 support */
 void atmel_config_rs485(struct uart_port *port, struct serial_rs485 *rs485conf)
 {
@@ -296,17 +344,6 @@ static void atmel_set_mctrl(struct uart_port *port, u_int mctrl)
 	unsigned int mode;
 	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
 
-	/*
-	 * AT91RM9200 Errata #39: RTS0 is not internally connected
-	 * to PA21. We need to drive the pin as a GPIO.
-	 */
-	if (gpio_is_valid(atmel_port->rts_gpio)) {
-		if (mctrl & TIOCM_RTS)
-			gpio_set_value(atmel_port->rts_gpio, 0);
-		else
-			gpio_set_value(atmel_port->rts_gpio, 1);
-	}
-
 	if (mctrl & TIOCM_RTS)
 		control |= ATMEL_US_RTSEN;
 	else
@@ -319,6 +356,8 @@ static void atmel_set_mctrl(struct uart_port *port, u_int mctrl)
 
 	UART_PUT_CR(port, control);
 
+	mctrl_gpio_set(atmel_port->gpios, mctrl);
+
 	/* Local loopback mode? */
 	mode = UART_GET_MR(port) & ~ATMEL_US_CHMODE;
 	if (mctrl & TIOCM_LOOP)
@@ -346,7 +385,8 @@ static void atmel_set_mctrl(struct uart_port *port, u_int mctrl)
  */
 static u_int atmel_get_mctrl(struct uart_port *port)
 {
-	unsigned int status, ret = 0;
+	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
+	unsigned int ret = 0, status;
 
 	status = UART_GET_CSR(port);
 
@@ -362,7 +402,7 @@ static u_int atmel_get_mctrl(struct uart_port *port)
 	if (!(status & ATMEL_US_RI))
 		ret |= TIOCM_RI;
 
-	return ret;
+	return mctrl_gpio_get(atmel_port->gpios, &ret);
 }
 
 /*
@@ -1042,7 +1082,7 @@ static irqreturn_t atmel_interrupt(int irq, void *dev_id)
 	unsigned int status, pending, pass_counter = 0;
 
 	do {
-		status = UART_GET_CSR(port);
+		status = atmel_get_lines_status(port);
 		pending = status & UART_GET_IMR(port);
 		if (!pending)
 			break;
@@ -1568,7 +1608,7 @@ static int atmel_startup(struct uart_port *port)
 	}
 
 	/* Save current CSR for comparison in atmel_tasklet_func() */
-	atmel_port->irq_status_prev = UART_GET_CSR(port);
+	atmel_port->irq_status_prev = atmel_get_lines_status(port);
 	atmel_port->irq_status = atmel_port->irq_status_prev;
 
 	/*
@@ -2324,6 +2364,15 @@ static int atmel_serial_resume(struct platform_device *pdev)
 #define atmel_serial_resume NULL
 #endif
 
+static int atmel_init_gpios(struct atmel_uart_port *p, struct device *dev)
+{
+	p->gpios = mctrl_gpio_init(dev, 0);
+	if (IS_ERR_OR_NULL(p->gpios))
+		return -1;
+
+	return 0;
+}
+
 static int atmel_serial_probe(struct platform_device *pdev)
 {
 	struct atmel_uart_port *port;
@@ -2359,25 +2408,11 @@ static int atmel_serial_probe(struct platform_device *pdev)
 	port = &atmel_ports[ret];
 	port->backup_imr = 0;
 	port->uart.line = ret;
-	port->rts_gpio = -EINVAL; /* Invalid, zero could be valid */
-	if (pdata)
-		port->rts_gpio = pdata->rts_gpio;
-	else if (np)
-		port->rts_gpio = of_get_named_gpio(np, "rts-gpios", 0);
-
-	if (gpio_is_valid(port->rts_gpio)) {
-		ret = devm_gpio_request(&pdev->dev, port->rts_gpio, "RTS");
-		if (ret) {
-			dev_err(&pdev->dev, "error requesting RTS GPIO\n");
-			goto err;
-		}
-		/* Default to 1 as RTS is active low */
-		ret = gpio_direction_output(port->rts_gpio, 1);
-		if (ret) {
-			dev_err(&pdev->dev, "error setting up RTS GPIO\n");
-			goto err;
-		}
-	}
+
+	ret = atmel_init_gpios(port, &pdev->dev);
+	if (ret < 0)
+		dev_err(&pdev->dev, "%s",
+			"Failed to initialize GPIOs. The serial port may not work as expected");
 
 	ret = atmel_init_port(port, pdev);
 	if (ret)
diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h
index e26b0c14edea..cea9f70133c5 100644
--- a/include/linux/platform_data/atmel.h
+++ b/include/linux/platform_data/atmel.h
@@ -84,7 +84,6 @@ struct atmel_uart_data {
 	short			use_dma_rx;	/* use receive DMA? */
 	void __iomem		*regs;		/* virt. base address, if any */
 	struct serial_rs485	rs485;		/* rs485 settings */
-	int			rts_gpio;	/* optional RTS GPIO */
 };
 
  /* Touchscreen Controller */
-- 
cgit 


From e4bdab70dd07d8648a1ec3e029239aa86eb836b6 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 13 May 2014 12:09:28 +0200
Subject: console: Use explicit pointer type for vc_uni_pagedir* fields

The vc_data.vc_uni_pagedir field is currently a long int, supposedly so
that it can be used generically.  This, however, leads to lots of casts
to pointer, and significantly worsens readability.

Actually, we now have only a single uni_pagedir map implementation,
and this is unlikely to change.  So, it'd be much simpler and less
error-prone to just use the exact pointer to struct uni_pagedir
instead of long.

Ditto for vc_uni_pagedir_loc.  It's a pointer to the uni_pagedir, thus
it can be changed similarly to the exact type.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/consolemap.c    | 38 +++++++++++++++++++-------------------
 drivers/tty/vt/vt.c            |  2 +-
 drivers/video/console/vgacon.c |  4 ++--
 include/linux/console_struct.h |  5 +++--
 4 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/consolemap.c b/drivers/tty/vt/consolemap.c
index 2978ca596a7f..3fdc786b6b2f 100644
--- a/drivers/tty/vt/consolemap.c
+++ b/drivers/tty/vt/consolemap.c
@@ -262,7 +262,7 @@ u16 inverse_translate(struct vc_data *conp, int glyph, int use_unicode)
 	int m;
 	if (glyph < 0 || glyph >= MAX_GLYPH)
 		return 0;
-	else if (!(p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc))
+	else if (!(p = *conp->vc_uni_pagedir_loc))
 		return glyph;
 	else if (use_unicode) {
 		if (!p->inverse_trans_unicode)
@@ -287,7 +287,7 @@ static void update_user_maps(void)
 	for (i = 0; i < MAX_NR_CONSOLES; i++) {
 		if (!vc_cons_allocated(i))
 			continue;
-		p = (struct uni_pagedir *)*vc_cons[i].d->vc_uni_pagedir_loc;
+		p = *vc_cons[i].d->vc_uni_pagedir_loc;
 		if (p && p != q) {
 			set_inverse_transl(vc_cons[i].d, p, USER_MAP);
 			set_inverse_trans_unicode(vc_cons[i].d, p);
@@ -418,10 +418,10 @@ void con_free_unimap(struct vc_data *vc)
 {
 	struct uni_pagedir *p;
 
-	p = (struct uni_pagedir *)*vc->vc_uni_pagedir_loc;
+	p = *vc->vc_uni_pagedir_loc;
 	if (!p)
 		return;
-	*vc->vc_uni_pagedir_loc = 0;
+	*vc->vc_uni_pagedir_loc = NULL;
 	if (--p->refcount)
 		return;
 	con_release_unimap(p);
@@ -436,7 +436,7 @@ static int con_unify_unimap(struct vc_data *conp, struct uni_pagedir *p)
 	for (i = 0; i < MAX_NR_CONSOLES; i++) {
 		if (!vc_cons_allocated(i))
 			continue;
-		q = (struct uni_pagedir *)*vc_cons[i].d->vc_uni_pagedir_loc;
+		q = *vc_cons[i].d->vc_uni_pagedir_loc;
 		if (!q || q == p || q->sum != p->sum)
 			continue;
 		for (j = 0; j < 32; j++) {
@@ -459,7 +459,7 @@ static int con_unify_unimap(struct vc_data *conp, struct uni_pagedir *p)
 		}
 		if (j == 32) {
 			q->refcount++;
-			*conp->vc_uni_pagedir_loc = (unsigned long)q;
+			*conp->vc_uni_pagedir_loc = q;
 			con_release_unimap(p);
 			kfree(p);
 			return 1;
@@ -500,7 +500,7 @@ static int con_do_clear_unimap(struct vc_data *vc, struct unimapinit *ui)
 {
 	struct uni_pagedir *p, *q;
 
-	p = (struct uni_pagedir *)*vc->vc_uni_pagedir_loc;
+	p = *vc->vc_uni_pagedir_loc;
 	if (p && p->readonly)
 		return -EIO;
 
@@ -512,7 +512,7 @@ static int con_do_clear_unimap(struct vc_data *vc, struct unimapinit *ui)
 			return -ENOMEM;
 		}
 		q->refcount=1;
-		*vc->vc_uni_pagedir_loc = (unsigned long)q;
+		*vc->vc_uni_pagedir_loc = q;
 	} else {
 		if (p == dflt) dflt = NULL;
 		p->refcount++;
@@ -539,7 +539,7 @@ int con_set_unimap(struct vc_data *vc, ushort ct, struct unipair __user *list)
 	console_lock();
 
 	/* Save original vc_unipagdir_loc in case we allocate a new one */
-	p = (struct uni_pagedir *)*vc->vc_uni_pagedir_loc;
+	p = *vc->vc_uni_pagedir_loc;
 	if (p->readonly) {
 		console_unlock();
 		return -EIO;
@@ -564,7 +564,7 @@ int con_set_unimap(struct vc_data *vc, ushort ct, struct unipair __user *list)
 		 * Since refcount was > 1, con_clear_unimap() allocated a
 		 * a new uni_pagedir for this vc.  Re: p != q
 		 */
-		q = (struct uni_pagedir *)*vc->vc_uni_pagedir_loc;
+		q = *vc->vc_uni_pagedir_loc;
 
 		/*
 		 * uni_pgdir is a 32*32*64 table with rows allocated
@@ -586,7 +586,7 @@ int con_set_unimap(struct vc_data *vc, ushort ct, struct unipair __user *list)
 					err1 = con_insert_unipair(q, l, p2[k]);
 					if (err1) {
 						p->refcount++;
-						*vc->vc_uni_pagedir_loc = (unsigned long)p;
+						*vc->vc_uni_pagedir_loc = p;
 						con_release_unimap(q);
 						kfree(q);
 						console_unlock();
@@ -655,12 +655,12 @@ int con_set_default_unimap(struct vc_data *vc)
 	struct uni_pagedir *p;
 
 	if (dflt) {
-		p = (struct uni_pagedir *)*vc->vc_uni_pagedir_loc;
+		p = *vc->vc_uni_pagedir_loc;
 		if (p == dflt)
 			return 0;
 
 		dflt->refcount++;
-		*vc->vc_uni_pagedir_loc = (unsigned long)dflt;
+		*vc->vc_uni_pagedir_loc = dflt;
 		if (p && !--p->refcount) {
 			con_release_unimap(p);
 			kfree(p);
@@ -674,7 +674,7 @@ int con_set_default_unimap(struct vc_data *vc)
 	if (err)
 		return err;
     
-	p = (struct uni_pagedir *)*vc->vc_uni_pagedir_loc;
+	p = *vc->vc_uni_pagedir_loc;
 	q = dfont_unitable;
 	
 	for (i = 0; i < 256; i++)
@@ -685,7 +685,7 @@ int con_set_default_unimap(struct vc_data *vc)
 		}
 			
 	if (con_unify_unimap(vc, p)) {
-		dflt = (struct uni_pagedir *)*vc->vc_uni_pagedir_loc;
+		dflt = *vc->vc_uni_pagedir_loc;
 		return err;
 	}
 
@@ -713,9 +713,9 @@ int con_copy_unimap(struct vc_data *dst_vc, struct vc_data *src_vc)
 	if (*dst_vc->vc_uni_pagedir_loc == *src_vc->vc_uni_pagedir_loc)
 		return 0;
 	con_free_unimap(dst_vc);
-	q = (struct uni_pagedir *)*src_vc->vc_uni_pagedir_loc;
+	q = *src_vc->vc_uni_pagedir_loc;
 	q->refcount++;
-	*dst_vc->vc_uni_pagedir_loc = (long)q;
+	*dst_vc->vc_uni_pagedir_loc = q;
 	return 0;
 }
 EXPORT_SYMBOL(con_copy_unimap);
@@ -737,7 +737,7 @@ int con_get_unimap(struct vc_data *vc, ushort ct, ushort __user *uct, struct uni
 
 	ect = 0;
 	if (*vc->vc_uni_pagedir_loc) {
-		p = (struct uni_pagedir *)*vc->vc_uni_pagedir_loc;
+		p = *vc->vc_uni_pagedir_loc;
 		for (i = 0; i < 32; i++)
 		if ((p1 = p->uni_pgdir[i]))
 			for (j = 0; j < 32; j++)
@@ -810,7 +810,7 @@ conv_uni_to_pc(struct vc_data *conp, long ucs)
 	if (!*conp->vc_uni_pagedir_loc)
 		return -3;
 
-	p = (struct uni_pagedir *)*conp->vc_uni_pagedir_loc;  
+	p = *conp->vc_uni_pagedir_loc;
 	if ((p1 = p->uni_pgdir[ucs >> 11]) &&
 	    (p2 = p1[(ucs >> 6) & 0x1f]) &&
 	    (h = p2[ucs & 0x3f]) < MAX_GLYPH)
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 5149a72a84ff..5e0f6ff2e2f5 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -735,7 +735,7 @@ static void visual_init(struct vc_data *vc, int num, int init)
 	vc->vc_num = num;
 	vc->vc_display_fg = &master_display_fg;
 	vc->vc_uni_pagedir_loc = &vc->vc_uni_pagedir;
-	vc->vc_uni_pagedir = 0;
+	vc->vc_uni_pagedir = NULL;
 	vc->vc_hi_font_mask = 0;
 	vc->vc_complement_mask = 0;
 	vc->vc_can_do_color = 0;
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index 9e18770aaba6..f267284b423b 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -87,7 +87,7 @@ static void vgacon_save_screen(struct vc_data *c);
 static int vgacon_scroll(struct vc_data *c, int t, int b, int dir,
 			 int lines);
 static void vgacon_invert_region(struct vc_data *c, u16 * p, int count);
-static unsigned long vgacon_uni_pagedir;
+static struct uni_pagedir *vgacon_uni_pagedir;
 static int vgacon_refcount;
 
 /* Description of the hardware situation */
@@ -554,7 +554,7 @@ static const char *vgacon_startup(void)
 
 static void vgacon_init(struct vc_data *c, int init)
 {
-	unsigned long p;
+	struct uni_pagedir *p;
 
 	/*
 	 * We cannot be loaded as a module, therefore init is always 1,
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index 7f0c32908568..e859c98d1767 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -17,6 +17,7 @@
 #include <linux/workqueue.h>
 
 struct vt_struct;
+struct uni_pagedir;
 
 #define NPAR 16
 
@@ -104,8 +105,8 @@ struct vc_data {
 	unsigned int	vc_bell_pitch;		/* Console bell pitch */
 	unsigned int	vc_bell_duration;	/* Console bell duration */
 	struct vc_data **vc_display_fg;		/* [!] Ptr to var holding fg console for this display */
-	unsigned long	vc_uni_pagedir;
-	unsigned long	*vc_uni_pagedir_loc;  /* [!] Location of uni_pagedir variable for this console */
+	struct uni_pagedir *vc_uni_pagedir;
+	struct uni_pagedir **vc_uni_pagedir_loc; /* [!] Location of uni_pagedir variable for this console */
 	bool vc_panic_force_write; /* when oops/panic this VC can accept forced output/blanking */
 	/* additional information is in vt_kern.h */
 };
-- 
cgit 


From 6b121592f8a3fd2bd0de128637b76a0d0864d993 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 22 May 2014 17:07:36 -0600
Subject: PCI: Convert pci_dev_flags definitions to bit shifts

Convert the pci_dev_flags definitions from decimal constants to bit shifts.
We're only a few entries away from where using the decimal value becomes
cumbersome.  No functional change.

Tested-by: George Spelvin <linux@horizon.com>
Tested-by: Pat Erley <pat-lkml@erley.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 14b074bbc841..545903df00dc 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -164,13 +164,13 @@ enum pci_dev_flags {
 	/* INTX_DISABLE in PCI_COMMAND register disables MSI
 	 * generation too.
 	 */
-	PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1,
+	PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) (1 << 0),
 	/* Device configuration is irrevocably lost if disabled into D3 */
-	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2,
+	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) (1 << 1),
 	/* Provide indication device is assigned by a Virtual Machine Manager */
-	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4,
+	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) (1 << 2),
 	/* Flag for quirk use to store if quirk-specific ACS is enabled */
-	PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) 8,
+	PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3),
 };
 
 enum pci_irq_reroute_variant {
-- 
cgit 


From 31c2b8153c58f11ddb80dfd392c16f13c2d709c6 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Thu, 22 May 2014 17:07:43 -0600
Subject: PCI: Add support for DMA alias quirks

Some devices are broken and use a requester ID other than their physical
devfn.  Add a byte, using an existing gap in the pci_dev structure, to
store an alternate "alias" devfn.  A bit in the dev_flags tells us when
this is valid.  We then add the alias as one more step in the
pci_for_each_dma_alias() iterator.

Tested-by: George Spelvin <linux@horizon.com>
Tested-by: Pat Erley <pat-lkml@erley.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/search.c | 11 +++++++++++
 include/linux/pci.h  |  3 +++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 5601cdb8bbb3..2c19f3f40621 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -37,6 +37,17 @@ int pci_for_each_dma_alias(struct pci_dev *pdev,
 	if (ret)
 		return ret;
 
+	/*
+	 * If the device is broken and uses an alias requester ID for
+	 * DMA, iterate over that too.
+	 */
+	if (unlikely(pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN)) {
+		ret = fn(pdev, PCI_DEVID(pdev->bus->number,
+					 pdev->dma_alias_devfn), data);
+		if (ret)
+			return ret;
+	}
+
 	for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
 		struct pci_dev *tmp;
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 545903df00dc..9d4035c276f4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -171,6 +171,8 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) (1 << 2),
 	/* Flag for quirk use to store if quirk-specific ACS is enabled */
 	PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3),
+	/* Flag to indicate the device uses dma_alias_devfn */
+	PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4),
 };
 
 enum pci_irq_reroute_variant {
@@ -268,6 +270,7 @@ struct pci_dev {
 	u8		rom_base_reg;	/* which config register controls the ROM */
 	u8		pin;		/* which interrupt pin this device uses */
 	u16		pcie_flags_reg;	/* cached PCIe Capabilities Register */
+	u8		dma_alias_devfn;/* devfn of DMA alias, if any */
 
 	struct pci_driver *driver;	/* which driver has allocated this device */
 	u64		dma_mask;	/* Mask of the bits of bus address this
-- 
cgit 


From a20c93e3160e37ecccc738d8eef085c8507949ed Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Apr 2014 15:07:21 +0200
Subject: nfs: remove ->write_pageio_init from rpc ops

The write_pageio_init method is just a very convoluted way to grab the
right nfs_pageio_ops vector.  The vector to choose is not a choice of
protocol version, but just a pNFS vs MDS I/O choice that can simply be
done inside nfs_pageio_init_write based on the presence of a layout
driver, and a new force_mds flag for the special case of falling back
to MDS I/O on a pNFS-capable volume.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c         |  4 ++--
 fs/nfs/internal.h       |  2 +-
 fs/nfs/nfs3proc.c       |  1 -
 fs/nfs/nfs4proc.c       |  1 -
 fs/nfs/pnfs.c           | 16 +---------------
 fs/nfs/pnfs.h           |  8 --------
 fs/nfs/proc.c           |  1 -
 fs/nfs/write.c          | 21 +++++++++++++--------
 include/linux/nfs_xdr.h |  2 --
 9 files changed, 17 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b8797ae6831f..6a31102b0819 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -564,7 +564,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	dreq->count = 0;
 	get_dreq(dreq);
 
-	NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE,
+	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 
@@ -874,7 +874,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
-	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
+	nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 	get_dreq(dreq);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index dd8bfc2e2464..8431083de179 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -422,7 +422,7 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
 
 /* write.c */
 extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-			struct inode *inode, int ioflags,
+			struct inode *inode, int ioflags, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
 extern struct nfs_write_header *nfs_writehdr_alloc(void);
 extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index db60149c4579..e98488053906 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -951,7 +951,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.read_rpc_prepare = nfs3_proc_read_rpc_prepare,
 	.read_done	= nfs3_read_done,
 	.write_setup	= nfs3_proc_write_setup,
-	.write_pageio_init = nfs_pageio_init_write,
 	.write_rpc_prepare = nfs3_proc_write_rpc_prepare,
 	.write_done	= nfs3_write_done,
 	.commit_setup	= nfs3_proc_commit_setup,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 397be39c6dc8..8da0c62966b5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8437,7 +8437,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.read_rpc_prepare = nfs4_proc_read_rpc_prepare,
 	.read_done	= nfs4_read_done,
 	.write_setup	= nfs4_proc_write_setup,
-	.write_pageio_init = pnfs_pageio_init_write,
 	.write_rpc_prepare = nfs4_proc_write_rpc_prepare,
 	.write_done	= nfs4_write_done,
 	.commit_setup	= nfs4_proc_commit_setup,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index cb53d450ae32..9edac9f01c2a 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1447,20 +1447,6 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
 		nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
 }
 
-void
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
-		       int ioflags,
-		       const struct nfs_pgio_completion_ops *compl_ops)
-{
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
-
-	if (ld == NULL)
-		nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
-	else
-		nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags);
-}
-
 bool
 pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		     struct nfs_page *req)
@@ -1496,7 +1482,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
 	LIST_HEAD(failed);
 
 	/* Resend all requests through the MDS */
-	nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops);
+	nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops);
 	pgio.pg_dreq = dreq;
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 023793909778..e9ac8fbaee3d 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -182,8 +182,6 @@ void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
 
 void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
 			   const struct nfs_pgio_completion_ops *);
-void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
-			    int, const struct nfs_pgio_completion_ops *);
 
 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
 void unset_pnfs_layoutdriver(struct nfs_server *);
@@ -467,12 +465,6 @@ static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, str
 	nfs_pageio_init_read(pgio, inode, compl_ops);
 }
 
-static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
-					  const struct nfs_pgio_completion_ops *compl_ops)
-{
-	nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
-}
-
 static inline int
 pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
 		 struct nfs_commit_info *cinfo)
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index e55ce9e8b034..f9cc29590a18 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -739,7 +739,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.read_rpc_prepare = nfs_proc_read_rpc_prepare,
 	.read_done	= nfs_read_done,
 	.write_setup	= nfs_proc_write_setup,
-	.write_pageio_init = nfs_pageio_init_write,
 	.write_rpc_prepare = nfs_proc_write_rpc_prepare,
 	.write_done	= nfs_write_done,
 	.commit_setup	= nfs_proc_commit_setup,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index cd7c651f9b84..ee6d46fde76c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -354,10 +354,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
 	struct nfs_pageio_descriptor pgio;
 	int err;
 
-	NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio,
-							  page->mapping->host,
-							  wb_priority(wbc),
-							  &nfs_async_write_completion_ops);
+	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
+				false, &nfs_async_write_completion_ops);
 	err = nfs_do_writepage(page, wbc, &pgio);
 	nfs_pageio_complete(&pgio);
 	if (err < 0)
@@ -400,7 +398,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 
 	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
-	NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops);
+	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
+				&nfs_async_write_completion_ops);
 	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
 	nfs_pageio_complete(&pgio);
 
@@ -1282,11 +1281,17 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
 };
 
 void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
-			       struct inode *inode, int ioflags,
+			       struct inode *inode, int ioflags, bool force_mds,
 			       const struct nfs_pgio_completion_ops *compl_ops)
 {
-	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
-				NFS_SERVER(inode)->wsize, ioflags);
+	struct nfs_server *server = NFS_SERVER(inode);
+	const struct nfs_pageio_ops *pg_ops = &nfs_pageio_write_ops;
+
+#ifdef CONFIG_NFS_V4_1
+	if (server->pnfs_curr_ld && !force_mds)
+		pg_ops = server->pnfs_curr_ld->pg_write_ops;
+#endif
+	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->wsize, ioflags);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6fb5b2335b59..78216f859527 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1471,8 +1471,6 @@ struct nfs_rpc_ops {
 	int	(*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
 	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
-	void	(*write_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, int,
-				     const struct nfs_pgio_completion_ops *);
 	int	(*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
 	int	(*write_done)  (struct rpc_task *, struct nfs_write_data *);
 	void	(*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
-- 
cgit 


From fab5fc25d230edcc8ee72367e505955a2fae0cac Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Apr 2014 15:07:22 +0200
Subject: nfs: remove ->read_pageio_init from rpc ops

The read_pageio_init method is just a very convoluted way to grab the
right nfs_pageio_ops vector.  The vector to choose is not a choice of
protocol version, but just a pNFS vs MDS I/O choice that can simply be
done inside nfs_pageio_init_read based on the presence of a layout
driver, and a new force_mds flag for the special case of falling back
to MDS I/O on a pNFS-capable volume.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c         |  2 +-
 fs/nfs/internal.h       |  2 +-
 fs/nfs/nfs3proc.c       |  1 -
 fs/nfs/nfs4proc.c       |  1 -
 fs/nfs/pnfs.c           | 15 +--------------
 fs/nfs/pnfs.h           |  9 ---------
 fs/nfs/proc.c           |  1 -
 fs/nfs/read.c           | 19 ++++++++++++++-----
 include/linux/nfs_xdr.h |  2 --
 9 files changed, 17 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 6a31102b0819..bbe688e2cc89 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -424,7 +424,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
-	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
+	nfs_pageio_init_read(&desc, dreq->inode, false,
 			     &nfs_direct_read_completion_ops);
 	get_dreq(dreq);
 	desc.pg_dreq = dreq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 8431083de179..98fe618db2aa 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -398,7 +398,7 @@ struct nfs_pgio_completion_ops;
 extern struct nfs_read_header *nfs_readhdr_alloc(void);
 extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-			struct inode *inode,
+			struct inode *inode, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
 			     struct nfs_read_data *data,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e98488053906..d873241a9b3a 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -947,7 +947,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.pathconf	= nfs3_proc_pathconf,
 	.decode_dirent	= nfs3_decode_dirent,
 	.read_setup	= nfs3_proc_read_setup,
-	.read_pageio_init = nfs_pageio_init_read,
 	.read_rpc_prepare = nfs3_proc_read_rpc_prepare,
 	.read_done	= nfs3_read_done,
 	.write_setup	= nfs3_proc_write_setup,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8da0c62966b5..21cd1f2ee35a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8433,7 +8433,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.set_capabilities = nfs4_server_capabilities,
 	.decode_dirent	= nfs4_decode_dirent,
 	.read_setup	= nfs4_proc_read_setup,
-	.read_pageio_init = pnfs_pageio_init_read,
 	.read_rpc_prepare = nfs4_proc_read_rpc_prepare,
 	.read_done	= nfs4_read_done,
 	.write_setup	= nfs4_proc_write_setup,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 9edac9f01c2a..3d5bc2baafd1 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1434,19 +1434,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
 
-void
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
-		      const struct nfs_pgio_completion_ops *compl_ops)
-{
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
-
-	if (ld == NULL)
-		nfs_pageio_init_read(pgio, inode, compl_ops);
-	else
-		nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
-}
-
 bool
 pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		     struct nfs_page *req)
@@ -1641,7 +1628,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
 	LIST_HEAD(failed);
 
 	/* Resend all requests through the MDS */
-	nfs_pageio_init_read(&pgio, inode, compl_ops);
+	nfs_pageio_init_read(&pgio, inode, true, compl_ops);
 	pgio.pg_dreq = dreq;
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e9ac8fbaee3d..94a9a1834b3f 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -180,9 +180,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
 void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
 void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
 
-void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
-			   const struct nfs_pgio_completion_ops *);
-
 void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
 void unset_pnfs_layoutdriver(struct nfs_server *);
 void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
@@ -459,12 +456,6 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
 {
 }
 
-static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
-					 const struct nfs_pgio_completion_ops *compl_ops)
-{
-	nfs_pageio_init_read(pgio, inode, compl_ops);
-}
-
 static inline int
 pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
 		 struct nfs_commit_info *cinfo)
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index f9cc29590a18..8cc227fcd4d2 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -735,7 +735,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.pathconf	= nfs_proc_pathconf,
 	.decode_dirent	= nfs2_decode_dirent,
 	.read_setup	= nfs_proc_read_setup,
-	.read_pageio_init = nfs_pageio_init_read,
 	.read_rpc_prepare = nfs_proc_read_rpc_prepare,
 	.read_done	= nfs_read_done,
 	.write_setup	= nfs_proc_write_setup,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 411aedda14bb..7f87461be3a9 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -24,6 +24,7 @@
 #include "internal.h"
 #include "iostat.h"
 #include "fscache.h"
+#include "pnfs.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
 
@@ -114,11 +115,17 @@ int nfs_return_empty_page(struct page *page)
 }
 
 void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
-			      struct inode *inode,
+			      struct inode *inode, bool force_mds,
 			      const struct nfs_pgio_completion_ops *compl_ops)
 {
-	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
-			NFS_SERVER(inode)->rsize, 0);
+	struct nfs_server *server = NFS_SERVER(inode);
+	const struct nfs_pageio_ops *pg_ops = &nfs_pageio_read_ops;
+
+#ifdef CONFIG_NFS_V4_1
+	if (server->pnfs_curr_ld && !force_mds)
+		pg_ops = server->pnfs_curr_ld->pg_read_ops;
+#endif
+	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->rsize, 0);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
 
@@ -147,7 +154,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	if (len < PAGE_CACHE_SIZE)
 		zero_user_segment(page, len, PAGE_CACHE_SIZE);
 
-	NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
+	nfs_pageio_init_read(&pgio, inode, false,
+			     &nfs_async_read_completion_ops);
 	nfs_pageio_add_request(&pgio, new);
 	nfs_pageio_complete(&pgio);
 	NFS_I(inode)->read_io += pgio.pg_bytes_written;
@@ -654,7 +662,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	if (ret == 0)
 		goto read_complete; /* all pages were read */
 
-	NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
+	nfs_pageio_init_read(&pgio, inode, false,
+			     &nfs_async_read_completion_ops);
 
 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 78216f859527..3e8fc1fe585b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1466,8 +1466,6 @@ struct nfs_rpc_ops {
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
 	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
 	void	(*read_setup)   (struct nfs_read_data *, struct rpc_message *);
-	void	(*read_pageio_init)(struct nfs_pageio_descriptor *, struct inode *,
-				    const struct nfs_pgio_completion_ops *);
 	int	(*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
 	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
-- 
cgit 


From c8fe16e3f96a9bb95a10cedb19d2be2d2d580940 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Wed, 28 May 2014 14:57:02 -0600
Subject: PCI: Add support for PCIe-to-PCI bridge DMA alias quirks

Several PCIe-to-PCI bridges fail to provide a PCIe capability, causing us
to handle them as conventional PCI devices when they really use the
requester ID of the secondary bus.  We need to differentiate these from
PCIe-to-PCI bridges that actually use the conventional PCI ID when a PCIe
capability is not present, such as those found on the root complex of many
Intel chipsets.  Add a dev_flag bit to identify devices to be handled as
standard PCIe-to-PCI bridges.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/search.c | 10 ++++++++--
 include/linux/pci.h  |  2 ++
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 2c19f3f40621..df38f73f091f 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -88,8 +88,14 @@ int pci_for_each_dma_alias(struct pci_dev *pdev,
 				continue;
 			}
 		} else {
-			ret = fn(tmp, PCI_DEVID(tmp->bus->number, tmp->devfn),
-				 data);
+			if (tmp->dev_flags & PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS)
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->subordinate->number,
+						   PCI_DEVFN(0, 0)), data);
+			else
+				ret = fn(tmp,
+					 PCI_DEVID(tmp->bus->number,
+						   tmp->devfn), data);
 			if (ret)
 				return ret;
 		}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9d4035c276f4..85ab35e974a9 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -173,6 +173,8 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_ACS_ENABLED_QUIRK = (__force pci_dev_flags_t) (1 << 3),
 	/* Flag to indicate the device uses dma_alias_devfn */
 	PCI_DEV_FLAGS_DMA_ALIAS_DEVFN = (__force pci_dev_flags_t) (1 << 4),
+	/* Use a PCIe-to-PCI bridge alias even if !pci_is_pcie */
+	PCI_DEV_FLAG_PCIE_BRIDGE_ALIAS = (__force pci_dev_flags_t) (1 << 5),
 };
 
 enum pci_irq_reroute_variant {
-- 
cgit 


From 782a985d7af26db39e86070d28f987cad21313c0 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 20 May 2014 08:53:21 -0600
Subject: PCI: Introduce new device binding path using pci_dev.driver_override

The driver_override field allows us to specify the driver for a device
rather than relying on the driver to provide a positive match of the
device.  This shortcuts the existing process of looking up the vendor and
device ID, adding them to the driver new_id, binding the device, then
removing the ID, but it also provides a couple advantages.

First, the above existing process allows the driver to bind to any device
matching the new_id for the window where it's enabled.  This is often not
desired, such as the case of trying to bind a single device to a meta
driver like pci-stub or vfio-pci.  Using driver_override we can do this
deterministically using:

  echo pci-stub > /sys/bus/pci/devices/0000:03:00.0/driver_override
  echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind
  echo 0000:03:00.0 > /sys/bus/pci/drivers_probe

Previously we could not invoke drivers_probe after adding a device to
new_id for a driver, as it was non-deterministic whether the driver we
intend or the standard driver would claim the device.  Now it becomes a
deterministic process: only the driver matching driver_override will probe
the device.

To return the device to the standard driver, we simply clear the
driver_override and reprobe the device:

  echo > /sys/bus/pci/devices/0000:03:00.0/driver_override
  echo 0000:03:00.0 > /sys/bus/pci/devices/0000:03:00.0/driver/unbind
  echo 0000:03:00.0 > /sys/bus/pci/drivers_probe

Another advantage to this approach is that we can specify a driver override
to force a specific binding or prevent any binding.  For instance when an
IOMMU group is exposed to userspace through VFIO we require that all
devices within that group are owned by VFIO.  However, devices can be
hot-added into an IOMMU group, in which case we want to prevent the device
from binding to any driver (override driver = "none") or perhaps have it
automatically bind to vfio-pci.  With driver_override it's a simple matter
for this field to be set internally when the device is first discovered to
prevent driver matches.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-bus-pci | 21 +++++++++++++++++
 drivers/pci/pci-driver.c                | 25 ++++++++++++++++++---
 drivers/pci/pci-sysfs.c                 | 40 +++++++++++++++++++++++++++++++++
 drivers/pci/probe.c                     |  1 +
 include/linux/pci.h                     |  1 +
 5 files changed, 85 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index a3c5a6685036..898ddc4440e6 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -250,3 +250,24 @@ Description:
 		valid.  For example, writing a 2 to this file when sriov_numvfs
 		is not 0 and not 2 already will return an error. Writing a 10
 		when the value of sriov_totalvfs is 8 will return an error.
+
+What:		/sys/bus/pci/devices/.../driver_override
+Date:		April 2014
+Contact:	Alex Williamson <alex.williamson@redhat.com>
+Description:
+		This file allows the driver for a device to be specified which
+		will override standard static and dynamic ID matching.  When
+		specified, only a driver with a name matching the value written
+		to driver_override will have an opportunity to bind to the
+		device.  The override is specified by writing a string to the
+		driver_override file (echo pci-stub > driver_override) and
+		may be cleared with an empty string (echo > driver_override).
+		This returns the device to standard matching rules binding.
+		Writing to driver_override does not automatically unbind the
+		device from its current driver or make any attempt to
+		automatically load the specified driver.  If no driver with a
+		matching name is currently loaded in the kernel, the device
+		will not bind to any driver.  This also allows devices to
+		opt-out of driver binding using a driver_override name such as
+		"none".  Only a single driver may be specified in the override,
+		there is no support for parsing delimiters.
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index d911e0c1f359..4393c12e9135 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -216,6 +216,13 @@ const struct pci_device_id *pci_match_id(const struct pci_device_id *ids,
 	return NULL;
 }
 
+static const struct pci_device_id pci_device_id_any = {
+	.vendor = PCI_ANY_ID,
+	.device = PCI_ANY_ID,
+	.subvendor = PCI_ANY_ID,
+	.subdevice = PCI_ANY_ID,
+};
+
 /**
  * pci_match_device - Tell if a PCI device structure has a matching PCI device id structure
  * @drv: the PCI driver to match against
@@ -229,18 +236,30 @@ static const struct pci_device_id *pci_match_device(struct pci_driver *drv,
 						    struct pci_dev *dev)
 {
 	struct pci_dynid *dynid;
+	const struct pci_device_id *found_id = NULL;
+
+	/* When driver_override is set, only bind to the matching driver */
+	if (dev->driver_override && strcmp(dev->driver_override, drv->name))
+		return NULL;
 
 	/* Look at the dynamic ids first, before the static ones */
 	spin_lock(&drv->dynids.lock);
 	list_for_each_entry(dynid, &drv->dynids.list, node) {
 		if (pci_match_one_device(&dynid->id, dev)) {
-			spin_unlock(&drv->dynids.lock);
-			return &dynid->id;
+			found_id = &dynid->id;
+			break;
 		}
 	}
 	spin_unlock(&drv->dynids.lock);
 
-	return pci_match_id(drv->id_table, dev);
+	if (!found_id)
+		found_id = pci_match_id(drv->id_table, dev);
+
+	/* driver_override will always match, send a dummy id */
+	if (!found_id && dev->driver_override)
+		found_id = &pci_device_id_any;
+
+	return found_id;
 }
 
 struct drv_dev_and_id {
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 4e0acefb7565..faa4ab554d68 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -499,6 +499,45 @@ static struct device_attribute sriov_numvfs_attr =
 		       sriov_numvfs_show, sriov_numvfs_store);
 #endif /* CONFIG_PCI_IOV */
 
+static ssize_t driver_override_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	char *driver_override, *old = pdev->driver_override, *cp;
+
+	if (count > PATH_MAX)
+		return -EINVAL;
+
+	driver_override = kstrndup(buf, count, GFP_KERNEL);
+	if (!driver_override)
+		return -ENOMEM;
+
+	cp = strchr(driver_override, '\n');
+	if (cp)
+		*cp = '\0';
+
+	if (strlen(driver_override)) {
+		pdev->driver_override = driver_override;
+	} else {
+		kfree(driver_override);
+		pdev->driver_override = NULL;
+	}
+
+	kfree(old);
+
+	return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+
+	return sprintf(buf, "%s\n", pdev->driver_override);
+}
+static DEVICE_ATTR_RW(driver_override);
+
 static struct attribute *pci_dev_attrs[] = {
 	&dev_attr_resource.attr,
 	&dev_attr_vendor.attr,
@@ -521,6 +560,7 @@ static struct attribute *pci_dev_attrs[] = {
 #if defined(CONFIG_PM_RUNTIME) && defined(CONFIG_ACPI)
 	&dev_attr_d3cold_allowed.attr,
 #endif
+	&dev_attr_driver_override.attr,
 	NULL,
 };
 
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index ef09f5f2fe6c..54268de45f59 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1215,6 +1215,7 @@ static void pci_release_dev(struct device *dev)
 	pci_release_of_node(pci_dev);
 	pcibios_release_device(pci_dev);
 	pci_bus_put(pci_dev->bus);
+	kfree(pci_dev->driver_override);
 	kfree(pci_dev);
 }
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index aab57b4abe7f..b72af276f591 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -365,6 +365,7 @@ struct pci_dev {
 #endif
 	phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */
 	size_t romlen; /* Length of ROM if it's not from the BAR */
+	char *driver_override; /* Driver name to force a match */
 };
 
 static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
-- 
cgit 


From 3c6b899c49e5e9c2803b59ee553eddaf69cea7f6 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:24 -0400
Subject: NFS: Create a common argument structure for reads and writes

Reads and writes have very similar arguments.  This patch combines them
together and documents the few fields used only by write.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs2xdr.c        |  8 ++++----
 fs/nfs/nfs3xdr.c        |  8 ++++----
 fs/nfs/nfs4proc.c       |  4 ++--
 fs/nfs/nfs4xdr.c        | 10 ++++++----
 fs/nfs/read.c           |  2 +-
 fs/nfs/write.c          |  2 +-
 include/linux/nfs_xdr.h | 47 +++++++++++++++++++----------------------------
 7 files changed, 37 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 62db136339ea..461cd8bd9401 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -613,7 +613,7 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
  *	};
  */
 static void encode_readargs(struct xdr_stream *xdr,
-			    const struct nfs_readargs *args)
+			    const struct nfs_pgio_args *args)
 {
 	u32 offset = args->offset;
 	u32 count = args->count;
@@ -629,7 +629,7 @@ static void encode_readargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
 				  struct xdr_stream *xdr,
-				  const struct nfs_readargs *args)
+				  const struct nfs_pgio_args *args)
 {
 	encode_readargs(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -649,7 +649,7 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
  *	};
  */
 static void encode_writeargs(struct xdr_stream *xdr,
-			     const struct nfs_writeargs *args)
+			     const struct nfs_pgio_args *args)
 {
 	u32 offset = args->offset;
 	u32 count = args->count;
@@ -669,7 +669,7 @@ static void encode_writeargs(struct xdr_stream *xdr,
 
 static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_writeargs *args)
+				   const struct nfs_pgio_args *args)
 {
 	encode_writeargs(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index fa6d72131c19..02f16c212007 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -953,7 +953,7 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
  *	};
  */
 static void encode_read3args(struct xdr_stream *xdr,
-			     const struct nfs_readargs *args)
+			     const struct nfs_pgio_args *args)
 {
 	__be32 *p;
 
@@ -966,7 +966,7 @@ static void encode_read3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
 				   struct xdr_stream *xdr,
-				   const struct nfs_readargs *args)
+				   const struct nfs_pgio_args *args)
 {
 	encode_read3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -992,7 +992,7 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
  *	};
  */
 static void encode_write3args(struct xdr_stream *xdr,
-			      const struct nfs_writeargs *args)
+			      const struct nfs_pgio_args *args)
 {
 	__be32 *p;
 
@@ -1008,7 +1008,7 @@ static void encode_write3args(struct xdr_stream *xdr,
 
 static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
 				    struct xdr_stream *xdr,
-				    const struct nfs_writeargs *args)
+				    const struct nfs_pgio_args *args)
 {
 	encode_write3args(xdr, args);
 	xdr->buf->flags |= XDRBUF_WRITE;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 21cd1f2ee35a..4794ca693367 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4055,7 +4055,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
 }
 
 static bool nfs4_read_stateid_changed(struct rpc_task *task,
-		struct nfs_readargs *args)
+		struct nfs_pgio_args *args)
 {
 
 	if (!nfs4_error_stateid_expired(task->tk_status) ||
@@ -4121,7 +4121,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
 }
 
 static bool nfs4_write_stateid_changed(struct rpc_task *task,
-		struct nfs_writeargs *args)
+		struct nfs_pgio_args *args)
 {
 
 	if (!nfs4_error_stateid_expired(task->tk_status) ||
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 73ce8d4fe2c8..032159c36a57 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1556,7 +1556,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
 	encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
 }
 
-static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
+static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
+			struct compound_hdr *hdr)
 {
 	__be32 *p;
 
@@ -1701,7 +1702,8 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4
 	encode_nfs4_verifier(xdr, &arg->confirm);
 }
 
-static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
+static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
+			 struct compound_hdr *hdr)
 {
 	__be32 *p;
 
@@ -2451,7 +2453,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a READ request
  */
 static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
-			      struct nfs_readargs *args)
+			      struct nfs_pgio_args *args)
 {
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2513,7 +2515,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
  * Encode a WRITE request
  */
 static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
-			       struct nfs_writeargs *args)
+			       struct nfs_pgio_args *args)
 {
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 7f87461be3a9..46d555206023 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -470,7 +470,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 
 static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
 {
-	struct nfs_readargs *argp = &data->args;
+	struct nfs_pgio_args *argp = &data->args;
 	struct nfs_readres *resp = &data->res;
 
 	/* This is a short read! */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ee6d46fde76c..25ba3830ec8b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1388,7 +1388,7 @@ static int nfs_should_remove_suid(const struct inode *inode)
  */
 void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-	struct nfs_writeargs	*argp = &data->args;
+	struct nfs_pgio_args	*argp = &data->args;
 	struct nfs_writeres	*resp = &data->res;
 	struct inode		*inode = data->header->inode;
 	int status;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 3e8fc1fe585b..5875001928f9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -491,18 +491,6 @@ struct nfs4_delegreturnres {
 /*
  * Arguments to the read call.
  */
-struct nfs_readargs {
-	struct nfs4_sequence_args	seq_args;
-	struct nfs_fh *		fh;
-	struct nfs_open_context *context;
-	struct nfs_lock_context *lock_context;
-	nfs4_stateid		stateid;
-	__u64			offset;
-	__u32			count;
-	unsigned int		pgbase;
-	struct page **		pages;
-};
-
 struct nfs_readres {
 	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *	fattr;
@@ -513,20 +501,6 @@ struct nfs_readres {
 /*
  * Arguments to the write call.
  */
-struct nfs_writeargs {
-	struct nfs4_sequence_args	seq_args;
-	struct nfs_fh *		fh;
-	struct nfs_open_context *context;
-	struct nfs_lock_context *lock_context;
-	nfs4_stateid		stateid;
-	__u64			offset;
-	__u32			count;
-	enum nfs3_stable_how	stable;
-	unsigned int		pgbase;
-	struct page **		pages;
-	const u32 *		bitmask;
-};
-
 struct nfs_write_verifier {
 	char			data[8];
 };
@@ -544,6 +518,23 @@ struct nfs_writeres {
 	const struct nfs_server *server;
 };
 
+/*
+ * Arguments shared by the read and write call.
+ */
+struct nfs_pgio_args {
+	struct nfs4_sequence_args	seq_args;
+	struct nfs_fh *		fh;
+	struct nfs_open_context *context;
+	struct nfs_lock_context *lock_context;
+	nfs4_stateid		stateid;
+	__u64			offset;
+	__u32			count;
+	unsigned int		pgbase;
+	struct page **		pages;
+	const u32 *		bitmask;	/* used by write */
+	enum nfs3_stable_how	stable;		/* used by write */
+};
+
 /*
  * Arguments to the commit call.
  */
@@ -1269,7 +1260,7 @@ struct nfs_read_data {
 	struct list_head	list;
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;	/* fattr storage */
-	struct nfs_readargs args;
+	struct nfs_pgio_args	args;
 	struct nfs_readres  res;
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
@@ -1321,7 +1312,7 @@ struct nfs_write_data {
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;
 	struct nfs_writeverf	verf;
-	struct nfs_writeargs	args;		/* argument struct */
+	struct nfs_pgio_args	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
-- 
cgit 


From 9137bdf3d241fc2cbeb2a8ced51d1546150aa6a1 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:25 -0400
Subject: NFS: Create a common results structure for reads and writes

Reads and writes have very similar results.  This patch combines the two
structs together with comments to show where the differing fields are
used.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/nfs2xdr.c        |  6 +++---
 fs/nfs/nfs3xdr.c        |  8 ++++----
 fs/nfs/nfs4xdr.c        |  9 +++++----
 fs/nfs/read.c           |  2 +-
 fs/nfs/write.c          |  2 +-
 include/linux/nfs_xdr.h | 32 ++++++++++++--------------------
 6 files changed, 26 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 461cd8bd9401..5f61b83f4a1c 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -103,7 +103,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
 /*
  *	typedef opaque	nfsdata<>;
  */
-static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result)
+static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
 {
 	u32 recvd, count;
 	__be32 *p;
@@ -857,7 +857,7 @@ out_default:
  *	};
  */
 static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				struct nfs_readres *result)
+				struct nfs_pgio_res *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -878,7 +878,7 @@ out_default:
 }
 
 static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_writeres *result)
+				 struct nfs_pgio_res *result)
 {
 	/* All NFSv2 writes are "file sync" writes */
 	result->verf->committed = NFS_FILE_SYNC;
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 02f16c212007..8f4cbe7f4aa8 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1589,7 +1589,7 @@ out_default:
  *	};
  */
 static int decode_read3resok(struct xdr_stream *xdr,
-			     struct nfs_readres *result)
+			     struct nfs_pgio_res *result)
 {
 	u32 eof, count, ocount, recvd;
 	__be32 *p;
@@ -1625,7 +1625,7 @@ out_overflow:
 }
 
 static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				 struct nfs_readres *result)
+				 struct nfs_pgio_res *result)
 {
 	enum nfs_stat status;
 	int error;
@@ -1673,7 +1673,7 @@ out_status:
  *	};
  */
 static int decode_write3resok(struct xdr_stream *xdr,
-			      struct nfs_writeres *result)
+			      struct nfs_pgio_res *result)
 {
 	__be32 *p;
 
@@ -1697,7 +1697,7 @@ out_eio:
 }
 
 static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
-				  struct nfs_writeres *result)
+				  struct nfs_pgio_res *result)
 {
 	enum nfs_stat status;
 	int error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 032159c36a57..939ae606cfa4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5087,7 +5087,8 @@ static int decode_putrootfh(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_PUTROOTFH);
 }
 
-static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res)
+static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req,
+		       struct nfs_pgio_res *res)
 {
 	__be32 *p;
 	uint32_t count, eof, recvd;
@@ -5341,7 +5342,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr)
 	return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
 }
 
-static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
+static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res)
 {
 	__be32 *p;
 	int status;
@@ -6638,7 +6639,7 @@ out:
  * Decode Read response
  */
 static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			     struct nfs_readres *res)
+			     struct nfs_pgio_res *res)
 {
 	struct compound_hdr hdr;
 	int status;
@@ -6663,7 +6664,7 @@ out:
  * Decode WRITE response
  */
 static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
-			      struct nfs_writeres *res)
+			      struct nfs_pgio_res *res)
 {
 	struct compound_hdr hdr;
 	int status;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 46d555206023..473bba35a2cb 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -471,7 +471,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
 {
 	struct nfs_pgio_args *argp = &data->args;
-	struct nfs_readres *resp = &data->res;
+	struct nfs_pgio_res  *resp = &data->res;
 
 	/* This is a short read! */
 	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 25ba3830ec8b..d392a70092fe 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1389,7 +1389,7 @@ static int nfs_should_remove_suid(const struct inode *inode)
 void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
 	struct nfs_pgio_args	*argp = &data->args;
-	struct nfs_writeres	*resp = &data->res;
+	struct nfs_pgio_res	*resp = &data->res;
 	struct inode		*inode = data->header->inode;
 	int status;
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 5875001928f9..381f832b03c6 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -488,16 +488,6 @@ struct nfs4_delegreturnres {
 	const struct nfs_server *server;
 };
 
-/*
- * Arguments to the read call.
- */
-struct nfs_readres {
-	struct nfs4_sequence_res	seq_res;
-	struct nfs_fattr *	fattr;
-	__u32			count;
-	int                     eof;
-};
-
 /*
  * Arguments to the write call.
  */
@@ -510,14 +500,6 @@ struct nfs_writeverf {
 	enum nfs3_stable_how	committed;
 };
 
-struct nfs_writeres {
-	struct nfs4_sequence_res	seq_res;
-	struct nfs_fattr *	fattr;
-	struct nfs_writeverf *	verf;
-	__u32			count;
-	const struct nfs_server *server;
-};
-
 /*
  * Arguments shared by the read and write call.
  */
@@ -535,6 +517,16 @@ struct nfs_pgio_args {
 	enum nfs3_stable_how	stable;		/* used by write */
 };
 
+struct nfs_pgio_res {
+	struct nfs4_sequence_res	seq_res;
+	struct nfs_fattr *	fattr;
+	__u32			count;
+	int			eof;		/* used by read */
+	struct nfs_writeverf *	verf;		/* used by write */
+	const struct nfs_server *server;	/* used by write */
+
+};
+
 /*
  * Arguments to the commit call.
  */
@@ -1261,7 +1253,7 @@ struct nfs_read_data {
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;	/* fattr storage */
 	struct nfs_pgio_args	args;
-	struct nfs_readres  res;
+	struct nfs_pgio_res	res;
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
 	__u64			mds_offset;
@@ -1313,7 +1305,7 @@ struct nfs_write_data {
 	struct nfs_fattr	fattr;
 	struct nfs_writeverf	verf;
 	struct nfs_pgio_args	args;		/* argument struct */
-	struct nfs_writeres	res;		/* result struct */
+	struct nfs_pgio_res	res;		/* result struct */
 	unsigned long		timestamp;	/* For lease renewal */
 	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
 	__u64			mds_offset;	/* Filelayout dense stripe */
-- 
cgit 


From 9c7e1b3d50b56b8d8f6237ed232350b7c6476cd5 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:26 -0400
Subject: NFS: Create a common read and write data struct

At this point, the only difference between nfs_read_data and
nfs_write_data is the write verifier.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/blocklayout/blocklayout.c | 22 ++++++++++----------
 fs/nfs/internal.h                | 10 ++++-----
 fs/nfs/nfs3proc.c                | 12 +++++------
 fs/nfs/nfs4_fs.h                 |  4 ++--
 fs/nfs/nfs4filelayout.c          | 34 +++++++++++++++----------------
 fs/nfs/nfs4proc.c                | 30 +++++++++++++--------------
 fs/nfs/nfs4trace.h               |  8 ++++----
 fs/nfs/objlayout/objio_osd.c     |  6 +++---
 fs/nfs/objlayout/objlayout.c     | 16 +++++++--------
 fs/nfs/objlayout/objlayout.h     |  8 ++++----
 fs/nfs/pnfs.c                    | 26 ++++++++++++------------
 fs/nfs/pnfs.h                    | 10 ++++-----
 fs/nfs/proc.c                    | 12 +++++------
 fs/nfs/read.c                    | 32 ++++++++++++++---------------
 fs/nfs/write.c                   | 36 ++++++++++++++++----------------
 include/linux/nfs_fs.h           |  4 ++--
 include/linux/nfs_xdr.h          | 44 ++++++++++++++--------------------------
 17 files changed, 150 insertions(+), 164 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 65d849bdf77a..206cc68c9694 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,7 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
 			SetPageUptodate(bvec->bv_page);
 
 	if (err) {
-		struct nfs_read_data *rdata = par->data;
+		struct nfs_pgio_data *rdata = par->data;
 		struct nfs_pgio_header *header = rdata->header;
 
 		if (!header->pnfs_error)
@@ -224,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err)
 static void bl_read_cleanup(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_read_data *rdata;
+	struct nfs_pgio_data *rdata;
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	rdata = container_of(task, struct nfs_read_data, task);
+	rdata = container_of(task, struct nfs_pgio_data, task);
 	pnfs_ld_read_done(rdata);
 }
 
 static void
 bl_end_par_io_read(void *data, int unused)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	rdata->task.tk_status = rdata->header->pnfs_error;
 	INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
@@ -242,7 +242,7 @@ bl_end_par_io_read(void *data, int unused)
 }
 
 static enum pnfs_try_status
-bl_read_pagelist(struct nfs_read_data *rdata)
+bl_read_pagelist(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *header = rdata->header;
 	int i, hole;
@@ -390,7 +390,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
 	}
 
 	if (unlikely(err)) {
-		struct nfs_write_data *data = par->data;
+		struct nfs_pgio_data *data = par->data;
 		struct nfs_pgio_header *header = data->header;
 
 		if (!header->pnfs_error)
@@ -405,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
 {
 	struct parallel_io *par = bio->bi_private;
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct nfs_write_data *data = par->data;
+	struct nfs_pgio_data *data = par->data;
 	struct nfs_pgio_header *header = data->header;
 
 	if (!uptodate) {
@@ -423,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err)
 static void bl_write_cleanup(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_write_data *wdata;
+	struct nfs_pgio_data *wdata;
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	wdata = container_of(task, struct nfs_write_data, task);
+	wdata = container_of(task, struct nfs_pgio_data, task);
 	if (likely(!wdata->header->pnfs_error)) {
 		/* Marks for LAYOUTCOMMIT */
 		mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
@@ -438,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work)
 /* Called when last of bios associated with a bl_write_pagelist call finishes */
 static void bl_end_par_io_write(void *data, int num_se)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	if (unlikely(wdata->header->pnfs_error)) {
 		bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
@@ -673,7 +673,7 @@ check_page:
 }
 
 static enum pnfs_try_status
-bl_write_pagelist(struct nfs_write_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
 {
 	struct nfs_pgio_header *header = wdata->header;
 	int i, ret, npg_zero, pg_index, last = 0;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 98fe618db2aa..af01b80412dd 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -401,13 +401,13 @@ extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
 extern int nfs_initiate_read(struct rpc_clnt *clnt,
-			     struct nfs_read_data *data,
+			     struct nfs_pgio_data *data,
 			     const struct rpc_call_ops *call_ops, int flags);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
 			      struct nfs_pgio_header *hdr);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_readdata_release(struct nfs_read_data *rdata);
+extern void nfs_readdata_release(struct nfs_pgio_data *rdata);
 
 /* super.c */
 void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
@@ -429,10 +429,10 @@ extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
 			     struct nfs_pgio_header *hdr);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_writedata_release(struct nfs_write_data *wdata);
+extern void nfs_writedata_release(struct nfs_pgio_data *wdata);
 extern void nfs_commit_free(struct nfs_commit_data *p);
 extern int nfs_initiate_write(struct rpc_clnt *clnt,
-			      struct nfs_write_data *data,
+			      struct nfs_pgio_data *data,
 			      const struct rpc_call_ops *call_ops,
 			      int how, int flags);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
@@ -492,7 +492,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
 extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
 
 /* nfs4proc.c */
-extern void __nfs4_read_done_cb(struct nfs_read_data *);
+extern void __nfs4_read_done_cb(struct nfs_pgio_data *);
 extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
 			    const struct rpc_timeout *timeparms,
 			    const char *ip_addr);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d873241a9b3a..d235369c3dfb 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -795,7 +795,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	return status;
 }
 
-static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 
@@ -807,18 +807,18 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
 	return 0;
 }
 
-static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
 }
 
-static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
 }
 
-static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 
@@ -829,12 +829,12 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
 	return 0;
 }
 
-static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
 }
 
-static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e1d1badbe53c..f63cb87cd730 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -337,7 +337,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
  */
 static inline void
 nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
-			 struct rpc_message *msg, struct nfs_write_data *wdata)
+			 struct rpc_message *msg, struct nfs_pgio_data *wdata)
 {
 	if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
 	    !test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
@@ -369,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
 
 static inline void
 nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
-			 struct rpc_message *msg, struct nfs_write_data *wdata)
+			 struct rpc_message *msg, struct nfs_pgio_data *wdata)
 {
 }
 #endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index b9a35c05b60f..e6936147ad95 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -84,7 +84,7 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
 	BUG();
 }
 
-static void filelayout_reset_write(struct nfs_write_data *data)
+static void filelayout_reset_write(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct rpc_task *task = &data->task;
@@ -105,7 +105,7 @@ static void filelayout_reset_write(struct nfs_write_data *data)
 	}
 }
 
-static void filelayout_reset_read(struct nfs_read_data *data)
+static void filelayout_reset_read(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct rpc_task *task = &data->task;
@@ -243,7 +243,7 @@ wait_on_recovery:
 /* NFS_PROTO call done callback routines */
 
 static int filelayout_read_done_cb(struct rpc_task *task,
-				struct nfs_read_data *data)
+				struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	int err;
@@ -270,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
  * rfc5661 is not clear about which credential should be used.
  */
 static void
-filelayout_set_layoutcommit(struct nfs_write_data *wdata)
+filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
 
@@ -305,7 +305,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
  */
 static void filelayout_read_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
 		rpc_exit(task, -EIO);
@@ -317,7 +317,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 		rpc_exit(task, 0);
 		return;
 	}
-	rdata->read_done_cb = filelayout_read_done_cb;
+	rdata->pgio_done_cb = filelayout_read_done_cb;
 
 	if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
 			&rdata->args.seq_args,
@@ -331,7 +331,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
 
 static void filelayout_read_call_done(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
 
@@ -347,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
 
 static void filelayout_read_count_stats(struct rpc_task *task, void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 
 	rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_read_release(void *data)
 {
-	struct nfs_read_data *rdata = data;
+	struct nfs_pgio_data *rdata = data;
 	struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
 
 	filelayout_fenceme(lo->plh_inode, lo);
@@ -363,7 +363,7 @@ static void filelayout_read_release(void *data)
 }
 
 static int filelayout_write_done_cb(struct rpc_task *task,
-				struct nfs_write_data *data)
+				struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	int err;
@@ -419,7 +419,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
 
 static void filelayout_write_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
 		rpc_exit(task, -EIO);
@@ -443,7 +443,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
 
 static void filelayout_write_call_done(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
 	    task->tk_status == 0) {
@@ -457,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
 
 static void filelayout_write_count_stats(struct rpc_task *task, void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 
 	rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
 }
 
 static void filelayout_write_release(void *data)
 {
-	struct nfs_write_data *wdata = data;
+	struct nfs_pgio_data *wdata = data;
 	struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
 
 	filelayout_fenceme(lo->plh_inode, lo);
@@ -529,7 +529,7 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
 };
 
 static enum pnfs_try_status
-filelayout_read_pagelist(struct nfs_read_data *data)
+filelayout_read_pagelist(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -575,7 +575,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
 
 /* Perform async writes. */
 static enum pnfs_try_status
-filelayout_write_pagelist(struct nfs_write_data *data, int sync)
+filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
 {
 	struct nfs_pgio_header *hdr = data->header;
 	struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -600,7 +600,7 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
 		__func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
 		offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
 
-	data->write_done_cb = filelayout_write_done_cb;
+	data->pgio_done_cb = filelayout_write_done_cb;
 	atomic_inc(&ds->ds_clp->cl_count);
 	data->ds_clp = ds->ds_clp;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4794ca693367..e793aa91454a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4033,12 +4033,12 @@ static bool nfs4_error_stateid_expired(int err)
 	return false;
 }
 
-void __nfs4_read_done_cb(struct nfs_read_data *data)
+void __nfs4_read_done_cb(struct nfs_pgio_data *data)
 {
 	nfs_invalidate_atime(data->header->inode);
 }
 
-static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
@@ -4068,7 +4068,7 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
 	return true;
 }
 
-static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 
 	dprintk("--> %s\n", __func__);
@@ -4077,19 +4077,19 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
 		return -EAGAIN;
 	if (nfs4_read_stateid_changed(task, &data->args))
 		return -EAGAIN;
-	return data->read_done_cb ? data->read_done_cb(task, data) :
+	return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
 				    nfs4_read_done_cb(task, data);
 }
 
-static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	data->timestamp   = jiffies;
-	data->read_done_cb = nfs4_read_done_cb;
+	data->pgio_done_cb = nfs4_read_done_cb;
 	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
 	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
 }
 
-static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 			&data->args.seq_args,
@@ -4104,7 +4104,7 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
 	return 0;
 }
 
-static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 	
@@ -4134,18 +4134,18 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
 	return true;
 }
 
-static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	if (!nfs4_sequence_done(task, &data->res.seq_res))
 		return -EAGAIN;
 	if (nfs4_write_stateid_changed(task, &data->args))
 		return -EAGAIN;
-	return data->write_done_cb ? data->write_done_cb(task, data) :
+	return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
 		nfs4_write_done_cb(task, data);
 }
 
 static
-bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
+bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
 {
 	const struct nfs_pgio_header *hdr = data->header;
 
@@ -4158,7 +4158,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
 	return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
 }
 
-static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	struct nfs_server *server = NFS_SERVER(data->header->inode);
 
@@ -4168,8 +4168,8 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
 	} else
 		data->args.bitmask = server->cache_consistency_bitmask;
 
-	if (!data->write_done_cb)
-		data->write_done_cb = nfs4_write_done_cb;
+	if (!data->pgio_done_cb)
+		data->pgio_done_cb = nfs4_write_done_cb;
 	data->res.server = server;
 	data->timestamp   = jiffies;
 
@@ -4177,7 +4177,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
 	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
 }
 
-static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 			&data->args.seq_args,
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 849cf146db30..0a744f3a86f6 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -932,7 +932,7 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
 
 DECLARE_EVENT_CLASS(nfs4_read_event,
 		TP_PROTO(
-			const struct nfs_read_data *data,
+			const struct nfs_pgio_data *data,
 			int error
 		),
 
@@ -972,7 +972,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
 #define DEFINE_NFS4_READ_EVENT(name) \
 	DEFINE_EVENT(nfs4_read_event, name, \
 			TP_PROTO( \
-				const struct nfs_read_data *data, \
+				const struct nfs_pgio_data *data, \
 				int error \
 			), \
 			TP_ARGS(data, error))
@@ -983,7 +983,7 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
 
 DECLARE_EVENT_CLASS(nfs4_write_event,
 		TP_PROTO(
-			const struct nfs_write_data *data,
+			const struct nfs_pgio_data *data,
 			int error
 		),
 
@@ -1024,7 +1024,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
 #define DEFINE_NFS4_WRITE_EVENT(name) \
 	DEFINE_EVENT(nfs4_write_event, name, \
 			TP_PROTO( \
-				const struct nfs_write_data *data, \
+				const struct nfs_pgio_data *data, \
 				int error \
 			), \
 			TP_ARGS(data, error))
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 5457745dd4f1..426b366b0b33 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -439,7 +439,7 @@ static void _read_done(struct ore_io_state *ios, void *private)
 	objlayout_read_done(&objios->oir, status, objios->sync);
 }
 
-int objio_read_pagelist(struct nfs_read_data *rdata)
+int objio_read_pagelist(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *hdr = rdata->header;
 	struct objio_state *objios;
@@ -487,7 +487,7 @@ static void _write_done(struct ore_io_state *ios, void *private)
 static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
 {
 	struct objio_state *objios = priv;
-	struct nfs_write_data *wdata = objios->oir.rpcdata;
+	struct nfs_pgio_data *wdata = objios->oir.rpcdata;
 	struct address_space *mapping = wdata->header->inode->i_mapping;
 	pgoff_t index = offset / PAGE_SIZE;
 	struct page *page;
@@ -531,7 +531,7 @@ static const struct _ore_r4w_op _r4w_op = {
 	.put_page = &__r4w_put_page,
 };
 
-int objio_write_pagelist(struct nfs_write_data *wdata, int how)
+int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
 	struct objio_state *objios;
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index e4f9cbfec67b..2f955f671003 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -229,11 +229,11 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
 static void _rpc_read_complete(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_read_data *rdata;
+	struct nfs_pgio_data *rdata;
 
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	rdata = container_of(task, struct nfs_read_data, task);
+	rdata = container_of(task, struct nfs_pgio_data, task);
 
 	pnfs_ld_read_done(rdata);
 }
@@ -241,7 +241,7 @@ static void _rpc_read_complete(struct work_struct *work)
 void
 objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 {
-	struct nfs_read_data *rdata = oir->rpcdata;
+	struct nfs_pgio_data *rdata = oir->rpcdata;
 
 	oir->status = rdata->task.tk_status = status;
 	if (status >= 0)
@@ -266,7 +266,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
  * Perform sync or async reads.
  */
 enum pnfs_try_status
-objlayout_read_pagelist(struct nfs_read_data *rdata)
+objlayout_read_pagelist(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *hdr = rdata->header;
 	struct inode *inode = hdr->inode;
@@ -312,11 +312,11 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
 static void _rpc_write_complete(struct work_struct *work)
 {
 	struct rpc_task *task;
-	struct nfs_write_data *wdata;
+	struct nfs_pgio_data *wdata;
 
 	dprintk("%s enter\n", __func__);
 	task = container_of(work, struct rpc_task, u.tk_work);
-	wdata = container_of(task, struct nfs_write_data, task);
+	wdata = container_of(task, struct nfs_pgio_data, task);
 
 	pnfs_ld_write_done(wdata);
 }
@@ -324,7 +324,7 @@ static void _rpc_write_complete(struct work_struct *work)
 void
 objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
 {
-	struct nfs_write_data *wdata = oir->rpcdata;
+	struct nfs_pgio_data *wdata = oir->rpcdata;
 
 	oir->status = wdata->task.tk_status = status;
 	if (status >= 0) {
@@ -351,7 +351,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
  * Perform sync or async writes.
  */
 enum pnfs_try_status
-objlayout_write_pagelist(struct nfs_write_data *wdata,
+objlayout_write_pagelist(struct nfs_pgio_data *wdata,
 			 int how)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 87aa1dec6120..01e041029a6c 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
  */
 extern void objio_free_result(struct objlayout_io_res *oir);
 
-extern int objio_read_pagelist(struct nfs_read_data *rdata);
-extern int objio_write_pagelist(struct nfs_write_data *wdata, int how);
+extern int objio_read_pagelist(struct nfs_pgio_data *rdata);
+extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how);
 
 /*
  * callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
 extern void objlayout_free_lseg(struct pnfs_layout_segment *);
 
 extern enum pnfs_try_status objlayout_read_pagelist(
-	struct nfs_read_data *);
+	struct nfs_pgio_data *);
 
 extern enum pnfs_try_status objlayout_write_pagelist(
-	struct nfs_write_data *,
+	struct nfs_pgio_data *,
 	int how);
 
 extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 3d5bc2baafd1..e9cea3ab7cf9 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1492,7 +1492,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
 }
 EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
 
-static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
+static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1511,7 +1511,7 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
 /*
  * Called by non rpc-based layout drivers
  */
-void pnfs_ld_write_done(struct nfs_write_data *data)
+void pnfs_ld_write_done(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1527,7 +1527,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
 static void
 pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
-		struct nfs_write_data *data)
+		struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1540,7 +1540,7 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
 }
 
 static enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_write_data *wdata,
+pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
 			const struct rpc_call_ops *call_ops,
 			struct pnfs_layout_segment *lseg,
 			int how)
@@ -1564,7 +1564,7 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
 static void
 pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
 {
-	struct nfs_write_data *data;
+	struct nfs_pgio_data *data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
 
@@ -1572,7 +1572,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
 	while (!list_empty(head)) {
 		enum pnfs_try_status trypnfs;
 
-		data = list_first_entry(head, struct nfs_write_data, list);
+		data = list_first_entry(head, struct nfs_pgio_data, list);
 		list_del_init(&data->list);
 
 		trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
@@ -1647,7 +1647,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
 }
 EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
 
-static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1666,7 +1666,7 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
 /*
  * Called by non rpc-based layout drivers
  */
-void pnfs_ld_read_done(struct nfs_read_data *data)
+void pnfs_ld_read_done(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1682,7 +1682,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
 
 static void
 pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
-		struct nfs_read_data *data)
+		struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
 
@@ -1698,7 +1698,7 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
  * Call the appropriate parallel I/O subsystem read function.
  */
 static enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_read_data *rdata,
+pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
 		       const struct rpc_call_ops *call_ops,
 		       struct pnfs_layout_segment *lseg)
 {
@@ -1722,7 +1722,7 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
 static void
 pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
 {
-	struct nfs_read_data *data;
+	struct nfs_pgio_data *data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
 
@@ -1730,7 +1730,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
 	while (!list_empty(head)) {
 		enum pnfs_try_status trypnfs;
 
-		data = list_first_entry(head, struct nfs_read_data, list);
+		data = list_first_entry(head, struct nfs_pgio_data, list);
 		list_del_init(&data->list);
 
 		trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
@@ -1821,7 +1821,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
 EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
 
 void
-pnfs_set_layoutcommit(struct nfs_write_data *wdata)
+pnfs_set_layoutcommit(struct nfs_pgio_data *wdata)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
 	struct inode *inode = hdr->inode;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 94a9a1834b3f..0031267d7492 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type {
 	 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
 	 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
 	 */
-	enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
-	enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
+	enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data);
+	enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how);
 
 	void (*free_deviceid_node) (struct nfs4_deviceid_node *);
 
@@ -212,13 +212,13 @@ bool pnfs_roc(struct inode *ino);
 void pnfs_roc_release(struct inode *ino);
 void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
 bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
-void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
+void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata);
 void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
 int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
 int pnfs_commit_and_return_layout(struct inode *);
-void pnfs_ld_write_done(struct nfs_write_data *);
-void pnfs_ld_read_done(struct nfs_read_data *);
+void pnfs_ld_write_done(struct nfs_pgio_data *);
+void pnfs_ld_read_done(struct nfs_pgio_data *);
 struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
 					       struct nfs_open_context *ctx,
 					       loff_t pos,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 8cc227fcd4d2..c54829eb2156 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -578,7 +578,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	return 0;
 }
 
-static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 
@@ -594,18 +594,18 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
 	return 0;
 }
 
-static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
 }
 
-static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
 }
 
-static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 
@@ -614,14 +614,14 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 	return 0;
 }
 
-static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
 {
 	/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
 	data->args.stable = NFS_FILE_SYNC;
 	msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
 }
 
-static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 473bba35a2cb..9e426cc179ed 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -51,10 +51,10 @@ struct nfs_read_header *nfs_readhdr_alloc(void)
 }
 EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);
 
-static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
+static struct nfs_pgio_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
 						unsigned int pagecount)
 {
-	struct nfs_read_data *data, *prealloc;
+	struct nfs_pgio_data *data, *prealloc;
 
 	prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
 	if (prealloc->header == NULL)
@@ -84,7 +84,7 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr)
 }
 EXPORT_SYMBOL_GPL(nfs_readhdr_free);
 
-void nfs_readdata_release(struct nfs_read_data *rdata)
+void nfs_readdata_release(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *hdr = rdata->header;
 	struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
@@ -212,7 +212,7 @@ out:
 }
 
 int nfs_initiate_read(struct rpc_clnt *clnt,
-		      struct nfs_read_data *data,
+		      struct nfs_pgio_data *data,
 		      const struct rpc_call_ops *call_ops, int flags)
 {
 	struct inode *inode = data->header->inode;
@@ -255,7 +255,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
 /*
  * Set up the NFS read request struct
  */
-static void nfs_read_rpcsetup(struct nfs_read_data *data,
+static void nfs_read_rpcsetup(struct nfs_pgio_data *data,
 		unsigned int count, unsigned int offset)
 {
 	struct nfs_page *req = data->header->req;
@@ -274,7 +274,7 @@ static void nfs_read_rpcsetup(struct nfs_read_data *data,
 	nfs_fattr_init(&data->fattr);
 }
 
-static int nfs_do_read(struct nfs_read_data *data,
+static int nfs_do_read(struct nfs_pgio_data *data,
 		const struct rpc_call_ops *call_ops)
 {
 	struct inode *inode = data->header->inode;
@@ -286,13 +286,13 @@ static int
 nfs_do_multiple_reads(struct list_head *head,
 		const struct rpc_call_ops *call_ops)
 {
-	struct nfs_read_data *data;
+	struct nfs_pgio_data *data;
 	int ret = 0;
 
 	while (!list_empty(head)) {
 		int ret2;
 
-		data = list_first_entry(head, struct nfs_read_data, list);
+		data = list_first_entry(head, struct nfs_pgio_data, list);
 		list_del_init(&data->list);
 
 		ret2 = nfs_do_read(data, call_ops);
@@ -324,8 +324,8 @@ static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
 {
 	set_bit(NFS_IOHDR_REDO, &hdr->flags);
 	while (!list_empty(&hdr->rpc_list)) {
-		struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
-				struct nfs_read_data, list);
+		struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list,
+				struct nfs_pgio_data, list);
 		list_del(&data->list);
 		nfs_readdata_release(data);
 	}
@@ -350,7 +350,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_page *req = hdr->req;
 	struct page *page = req->wb_page;
-	struct nfs_read_data *data;
+	struct nfs_pgio_data *data;
 	size_t rsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
 
@@ -382,7 +382,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_page		*req;
 	struct page		**pages;
-	struct nfs_read_data    *data;
+	struct nfs_pgio_data	*data;
 	struct list_head *head = &desc->pg_list;
 
 	data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
@@ -447,7 +447,7 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+int nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct inode *inode = data->header->inode;
 	int status;
@@ -468,7 +468,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 	return 0;
 }
 
-static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_args *argp = &data->args;
 	struct nfs_pgio_res  *resp = &data->res;
@@ -490,7 +490,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
 
 static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
 {
-	struct nfs_read_data *data = calldata;
+	struct nfs_pgio_data *data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 
 	/* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
@@ -520,7 +520,7 @@ static void nfs_readpage_release_common(void *calldata)
 
 void nfs_read_prepare(struct rpc_task *task, void *calldata)
 {
-	struct nfs_read_data *data = calldata;
+	struct nfs_pgio_data *data = calldata;
 	int err;
 	err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
 	if (err)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d392a70092fe..3a2fc5c4c79a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -88,10 +88,10 @@ struct nfs_write_header *nfs_writehdr_alloc(void)
 }
 EXPORT_SYMBOL_GPL(nfs_writehdr_alloc);
 
-static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
+static struct nfs_pgio_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
 						  unsigned int pagecount)
 {
-	struct nfs_write_data *data, *prealloc;
+	struct nfs_pgio_data *data, *prealloc;
 
 	prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
 	if (prealloc->header == NULL)
@@ -120,7 +120,7 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 }
 EXPORT_SYMBOL_GPL(nfs_writehdr_free);
 
-void nfs_writedata_release(struct nfs_write_data *wdata)
+void nfs_writedata_release(struct nfs_pgio_data *wdata)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
 	struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
@@ -582,7 +582,7 @@ nfs_clear_request_commit(struct nfs_page *req)
 }
 
 static inline
-int nfs_write_need_commit(struct nfs_write_data *data)
+int nfs_write_need_commit(struct nfs_pgio_data *data)
 {
 	if (data->verf.committed == NFS_DATA_SYNC)
 		return data->header->lseg == NULL;
@@ -613,7 +613,7 @@ nfs_clear_request_commit(struct nfs_page *req)
 }
 
 static inline
-int nfs_write_need_commit(struct nfs_write_data *data)
+int nfs_write_need_commit(struct nfs_pgio_data *data)
 {
 	return 0;
 }
@@ -990,7 +990,7 @@ static int flush_task_priority(int how)
 }
 
 int nfs_initiate_write(struct rpc_clnt *clnt,
-		       struct nfs_write_data *data,
+		       struct nfs_pgio_data *data,
 		       const struct rpc_call_ops *call_ops,
 		       int how, int flags)
 {
@@ -1047,7 +1047,7 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-static void nfs_write_rpcsetup(struct nfs_write_data *data,
+static void nfs_write_rpcsetup(struct nfs_pgio_data *data,
 		unsigned int count, unsigned int offset,
 		int how, struct nfs_commit_info *cinfo)
 {
@@ -1082,7 +1082,7 @@ static void nfs_write_rpcsetup(struct nfs_write_data *data,
 	nfs_fattr_init(&data->fattr);
 }
 
-static int nfs_do_write(struct nfs_write_data *data,
+static int nfs_do_write(struct nfs_pgio_data *data,
 		const struct rpc_call_ops *call_ops,
 		int how)
 {
@@ -1095,13 +1095,13 @@ static int nfs_do_multiple_writes(struct list_head *head,
 		const struct rpc_call_ops *call_ops,
 		int how)
 {
-	struct nfs_write_data *data;
+	struct nfs_pgio_data *data;
 	int ret = 0;
 
 	while (!list_empty(head)) {
 		int ret2;
 
-		data = list_first_entry(head, struct nfs_write_data, list);
+		data = list_first_entry(head, struct nfs_pgio_data, list);
 		list_del_init(&data->list);
 		
 		ret2 = nfs_do_write(data, call_ops, how);
@@ -1144,8 +1144,8 @@ static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
 {
 	set_bit(NFS_IOHDR_REDO, &hdr->flags);
 	while (!list_empty(&hdr->rpc_list)) {
-		struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
-				struct nfs_write_data, list);
+		struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list,
+				struct nfs_pgio_data, list);
 		list_del(&data->list);
 		nfs_writedata_release(data);
 	}
@@ -1161,7 +1161,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_page *req = hdr->req;
 	struct page *page = req->wb_page;
-	struct nfs_write_data *data;
+	struct nfs_pgio_data *data;
 	size_t wsize = desc->pg_bsize, nbytes;
 	unsigned int offset;
 	int requests = 0;
@@ -1211,7 +1211,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
 {
 	struct nfs_page		*req;
 	struct page		**pages;
-	struct nfs_write_data	*data;
+	struct nfs_pgio_data	*data;
 	struct list_head *head = &desc->pg_list;
 	struct nfs_commit_info cinfo;
 
@@ -1305,7 +1305,7 @@ EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
 
 void nfs_write_prepare(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data = calldata;
+	struct nfs_pgio_data *data = calldata;
 	int err;
 	err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
 	if (err)
@@ -1328,14 +1328,14 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
  */
 static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data	*data = calldata;
+	struct nfs_pgio_data	*data = calldata;
 
 	nfs_writeback_done(task, data);
 }
 
 static void nfs_writeback_release_common(void *calldata)
 {
-	struct nfs_write_data	*data = calldata;
+	struct nfs_pgio_data	*data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 	int status = data->task.tk_status;
 
@@ -1386,7 +1386,7 @@ static int nfs_should_remove_suid(const struct inode *inode)
 /*
  * This function is called when the WRITE call is complete.
  */
-void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
+void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_args	*argp = &data->args;
 	struct nfs_pgio_res	*resp = &data->res;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fa6918b0f829..7e0db561d829 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -520,7 +520,7 @@ extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
 extern int  nfs_writepages(struct address_space *, struct writeback_control *);
 extern int  nfs_flush_incompatible(struct file *file, struct page *page);
 extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
-extern void nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
+extern void nfs_writeback_done(struct rpc_task *, struct nfs_pgio_data *);
 
 /*
  * Try to write back everything synchronously (but check the
@@ -553,7 +553,7 @@ nfs_have_writebacks(struct inode *inode)
 extern int  nfs_readpage(struct file *, struct page *);
 extern int  nfs_readpages(struct file *, struct address_space *,
 		struct list_head *, unsigned);
-extern int  nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
+extern int  nfs_readpage_result(struct rpc_task *, struct nfs_pgio_data *);
 extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
 			       struct page *);
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 381f832b03c6..64f6f1491c0d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1247,20 +1247,6 @@ struct nfs_page_array {
 	struct page		*page_array[NFS_PAGEVEC_SIZE];
 };
 
-struct nfs_read_data {
-	struct nfs_pgio_header	*header;
-	struct list_head	list;
-	struct rpc_task		task;
-	struct nfs_fattr	fattr;	/* fattr storage */
-	struct nfs_pgio_args	args;
-	struct nfs_pgio_res	res;
-	unsigned long		timestamp;	/* For lease renewal */
-	int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
-	__u64			mds_offset;
-	struct nfs_page_array	pages;
-	struct nfs_client	*ds_clp;	/* pNFS data server */
-};
-
 /* used as flag bits in nfs_pgio_header */
 enum {
 	NFS_IOHDR_ERROR = 0,
@@ -1293,29 +1279,29 @@ struct nfs_pgio_header {
 	unsigned long		flags;
 };
 
-struct nfs_read_header {
-	struct nfs_pgio_header	header;
-	struct nfs_read_data	rpc_data;
-};
-
-struct nfs_write_data {
+struct nfs_pgio_data {
 	struct nfs_pgio_header	*header;
 	struct list_head	list;
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;
-	struct nfs_writeverf	verf;
+	struct nfs_writeverf	verf;		/* Used for writes */
 	struct nfs_pgio_args	args;		/* argument struct */
 	struct nfs_pgio_res	res;		/* result struct */
 	unsigned long		timestamp;	/* For lease renewal */
-	int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
+	int (*pgio_done_cb) (struct rpc_task *task, struct nfs_pgio_data *data);
 	__u64			mds_offset;	/* Filelayout dense stripe */
 	struct nfs_page_array	pages;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 };
 
+struct nfs_read_header {
+	struct nfs_pgio_header	header;
+	struct nfs_pgio_data	rpc_data;
+};
+
 struct nfs_write_header {
 	struct nfs_pgio_header	header;
-	struct nfs_write_data	rpc_data;
+	struct nfs_pgio_data	rpc_data;
 	struct nfs_writeverf	verf;
 };
 
@@ -1448,12 +1434,12 @@ struct nfs_rpc_ops {
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
 	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
-	void	(*read_setup)   (struct nfs_read_data *, struct rpc_message *);
-	int	(*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *);
-	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
-	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
-	int	(*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
-	int	(*write_done)  (struct rpc_task *, struct nfs_write_data *);
+	void	(*read_setup)   (struct nfs_pgio_data *, struct rpc_message *);
+	int	(*read_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
+	int	(*read_done)  (struct rpc_task *, struct nfs_pgio_data *);
+	void	(*write_setup)  (struct nfs_pgio_data *, struct rpc_message *);
+	int	(*write_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
+	int	(*write_done)  (struct rpc_task *, struct nfs_pgio_data *);
 	void	(*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
 	void	(*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
 	int	(*commit_done) (struct rpc_task *, struct nfs_commit_data *);
-- 
cgit 


From c0752cdfbbb691cfe98812f7aed8ce1e766823c4 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:27 -0400
Subject: NFS: Create a common read and write header struct

The only difference between the read and write header structs is the write
verifier field, but we can keep that for a little bit longer.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h       |  4 ++--
 fs/nfs/pnfs.c           |  4 ++--
 fs/nfs/read.c           | 14 +++++++-------
 fs/nfs/write.c          | 14 +++++++-------
 include/linux/nfs_xdr.h |  7 +------
 5 files changed, 19 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index af01b80412dd..b0e7a41d14a8 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -395,7 +395,7 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool
 
 struct nfs_pgio_completion_ops;
 /* read.c */
-extern struct nfs_read_header *nfs_readhdr_alloc(void);
+extern struct nfs_rw_header *nfs_readhdr_alloc(void);
 extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, bool force_mds,
@@ -424,7 +424,7 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
 extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, int ioflags, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
-extern struct nfs_write_header *nfs_writehdr_alloc(void);
+extern struct nfs_rw_header *nfs_writehdr_alloc(void);
 extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
 			     struct nfs_pgio_header *hdr);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e9cea3ab7cf9..43cfe11aa1a4 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1592,7 +1592,7 @@ EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
 int
 pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_write_header *whdr;
+	struct nfs_rw_header *whdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
@@ -1750,7 +1750,7 @@ EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
 int
 pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_read_header *rhdr;
+	struct nfs_rw_header *rhdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 9e426cc179ed..d29ca3673694 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -34,9 +34,9 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
-struct nfs_read_header *nfs_readhdr_alloc(void)
+struct nfs_rw_header *nfs_readhdr_alloc(void)
 {
-	struct nfs_read_header *rhdr;
+	struct nfs_rw_header *rhdr;
 
 	rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
 	if (rhdr) {
@@ -56,7 +56,7 @@ static struct nfs_pgio_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
 {
 	struct nfs_pgio_data *data, *prealloc;
 
-	prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
+	prealloc = &container_of(hdr, struct nfs_rw_header, header)->rpc_data;
 	if (prealloc->header == NULL)
 		data = prealloc;
 	else
@@ -78,7 +78,7 @@ out:
 
 void nfs_readhdr_free(struct nfs_pgio_header *hdr)
 {
-	struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
+	struct nfs_rw_header *rhdr = container_of(hdr, struct nfs_rw_header, header);
 
 	kmem_cache_free(nfs_rdata_cachep, rhdr);
 }
@@ -87,7 +87,7 @@ EXPORT_SYMBOL_GPL(nfs_readhdr_free);
 void nfs_readdata_release(struct nfs_pgio_data *rdata)
 {
 	struct nfs_pgio_header *hdr = rdata->header;
-	struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
+	struct nfs_rw_header *read_header = container_of(hdr, struct nfs_rw_header, header);
 
 	put_nfs_open_context(rdata->args.context);
 	if (rdata->pages.pagevec != rdata->pages.page_array)
@@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pagein);
 
 static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_read_header *rhdr;
+	struct nfs_rw_header *rhdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
@@ -680,7 +680,7 @@ out:
 int __init nfs_init_readpagecache(void)
 {
 	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
-					     sizeof(struct nfs_read_header),
+					     sizeof(struct nfs_rw_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_rdata_cachep == NULL)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 3a2fc5c4c79a..37c4c988519c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -70,9 +70,9 @@ void nfs_commit_free(struct nfs_commit_data *p)
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
 
-struct nfs_write_header *nfs_writehdr_alloc(void)
+struct nfs_rw_header *nfs_writehdr_alloc(void)
 {
-	struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
+	struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
 
 	if (p) {
 		struct nfs_pgio_header *hdr = &p->header;
@@ -93,7 +93,7 @@ static struct nfs_pgio_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
 {
 	struct nfs_pgio_data *data, *prealloc;
 
-	prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
+	prealloc = &container_of(hdr, struct nfs_rw_header, header)->rpc_data;
 	if (prealloc->header == NULL)
 		data = prealloc;
 	else
@@ -115,7 +115,7 @@ out:
 
 void nfs_writehdr_free(struct nfs_pgio_header *hdr)
 {
-	struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
+	struct nfs_rw_header *whdr = container_of(hdr, struct nfs_rw_header, header);
 	mempool_free(whdr, nfs_wdata_mempool);
 }
 EXPORT_SYMBOL_GPL(nfs_writehdr_free);
@@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(nfs_writehdr_free);
 void nfs_writedata_release(struct nfs_pgio_data *wdata)
 {
 	struct nfs_pgio_header *hdr = wdata->header;
-	struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
+	struct nfs_rw_header *write_header = container_of(hdr, struct nfs_rw_header, header);
 
 	put_nfs_open_context(wdata->args.context);
 	if (wdata->pages.pagevec != wdata->pages.page_array)
@@ -1253,7 +1253,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_flush);
 
 static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 {
-	struct nfs_write_header *whdr;
+	struct nfs_rw_header *whdr;
 	struct nfs_pgio_header *hdr;
 	int ret;
 
@@ -1910,7 +1910,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
 int __init nfs_init_writepagecache(void)
 {
 	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
-					     sizeof(struct nfs_write_header),
+					     sizeof(struct nfs_rw_header),
 					     0, SLAB_HWCACHE_ALIGN,
 					     NULL);
 	if (nfs_wdata_cachep == NULL)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 64f6f1491c0d..2d34e0dc1870 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1294,12 +1294,7 @@ struct nfs_pgio_data {
 	struct nfs_client	*ds_clp;	/* pNFS data server */
 };
 
-struct nfs_read_header {
-	struct nfs_pgio_header	header;
-	struct nfs_pgio_data	rpc_data;
-};
-
-struct nfs_write_header {
+struct nfs_rw_header {
 	struct nfs_pgio_header	header;
 	struct nfs_pgio_data	rpc_data;
 	struct nfs_writeverf	verf;
-- 
cgit 


From f79d06f544a797d75cbf5256a5d06c4b3d2759cc Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:28 -0400
Subject: NFS: Move the write verifier into the nfs_pgio_header

The header had a pointer to the verifier, which was set to point at the
copy stored in the old write header struct.  We don't need to keep the
pointer around now that we have shared structures, so embed the verifier
directly in the nfs_pgio_header instead.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c         | 4 ++--
 fs/nfs/write.c          | 7 +++----
 include/linux/nfs_xdr.h | 3 +--
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bbe688e2cc89..164b0167677b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -813,12 +813,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
 				bit = NFS_IOHDR_NEED_RESCHED;
 			else if (dreq->flags == 0) {
-				memcpy(&dreq->verf, hdr->verf,
+				memcpy(&dreq->verf, &hdr->verf,
 				       sizeof(dreq->verf));
 				bit = NFS_IOHDR_NEED_COMMIT;
 				dreq->flags = NFS_ODIRECT_DO_COMMIT;
 			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
-				if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
+				if (memcmp(&dreq->verf, &hdr->verf, sizeof(dreq->verf))) {
 					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 					bit = NFS_IOHDR_NEED_RESCHED;
 				} else
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 37c4c988519c..321a791c72bf 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -82,7 +82,6 @@ struct nfs_rw_header *nfs_writehdr_alloc(void)
 		INIT_LIST_HEAD(&hdr->rpc_list);
 		spin_lock_init(&hdr->lock);
 		atomic_set(&hdr->refcnt, 0);
-		hdr->verf = &p->verf;
 	}
 	return p;
 }
@@ -644,7 +643,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 			goto next;
 		}
 		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
-			memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf));
+			memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
 			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
 			goto next;
 		}
@@ -1344,8 +1343,8 @@ static void nfs_writeback_release_common(void *calldata)
 		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
 			; /* Do nothing */
 		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
-			memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf));
-		else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))
+			memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
+		else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
 			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
 		spin_unlock(&hdr->lock);
 	}
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 2d34e0dc1870..965c2aa6b33f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1263,7 +1263,7 @@ struct nfs_pgio_header {
 	struct list_head	rpc_list;
 	atomic_t		refcnt;
 	struct nfs_page		*req;
-	struct nfs_writeverf	*verf;
+	struct nfs_writeverf	verf;		/* Used for writes */
 	struct pnfs_layout_segment *lseg;
 	loff_t			io_start;
 	const struct rpc_call_ops *mds_ops;
@@ -1297,7 +1297,6 @@ struct nfs_pgio_data {
 struct nfs_rw_header {
 	struct nfs_pgio_header	header;
 	struct nfs_pgio_data	rpc_data;
-	struct nfs_writeverf	verf;
 };
 
 struct nfs_mds_commit_info {
-- 
cgit 


From 4a0de55c565a36cac8422b76a948c4634a90781e Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:30 -0400
Subject: NFS: Create a common rw_header_alloc and rw_header_free function

I create a new struct nfs_rw_ops to capture the differences between reads
and writes.  This struct will be set when initializing a new
nfs_pageio_descriptor, and then passed on to the nfs_pgio_header inside the
nfs_rw_header when a new header is allocated.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h        |  6 ++----
 fs/nfs/pagelist.c        | 33 +++++++++++++++++++++++++++++++++
 fs/nfs/pnfs.c            |  8 ++++----
 fs/nfs/read.c            | 34 +++++++++++++---------------------
 fs/nfs/write.c           | 28 ++++++++++++----------------
 include/linux/nfs_page.h |  7 +++++++
 include/linux/nfs_xdr.h  |  1 +
 7 files changed, 72 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 5ddc142c5062..9d6a40eae11c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -237,6 +237,8 @@ extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
 int nfs_iocounter_wait(struct nfs_io_counter *c);
 
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
+void nfs_rw_header_free(struct nfs_pgio_header *);
 struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int);
 void nfs_pgio_data_release(struct nfs_pgio_data *);
 
@@ -397,8 +399,6 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool
 
 struct nfs_pgio_completion_ops;
 /* read.c */
-extern struct nfs_rw_header *nfs_readhdr_alloc(void);
-extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
 extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
@@ -425,8 +425,6 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
 extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, int ioflags, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
-extern struct nfs_rw_header *nfs_writehdr_alloc(void);
-extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
 extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
 			     struct nfs_pgio_header *hdr);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index a98ccf722d7b..ca356fe0836b 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -300,6 +300,37 @@ static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr)
 	return container_of(hdr, struct nfs_rw_header, header);
 }
 
+/**
+ * nfs_rw_header_alloc - Allocate a header for a read or write
+ * @ops: Read or write function vector
+ */
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
+{
+	struct nfs_rw_header *header = ops->rw_alloc_header();
+
+	if (header) {
+		struct nfs_pgio_header *hdr = &header->header;
+
+		INIT_LIST_HEAD(&hdr->pages);
+		INIT_LIST_HEAD(&hdr->rpc_list);
+		spin_lock_init(&hdr->lock);
+		atomic_set(&hdr->refcnt, 0);
+		hdr->rw_ops = ops;
+	}
+	return header;
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_alloc);
+
+/**
+ * nfs_rw_header_free - Free a read or write header
+ * @hdr: The header to free
+ */
+void nfs_rw_header_free(struct nfs_pgio_header *hdr)
+{
+	hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr));
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_free);
+
 /**
  * nfs_pgio_data_alloc - Allocate pageio data
  * @hdr: The header making a request
@@ -367,6 +398,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 		     struct inode *inode,
 		     const struct nfs_pageio_ops *pg_ops,
 		     const struct nfs_pgio_completion_ops *compl_ops,
+		     const struct nfs_rw_ops *rw_ops,
 		     size_t bsize,
 		     int io_flags)
 {
@@ -380,6 +412,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_inode = inode;
 	desc->pg_ops = pg_ops;
 	desc->pg_completion_ops = compl_ops;
+	desc->pg_rw_ops = rw_ops;
 	desc->pg_ioflags = io_flags;
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index e192ba69a7d4..54c84c128b2b 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1585,7 +1585,7 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
 static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
 {
 	pnfs_put_lseg(hdr->lseg);
-	nfs_writehdr_free(hdr);
+	nfs_rw_header_free(hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
 
@@ -1596,7 +1596,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	whdr = nfs_writehdr_alloc();
+	whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
 	if (!whdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		pnfs_put_lseg(desc->pg_lseg);
@@ -1743,7 +1743,7 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
 static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
 {
 	pnfs_put_lseg(hdr->lseg);
-	nfs_readhdr_free(hdr);
+	nfs_rw_header_free(hdr);
 }
 EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
 
@@ -1754,7 +1754,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	rhdr = nfs_readhdr_alloc();
+	rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
 	if (!rhdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		ret = -ENOMEM;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index ab4c1a5b5fbd..4cf3577bd54e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -31,33 +31,19 @@
 static const struct nfs_pageio_ops nfs_pageio_read_ops;
 static const struct rpc_call_ops nfs_read_common_ops;
 static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
+static const struct nfs_rw_ops nfs_rw_read_ops;
 
 static struct kmem_cache *nfs_rdata_cachep;
 
-struct nfs_rw_header *nfs_readhdr_alloc(void)
+static struct nfs_rw_header *nfs_readhdr_alloc(void)
 {
-	struct nfs_rw_header *rhdr;
-
-	rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
-	if (rhdr) {
-		struct nfs_pgio_header *hdr = &rhdr->header;
-
-		INIT_LIST_HEAD(&hdr->pages);
-		INIT_LIST_HEAD(&hdr->rpc_list);
-		spin_lock_init(&hdr->lock);
-		atomic_set(&hdr->refcnt, 0);
-	}
-	return rhdr;
+	return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
 }
-EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);
 
-void nfs_readhdr_free(struct nfs_pgio_header *hdr)
+static void nfs_readhdr_free(struct nfs_rw_header *rhdr)
 {
-	struct nfs_rw_header *rhdr = container_of(hdr, struct nfs_rw_header, header);
-
 	kmem_cache_free(nfs_rdata_cachep, rhdr);
 }
-EXPORT_SYMBOL_GPL(nfs_readhdr_free);
 
 static
 int nfs_return_empty_page(struct page *page)
@@ -79,7 +65,8 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 	if (server->pnfs_curr_ld && !force_mds)
 		pg_ops = server->pnfs_curr_ld->pg_read_ops;
 #endif
-	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->rsize, 0);
+	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
+			server->rsize, 0);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
 
@@ -375,13 +362,13 @@ static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	rhdr = nfs_readhdr_alloc();
+	rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
 	if (!rhdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		return -ENOMEM;
 	}
 	hdr = &rhdr->header;
-	nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
+	nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
 	atomic_inc(&hdr->refcnt);
 	ret = nfs_generic_pagein(desc, hdr);
 	if (ret == 0)
@@ -647,3 +634,8 @@ void nfs_destroy_readpagecache(void)
 {
 	kmem_cache_destroy(nfs_rdata_cachep);
 }
+
+static const struct nfs_rw_ops nfs_rw_read_ops = {
+	.rw_alloc_header	= nfs_readhdr_alloc,
+	.rw_free_header		= nfs_readhdr_free,
+};
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0dc4d6a28bd0..9c5cde38da45 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -46,6 +46,7 @@ static const struct rpc_call_ops nfs_write_common_ops;
 static const struct rpc_call_ops nfs_commit_ops;
 static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
 static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
+static const struct nfs_rw_ops nfs_rw_write_ops;
 
 static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
@@ -70,29 +71,19 @@ void nfs_commit_free(struct nfs_commit_data *p)
 }
 EXPORT_SYMBOL_GPL(nfs_commit_free);
 
-struct nfs_rw_header *nfs_writehdr_alloc(void)
+static struct nfs_rw_header *nfs_writehdr_alloc(void)
 {
 	struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
 
-	if (p) {
-		struct nfs_pgio_header *hdr = &p->header;
-
+	if (p)
 		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&hdr->pages);
-		INIT_LIST_HEAD(&hdr->rpc_list);
-		spin_lock_init(&hdr->lock);
-		atomic_set(&hdr->refcnt, 0);
-	}
 	return p;
 }
-EXPORT_SYMBOL_GPL(nfs_writehdr_alloc);
 
-void nfs_writehdr_free(struct nfs_pgio_header *hdr)
+static void nfs_writehdr_free(struct nfs_rw_header *whdr)
 {
-	struct nfs_rw_header *whdr = container_of(hdr, struct nfs_rw_header, header);
 	mempool_free(whdr, nfs_wdata_mempool);
 }
-EXPORT_SYMBOL_GPL(nfs_writehdr_free);
 
 static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
 {
@@ -1210,13 +1201,13 @@ static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 	struct nfs_pgio_header *hdr;
 	int ret;
 
-	whdr = nfs_writehdr_alloc();
+	whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
 	if (!whdr) {
 		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 		return -ENOMEM;
 	}
 	hdr = &whdr->header;
-	nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
+	nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
 	atomic_inc(&hdr->refcnt);
 	ret = nfs_generic_flush(desc, hdr);
 	if (ret == 0)
@@ -1244,7 +1235,8 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 	if (server->pnfs_curr_ld && !force_mds)
 		pg_ops = server->pnfs_curr_ld->pg_write_ops;
 #endif
-	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->wsize, ioflags);
+	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
+			server->wsize, ioflags);
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
 
@@ -1925,3 +1917,7 @@ void nfs_destroy_writepagecache(void)
 	kmem_cache_destroy(nfs_wdata_cachep);
 }
 
+static const struct nfs_rw_ops nfs_rw_write_ops = {
+	.rw_alloc_header	= nfs_writehdr_alloc,
+	.rw_free_header		= nfs_writehdr_free,
+};
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 92ce5783b707..594812546c25 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -52,6 +52,11 @@ struct nfs_pageio_ops {
 	int	(*pg_doio)(struct nfs_pageio_descriptor *);
 };
 
+struct nfs_rw_ops {
+	struct nfs_rw_header *(*rw_alloc_header)(void);
+	void (*rw_free_header)(struct nfs_rw_header *);
+};
+
 struct nfs_pageio_descriptor {
 	struct list_head	pg_list;
 	unsigned long		pg_bytes_written;
@@ -63,6 +68,7 @@ struct nfs_pageio_descriptor {
 
 	struct inode		*pg_inode;
 	const struct nfs_pageio_ops *pg_ops;
+	const struct nfs_rw_ops *pg_rw_ops;
 	int 			pg_ioflags;
 	int			pg_error;
 	const struct rpc_call_ops *pg_rpc_callops;
@@ -86,6 +92,7 @@ extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 			     struct inode *inode,
 			     const struct nfs_pageio_ops *pg_ops,
 			     const struct nfs_pgio_completion_ops *compl_ops,
+			     const struct nfs_rw_ops *rw_ops,
 			     size_t bsize,
 			     int how);
 extern	int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 965c2aa6b33f..a1b91b67145e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1269,6 +1269,7 @@ struct nfs_pgio_header {
 	const struct rpc_call_ops *mds_ops;
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_pgio_completion_ops *completion_ops;
+	const struct nfs_rw_ops	*rw_ops;
 	struct nfs_direct_req	*dreq;
 	void			*layout_private;
 	spinlock_t		lock;
-- 
cgit 


From a4cdda59111f92000297e0d3edb1e0e08ba3549b Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:31 -0400
Subject: NFS: Create a common pgio_rpc_prepare function

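The read and write paths do almost exactly the same thing for the
rpc_call_prepare callback; the only difference is the FMODE_READ or
FMODE_WRITE argument used when setting the NFSv4 stateid, which is now
taken from rw_ops->rw_mode.  This patch combines them into a single
nfs_pgio_prepare function and adds a matching common nfs_pgio_release.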

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h        |  2 ++
 fs/nfs/nfs3proc.c        | 11 ++---------
 fs/nfs/nfs4proc.c        | 22 +++-------------------
 fs/nfs/pagelist.c        | 26 ++++++++++++++++++++++++++
 fs/nfs/proc.c            | 11 ++---------
 fs/nfs/read.c            | 19 +++----------------
 fs/nfs/write.c           | 19 +++++--------------
 include/linux/nfs_page.h |  2 ++
 include/linux/nfs_xdr.h  |  3 +--
 9 files changed, 46 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9d6a40eae11c..1959260f8c57 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -241,6 +241,8 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
 void nfs_rw_header_free(struct nfs_pgio_header *);
 struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int);
 void nfs_pgio_data_release(struct nfs_pgio_data *);
+void nfs_pgio_prepare(struct rpc_task *, void *);
+void nfs_pgio_release(void *);
 
 static inline void nfs_iocounter_init(struct nfs_io_counter *c)
 {
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d235369c3dfb..e7daa42bbc86 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -812,7 +812,7 @@ static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
 }
 
-static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
@@ -834,12 +834,6 @@ static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
 }
 
-static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
-{
-	rpc_call_start(task);
-	return 0;
-}
-
 static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	rpc_call_start(task);
@@ -946,11 +940,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.fsinfo		= nfs3_proc_fsinfo,
 	.pathconf	= nfs3_proc_pathconf,
 	.decode_dirent	= nfs3_decode_dirent,
+	.pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare,
 	.read_setup	= nfs3_proc_read_setup,
-	.read_rpc_prepare = nfs3_proc_read_rpc_prepare,
 	.read_done	= nfs3_read_done,
 	.write_setup	= nfs3_proc_write_setup,
-	.write_rpc_prepare = nfs3_proc_write_rpc_prepare,
 	.write_done	= nfs3_write_done,
 	.commit_setup	= nfs3_proc_commit_setup,
 	.commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e793aa91454a..44fb93a66d26 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4089,7 +4089,7 @@ static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message
 	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
 }
 
-static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
 			&data->args.seq_args,
@@ -4097,7 +4097,7 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_dat
 			task))
 		return 0;
 	if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
-				data->args.lock_context, FMODE_READ) == -EIO)
+				data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO)
 		return -EIO;
 	if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
 		return -EIO;
@@ -4177,21 +4177,6 @@ static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message
 	nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
 }
 
-static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
-{
-	if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
-			&data->args.seq_args,
-			&data->res.seq_res,
-			task))
-		return 0;
-	if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
-				data->args.lock_context, FMODE_WRITE) == -EIO)
-		return -EIO;
-	if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
-		return -EIO;
-	return 0;
-}
-
 static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	nfs4_setup_sequence(NFS_SERVER(data->inode),
@@ -8432,11 +8417,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.pathconf	= nfs4_proc_pathconf,
 	.set_capabilities = nfs4_server_capabilities,
 	.decode_dirent	= nfs4_decode_dirent,
+	.pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare,
 	.read_setup	= nfs4_proc_read_setup,
-	.read_rpc_prepare = nfs4_proc_read_rpc_prepare,
 	.read_done	= nfs4_read_done,
 	.write_setup	= nfs4_proc_write_setup,
-	.write_rpc_prepare = nfs4_proc_write_rpc_prepare,
 	.write_done	= nfs4_write_done,
 	.commit_setup	= nfs4_proc_commit_setup,
 	.commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ca356fe0836b..0fa211d35e40 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -386,6 +386,32 @@ void nfs_pgio_data_release(struct nfs_pgio_data *data)
 }
 EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
 
+/**
+ * nfs_pgio_prepare - Prepare pageio data to go over the wire
+ * @task: The current task
+ * @calldata: pageio data to prepare
+ */
+void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
+{
+	struct nfs_pgio_data *data = calldata;
+	int err;
+	err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);
+	if (err)
+		rpc_exit(task, err);
+}
+
+/**
+ * nfs_pgio_release - Release pageio data
+ * @calldata: The pageio data to release
+ */
+void nfs_pgio_release(void *calldata)
+{
+	struct nfs_pgio_data *data = calldata;
+	if (data->header->rw_ops->rw_release)
+		data->header->rw_ops->rw_release(data);
+	nfs_pgio_data_release(data);
+}
+
 /**
  * nfs_pageio_init - initialise a page io descriptor
  * @desc: pointer to descriptor
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index c54829eb2156..c171ce1a8a30 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -599,7 +599,7 @@ static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *
 	msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
 }
 
-static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
 {
 	rpc_call_start(task);
 	return 0;
@@ -621,12 +621,6 @@ static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message
 	msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
 }
 
-static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
-{
-	rpc_call_start(task);
-	return 0;
-}
-
 static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
 {
 	BUG();
@@ -734,11 +728,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
 	.fsinfo		= nfs_proc_fsinfo,
 	.pathconf	= nfs_proc_pathconf,
 	.decode_dirent	= nfs2_decode_dirent,
+	.pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare,
 	.read_setup	= nfs_proc_read_setup,
-	.read_rpc_prepare = nfs_proc_read_rpc_prepare,
 	.read_done	= nfs_read_done,
 	.write_setup	= nfs_proc_write_setup,
-	.write_rpc_prepare = nfs_proc_write_rpc_prepare,
 	.write_done	= nfs_write_done,
 	.commit_setup	= nfs_proc_commit_setup,
 	.commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 4cf3577bd54e..cfa15e828dd6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -454,24 +454,10 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
 		nfs_readpage_retry(task, data);
 }
 
-static void nfs_readpage_release_common(void *calldata)
-{
-	nfs_pgio_data_release(calldata);
-}
-
-void nfs_read_prepare(struct rpc_task *task, void *calldata)
-{
-	struct nfs_pgio_data *data = calldata;
-	int err;
-	err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
-	if (err)
-		rpc_exit(task, err);
-}
-
 static const struct rpc_call_ops nfs_read_common_ops = {
-	.rpc_call_prepare = nfs_read_prepare,
+	.rpc_call_prepare = nfs_pgio_prepare,
 	.rpc_call_done = nfs_readpage_result_common,
-	.rpc_release = nfs_readpage_release_common,
+	.rpc_release = nfs_pgio_release,
 };
 
 /*
@@ -636,6 +622,7 @@ void nfs_destroy_readpagecache(void)
 }
 
 static const struct nfs_rw_ops nfs_rw_read_ops = {
+	.rw_mode		= FMODE_READ,
 	.rw_alloc_header	= nfs_readhdr_alloc,
 	.rw_free_header		= nfs_readhdr_free,
 };
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9c5cde38da45..ae799c96ec2b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1248,15 +1248,6 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
 EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
 
 
-void nfs_write_prepare(struct rpc_task *task, void *calldata)
-{
-	struct nfs_pgio_data *data = calldata;
-	int err;
-	err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
-	if (err)
-		rpc_exit(task, err);
-}
-
 void nfs_commit_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs_commit_data *data = calldata;
@@ -1278,9 +1269,8 @@ static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
 	nfs_writeback_done(task, data);
 }
 
-static void nfs_writeback_release_common(void *calldata)
+static void nfs_writeback_release_common(struct nfs_pgio_data *data)
 {
-	struct nfs_pgio_data	*data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 	int status = data->task.tk_status;
 
@@ -1294,13 +1284,12 @@ static void nfs_writeback_release_common(void *calldata)
 			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
 		spin_unlock(&hdr->lock);
 	}
-	nfs_pgio_data_release(data);
 }
 
 static const struct rpc_call_ops nfs_write_common_ops = {
-	.rpc_call_prepare = nfs_write_prepare,
+	.rpc_call_prepare = nfs_pgio_prepare,
 	.rpc_call_done = nfs_writeback_done_common,
-	.rpc_release = nfs_writeback_release_common,
+	.rpc_release = nfs_pgio_release,
 };
 
 /*
@@ -1918,6 +1907,8 @@ void nfs_destroy_writepagecache(void)
 }
 
 static const struct nfs_rw_ops nfs_rw_write_ops = {
+	.rw_mode		= FMODE_WRITE,
 	.rw_alloc_header	= nfs_writehdr_alloc,
 	.rw_free_header		= nfs_writehdr_free,
+	.rw_release		= nfs_writeback_release_common,
 };
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 594812546c25..da00a4d6f470 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -53,8 +53,10 @@ struct nfs_pageio_ops {
 };
 
 struct nfs_rw_ops {
+	const fmode_t rw_mode;
 	struct nfs_rw_header *(*rw_alloc_header)(void);
 	void (*rw_free_header)(struct nfs_rw_header *);
+	void (*rw_release)(struct nfs_pgio_data *);
 };
 
 struct nfs_pageio_descriptor {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index a1b91b67145e..adef7bd2d06d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1429,11 +1429,10 @@ struct nfs_rpc_ops {
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
 	int	(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
+	int	(*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
 	void	(*read_setup)   (struct nfs_pgio_data *, struct rpc_message *);
-	int	(*read_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_pgio_data *);
 	void	(*write_setup)  (struct nfs_pgio_data *, struct rpc_message *);
-	int	(*write_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
 	int	(*write_done)  (struct rpc_task *, struct nfs_pgio_data *);
 	void	(*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
 	void	(*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
-- 
cgit 


From 0eecb2145c1ce18e36617008424a93836ad0a3bd Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:32 -0400
Subject: NFS: Create a common nfs_pgio_result_common function

Combining these functions will let me make a single nfs_rw_common_ops
struct (see the next patch).

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h        |  1 +
 fs/nfs/pagelist.c        | 23 ++++++++++++++++++++++
 fs/nfs/read.c            | 25 ++++++++----------------
 fs/nfs/write.c           | 51 +++++++++++++++++++-----------------------------
 include/linux/nfs_fs.h   |  2 --
 include/linux/nfs_page.h |  2 ++
 6 files changed, 54 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 1959260f8c57..7c0ae364bdad 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -243,6 +243,7 @@ struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *, unsigned int
 void nfs_pgio_data_release(struct nfs_pgio_data *);
 void nfs_pgio_prepare(struct rpc_task *, void *);
 void nfs_pgio_release(void *);
+void nfs_pgio_result(struct rpc_task *, void *);
 
 static inline void nfs_iocounter_init(struct nfs_io_counter *c)
 {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 0fa211d35e40..f74df87058b6 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -24,6 +24,8 @@
 #include "internal.h"
 #include "pnfs.h"
 
+#define NFSDBG_FACILITY		NFSDBG_PAGECACHE
+
 static struct kmem_cache *nfs_page_cachep;
 
 static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
@@ -447,6 +449,27 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init);
 
+/**
+ * nfs_pgio_result - Basic pageio error handling
+ * @task: The task that ran
+ * @calldata: Pageio data to check
+ */
+void nfs_pgio_result(struct rpc_task *task, void *calldata)
+{
+	struct nfs_pgio_data *data = calldata;
+	struct inode *inode = data->header->inode;
+
+	dprintk("NFS: %s: %5u, (status %d)\n", __func__,
+		task->tk_pid, task->tk_status);
+
+	if (data->header->rw_ops->rw_done(task, data, inode) != 0)
+		return;
+	if (task->tk_status < 0)
+		nfs_set_pgio_error(data->header, task->tk_status, data->args.offset);
+	else
+		data->header->rw_ops->rw_result(task, data);
+}
+
 static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
 		const struct nfs_open_context *ctx2)
 {
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index cfa15e828dd6..bc78bd248eb8 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -388,15 +388,10 @@ static const struct nfs_pageio_ops nfs_pageio_read_ops = {
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
+			     struct inode *inode)
 {
-	struct inode *inode = data->header->inode;
-	int status;
-
-	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
-			task->tk_status);
-
-	status = NFS_PROTO(inode)->read_done(task, data);
+	int status = NFS_PROTO(inode)->read_done(task, data);
 	if (status != 0)
 		return status;
 
@@ -429,17 +424,11 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data
 	rpc_restart_call_prepare(task);
 }
 
-static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
+static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
 {
-	struct nfs_pgio_data *data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 
-	/* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
-	if (nfs_readpage_result(task, data) != 0)
-		return;
-	if (task->tk_status < 0)
-		nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
-	else if (data->res.eof) {
+	if (data->res.eof) {
 		loff_t bound;
 
 		bound = data->args.offset + data->res.count;
@@ -456,7 +445,7 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
 
 static const struct rpc_call_ops nfs_read_common_ops = {
 	.rpc_call_prepare = nfs_pgio_prepare,
-	.rpc_call_done = nfs_readpage_result_common,
+	.rpc_call_done = nfs_pgio_result,
 	.rpc_release = nfs_pgio_release,
 };
 
@@ -625,4 +614,6 @@ static const struct nfs_rw_ops nfs_rw_read_ops = {
 	.rw_mode		= FMODE_READ,
 	.rw_alloc_header	= nfs_readhdr_alloc,
 	.rw_free_header		= nfs_readhdr_free,
+	.rw_done		= nfs_readpage_done,
+	.rw_result		= nfs_readpage_result,
 };
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ae799c96ec2b..1d3e1d75c8c5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1255,20 +1255,6 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
 	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
 }
 
-/*
- * Handle a write reply that flushes a whole page.
- *
- * FIXME: There is an inherent race with invalidate_inode_pages and
- *	  writebacks since the page->count is kept > 1 for as long
- *	  as the page has a write request pending.
- */
-static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
-{
-	struct nfs_pgio_data	*data = calldata;
-
-	nfs_writeback_done(task, data);
-}
-
 static void nfs_writeback_release_common(struct nfs_pgio_data *data)
 {
 	struct nfs_pgio_header *hdr = data->header;
@@ -1288,7 +1274,7 @@ static void nfs_writeback_release_common(struct nfs_pgio_data *data)
 
 static const struct rpc_call_ops nfs_write_common_ops = {
 	.rpc_call_prepare = nfs_pgio_prepare,
-	.rpc_call_done = nfs_writeback_done_common,
+	.rpc_call_done = nfs_pgio_result,
 	.rpc_release = nfs_pgio_release,
 };
 
@@ -1320,16 +1306,11 @@ static int nfs_should_remove_suid(const struct inode *inode)
 /*
  * This function is called when the WRITE call is complete.
  */
-void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data)
+static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
+			      struct inode *inode)
 {
-	struct nfs_pgio_args	*argp = &data->args;
-	struct nfs_pgio_res	*resp = &data->res;
-	struct inode		*inode = data->header->inode;
 	int status;
 
-	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
-		task->tk_pid, task->tk_status);
-
 	/*
 	 * ->write_done will attempt to use post-op attributes to detect
 	 * conflicting writes by other clients.  A strict interpretation
@@ -1339,11 +1320,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data)
 	 */
 	status = NFS_PROTO(inode)->write_done(task, data);
 	if (status != 0)
-		return;
-	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+		return status;
+	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count);
 
 #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
-	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+	if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) {
 		/* We tried a write call, but the server did not
 		 * commit data to stable storage even though we
 		 * requested it.
@@ -1359,25 +1340,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data)
 			dprintk("NFS:       faulty NFS server %s:"
 				" (committed = %d) != (stable = %d)\n",
 				NFS_SERVER(inode)->nfs_client->cl_hostname,
-				resp->verf->committed, argp->stable);
+				data->res.verf->committed, data->args.stable);
 			complain = jiffies + 300 * HZ;
 		}
 	}
 #endif
-	if (task->tk_status < 0) {
-		nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
-		return;
-	}
 
 	/* Deal with the suid/sgid bit corner case */
 	if (nfs_should_remove_suid(inode))
 		nfs_mark_for_revalidate(inode);
+	return 0;
+}
+
+/*
+ * This function is called when the WRITE call is complete.
+ */
+static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data)
+{
+	struct nfs_pgio_args	*argp = &data->args;
+	struct nfs_pgio_res	*resp = &data->res;
 
 	if (resp->count < argp->count) {
 		static unsigned long    complain;
 
 		/* This a short write! */
-		nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
+		nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE);
 
 		/* Has the server at least made some progress? */
 		if (resp->count == 0) {
@@ -1911,4 +1898,6 @@ static const struct nfs_rw_ops nfs_rw_write_ops = {
 	.rw_alloc_header	= nfs_writehdr_alloc,
 	.rw_free_header		= nfs_writehdr_free,
 	.rw_release		= nfs_writeback_release_common,
+	.rw_done		= nfs_writeback_done,
+	.rw_result		= nfs_writeback_result,
 };
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 7e0db561d829..919576b8e2cf 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -520,7 +520,6 @@ extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
 extern int  nfs_writepages(struct address_space *, struct writeback_control *);
 extern int  nfs_flush_incompatible(struct file *file, struct page *page);
 extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
-extern void nfs_writeback_done(struct rpc_task *, struct nfs_pgio_data *);
 
 /*
  * Try to write back everything synchronously (but check the
@@ -553,7 +552,6 @@ nfs_have_writebacks(struct inode *inode)
 extern int  nfs_readpage(struct file *, struct page *);
 extern int  nfs_readpages(struct file *, struct address_space *,
 		struct list_head *, unsigned);
-extern int  nfs_readpage_result(struct rpc_task *, struct nfs_pgio_data *);
 extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
 			       struct page *);
 
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index da00a4d6f470..01aa29c5ec42 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -57,6 +57,8 @@ struct nfs_rw_ops {
 	struct nfs_rw_header *(*rw_alloc_header)(void);
 	void (*rw_free_header)(struct nfs_rw_header *);
 	void (*rw_release)(struct nfs_pgio_data *);
+	int  (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *);
+	void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *);
 };
 
 struct nfs_pageio_descriptor {
-- 
cgit 


From b98abe52fa8e2a3797d3cc2db3d0e109f4549c03 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@kernel.org>
Date: Wed, 28 May 2014 23:51:53 -0700
Subject: Input: add common DT binding for touchscreens

Add common DT binding documentation for touchscreen devices and
implement touchscreen_parse_of_params, which parses the common
properties and configures the input device accordingly.

The method currently does not interpret the axis inversion properties,
since there is no matching flag in the generic linux input device.

Reviewed-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 .../bindings/input/touchscreen/touchscreen.txt     | 27 +++++++++++++
 drivers/input/touchscreen/Kconfig                  |  4 ++
 drivers/input/touchscreen/Makefile                 |  1 +
 drivers/input/touchscreen/of_touchscreen.c         | 45 ++++++++++++++++++++++
 include/linux/input/touchscreen.h                  | 22 +++++++++++
 5 files changed, 99 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
 create mode 100644 drivers/input/touchscreen/of_touchscreen.c
 create mode 100644 include/linux/input/touchscreen.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
new file mode 100644
index 000000000000..d8e06163c54e
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
@@ -0,0 +1,27 @@
+General Touchscreen Properties:
+
+Optional properties for Touchscreens:
+ - touchscreen-size-x		: horizontal resolution of touchscreen
+				  (in pixels)
+ - touchscreen-size-y		: vertical resolution of touchscreen
+				  (in pixels)
+ - touchscreen-max-pressure	: maximum reported pressure (arbitrary range
+				  dependent on the controller)
+ - touchscreen-fuzz-x		: horizontal noise value of the absolute input
+				  device (in pixels)
+ - touchscreen-fuzz-y		: vertical noise value of the absolute input
+				  device (in pixels)
+ - touchscreen-fuzz-pressure	: pressure noise value of the absolute input
+				  device (arbitrary range dependent on the
+				  controller)
+ - touchscreen-inverted-x	: X axis is inverted (boolean)
+ - touchscreen-inverted-y	: Y axis is inverted (boolean)
+
+Deprecated properties for Touchscreens:
+ - x-size			: deprecated name for touchscreen-size-x
+ - y-size			: deprecated name for touchscreen-size-y
+ - moving-threshold		: deprecated name for a combination of
+				  touchscreen-fuzz-x and touchscreen-fuzz-y
+ - contact-threshold		: deprecated name for touchscreen-fuzz-pressure
+ - x-invert			: deprecated name for touchscreen-inverted-x
+ - y-invert			: deprecated name for touchscreen-inverted-y
diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 43389c097b02..8e07fe8505fd 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -11,6 +11,10 @@ menuconfig INPUT_TOUCHSCREEN
 
 if INPUT_TOUCHSCREEN
 
+config OF_TOUCHSCREEN
+	def_tristate INPUT
+	depends on INPUT && OF
+
 config TOUCHSCREEN_88PM860X
 	tristate "Marvell 88PM860x touchscreen"
 	depends on MFD_88PM860X
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 71a97559ce68..4d479fb0a768 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -6,6 +6,7 @@
 
 wm97xx-ts-y := wm97xx-core.o
 
+obj-$(CONFIG_OF_TOUCHSCREEN)		+= of_touchscreen.o
 obj-$(CONFIG_TOUCHSCREEN_88PM860X)	+= 88pm860x-ts.o
 obj-$(CONFIG_TOUCHSCREEN_AD7877)	+= ad7877.o
 obj-$(CONFIG_TOUCHSCREEN_AD7879)	+= ad7879.o
diff --git a/drivers/input/touchscreen/of_touchscreen.c b/drivers/input/touchscreen/of_touchscreen.c
new file mode 100644
index 000000000000..f8f9b84230b1
--- /dev/null
+++ b/drivers/input/touchscreen/of_touchscreen.c
@@ -0,0 +1,45 @@
+/*
+ *  Generic DT helper functions for touchscreen devices
+ *
+ *  Copyright (c) 2014 Sebastian Reichel <sre@kernel.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/of.h>
+#include <linux/input.h>
+#include <linux/input/touchscreen.h>
+
+/**
+ * touchscreen_parse_of_params - parse common touchscreen DT properties
+ * @dev: device that should be parsed
+ *
+ * This function parses common DT properties for touchscreens and sets up the
+ * input device accordingly. The function keeps previously set up default
+ * values if no value is specified via DT.
+ */
+void touchscreen_parse_of_params(struct input_dev *dev)
+{
+	struct device_node *np = dev->dev.parent->of_node;
+	struct input_absinfo *absinfo;
+
+	input_alloc_absinfo(dev);
+	if (!dev->absinfo)
+		return;
+
+	absinfo = &dev->absinfo[ABS_X];
+	of_property_read_u32(np, "touchscreen-size-x", &absinfo->maximum);
+	of_property_read_u32(np, "touchscreen-fuzz-x", &absinfo->fuzz);
+
+	absinfo = &dev->absinfo[ABS_Y];
+	of_property_read_u32(np, "touchscreen-size-y", &absinfo->maximum);
+	of_property_read_u32(np, "touchscreen-fuzz-y", &absinfo->fuzz);
+
+	absinfo = &dev->absinfo[ABS_PRESSURE];
+	of_property_read_u32(np, "touchscreen-max-pressure", &absinfo->maximum);
+	of_property_read_u32(np, "touchscreen-fuzz-pressure", &absinfo->fuzz);
+}
+EXPORT_SYMBOL(touchscreen_parse_of_params);
diff --git a/include/linux/input/touchscreen.h b/include/linux/input/touchscreen.h
new file mode 100644
index 000000000000..08a5ef6e8f25
--- /dev/null
+++ b/include/linux/input/touchscreen.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2014 Sebastian Reichel <sre@kernel.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef _TOUCHSCREEN_H
+#define _TOUCHSCREEN_H
+
+#include <linux/input.h>
+
+#ifdef CONFIG_OF
+void touchscreen_parse_of_params(struct input_dev *dev);
+#else
+static inline void touchscreen_parse_of_params(struct input_dev *dev)
+{
+}
+#endif
+
+#endif
-- 
cgit 


From 61721c88b8d85c9dc13bfeedf75dfc245f397c3c Mon Sep 17 00:00:00 2001
From: Joachim Eastwood <manabian@gmail.com>
Date: Thu, 29 May 2014 00:30:02 -0700
Subject: Input: omap-keypad - remove platform data support

This is unused since all users (OMAP4/5) are DT only.

Signed-off-by: Joachim Eastwood <manabian@gmail.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/keyboard/Kconfig             |  2 +-
 drivers/input/keyboard/omap4-keypad.c      | 32 ++++++------------------------
 include/linux/platform_data/omap4-keypad.h | 13 ------------
 3 files changed, 7 insertions(+), 40 deletions(-)
 delete mode 100644 include/linux/platform_data/omap4-keypad.h

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index 948a30304870..0f84f2346fe4 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -589,7 +589,7 @@ config KEYBOARD_OMAP
 
 config KEYBOARD_OMAP4
 	tristate "TI OMAP4+ keypad support"
-	depends on ARCH_OMAP2PLUS
+	depends on OF || ARCH_OMAP2PLUS
 	select INPUT_MATRIXKMAP
 	help
 	  Say Y here if you want to use the OMAP4+ keypad.
diff --git a/drivers/input/keyboard/omap4-keypad.c b/drivers/input/keyboard/omap4-keypad.c
index 0400b3f2b4b9..024b7bdffe5b 100644
--- a/drivers/input/keyboard/omap4-keypad.c
+++ b/drivers/input/keyboard/omap4-keypad.c
@@ -28,11 +28,10 @@
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/input.h>
+#include <linux/input/matrix_keypad.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
 
-#include <linux/platform_data/omap4-keypad.h>
-
 /* OMAP4 registers */
 #define OMAP4_KBD_REVISION		0x00
 #define OMAP4_KBD_SYSCONFIG		0x10
@@ -218,7 +217,6 @@ static void omap4_keypad_close(struct input_dev *input)
 	pm_runtime_put_sync(input->dev.parent);
 }
 
-#ifdef CONFIG_OF
 static int omap4_keypad_parse_dt(struct device *dev,
 				 struct omap4_keypad *keypad_data)
 {
@@ -235,20 +233,9 @@ static int omap4_keypad_parse_dt(struct device *dev,
 
 	return 0;
 }
-#else
-static inline int omap4_keypad_parse_dt(struct device *dev,
-					struct omap4_keypad *keypad_data)
-{
-	return -ENOSYS;
-}
-#endif
 
 static int omap4_keypad_probe(struct platform_device *pdev)
 {
-	const struct omap4_keypad_platform_data *pdata =
-				dev_get_platdata(&pdev->dev);
-	const struct matrix_keymap_data *keymap_data =
-				pdata ? pdata->keymap_data : NULL;
 	struct omap4_keypad *keypad_data;
 	struct input_dev *input_dev;
 	struct resource *res;
@@ -277,14 +264,9 @@ static int omap4_keypad_probe(struct platform_device *pdev)
 
 	keypad_data->irq = irq;
 
-	if (pdata) {
-		keypad_data->rows = pdata->rows;
-		keypad_data->cols = pdata->cols;
-	} else {
-		error = omap4_keypad_parse_dt(&pdev->dev, keypad_data);
-		if (error)
-			return error;
-	}
+	error = omap4_keypad_parse_dt(&pdev->dev, keypad_data);
+	if (error)
+		return error;
 
 	res = request_mem_region(res->start, resource_size(res), pdev->name);
 	if (!res) {
@@ -363,7 +345,7 @@ static int omap4_keypad_probe(struct platform_device *pdev)
 		goto err_free_input;
 	}
 
-	error = matrix_keypad_build_keymap(keymap_data, NULL,
+	error = matrix_keypad_build_keymap(NULL, NULL,
 					   keypad_data->rows, keypad_data->cols,
 					   keypad_data->keymap, input_dev);
 	if (error) {
@@ -434,13 +416,11 @@ static int omap4_keypad_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_OF
 static const struct of_device_id omap_keypad_dt_match[] = {
 	{ .compatible = "ti,omap4-keypad" },
 	{},
 };
 MODULE_DEVICE_TABLE(of, omap_keypad_dt_match);
-#endif
 
 #ifdef CONFIG_PM_SLEEP
 static int omap4_keypad_suspend(struct device *dev)
@@ -482,7 +462,7 @@ static struct platform_driver omap4_keypad_driver = {
 		.name	= "omap4-keypad",
 		.owner	= THIS_MODULE,
 		.pm	= &omap4_keypad_pm_ops,
-		.of_match_table = of_match_ptr(omap_keypad_dt_match),
+		.of_match_table = omap_keypad_dt_match,
 	},
 };
 module_platform_driver(omap4_keypad_driver);
diff --git a/include/linux/platform_data/omap4-keypad.h b/include/linux/platform_data/omap4-keypad.h
deleted file mode 100644
index 4eef5fb05a17..000000000000
--- a/include/linux/platform_data/omap4-keypad.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __LINUX_INPUT_OMAP4_KEYPAD_H
-#define __LINUX_INPUT_OMAP4_KEYPAD_H
-
-#include <linux/input/matrix_keypad.h>
-
-struct omap4_keypad_platform_data {
-	const struct matrix_keymap_data *keymap_data;
-
-	u8 rows;
-	u8 cols;
-};
-
-#endif /* __LINUX_INPUT_OMAP4_KEYPAD_H */
-- 
cgit 


From 4d92a9beb39d80a7d8ff7c04ae12a10290105ae5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 29 May 2014 08:09:00 -0600
Subject: block: remove 'magic' from struct blk_plug

I don't think we've ever caught any bugs with this, and there's the
list poisoning for the plug lists to catch uninitialized cases.
So remove the magic member and save 8 bytes in the struct.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       | 5 -----
 include/linux/blkdev.h | 1 -
 2 files changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index d87be5b4e554..40d654861c33 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2957,8 +2957,6 @@ int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
 }
 EXPORT_SYMBOL(kblockd_schedule_delayed_work_on);
 
-#define PLUG_MAGIC	0x91827364
-
 /**
  * blk_start_plug - initialize blk_plug and track it inside the task_struct
  * @plug:	The &struct blk_plug that needs to be initialized
@@ -2977,7 +2975,6 @@ void blk_start_plug(struct blk_plug *plug)
 {
 	struct task_struct *tsk = current;
 
-	plug->magic = PLUG_MAGIC;
 	INIT_LIST_HEAD(&plug->list);
 	INIT_LIST_HEAD(&plug->mq_list);
 	INIT_LIST_HEAD(&plug->cb_list);
@@ -3074,8 +3071,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	LIST_HEAD(list);
 	unsigned int depth;
 
-	BUG_ON(plug->magic != PLUG_MAGIC);
-
 	flush_plug_callbacks(plug, from_schedule);
 
 	if (!list_empty(&plug->mq_list))
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 913f1c2d3be0..098304576d51 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1060,7 +1060,6 @@ static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
  * schedule() where blk_schedule_flush_plug() is called.
  */
 struct blk_plug {
-	unsigned long magic; /* detect uninitialized use-cases */
 	struct list_head list; /* requests */
 	struct list_head mq_list; /* blk-mq requests */
 	struct list_head cb_list; /* md requires an unplug callback */
-- 
cgit 


From 1ed26f33008e954a8e91d26f97d4380dea8145db Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Tue, 6 May 2014 09:12:37 -0400
Subject: NFS: Create a common initiate_pgio() function

Most of this code is the same for both the read and write paths, so
combine everything and use the rw_ops when necessary.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/internal.h        |  9 ++------
 fs/nfs/nfs4filelayout.c  |  6 +++---
 fs/nfs/pagelist.c        | 46 ++++++++++++++++++++++++++++++++++++++++
 fs/nfs/read.c            | 42 ++++++------------------------------
 fs/nfs/write.c           | 55 ++++++------------------------------------------
 include/linux/nfs_page.h |  2 ++
 6 files changed, 66 insertions(+), 94 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 365cdb11d0de..be4f2a7e9178 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -241,6 +241,8 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
 void nfs_rw_header_free(struct nfs_pgio_header *);
 void nfs_pgio_data_release(struct nfs_pgio_data *);
 int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
+int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
+		      const struct rpc_call_ops *, int, int);
 
 static inline void nfs_iocounter_init(struct nfs_io_counter *c)
 {
@@ -402,9 +404,6 @@ struct nfs_pgio_completion_ops;
 extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 			struct inode *inode, bool force_mds,
 			const struct nfs_pgio_completion_ops *compl_ops);
-extern int nfs_initiate_read(struct rpc_clnt *clnt,
-			     struct nfs_pgio_data *data,
-			     const struct rpc_call_ops *call_ops, int flags);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
 extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
 
@@ -425,10 +424,6 @@ extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 			const struct nfs_pgio_completion_ops *compl_ops);
 extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
 extern void nfs_commit_free(struct nfs_commit_data *p);
-extern int nfs_initiate_write(struct rpc_clnt *clnt,
-			      struct nfs_pgio_data *data,
-			      const struct rpc_call_ops *call_ops,
-			      int how, int flags);
 extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
 extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
 extern int nfs_initiate_commit(struct rpc_clnt *clnt,
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index e6936147ad95..7954e16a6d83 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -568,8 +568,8 @@ filelayout_read_pagelist(struct nfs_pgio_data *data)
 	data->mds_offset = offset;
 
 	/* Perform an asynchronous read to ds */
-	nfs_initiate_read(ds_clnt, data,
-				  &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
+	nfs_initiate_pgio(ds_clnt, data,
+			    &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
 	return PNFS_ATTEMPTED;
 }
 
@@ -613,7 +613,7 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
 	data->args.offset = filelayout_get_dserver_offset(lseg, offset);
 
 	/* Perform an asynchronous write */
-	nfs_initiate_write(ds_clnt, data,
+	nfs_initiate_pgio(ds_clnt, data,
 				    &filelayout_write_call_ops, sync,
 				    RPC_TASK_SOFTCONN);
 	return PNFS_ATTEMPTED;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index d8d25a4deb88..ab5b1850ca4f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -447,6 +447,52 @@ static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
 		rpc_exit(task, err);
 }
 
+int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data,
+		      const struct rpc_call_ops *call_ops, int how, int flags)
+{
+	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->header->cred,
+	};
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = clnt,
+		.task = &data->task,
+		.rpc_message = &msg,
+		.callback_ops = call_ops,
+		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
+		.flags = RPC_TASK_ASYNC | flags,
+	};
+	int ret = 0;
+
+	data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how);
+
+	dprintk("NFS: %5u initiated pgio call "
+		"(req %s/%llu, %u bytes @ offset %llu)\n",
+		data->task.tk_pid,
+		data->header->inode->i_sb->s_id,
+		(unsigned long long)NFS_FILEID(data->header->inode),
+		data->args.count,
+		(unsigned long long)data->args.offset);
+
+	task = rpc_run_task(&task_setup_data);
+	if (IS_ERR(task)) {
+		ret = PTR_ERR(task);
+		goto out;
+	}
+	if (how & FLUSH_SYNC) {
+		ret = rpc_wait_for_completion_task(task);
+		if (ret == 0)
+			ret = task->tk_status;
+	}
+	rpc_put_task(task);
+out:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
+
 /**
  * nfs_pgio_error - Clean up from a pageio error
  * @desc: IO descriptor
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 4fcef82d78b4..0359b0d76ef6 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -151,53 +151,22 @@ out:
 	hdr->release(hdr);
 }
 
-int nfs_initiate_read(struct rpc_clnt *clnt,
-		      struct nfs_pgio_data *data,
-		      const struct rpc_call_ops *call_ops, int flags)
+static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg,
+			      struct rpc_task_setup *task_setup_data, int how)
 {
 	struct inode *inode = data->header->inode;
 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = data->header->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.task = &data->task,
-		.rpc_client = clnt,
-		.rpc_message = &msg,
-		.callback_ops = call_ops,
-		.callback_data = data,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC | swap_flags | flags,
-	};
 
-	/* Set up the initial task struct. */
-	NFS_PROTO(inode)->read_setup(data, &msg);
-
-	dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ "
-			"offset %llu)\n",
-			data->task.tk_pid,
-			inode->i_sb->s_id,
-			(unsigned long long)NFS_FILEID(inode),
-			data->args.count,
-			(unsigned long long)data->args.offset);
-
-	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task))
-		return PTR_ERR(task);
-	rpc_put_task(task);
-	return 0;
+	task_setup_data->flags |= swap_flags;
+	NFS_PROTO(inode)->read_setup(data, msg);
 }
-EXPORT_SYMBOL_GPL(nfs_initiate_read);
 
 static int nfs_do_read(struct nfs_pgio_data *data,
 		const struct rpc_call_ops *call_ops)
 {
 	struct inode *inode = data->header->inode;
 
-	return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
+	return nfs_initiate_pgio(NFS_CLIENT(inode), data, call_ops, 0, 0);
 }
 
 static int
@@ -491,4 +460,5 @@ static const struct nfs_rw_ops nfs_rw_read_ops = {
 	.rw_free_header		= nfs_readhdr_free,
 	.rw_done		= nfs_readpage_done,
 	.rw_result		= nfs_readpage_result,
+	.rw_initiate		= nfs_initiate_read,
 };
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0e34c7024195..e46a1fc6c1fe 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -932,60 +932,18 @@ static int flush_task_priority(int how)
 	return RPC_PRIORITY_NORMAL;
 }
 
-int nfs_initiate_write(struct rpc_clnt *clnt,
-		       struct nfs_pgio_data *data,
-		       const struct rpc_call_ops *call_ops,
-		       int how, int flags)
+static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg,
+			       struct rpc_task_setup *task_setup_data, int how)
 {
 	struct inode *inode = data->header->inode;
 	int priority = flush_task_priority(how);
-	struct rpc_task *task;
-	struct rpc_message msg = {
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = data->header->cred,
-	};
-	struct rpc_task_setup task_setup_data = {
-		.rpc_client = clnt,
-		.task = &data->task,
-		.rpc_message = &msg,
-		.callback_ops = call_ops,
-		.callback_data = data,
-		.workqueue = nfsiod_workqueue,
-		.flags = RPC_TASK_ASYNC | flags,
-		.priority = priority,
-	};
-	int ret = 0;
 
-	/* Set up the initial task struct.  */
-	NFS_PROTO(inode)->write_setup(data, &msg);
-
-	dprintk("NFS: %5u initiated write call "
-		"(req %s/%llu, %u bytes @ offset %llu)\n",
-		data->task.tk_pid,
-		inode->i_sb->s_id,
-		(unsigned long long)NFS_FILEID(inode),
-		data->args.count,
-		(unsigned long long)data->args.offset);
+	task_setup_data->priority = priority;
+	NFS_PROTO(inode)->write_setup(data, msg);
 
 	nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
-				 &task_setup_data.rpc_client, &msg, data);
-
-	task = rpc_run_task(&task_setup_data);
-	if (IS_ERR(task)) {
-		ret = PTR_ERR(task);
-		goto out;
-	}
-	if (how & FLUSH_SYNC) {
-		ret = rpc_wait_for_completion_task(task);
-		if (ret == 0)
-			ret = task->tk_status;
-	}
-	rpc_put_task(task);
-out:
-	return ret;
+				 &task_setup_data->rpc_client, msg, data);
 }
-EXPORT_SYMBOL_GPL(nfs_initiate_write);
 
 static int nfs_do_write(struct nfs_pgio_data *data,
 		const struct rpc_call_ops *call_ops,
@@ -993,7 +951,7 @@ static int nfs_do_write(struct nfs_pgio_data *data,
 {
 	struct inode *inode = data->header->inode;
 
-	return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
+	return nfs_initiate_pgio(NFS_CLIENT(inode), data, call_ops, how, 0);
 }
 
 static int nfs_do_multiple_writes(struct list_head *head,
@@ -1743,4 +1701,5 @@ static const struct nfs_rw_ops nfs_rw_write_ops = {
 	.rw_release		= nfs_writeback_release_common,
 	.rw_done		= nfs_writeback_done,
 	.rw_result		= nfs_writeback_result,
+	.rw_initiate		= nfs_initiate_write,
 };
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 01aa29c5ec42..c6a587f7118f 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -59,6 +59,8 @@ struct nfs_rw_ops {
 	void (*rw_release)(struct nfs_pgio_data *);
 	int  (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *);
 	void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *);
+	void (*rw_initiate)(struct nfs_pgio_data *, struct rpc_message *,
+			    struct rpc_task_setup *, int);
 };
 
 struct nfs_pageio_descriptor {
-- 
cgit 


From 12c05792599ec57ebab33096b2c75b863dfe6ea4 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:41 -0400
Subject: nfs: clean up PG_* flags

Remove unused flags PG_NEED_COMMIT and PG_NEED_RESCHED.
Add comments describing how each flag is used.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 include/linux/nfs_page.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index c6a587f7118f..eb2eb6396874 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -22,12 +22,10 @@
  * Valid flags for a dirty buffer
  */
 enum {
-	PG_BUSY = 0,
-	PG_MAPPED,
-	PG_CLEAN,
-	PG_NEED_COMMIT,
-	PG_NEED_RESCHED,
-	PG_COMMIT_TO_DS,
+	PG_BUSY = 0,		/* nfs_{un}lock_request */
+	PG_MAPPED,		/* page private set for buffered io */
+	PG_CLEAN,		/* write succeeded */
+	PG_COMMIT_TO_DS,	/* used by pnfs layouts */
 };
 
 struct nfs_inode;
-- 
cgit 


From 8c8f1ac109726e4ed44a920f5c962c84610d4a17 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:42 -0400
Subject: nfs: remove unused arg from nfs_create_request

@inode is passed but not used.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c          | 6 ++----
 fs/nfs/pagelist.c        | 4 +---
 fs/nfs/read.c            | 5 ++---
 fs/nfs/write.c           | 2 +-
 include/linux/nfs_page.h | 1 -
 5 files changed, 6 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 164b0167677b..1dd8c622d719 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -380,8 +380,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 			/* XXX do we need to do the eof zeroing found in async_filler? */
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
+			req = nfs_create_request(dreq->ctx, pagevec[i],
 						 pgbase, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
@@ -750,8 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
+			req = nfs_create_request(dreq->ctx, pagevec[i],
 						 pgbase, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 29591094125a..4b4b212ec6b2 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -139,7 +139,6 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
 /**
  * nfs_create_request - Create an NFS read/write request.
  * @ctx: open context to use
- * @inode: inode to which the request is attached
  * @page: page to write
  * @offset: starting offset within the page for the write
  * @count: number of bytes to read/write
@@ -149,8 +148,7 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
  * User should ensure it is safe to sleep in this function.
  */
 struct nfs_page *
-nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
-		   struct page *page,
+nfs_create_request(struct nfs_open_context *ctx, struct page *page,
 		   unsigned int offset, unsigned int count)
 {
 	struct nfs_page		*req;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 3986668e4390..46d90448f69b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -85,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
-	new = nfs_create_request(ctx, inode, page, 0, len);
+	new = nfs_create_request(ctx, page, 0, len);
 	if (IS_ERR(new)) {
 		unlock_page(page);
 		return PTR_ERR(new);
@@ -303,7 +303,6 @@ static int
 readpage_async_filler(void *data, struct page *page)
 {
 	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
-	struct inode *inode = page_file_mapping(page)->host;
 	struct nfs_page *new;
 	unsigned int len;
 	int error;
@@ -312,7 +311,7 @@ readpage_async_filler(void *data, struct page *page)
 	if (len == 0)
 		return nfs_return_empty_page(page);
 
-	new = nfs_create_request(desc->ctx, inode, page, 0, len);
+	new = nfs_create_request(desc->ctx, page, 0, len);
 	if (IS_ERR(new))
 		goto out_error;
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2680f29f8a51..e773df207c05 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -761,7 +761,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
 	req = nfs_try_to_update_request(inode, page, offset, bytes);
 	if (req != NULL)
 		goto out;
-	req = nfs_create_request(ctx, inode, page, offset, bytes);
+	req = nfs_create_request(ctx, page, offset, bytes);
 	if (IS_ERR(req))
 		goto out;
 	nfs_inode_add_request(inode, req);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index eb2eb6396874..be0b0981e7a0 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -85,7 +85,6 @@ struct nfs_pageio_descriptor {
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
 
 extern	struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
-					    struct inode *inode,
 					    struct page *page,
 					    unsigned int offset,
 					    unsigned int count);
-- 
cgit 


From b4fdac1a5150174df0847a45dc6612ce5ce3daeb Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:43 -0400
Subject: nfs: modify pg_test interface to return size_t

This is a step toward allowing pg_test to inform the
coalescing code to reduce the size of requests so they may fit in
whatever scheme the pg_test callback wants to define.

For now, just return the size of the request if there is space, or 0
if there is not.  This shouldn't change any behavior as it acts
the same as when the pg_test functions returned bool.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/blocklayout/blocklayout.c | 16 ++++++++++++----
 fs/nfs/nfs4filelayout.c          | 12 +++++++-----
 fs/nfs/objlayout/objio_osd.c     | 15 ++++++++++-----
 fs/nfs/pagelist.c                | 22 +++++++++++++++++++---
 fs/nfs/pnfs.c                    | 12 +++++++++---
 fs/nfs/pnfs.h                    |  3 ++-
 include/linux/nfs_page.h         |  5 +++--
 7 files changed, 62 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 206cc68c9694..9b431f44fad9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -1189,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 		pnfs_generic_pg_init_read(pgio, req);
 }
 
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
 bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		struct nfs_page *req)
 {
 	if (pgio->pg_dreq != NULL &&
 	    !is_aligned_req(req, SECTOR_SIZE))
-		return false;
+		return 0;
 
 	return pnfs_generic_pg_test(pgio, prev, req);
 }
@@ -1241,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 	}
 }
 
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
 bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		 struct nfs_page *req)
 {
 	if (pgio->pg_dreq != NULL &&
 	    !is_aligned_req(req, PAGE_CACHE_SIZE))
-		return false;
+		return 0;
 
 	return pnfs_generic_pg_test(pgio, prev, req);
 }
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 9fd7cebbff04..ba9a9aadf6c8 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -915,10 +915,10 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
 /*
  * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
  *
- * return true  : coalesce page
- * return false : don't coalesce page
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
  */
-static bool
+static size_t
 filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		   struct nfs_page *req)
 {
@@ -927,7 +927,7 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 
 	if (!pnfs_generic_pg_test(pgio, prev, req) ||
 	    !nfs_generic_pg_test(pgio, prev, req))
-		return false;
+		return 0;
 
 	p_stripe = (u64)req_offset(prev);
 	r_stripe = (u64)req_offset(req);
@@ -936,7 +936,9 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 	do_div(p_stripe, stripe_unit);
 	do_div(r_stripe, stripe_unit);
 
-	return (p_stripe == r_stripe);
+	if (p_stripe == r_stripe)
+		return req->wb_bytes;
+	return 0;
 }
 
 static void
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 426b366b0b33..71b9c69dbe9c 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -564,14 +564,19 @@ int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
 	return 0;
 }
 
-static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
 			  struct nfs_page *prev, struct nfs_page *req)
 {
-	if (!pnfs_generic_pg_test(pgio, prev, req))
-		return false;
+	if (!pnfs_generic_pg_test(pgio, prev, req) ||
+	    pgio->pg_count + req->wb_bytes >
+	    (unsigned long)pgio->pg_layout_private)
+		return 0;
 
-	return pgio->pg_count + req->wb_bytes <=
-			(unsigned long)pgio->pg_layout_private;
+	return req->wb_bytes;
 }
 
 static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 4b4b212ec6b2..82233431880d 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -280,7 +280,17 @@ nfs_wait_on_request(struct nfs_page *req)
 			TASK_UNINTERRUPTIBLE);
 }
 
-bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+/*
+ * nfs_generic_pg_test - determine if requests can be coalesced
+ * @desc: pointer to descriptor
+ * @prev: previous request in desc, or NULL
+ * @req: this request
+ *
+ * Returns zero if @req cannot be coalesced into @desc, otherwise it returns
+ * the size of the request.
+ */
+size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+			   struct nfs_page *prev, struct nfs_page *req)
 {
 	/*
 	 * FIXME: ideally we should be able to coalesce all requests
@@ -292,7 +302,9 @@ bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *pr
 	if (desc->pg_bsize < PAGE_SIZE)
 		return 0;
 
-	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
+	if (desc->pg_count + req->wb_bytes <= desc->pg_bsize)
+		return req->wb_bytes;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 
@@ -747,6 +759,8 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
 				      struct nfs_page *req,
 				      struct nfs_pageio_descriptor *pgio)
 {
+	size_t size;
+
 	if (!nfs_match_open_context(req->wb_context, prev->wb_context))
 		return false;
 	if (req->wb_context->dentry->d_inode->i_flock != NULL &&
@@ -758,7 +772,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
 		return false;
 	if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
 		return false;
-	return pgio->pg_ops->pg_test(pgio, prev, req);
+	size = pgio->pg_ops->pg_test(pgio, prev, req);
+	WARN_ON_ONCE(size && size != req->wb_bytes);
+	return size > 0;
 }
 
 /**
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0fe670189fd1..de6eb16f94d1 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1434,7 +1434,11 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
 
-bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+size_t
 pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 		     struct nfs_page *req)
 {
@@ -1455,8 +1459,10 @@ pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
 	 * first byte that lies outside the pnfs_layout_range. FIXME?
 	 *
 	 */
-	return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
-					 pgio->pg_lseg->pls_range.length);
+	if (req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
+					 pgio->pg_lseg->pls_range.length))
+		return req->wb_bytes;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
 
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 0031267d7492..dccf182ec4d8 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -187,7 +187,8 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
 void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
 			        struct nfs_page *req, u64 wb_size);
 int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
-bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
+size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
+			    struct nfs_page *prev, struct nfs_page *req);
 void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
 struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_free_lseg_list(struct list_head *tmp_list);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index be0b0981e7a0..13d59af561f6 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -46,7 +46,8 @@ struct nfs_page {
 struct nfs_pageio_descriptor;
 struct nfs_pageio_ops {
 	void	(*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
-	bool	(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+	size_t	(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *,
+			   struct nfs_page *);
 	int	(*pg_doio)(struct nfs_pageio_descriptor *);
 };
 
@@ -102,7 +103,7 @@ extern	int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
 				   struct nfs_page *);
 extern	void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
 extern	void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
-extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
 				struct nfs_page *prev,
 				struct nfs_page *req);
 extern  int nfs_wait_on_request(struct nfs_page *);
-- 
cgit 


From 2bfc6e566daa8386c9cffef2f7de17fc330d3835 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:45 -0400
Subject: nfs: add support for multiple nfs reqs per page

Add "page groups" - a circular list of nfs requests (struct nfs_page)
that all reference the same page. This gives nfs read and write paths
the ability to account for sub-page regions independently.  This
somewhat follows the design of struct buffer_head's sub-page
accounting.

Only "head" requests are ever added/removed from the inode list in
the buffered write path. "head" and "sub" requests are treated the
same through the read path and the rest of the write/commit path.
Requests are given an extra reference across the life of the list.

Page groups are never rejoined after being split. If the read/write
request fails and the client falls back to another path (ie revert
to MDS in PNFS case), the already split requests are pushed through
the recoalescing code again, which may split them further and then
coalesce them into properly sized requests on the wire. Fragmentation
shouldn't be a problem with the current design, because we flush all
requests in page group when a non-contiguous request is added, so
the only time resplitting should occur is on a resend of a read or
write.

This patch lays the groundwork for sub-page splitting, but does not
actually do any splitting. For now all page groups have one request
as pg_test functions don't yet split pages. There are several related
patches that are needed to support multiple requests per page group.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c          |   7 +-
 fs/nfs/pagelist.c        | 220 ++++++++++++++++++++++++++++++++++++++++++++---
 fs/nfs/read.c            |   4 +-
 fs/nfs/write.c           |  13 ++-
 include/linux/nfs_page.h |  13 ++-
 5 files changed, 236 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1dd8c622d719..2c0e08f4cf71 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -380,7 +380,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 			/* XXX do we need to do the eof zeroing found in async_filler? */
-			req = nfs_create_request(dreq->ctx, pagevec[i],
+			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
 						 pgbase, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
@@ -749,7 +749,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
 
-			req = nfs_create_request(dreq->ctx, pagevec[i],
+			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
 						 pgbase, req_len);
 			if (IS_ERR(req)) {
 				result = PTR_ERR(req);
@@ -827,6 +827,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 	spin_unlock(&dreq->lock);
 
 	while (!list_empty(&hdr->pages)) {
+		bool do_destroy = true;
+
 		req = nfs_list_entry(hdr->pages.next);
 		nfs_list_remove_request(req);
 		switch (bit) {
@@ -834,6 +836,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 		case NFS_IOHDR_NEED_COMMIT:
 			kref_get(&req->wb_kref);
 			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+			do_destroy = false;
 		}
 		nfs_unlock_and_release_request(req);
 	}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index f343f49ff596..015fb7b48dfe 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,6 +29,8 @@
 static struct kmem_cache *nfs_page_cachep;
 static const struct rpc_call_ops nfs_pgio_common_ops;
 
+static void nfs_free_request(struct nfs_page *);
+
 static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
 {
 	p->npages = pagecount;
@@ -136,10 +138,151 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
 	return __nfs_iocounter_wait(c);
 }
 
+/*
+ * nfs_page_group_lock - lock the head of the page group
+ * @req - request in group that is to be locked
+ *
+ * this lock must be held if modifying the page group list
+ */
+void
+nfs_page_group_lock(struct nfs_page *req)
+{
+	struct nfs_page *head = req->wb_head;
+	int err = -EAGAIN;
+
+	WARN_ON_ONCE(head != head->wb_head);
+
+	while (err)
+		err = wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+			nfs_wait_bit_killable, TASK_KILLABLE);
+}
+
+/*
+ * nfs_page_group_unlock - unlock the head of the page group
+ * @req - request in group that is to be unlocked
+ */
+void
+nfs_page_group_unlock(struct nfs_page *req)
+{
+	struct nfs_page *head = req->wb_head;
+
+	WARN_ON_ONCE(head != head->wb_head);
+
+	smp_mb__before_clear_bit();
+	clear_bit(PG_HEADLOCK, &head->wb_flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&head->wb_flags, PG_HEADLOCK);
+}
+
+/*
+ * nfs_page_group_sync_on_bit_locked
+ *
+ * must be called with page group lock held
+ */
+static bool
+nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+{
+	struct nfs_page *head = req->wb_head;
+	struct nfs_page *tmp;
+
+	WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags));
+	WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags));
+
+	tmp = req->wb_this_page;
+	while (tmp != req) {
+		if (!test_bit(bit, &tmp->wb_flags))
+			return false;
+		tmp = tmp->wb_this_page;
+	}
+
+	/* true! reset all bits */
+	tmp = req;
+	do {
+		clear_bit(bit, &tmp->wb_flags);
+		tmp = tmp->wb_this_page;
+	} while (tmp != req);
+
+	return true;
+}
+
+/*
+ * nfs_page_group_sync_on_bit - set bit on current request, but only
+ *   return true if the bit is set for all requests in page group
+ * @req - request in page group
+ * @bit - PG_* bit that is used to sync page group
+ */
+bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
+{
+	bool ret;
+
+	nfs_page_group_lock(req);
+	ret = nfs_page_group_sync_on_bit_locked(req, bit);
+	nfs_page_group_unlock(req);
+
+	return ret;
+}
+
+/*
+ * nfs_page_group_init - Initialize the page group linkage for @req
+ * @req - a new nfs request
+ * @prev - the previous request in page group, or NULL if @req is the first
+ *         or only request in the group (the head).
+ */
+static inline void
+nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
+{
+	WARN_ON_ONCE(prev == req);
+
+	if (!prev) {
+		req->wb_head = req;
+		req->wb_this_page = req;
+	} else {
+		WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
+		WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
+		req->wb_head = prev->wb_head;
+		req->wb_this_page = prev->wb_this_page;
+		prev->wb_this_page = req;
+
+		/* grab extra ref if head request has extra ref from
+		 * the write/commit path to handle handoff between write
+		 * and commit lists */
+		if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags))
+			kref_get(&req->wb_kref);
+	}
+}
+
+/*
+ * nfs_page_group_destroy - sync the destruction of page groups
+ * @req - request that no longer needs the page group
+ *
+ * releases the page group reference from each member once all
+ * members have called this function.
+ */
+static void
+nfs_page_group_destroy(struct kref *kref)
+{
+	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+	struct nfs_page *tmp, *next;
+
+	if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
+		return;
+
+	tmp = req;
+	do {
+		next = tmp->wb_this_page;
+		/* unlink and free */
+		tmp->wb_this_page = tmp;
+		tmp->wb_head = tmp;
+		nfs_free_request(tmp);
+		tmp = next;
+	} while (tmp != req);
+}
+
 /**
  * nfs_create_request - Create an NFS read/write request.
  * @ctx: open context to use
  * @page: page to write
+ * @last: last nfs request created for this page group or NULL if head
  * @offset: starting offset within the page for the write
  * @count: number of bytes to read/write
  *
@@ -149,7 +292,8 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
  */
 struct nfs_page *
 nfs_create_request(struct nfs_open_context *ctx, struct page *page,
-		   unsigned int offset, unsigned int count)
+		   struct nfs_page *last, unsigned int offset,
+		   unsigned int count)
 {
 	struct nfs_page		*req;
 	struct nfs_lock_context *l_ctx;
@@ -181,6 +325,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
 	req->wb_bytes   = count;
 	req->wb_context = get_nfs_open_context(ctx);
 	kref_init(&req->wb_kref);
+	nfs_page_group_init(req, last);
 	return req;
 }
 
@@ -238,16 +383,18 @@ static void nfs_clear_request(struct nfs_page *req)
 	}
 }
 
-
 /**
  * nfs_release_request - Release the count on an NFS read/write request
  * @req: request to release
  *
  * Note: Should never be called with the spinlock held!
  */
-static void nfs_free_request(struct kref *kref)
+static void nfs_free_request(struct nfs_page *req)
 {
-	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+	WARN_ON_ONCE(req->wb_this_page != req);
+
+	/* extra debug: make sure no sync bits are still set */
+	WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
 
 	/* Release struct file and open context */
 	nfs_clear_request(req);
@@ -256,7 +403,7 @@ static void nfs_free_request(struct kref *kref)
 
 void nfs_release_request(struct nfs_page *req)
 {
-	kref_put(&req->wb_kref, nfs_free_request);
+	kref_put(&req->wb_kref, nfs_page_group_destroy);
 }
 
 static int nfs_wait_bit_uninterruptible(void *word)
@@ -832,21 +979,66 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
  * @desc: destination io descriptor
  * @req: request
  *
+ * This may split a request into subrequests which are all part of the
+ * same page group.
+ *
  * Returns true if the request 'req' was successfully coalesced into the
  * existing list of pages 'desc'.
  */
 static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
 			   struct nfs_page *req)
 {
-	while (!nfs_pageio_do_add_request(desc, req)) {
-		desc->pg_moreio = 1;
-		nfs_pageio_doio(desc);
-		if (desc->pg_error < 0)
-			return 0;
-		desc->pg_moreio = 0;
-		if (desc->pg_recoalesce)
-			return 0;
-	}
+	struct nfs_page *subreq;
+	unsigned int bytes_left = 0;
+	unsigned int offset, pgbase;
+
+	nfs_page_group_lock(req);
+
+	subreq = req;
+	bytes_left = subreq->wb_bytes;
+	offset = subreq->wb_offset;
+	pgbase = subreq->wb_pgbase;
+
+	do {
+		if (!nfs_pageio_do_add_request(desc, subreq)) {
+			/* make sure pg_test call(s) did nothing */
+			WARN_ON_ONCE(subreq->wb_bytes != bytes_left);
+			WARN_ON_ONCE(subreq->wb_offset != offset);
+			WARN_ON_ONCE(subreq->wb_pgbase != pgbase);
+
+			nfs_page_group_unlock(req);
+			desc->pg_moreio = 1;
+			nfs_pageio_doio(desc);
+			if (desc->pg_error < 0)
+				return 0;
+			desc->pg_moreio = 0;
+			if (desc->pg_recoalesce)
+				return 0;
+			/* retry add_request for this subreq */
+			nfs_page_group_lock(req);
+			continue;
+		}
+
+		/* check for buggy pg_test call(s) */
+		WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE);
+		WARN_ON_ONCE(subreq->wb_bytes > bytes_left);
+		WARN_ON_ONCE(subreq->wb_bytes == 0);
+
+		bytes_left -= subreq->wb_bytes;
+		offset += subreq->wb_bytes;
+		pgbase += subreq->wb_bytes;
+
+		if (bytes_left) {
+			subreq = nfs_create_request(req->wb_context,
+					req->wb_page,
+					subreq, pgbase, bytes_left);
+			nfs_lock_request(subreq);
+			subreq->wb_offset  = offset;
+			subreq->wb_index = req->wb_index;
+		}
+	} while (bytes_left > 0);
+
+	nfs_page_group_unlock(req);
 	return 1;
 }
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 46d90448f69b..902ba2c63d05 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -85,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
 	len = nfs_page_length(page);
 	if (len == 0)
 		return nfs_return_empty_page(page);
-	new = nfs_create_request(ctx, page, 0, len);
+	new = nfs_create_request(ctx, page, NULL, 0, len);
 	if (IS_ERR(new)) {
 		unlock_page(page);
 		return PTR_ERR(new);
@@ -311,7 +311,7 @@ readpage_async_filler(void *data, struct page *page)
 	if (len == 0)
 		return nfs_return_empty_page(page);
 
-	new = nfs_create_request(desc->ctx, page, 0, len);
+	new = nfs_create_request(desc->ctx, page, NULL, 0, len);
 	if (IS_ERR(new))
 		goto out_error;
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e773df207c05..d0f30f12a8b3 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -367,6 +367,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 
+	WARN_ON_ONCE(req->wb_this_page != req);
+
 	/* Lock the request! */
 	nfs_lock_request(req);
 
@@ -383,6 +385,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 		set_page_private(req->wb_page, (unsigned long)req);
 	}
 	nfsi->npages++;
+	set_bit(PG_INODE_REF, &req->wb_flags);
 	kref_get(&req->wb_kref);
 	spin_unlock(&inode->i_lock);
 }
@@ -567,6 +570,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_commit_info cinfo;
 	unsigned long bytes = 0;
+	bool do_destroy;
 
 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
 		goto out;
@@ -596,6 +600,7 @@ remove_req:
 next:
 		nfs_unlock_request(req);
 		nfs_end_page_writeback(req->wb_page);
+		do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
 		nfs_release_request(req);
 	}
 out:
@@ -700,6 +705,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
 		if (req == NULL)
 			goto out_unlock;
 
+		/* should be handled by nfs_flush_incompatible */
+		WARN_ON_ONCE(req->wb_head != req);
+		WARN_ON_ONCE(req->wb_this_page != req);
+
 		rqend = req->wb_offset + req->wb_bytes;
 		/*
 		 * Tell the caller to flush out the request if
@@ -761,7 +770,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
 	req = nfs_try_to_update_request(inode, page, offset, bytes);
 	if (req != NULL)
 		goto out;
-	req = nfs_create_request(ctx, page, offset, bytes);
+	req = nfs_create_request(ctx, page, NULL, offset, bytes);
 	if (IS_ERR(req))
 		goto out;
 	nfs_inode_add_request(inode, req);
@@ -805,6 +814,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
 			return 0;
 		l_ctx = req->wb_lock_context;
 		do_flush = req->wb_page != page || req->wb_context != ctx;
+		/* for now, flush if more than 1 request in page_group */
+		do_flush |= req->wb_this_page != req;
 		if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
 			do_flush |= l_ctx->lockowner.l_owner != current->files
 				|| l_ctx->lockowner.l_pid != current->tgid;
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 13d59af561f6..986c0c279d0e 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -26,6 +26,9 @@ enum {
 	PG_MAPPED,		/* page private set for buffered io */
 	PG_CLEAN,		/* write succeeded */
 	PG_COMMIT_TO_DS,	/* used by pnfs layouts */
+	PG_INODE_REF,		/* extra ref held by inode (head req only) */
+	PG_HEADLOCK,		/* page group lock of wb_head */
+	PG_TEARDOWN,		/* page group sync for destroy */
 };
 
 struct nfs_inode;
@@ -41,6 +44,8 @@ struct nfs_page {
 	struct kref		wb_kref;	/* reference count */
 	unsigned long		wb_flags;
 	struct nfs_write_verifier	wb_verf;	/* Commit cookie */
+	struct nfs_page		*wb_this_page;  /* list of reqs for this page */
+	struct nfs_page		*wb_head;       /* head pointer for req list */
 };
 
 struct nfs_pageio_descriptor;
@@ -87,9 +92,10 @@ struct nfs_pageio_descriptor {
 
 extern	struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
 					    struct page *page,
+					    struct nfs_page *last,
 					    unsigned int offset,
 					    unsigned int count);
-extern	void nfs_release_request(struct nfs_page *req);
+extern	void nfs_release_request(struct nfs_page *);
 
 
 extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
@@ -108,7 +114,10 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
 				struct nfs_page *req);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
-extern	void nfs_unlock_and_release_request(struct nfs_page *req);
+extern	void nfs_unlock_and_release_request(struct nfs_page *);
+extern void nfs_page_group_lock(struct nfs_page *);
+extern void nfs_page_group_unlock(struct nfs_page *);
+extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
 
 /*
  * Lock the page of an asynchronous request
-- 
cgit 


From 67d0338edd71db9a4f406d8778f7c525d31e9f7f Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:46 -0400
Subject: nfs: page group syncing in read path

Operations that modify state for a whole page must be synchronized across
all requests within a page group. In the read path, this is calling
unlock_page and SetPageUptodate. Both of these functions should not be
called until all requests in a page group have reached the point where
they would call them.

This patch should have no effect yet since all page groups currently
have one request, but will come into play when pg_test functions are
modified to split pages into sub-page regions.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c        |  2 ++
 fs/nfs/read.c            | 22 +++++++++++++++++-----
 include/linux/nfs_page.h |  2 ++
 3 files changed, 21 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 015fb7b48dfe..18ee4e99347e 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -395,6 +395,8 @@ static void nfs_free_request(struct nfs_page *req)
 
 	/* extra debug: make sure no sync bits are still set */
 	WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags));
 
 	/* Release struct file and open context */
 	nfs_clear_request(req);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 902ba2c63d05..53d5b83611ce 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -105,10 +105,16 @@ static void nfs_readpage_release(struct nfs_page *req)
 {
 	struct inode *d_inode = req->wb_context->dentry->d_inode;
 
-	if (PageUptodate(req->wb_page))
-		nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+	dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
+		(unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
+		(long long)req_offset(req));
 
-	unlock_page(req->wb_page);
+	if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
+		if (PageUptodate(req->wb_page))
+			nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+
+		unlock_page(req->wb_page);
+	}
 
 	dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
 			req->wb_context->dentry->d_inode->i_sb->s_id,
@@ -118,6 +124,12 @@ static void nfs_readpage_release(struct nfs_page *req)
 	nfs_release_request(req);
 }
 
+static void nfs_page_group_set_uptodate(struct nfs_page *req)
+{
+	if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
+		SetPageUptodate(req->wb_page);
+}
+
 /* Note io was page aligned */
 static void nfs_read_completion(struct nfs_pgio_header *hdr)
 {
@@ -140,9 +152,9 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
 		bytes += req->wb_bytes;
 		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
 			if (bytes <= hdr->good_bytes)
-				SetPageUptodate(page);
+				nfs_page_group_set_uptodate(req);
 		} else
-			SetPageUptodate(page);
+			nfs_page_group_set_uptodate(req);
 		nfs_list_remove_request(req);
 		nfs_readpage_release(req);
 	}
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 986c0c279d0e..6385175a127b 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -29,6 +29,8 @@ enum {
 	PG_INODE_REF,		/* extra ref held by inode (head req only) */
 	PG_HEADLOCK,		/* page group lock of wb_head */
 	PG_TEARDOWN,		/* page group sync for destroy */
+	PG_UNLOCKPAGE,		/* page group sync bit in read path */
+	PG_UPTODATE,		/* page group sync bit in read path */
 };
 
 struct nfs_inode;
-- 
cgit 


From 20633f042fd0907300069714b98aaf607a8b5bf8 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:47 -0400
Subject: nfs: page group syncing in write path

Operations that modify state for a whole page must be synchronized across
all requests within a page group. In the write path, this is calling
end_page_writeback and removing the head request from an inode.
Both of these operations should not be called until all requests
in a page group have reached the point where they would call them.

This patch should have no effect yet since all page groups currently
have one request, but will come into play when pg_test functions are
modified to split pages into sub-page regions.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c        |  2 ++
 fs/nfs/write.c           | 32 ++++++++++++++++++++------------
 include/linux/nfs_page.h |  2 ++
 3 files changed, 24 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 18ee4e99347e..ceb4424614aa 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -397,6 +397,8 @@ static void nfs_free_request(struct nfs_page *req)
 	WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
 	WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags));
 	WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags));
+	WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags));
 
 	/* Release struct file and open context */
 	nfs_clear_request(req);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d0f30f12a8b3..5d752766139d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -201,12 +201,15 @@ static void nfs_set_page_writeback(struct page *page)
 	}
 }
 
-static void nfs_end_page_writeback(struct page *page)
+static void nfs_end_page_writeback(struct nfs_page *req)
 {
-	struct inode *inode = page_file_mapping(page)->host;
+	struct inode *inode = page_file_mapping(req->wb_page)->host;
 	struct nfs_server *nfss = NFS_SERVER(inode);
 
-	end_page_writeback(page);
+	if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
+		return;
+
+	end_page_writeback(req->wb_page);
 	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
 		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
 }
@@ -397,15 +400,20 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 {
 	struct inode *inode = req->wb_context->dentry->d_inode;
 	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_page *head;
 
-	spin_lock(&inode->i_lock);
-	if (likely(!PageSwapCache(req->wb_page))) {
-		set_page_private(req->wb_page, 0);
-		ClearPagePrivate(req->wb_page);
-		clear_bit(PG_MAPPED, &req->wb_flags);
+	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
+		head = req->wb_head;
+
+		spin_lock(&inode->i_lock);
+		if (likely(!PageSwapCache(head->wb_page))) {
+			set_page_private(head->wb_page, 0);
+			ClearPagePrivate(head->wb_page);
+			clear_bit(PG_MAPPED, &head->wb_flags);
+		}
+		nfsi->npages--;
+		spin_unlock(&inode->i_lock);
 	}
-	nfsi->npages--;
-	spin_unlock(&inode->i_lock);
 	nfs_release_request(req);
 }
 
@@ -599,7 +607,7 @@ remove_req:
 		nfs_inode_remove_request(req);
 next:
 		nfs_unlock_request(req);
-		nfs_end_page_writeback(req->wb_page);
+		nfs_end_page_writeback(req);
 		do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
 		nfs_release_request(req);
 	}
@@ -964,7 +972,7 @@ static void nfs_redirty_request(struct nfs_page *req)
 {
 	nfs_mark_request_dirty(req);
 	nfs_unlock_request(req);
-	nfs_end_page_writeback(req->wb_page);
+	nfs_end_page_writeback(req);
 	nfs_release_request(req);
 }
 
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 6385175a127b..7d9096d95d4a 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -31,6 +31,8 @@ enum {
 	PG_TEARDOWN,		/* page group sync for destroy */
 	PG_UNLOCKPAGE,		/* page group sync bit in read path */
 	PG_UPTODATE,		/* page group sync bit in read path */
+	PG_WB_END,		/* page group sync bit in write path */
+	PG_REMOVE,		/* page group sync bit in write path */
 };
 
 struct nfs_inode;
-- 
cgit 


From 7f714720fac03383d687dbe39494cc96b845bd46 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:53 -0400
Subject: nfs: remove data list from pgio header

Since the ability to split pages into subpage requests has been added,
nfs_pgio_header->rpc_list only ever has one pgio data.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/pagelist.c       | 39 ++++++---------------------------------
 fs/nfs/pnfs.c           | 41 +++++++++++++++--------------------------
 include/linux/nfs_xdr.h |  5 +++--
 3 files changed, 24 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index ec4311df05d9..fab78d13ee14 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -470,7 +470,6 @@ struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
 		struct nfs_pgio_header *hdr = &header->header;
 
 		INIT_LIST_HEAD(&hdr->pages);
-		INIT_LIST_HEAD(&hdr->rpc_list);
 		spin_lock_init(&hdr->lock);
 		atomic_set(&hdr->refcnt, 0);
 		hdr->rw_ops = ops;
@@ -648,27 +647,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
 
-static int nfs_do_multiple_pgios(struct list_head *head,
-				 const struct rpc_call_ops *call_ops,
-				 int how)
-{
-	struct nfs_pgio_data *data;
-	int ret = 0;
-
-	while (!list_empty(head)) {
-		int ret2;
-
-		data = list_first_entry(head, struct nfs_pgio_data, list);
-		list_del_init(&data->list);
-
-		ret2 = nfs_initiate_pgio(NFS_CLIENT(data->header->inode),
-					 data, call_ops, how, 0);
-		if (ret == 0)
-			 ret = ret2;
-	}
-	return ret;
-}
-
 /**
  * nfs_pgio_error - Clean up from a pageio error
  * @desc: IO descriptor
@@ -677,14 +655,9 @@ static int nfs_do_multiple_pgios(struct list_head *head,
 static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
 			  struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_data *data;
-
 	set_bit(NFS_IOHDR_REDO, &hdr->flags);
-	while (!list_empty(&hdr->rpc_list)) {
-		data = list_first_entry(&hdr->rpc_list, struct nfs_pgio_data, list);
-		list_del(&data->list);
-		nfs_pgio_data_release(data);
-	}
+	nfs_pgio_data_release(hdr->data);
+	hdr->data = NULL;
 	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
 	return -ENOMEM;
 }
@@ -794,7 +767,7 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
 
 	/* Set up the argument struct */
 	nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
-	list_add(&data->list, &hdr->rpc_list);
+	hdr->data = data;
 	desc->pg_rpc_callops = &nfs_pgio_common_ops;
 	return 0;
 }
@@ -816,9 +789,9 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
 	atomic_inc(&hdr->refcnt);
 	ret = nfs_generic_pgio(desc, hdr);
 	if (ret == 0)
-		ret = nfs_do_multiple_pgios(&hdr->rpc_list,
-					    desc->pg_rpc_callops,
-					    desc->pg_ioflags);
+		ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
+					hdr->data, desc->pg_rpc_callops,
+					desc->pg_ioflags, 0);
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
 	return ret;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 354c53cd4095..6ef108b1d85f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1573,23 +1573,18 @@ pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
 }
 
 static void
-pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+pnfs_do_write(struct nfs_pageio_descriptor *desc,
+	      struct nfs_pgio_header *hdr, int how)
 {
-	struct nfs_pgio_data *data;
+	struct nfs_pgio_data *data = hdr->data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
+	enum pnfs_try_status trypnfs;
 
 	desc->pg_lseg = NULL;
-	while (!list_empty(head)) {
-		enum pnfs_try_status trypnfs;
-
-		data = list_first_entry(head, struct nfs_pgio_data, list);
-		list_del_init(&data->list);
-
-		trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
-		if (trypnfs == PNFS_NOT_ATTEMPTED)
-			pnfs_write_through_mds(desc, data);
-	}
+	trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+	if (trypnfs == PNFS_NOT_ATTEMPTED)
+		pnfs_write_through_mds(desc, data);
 	pnfs_put_lseg(lseg);
 }
 
@@ -1623,7 +1618,7 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
 		pnfs_put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 	} else
-		pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
+		pnfs_do_write(desc, hdr, desc->pg_ioflags);
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
 	return ret;
@@ -1731,23 +1726,17 @@ pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
 }
 
 static void
-pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
 {
-	struct nfs_pgio_data *data;
+	struct nfs_pgio_data *data = hdr->data;
 	const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
 	struct pnfs_layout_segment *lseg = desc->pg_lseg;
+	enum pnfs_try_status trypnfs;
 
 	desc->pg_lseg = NULL;
-	while (!list_empty(head)) {
-		enum pnfs_try_status trypnfs;
-
-		data = list_first_entry(head, struct nfs_pgio_data, list);
-		list_del_init(&data->list);
-
-		trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
-		if (trypnfs == PNFS_NOT_ATTEMPTED)
-			pnfs_read_through_mds(desc, data);
-	}
+	trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+	if (trypnfs == PNFS_NOT_ATTEMPTED)
+		pnfs_read_through_mds(desc, data);
 	pnfs_put_lseg(lseg);
 }
 
@@ -1782,7 +1771,7 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
 		pnfs_put_lseg(desc->pg_lseg);
 		desc->pg_lseg = NULL;
 	} else
-		pnfs_do_multiple_reads(desc, &hdr->rpc_list);
+		pnfs_do_read(desc, hdr);
 	if (atomic_dec_and_test(&hdr->refcnt))
 		hdr->completion_ops->completion(hdr);
 	return ret;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index adef7bd2d06d..ae636013fb1f 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1256,11 +1256,13 @@ enum {
 	NFS_IOHDR_NEED_RESCHED,
 };
 
+struct nfs_pgio_data;
+
 struct nfs_pgio_header {
 	struct inode		*inode;
 	struct rpc_cred		*cred;
 	struct list_head	pages;
-	struct list_head	rpc_list;
+	struct nfs_pgio_data	*data;
 	atomic_t		refcnt;
 	struct nfs_page		*req;
 	struct nfs_writeverf	verf;		/* Used for writes */
@@ -1282,7 +1284,6 @@ struct nfs_pgio_header {
 
 struct nfs_pgio_data {
 	struct nfs_pgio_header	*header;
-	struct list_head	list;
 	struct rpc_task		task;
 	struct nfs_fattr	fattr;
 	struct nfs_writeverf	verf;		/* Used for writes */
-- 
cgit 


From 5002c58639d41b93e800c8a4b7eca49c40d57822 Mon Sep 17 00:00:00 2001
From: Weston Andros Adamson <dros@primarydata.com>
Date: Thu, 15 May 2014 11:56:54 -0400
Subject: pnfs: support multiple verfs per direct req

Support direct requests that span multiple pnfs data servers by
comparing nfs_pgio_header->verf to a cached verf in pnfs_commit_bucket.
Continue to use dreq->verf if the MDS is used / non-pNFS.

Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/direct.c         | 102 +++++++++++++++++++++++++++++++++++++++++++++---
 fs/nfs/nfs4filelayout.c |   6 +++
 include/linux/nfs.h     |   5 ++-
 include/linux/nfs_xdr.h |   2 +
 4 files changed, 109 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 2c0e08f4cf71..4ad7bc388679 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
 	return atomic_dec_and_test(&dreq->io_count);
 }
 
+/*
+ * nfs_direct_select_verf - select the right verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
+ * @ds_idx - index of data server in data server list, only valid if ds_clp set
+ *
+ * returns the correct verifier to use given the role of the server
+ */
+static struct nfs_writeverf *
+nfs_direct_select_verf(struct nfs_direct_req *dreq,
+		       struct nfs_client *ds_clp,
+		       int ds_idx)
+{
+	struct nfs_writeverf *verfp = &dreq->verf;
+
+#ifdef CONFIG_NFS_V4_1
+	if (ds_clp) {
+		/* pNFS is in use, use the DS verf */
+		if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
+			verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
+		else
+			WARN_ON_ONCE(1);
+	}
+#endif
+	return verfp;
+}
+
+
+/*
+ * nfs_direct_set_hdr_verf - set the write/commit verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verfs
+ *
+ * Set the server's (MDS or DS) "seen" verifier
+ */
+static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
+				    struct nfs_pgio_header *hdr)
+{
+	struct nfs_writeverf *verfp;
+
+	verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+				      hdr->data->ds_idx);
+	WARN_ON_ONCE(verfp->committed >= 0);
+	memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+	WARN_ON_ONCE(verfp->committed < 0);
+}
+
+/*
+ * nfs_direct_set_or_cmp_hdr_verf - set or compare verifier for pgio header
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verf
+ *
+ * set the server's "seen" verf if not initialized.
+ * returns result of comparison between @hdr->verf and the "seen"
+ * verf of the server used by @hdr (DS or MDS)
+ */
+static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
+					  struct nfs_pgio_header *hdr)
+{
+	struct nfs_writeverf *verfp;
+
+	verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+					 hdr->data->ds_idx);
+	if (verfp->committed < 0) {
+		nfs_direct_set_hdr_verf(dreq, hdr);
+		return 0;
+	}
+	return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+}
+
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
+/*
+ * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
+ * @dreq - direct request possibly spanning multiple servers
+ * @data - commit data to validate against previously seen verf
+ *
+ * returns result of comparison between @data->verf and the verf of
+ * the server used by @data (DS or MDS)
+ */
+static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
+					   struct nfs_commit_data *data)
+{
+	struct nfs_writeverf *verfp;
+
+	verfp = nfs_direct_select_verf(dreq, data->ds_clp,
+					 data->ds_commit_index);
+	WARN_ON_ONCE(verfp->committed < 0);
+	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+}
+#endif
+
 /**
  * nfs_direct_IO - NFS address space operation for direct I/O
  * @rw: direction (read or write)
@@ -168,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 	kref_get(&dreq->kref);
 	init_completion(&dreq->completion);
 	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+	dreq->verf.committed = NFS_INVALID_STABLE_HOW;	/* not set yet */
 	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
 	spin_lock_init(&dreq->lock);
 
@@ -602,7 +694,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 		dprintk("NFS: %5u commit failed with error %d.\n",
 			data->task.tk_pid, status);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
-	} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+	} else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
 		dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 	}
@@ -811,13 +903,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
 				bit = NFS_IOHDR_NEED_RESCHED;
 			else if (dreq->flags == 0) {
-				memcpy(&dreq->verf, &hdr->verf,
-				       sizeof(dreq->verf));
+				nfs_direct_set_hdr_verf(dreq, hdr);
 				bit = NFS_IOHDR_NEED_COMMIT;
 				dreq->flags = NFS_ODIRECT_DO_COMMIT;
 			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
-				if (memcmp(&dreq->verf, &hdr->verf, sizeof(dreq->verf))) {
-					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+				if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
+					dreq->flags =
+						NFS_ODIRECT_RESCHED_WRITES;
 					bit = NFS_IOHDR_NEED_RESCHED;
 				} else
 					bit = NFS_IOHDR_NEED_COMMIT;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 7a665e0f35b7..0ebc521ea6fc 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -560,6 +560,7 @@ filelayout_read_pagelist(struct nfs_pgio_data *data)
 	/* No multipath support. Use first DS */
 	atomic_inc(&ds->ds_clp->cl_count);
 	data->ds_clp = ds->ds_clp;
+	data->ds_idx = idx;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
 	if (fh)
 		data->args.fh = fh;
@@ -603,6 +604,7 @@ filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
 	data->pgio_done_cb = filelayout_write_done_cb;
 	atomic_inc(&ds->ds_clp->cl_count);
 	data->ds_clp = ds->ds_clp;
+	data->ds_idx = idx;
 	fh = nfs4_fl_select_ds_fh(lseg, j);
 	if (fh)
 		data->args.fh = fh;
@@ -875,6 +877,8 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 	for (i = 0; i < size; i++) {
 		INIT_LIST_HEAD(&buckets[i].written);
 		INIT_LIST_HEAD(&buckets[i].committing);
+		/* mark direct verifier as unset */
+		buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
 	}
 
 	spin_lock(cinfo->lock);
@@ -885,6 +889,8 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
 			    &buckets[i].written);
 		list_splice(&cinfo->ds->buckets[i].committing,
 			    &buckets[i].committing);
+		buckets[i].direct_verf.committed =
+			cinfo->ds->buckets[i].direct_verf.committed;
 		buckets[i].wlseg = cinfo->ds->buckets[i].wlseg;
 		buckets[i].clseg = cinfo->ds->buckets[i].clseg;
 	}
diff --git a/include/linux/nfs.h b/include/linux/nfs.h
index 3e794c12e90a..610af5155ef2 100644
--- a/include/linux/nfs.h
+++ b/include/linux/nfs.h
@@ -46,6 +46,9 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc
 enum nfs3_stable_how {
 	NFS_UNSTABLE = 0,
 	NFS_DATA_SYNC = 1,
-	NFS_FILE_SYNC = 2
+	NFS_FILE_SYNC = 2,
+
+	/* used by direct.c to mark verf as invalid */
+	NFS_INVALID_STABLE_HOW = -1
 };
 #endif /* _LINUX_NFS_H */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ae636013fb1f..9a1396e70310 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1112,6 +1112,7 @@ struct pnfs_commit_bucket {
 	struct list_head committing;
 	struct pnfs_layout_segment *wlseg;
 	struct pnfs_layout_segment *clseg;
+	struct nfs_writeverf direct_verf;
 };
 
 struct pnfs_ds_commit_info {
@@ -1294,6 +1295,7 @@ struct nfs_pgio_data {
 	__u64			mds_offset;	/* Filelayout dense stripe */
 	struct nfs_page_array	pages;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
+	int			ds_idx;		/* ds index if ds_clp is set */
 };
 
 struct nfs_rw_header {
-- 
cgit 


From 05f1dd5315217398fc8d122bdee80f96a9f21274 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 29 May 2014 09:53:32 -0600
Subject: block: add queue flag for disabling SG merging

If devices are not SG starved, we waste a lot of time potentially
collapsing SG segments. Enough that 1.5% of the CPU time goes
to this, at only 400K IOPS. Add a queue flag, QUEUE_FLAG_NO_SG_MERGE,
which just returns the number of vectors in a bio instead of looping
over all segments and checking for collapsible ones.

Add a BLK_MQ_F_SG_MERGE flag so that drivers can opt-in on the sg
merging, if they so desire.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-merge.c      | 28 +++++++++++++++++++++-------
 block/blk-mq.c         |  3 +++
 include/linux/blk-mq.h |  1 +
 include/linux/blkdev.h |  1 +
 4 files changed, 26 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 6c583f9c5b65..b3bf0df0f4c2 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -13,7 +13,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 					     struct bio *bio)
 {
 	struct bio_vec bv, bvprv = { NULL };
-	int cluster, high, highprv = 1;
+	int cluster, high, highprv = 1, no_sg_merge;
 	unsigned int seg_size, nr_phys_segs;
 	struct bio *fbio, *bbio;
 	struct bvec_iter iter;
@@ -35,12 +35,21 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 	cluster = blk_queue_cluster(q);
 	seg_size = 0;
 	nr_phys_segs = 0;
+	no_sg_merge = test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
+	high = 0;
 	for_each_bio(bio) {
 		bio_for_each_segment(bv, bio, iter) {
+			/*
+			 * If SG merging is disabled, each bio vector is
+			 * a segment
+			 */
+			if (no_sg_merge)
+				goto new_segment;
+
 			/*
 			 * the trick here is making sure that a high page is
-			 * never considered part of another segment, since that
-			 * might change with the bounce page.
+			 * never considered part of another segment, since
+			 * that might change with the bounce page.
 			 */
 			high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q);
 			if (!high && !highprv && cluster) {
@@ -84,11 +93,16 @@ void blk_recalc_rq_segments(struct request *rq)
 
 void blk_recount_segments(struct request_queue *q, struct bio *bio)
 {
-	struct bio *nxt = bio->bi_next;
+	if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags))
+		bio->bi_phys_segments = bio->bi_vcnt;
+	else {
+		struct bio *nxt = bio->bi_next;
+
+		bio->bi_next = NULL;
+		bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
+		bio->bi_next = nxt;
+	}
 
-	bio->bi_next = NULL;
-	bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
-	bio->bi_next = nxt;
 	bio->bi_flags |= (1 << BIO_SEG_VALID);
 }
 EXPORT_SYMBOL(blk_recount_segments);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f27fe44230c2..f98d977fd150 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1829,6 +1829,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	q->mq_ops = set->ops;
 	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
 
+	if (!(set->flags & BLK_MQ_F_SG_MERGE))
+		q->queue_flags |= 1 << QUEUE_FLAG_NO_SG_MERGE;
+
 	q->sg_reserved_size = INT_MAX;
 
 	INIT_WORK(&q->requeue_work, blk_mq_requeue_work);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 91dfb75ce39f..95de239444d2 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -129,6 +129,7 @@ enum {
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
 	BLK_MQ_F_SHOULD_SORT	= 1 << 1,
 	BLK_MQ_F_TAG_SHARED	= 1 << 2,
+	BLK_MQ_F_SG_MERGE	= 1 << 3,
 
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 098304576d51..695b9fd41efe 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -510,6 +510,7 @@ struct request_queue {
 #define QUEUE_FLAG_SAME_FORCE  18	/* force complete on same CPU */
 #define QUEUE_FLAG_DEAD        19	/* queue tear-down finished */
 #define QUEUE_FLAG_INIT_DONE   20	/* queue is initialized */
+#define QUEUE_FLAG_NO_SG_MERGE 21	/* don't attempt to merge SG segments*/
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
-- 
cgit 


From 4055e5e54ecea4a41edec42f6bd4ee274892e872 Mon Sep 17 00:00:00 2001
From: David Mosberger-Tang <davidm@egauge.net>
Date: Thu, 29 May 2014 10:23:55 -0600
Subject: usb: host: max3421-hcd: Allow platform-data to specify Vbus polarity

Signed-off-by: Davidm Mosberger <davidm@egauge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/max3421-hcd.c            | 6 ++++--
 include/linux/platform_data/max3421-hcd.h | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c
index ccb1bc42b4d2..fd3ed994fa4d 100644
--- a/drivers/usb/host/max3421-hcd.c
+++ b/drivers/usb/host/max3421-hcd.c
@@ -1717,7 +1717,8 @@ max3421_hub_control(struct usb_hcd *hcd, u16 type_req, u16 value, u16 index,
 			break;
 		case USB_PORT_FEAT_POWER:
 			dev_dbg(hcd->self.controller, "power-off\n");
-			max3421_gpout_set_value(hcd, pdata->vbus_gpout, 0);
+			max3421_gpout_set_value(hcd, pdata->vbus_gpout,
+						!pdata->vbus_active_level);
 			/* FALLS THROUGH */
 		default:
 			max3421_hcd->port_status &= ~(1 << value);
@@ -1766,7 +1767,8 @@ max3421_hub_control(struct usb_hcd *hcd, u16 type_req, u16 value, u16 index,
 		case USB_PORT_FEAT_POWER:
 			dev_dbg(hcd->self.controller, "power-on\n");
 			max3421_hcd->port_status |= USB_PORT_STAT_POWER;
-			max3421_gpout_set_value(hcd, pdata->vbus_gpout, 1);
+			max3421_gpout_set_value(hcd, pdata->vbus_gpout,
+						pdata->vbus_active_level);
 			break;
 		case USB_PORT_FEAT_RESET:
 			max3421_reset_port(hcd);
diff --git a/include/linux/platform_data/max3421-hcd.h b/include/linux/platform_data/max3421-hcd.h
index 4ad459605d87..0303d1970084 100644
--- a/include/linux/platform_data/max3421-hcd.h
+++ b/include/linux/platform_data/max3421-hcd.h
@@ -18,6 +18,7 @@
  */
 struct max3421_hcd_platform_data {
 	u8 vbus_gpout;			/* pin controlling Vbus */
+	u8 vbus_active_level;		/* level that turns on power */
 };
 
 #endif /* MAX3421_HCD_PLAT_H_INCLUDED */
-- 
cgit 


From 86f6cf41272de9d6ffa05ab46028b15d160a6f3e Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Sat, 24 May 2014 09:34:26 +0200
Subject: net: of_mdio: add of_mdiobus_link_phydev()

Add a function to walk the list of subnodes of a mdio bus and look for
a node that matches the phy's address with its 'reg' property. If found,
set the of_node pointer for the phy. This allows auto-probed phy
devices to be augmented by information passed in via DT.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c |  6 ++++++
 drivers/of/of_mdio.c       | 33 +++++++++++++++++++++++++++++++++
 include/linux/of_mdio.h    |  8 ++++++++
 3 files changed, 47 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index a6284964b711..2e58aa54484c 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -300,6 +300,12 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr)
 	if (IS_ERR(phydev) || phydev == NULL)
 		return phydev;
 
+	/*
+	 * For DT, see if the auto-probed phy has a corresponding child
+	 * in the bus node, and set the of_node pointer in this case.
+	 */
+	of_mdiobus_link_phydev(bus, phydev);
+
 	err = phy_device_register(phydev);
 	if (err) {
 		phy_device_free(phydev);
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 731d3d9052d7..7c8c142e4eb8 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -183,6 +183,39 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 }
 EXPORT_SYMBOL(of_mdiobus_register);
 
+/**
+ * of_mdiobus_link_phydev - Find a device node for a phy
+ * @mdio: pointer to mii_bus structure
+ * @phydev: phydev for which the of_node pointer should be set
+ *
+ * Walk the list of subnodes of a mdio bus and look for a node that matches the
+ * phy's address with its 'reg' property. If found, set the of_node pointer for
+ * the phy. This allows auto-probed phy devices to be supplied with information
+ * passed in via DT.
+ */
+void of_mdiobus_link_phydev(struct mii_bus *mdio,
+			    struct phy_device *phydev)
+{
+	struct device *dev = &phydev->dev;
+	struct device_node *child;
+
+	if (dev->of_node || !mdio->dev.of_node)
+		return;
+
+	for_each_available_child_of_node(mdio->dev.of_node, child) {
+		int addr;
+
+		addr = of_mdio_parse_addr(&mdio->dev, child);
+		if (addr < 0)
+			continue;
+
+		if (addr == phydev->addr) {
+			dev->of_node = child;
+			return;
+		}
+	}
+}
+
 /* Helper function for of_phy_find_device */
 static int of_phy_match(struct device *dev, void *phy_np)
 {
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index d449018d0726..a70c9493d55a 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -25,6 +25,9 @@ struct phy_device *of_phy_attach(struct net_device *dev,
 
 extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np);
 
+extern void of_mdiobus_link_phydev(struct mii_bus *mdio,
+				   struct phy_device *phydev);
+
 #else /* CONFIG_OF */
 static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
 {
@@ -60,6 +63,11 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np)
 {
 	return NULL;
 }
+
+static inline void of_mdiobus_link_phydev(struct mii_bus *mdio,
+					  struct phy_device *phydev)
+{
+}
 #endif /* CONFIG_OF */
 
 #if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY)
-- 
cgit 


From 97982f5a91e91dab26dd0246083b9adf3ba8b2e3 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 29 May 2014 16:31:02 +0300
Subject: IB/mlx4: Preparation for VFs to issue/receive SMI (QP0)
 requests/responses

Currently, SRIOV VFs are denied QP0 access.  The main reason
for this decision is security, since Subnet Management Datagrams
(SMPs) are not restricted by network partitioning and may affect the
physical network topology.  Moreover, even the SM may be denied access
from portions of the network by setting management keys unknown to the
SM.

However, it is desirable to grant SMI access to certain privileged
VFs, so that certain network management activities may be conducted
within virtual machines instead of the hypervisor.

This commit does the following:

1. Create QP0 tunnel QPs for all VFs.

2. Discard SMI mads sent-from/received-for non-privileged VFs in the
   hypervisor MAD multiplex/demultiplex logic.  SMI mads from/for
   privileged VFs are allowed to pass.

3. MAD_IFC wrapper changes/fixes.  For non-privileged VFs, only
   host-view MAD_IFC commands are allowed, and only for SMI LID-Routed
   GET mads.  For privileged VFs, there are no restrictions.

This commit does not allow privileged VFs as yet.  To determine if a VF
is privileged, it calls function mlx4_vf_smi_enabled().  This function
returns 0 unconditionally for now.

The next two commits allow defining and activating privileged VFs.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/mad.c         | 40 ++++++++++++++++-----------
 drivers/infiniband/hw/mlx4/qp.c          | 16 +++++------
 drivers/net/ethernet/mellanox/mlx4/cmd.c | 47 ++++++++++++++++++++++----------
 include/linux/mlx4/device.h              |  1 +
 4 files changed, 66 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index fd36ec672632..287ad0564acd 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -478,10 +478,6 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 	if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
 		return -EAGAIN;
 
-	/* QP0 forwarding only for Dom0 */
-	if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
-		return -EINVAL;
-
 	if (!dest_qpt)
 		tun_qp = &tun_ctx->qp[0];
 	else
@@ -667,6 +663,21 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
 	}
 	/* Class-specific handling */
 	switch (mad->mad_hdr.mgmt_class) {
+	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+		/* 255 indicates the dom0 */
+		if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) {
+			if (!mlx4_vf_smi_enabled(dev->dev, slave, port))
+				return -EPERM;
+			/* for a VF. drop unsolicited MADs */
+			if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) {
+				mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n",
+					     slave, mad->mad_hdr.mgmt_class,
+					     mad->mad_hdr.method);
+				return -EINVAL;
+			}
+		}
+		break;
 	case IB_MGMT_CLASS_SUBN_ADM:
 		if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
 					     (struct ib_sa_mad *) mad))
@@ -1165,10 +1176,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 	if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
 		return -EAGAIN;
 
-	/* QP0 forwarding only for Dom0 */
-	if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
-		return -EINVAL;
-
 	if (dest_qpt == IB_QPT_SMI) {
 		src_qpnum = 0;
 		sqp = &sqp_ctx->qp[0];
@@ -1285,11 +1292,6 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
 			     "belongs to another slave\n", wc->src_qp);
 		return;
 	}
-	if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
-		mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
-			     "non-master trying to send QP0 packets\n", wc->src_qp);
-		return;
-	}
 
 	/* Map transaction ID */
 	ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
@@ -1317,6 +1319,12 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
 
 	/* Class-specific handling */
 	switch (tunnel->mad.mad_hdr.mgmt_class) {
+	case IB_MGMT_CLASS_SUBN_LID_ROUTED:
+	case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
+		if (slave != mlx4_master_func_num(dev->dev) &&
+		    !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port))
+			return;
+		break;
 	case IB_MGMT_CLASS_SUBN_ADM:
 		if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
 			      (struct ib_sa_mad *) &tunnel->mad))
@@ -1749,9 +1757,9 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
 		return -EEXIST;
 
 	ctx->state = DEMUX_PV_STATE_STARTING;
-	/* have QP0 only on port owner, and only if link layer is IB */
-	if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
-	    rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
+	/* have QP0 only if link layer is IB */
+	if (rdma_port_get_link_layer(ibdev, ctx->port) ==
+	    IB_LINK_LAYER_INFINIBAND)
 		ctx->has_smi = 1;
 
 	if (ctx->has_smi) {
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 41308af4163c..2e8c58806e2f 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -2370,7 +2370,8 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
 
 static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
 				    struct mlx4_wqe_datagram_seg *dseg,
-				    struct ib_send_wr *wr, enum ib_qp_type qpt)
+				    struct ib_send_wr *wr,
+				    enum mlx4_ib_qp_type qpt)
 {
 	union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
 	struct mlx4_av sqp_av = {0};
@@ -2383,8 +2384,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
 			cpu_to_be32(0xf0000000);
 
 	memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
-	/* This function used only for sending on QP1 proxies */
-	dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+	if (qpt == MLX4_IB_QPT_PROXY_GSI)
+		dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
+	else
+		dseg->dqpn = cpu_to_be32(dev->dev->caps.qp0_tunnel[port - 1]);
 	/* Use QKEY from the QP context, which is set by master */
 	dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
 }
@@ -2700,16 +2703,13 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			size += seglen / 16;
 			break;
 		case MLX4_IB_QPT_PROXY_SMI:
-			/* don't allow QP0 sends on guests */
-			err = -ENOSYS;
-			*bad_wr = wr;
-			goto out;
 		case MLX4_IB_QPT_PROXY_GSI:
 			/* If we are tunneling special qps, this is a UD qp.
 			 * In this case we first add a UD segment targeting
 			 * the tunnel qp, and then add a header with address
 			 * information */
-			set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
+			set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
+						qp->mlx4_ib_qp_type);
 			wqe  += sizeof (struct mlx4_wqe_datagram_seg);
 			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 			build_tunnel_header(wr, wqe, &seglen);
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 78099eab7673..b0e48d426fce 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -705,20 +705,28 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
 	struct ib_smp *smp = inbox->buf;
 	u32 index;
 	u8 port;
+	u8 opcode_modifier;
 	u16 *table;
 	int err;
 	int vidx, pidx;
+	int network_view;
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct ib_smp *outsmp = outbox->buf;
 	__be16 *outtab = (__be16 *)(outsmp->data);
 	__be32 slave_cap_mask;
 	__be64 slave_node_guid;
+
 	port = vhcr->in_modifier;
 
+	/* network-view bit is for driver use only, and should not be passed to FW */
+	opcode_modifier = vhcr->op_modifier & ~0x8; /* clear netw view bit */
+	network_view = !!(vhcr->op_modifier & 0x8);
+
 	if (smp->base_version == 1 &&
 	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
 	    smp->class_version == 1) {
-		if (smp->method	== IB_MGMT_METHOD_GET) {
+		/* host view is paravirtualized */
+		if (!network_view && smp->method == IB_MGMT_METHOD_GET) {
 			if (smp->attr_id == IB_SMP_ATTR_PKEY_TABLE) {
 				index = be32_to_cpu(smp->attr_mod);
 				if (port < 1 || port > dev->caps.num_ports)
@@ -743,7 +751,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
 				/*get the slave specific caps:*/
 				/*do the command */
 				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-					    vhcr->in_modifier, vhcr->op_modifier,
+					    vhcr->in_modifier, opcode_modifier,
 					    vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 				/* modify the response for slaves */
 				if (!err && slave != mlx4_master_func_num(dev)) {
@@ -760,7 +768,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
 				smp->attr_mod = cpu_to_be32(slave / 8);
 				/* execute cmd */
 				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-					     vhcr->in_modifier, vhcr->op_modifier,
+					     vhcr->in_modifier, opcode_modifier,
 					     vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 				if (!err) {
 					/* if needed, move slave gid to index 0 */
@@ -774,7 +782,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
 			}
 			if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
 				err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-					     vhcr->in_modifier, vhcr->op_modifier,
+					     vhcr->in_modifier, opcode_modifier,
 					     vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 				if (!err) {
 					slave_node_guid =  mlx4_get_slave_node_guid(dev, slave);
@@ -784,19 +792,24 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
 			}
 		}
 	}
+
+	/* Non-privileged VFs are only allowed "host" view LID-routed 'Get' MADs.
+	 * These are the MADs used by ib verbs (such as ib_query_gids).
+	 */
 	if (slave != mlx4_master_func_num(dev) &&
-	    ((smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) ||
-	     (smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
-	      smp->method == IB_MGMT_METHOD_SET))) {
-		mlx4_err(dev, "slave %d is trying to execute a Subnet MGMT MAD, "
-			 "class 0x%x, method 0x%x for attr 0x%x. Rejecting\n",
-			 slave, smp->method, smp->mgmt_class,
-			 be16_to_cpu(smp->attr_id));
-		return -EPERM;
+	    !mlx4_vf_smi_enabled(dev, slave, port)) {
+		if (!(smp->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+		      smp->method == IB_MGMT_METHOD_GET) || network_view) {
+			mlx4_err(dev, "Unprivileged slave %d is trying to execute a Subnet MGMT MAD, class 0x%x, method 0x%x, view=%s for attr 0x%x. Rejecting\n",
+				 slave, smp->method, smp->mgmt_class,
+				 network_view ? "Network" : "Host",
+				 be16_to_cpu(smp->attr_id));
+			return -EPERM;
+		}
 	}
-	/*default:*/
+
 	return mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-				    vhcr->in_modifier, vhcr->op_modifier,
+				    vhcr->in_modifier, opcode_modifier,
 				    vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
 }
 
@@ -2537,3 +2550,9 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state);
+
+int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ba87bd21295a..83612faa819c 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1234,4 +1234,5 @@ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port);
 int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
 
 int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
+int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
 #endif /* MLX4_DEVICE_H */
-- 
cgit 


From 99ec41d0a48cb6d14af25765f9449762f9d101f6 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 29 May 2014 16:31:03 +0300
Subject: mlx4: Add infrastructure for selecting VFs to enable QP0 via MLX
 proxy QPs

This commit adds the infrastructure for enabling selected VFs to
operate SMI (QP0) MADs without restriction.

Additionally, for these enabled VFs, their QP0 proxy and tunnel QPs
are MLX QPs.  As such, they operate over VL15.  Therefore, they are
not affected by "credit" problems or changes in the VLArb table (which
may shut down VL0).

Non-enabled VFs may only create UD proxy QP0 qps (which are forced by
the hypervisor to send packets using the q-key it assigns and places
in the qp-context).  Thus, non-enabled VFs will not pose a security
risk.  The hypervisor discards any privileged MADs it receives from
these non-enabled VFs.

By default, all VFs are NOT enabled, and must explicitly be enabled
by the administrator.

The sysfs interface which operates the VF enablement infrastructure
is provided in the next commit.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/qp.c                    | 52 +++++++++++++++++-----
 drivers/net/ethernet/mellanox/mlx4/cmd.c           | 13 +++++-
 drivers/net/ethernet/mellanox/mlx4/fw.c            | 44 ++++++++++++------
 drivers/net/ethernet/mellanox/mlx4/fw.h            |  1 +
 drivers/net/ethernet/mellanox/mlx4/main.c          | 16 +++++--
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  8 ++++
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  | 26 ++++++++---
 include/linux/mlx4/device.h                        |  1 +
 8 files changed, 126 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 2e8c58806e2f..b25600997cb6 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -608,6 +608,16 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
 	return !attr->srq;
 }
 
+static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
+{
+	int i;
+	for (i = 0; i < dev->caps.num_ports; i++) {
+		if (qpn == dev->caps.qp0_proxy[i])
+			return !!dev->caps.qp0_qkey[i];
+	}
+	return 0;
+}
+
 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			    struct ib_qp_init_attr *init_attr,
 			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
@@ -625,10 +635,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		     !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
 			if (init_attr->qp_type == IB_QPT_GSI)
 				qp_type = MLX4_IB_QPT_PROXY_GSI;
-			else if (mlx4_is_master(dev->dev))
-				qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
-			else
-				qp_type = MLX4_IB_QPT_PROXY_SMI;
+			else {
+				if (mlx4_is_master(dev->dev) ||
+				    qp0_enabled_vf(dev->dev, sqpn))
+					qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
+				else
+					qp_type = MLX4_IB_QPT_PROXY_SMI;
+			}
 		}
 		qpn = sqpn;
 		/* add extra sg entry for tunneling */
@@ -643,7 +656,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			return -EINVAL;
 		if (tnl_init->proxy_qp_type == IB_QPT_GSI)
 			qp_type = MLX4_IB_QPT_TUN_GSI;
-		else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
+		else if (tnl_init->slave == mlx4_master_func_num(dev->dev) ||
+			 mlx4_vf_smi_enabled(dev->dev, tnl_init->slave,
+					     tnl_init->port))
 			qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
 		else
 			qp_type = MLX4_IB_QPT_TUN_SMI;
@@ -1930,6 +1945,19 @@ out:
 	return err;
 }
 
+static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
+{
+	int i;
+	for (i = 0; i < dev->caps.num_ports; i++) {
+		if (qpn == dev->caps.qp0_proxy[i] ||
+		    qpn == dev->caps.qp0_tunnel[i]) {
+			*qkey = dev->caps.qp0_qkey[i];
+			return 0;
+		}
+	}
+	return -EINVAL;
+}
+
 static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
 				  struct ib_send_wr *wr,
 				  void *wqe, unsigned *mlx_seg_len)
@@ -1987,8 +2015,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
 			cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
 
 	sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
-	if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
-		return -EINVAL;
+	if (mlx4_is_master(mdev->dev)) {
+		if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+			return -EINVAL;
+	} else {
+		if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
+			return -EINVAL;
+	}
 	sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
 	sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
 
@@ -2682,11 +2715,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			break;
 
 		case MLX4_IB_QPT_PROXY_SMI_OWNER:
-			if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) {
-				err = -ENOSYS;
-				*bad_wr = wr;
-				goto out;
-			}
 			err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
 			if (unlikely(err)) {
 				*bad_wr = wr;
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index b0e48d426fce..26c3ebaa49d1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1666,6 +1666,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave)
 	for (port = min_port; port <= max_port; port++) {
 		if (!test_bit(port - 1, actv_ports.ports))
 			continue;
+		priv->mfunc.master.vf_oper[slave].smi_enabled[port] =
+			priv->mfunc.master.vf_admin[slave].enable_smi[port];
 		vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 		vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port];
 		vp_oper->state = *vp_admin;
@@ -1717,6 +1719,8 @@ static void mlx4_master_deactivate_admin_state(struct mlx4_priv *priv, int slave
 	for (port = min_port; port <= max_port; port++) {
 		if (!test_bit(port - 1, actv_ports.ports))
 			continue;
+		priv->mfunc.master.vf_oper[slave].smi_enabled[port] =
+			MLX4_VF_SMI_DISABLED;
 		vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port];
 		if (NO_INDX != vp_oper->vlan_idx) {
 			__mlx4_unregister_vlan(&priv->dev,
@@ -2553,6 +2557,13 @@ EXPORT_SYMBOL_GPL(mlx4_set_vf_link_state);
 
 int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port)
 {
-	return 0;
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	if (slave < 1 || slave >= dev->num_slaves ||
+	    port < 1 || port > MLX4_MAX_PORTS)
+		return 0;
+
+	return priv->mfunc.master.vf_oper[slave].smi_enabled[port] ==
+		MLX4_VF_SMI_ENABLED;
 }
 EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index ef242e19766f..01e6dd61ee3c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -178,8 +178,8 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 				struct mlx4_cmd_info *cmd)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	u8	field;
-	u32	size;
+	u8	field, port;
+	u32	size, proxy_qp, qkey;
 	int	err = 0;
 
 #define QUERY_FUNC_CAP_FLAGS_OFFSET		0x0
@@ -209,6 +209,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 
 /* when opcode modifier = 1 */
 #define QUERY_FUNC_CAP_PHYS_PORT_OFFSET		0x3
+#define QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET	0x4
 #define QUERY_FUNC_CAP_FLAGS0_OFFSET		0x8
 #define QUERY_FUNC_CAP_FLAGS1_OFFSET		0xc
 
@@ -221,6 +222,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 #define QUERY_FUNC_CAP_FLAGS1_FORCE_MAC		0x40
 #define QUERY_FUNC_CAP_FLAGS1_FORCE_VLAN	0x80
 #define QUERY_FUNC_CAP_FLAGS1_NIC_INFO			0x10
+#define QUERY_FUNC_CAP_VF_ENABLE_QP0		0x08
 
 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80
 
@@ -234,28 +236,35 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 			return -EINVAL;
 
 		vhcr->in_modifier = converted_port;
-		/* Set nic_info bit to mark new fields support */
-		field  = QUERY_FUNC_CAP_FLAGS1_NIC_INFO;
-		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET);
-
 		/* phys-port = logical-port */
 		field = vhcr->in_modifier -
 			find_first_bit(actv_ports.ports, dev->caps.num_ports);
 		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET);
 
-		field = vhcr->in_modifier;
+		port = vhcr->in_modifier;
+		proxy_qp = dev->phys_caps.base_proxy_sqpn + 8 * slave + port - 1;
+
+		/* Set nic_info bit to mark new fields support */
+		field  = QUERY_FUNC_CAP_FLAGS1_NIC_INFO;
+
+		if (mlx4_vf_smi_enabled(dev, slave, port) &&
+		    !mlx4_get_parav_qkey(dev, proxy_qp, &qkey)) {
+			field |= QUERY_FUNC_CAP_VF_ENABLE_QP0;
+			MLX4_PUT(outbox->buf, qkey,
+				 QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET);
+		}
+		MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS1_OFFSET);
+
 		/* size is now the QP number */
-		size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1;
+		size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + port - 1;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL);
 
 		size += 2;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL);
 
-		size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1;
-		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY);
-
-		size += 2;
-		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY);
+		MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP0_PROXY);
+		proxy_qp += 2;
+		MLX4_PUT(outbox->buf, proxy_qp, QUERY_FUNC_CAP_QP1_PROXY);
 
 		MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier],
 			 QUERY_FUNC_CAP_PHYS_PORT_ID);
@@ -326,7 +335,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
 	struct mlx4_cmd_mailbox *mailbox;
 	u32			*outbox;
 	u8			field, op_modifier;
-	u32			size;
+	u32			size, qkey;
 	int			err = 0, quotas = 0;
 
 	op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */
@@ -442,6 +451,13 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port,
 		goto out;
 	}
 
+	if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) {
+		MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET);
+		func_cap->qp0_qkey = qkey;
+	} else {
+		func_cap->qp0_qkey = 0;
+	}
+
 	MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL);
 	func_cap->qp0_tunnel_qpn = size & 0xFFFFFF;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 6811ee00ba7c..1fce03ebe5c4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -134,6 +134,7 @@ struct mlx4_func_cap {
 	int	max_eq;
 	int	reserved_eq;
 	int	mcg_quota;
+	u32	qp0_qkey;
 	u32	qp0_tunnel_qpn;
 	u32	qp0_proxy_qpn;
 	u32	qp1_tunnel_qpn;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 12a7ee2e6098..908326876ab5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -666,13 +666,15 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 		return -ENODEV;
 	}
 
+	dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL);
 	dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 	dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 	dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 	dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL);
 
 	if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy ||
-	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) {
+	    !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy ||
+	    !dev->caps.qp0_qkey) {
 		err = -ENOMEM;
 		goto err_mem;
 	}
@@ -684,6 +686,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 				 " port %d, aborting (%d).\n", i, err);
 			goto err_mem;
 		}
+		dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey;
 		dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn;
 		dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn;
 		dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn;
@@ -729,12 +732,16 @@ static int mlx4_slave_cap(struct mlx4_dev *dev)
 	return 0;
 
 err_mem:
+	kfree(dev->caps.qp0_qkey);
 	kfree(dev->caps.qp0_tunnel);
 	kfree(dev->caps.qp0_proxy);
 	kfree(dev->caps.qp1_tunnel);
 	kfree(dev->caps.qp1_proxy);
-	dev->caps.qp0_tunnel = dev->caps.qp0_proxy =
-		dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL;
+	dev->caps.qp0_qkey = NULL;
+	dev->caps.qp0_tunnel = NULL;
+	dev->caps.qp0_proxy = NULL;
+	dev->caps.qp1_tunnel = NULL;
+	dev->caps.qp1_proxy = NULL;
 
 	return err;
 }
@@ -1697,6 +1704,7 @@ unmap_bf:
 	unmap_bf_area(dev);
 
 	if (mlx4_is_slave(dev)) {
+		kfree(dev->caps.qp0_qkey);
 		kfree(dev->caps.qp0_tunnel);
 		kfree(dev->caps.qp0_proxy);
 		kfree(dev->caps.qp1_tunnel);
@@ -2573,6 +2581,7 @@ err_master_mfunc:
 		mlx4_multi_func_cleanup(dev);
 
 	if (mlx4_is_slave(dev)) {
+		kfree(dev->caps.qp0_qkey);
 		kfree(dev->caps.qp0_tunnel);
 		kfree(dev->caps.qp0_proxy);
 		kfree(dev->caps.qp1_tunnel);
@@ -2702,6 +2711,7 @@ static void __mlx4_remove_one(struct pci_dev *pdev)
 	if (!mlx4_is_slave(dev))
 		mlx4_free_ownership(dev);
 
+	kfree(dev->caps.qp0_qkey);
 	kfree(dev->caps.qp0_tunnel);
 	kfree(dev->caps.qp0_proxy);
 	kfree(dev->caps.qp1_tunnel);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index f9c465101963..0efd1738fcb3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -133,6 +133,11 @@ enum {
 	MLX4_COMM_CMD_FLR = 254
 };
 
+enum {
+	MLX4_VF_SMI_DISABLED,
+	MLX4_VF_SMI_ENABLED
+};
+
 /*The flag indicates that the slave should delay the RESET cmd*/
 #define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb
 /*indicates how many retries will be done if we are in the middle of FLR*/
@@ -488,6 +493,7 @@ struct mlx4_vport_state {
 
 struct mlx4_vf_admin_state {
 	struct mlx4_vport_state vport[MLX4_MAX_PORTS + 1];
+	u8 enable_smi[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_vport_oper_state {
@@ -495,8 +501,10 @@ struct mlx4_vport_oper_state {
 	int mac_idx;
 	int vlan_idx;
 };
+
 struct mlx4_vf_oper_state {
 	struct mlx4_vport_oper_state vport[MLX4_MAX_PORTS + 1];
+	u8 smi_enabled[MLX4_MAX_PORTS + 1];
 };
 
 struct slave_list {
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 1c3fdd4a1f7d..ad98162a8d79 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2827,10 +2827,12 @@ static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start,
 }
 
 static int verify_qp_parameters(struct mlx4_dev *dev,
+				struct mlx4_vhcr *vhcr,
 				struct mlx4_cmd_mailbox *inbox,
 				enum qp_transition transition, u8 slave)
 {
 	u32			qp_type;
+	u32			qpn;
 	struct mlx4_qp_context	*qp_ctx;
 	enum mlx4_qp_optpar	optpar;
 	int port;
@@ -2870,6 +2872,20 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
 						return -EINVAL;
 				}
 			break;
+		case MLX4_QP_ST_MLX:
+			qpn = vhcr->in_modifier & 0x7fffff;
+			port = (qp_ctx->pri_path.sched_queue >> 6 & 1) + 1;
+			if (transition == QP_TRANS_INIT2RTR &&
+			    slave != mlx4_master_func_num(dev) &&
+			    mlx4_is_qp_reserved(dev, qpn) &&
+			    !mlx4_vf_smi_enabled(dev, slave, port)) {
+				/* only enabled VFs may create MLX proxy QPs */
+				mlx4_err(dev, "%s: unprivileged slave %d attempting to create an MLX proxy special QP on port %d\n",
+					 __func__, slave, port);
+				return -EPERM;
+			}
+			break;
+
 		default:
 			break;
 		}
@@ -3454,7 +3470,7 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
 	err = adjust_qp_sched_queue(dev, slave, qpc, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_INIT2RTR, slave);
 	if (err)
 		return err;
 
@@ -3508,7 +3524,7 @@ int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
 	err = adjust_qp_sched_queue(dev, slave, context, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTR2RTS, slave);
 	if (err)
 		return err;
 
@@ -3530,7 +3546,7 @@ int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
 	err = adjust_qp_sched_queue(dev, slave, context, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_RTS2RTS, slave);
 	if (err)
 		return err;
 
@@ -3567,7 +3583,7 @@ int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave,
 	err = adjust_qp_sched_queue(dev, slave, context, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2SQD, slave);
 	if (err)
 		return err;
 
@@ -3589,7 +3605,7 @@ int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave,
 	err = adjust_qp_sched_queue(dev, slave, context, inbox);
 	if (err)
 		return err;
-	err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave);
+	err = verify_qp_parameters(dev, vhcr, inbox, QP_TRANS_SQD2RTS, slave);
 	if (err)
 		return err;
 
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 83612faa819c..e2fc7011314c 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -401,6 +401,7 @@ struct mlx4_caps {
 	int			max_rq_desc_sz;
 	int			max_qp_init_rdma;
 	int			max_qp_dest_rdma;
+	u32			*qp0_qkey;
 	u32			*qp0_proxy;
 	u32			*qp1_proxy;
 	u32			*qp0_tunnel;
-- 
cgit 


From 65fed8a8c155271cf647651bd62eecb5928ae3a4 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Thu, 29 May 2014 16:31:04 +0300
Subject: IB/mlx4: Add interface for selecting VFs to enable QP0 via MLX proxy
 QPs

This commit adds the sysfs interface for enabling QP0 on
selected VF/port pairs.

By default, no VFs are enabled for QP0 operation.

To enable QP0 operation on a VF/port, under
/sys/class/infiniband/mlx4_x/iov/<b:d:f>/ports/x there are two new entries:

- smi_enabled (read-only). Indicates whether smi is currently
  enabled for the indicated VF/port

- enable_smi_admin (rw). Used by the admin to request that smi
  capability be enabled or disabled for the indicated VF/port.
  0 = disable, 1 = enable.
  The requested enablement will occur at the next reset of the
  VF (e.g. driver restart on the VM which owns the VF).

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/sysfs.c       | 105 ++++++++++++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx4/cmd.c |  34 ++++++++++
 include/linux/mlx4/device.h              |   3 +
 3 files changed, 141 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 5a38e43eca65..cb4c66e723b5 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -389,8 +389,10 @@ struct mlx4_port {
 	struct mlx4_ib_dev    *dev;
 	struct attribute_group pkey_group;
 	struct attribute_group gid_group;
-	u8                     port_num;
+	struct device_attribute	enable_smi_admin;
+	struct device_attribute	smi_enabled;
 	int		       slave;
+	u8                     port_num;
 };
 
 
@@ -558,6 +560,101 @@ err:
 	return NULL;
 }
 
+static ssize_t sysfs_show_smi_enabled(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct mlx4_port *p =
+		container_of(attr, struct mlx4_port, smi_enabled);
+	ssize_t len = 0;
+
+	if (mlx4_vf_smi_enabled(p->dev->dev, p->slave, p->port_num))
+		len = sprintf(buf, "%d\n", 1);
+	else
+		len = sprintf(buf, "%d\n", 0);
+
+	return len;
+}
+
+static ssize_t sysfs_show_enable_smi_admin(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct mlx4_port *p =
+		container_of(attr, struct mlx4_port, enable_smi_admin);
+	ssize_t len = 0;
+
+	if (mlx4_vf_get_enable_smi_admin(p->dev->dev, p->slave, p->port_num))
+		len = sprintf(buf, "%d\n", 1);
+	else
+		len = sprintf(buf, "%d\n", 0);
+
+	return len;
+}
+
+static ssize_t sysfs_store_enable_smi_admin(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct mlx4_port *p =
+		container_of(attr, struct mlx4_port, enable_smi_admin);
+	int enable;
+
+	if (sscanf(buf, "%i", &enable) != 1 ||
+	    enable < 0 || enable > 1)
+		return -EINVAL;
+
+	if (mlx4_vf_set_enable_smi_admin(p->dev->dev, p->slave, p->port_num, enable))
+		return -EINVAL;
+	return count;
+}
+
+static int add_vf_smi_entries(struct mlx4_port *p)
+{
+	int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+			IB_LINK_LAYER_ETHERNET;
+	int ret;
+
+	/* do not display entries if eth transport, or if master */
+	if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+		return 0;
+
+	sysfs_attr_init(&p->smi_enabled.attr);
+	p->smi_enabled.show = sysfs_show_smi_enabled;
+	p->smi_enabled.store = NULL;
+	p->smi_enabled.attr.name = "smi_enabled";
+	p->smi_enabled.attr.mode = 0444;
+	ret = sysfs_create_file(&p->kobj, &p->smi_enabled.attr);
+	if (ret) {
+		pr_err("failed to create smi_enabled\n");
+		return ret;
+	}
+
+	sysfs_attr_init(&p->enable_smi_admin.attr);
+	p->enable_smi_admin.show = sysfs_show_enable_smi_admin;
+	p->enable_smi_admin.store = sysfs_store_enable_smi_admin;
+	p->enable_smi_admin.attr.name = "enable_smi_admin";
+	p->enable_smi_admin.attr.mode = 0644;
+	ret = sysfs_create_file(&p->kobj, &p->enable_smi_admin.attr);
+	if (ret) {
+		pr_err("failed to create enable_smi_admin\n");
+		sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+		return ret;
+	}
+	return 0;
+}
+
+static void remove_vf_smi_entries(struct mlx4_port *p)
+{
+	int is_eth = rdma_port_get_link_layer(&p->dev->ib_dev, p->port_num) ==
+			IB_LINK_LAYER_ETHERNET;
+
+	if (is_eth || p->slave == mlx4_master_func_num(p->dev->dev))
+		return;
+
+	sysfs_remove_file(&p->kobj, &p->smi_enabled.attr);
+	sysfs_remove_file(&p->kobj, &p->enable_smi_admin.attr);
+}
+
 static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
 {
 	struct mlx4_port *p;
@@ -602,6 +699,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
 	if (ret)
 		goto err_free_gid;
 
+	ret = add_vf_smi_entries(p);
+	if (ret)
+		goto err_free_gid;
+
 	list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
 	return 0;
 
@@ -669,6 +770,7 @@ err_add:
 		mport = container_of(p, struct mlx4_port, kobj);
 		sysfs_remove_group(p, &mport->pkey_group);
 		sysfs_remove_group(p, &mport->gid_group);
+		remove_vf_smi_entries(mport);
 		kobject_put(p);
 	}
 	kobject_put(dev->dev_ports_parent[slave]);
@@ -713,6 +815,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
 			port = container_of(p, struct mlx4_port, kobj);
 			sysfs_remove_group(p, &port->pkey_group);
 			sysfs_remove_group(p, &port->gid_group);
+			remove_vf_smi_entries(port);
 			kobject_put(p);
 			kobject_put(device->dev_ports_parent[slave]);
 		}
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 26c3ebaa49d1..3370ecb8c3d2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2567,3 +2567,37 @@ int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port)
 		MLX4_VF_SMI_ENABLED;
 }
 EXPORT_SYMBOL_GPL(mlx4_vf_smi_enabled);
+
+int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	if (slave == mlx4_master_func_num(dev))
+		return 1;
+
+	if (slave < 1 || slave >= dev->num_slaves ||
+	    port < 1 || port > MLX4_MAX_PORTS)
+		return 0;
+
+	return priv->mfunc.master.vf_admin[slave].enable_smi[port] ==
+		MLX4_VF_SMI_ENABLED;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_get_enable_smi_admin);
+
+int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
+				 int enabled)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	if (slave == mlx4_master_func_num(dev))
+		return 0;
+
+	if (slave < 1 || slave >= dev->num_slaves ||
+	    port < 1 || port > MLX4_MAX_PORTS ||
+	    enabled < 0 || enabled > 1)
+		return -EINVAL;
+
+	priv->mfunc.master.vf_admin[slave].enable_smi[port] = enabled;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx4_vf_set_enable_smi_admin);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e2fc7011314c..dcabe11f37f7 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1236,4 +1236,7 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port);
 
 int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port);
 int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
+int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
+int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
+				 int enable);
 #endif /* MLX4_DEVICE_H */
-- 
cgit 


From 9113e260767b1cb44f8da0e5922e1a9a5417c4b8 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Wed, 28 May 2014 15:23:37 +0800
Subject: power_supply: allow power supply devices registered w/o wakeup source

Currently, all power supply devices are registered with a wakeup source.
As a result, every power_supply_changed() invocation brings
the system out of the suspend-to-freeze state.

This is overkill as some device drivers, e.g. ACPI battery driver,
have the ability to check the device status and wake up the system
from sleeping only when necessary.

Thus introduce a new API which allows a device to be registered
without a wakeup source.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/power/power_supply_core.c | 15 +++++++++++++--
 include/linux/power_supply.h      |  2 ++
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c
index 26606641fe44..5a5a24e7d43c 100644
--- a/drivers/power/power_supply_core.c
+++ b/drivers/power/power_supply_core.c
@@ -537,7 +537,7 @@ static void psy_unregister_cooler(struct power_supply *psy)
 }
 #endif
 
-int power_supply_register(struct device *parent, struct power_supply *psy)
+int __power_supply_register(struct device *parent, struct power_supply *psy, bool ws)
 {
 	struct device *dev;
 	int rc;
@@ -568,7 +568,7 @@ int power_supply_register(struct device *parent, struct power_supply *psy)
 	}
 
 	spin_lock_init(&psy->changed_lock);
-	rc = device_init_wakeup(dev, true);
+	rc = device_init_wakeup(dev, ws);
 	if (rc)
 		goto wakeup_init_failed;
 
@@ -606,8 +606,19 @@ dev_set_name_failed:
 success:
 	return rc;
 }
+
+int power_supply_register(struct device *parent, struct power_supply *psy)
+{
+	return __power_supply_register(parent, psy, true);
+}
 EXPORT_SYMBOL_GPL(power_supply_register);
 
+int power_supply_register_no_ws(struct device *parent, struct power_supply *psy)
+{
+	return __power_supply_register(parent, psy, false);
+}
+EXPORT_SYMBOL_GPL(power_supply_register_no_ws);
+
 void power_supply_unregister(struct power_supply *psy)
 {
 	cancel_work_sync(&psy->changed_work);
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index c9dc4e09854c..f2b76aeaf4e4 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -264,6 +264,8 @@ static inline int power_supply_is_system_supplied(void) { return -ENOSYS; }
 
 extern int power_supply_register(struct device *parent,
 				 struct power_supply *psy);
+extern int power_supply_register_no_ws(struct device *parent,
+				 struct power_supply *psy);
 extern void power_supply_unregister(struct power_supply *psy);
 extern int power_supply_powers(struct power_supply *psy, struct device *dev);
 
-- 
cgit 


From eec15edbb0e14485998635ea7c62e30911b465f0 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Fri, 30 May 2014 04:23:01 +0200
Subject: ACPI / PNP: use device ID list for PNPACPI device enumeration

ACPI can be used to enumerate PNP devices, but the code does not
handle this in the right way currently.  Namely, if an ACPI device
object
 1. Has a _CRS method,
 2. Has an identification of
    "three capital characters followed by four hex digits",
 3. Is not in the excluded IDs list,
it will be enumerated to the PNP bus (that is, a PNP device object will
be created for it).  This means that, actually, the PNP bus type is
used as the default bus type for enumerating _HID devices in ACPI.

However, more and more _HID devices need to be enumerated to the
platform bus instead (that is, platform device objects need to be
created for them).  As a result, the device ID list in acpi_platform.c
is used to enforce creating platform device objects rather than PNP
device objects for matching devices.  That list has been continuously
growing recently, unfortunately, and it is pretty much guaranteed to
grow even more in the future.

To address that problem it is better to enumerate _HID devices
as platform devices by default.  To this end, change the way of
enumerating PNP devices by adding a PNP ACPI scan handler that
will use a device ID list to create PNP devices for the ACPI
device objects whose device IDs are present in that list.

The initial device ID list in the PNP ACPI scan handler contains
all of the pnp_device_id strings from all the existing PNP drivers,
so this change should be transparent to the PNP core and all of the
PNP drivers.  Still, in the future it should be possible to reduce
its size by converting PNP drivers that need not be PNP for any
technical reasons into platform drivers.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
[rjw: Rewrote the changelog, modified the PNP ACPI scan handler code]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/acpi/Makefile        |   1 +
 drivers/acpi/acpi_cmos_rtc.c |   2 +-
 drivers/acpi/acpi_pnp.c      | 401 +++++++++++++++++++++++++++++++++++++++++++
 drivers/acpi/internal.h      |   1 +
 drivers/acpi/scan.c          |   1 +
 drivers/pnp/pnpacpi/core.c   |  28 +--
 include/linux/acpi.h         |   2 +
 7 files changed, 411 insertions(+), 25 deletions(-)
 create mode 100644 drivers/acpi/acpi_pnp.c

(limited to 'include/linux')

diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index bce34afadcd0..144671a2030f 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -41,6 +41,7 @@ acpi-$(CONFIG_ACPI_DOCK)	+= dock.o
 acpi-y				+= pci_root.o pci_link.o pci_irq.o
 acpi-$(CONFIG_X86_INTEL_LPSS)	+= acpi_lpss.o
 acpi-y				+= acpi_platform.o
+acpi-y				+= acpi_pnp.o
 acpi-y				+= power.o
 acpi-y				+= event.o
 acpi-y				+= sysfs.o
diff --git a/drivers/acpi/acpi_cmos_rtc.c b/drivers/acpi/acpi_cmos_rtc.c
index 961b45d18a5d..2da8660262e5 100644
--- a/drivers/acpi/acpi_cmos_rtc.c
+++ b/drivers/acpi/acpi_cmos_rtc.c
@@ -68,7 +68,7 @@ static int acpi_install_cmos_rtc_space_handler(struct acpi_device *adev,
 		return -ENODEV;
 	}
 
-	return 0;
+	return 1;
 }
 
 static void acpi_remove_cmos_rtc_space_handler(struct acpi_device *adev)
diff --git a/drivers/acpi/acpi_pnp.c b/drivers/acpi/acpi_pnp.c
new file mode 100644
index 000000000000..567e7fc6330c
--- /dev/null
+++ b/drivers/acpi/acpi_pnp.c
@@ -0,0 +1,401 @@
+/*
+ * ACPI support for PNP bus type
+ *
+ * Copyright (C) 2014, Intel Corporation
+ * Authors: Zhang Rui <rui.zhang@intel.com>
+ *          Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+
+static const struct acpi_device_id acpi_pnp_device_ids[] = {
+	/* pata_isapnp */
+	{"PNP0600"},		/* Generic ESDI/IDE/ATA compatible hard disk controller */
+	/* floppy */
+	{"PNP0700"},
+	/* ipmi_si */
+	{"IPI0001"},
+	/* tpm_inf_pnp */
+	{"IFX0101"},		/* Infineon TPMs */
+	{"IFX0102"},		/* Infineon TPMs */
+	/* tpm_tis */
+	{"PNP0C31"},		/* TPM */
+	{"ATM1200"},		/* Atmel */
+	{"IFX0102"},		/* Infineon */
+	{"BCM0101"},		/* Broadcom */
+	{"BCM0102"},		/* Broadcom */
+	{"NSC1200"},		/* National */
+	{"ICO0102"},		/* Intel */
+	/* ide   */
+	{"PNP0600"},		/* Generic ESDI/IDE/ATA compatible hard disk controller */
+	/* ns558 */
+	{"@P@0001"},		/* ALS 100 */
+	{"@P@0020"},		/* ALS 200 */
+	{"@P@1001"},		/* ALS 100+ */
+	{"@P@2001"},		/* ALS 120 */
+	{"ASB16fd"},		/* AdLib NSC16 */
+	{"AZT3001"},		/* AZT1008 */
+	{"CDC0001"},		/* Opl3-SAx */
+	{"CSC0001"},		/* CS4232 */
+	{"CSC000f"},		/* CS4236 */
+	{"CSC0101"},		/* CS4327 */
+	{"CTL7001"},		/* SB16 */
+	{"CTL7002"},		/* AWE64 */
+	{"CTL7005"},		/* Vibra16 */
+	{"ENS2020"},		/* SoundscapeVIVO */
+	{"ESS0001"},		/* ES1869 */
+	{"ESS0005"},		/* ES1878 */
+	{"ESS6880"},		/* ES688 */
+	{"IBM0012"},		/* CS4232 */
+	{"OPT0001"},		/* OPTi Audio16 */
+	{"YMH0006"},		/* Opl3-SA */
+	{"YMH0022"},		/* Opl3-SAx */
+	{"PNPb02f"},		/* Generic */
+	/* i8042 kbd */
+	{"PNP0300"},
+	{"PNP0301"},
+	{"PNP0302"},
+	{"PNP0303"},
+	{"PNP0304"},
+	{"PNP0305"},
+	{"PNP0306"},
+	{"PNP0309"},
+	{"PNP030a"},
+	{"PNP030b"},
+	{"PNP0320"},
+	{"PNP0343"},
+	{"PNP0344"},
+	{"PNP0345"},
+	{"CPQA0D7"},
+	/* i8042 aux */
+	{"AUI0200"},
+	{"FJC6000"},
+	{"FJC6001"},
+	{"PNP0f03"},
+	{"PNP0f0b"},
+	{"PNP0f0e"},
+	{"PNP0f12"},
+	{"PNP0f13"},
+	{"PNP0f19"},
+	{"PNP0f1c"},
+	{"SYN0801"},
+	/* fcpnp */
+	{"AVM0900"},
+	/* radio-cadet */
+	{"MSM0c24"},		/* ADS Cadet AM/FM Radio Card */
+	/* radio-gemtek */
+	{"ADS7183"},		/* AOpen FX-3D/Pro Radio */
+	/* radio-sf16fmr2 */
+	{"MFRad13"},		/* tuner subdevice of SF16-FMD2 */
+	/* ene_ir */
+	{"ENE0100"},
+	{"ENE0200"},
+	{"ENE0201"},
+	{"ENE0202"},
+	/* fintek-cir */
+	{"FIT0002"},		/* CIR */
+	/* ite-cir */
+	{"ITE8704"},		/* Default model */
+	{"ITE8713"},		/* CIR found in EEEBox 1501U */
+	{"ITE8708"},		/* Bridged IT8512 */
+	{"ITE8709"},		/* SRAM-Bridged IT8512 */
+	/* nuvoton-cir */
+	{"WEC0530"},		/* CIR */
+	{"NTN0530"},		/* CIR for new chip's pnp id */
+	/* Winbond CIR */
+	{"WEC1022"},
+	/* wbsd */
+	{"WEC0517"},
+	{"WEC0518"},
+	/* 3Com Etherlink III */
+	{"TCM5090"},		/* 3Com Etherlink III (TP) */
+	{"TCM5091"},		/* 3Com Etherlink III */
+	{"TCM5094"},		/* 3Com Etherlink III (combo) */
+	{"TCM5095"},		/* 3Com Etherlink III (TPO) */
+	{"TCM5098"},		/* 3Com Etherlink III (TPC) */
+	{"PNP80f7"},		/* 3Com Etherlink III compatible */
+	{"PNP80f8"},		/* 3Com Etherlink III compatible */
+	/* nsc-ircc */
+	{"NSC6001"},
+	{"HWPC224"},
+	{"IBM0071"},
+	/* smsc-ircc2 */
+	{"SMCf010"},
+	/* sb1000 */
+	{"GIC1000"},
+	/* parport_pc */
+	{"PNP0400"},		/* Standard LPT Printer Port */
+	{"PNP0401"},		/* ECP Printer Port */
+	/* apple-gmux */
+	{"APP000B"},
+	/* fujitsu-laptop.c */
+	{"FUJ02bf"},
+	{"FUJ02B1"},
+	{"FUJ02E3"},
+	/* system */
+	{"PNP0c02"},		/* General ID for reserving resources */
+	{"PNP0c01"},		/* memory controller */
+	/* rtc_cmos */
+	{"PNP0b00"},
+	{"PNP0b01"},
+	{"PNP0b02"},
+	/* c6xdigio */
+	{"PNP0400"},		/* Standard LPT Printer Port */
+	{"PNP0401"},		/* ECP Printer Port */
+	/* ni_atmio.c */
+	{"NIC1900"},
+	{"NIC2400"},
+	{"NIC2500"},
+	{"NIC2600"},
+	{"NIC2700"},
+	/* serial */
+	{"AAC000F"},		/* Archtek America Corp. Archtek SmartLink Modem 3334BT Plug & Play */
+	{"ADC0001"},		/* Anchor Datacomm BV. SXPro 144 External Data Fax Modem Plug & Play */
+	{"ADC0002"},		/* SXPro 288 External Data Fax Modem Plug & Play */
+	{"AEI0250"},		/* PROLiNK 1456VH ISA PnP K56flex Fax Modem */
+	{"AEI1240"},		/* Actiontec ISA PNP 56K X2 Fax Modem */
+	{"AKY1021"},		/* Rockwell 56K ACF II Fax+Data+Voice Modem */
+	{"AZT4001"},		/* AZT3005 PnP SOUND DEVICE */
+	{"BDP3336"},		/* Best Data Products Inc. Smart One 336F PnP Modem */
+	{"BRI0A49"},		/* Boca Complete Ofc Communicator 14.4 Data-FAX */
+	{"BRI1400"},		/* Boca Research 33,600 ACF Modem */
+	{"BRI3400"},		/* Boca 33.6 Kbps Internal FD34FSVD */
+	{"BRI0A49"},		/* Boca 33.6 Kbps Internal FD34FSVD */
+	{"BDP3336"},		/* Best Data Products Inc. Smart One 336F PnP Modem */
+	{"CPI4050"},		/* Computer Peripherals Inc. EuroViVa CommCenter-33.6 SP PnP */
+	{"CTL3001"},		/* Creative Labs Phone Blaster 28.8 DSVD PnP Voice */
+	{"CTL3011"},		/* Creative Labs Modem Blaster 28.8 DSVD PnP Voice */
+	{"DAV0336"},		/* Davicom ISA 33.6K Modem */
+	{"DMB1032"},		/* Creative Modem Blaster Flash56 DI5601-1 */
+	{"DMB2001"},		/* Creative Modem Blaster V.90 DI5660 */
+	{"ETT0002"},		/* E-Tech CyberBULLET PC56RVP */
+	{"FUJ0202"},		/* Fujitsu 33600 PnP-I2 R Plug & Play */
+	{"FUJ0205"},		/* Fujitsu FMV-FX431 Plug & Play */
+	{"FUJ0206"},		/* Fujitsu 33600 PnP-I4 R Plug & Play */
+	{"FUJ0209"},		/* Fujitsu Fax Voice 33600 PNP-I5 R Plug & Play */
+	{"GVC000F"},		/* Archtek SmartLink Modem 3334BT Plug & Play */
+	{"GVC0303"},		/* Archtek SmartLink Modem 3334BRV 33.6K Data Fax Voice */
+	{"HAY0001"},		/* Hayes Optima 288 V.34-V.FC + FAX + Voice Plug & Play */
+	{"HAY000C"},		/* Hayes Optima 336 V.34 + FAX + Voice PnP */
+	{"HAY000D"},		/* Hayes Optima 336B V.34 + FAX + Voice PnP */
+	{"HAY5670"},		/* Hayes Accura 56K Ext Fax Modem PnP */
+	{"HAY5674"},		/* Hayes Accura 56K Ext Fax Modem PnP */
+	{"HAY5675"},		/* Hayes Accura 56K Fax Modem PnP */
+	{"HAYF000"},		/* Hayes 288, V.34 + FAX */
+	{"HAYF001"},		/* Hayes Optima 288 V.34 + FAX + Voice, Plug & Play */
+	{"IBM0033"},		/* IBM Thinkpad 701 Internal Modem Voice */
+	{"PNP4972"},		/* Intermec CV60 touchscreen port */
+	{"IXDC801"},		/* Intertex 28k8 33k6 Voice EXT PnP */
+	{"IXDC901"},		/* Intertex 33k6 56k Voice EXT PnP */
+	{"IXDD801"},		/* Intertex 28k8 33k6 Voice SP EXT PnP */
+	{"IXDD901"},		/* Intertex 33k6 56k Voice SP EXT PnP */
+	{"IXDF401"},		/* Intertex 28k8 33k6 Voice SP INT PnP */
+	{"IXDF801"},		/* Intertex 28k8 33k6 Voice SP EXT PnP */
+	{"IXDF901"},		/* Intertex 33k6 56k Voice SP EXT PnP */
+	{"KOR4522"},		/* KORTEX 28800 Externe PnP */
+	{"KORF661"},		/* KXPro 33.6 Vocal ASVD PnP */
+	{"LAS4040"},		/* LASAT Internet 33600 PnP */
+	{"LAS4540"},		/* Lasat Safire 560 PnP */
+	{"LAS5440"},		/* Lasat Safire 336  PnP */
+	{"MNP0281"},		/* Microcom TravelPorte FAST V.34 Plug & Play */
+	{"MNP0336"},		/* Microcom DeskPorte V.34 FAST or FAST+ Plug & Play */
+	{"MNP0339"},		/* Microcom DeskPorte FAST EP 28.8 Plug & Play */
+	{"MNP0342"},		/* Microcom DeskPorte 28.8P Plug & Play */
+	{"MNP0500"},		/* Microcom DeskPorte FAST ES 28.8 Plug & Play */
+	{"MNP0501"},		/* Microcom DeskPorte FAST ES 28.8 Plug & Play */
+	{"MNP0502"},		/* Microcom DeskPorte 28.8S Internal Plug & Play */
+	{"MOT1105"},		/* Motorola BitSURFR Plug & Play */
+	{"MOT1111"},		/* Motorola TA210 Plug & Play */
+	{"MOT1114"},		/* Motorola HMTA 200 (ISDN) Plug & Play */
+	{"MOT1115"},		/* Motorola BitSURFR Plug & Play */
+	{"MOT1190"},		/* Motorola Lifestyle 28.8 Internal */
+	{"MOT1501"},		/* Motorola V.3400 Plug & Play */
+	{"MOT1502"},		/* Motorola Lifestyle 28.8 V.34 Plug & Play */
+	{"MOT1505"},		/* Motorola Power 28.8 V.34 Plug & Play */
+	{"MOT1509"},		/* Motorola ModemSURFR External 28.8 Plug & Play */
+	{"MOT150A"},		/* Motorola Premier 33.6 Desktop Plug & Play */
+	{"MOT150F"},		/* Motorola VoiceSURFR 56K External PnP */
+	{"MOT1510"},		/* Motorola ModemSURFR 56K External PnP */
+	{"MOT1550"},		/* Motorola ModemSURFR 56K Internal PnP */
+	{"MOT1560"},		/* Motorola ModemSURFR Internal 28.8 Plug & Play */
+	{"MOT1580"},		/* Motorola Premier 33.6 Internal Plug & Play */
+	{"MOT15B0"},		/* Motorola OnlineSURFR 28.8 Internal Plug & Play */
+	{"MOT15F0"},		/* Motorola VoiceSURFR 56K Internal PnP */
+	{"MVX00A1"},		/*  Deskline K56 Phone System PnP */
+	{"MVX00F2"},		/* PC Rider K56 Phone System PnP */
+	{"nEC8241"},		/* NEC 98NOTE SPEAKER PHONE FAX MODEM(33600bps) */
+	{"PMC2430"},		/* Pace 56 Voice Internal Plug & Play Modem */
+	{"PNP0500"},		/* Generic standard PC COM port     */
+	{"PNP0501"},		/* Generic 16550A-compatible COM port */
+	{"PNPC000"},		/* Compaq 14400 Modem */
+	{"PNPC001"},		/* Compaq 2400/9600 Modem */
+	{"PNPC031"},		/* Dial-Up Networking Serial Cable between 2 PCs */
+	{"PNPC032"},		/* Dial-Up Networking Parallel Cable between 2 PCs */
+	{"PNPC100"},		/* Standard 9600 bps Modem */
+	{"PNPC101"},		/* Standard 14400 bps Modem */
+	{"PNPC102"},		/*  Standard 28800 bps Modem */
+	{"PNPC103"},		/*  Standard Modem */
+	{"PNPC104"},		/*  Standard 9600 bps Modem */
+	{"PNPC105"},		/*  Standard 14400 bps Modem */
+	{"PNPC106"},		/*  Standard 28800 bps Modem */
+	{"PNPC107"},		/*  Standard Modem */
+	{"PNPC108"},		/* Standard 9600 bps Modem */
+	{"PNPC109"},		/* Standard 14400 bps Modem */
+	{"PNPC10A"},		/* Standard 28800 bps Modem */
+	{"PNPC10B"},		/* Standard Modem */
+	{"PNPC10C"},		/* Standard 9600 bps Modem */
+	{"PNPC10D"},		/* Standard 14400 bps Modem */
+	{"PNPC10E"},		/* Standard 28800 bps Modem */
+	{"PNPC10F"},		/* Standard Modem */
+	{"PNP2000"},		/* Standard PCMCIA Card Modem */
+	{"ROK0030"},		/* Rockwell 33.6 DPF Internal PnP, Modular Technology 33.6 Internal PnP */
+	{"ROK0100"},		/* KORTEX 14400 Externe PnP */
+	{"ROK4120"},		/* Rockwell 28.8 */
+	{"ROK4920"},		/* Viking 28.8 INTERNAL Fax+Data+Voice PnP */
+	{"RSS00A0"},		/* Rockwell 33.6 DPF External PnP, BT Prologue 33.6 External PnP, Modular Technology 33.6 External PnP */
+	{"RSS0262"},		/* Viking 56K FAX INT */
+	{"RSS0250"},		/* K56 par,VV,Voice,Speakphone,AudioSpan,PnP */
+	{"SUP1310"},		/* SupraExpress 28.8 Data/Fax PnP modem */
+	{"SUP1381"},		/* SupraExpress 336i PnP Voice Modem */
+	{"SUP1421"},		/* SupraExpress 33.6 Data/Fax PnP modem */
+	{"SUP1590"},		/* SupraExpress 33.6 Data/Fax PnP modem */
+	{"SUP1620"},		/* SupraExpress 336i Sp ASVD */
+	{"SUP1760"},		/* SupraExpress 33.6 Data/Fax PnP modem */
+	{"SUP2171"},		/* SupraExpress 56i Sp Intl */
+	{"TEX0011"},		/* Phoebe Micro 33.6 Data Fax 1433VQH Plug & Play */
+	{"UAC000F"},		/* Archtek SmartLink Modem 3334BT Plug & Play */
+	{"USR0000"},		/* 3Com Corp. Gateway Telepath IIvi 33.6 */
+	{"USR0002"},		/* U.S. Robotics Sporster 33.6K Fax INT PnP */
+	{"USR0004"},		/*  Sportster Vi 14.4 PnP FAX Voicemail */
+	{"USR0006"},		/* U.S. Robotics 33.6K Voice INT PnP */
+	{"USR0007"},		/* U.S. Robotics 33.6K Voice EXT PnP */
+	{"USR0009"},		/* U.S. Robotics Courier V.Everything INT PnP */
+	{"USR2002"},		/* U.S. Robotics 33.6K Voice INT PnP */
+	{"USR2070"},		/* U.S. Robotics 56K Voice INT PnP */
+	{"USR2080"},		/* U.S. Robotics 56K Voice EXT PnP */
+	{"USR3031"},		/* U.S. Robotics 56K FAX INT */
+	{"USR3050"},		/* U.S. Robotics 56K FAX INT */
+	{"USR3070"},		/* U.S. Robotics 56K Voice INT PnP */
+	{"USR3080"},		/* U.S. Robotics 56K Voice EXT PnP */
+	{"USR3090"},		/* U.S. Robotics 56K Voice INT PnP */
+	{"USR9100"},		/* U.S. Robotics 56K Message  */
+	{"USR9160"},		/* U.S. Robotics 56K FAX EXT PnP */
+	{"USR9170"},		/* U.S. Robotics 56K FAX INT PnP */
+	{"USR9180"},		/* U.S. Robotics 56K Voice EXT PnP */
+	{"USR9190"},		/* U.S. Robotics 56K Voice INT PnP */
+	{"WACFXXX"},		/* Wacom tablets */
+	{"FPI2002"},		/* Compaq touchscreen */
+	{"FUJ02B2"},		/* Fujitsu Stylistic touchscreens */
+	{"FUJ02B3"},
+	{"FUJ02B4"},		/* Fujitsu Stylistic LT touchscreens */
+	{"FUJ02B6"},		/* Passive Fujitsu Stylistic touchscreens */
+	{"FUJ02B7"},
+	{"FUJ02B8"},
+	{"FUJ02B9"},
+	{"FUJ02BC"},
+	{"FUJ02E5"},		/* Fujitsu Wacom Tablet PC device */
+	{"FUJ02E6"},		/* Fujitsu P-series tablet PC device */
+	{"FUJ02E7"},		/* Fujitsu Wacom 2FGT Tablet PC device */
+	{"FUJ02E9"},		/* Fujitsu Wacom 1FGT Tablet PC device */
+	{"LTS0001"},		/* LG C1 EXPRESS DUAL (C1-PB11A3) touch screen (actually a FUJ02E6 in disguise) */
+	{"WCI0003"},		/* Rockwell's (PORALiNK) 33600 INT PNP */
+	{"WEC1022"},		/* Winbond CIR port, should not be probed. We should keep track of it to prevent the legacy serial driver from probing it */
+	{"PNPCXXX"},		/* Unknown PnP modems */
+	{"PNPDXXX"},		/* More unknown PnP modems */
+	/* sc1200wdt */
+	{"NSC0800"},		/* National Semiconductor PC87307/PC97307 watchdog component */
+	/* mpu401 */
+	{"PNPb006"},
+	/* cs423x-pnpbios */
+	{"CSC0100"},
+	{"CSC0000"},
+	{"GIM0100"},		/* Guillemot Turtlebeach something appears to be cs4232 compatible */
+	/* es18xx-pnpbios */
+	{"ESS1869"},
+	{"ESS1879"},
+	/* snd-opl3sa2-pnpbios */
+	{"YMH0021"},
+	{"NMX2210"},		/* Gateway Solo 2500 */
+	{""},
+};
+
+static bool is_hex_digit(char c)	/* upper-case hex only; matching_id() upcases first */
+{
+	return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F');
+}
+
+static bool matching_id(char *idstr, char *list_id)
+{
+	int i;
+
+	if (memcmp(idstr, list_id, 3))
+		return false;
+
+	for (i = 3; i < 7; i++) {
+		char c = toupper(idstr[i]);
+
+		if (!is_hex_digit(c)
+		    || (list_id[i] != 'X' && c != toupper(list_id[i])))
+			return false;
+	}
+	return true;
+}
+
+static bool acpi_pnp_match(char *idstr, const struct acpi_device_id **matchid)
+{
+	const struct acpi_device_id *devid;
+
+	for (devid = acpi_pnp_device_ids; devid->id[0]; devid++)
+		if (matching_id(idstr, (char *)devid->id)) {
+			if (matchid)
+				*matchid = devid;
+
+			return true;
+		}
+
+	return false;
+}
+
+static int acpi_pnp_attach(struct acpi_device *adev,
+			   const struct acpi_device_id *id)
+{
+	return 1;
+}
+
+static struct acpi_scan_handler acpi_pnp_handler = {
+	.ids = acpi_pnp_device_ids,
+	.match = acpi_pnp_match,
+	.attach = acpi_pnp_attach,
+};
+
+/*
+ * For CMOS RTC devices, the PNP ACPI scan handler does not work, because
+ * there is a CMOS RTC ACPI scan handler installed already, so we need to
+ * check those devices and enumerate them to the PNP bus directly.
+ */
+static int is_cmos_rtc_device(struct acpi_device *adev)
+{
+	struct acpi_device_id ids[] = {
+		{ "PNP0B00" },
+		{ "PNP0B01" },
+		{ "PNP0B02" },
+		{""},
+	};
+	return !acpi_match_device_ids(adev, ids);
+}
+
+bool acpi_is_pnp_device(struct acpi_device *adev)
+{
+	return adev->handler == &acpi_pnp_handler || is_cmos_rtc_device(adev);
+}
+EXPORT_SYMBOL_GPL(acpi_is_pnp_device);
+
+void __init acpi_pnp_init(void)
+{
+	acpi_scan_add_handler(&acpi_pnp_handler);
+}
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index bb7de413d06d..5c16cb6bc76d 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -30,6 +30,7 @@ void acpi_pci_root_init(void);
 void acpi_pci_link_init(void);
 void acpi_processor_init(void);
 void acpi_platform_init(void);
+void acpi_pnp_init(void);
 int acpi_sysfs_init(void);
 #ifdef CONFIG_ACPI_CONTAINER
 void acpi_container_init(void);
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index e44438f7917b..19d524c5c0c8 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -2253,6 +2253,7 @@ int __init acpi_scan_init(void)
 	acpi_cmos_rtc_init();
 	acpi_container_init();
 	acpi_memory_hotplug_init();
+	acpi_pnp_init();
 
 	mutex_lock(&acpi_scan_lock);
 	/*
diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c
index c31aa07b3ba5..b81448b2c75d 100644
--- a/drivers/pnp/pnpacpi/core.c
+++ b/drivers/pnp/pnpacpi/core.c
@@ -30,26 +30,6 @@
 
 static int num;
 
-/* We need only to blacklist devices that have already an acpi driver that
- * can't use pnp layer. We don't need to blacklist device that are directly
- * used by the kernel (PCI root, ...), as it is harmless and there were
- * already present in pnpbios. But there is an exception for devices that
- * have irqs (PIC, Timer) because we call acpi_register_gsi.
- * Finally, only devices that have a CRS method need to be in this list.
- */
-static struct acpi_device_id excluded_id_list[] __initdata = {
-	{"PNP0C09", 0},		/* EC */
-	{"PNP0C0F", 0},		/* Link device */
-	{"PNP0000", 0},		/* PIC */
-	{"PNP0100", 0},		/* Timer */
-	{"", 0},
-};
-
-static inline int __init is_exclusive_device(struct acpi_device *dev)
-{
-	return (!acpi_match_device_ids(dev, excluded_id_list));
-}
-
 /*
  * Compatible Device IDs
  */
@@ -266,7 +246,7 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
 	if (!pnpid)
 		return 0;
 
-	if (is_exclusive_device(device) || !device->status.present)
+	if (!device->status.present)
 		return 0;
 
 	dev = pnp_alloc_dev(&pnpacpi_protocol, num, pnpid);
@@ -326,10 +306,10 @@ static acpi_status __init pnpacpi_add_device_handler(acpi_handle handle,
 {
 	struct acpi_device *device;
 
-	if (!acpi_bus_get_device(handle, &device))
-		pnpacpi_add_device(device);
-	else
+	if (acpi_bus_get_device(handle, &device))
 		return AE_CTRL_DEPTH;
+	if (acpi_is_pnp_device(device))
+		pnpacpi_add_device(device);
 	return AE_OK;
 }
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 4c007262e891..0b9927f4edd2 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -184,6 +184,8 @@ extern int ec_transaction(u8 command,
                           u8 *rdata, unsigned rdata_len);
 extern acpi_handle ec_get_handle(void);
 
+extern bool acpi_is_pnp_device(struct acpi_device *);
+
 #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE)
 
 typedef void (*wmi_notify_handler) (u32 value, void *context);
-- 
cgit 


From 2230237500821aedfcf2bba2a79d9cbca389233c Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Fri, 30 May 2014 08:06:42 -0600
Subject: blk-mq: blk_mq_tag_to_rq should handle flush request

A flush request is special in that it borrows its tag from the parent
request. Hence blk_mq_tag_to_rq() needs special handling to return
the flush request for that tag.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-flush.c      |  4 +++-
 block/blk-mq.c         | 12 +++++++++---
 include/linux/blk-mq.h |  2 +-
 3 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index ef608b35d9be..ff87c664b7df 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -223,8 +223,10 @@ static void flush_end_io(struct request *flush_rq, int error)
 	struct request *rq, *n;
 	unsigned long flags = 0;
 
-	if (q->mq_ops)
+	if (q->mq_ops) {
 		spin_lock_irqsave(&q->mq_flush_lock, flags);
+		q->flush_rq->cmd_flags = 0;
+	}
 
 	running = &q->flush_queue[q->flush_running_idx];
 	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6160128085fc..21f952ab3581 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -541,9 +541,15 @@ void blk_mq_kick_requeue_list(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
+struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 {
-	return tags->rqs[tag];
+	struct request_queue *q = hctx->queue;
+
+	if ((q->flush_rq->cmd_flags & REQ_FLUSH_SEQ) &&
+	    q->flush_rq->tag == tag)
+		return q->flush_rq;
+
+	return hctx->tags->rqs[tag];
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
 
@@ -572,7 +578,7 @@ static void blk_mq_timeout_check(void *__data, unsigned long *free_tags)
 		if (tag >= hctx->tags->nr_tags)
 			break;
 
-		rq = blk_mq_tag_to_rq(hctx->tags, tag++);
+		rq = blk_mq_tag_to_rq(hctx, tag++);
 		if (rq->q != hctx->queue)
 			continue;
 		if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 95de239444d2..ad3adb73cc70 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -154,7 +154,7 @@ void blk_mq_free_request(struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 		gfp_t gfp, bool reserved);
-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
+struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag);
 
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
-- 
cgit 


From 67aec14ce87fe25bdfff7dbf468556333df11c4e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 30 May 2014 08:25:36 -0600
Subject: blk-mq: make the sysfs mq/ layout reflect current mappings

Currently blk-mq registers all the hardware queues in sysfs,
regardless of whether it uses them (e.g. they have CPU mappings)
or not. The unused hardware queues lack the cpux/ directories,
and the other sysfs entries (like active, pending, etc) are all
zeroes.

Change this so that sysfs correctly reflects the current mappings
of the hardware queues.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-sysfs.c   | 102 ++++++++++++++++++++++++++++++++++++++++---------
 block/blk-mq.c         |   4 ++
 block/blk-mq.h         |   6 +++
 include/linux/blk-mq.h |   1 +
 4 files changed, 94 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 99a60a829e69..e5f575ff0bf9 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -327,6 +327,42 @@ static struct kobj_type blk_mq_hw_ktype = {
 	.release	= blk_mq_sysfs_release,
 };
 
+void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_ctx *ctx;
+	int i;
+
+	if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP))
+		return;
+
+	hctx_for_each_ctx(hctx, ctx, i)
+		kobject_del(&ctx->kobj);
+
+	kobject_del(&hctx->kobj);
+}
+
+int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+	struct blk_mq_ctx *ctx;
+	int i, ret;
+
+	if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP))
+		return 0;
+
+	ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num);
+	if (ret)
+		return ret;
+
+	hctx_for_each_ctx(hctx, ctx, i) {
+		ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
 void blk_mq_unregister_disk(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
@@ -335,11 +371,11 @@ void blk_mq_unregister_disk(struct gendisk *disk)
 	int i, j;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		hctx_for_each_ctx(hctx, ctx, j) {
-			kobject_del(&ctx->kobj);
+		blk_mq_unregister_hctx(hctx);
+
+		hctx_for_each_ctx(hctx, ctx, j)
 			kobject_put(&ctx->kobj);
-		}
-		kobject_del(&hctx->kobj);
+
 		kobject_put(&hctx->kobj);
 	}
 
@@ -350,15 +386,30 @@ void blk_mq_unregister_disk(struct gendisk *disk)
 	kobject_put(&disk_to_dev(disk)->kobj);
 }
 
+static void blk_mq_sysfs_init(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx;
+	int i, j;
+
+	kobject_init(&q->mq_kobj, &blk_mq_ktype);
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
+
+		hctx_for_each_ctx(hctx, ctx, j)
+			kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
+	}
+}
+
 int blk_mq_register_disk(struct gendisk *disk)
 {
 	struct device *dev = disk_to_dev(disk);
 	struct request_queue *q = disk->queue;
 	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx;
-	int ret, i, j;
+	int ret, i;
 
-	kobject_init(&q->mq_kobj, &blk_mq_ktype);
+	blk_mq_sysfs_init(q);
 
 	ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
 	if (ret < 0)
@@ -367,20 +418,10 @@ int blk_mq_register_disk(struct gendisk *disk)
 	kobject_uevent(&q->mq_kobj, KOBJ_ADD);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
-		ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", i);
+		hctx->flags |= BLK_MQ_F_SYSFS_UP;
+		ret = blk_mq_register_hctx(hctx);
 		if (ret)
 			break;
-
-		if (!hctx->nr_ctx)
-			continue;
-
-		hctx_for_each_ctx(hctx, ctx, j) {
-			kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
-			ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu);
-			if (ret)
-				break;
-		}
 	}
 
 	if (ret) {
@@ -390,3 +431,26 @@ int blk_mq_register_disk(struct gendisk *disk)
 
 	return 0;
 }
+
+void blk_mq_sysfs_unregister(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		blk_mq_unregister_hctx(hctx);
+}
+
+int blk_mq_sysfs_register(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	int i, ret = 0;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		ret = blk_mq_register_hctx(hctx);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 21f952ab3581..71f564e8812e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1924,6 +1924,8 @@ static void blk_mq_queue_reinit(struct request_queue *q)
 {
 	blk_mq_freeze_queue(q);
 
+	blk_mq_sysfs_unregister(q);
+
 	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues);
 
 	/*
@@ -1934,6 +1936,8 @@ static void blk_mq_queue_reinit(struct request_queue *q)
 
 	blk_mq_map_swqueue(q);
 
+	blk_mq_sysfs_register(q);
+
 	blk_mq_unfreeze_queue(q);
 }
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ff5e6bf0f691..de7b3bbd5bd6 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -54,6 +54,12 @@ extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set);
 extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues);
 extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int);
 
+/*
+ * sysfs helpers
+ */
+extern int blk_mq_sysfs_register(struct request_queue *q);
+extern void blk_mq_sysfs_unregister(struct request_queue *q);
+
 /*
  * Basic implementation of sparser bitmap, allowing the user to spread
  * the bits over more cachelines.
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ad3adb73cc70..c15128833100 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -130,6 +130,7 @@ enum {
 	BLK_MQ_F_SHOULD_SORT	= 1 << 1,
 	BLK_MQ_F_TAG_SHARED	= 1 << 2,
 	BLK_MQ_F_SG_MERGE	= 1 << 3,
+	BLK_MQ_F_SYSFS_UP	= 1 << 4,
 
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
-- 
cgit 


From c893d133eaccdda2516a3e71cd05a7dac2e14b00 Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Fri, 30 May 2014 11:01:03 +0800
Subject: PCI: Make pci_bus_add_device() void

pci_bus_add_device() always returns 0, so there's no point in returning
anything at all.  Make it a void function and remove the tests of the
return value from the callers.

[bhelgaas: changelog, remove unused "err" from i82875p_setup_overfl_dev()]
Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/edac/i82875p_edac.c         |  8 +-------
 drivers/pci/bus.c                   | 10 ++--------
 drivers/pci/iov.c                   |  2 +-
 drivers/platform/x86/asus-wmi.c     |  3 +--
 drivers/platform/x86/eeepc-laptop.c |  3 +--
 include/linux/pci.h                 |  2 +-
 6 files changed, 7 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c
index 8d0450b9b9af..4009077c8839 100644
--- a/drivers/edac/i82875p_edac.c
+++ b/drivers/edac/i82875p_edac.c
@@ -275,7 +275,6 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev,
 {
 	struct pci_dev *dev;
 	void __iomem *window;
-	int err;
 
 	*ovrfl_pdev = NULL;
 	*ovrfl_window = NULL;
@@ -293,12 +292,7 @@ static int i82875p_setup_overfl_dev(struct pci_dev *pdev,
 		if (dev == NULL)
 			return 1;
 
-		err = pci_bus_add_device(dev);
-		if (err) {
-			i82875p_printk(KERN_ERR,
-				"%s(): pci_bus_add_device() Failed\n",
-				__func__);
-		}
+		pci_bus_add_device(dev);
 		pci_bus_assign_resources(dev->bus);
 	}
 
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index ba2bf55a38df..447d393725e1 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -235,7 +235,7 @@ void __weak pcibios_resource_survey_bus(struct pci_bus *bus) { }
  *
  * This adds add sysfs entries and start device drivers
  */
-int pci_bus_add_device(struct pci_dev *dev)
+void pci_bus_add_device(struct pci_dev *dev)
 {
 	int retval;
 
@@ -252,8 +252,6 @@ int pci_bus_add_device(struct pci_dev *dev)
 	WARN_ON(retval < 0);
 
 	dev->is_added = 1;
-
-	return 0;
 }
 
 /**
@@ -266,16 +264,12 @@ void pci_bus_add_devices(const struct pci_bus *bus)
 {
 	struct pci_dev *dev;
 	struct pci_bus *child;
-	int retval;
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
 		/* Skip already-added devices */
 		if (dev->is_added)
 			continue;
-		retval = pci_bus_add_device(dev);
-		if (retval)
-			dev_err(&dev->dev, "Error adding device (%d)\n",
-				retval);
+		pci_bus_add_device(dev);
 	}
 
 	list_for_each_entry(dev, &bus->devices, bus_list) {
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index de7a74782f92..cb6f24740ee3 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -106,7 +106,7 @@ static int virtfn_add(struct pci_dev *dev, int id, int reset)
 	pci_device_add(virtfn, virtfn->bus);
 	mutex_unlock(&iov->dev->sriov->lock);
 
-	rc = pci_bus_add_device(virtfn);
+	pci_bus_add_device(virtfn);
 	sprintf(buf, "virtfn%u", id);
 	rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
 	if (rc)
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index c5e082fb82fa..91ef69a52263 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -642,8 +642,7 @@ static void asus_rfkill_hotplug(struct asus_wmi *asus)
 			dev = pci_scan_single_device(bus, 0);
 			if (dev) {
 				pci_bus_assign_resources(bus);
-				if (pci_bus_add_device(dev))
-					pr_err("Unable to hotplug wifi\n");
+				pci_bus_add_device(dev);
 			}
 		} else {
 			dev = pci_get_slot(bus, 0);
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 399e8c562192..9b0c57cd1d4a 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -633,8 +633,7 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc, acpi_handle handle)
 			dev = pci_scan_single_device(bus, 0);
 			if (dev) {
 				pci_bus_assign_resources(bus);
-				if (pci_bus_add_device(dev))
-					pr_err("Unable to hotplug wifi\n");
+				pci_bus_add_device(dev);
 			}
 		} else {
 			dev = pci_get_slot(bus, 0);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 322335aaa7e1..785149a6aec1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -764,7 +764,7 @@ int pci_scan_slot(struct pci_bus *bus, int devfn);
 struct pci_dev *pci_scan_single_device(struct pci_bus *bus, int devfn);
 void pci_device_add(struct pci_dev *dev, struct pci_bus *bus);
 unsigned int pci_scan_child_bus(struct pci_bus *bus);
-int __must_check pci_bus_add_device(struct pci_dev *dev);
+void pci_bus_add_device(struct pci_dev *dev);
 void pci_read_bridge_bases(struct pci_bus *child);
 struct resource *pci_find_parent_resource(const struct pci_dev *dev,
 					  struct resource *res);
-- 
cgit 


From b13460b92093b29347e99d6c3242e350052b62cd Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Fri, 30 May 2014 11:35:54 -0600
Subject: drivers/vfio: Rework offsetofend()

The macro offsetofend() introduces an unnecessary temporary variable
"tmp". The patch avoids that and saves a bit of memory on the stack.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/vfio.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 81022a52bc34..8ec980b5e3af 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -86,9 +86,8 @@ extern void vfio_unregister_iommu_driver(
  * from user space.  This allows us to easily determine if the provided
  * structure is sized to include various fields.
  */
-#define offsetofend(TYPE, MEMBER) ({				\
-	TYPE tmp;						\
-	offsetof(TYPE, MEMBER) + sizeof(tmp.MEMBER); })		\
+#define offsetofend(TYPE, MEMBER) \
+	(offsetof(TYPE, MEMBER)	+ sizeof(((TYPE *)0)->MEMBER))
 
 /*
  * External user API
-- 
cgit 


From 3e19ce762b537dd9aeefdd0849ba5f2f01ff83cf Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 25 Feb 2014 17:44:21 -0500
Subject: rpc: xdr_truncate_encode

This will be used in the server side in a few cases:
	- when certain operations (read, readdir, readlink) fail after
	  encoding a partial response.
	- when we run out of space after encoding a partial response.
	- in readlink, where we initially reserve PAGE_SIZE bytes for
	  data, then truncate to the actual size.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/xdr.h |  1 +
 net/sunrpc/xdr.c           | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 15f9204ee70b..e7bb2e3bd0fb 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -215,6 +215,7 @@ typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
 extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index dd97ba3c4456..352f3b35bbe5 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -508,6 +508,72 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
 }
 EXPORT_SYMBOL_GPL(xdr_reserve_space);
 
+/**
+ * xdr_truncate_encode - truncate an encode buffer
+ * @xdr: pointer to xdr_stream
+ * @len: new length of buffer
+ *
+ * Truncates the xdr stream, so that xdr->buf->len == len,
+ * and xdr->p points at offset len from the start of the buffer, and
+ * head, tail, and page lengths are adjusted to correspond.
+ *
+ * If this means moving xdr->p to a different buffer, we assume that
+ * the end pointer should be set to the end of the current page,
+ * except in the case of the head buffer when we assume the head
+ * buffer's current length represents the end of the available buffer.
+ *
+ * This is *not* safe to use on a buffer that already has inlined page
+ * cache pages (as in a zero-copy server read reply), except for the
+ * simple case of truncating from one position in the tail to another.
+ *
+ */
+void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
+{
+	struct xdr_buf *buf = xdr->buf;
+	struct kvec *head = buf->head;
+	struct kvec *tail = buf->tail;
+	int fraglen;
+	int new, old;
+
+	if (len > buf->len) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	fraglen = min_t(int, buf->len - len, tail->iov_len);
+	tail->iov_len -= fraglen;
+	buf->len -= fraglen;
+	if (tail->iov_len && buf->len == len) {
+		xdr->p = tail->iov_base + tail->iov_len;
+		/* xdr->end, xdr->iov should be set already */
+		return;
+	}
+	WARN_ON_ONCE(fraglen);
+	fraglen = min_t(int, buf->len - len, buf->page_len);
+	buf->page_len -= fraglen;
+	buf->len -= fraglen;
+
+	new = buf->page_base + buf->page_len;
+	old = new + fraglen;
+	xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT);
+
+	if (buf->page_len && buf->len == len) {
+		xdr->p = page_address(*xdr->page_ptr);
+		xdr->end = (void *)xdr->p + PAGE_SIZE;
+		xdr->p = (void *)xdr->p + (new % PAGE_SIZE);
+		/* xdr->iov should already be NULL */
+		return;
+	}
+	if (fraglen)
+		xdr->end = head->iov_base + head->iov_len;
+	/* (otherwise assume xdr->end is already set) */
+	head->iov_len = len;
+	buf->len = len;
+	xdr->p = head->iov_base + head->iov_len;
+	xdr->iov = buf->head;
+}
+EXPORT_SYMBOL(xdr_truncate_encode);
+
 /**
  * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
  * @xdr: pointer to xdr_stream
-- 
cgit 


From 2825a7f90753012babe7ee292f4a1eadd3706f92 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 26 Aug 2013 16:04:46 -0400
Subject: nfsd4: allow encoding across page boundaries

After this we can handle for example getattr of very large ACLs.

Read, readdir, readlink are still special cases with their own limits.

Also we can't handle a new operation starting close to the end of a
page.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c         |  4 +++
 fs/nfsd/nfs4xdr.c          | 60 +++++++++++++++++++++++++----------
 include/linux/sunrpc/svc.h |  1 +
 include/linux/sunrpc/xdr.h |  1 +
 net/sunrpc/svc_xprt.c      |  1 +
 net/sunrpc/xdr.c           | 78 ++++++++++++++++++++++++++++++++++++++++++++--
 6 files changed, 126 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3ce431b9b577..5d8f9158701d 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1264,6 +1264,10 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
 	xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE;
 	/* Tail and page_len should be zero at this point: */
 	buf->len = buf->head[0].iov_len;
+	xdr->scratch.iov_len = 0;
+	xdr->page_ptr = buf->pages;
+	buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
+		- 2 * RPC_MAX_AUTH_SIZE;
 }
 
 /*
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index bd529e523087..d3a576dbd99b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1624,6 +1624,7 @@ static int nfsd4_max_reply(u32 opnum)
 		 * the head and tail in another page:
 		 */
 		return 2 * PAGE_SIZE;
+	case OP_GETATTR:
 	case OP_READ:
 		return INT_MAX;
 	default:
@@ -2560,21 +2561,31 @@ out_resource:
 	goto out;
 }
 
+static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr,
+				struct xdr_buf *buf, __be32 *p, int bytes)
+{
+	xdr->scratch.iov_len = 0;
+	memset(buf, 0, sizeof(struct xdr_buf));
+	buf->head[0].iov_base = p;
+	buf->head[0].iov_len = 0;
+	buf->len = 0;
+	xdr->buf = buf;
+	xdr->iov = buf->head;
+	xdr->p = p;
+	xdr->end = (void *)p + bytes;
+	buf->buflen = bytes;
+}
+
 __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
 			struct svc_fh *fhp, struct svc_export *exp,
 			struct dentry *dentry, u32 *bmval,
 			struct svc_rqst *rqstp, int ignore_crossmnt)
 {
-	struct xdr_buf dummy = {
-			.head[0] = {
-				.iov_base = *p,
-			},
-			.buflen = words << 2,
-		};
+	struct xdr_buf dummy;
 	struct xdr_stream xdr;
 	__be32 ret;
 
-	xdr_init_encode(&xdr, &dummy, NULL);
+	svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2);
 	ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp,
 							ignore_crossmnt);
 	*p = xdr.p;
@@ -3064,8 +3075,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 
 	if (nfserr)
 		return nfserr;
-	if (resp->xdr.buf->page_len)
-		return nfserr_resource;
 
 	p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
 	if (!p)
@@ -3075,6 +3084,9 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (xdr->end - xdr->p < 1)
 		return nfserr_resource;
 
+	if (resp->xdr.buf->page_len)
+		return nfserr_resource;
+
 	maxcount = svc_max_payload(resp->rqstp);
 	if (maxcount > read->rd_length)
 		maxcount = read->rd_length;
@@ -3119,6 +3131,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 				- (char *)resp->xdr.buf->head[0].iov_base);
 	resp->xdr.buf->page_len = maxcount;
 	xdr->buf->len += maxcount;
+	xdr->page_ptr += v;
+	xdr->buf->buflen = maxcount + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE;
 	xdr->iov = xdr->buf->tail;
 
 	/* Use rest of head for padding and remaining ops: */
@@ -3145,6 +3159,11 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 
 	if (nfserr)
 		return nfserr;
+
+	p = xdr_reserve_space(xdr, 4);
+	if (!p)
+		return nfserr_resource;
+
 	if (resp->xdr.buf->page_len)
 		return nfserr_resource;
 	if (!*resp->rqstp->rq_next_page)
@@ -3154,10 +3173,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 
 	maxcount = PAGE_SIZE;
 
-	p = xdr_reserve_space(xdr, 4);
-	if (!p)
-		return nfserr_resource;
-
 	if (xdr->end - xdr->p < 1)
 		return nfserr_resource;
 
@@ -3180,6 +3195,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 				- (char *)resp->xdr.buf->head[0].iov_base;
 	resp->xdr.buf->page_len = maxcount;
 	xdr->buf->len += maxcount;
+	xdr->page_ptr += 1;
+	xdr->buf->buflen -= PAGE_SIZE;
 	xdr->iov = xdr->buf->tail;
 
 	/* Use rest of head for padding and remaining ops: */
@@ -3206,15 +3223,16 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 
 	if (nfserr)
 		return nfserr;
-	if (resp->xdr.buf->page_len)
-		return nfserr_resource;
-	if (!*resp->rqstp->rq_next_page)
-		return nfserr_resource;
 
 	p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
 	if (!p)
 		return nfserr_resource;
 
+	if (resp->xdr.buf->page_len)
+		return nfserr_resource;
+	if (!*resp->rqstp->rq_next_page)
+		return nfserr_resource;
+
 	/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
 	WRITE32(0);
 	WRITE32(0);
@@ -3266,6 +3284,10 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 
 	xdr->iov = xdr->buf->tail;
 
+	xdr->page_ptr++;
+	xdr->buf->buflen -= PAGE_SIZE;
+	xdr->iov = xdr->buf->tail;
+
 	/* Use rest of head for padding and remaining ops: */
 	resp->xdr.buf->tail[0].iov_base = tailbase;
 	resp->xdr.buf->tail[0].iov_len = 0;
@@ -3800,6 +3822,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 	       !nfsd4_enc_ops[op->opnum]);
 	encoder = nfsd4_enc_ops[op->opnum];
 	op->status = encoder(resp, op->status, &op->u);
+	xdr_commit_encode(xdr);
+
 	/* nfsd4_check_resp_size guarantees enough room for error status */
 	if (!op->status) {
 		int space_needed = 0;
@@ -3919,6 +3943,8 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
 	WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
 				 buf->tail[0].iov_len);
 
+	rqstp->rq_next_page = resp->xdr.page_ptr + 1;
+
 	p = resp->tagp;
 	*p++ = htonl(resp->taglen);
 	memcpy(p, resp->tag, resp->taglen);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index a0dbbd1e00e9..85cb6472a423 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -244,6 +244,7 @@ struct svc_rqst {
 	struct page *		rq_pages[RPCSVC_MAXPAGES];
 	struct page *		*rq_respages;	/* points into rq_pages */
 	struct page *		*rq_next_page; /* next reply page to use */
+	struct page *		*rq_page_end;  /* one past the last page */
 
 	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
 
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index e7bb2e3bd0fb..b23d69ffd5ec 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -215,6 +215,7 @@ typedef int	(*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
 
 extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_commit_encode(struct xdr_stream *xdr);
 extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 29772e01b179..b4737fbdec13 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 			}
 			rqstp->rq_pages[i] = p;
 		}
+	rqstp->rq_page_end = &rqstp->rq_pages[i];
 	rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
 
 	/* Make arg->head point to first page and arg->pages point to rest */
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 352f3b35bbe5..2b546e8ce43d 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 	struct kvec *iov = buf->head;
 	int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
 
+	xdr_set_scratch_buffer(xdr, NULL, 0);
 	BUG_ON(scratch_len < 0);
 	xdr->buf = buf;
 	xdr->iov = iov;
@@ -481,6 +482,74 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 }
 EXPORT_SYMBOL_GPL(xdr_init_encode);
 
+/**
+ * xdr_commit_encode - Ensure all data is written to buffer
+ * @xdr: pointer to xdr_stream
+ *
+ * We handle encoding across page boundaries by giving the caller a
+ * temporary location to write to, then later copying the data into
+ * place; xdr_commit_encode does that copying.
+ *
+ * Normally the caller doesn't need to call this directly, as the
+ * following xdr_reserve_space will do it.  But an explicit call may be
+ * required at the end of encoding, or any other time when the xdr_buf
+ * data might be read.
+ */
+void xdr_commit_encode(struct xdr_stream *xdr)
+{
+	int shift = xdr->scratch.iov_len;
+	void *page;
+
+	if (shift == 0)
+		return;
+	page = page_address(*xdr->page_ptr);
+	memcpy(xdr->scratch.iov_base, page, shift);
+	memmove(page, page + shift, (void *)xdr->p - page);
+	xdr->scratch.iov_len = 0;
+}
+EXPORT_SYMBOL_GPL(xdr_commit_encode);
+
+__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes)
+{
+	static __be32 *p;
+	int space_left;
+	int frag1bytes, frag2bytes;
+
+	if (nbytes > PAGE_SIZE)
+		return NULL; /* Bigger buffers require special handling */
+	if (xdr->buf->len + nbytes > xdr->buf->buflen)
+		return NULL; /* Sorry, we're totally out of space */
+	frag1bytes = (xdr->end - xdr->p) << 2;
+	frag2bytes = nbytes - frag1bytes;
+	if (xdr->iov)
+		xdr->iov->iov_len += frag1bytes;
+	else {
+		xdr->buf->page_len += frag1bytes;
+		xdr->page_ptr++;
+	}
+	xdr->iov = NULL;
+	/*
+	 * If the last encode didn't end exactly on a page boundary, the
+	 * next one will straddle boundaries.  Encode into the next
+	 * page, then copy it back later in xdr_commit_encode.  We use
+	 * the "scratch" iov to track any temporarily unused fragment of
+	 * space at the end of the previous buffer:
+	 */
+	xdr->scratch.iov_base = xdr->p;
+	xdr->scratch.iov_len = frag1bytes;
+	p = page_address(*xdr->page_ptr);
+	/*
+	 * Note this is where the next encode will start after we've
+	 * shifted this one back:
+	 */
+	xdr->p = (void *)p + frag2bytes;
+	space_left = xdr->buf->buflen - xdr->buf->len;
+	xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+	xdr->buf->page_len += frag2bytes;
+	xdr->buf->len += nbytes;
+	return p;
+}
+
 /**
  * xdr_reserve_space - Reserve buffer space for sending
  * @xdr: pointer to xdr_stream
@@ -495,14 +564,18 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
 	__be32 *p = xdr->p;
 	__be32 *q;
 
+	xdr_commit_encode(xdr);
 	/* align nbytes on the next 32-bit boundary */
 	nbytes += 3;
 	nbytes &= ~3;
 	q = p + (nbytes >> 2);
 	if (unlikely(q > xdr->end || q < p))
-		return NULL;
+		return xdr_get_next_encode_buffer(xdr, nbytes);
 	xdr->p = q;
-	xdr->iov->iov_len += nbytes;
+	if (xdr->iov)
+		xdr->iov->iov_len += nbytes;
+	else
+		xdr->buf->page_len += nbytes;
 	xdr->buf->len += nbytes;
 	return p;
 }
@@ -539,6 +612,7 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
 		WARN_ON_ONCE(1);
 		return;
 	}
+	xdr_commit_encode(xdr);
 
 	fraglen = min_t(int, buf->len - len, tail->iov_len);
 	tail->iov_len -= fraglen;
-- 
cgit 


From db3f58a95beea6752d90fed03f9f198d282a3913 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Thu, 6 Mar 2014 13:22:18 -0500
Subject: rpc: define xdr_restrict_buflen

With this xdr_reserve_space can help us enforce various limits.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/xdr.h |  1 +
 net/sunrpc/xdr.c           | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index b23d69ffd5ec..70c6b92e15a7 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -217,6 +217,7 @@ extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32
 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_commit_encode(struct xdr_stream *xdr);
 extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
+extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
 		unsigned int base, unsigned int len);
 extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 2b546e8ce43d..39928444c7fb 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -648,6 +648,35 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
 }
 EXPORT_SYMBOL(xdr_truncate_encode);
 
+/**
+ * xdr_restrict_buflen - decrease available buffer space
+ * @xdr: pointer to xdr_stream
+ * @newbuflen: new maximum number of bytes available
+ *
+ * Adjust our idea of how much space is available in the buffer.
+ * If we've already used too much space in the buffer, returns -1.
+ * If the available space is already smaller than newbuflen, returns 0
+ * and does nothing.  Otherwise, adjusts xdr->buf->buflen to newbuflen
+ * and ensures xdr->end is set at most offset newbuflen from the start
+ * of the buffer.
+ */
+int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen)
+{
+	struct xdr_buf *buf = xdr->buf;
+	int left_in_this_buf = (void *)xdr->end - (void *)xdr->p;
+	int end_offset = buf->len + left_in_this_buf;
+
+	if (newbuflen < 0 || newbuflen < buf->len)
+		return -1;
+	if (newbuflen > buf->buflen)
+		return 0;
+	if (newbuflen < end_offset)
+		xdr->end = (void *)xdr->end + newbuflen - end_offset;
+	buf->buflen = newbuflen;
+	return 0;
+}
+EXPORT_SYMBOL(xdr_restrict_buflen);
+
 /**
  * xdr_write_pages - Insert a list of pages into an XDR buffer for sending
  * @xdr: pointer to xdr_stream
-- 
cgit 


From a5cddc885b99458df963a75abbe0b40cbef56c48 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 12 May 2014 18:10:58 -0400
Subject: nfsd4: better reservation of head space for krb5

RPC_MAX_AUTH_SIZE is scattered around several places.  Better to set it
once in the auth code, where this kind of estimate should be made.  And
while we're at it we can leave it zero when we're not using krb5i or
krb5p.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs4proc.c                |  4 ++--
 fs/nfsd/nfs4state.c               |  2 +-
 fs/nfsd/nfs4xdr.c                 |  5 +++--
 include/linux/sunrpc/svc.h        | 11 +++++------
 net/sunrpc/auth_gss/svcauth_gss.c |  2 ++
 net/sunrpc/svcauth.c              |  2 ++
 6 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2d786b813c7e..16e71d033ea5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1261,13 +1261,13 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
 	xdr->buf = buf;
 	xdr->iov = head;
 	xdr->p   = head->iov_base + head->iov_len;
-	xdr->end = head->iov_base + PAGE_SIZE - 2 * RPC_MAX_AUTH_SIZE;
+	xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
 	/* Tail and page_len should be zero at this point: */
 	buf->len = buf->head[0].iov_len;
 	xdr->scratch.iov_len = 0;
 	xdr->page_ptr = buf->pages;
 	buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
-		- 2 * RPC_MAX_AUTH_SIZE;
+		- rqstp->rq_auth_slack;
 }
 
 /*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 62b882dc48ec..d0a016a502be 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2288,7 +2288,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 			session->se_fchannel.maxresp_sz;
 	status = (seq->cachethis) ? nfserr_rep_too_big_to_cache :
 				    nfserr_rep_too_big;
-	if (xdr_restrict_buflen(xdr, buflen - 2 * RPC_MAX_AUTH_SIZE))
+	if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack))
 		goto out_put_session;
 	svc_reserve(rqstp, buflen);
 
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 3e347a1caec4..470fe8998c9b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1611,7 +1611,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	DECODE_HEAD;
 	struct nfsd4_op *op;
 	bool cachethis = false;
-	int max_reply = 2 * RPC_MAX_AUTH_SIZE + 8; /* opcnt, status */
+	int auth_slack= argp->rqstp->rq_auth_slack;
+	int max_reply = auth_slack + 8; /* opcnt, status */
 	int readcount = 0;
 	int readbytes = 0;
 	int i;
@@ -1677,7 +1678,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	svc_reserve(argp->rqstp, max_reply + readbytes);
 	argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
 
-	if (readcount > 1 || max_reply > PAGE_SIZE - 2*RPC_MAX_AUTH_SIZE)
+	if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
 		argp->rqstp->rq_splice_ok = false;
 
 	DECODE_TAIL;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 85cb6472a423..1bc7cd05b22e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -260,7 +260,10 @@ struct svc_rqst {
 	void *			rq_argp;	/* decoded arguments */
 	void *			rq_resp;	/* xdr'd results */
 	void *			rq_auth_data;	/* flavor-specific data */
-
+	int			rq_auth_slack;	/* extra space xdr code
+						 * should leave in head
+						 * for krb5i, krb5p.
+						 */
 	int			rq_reserved;	/* space on socket outq
 						 * reserved for this request
 						 */
@@ -456,11 +459,7 @@ char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
  */
 static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
 {
-	int added_space = 0;
-
-	if (rqstp->rq_authop->flavour)
-		added_space = RPC_MAX_AUTH_SIZE;
-	svc_reserve(rqstp, space + added_space);
+	svc_reserve(rqstp, space + rqstp->rq_auth_slack);
 }
 
 #endif /* SUNRPC_SVC_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 0f73f4507746..4ce5eccec1f6 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1503,6 +1503,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 			if (unwrap_integ_data(rqstp, &rqstp->rq_arg,
 					gc->gc_seq, rsci->mechctx))
 				goto garbage_args;
+			rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE;
 			break;
 		case RPC_GSS_SVC_PRIVACY:
 			/* placeholders for length and seq. number: */
@@ -1511,6 +1512,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 			if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
 					gc->gc_seq, rsci->mechctx))
 				goto garbage_args;
+			rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2;
 			break;
 		default:
 			goto auth_err;
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 2af7b0cba43a..79c0f3459b5c 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -54,6 +54,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
 	}
 	spin_unlock(&authtab_lock);
 
+	rqstp->rq_auth_slack = 0;
+
 	rqstp->rq_authop = aops;
 	return aops->accept(rqstp, authp);
 }
-- 
cgit 


From c8865b64b05b2f4eeefd369373e9c8aeb069e7a1 Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.com>
Date: Sun, 25 May 2014 17:47:26 +0300
Subject: cpumask: Utility function to set n'th cpu - local cpu first

This function sets the n'th cpu - local cpu's first.
For example, on a 16-core server where the even-numbered cpu's are local,
it yields the following values:
cpumask_set_cpu_local_first(0, numa, cpumask) => cpu 0 is set
cpumask_set_cpu_local_first(1, numa, cpumask) => cpu 2 is set
...
cpumask_set_cpu_local_first(7, numa, cpumask) => cpu 14 is set
cpumask_set_cpu_local_first(8, numa, cpumask) => cpu 1 is set
cpumask_set_cpu_local_first(9, numa, cpumask) => cpu 3 is set
...
cpumask_set_cpu_local_first(15, numa, cpumask) => cpu 15 is set

Currently this function will be used by multi queue networking devices to
calculate the irq affinity mask, such that as many local cpu's as
possible will be utilized to handle the mq device irq's.

Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cpumask.h |  2 ++
 lib/cpumask.c           | 64 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d08e4d2a9b92..3551d667ef9f 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -257,6 +257,8 @@ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
 	set_bit(cpumask_check(cpu), cpumask_bits(dstp));
 }
 
+int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp);
+
 /**
  * cpumask_clear_cpu - clear a cpu in a cpumask
  * @cpu: cpu number (< nr_cpu_ids)
diff --git a/lib/cpumask.c b/lib/cpumask.c
index b810b753c607..14049a96f04a 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -163,4 +163,68 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 {
 	memblock_free_early(__pa(mask), cpumask_size());
 }
+
+/**
+ * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first
+ *
+ * @i: index number
+ * @numa_node: local numa_node
+ * @dstp: cpumask with the relevant cpu bit set according to the policy
+ *
+ * This function sets the cpumask according to a numa aware policy.
+ * cpumask could be used as an affinity hint for the IRQ related to a
+ * queue. When the policy is to spread queues across cores - local cores
+ * first.
+ *
+ * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
+ * the cpu bit and need to re-call the function.
+ */
+int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+{
+	cpumask_var_t mask;
+	int cpu;
+	int ret = 0;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	i %= num_online_cpus();
+
+	if (!cpumask_of_node(numa_node)) {
+		/* Use all online cpu's for non numa aware system */
+		cpumask_copy(mask, cpu_online_mask);
+	} else {
+		int n;
+
+		cpumask_and(mask,
+			    cpumask_of_node(numa_node), cpu_online_mask);
+
+		n = cpumask_weight(mask);
+		if (i >= n) {
+			i -= n;
+
+			/* If index > number of local cpu's, mask out local
+			 * cpu's
+			 */
+			cpumask_andnot(mask, cpu_online_mask, mask);
+		}
+	}
+
+	for_each_cpu(cpu, mask) {
+		if (--i < 0)
+			goto out;
+	}
+
+	ret = -EAGAIN;
+
+out:
+	free_cpumask_var(mask);
+
+	if (!ret)
+		cpumask_set_cpu(cpu, dstp);
+
+	return ret;
+}
+EXPORT_SYMBOL(cpumask_set_cpu_local_first);
+
 #endif
-- 
cgit 


From 70a640d0dae3a9b1b222ce673eb5d92c263ddd61 Mon Sep 17 00:00:00 2001
From: Yuval Atias <yuvala@mellanox.com>
Date: Sun, 25 May 2014 17:47:27 +0300
Subject: net/mlx4_en: Use affinity hint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The “affinity hint” mechanism is used by the user space
daemon, irqbalancer, to indicate a preferred CPU mask for irqs.
Irqbalancer can use this hint to balance the irqs between the
cpus indicated by the mask.

We wish the HCA to preferentially map the IRQs it uses to numa cores
close to it.  To accomplish this, we use cpumask_set_cpu_local_first(), which
sets the affinity hint according to the following policy:
First it maps IRQs to “close” numa cores.  If these are exhausted, the
remaining IRQs are mapped to “far” numa cores.

Signed-off-by: Yuval Atias <yuvala@mellanox.com>
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/main.c              |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_cq.c     |  6 +++++-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 30 ++++++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/eq.c        | 13 ++++++++++-
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  1 +
 include/linux/mlx4/device.h                    |  2 +-
 6 files changed, 50 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 199c7896f081..58b1f239ac2b 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1897,7 +1897,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 				 i, j, dev->pdev->bus->name);
 			/* Set IRQ for specific name (per ring) */
 			if (mlx4_assign_eq(dev, name, NULL,
-					   &ibdev->eq_table[eq])) {
+					   &ibdev->eq_table[eq], NULL)) {
 				/* Use legacy (same as mlx4_en driver) */
 				pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
 				ibdev->eq_table[eq] =
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 636963db598a..ea2cd72e5368 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -118,11 +118,15 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 	if (cq->is_tx == RX) {
 		if (mdev->dev->caps.comp_pool) {
 			if (!cq->vector) {
+				struct mlx4_en_rx_ring *ring =
+					priv->rx_ring[cq->ring];
+
 				sprintf(name, "%s-%d", priv->dev->name,
 					cq->ring);
 				/* Set IRQ for specific name (per ring) */
 				if (mlx4_assign_eq(mdev->dev, name, rmap,
-						   &cq->vector)) {
+						   &cq->vector,
+						   ring->affinity_mask)) {
 					cq->vector = (cq->ring + 1 + priv->port)
 					    % mdev->dev->caps.num_comp_vectors;
 					mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 58209bd0c94c..05d135572abc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1526,6 +1526,32 @@ static void mlx4_en_linkstate(struct work_struct *work)
 	mutex_unlock(&mdev->state_lock);
 }
 
+static void mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
+{
+	struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
+	int numa_node = priv->mdev->dev->numa_node;
+
+	if (numa_node == -1)
+		return;
+
+	if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) {
+		en_err(priv, "Failed to allocate core mask\n");
+		return;
+	}
+
+	if (cpumask_set_cpu_local_first(ring_idx, numa_node,
+					ring->affinity_mask)) {
+		en_err(priv, "Failed setting affinity hint\n");
+		free_cpumask_var(ring->affinity_mask);
+		ring->affinity_mask = NULL;
+	}
+}
+
+static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
+{
+	free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask);
+	priv->rx_ring[ring_idx]->affinity_mask = NULL;
+}
 
 int mlx4_en_start_port(struct net_device *dev)
 {
@@ -1567,6 +1593,8 @@ int mlx4_en_start_port(struct net_device *dev)
 
 		mlx4_en_cq_init_lock(cq);
 
+		mlx4_en_init_affinity_hint(priv, i);
+
 		err = mlx4_en_activate_cq(priv, cq, i);
 		if (err) {
 			en_err(priv, "Failed activating Rx CQ\n");
@@ -1847,6 +1875,8 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 			msleep(1);
 		mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
 		mlx4_en_deactivate_cq(priv, cq);
+
+		mlx4_en_free_affinity_hint(priv, i);
 	}
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index d954ec1eac17..f91659e5fa13 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1376,7 +1376,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev)
 EXPORT_SYMBOL(mlx4_test_interrupts);
 
 int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
-		   int *vector)
+		   int *vector, cpumask_var_t cpu_hint_mask)
 {
 
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1411,6 +1411,15 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
 			}
 			mlx4_assign_irq_notifier(priv, dev,
 						 priv->eq_table.eq[vec].irq);
+			if (cpu_hint_mask) {
+				err = irq_set_affinity_hint(
+						priv->eq_table.eq[vec].irq,
+						cpu_hint_mask);
+				if (err) {
+					mlx4_warn(dev, "Failed setting affinity hint\n");
+					/*we dont want to break here*/
+				}
+			}
 
 			eq_set_ci(&priv->eq_table.eq[vec], 1);
 		}
@@ -1441,6 +1450,8 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec)
 			irq_set_affinity_notifier(
 				priv->eq_table.eq[vec].irq,
 				NULL);
+			irq_set_affinity_hint(priv->eq_table.eq[vec].irq,
+					      NULL);
 			free_irq(priv->eq_table.eq[vec].irq,
 				 &priv->eq_table.eq[vec]);
 			priv->msix_ctl.pool_bm &= ~(1ULL << i);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index b5db1bf361dc..0e15295bedd6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -313,6 +313,7 @@ struct mlx4_en_rx_ring {
 	unsigned long csum_ok;
 	unsigned long csum_none;
 	int hwtstamp_rx_filter;
+	cpumask_var_t affinity_mask;
 };
 
 struct mlx4_en_cq {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ca38871a585c..b9b70e00e3c1 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1161,7 +1161,7 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_SYNC_TPT(struct mlx4_dev *dev);
 int mlx4_test_interrupts(struct mlx4_dev *dev);
 int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
-		   int *vector);
+		   int *vector, cpumask_t *cpu_hint_mask);
 void mlx4_release_eq(struct mlx4_dev *dev, int vec);
 
 int mlx4_get_phys_port_id(struct mlx4_dev *dev);
-- 
cgit 


From ee39facbf82e73e468c504d2b40e83e2d223c28c Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 1 Jun 2014 21:58:02 -0700
Subject: net: Revert mlx4 cpumask changes.

This reverts commit 70a640d0dae3a9b1b222ce673eb5d92c263ddd61
("net/mlx4_en: Use affinity hint") and commit
c8865b64b05b2f4eeefd369373e9c8aeb069e7a1 ("cpumask: Utility function
to set n'th cpu - local cpu first") because these changes break
the build when SMP is disabled amongst other things.

Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 50 +++++++++++++++-------------
 include/linux/cpumask.h              |  2 --
 lib/cpumask.c                        | 64 ------------------------------------
 3 files changed, 28 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 66d4ab703f45..e72918970a58 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -1441,30 +1441,32 @@ static int ks8851_probe(struct spi_device *spi)
 		}
 	}
 
-	ks->vdd_io = devm_regulator_get(&spi->dev, "vdd-io");
+	ks->vdd_io = devm_regulator_get_optional(&spi->dev, "vdd-io");
 	if (IS_ERR(ks->vdd_io)) {
 		ret = PTR_ERR(ks->vdd_io);
-		goto err_reg_io;
-	}
-
-	ret = regulator_enable(ks->vdd_io);
-	if (ret) {
-		dev_err(&spi->dev, "regulator vdd_io enable fail: %d\n",
-			ret);
-		goto err_reg_io;
+		if (ret == -EPROBE_DEFER)
+			goto err_reg_io;
+	} else {
+		ret = regulator_enable(ks->vdd_io);
+		if (ret) {
+			dev_err(&spi->dev, "regulator vdd_io enable fail: %d\n",
+				ret);
+			goto err_reg_io;
+		}
 	}
 
-	ks->vdd_reg = devm_regulator_get(&spi->dev, "vdd");
+	ks->vdd_reg = devm_regulator_get_optional(&spi->dev, "vdd");
 	if (IS_ERR(ks->vdd_reg)) {
 		ret = PTR_ERR(ks->vdd_reg);
-		goto err_reg;
-	}
-
-	ret = regulator_enable(ks->vdd_reg);
-	if (ret) {
-		dev_err(&spi->dev, "regulator vdd enable fail: %d\n",
-			ret);
-		goto err_reg;
+		if (ret == -EPROBE_DEFER)
+			goto err_reg;
+	} else {
+		ret = regulator_enable(ks->vdd_reg);
+		if (ret) {
+			dev_err(&spi->dev, "regulator vdd enable fail: %d\n",
+				ret);
+			goto err_reg;
+		}
 	}
 
 	if (gpio_is_valid(gpio)) {
@@ -1570,9 +1572,11 @@ err_irq:
 	if (gpio_is_valid(gpio))
 		gpio_set_value(gpio, 0);
 err_id:
-	regulator_disable(ks->vdd_reg);
+	if (!IS_ERR(ks->vdd_reg))
+		regulator_disable(ks->vdd_reg);
 err_reg:
-	regulator_disable(ks->vdd_io);
+	if (!IS_ERR(ks->vdd_io))
+		regulator_disable(ks->vdd_io);
 err_reg_io:
 err_gpio:
 	free_netdev(ndev);
@@ -1590,8 +1594,10 @@ static int ks8851_remove(struct spi_device *spi)
 	free_irq(spi->irq, priv);
 	if (gpio_is_valid(priv->gpio))
 		gpio_set_value(priv->gpio, 0);
-	regulator_disable(priv->vdd_reg);
-	regulator_disable(priv->vdd_io);
+	if (!IS_ERR(priv->vdd_reg))
+		regulator_disable(priv->vdd_reg);
+	if (!IS_ERR(priv->vdd_io))
+		regulator_disable(priv->vdd_io);
 	free_netdev(priv->netdev);
 
 	return 0;
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 3551d667ef9f..d08e4d2a9b92 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -257,8 +257,6 @@ static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
 	set_bit(cpumask_check(cpu), cpumask_bits(dstp));
 }
 
-int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp);
-
 /**
  * cpumask_clear_cpu - clear a cpu in a cpumask
  * @cpu: cpu number (< nr_cpu_ids)
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 14049a96f04a..b810b753c607 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -163,68 +163,4 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 {
 	memblock_free_early(__pa(mask), cpumask_size());
 }
-
-/**
- * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first
- *
- * @i: index number
- * @numa_node: local numa_node
- * @dstp: cpumask with the relevant cpu bit set according to the policy
- *
- * This function sets the cpumask according to a numa aware policy.
- * cpumask could be used as an affinity hint for the IRQ related to a
- * queue. When the policy is to spread queues across cores - local cores
- * first.
- *
- * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
- * the cpu bit and need to re-call the function.
- */
-int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
-{
-	cpumask_var_t mask;
-	int cpu;
-	int ret = 0;
-
-	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
-		return -ENOMEM;
-
-	i %= num_online_cpus();
-
-	if (!cpumask_of_node(numa_node)) {
-		/* Use all online cpu's for non numa aware system */
-		cpumask_copy(mask, cpu_online_mask);
-	} else {
-		int n;
-
-		cpumask_and(mask,
-			    cpumask_of_node(numa_node), cpu_online_mask);
-
-		n = cpumask_weight(mask);
-		if (i >= n) {
-			i -= n;
-
-			/* If index > number of local cpu's, mask out local
-			 * cpu's
-			 */
-			cpumask_andnot(mask, cpu_online_mask, mask);
-		}
-	}
-
-	for_each_cpu(cpu, mask) {
-		if (--i < 0)
-			goto out;
-	}
-
-	ret = -EAGAIN;
-
-out:
-	free_cpumask_var(mask);
-
-	if (!ret)
-		cpumask_set_cpu(cpu, dstp);
-
-	return ret;
-}
-EXPORT_SYMBOL(cpumask_set_cpu_local_first);
-
 #endif
-- 
cgit 


From 3480593131e0b781287dae0139bf7ccee7cba7ff Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 29 May 2014 10:22:50 +0200
Subject: net: filter: get rid of BPF_S_* enum

This patch finally allows us to get rid of the BPF_S_* enum.
Currently, the code performs unnecessary encode and decode
workarounds in seccomp and filter migration itself when a filter
is being attached in order to overcome BPF_S_* encoding which
is not used anymore by the new interpreter or the JIT compilers.

Keeping it around would mean that also in future we would need
to extend and maintain this enum and related encoders/decoders.
We can get rid of all that and save us these operations during
filter attaching. Naturally, also JIT compilers need to be updated
by this.

Before JIT conversion is being done, each compiler checks if A
is being loaded at startup to obtain information if it needs to
emit instructions to clear A first. Since BPF extensions are a
subset of BPF_LD | BPF_{W,H,B} | BPF_ABS variants, case statements
for extensions can be removed at that point. To ease and minimize
code changes in the classic JITs, we have introduced bpf_anc_helper().

Tested with test_bpf on x86_64 (JIT, int), s390x (JIT, int),
arm (JIT, int), i386 (int), ppc64 (JIT, int); for sparc we
unfortunately didn't have access, but changes are analogous to
the rest.

Joint work with Alexei Starovoitov.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mircea Gherzan <mgherzan@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Acked-by: Chema Gonzalez <chemag@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/net/bpf_jit_32.c       | 139 ++++++++--------
 arch/powerpc/net/bpf_jit_64.S   |   2 +-
 arch/powerpc/net/bpf_jit_comp.c | 157 +++++++++---------
 arch/s390/net/bpf_jit_comp.c    | 163 +++++++++----------
 arch/sparc/net/bpf_jit_comp.c   | 154 +++++++++---------
 include/linux/filter.h          | 108 +++++--------
 kernel/seccomp.c                |  83 +++++-----
 net/core/filter.c               | 341 +++++++++++++++-------------------------
 8 files changed, 498 insertions(+), 649 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 6f879c319a9d..fb5503ce016f 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -136,7 +136,7 @@ static u16 saved_regs(struct jit_ctx *ctx)
 	u16 ret = 0;
 
 	if ((ctx->skf->len > 1) ||
-	    (ctx->skf->insns[0].code == BPF_S_RET_A))
+	    (ctx->skf->insns[0].code == (BPF_RET | BPF_A)))
 		ret |= 1 << r_A;
 
 #ifdef CONFIG_FRAME_POINTER
@@ -164,18 +164,10 @@ static inline int mem_words_used(struct jit_ctx *ctx)
 static inline bool is_load_to_a(u16 inst)
 {
 	switch (inst) {
-	case BPF_S_LD_W_LEN:
-	case BPF_S_LD_W_ABS:
-	case BPF_S_LD_H_ABS:
-	case BPF_S_LD_B_ABS:
-	case BPF_S_ANC_CPU:
-	case BPF_S_ANC_IFINDEX:
-	case BPF_S_ANC_MARK:
-	case BPF_S_ANC_PROTOCOL:
-	case BPF_S_ANC_RXHASH:
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
-	case BPF_S_ANC_QUEUE:
+	case BPF_LD | BPF_W | BPF_LEN:
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
 		return true;
 	default:
 		return false;
@@ -215,7 +207,7 @@ static void build_prologue(struct jit_ctx *ctx)
 		emit(ARM_MOV_I(r_X, 0), ctx);
 
 	/* do not leak kernel data to userspace */
-	if ((first_inst != BPF_S_RET_K) && !(is_load_to_a(first_inst)))
+	if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst)))
 		emit(ARM_MOV_I(r_A, 0), ctx);
 
 	/* stack space for the BPF_MEM words */
@@ -480,36 +472,39 @@ static int build_body(struct jit_ctx *ctx)
 	u32 k;
 
 	for (i = 0; i < prog->len; i++) {
+		u16 code;
+
 		inst = &(prog->insns[i]);
 		/* K as an immediate value operand */
 		k = inst->k;
+		code = bpf_anc_helper(inst);
 
 		/* compute offsets only in the fake pass */
 		if (ctx->target == NULL)
 			ctx->offsets[i] = ctx->idx * 4;
 
-		switch (inst->code) {
-		case BPF_S_LD_IMM:
+		switch (code) {
+		case BPF_LD | BPF_IMM:
 			emit_mov_i(r_A, k, ctx);
 			break;
-		case BPF_S_LD_W_LEN:
+		case BPF_LD | BPF_W | BPF_LEN:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
 			emit(ARM_LDR_I(r_A, r_skb,
 				       offsetof(struct sk_buff, len)), ctx);
 			break;
-		case BPF_S_LD_MEM:
+		case BPF_LD | BPF_MEM:
 			/* A = scratch[k] */
 			ctx->seen |= SEEN_MEM_WORD(k);
 			emit(ARM_LDR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
 			break;
-		case BPF_S_LD_W_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
 			load_order = 2;
 			goto load;
-		case BPF_S_LD_H_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
 			load_order = 1;
 			goto load;
-		case BPF_S_LD_B_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			load_order = 0;
 load:
 			/* the interpreter will deal with the negative K */
@@ -552,31 +547,31 @@ load_common:
 			emit_err_ret(ARM_COND_NE, ctx);
 			emit(ARM_MOV_R(r_A, ARM_R0), ctx);
 			break;
-		case BPF_S_LD_W_IND:
+		case BPF_LD | BPF_W | BPF_IND:
 			load_order = 2;
 			goto load_ind;
-		case BPF_S_LD_H_IND:
+		case BPF_LD | BPF_H | BPF_IND:
 			load_order = 1;
 			goto load_ind;
-		case BPF_S_LD_B_IND:
+		case BPF_LD | BPF_B | BPF_IND:
 			load_order = 0;
 load_ind:
 			OP_IMM3(ARM_ADD, r_off, r_X, k, ctx);
 			goto load_common;
-		case BPF_S_LDX_IMM:
+		case BPF_LDX | BPF_IMM:
 			ctx->seen |= SEEN_X;
 			emit_mov_i(r_X, k, ctx);
 			break;
-		case BPF_S_LDX_W_LEN:
+		case BPF_LDX | BPF_W | BPF_LEN:
 			ctx->seen |= SEEN_X | SEEN_SKB;
 			emit(ARM_LDR_I(r_X, r_skb,
 				       offsetof(struct sk_buff, len)), ctx);
 			break;
-		case BPF_S_LDX_MEM:
+		case BPF_LDX | BPF_MEM:
 			ctx->seen |= SEEN_X | SEEN_MEM_WORD(k);
 			emit(ARM_LDR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
 			break;
-		case BPF_S_LDX_B_MSH:
+		case BPF_LDX | BPF_B | BPF_MSH:
 			/* x = ((*(frame + k)) & 0xf) << 2; */
 			ctx->seen |= SEEN_X | SEEN_DATA | SEEN_CALL;
 			/* the interpreter should deal with the negative K */
@@ -606,113 +601,113 @@ load_ind:
 			emit(ARM_AND_I(r_X, ARM_R0, 0x00f), ctx);
 			emit(ARM_LSL_I(r_X, r_X, 2), ctx);
 			break;
-		case BPF_S_ST:
+		case BPF_ST:
 			ctx->seen |= SEEN_MEM_WORD(k);
 			emit(ARM_STR_I(r_A, ARM_SP, SCRATCH_OFF(k)), ctx);
 			break;
-		case BPF_S_STX:
+		case BPF_STX:
 			update_on_xread(ctx);
 			ctx->seen |= SEEN_MEM_WORD(k);
 			emit(ARM_STR_I(r_X, ARM_SP, SCRATCH_OFF(k)), ctx);
 			break;
-		case BPF_S_ALU_ADD_K:
+		case BPF_ALU | BPF_ADD | BPF_K:
 			/* A += K */
 			OP_IMM3(ARM_ADD, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_ADD_X:
+		case BPF_ALU | BPF_ADD | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_ADD_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_SUB_K:
+		case BPF_ALU | BPF_SUB | BPF_K:
 			/* A -= K */
 			OP_IMM3(ARM_SUB, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_SUB_X:
+		case BPF_ALU | BPF_SUB | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_SUB_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_MUL_K:
+		case BPF_ALU | BPF_MUL | BPF_K:
 			/* A *= K */
 			emit_mov_i(r_scratch, k, ctx);
 			emit(ARM_MUL(r_A, r_A, r_scratch), ctx);
 			break;
-		case BPF_S_ALU_MUL_X:
+		case BPF_ALU | BPF_MUL | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_MUL(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_DIV_K:
+		case BPF_ALU | BPF_DIV | BPF_K:
 			if (k == 1)
 				break;
 			emit_mov_i(r_scratch, k, ctx);
 			emit_udiv(r_A, r_A, r_scratch, ctx);
 			break;
-		case BPF_S_ALU_DIV_X:
+		case BPF_ALU | BPF_DIV | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_CMP_I(r_X, 0), ctx);
 			emit_err_ret(ARM_COND_EQ, ctx);
 			emit_udiv(r_A, r_A, r_X, ctx);
 			break;
-		case BPF_S_ALU_OR_K:
+		case BPF_ALU | BPF_OR | BPF_K:
 			/* A |= K */
 			OP_IMM3(ARM_ORR, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_OR_X:
+		case BPF_ALU | BPF_OR | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_ORR_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_XOR_K:
+		case BPF_ALU | BPF_XOR | BPF_K:
 			/* A ^= K; */
 			OP_IMM3(ARM_EOR, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ANC_ALU_XOR_X:
-		case BPF_S_ALU_XOR_X:
+		case BPF_ANC | SKF_AD_ALU_XOR_X:
+		case BPF_ALU | BPF_XOR | BPF_X:
 			/* A ^= X */
 			update_on_xread(ctx);
 			emit(ARM_EOR_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_AND_K:
+		case BPF_ALU | BPF_AND | BPF_K:
 			/* A &= K */
 			OP_IMM3(ARM_AND, r_A, r_A, k, ctx);
 			break;
-		case BPF_S_ALU_AND_X:
+		case BPF_ALU | BPF_AND | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_AND_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_LSH_K:
+		case BPF_ALU | BPF_LSH | BPF_K:
 			if (unlikely(k > 31))
 				return -1;
 			emit(ARM_LSL_I(r_A, r_A, k), ctx);
 			break;
-		case BPF_S_ALU_LSH_X:
+		case BPF_ALU | BPF_LSH | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_LSL_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_RSH_K:
+		case BPF_ALU | BPF_RSH | BPF_K:
 			if (unlikely(k > 31))
 				return -1;
 			emit(ARM_LSR_I(r_A, r_A, k), ctx);
 			break;
-		case BPF_S_ALU_RSH_X:
+		case BPF_ALU | BPF_RSH | BPF_X:
 			update_on_xread(ctx);
 			emit(ARM_LSR_R(r_A, r_A, r_X), ctx);
 			break;
-		case BPF_S_ALU_NEG:
+		case BPF_ALU | BPF_NEG:
 			/* A = -A */
 			emit(ARM_RSB_I(r_A, r_A, 0), ctx);
 			break;
-		case BPF_S_JMP_JA:
+		case BPF_JMP | BPF_JA:
 			/* pc += K */
 			emit(ARM_B(b_imm(i + k + 1, ctx)), ctx);
 			break;
-		case BPF_S_JMP_JEQ_K:
+		case BPF_JMP | BPF_JEQ | BPF_K:
 			/* pc += (A == K) ? pc->jt : pc->jf */
 			condt  = ARM_COND_EQ;
 			goto cmp_imm;
-		case BPF_S_JMP_JGT_K:
+		case BPF_JMP | BPF_JGT | BPF_K:
 			/* pc += (A > K) ? pc->jt : pc->jf */
 			condt  = ARM_COND_HI;
 			goto cmp_imm;
-		case BPF_S_JMP_JGE_K:
+		case BPF_JMP | BPF_JGE | BPF_K:
 			/* pc += (A >= K) ? pc->jt : pc->jf */
 			condt  = ARM_COND_HS;
 cmp_imm:
@@ -731,22 +726,22 @@ cond_jump:
 				_emit(condt ^ 1, ARM_B(b_imm(i + inst->jf + 1,
 							     ctx)), ctx);
 			break;
-		case BPF_S_JMP_JEQ_X:
+		case BPF_JMP | BPF_JEQ | BPF_X:
 			/* pc += (A == X) ? pc->jt : pc->jf */
 			condt   = ARM_COND_EQ;
 			goto cmp_x;
-		case BPF_S_JMP_JGT_X:
+		case BPF_JMP | BPF_JGT | BPF_X:
 			/* pc += (A > X) ? pc->jt : pc->jf */
 			condt   = ARM_COND_HI;
 			goto cmp_x;
-		case BPF_S_JMP_JGE_X:
+		case BPF_JMP | BPF_JGE | BPF_X:
 			/* pc += (A >= X) ? pc->jt : pc->jf */
 			condt   = ARM_COND_CS;
 cmp_x:
 			update_on_xread(ctx);
 			emit(ARM_CMP_R(r_A, r_X), ctx);
 			goto cond_jump;
-		case BPF_S_JMP_JSET_K:
+		case BPF_JMP | BPF_JSET | BPF_K:
 			/* pc += (A & K) ? pc->jt : pc->jf */
 			condt  = ARM_COND_NE;
 			/* not set iff all zeroes iff Z==1 iff EQ */
@@ -759,16 +754,16 @@ cmp_x:
 				emit(ARM_TST_I(r_A, imm12), ctx);
 			}
 			goto cond_jump;
-		case BPF_S_JMP_JSET_X:
+		case BPF_JMP | BPF_JSET | BPF_X:
 			/* pc += (A & X) ? pc->jt : pc->jf */
 			update_on_xread(ctx);
 			condt  = ARM_COND_NE;
 			emit(ARM_TST_R(r_A, r_X), ctx);
 			goto cond_jump;
-		case BPF_S_RET_A:
+		case BPF_RET | BPF_A:
 			emit(ARM_MOV_R(ARM_R0, r_A), ctx);
 			goto b_epilogue;
-		case BPF_S_RET_K:
+		case BPF_RET | BPF_K:
 			if ((k == 0) && (ctx->ret0_fp_idx < 0))
 				ctx->ret0_fp_idx = i;
 			emit_mov_i(ARM_R0, k, ctx);
@@ -776,17 +771,17 @@ b_epilogue:
 			if (i != ctx->skf->len - 1)
 				emit(ARM_B(b_imm(prog->len, ctx)), ctx);
 			break;
-		case BPF_S_MISC_TAX:
+		case BPF_MISC | BPF_TAX:
 			/* X = A */
 			ctx->seen |= SEEN_X;
 			emit(ARM_MOV_R(r_X, r_A), ctx);
 			break;
-		case BPF_S_MISC_TXA:
+		case BPF_MISC | BPF_TXA:
 			/* A = X */
 			update_on_xread(ctx);
 			emit(ARM_MOV_R(r_A, r_X), ctx);
 			break;
-		case BPF_S_ANC_PROTOCOL:
+		case BPF_ANC | SKF_AD_PROTOCOL:
 			/* A = ntohs(skb->protocol) */
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
@@ -795,7 +790,7 @@ b_epilogue:
 			emit(ARM_LDRH_I(r_scratch, r_skb, off), ctx);
 			emit_swap16(r_A, r_scratch, ctx);
 			break;
-		case BPF_S_ANC_CPU:
+		case BPF_ANC | SKF_AD_CPU:
 			/* r_scratch = current_thread_info() */
 			OP_IMM3(ARM_BIC, r_scratch, ARM_SP, THREAD_SIZE - 1, ctx);
 			/* A = current_thread_info()->cpu */
@@ -803,7 +798,7 @@ b_epilogue:
 			off = offsetof(struct thread_info, cpu);
 			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
 			break;
-		case BPF_S_ANC_IFINDEX:
+		case BPF_ANC | SKF_AD_IFINDEX:
 			/* A = skb->dev->ifindex */
 			ctx->seen |= SEEN_SKB;
 			off = offsetof(struct sk_buff, dev);
@@ -817,30 +812,30 @@ b_epilogue:
 			off = offsetof(struct net_device, ifindex);
 			emit(ARM_LDR_I(r_A, r_scratch, off), ctx);
 			break;
-		case BPF_S_ANC_MARK:
+		case BPF_ANC | SKF_AD_MARK:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 			off = offsetof(struct sk_buff, mark);
 			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
 			break;
-		case BPF_S_ANC_RXHASH:
+		case BPF_ANC | SKF_AD_RXHASH:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 			off = offsetof(struct sk_buff, hash);
 			emit(ARM_LDR_I(r_A, r_skb, off), ctx);
 			break;
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
+		case BPF_ANC | SKF_AD_VLAN_TAG:
+		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 			off = offsetof(struct sk_buff, vlan_tci);
 			emit(ARM_LDRH_I(r_A, r_skb, off), ctx);
-			if (inst->code == BPF_S_ANC_VLAN_TAG)
+			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
 				OP_IMM3(ARM_AND, r_A, r_A, VLAN_VID_MASK, ctx);
 			else
 				OP_IMM3(ARM_AND, r_A, r_A, VLAN_TAG_PRESENT, ctx);
 			break;
-		case BPF_S_ANC_QUEUE:
+		case BPF_ANC | SKF_AD_QUEUE:
 			ctx->seen |= SEEN_SKB;
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  queue_mapping) != 2);
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index e76eba74d9da..8f87d9217122 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -78,7 +78,7 @@ sk_load_byte_positive_offset:
 	blr
 
 /*
- * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
+ * BPF_LDX | BPF_B | BPF_MSH: ldxb  4*([offset]&0xf)
  * r_addr is the offset value
  */
 	.globl sk_load_byte_msh
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 808ce1cae21a..6dcdadefd8d0 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -79,19 +79,11 @@ static void bpf_jit_build_prologue(struct sk_filter *fp, u32 *image,
 	}
 
 	switch (filter[0].code) {
-	case BPF_S_RET_K:
-	case BPF_S_LD_W_LEN:
-	case BPF_S_ANC_PROTOCOL:
-	case BPF_S_ANC_IFINDEX:
-	case BPF_S_ANC_MARK:
-	case BPF_S_ANC_RXHASH:
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
-	case BPF_S_ANC_CPU:
-	case BPF_S_ANC_QUEUE:
-	case BPF_S_LD_W_ABS:
-	case BPF_S_LD_H_ABS:
-	case BPF_S_LD_B_ABS:
+	case BPF_RET | BPF_K:
+	case BPF_LD | BPF_W | BPF_LEN:
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
 		/* first instruction sets A register (or is RET 'constant') */
 		break;
 	default:
@@ -144,6 +136,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 
 	for (i = 0; i < flen; i++) {
 		unsigned int K = filter[i].k;
+		u16 code = bpf_anc_helper(&filter[i]);
 
 		/*
 		 * addrs[] maps a BPF bytecode address into a real offset from
@@ -151,35 +144,35 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 		 */
 		addrs[i] = ctx->idx * 4;
 
-		switch (filter[i].code) {
+		switch (code) {
 			/*** ALU ops ***/
-		case BPF_S_ALU_ADD_X: /* A += X; */
+		case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_ADD(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_ADD_K: /* A += K; */
+		case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */
 			if (!K)
 				break;
 			PPC_ADDI(r_A, r_A, IMM_L(K));
 			if (K >= 32768)
 				PPC_ADDIS(r_A, r_A, IMM_HA(K));
 			break;
-		case BPF_S_ALU_SUB_X: /* A -= X; */
+		case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_SUB(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_SUB_K: /* A -= K */
+		case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
 			if (!K)
 				break;
 			PPC_ADDI(r_A, r_A, IMM_L(-K));
 			if (K >= 32768)
 				PPC_ADDIS(r_A, r_A, IMM_HA(-K));
 			break;
-		case BPF_S_ALU_MUL_X: /* A *= X; */
+		case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_MUL(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_MUL_K: /* A *= K */
+		case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
 			if (K < 32768)
 				PPC_MULI(r_A, r_A, K);
 			else {
@@ -187,7 +180,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 				PPC_MUL(r_A, r_A, r_scratch1);
 			}
 			break;
-		case BPF_S_ALU_MOD_X: /* A %= X; */
+		case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_CMPWI(r_X, 0);
 			if (ctx->pc_ret0 != -1) {
@@ -201,13 +194,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			PPC_MUL(r_scratch1, r_X, r_scratch1);
 			PPC_SUB(r_A, r_A, r_scratch1);
 			break;
-		case BPF_S_ALU_MOD_K: /* A %= K; */
+		case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */
 			PPC_LI32(r_scratch2, K);
 			PPC_DIVWU(r_scratch1, r_A, r_scratch2);
 			PPC_MUL(r_scratch1, r_scratch2, r_scratch1);
 			PPC_SUB(r_A, r_A, r_scratch1);
 			break;
-		case BPF_S_ALU_DIV_X: /* A /= X; */
+		case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_CMPWI(r_X, 0);
 			if (ctx->pc_ret0 != -1) {
@@ -223,17 +216,17 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			}
 			PPC_DIVWU(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_DIV_K: /* A /= K */
+		case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
 			if (K == 1)
 				break;
 			PPC_LI32(r_scratch1, K);
 			PPC_DIVWU(r_A, r_A, r_scratch1);
 			break;
-		case BPF_S_ALU_AND_X:
+		case BPF_ALU | BPF_AND | BPF_X:
 			ctx->seen |= SEEN_XREG;
 			PPC_AND(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_AND_K:
+		case BPF_ALU | BPF_AND | BPF_K:
 			if (!IMM_H(K))
 				PPC_ANDI(r_A, r_A, K);
 			else {
@@ -241,51 +234,51 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 				PPC_AND(r_A, r_A, r_scratch1);
 			}
 			break;
-		case BPF_S_ALU_OR_X:
+		case BPF_ALU | BPF_OR | BPF_X:
 			ctx->seen |= SEEN_XREG;
 			PPC_OR(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_OR_K:
+		case BPF_ALU | BPF_OR | BPF_K:
 			if (IMM_L(K))
 				PPC_ORI(r_A, r_A, IMM_L(K));
 			if (K >= 65536)
 				PPC_ORIS(r_A, r_A, IMM_H(K));
 			break;
-		case BPF_S_ANC_ALU_XOR_X:
-		case BPF_S_ALU_XOR_X: /* A ^= X */
+		case BPF_ANC | SKF_AD_ALU_XOR_X:
+		case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */
 			ctx->seen |= SEEN_XREG;
 			PPC_XOR(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_XOR_K: /* A ^= K */
+		case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
 			if (IMM_L(K))
 				PPC_XORI(r_A, r_A, IMM_L(K));
 			if (K >= 65536)
 				PPC_XORIS(r_A, r_A, IMM_H(K));
 			break;
-		case BPF_S_ALU_LSH_X: /* A <<= X; */
+		case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_SLW(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_LSH_K:
+		case BPF_ALU | BPF_LSH | BPF_K:
 			if (K == 0)
 				break;
 			else
 				PPC_SLWI(r_A, r_A, K);
 			break;
-		case BPF_S_ALU_RSH_X: /* A >>= X; */
+		case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_SRW(r_A, r_A, r_X);
 			break;
-		case BPF_S_ALU_RSH_K: /* A >>= K; */
+		case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
 			if (K == 0)
 				break;
 			else
 				PPC_SRWI(r_A, r_A, K);
 			break;
-		case BPF_S_ALU_NEG:
+		case BPF_ALU | BPF_NEG:
 			PPC_NEG(r_A, r_A);
 			break;
-		case BPF_S_RET_K:
+		case BPF_RET | BPF_K:
 			PPC_LI32(r_ret, K);
 			if (!K) {
 				if (ctx->pc_ret0 == -1)
@@ -312,7 +305,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 					PPC_BLR();
 			}
 			break;
-		case BPF_S_RET_A:
+		case BPF_RET | BPF_A:
 			PPC_MR(r_ret, r_A);
 			if (i != flen - 1) {
 				if (ctx->seen)
@@ -321,53 +314,53 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 					PPC_BLR();
 			}
 			break;
-		case BPF_S_MISC_TAX: /* X = A */
+		case BPF_MISC | BPF_TAX: /* X = A */
 			PPC_MR(r_X, r_A);
 			break;
-		case BPF_S_MISC_TXA: /* A = X */
+		case BPF_MISC | BPF_TXA: /* A = X */
 			ctx->seen |= SEEN_XREG;
 			PPC_MR(r_A, r_X);
 			break;
 
 			/*** Constant loads/M[] access ***/
-		case BPF_S_LD_IMM: /* A = K */
+		case BPF_LD | BPF_IMM: /* A = K */
 			PPC_LI32(r_A, K);
 			break;
-		case BPF_S_LDX_IMM: /* X = K */
+		case BPF_LDX | BPF_IMM: /* X = K */
 			PPC_LI32(r_X, K);
 			break;
-		case BPF_S_LD_MEM: /* A = mem[K] */
+		case BPF_LD | BPF_MEM: /* A = mem[K] */
 			PPC_MR(r_A, r_M + (K & 0xf));
 			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
 			break;
-		case BPF_S_LDX_MEM: /* X = mem[K] */
+		case BPF_LDX | BPF_MEM: /* X = mem[K] */
 			PPC_MR(r_X, r_M + (K & 0xf));
 			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
 			break;
-		case BPF_S_ST: /* mem[K] = A */
+		case BPF_ST: /* mem[K] = A */
 			PPC_MR(r_M + (K & 0xf), r_A);
 			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
 			break;
-		case BPF_S_STX: /* mem[K] = X */
+		case BPF_STX: /* mem[K] = X */
 			PPC_MR(r_M + (K & 0xf), r_X);
 			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
 			break;
-		case BPF_S_LD_W_LEN: /*	A = skb->len; */
+		case BPF_LD | BPF_W | BPF_LEN: /*	A = skb->len; */
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
 			break;
-		case BPF_S_LDX_W_LEN: /* X = skb->len; */
+		case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
 			PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
 			break;
 
 			/*** Ancillary info loads ***/
-		case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+		case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  protocol) != 2);
 			PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							    protocol));
 			break;
-		case BPF_S_ANC_IFINDEX:
+		case BPF_ANC | SKF_AD_IFINDEX:
 			PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
 								dev));
 			PPC_CMPDI(r_scratch1, 0);
@@ -384,33 +377,33 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			PPC_LWZ_OFFS(r_A, r_scratch1,
 				     offsetof(struct net_device, ifindex));
 			break;
-		case BPF_S_ANC_MARK:
+		case BPF_ANC | SKF_AD_MARK:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  mark));
 			break;
-		case BPF_S_ANC_RXHASH:
+		case BPF_ANC | SKF_AD_RXHASH:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 			PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  hash));
 			break;
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
+		case BPF_ANC | SKF_AD_VLAN_TAG:
+		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  vlan_tci));
-			if (filter[i].code == BPF_S_ANC_VLAN_TAG)
+			if (code == (BPF_ANC | SKF_AD_VLAN_TAG))
 				PPC_ANDI(r_A, r_A, VLAN_VID_MASK);
 			else
 				PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT);
 			break;
-		case BPF_S_ANC_QUEUE:
+		case BPF_ANC | SKF_AD_QUEUE:
 			BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
 						  queue_mapping) != 2);
 			PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
 							  queue_mapping));
 			break;
-		case BPF_S_ANC_CPU:
+		case BPF_ANC | SKF_AD_CPU:
 #ifdef CONFIG_SMP
 			/*
 			 * PACA ptr is r13:
@@ -426,13 +419,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			break;
 
 			/*** Absolute loads from packet header/data ***/
-		case BPF_S_LD_W_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
 			goto common_load;
-		case BPF_S_LD_H_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
 			goto common_load;
-		case BPF_S_LD_B_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
 		common_load:
 			/* Load from [K]. */
@@ -449,13 +442,13 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			break;
 
 			/*** Indirect loads from packet header/data ***/
-		case BPF_S_LD_W_IND:
+		case BPF_LD | BPF_W | BPF_IND:
 			func = sk_load_word;
 			goto common_load_ind;
-		case BPF_S_LD_H_IND:
+		case BPF_LD | BPF_H | BPF_IND:
 			func = sk_load_half;
 			goto common_load_ind;
-		case BPF_S_LD_B_IND:
+		case BPF_LD | BPF_B | BPF_IND:
 			func = sk_load_byte;
 		common_load_ind:
 			/*
@@ -473,31 +466,31 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 			PPC_BCC(COND_LT, exit_addr);
 			break;
 
-		case BPF_S_LDX_B_MSH:
+		case BPF_LDX | BPF_B | BPF_MSH:
 			func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
 			goto common_load;
 			break;
 
 			/*** Jump and branches ***/
-		case BPF_S_JMP_JA:
+		case BPF_JMP | BPF_JA:
 			if (K != 0)
 				PPC_JMP(addrs[i + 1 + K]);
 			break;
 
-		case BPF_S_JMP_JGT_K:
-		case BPF_S_JMP_JGT_X:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
 			true_cond = COND_GT;
 			goto cond_branch;
-		case BPF_S_JMP_JGE_K:
-		case BPF_S_JMP_JGE_X:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
 			true_cond = COND_GE;
 			goto cond_branch;
-		case BPF_S_JMP_JEQ_K:
-		case BPF_S_JMP_JEQ_X:
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
 			true_cond = COND_EQ;
 			goto cond_branch;
-		case BPF_S_JMP_JSET_K:
-		case BPF_S_JMP_JSET_X:
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
 			true_cond = COND_NE;
 			/* Fall through */
 		cond_branch:
@@ -508,20 +501,20 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 				break;
 			}
 
-			switch (filter[i].code) {
-			case BPF_S_JMP_JGT_X:
-			case BPF_S_JMP_JGE_X:
-			case BPF_S_JMP_JEQ_X:
+			switch (code) {
+			case BPF_JMP | BPF_JGT | BPF_X:
+			case BPF_JMP | BPF_JGE | BPF_X:
+			case BPF_JMP | BPF_JEQ | BPF_X:
 				ctx->seen |= SEEN_XREG;
 				PPC_CMPLW(r_A, r_X);
 				break;
-			case BPF_S_JMP_JSET_X:
+			case BPF_JMP | BPF_JSET | BPF_X:
 				ctx->seen |= SEEN_XREG;
 				PPC_AND_DOT(r_scratch1, r_A, r_X);
 				break;
-			case BPF_S_JMP_JEQ_K:
-			case BPF_S_JMP_JGT_K:
-			case BPF_S_JMP_JGE_K:
+			case BPF_JMP | BPF_JEQ | BPF_K:
+			case BPF_JMP | BPF_JGT | BPF_K:
+			case BPF_JMP | BPF_JGE | BPF_K:
 				if (K < 32768)
 					PPC_CMPLWI(r_A, K);
 				else {
@@ -529,7 +522,7 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 					PPC_CMPLW(r_A, r_scratch1);
 				}
 				break;
-			case BPF_S_JMP_JSET_K:
+			case BPF_JMP | BPF_JSET | BPF_K:
 				if (K < 32768)
 					/* PPC_ANDI is /only/ dot-form */
 					PPC_ANDI(r_scratch1, r_A, K);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e9f8fa9337fe..a2cbd875543a 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -269,27 +269,17 @@ static void bpf_jit_noleaks(struct bpf_jit *jit, struct sock_filter *filter)
 		EMIT4(0xa7c80000);
 	/* Clear A if the first register does not set it. */
 	switch (filter[0].code) {
-	case BPF_S_LD_W_ABS:
-	case BPF_S_LD_H_ABS:
-	case BPF_S_LD_B_ABS:
-	case BPF_S_LD_W_LEN:
-	case BPF_S_LD_W_IND:
-	case BPF_S_LD_H_IND:
-	case BPF_S_LD_B_IND:
-	case BPF_S_LD_IMM:
-	case BPF_S_LD_MEM:
-	case BPF_S_MISC_TXA:
-	case BPF_S_ANC_PROTOCOL:
-	case BPF_S_ANC_PKTTYPE:
-	case BPF_S_ANC_IFINDEX:
-	case BPF_S_ANC_MARK:
-	case BPF_S_ANC_QUEUE:
-	case BPF_S_ANC_HATYPE:
-	case BPF_S_ANC_RXHASH:
-	case BPF_S_ANC_CPU:
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
-	case BPF_S_RET_K:
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
+	case BPF_LD | BPF_W | BPF_LEN:
+	case BPF_LD | BPF_W | BPF_IND:
+	case BPF_LD | BPF_H | BPF_IND:
+	case BPF_LD | BPF_B | BPF_IND:
+	case BPF_LD | BPF_IMM:
+	case BPF_LD | BPF_MEM:
+	case BPF_MISC | BPF_TXA:
+	case BPF_RET | BPF_K:
 		/* first instruction sets A register */
 		break;
 	default: /* A = 0 */
@@ -304,15 +294,18 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 	unsigned int K;
 	int offset;
 	unsigned int mask;
+	u16 code;
 
 	K = filter->k;
-	switch (filter->code) {
-	case BPF_S_ALU_ADD_X: /* A += X */
+	code = bpf_anc_helper(filter);
+
+	switch (code) {
+	case BPF_ALU | BPF_ADD | BPF_X: /* A += X */
 		jit->seen |= SEEN_XREG;
 		/* ar %r5,%r12 */
 		EMIT2(0x1a5c);
 		break;
-	case BPF_S_ALU_ADD_K: /* A += K */
+	case BPF_ALU | BPF_ADD | BPF_K: /* A += K */
 		if (!K)
 			break;
 		if (K <= 16383)
@@ -325,12 +318,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 			/* a %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5a50d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_SUB_X: /* A -= X */
+	case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */
 		jit->seen |= SEEN_XREG;
 		/* sr %r5,%r12 */
 		EMIT2(0x1b5c);
 		break;
-	case BPF_S_ALU_SUB_K: /* A -= K */
+	case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
 		if (!K)
 			break;
 		if (K <= 16384)
@@ -343,12 +336,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 			/* s %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5b50d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_MUL_X: /* A *= X */
+	case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */
 		jit->seen |= SEEN_XREG;
 		/* msr %r5,%r12 */
 		EMIT4(0xb252005c);
 		break;
-	case BPF_S_ALU_MUL_K: /* A *= K */
+	case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
 		if (K <= 16383)
 			/* mhi %r5,K */
 			EMIT4_IMM(0xa75c0000, K);
@@ -359,7 +352,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 			/* ms %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x7150d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_DIV_X: /* A /= X */
+	case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */
 		jit->seen |= SEEN_XREG | SEEN_RET0;
 		/* ltr %r12,%r12 */
 		EMIT2(0x12cc);
@@ -370,7 +363,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		/* dlr %r4,%r12 */
 		EMIT4(0xb997004c);
 		break;
-	case BPF_S_ALU_DIV_K: /* A /= K */
+	case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
 		if (K == 1)
 			break;
 		/* lhi %r4,0 */
@@ -378,7 +371,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		/* dl %r4,<d(K)>(%r13) */
 		EMIT6_DISP(0xe340d000, 0x0097, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_MOD_X: /* A %= X */
+	case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */
 		jit->seen |= SEEN_XREG | SEEN_RET0;
 		/* ltr %r12,%r12 */
 		EMIT2(0x12cc);
@@ -391,7 +384,7 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		/* lr %r5,%r4 */
 		EMIT2(0x1854);
 		break;
-	case BPF_S_ALU_MOD_K: /* A %= K */
+	case BPF_ALU | BPF_MOD | BPF_K: /* A %= K */
 		if (K == 1) {
 			/* lhi %r5,0 */
 			EMIT4(0xa7580000);
@@ -404,12 +397,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 		/* lr %r5,%r4 */
 		EMIT2(0x1854);
 		break;
-	case BPF_S_ALU_AND_X: /* A &= X */
+	case BPF_ALU | BPF_AND | BPF_X: /* A &= X */
 		jit->seen |= SEEN_XREG;
 		/* nr %r5,%r12 */
 		EMIT2(0x145c);
 		break;
-	case BPF_S_ALU_AND_K: /* A &= K */
+	case BPF_ALU | BPF_AND | BPF_K: /* A &= K */
 		if (test_facility(21))
 			/* nilf %r5,<K> */
 			EMIT6_IMM(0xc05b0000, K);
@@ -417,12 +410,12 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 			/* n %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5450d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_OR_X: /* A |= X */
+	case BPF_ALU | BPF_OR | BPF_X: /* A |= X */
 		jit->seen |= SEEN_XREG;
 		/* or %r5,%r12 */
 		EMIT2(0x165c);
 		break;
-	case BPF_S_ALU_OR_K: /* A |= K */
+	case BPF_ALU | BPF_OR | BPF_K: /* A |= K */
 		if (test_facility(21))
 			/* oilf %r5,<K> */
 			EMIT6_IMM(0xc05d0000, K);
@@ -430,55 +423,55 @@ static int bpf_jit_insn(struct bpf_jit *jit, struct sock_filter *filter,
 			/* o %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5650d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
-	case BPF_S_ALU_XOR_X:
+	case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
+	case BPF_ALU | BPF_XOR | BPF_X:
 		jit->seen |= SEEN_XREG;
 		/* xr %r5,%r12 */
 		EMIT2(0x175c);
 		break;
-	case BPF_S_ALU_XOR_K: /* A ^= K */
+	case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
 		if (!K)
 			break;
 		/* x %r5,<d(K)>(%r13) */
 		EMIT4_DISP(0x5750d000, EMIT_CONST(K));
 		break;
-	case BPF_S_ALU_LSH_X: /* A <<= X; */
+	case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
 		jit->seen |= SEEN_XREG;
 		/* sll %r5,0(%r12) */
 		EMIT4(0x8950c000);
 		break;
-	case BPF_S_ALU_LSH_K: /* A <<= K */
+	case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */
 		if (K == 0)
 			break;
 		/* sll %r5,K */
 		EMIT4_DISP(0x89500000, K);
 		break;
-	case BPF_S_ALU_RSH_X: /* A >>= X; */
+	case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
 		jit->seen |= SEEN_XREG;
 		/* srl %r5,0(%r12) */
 		EMIT4(0x8850c000);
 		break;
-	case BPF_S_ALU_RSH_K: /* A >>= K; */
+	case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
 		if (K == 0)
 			break;
 		/* srl %r5,K */
 		EMIT4_DISP(0x88500000, K);
 		break;
-	case BPF_S_ALU_NEG: /* A = -A */
+	case BPF_ALU | BPF_NEG: /* A = -A */
 		/* lnr %r5,%r5 */
 		EMIT2(0x1155);
 		break;
-	case BPF_S_JMP_JA: /* ip += K */
+	case BPF_JMP | BPF_JA: /* ip += K */
 		offset = addrs[i + K] + jit->start - jit->prg;
 		EMIT4_PCREL(0xa7f40000, offset);
 		break;
-	case BPF_S_JMP_JGT_K: /* ip += (A > K) ? jt : jf */
+	case BPF_JMP | BPF_JGT | BPF_K: /* ip += (A > K) ? jt : jf */
 		mask = 0x200000; /* jh */
 		goto kbranch;
-	case BPF_S_JMP_JGE_K: /* ip += (A >= K) ? jt : jf */
+	case BPF_JMP | BPF_JGE | BPF_K: /* ip += (A >= K) ? jt : jf */
 		mask = 0xa00000; /* jhe */
 		goto kbranch;
-	case BPF_S_JMP_JEQ_K: /* ip += (A == K) ? jt : jf */
+	case BPF_JMP | BPF_JEQ | BPF_K: /* ip += (A == K) ? jt : jf */
 		mask = 0x800000; /* je */
 kbranch:	/* Emit compare if the branch targets are different */
 		if (filter->jt != filter->jf) {
@@ -511,7 +504,7 @@ branch:		if (filter->jt == filter->jf) {
 			EMIT4_PCREL(0xa7040000 | (mask ^ 0xf00000), offset);
 		}
 		break;
-	case BPF_S_JMP_JSET_K: /* ip += (A & K) ? jt : jf */
+	case BPF_JMP | BPF_JSET | BPF_K: /* ip += (A & K) ? jt : jf */
 		mask = 0x700000; /* jnz */
 		/* Emit test if the branch targets are different */
 		if (filter->jt != filter->jf) {
@@ -525,13 +518,13 @@ branch:		if (filter->jt == filter->jf) {
 				EMIT4_IMM(0xa7510000, K);
 		}
 		goto branch;
-	case BPF_S_JMP_JGT_X: /* ip += (A > X) ? jt : jf */
+	case BPF_JMP | BPF_JGT | BPF_X: /* ip += (A > X) ? jt : jf */
 		mask = 0x200000; /* jh */
 		goto xbranch;
-	case BPF_S_JMP_JGE_X: /* ip += (A >= X) ? jt : jf */
+	case BPF_JMP | BPF_JGE | BPF_X: /* ip += (A >= X) ? jt : jf */
 		mask = 0xa00000; /* jhe */
 		goto xbranch;
-	case BPF_S_JMP_JEQ_X: /* ip += (A == X) ? jt : jf */
+	case BPF_JMP | BPF_JEQ | BPF_X: /* ip += (A == X) ? jt : jf */
 		mask = 0x800000; /* je */
 xbranch:	/* Emit compare if the branch targets are different */
 		if (filter->jt != filter->jf) {
@@ -540,7 +533,7 @@ xbranch:	/* Emit compare if the branch targets are different */
 			EMIT2(0x195c);
 		}
 		goto branch;
-	case BPF_S_JMP_JSET_X: /* ip += (A & X) ? jt : jf */
+	case BPF_JMP | BPF_JSET | BPF_X: /* ip += (A & X) ? jt : jf */
 		mask = 0x700000; /* jnz */
 		/* Emit test if the branch targets are different */
 		if (filter->jt != filter->jf) {
@@ -551,15 +544,15 @@ xbranch:	/* Emit compare if the branch targets are different */
 			EMIT2(0x144c);
 		}
 		goto branch;
-	case BPF_S_LD_W_ABS: /* A = *(u32 *) (skb->data+K) */
+	case BPF_LD | BPF_W | BPF_ABS: /* A = *(u32 *) (skb->data+K) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_WORD;
 		offset = jit->off_load_word;
 		goto load_abs;
-	case BPF_S_LD_H_ABS: /* A = *(u16 *) (skb->data+K) */
+	case BPF_LD | BPF_H | BPF_ABS: /* A = *(u16 *) (skb->data+K) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_HALF;
 		offset = jit->off_load_half;
 		goto load_abs;
-	case BPF_S_LD_B_ABS: /* A = *(u8 *) (skb->data+K) */
+	case BPF_LD | BPF_B | BPF_ABS: /* A = *(u8 *) (skb->data+K) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_BYTE;
 		offset = jit->off_load_byte;
 load_abs:	if ((int) K < 0)
@@ -573,19 +566,19 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		/* jnz <ret0> */
 		EMIT4_PCREL(0xa7740000, (jit->ret0_ip - jit->prg));
 		break;
-	case BPF_S_LD_W_IND: /* A = *(u32 *) (skb->data+K+X) */
+	case BPF_LD | BPF_W | BPF_IND: /* A = *(u32 *) (skb->data+K+X) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IWORD;
 		offset = jit->off_load_iword;
 		goto call_fn;
-	case BPF_S_LD_H_IND: /* A = *(u16 *) (skb->data+K+X) */
+	case BPF_LD | BPF_H | BPF_IND: /* A = *(u16 *) (skb->data+K+X) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IHALF;
 		offset = jit->off_load_ihalf;
 		goto call_fn;
-	case BPF_S_LD_B_IND: /* A = *(u8 *) (skb->data+K+X) */
+	case BPF_LD | BPF_B | BPF_IND: /* A = *(u8 *) (skb->data+K+X) */
 		jit->seen |= SEEN_DATAREF | SEEN_RET0 | SEEN_LOAD_IBYTE;
 		offset = jit->off_load_ibyte;
 		goto call_fn;
-	case BPF_S_LDX_B_MSH:
+	case BPF_LDX | BPF_B | BPF_MSH:
 		/* X = (*(u8 *)(skb->data+K) & 0xf) << 2 */
 		jit->seen |= SEEN_RET0;
 		if ((int) K < 0) {
@@ -596,17 +589,17 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		jit->seen |= SEEN_DATAREF | SEEN_LOAD_BMSH;
 		offset = jit->off_load_bmsh;
 		goto call_fn;
-	case BPF_S_LD_W_LEN: /*	A = skb->len; */
+	case BPF_LD | BPF_W | BPF_LEN: /*	A = skb->len; */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
 		/* l %r5,<d(len)>(%r2) */
 		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, len));
 		break;
-	case BPF_S_LDX_W_LEN: /* X = skb->len; */
+	case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
 		jit->seen |= SEEN_XREG;
 		/* l %r12,<d(len)>(%r2) */
 		EMIT4_DISP(0x58c02000, offsetof(struct sk_buff, len));
 		break;
-	case BPF_S_LD_IMM: /* A = K */
+	case BPF_LD | BPF_IMM: /* A = K */
 		if (K <= 16383)
 			/* lhi %r5,K */
 			EMIT4_IMM(0xa7580000, K);
@@ -617,7 +610,7 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 			/* l %r5,<d(K)>(%r13) */
 			EMIT4_DISP(0x5850d000, EMIT_CONST(K));
 		break;
-	case BPF_S_LDX_IMM: /* X = K */
+	case BPF_LDX | BPF_IMM: /* X = K */
 		jit->seen |= SEEN_XREG;
 		if (K <= 16383)
 			/* lhi %r12,<K> */
@@ -629,29 +622,29 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 			/* l %r12,<d(K)>(%r13) */
 			EMIT4_DISP(0x58c0d000, EMIT_CONST(K));
 		break;
-	case BPF_S_LD_MEM: /* A = mem[K] */
+	case BPF_LD | BPF_MEM: /* A = mem[K] */
 		jit->seen |= SEEN_MEM;
 		/* l %r5,<K>(%r15) */
 		EMIT4_DISP(0x5850f000,
 			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
 		break;
-	case BPF_S_LDX_MEM: /* X = mem[K] */
+	case BPF_LDX | BPF_MEM: /* X = mem[K] */
 		jit->seen |= SEEN_XREG | SEEN_MEM;
 		/* l %r12,<K>(%r15) */
 		EMIT4_DISP(0x58c0f000,
 			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
 		break;
-	case BPF_S_MISC_TAX: /* X = A */
+	case BPF_MISC | BPF_TAX: /* X = A */
 		jit->seen |= SEEN_XREG;
 		/* lr %r12,%r5 */
 		EMIT2(0x18c5);
 		break;
-	case BPF_S_MISC_TXA: /* A = X */
+	case BPF_MISC | BPF_TXA: /* A = X */
 		jit->seen |= SEEN_XREG;
 		/* lr %r5,%r12 */
 		EMIT2(0x185c);
 		break;
-	case BPF_S_RET_K:
+	case BPF_RET | BPF_K:
 		if (K == 0) {
 			jit->seen |= SEEN_RET0;
 			if (last)
@@ -671,33 +664,33 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 			EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
 		}
 		break;
-	case BPF_S_RET_A:
+	case BPF_RET | BPF_A:
 		/* llgfr %r2,%r5 */
 		EMIT4(0xb9160025);
 		/* j <exit> */
 		EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
 		break;
-	case BPF_S_ST: /* mem[K] = A */
+	case BPF_ST: /* mem[K] = A */
 		jit->seen |= SEEN_MEM;
 		/* st %r5,<K>(%r15) */
 		EMIT4_DISP(0x5050f000,
 			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
 		break;
-	case BPF_S_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
+	case BPF_STX: /* mem[K] = X : mov %ebx,off8(%rbp) */
 		jit->seen |= SEEN_XREG | SEEN_MEM;
 		/* st %r12,<K>(%r15) */
 		EMIT4_DISP(0x50c0f000,
 			   (jit->seen & SEEN_DATAREF) ? 160 + K*4 : K*4);
 		break;
-	case BPF_S_ANC_PROTOCOL: /* A = ntohs(skb->protocol); */
+	case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
 		/* lhi %r5,0 */
 		EMIT4(0xa7580000);
 		/* icm	%r5,3,<d(protocol)>(%r2) */
 		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, protocol));
 		break;
-	case BPF_S_ANC_IFINDEX:	/* if (!skb->dev) return 0;
-				 * A = skb->dev->ifindex */
+	case BPF_ANC | SKF_AD_IFINDEX:	/* if (!skb->dev) return 0;
+					 * A = skb->dev->ifindex */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
 		jit->seen |= SEEN_RET0;
 		/* lg %r1,<d(dev)>(%r2) */
@@ -709,20 +702,20 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		/* l %r5,<d(ifindex)>(%r1) */
 		EMIT4_DISP(0x58501000, offsetof(struct net_device, ifindex));
 		break;
-	case BPF_S_ANC_MARK: /* A = skb->mark */
+	case BPF_ANC | SKF_AD_MARK: /* A = skb->mark */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
 		/* l %r5,<d(mark)>(%r2) */
 		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, mark));
 		break;
-	case BPF_S_ANC_QUEUE: /* A = skb->queue_mapping */
+	case BPF_ANC | SKF_AD_QUEUE: /* A = skb->queue_mapping */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
 		/* lhi %r5,0 */
 		EMIT4(0xa7580000);
 		/* icm	%r5,3,<d(queue_mapping)>(%r2) */
 		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, queue_mapping));
 		break;
-	case BPF_S_ANC_HATYPE:	/* if (!skb->dev) return 0;
-				 * A = skb->dev->type */
+	case BPF_ANC | SKF_AD_HATYPE:	/* if (!skb->dev) return 0;
+					 * A = skb->dev->type */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
 		jit->seen |= SEEN_RET0;
 		/* lg %r1,<d(dev)>(%r2) */
@@ -736,20 +729,20 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		/* icm	%r5,3,<d(type)>(%r1) */
 		EMIT4_DISP(0xbf531000, offsetof(struct net_device, type));
 		break;
-	case BPF_S_ANC_RXHASH: /* A = skb->hash */
+	case BPF_ANC | SKF_AD_RXHASH: /* A = skb->hash */
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
 		/* l %r5,<d(hash)>(%r2) */
 		EMIT4_DISP(0x58502000, offsetof(struct sk_buff, hash));
 		break;
-	case BPF_S_ANC_VLAN_TAG:
-	case BPF_S_ANC_VLAN_TAG_PRESENT:
+	case BPF_ANC | SKF_AD_VLAN_TAG:
+	case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
 		/* lhi %r5,0 */
 		EMIT4(0xa7580000);
 		/* icm	%r5,3,<d(vlan_tci)>(%r2) */
 		EMIT4_DISP(0xbf532000, offsetof(struct sk_buff, vlan_tci));
-		if (filter->code == BPF_S_ANC_VLAN_TAG) {
+		if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
 			/* nill %r5,0xefff */
 			EMIT4_IMM(0xa5570000, ~VLAN_TAG_PRESENT);
 		} else {
@@ -759,7 +752,7 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 			EMIT4_DISP(0x88500000, 12);
 		}
 		break;
-	case BPF_S_ANC_PKTTYPE:
+	case BPF_ANC | SKF_AD_PKTTYPE:
 		if (pkt_type_offset < 0)
 			goto out;
 		/* lhi %r5,0 */
@@ -769,7 +762,7 @@ call_fn:	/* lg %r1,<d(function)>(%r13) */
 		/* srl %r5,5 */
 		EMIT4_DISP(0x88500000, 5);
 		break;
-	case BPF_S_ANC_CPU: /* A = smp_processor_id() */
+	case BPF_ANC | SKF_AD_CPU: /* A = smp_processor_id() */
 #ifdef CONFIG_SMP
 		/* l %r5,<d(cpu_nr)> */
 		EMIT4_DISP(0x58500000, offsetof(struct _lowcore, cpu_nr));
diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c
index a82c6b2a9780..c88cf147deed 100644
--- a/arch/sparc/net/bpf_jit_comp.c
+++ b/arch/sparc/net/bpf_jit_comp.c
@@ -415,20 +415,11 @@ void bpf_jit_compile(struct sk_filter *fp)
 		emit_reg_move(O7, r_saved_O7);
 
 		switch (filter[0].code) {
-		case BPF_S_RET_K:
-		case BPF_S_LD_W_LEN:
-		case BPF_S_ANC_PROTOCOL:
-		case BPF_S_ANC_PKTTYPE:
-		case BPF_S_ANC_IFINDEX:
-		case BPF_S_ANC_MARK:
-		case BPF_S_ANC_RXHASH:
-		case BPF_S_ANC_VLAN_TAG:
-		case BPF_S_ANC_VLAN_TAG_PRESENT:
-		case BPF_S_ANC_CPU:
-		case BPF_S_ANC_QUEUE:
-		case BPF_S_LD_W_ABS:
-		case BPF_S_LD_H_ABS:
-		case BPF_S_LD_B_ABS:
+		case BPF_RET | BPF_K:
+		case BPF_LD | BPF_W | BPF_LEN:
+		case BPF_LD | BPF_W | BPF_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			/* The first instruction sets the A register (or is
 			 * a "RET 'constant'")
 			 */
@@ -445,59 +436,60 @@ void bpf_jit_compile(struct sk_filter *fp)
 			unsigned int t_offset;
 			unsigned int f_offset;
 			u32 t_op, f_op;
+			u16 code = bpf_anc_helper(&filter[i]);
 			int ilen;
 
-			switch (filter[i].code) {
-			case BPF_S_ALU_ADD_X:	/* A += X; */
+			switch (code) {
+			case BPF_ALU | BPF_ADD | BPF_X:	/* A += X; */
 				emit_alu_X(ADD);
 				break;
-			case BPF_S_ALU_ADD_K:	/* A += K; */
+			case BPF_ALU | BPF_ADD | BPF_K:	/* A += K; */
 				emit_alu_K(ADD, K);
 				break;
-			case BPF_S_ALU_SUB_X:	/* A -= X; */
+			case BPF_ALU | BPF_SUB | BPF_X:	/* A -= X; */
 				emit_alu_X(SUB);
 				break;
-			case BPF_S_ALU_SUB_K:	/* A -= K */
+			case BPF_ALU | BPF_SUB | BPF_K:	/* A -= K */
 				emit_alu_K(SUB, K);
 				break;
-			case BPF_S_ALU_AND_X:	/* A &= X */
+			case BPF_ALU | BPF_AND | BPF_X:	/* A &= X */
 				emit_alu_X(AND);
 				break;
-			case BPF_S_ALU_AND_K:	/* A &= K */
+			case BPF_ALU | BPF_AND | BPF_K:	/* A &= K */
 				emit_alu_K(AND, K);
 				break;
-			case BPF_S_ALU_OR_X:	/* A |= X */
+			case BPF_ALU | BPF_OR | BPF_X:	/* A |= X */
 				emit_alu_X(OR);
 				break;
-			case BPF_S_ALU_OR_K:	/* A |= K */
+			case BPF_ALU | BPF_OR | BPF_K:	/* A |= K */
 				emit_alu_K(OR, K);
 				break;
-			case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
-			case BPF_S_ALU_XOR_X:
+			case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
+			case BPF_ALU | BPF_XOR | BPF_X:
 				emit_alu_X(XOR);
 				break;
-			case BPF_S_ALU_XOR_K:	/* A ^= K */
+			case BPF_ALU | BPF_XOR | BPF_K:	/* A ^= K */
 				emit_alu_K(XOR, K);
 				break;
-			case BPF_S_ALU_LSH_X:	/* A <<= X */
+			case BPF_ALU | BPF_LSH | BPF_X:	/* A <<= X */
 				emit_alu_X(SLL);
 				break;
-			case BPF_S_ALU_LSH_K:	/* A <<= K */
+			case BPF_ALU | BPF_LSH | BPF_K:	/* A <<= K */
 				emit_alu_K(SLL, K);
 				break;
-			case BPF_S_ALU_RSH_X:	/* A >>= X */
+			case BPF_ALU | BPF_RSH | BPF_X:	/* A >>= X */
 				emit_alu_X(SRL);
 				break;
-			case BPF_S_ALU_RSH_K:	/* A >>= K */
+			case BPF_ALU | BPF_RSH | BPF_K:	/* A >>= K */
 				emit_alu_K(SRL, K);
 				break;
-			case BPF_S_ALU_MUL_X:	/* A *= X; */
+			case BPF_ALU | BPF_MUL | BPF_X:	/* A *= X; */
 				emit_alu_X(MUL);
 				break;
-			case BPF_S_ALU_MUL_K:	/* A *= K */
+			case BPF_ALU | BPF_MUL | BPF_K:	/* A *= K */
 				emit_alu_K(MUL, K);
 				break;
-			case BPF_S_ALU_DIV_K:	/* A /= K with K != 0*/
+			case BPF_ALU | BPF_DIV | BPF_K:	/* A /= K with K != 0*/
 				if (K == 1)
 					break;
 				emit_write_y(G0);
@@ -512,7 +504,7 @@ void bpf_jit_compile(struct sk_filter *fp)
 #endif
 				emit_alu_K(DIV, K);
 				break;
-			case BPF_S_ALU_DIV_X:	/* A /= X; */
+			case BPF_ALU | BPF_DIV | BPF_X:	/* A /= X; */
 				emit_cmpi(r_X, 0);
 				if (pc_ret0 > 0) {
 					t_offset = addrs[pc_ret0 - 1];
@@ -544,10 +536,10 @@ void bpf_jit_compile(struct sk_filter *fp)
 #endif
 				emit_alu_X(DIV);
 				break;
-			case BPF_S_ALU_NEG:
+			case BPF_ALU | BPF_NEG:
 				emit_neg();
 				break;
-			case BPF_S_RET_K:
+			case BPF_RET | BPF_K:
 				if (!K) {
 					if (pc_ret0 == -1)
 						pc_ret0 = i;
@@ -556,7 +548,7 @@ void bpf_jit_compile(struct sk_filter *fp)
 					emit_loadimm(K, r_A);
 				}
 				/* Fallthrough */
-			case BPF_S_RET_A:
+			case BPF_RET | BPF_A:
 				if (seen_or_pass0) {
 					if (i != flen - 1) {
 						emit_jump(cleanup_addr);
@@ -573,18 +565,18 @@ void bpf_jit_compile(struct sk_filter *fp)
 				emit_jmpl(r_saved_O7, 8, G0);
 				emit_reg_move(r_A, O0); /* delay slot */
 				break;
-			case BPF_S_MISC_TAX:
+			case BPF_MISC | BPF_TAX:
 				seen |= SEEN_XREG;
 				emit_reg_move(r_A, r_X);
 				break;
-			case BPF_S_MISC_TXA:
+			case BPF_MISC | BPF_TXA:
 				seen |= SEEN_XREG;
 				emit_reg_move(r_X, r_A);
 				break;
-			case BPF_S_ANC_CPU:
+			case BPF_ANC | SKF_AD_CPU:
 				emit_load_cpu(r_A);
 				break;
-			case BPF_S_ANC_PROTOCOL:
+			case BPF_ANC | SKF_AD_PROTOCOL:
 				emit_skb_load16(protocol, r_A);
 				break;
 #if 0
@@ -592,38 +584,38 @@ void bpf_jit_compile(struct sk_filter *fp)
 				 * a bit field even though we very much
 				 * know what we are doing here.
 				 */
-			case BPF_S_ANC_PKTTYPE:
+			case BPF_ANC | SKF_AD_PKTTYPE:
 				__emit_skb_load8(pkt_type, r_A);
 				emit_alu_K(SRL, 5);
 				break;
 #endif
-			case BPF_S_ANC_IFINDEX:
+			case BPF_ANC | SKF_AD_IFINDEX:
 				emit_skb_loadptr(dev, r_A);
 				emit_cmpi(r_A, 0);
 				emit_branch(BNE_PTR, cleanup_addr + 4);
 				emit_nop();
 				emit_load32(r_A, struct net_device, ifindex, r_A);
 				break;
-			case BPF_S_ANC_MARK:
+			case BPF_ANC | SKF_AD_MARK:
 				emit_skb_load32(mark, r_A);
 				break;
-			case BPF_S_ANC_QUEUE:
+			case BPF_ANC | SKF_AD_QUEUE:
 				emit_skb_load16(queue_mapping, r_A);
 				break;
-			case BPF_S_ANC_HATYPE:
+			case BPF_ANC | SKF_AD_HATYPE:
 				emit_skb_loadptr(dev, r_A);
 				emit_cmpi(r_A, 0);
 				emit_branch(BNE_PTR, cleanup_addr + 4);
 				emit_nop();
 				emit_load16(r_A, struct net_device, type, r_A);
 				break;
-			case BPF_S_ANC_RXHASH:
+			case BPF_ANC | SKF_AD_RXHASH:
 				emit_skb_load32(hash, r_A);
 				break;
-			case BPF_S_ANC_VLAN_TAG:
-			case BPF_S_ANC_VLAN_TAG_PRESENT:
+			case BPF_ANC | SKF_AD_VLAN_TAG:
+			case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 				emit_skb_load16(vlan_tci, r_A);
-				if (filter[i].code == BPF_S_ANC_VLAN_TAG) {
+				if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
 					emit_andi(r_A, VLAN_VID_MASK, r_A);
 				} else {
 					emit_loadimm(VLAN_TAG_PRESENT, r_TMP);
@@ -631,44 +623,44 @@ void bpf_jit_compile(struct sk_filter *fp)
 				}
 				break;
 
-			case BPF_S_LD_IMM:
+			case BPF_LD | BPF_IMM:
 				emit_loadimm(K, r_A);
 				break;
-			case BPF_S_LDX_IMM:
+			case BPF_LDX | BPF_IMM:
 				emit_loadimm(K, r_X);
 				break;
-			case BPF_S_LD_MEM:
+			case BPF_LD | BPF_MEM:
 				emit_ldmem(K * 4, r_A);
 				break;
-			case BPF_S_LDX_MEM:
+			case BPF_LDX | BPF_MEM:
 				emit_ldmem(K * 4, r_X);
 				break;
-			case BPF_S_ST:
+			case BPF_ST:
 				emit_stmem(K * 4, r_A);
 				break;
-			case BPF_S_STX:
+			case BPF_STX:
 				emit_stmem(K * 4, r_X);
 				break;
 
 #define CHOOSE_LOAD_FUNC(K, func) \
 	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
 
-			case BPF_S_LD_W_ABS:
+			case BPF_LD | BPF_W | BPF_ABS:
 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word);
 common_load:			seen |= SEEN_DATAREF;
 				emit_loadimm(K, r_OFF);
 				emit_call(func);
 				break;
-			case BPF_S_LD_H_ABS:
+			case BPF_LD | BPF_H | BPF_ABS:
 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half);
 				goto common_load;
-			case BPF_S_LD_B_ABS:
+			case BPF_LD | BPF_B | BPF_ABS:
 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte);
 				goto common_load;
-			case BPF_S_LDX_B_MSH:
+			case BPF_LDX | BPF_B | BPF_MSH:
 				func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh);
 				goto common_load;
-			case BPF_S_LD_W_IND:
+			case BPF_LD | BPF_W | BPF_IND:
 				func = bpf_jit_load_word;
 common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
 				if (K) {
@@ -683,13 +675,13 @@ common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
 				}
 				emit_call(func);
 				break;
-			case BPF_S_LD_H_IND:
+			case BPF_LD | BPF_H | BPF_IND:
 				func = bpf_jit_load_half;
 				goto common_load_ind;
-			case BPF_S_LD_B_IND:
+			case BPF_LD | BPF_B | BPF_IND:
 				func = bpf_jit_load_byte;
 				goto common_load_ind;
-			case BPF_S_JMP_JA:
+			case BPF_JMP | BPF_JA:
 				emit_jump(addrs[i + K]);
 				emit_nop();
 				break;
@@ -700,14 +692,14 @@ common_load_ind:		seen |= SEEN_DATAREF | SEEN_XREG;
 		f_op = FOP;		\
 		goto cond_branch
 
-			COND_SEL(BPF_S_JMP_JGT_K, BGU, BLEU);
-			COND_SEL(BPF_S_JMP_JGE_K, BGEU, BLU);
-			COND_SEL(BPF_S_JMP_JEQ_K, BE, BNE);
-			COND_SEL(BPF_S_JMP_JSET_K, BNE, BE);
-			COND_SEL(BPF_S_JMP_JGT_X, BGU, BLEU);
-			COND_SEL(BPF_S_JMP_JGE_X, BGEU, BLU);
-			COND_SEL(BPF_S_JMP_JEQ_X, BE, BNE);
-			COND_SEL(BPF_S_JMP_JSET_X, BNE, BE);
+			COND_SEL(BPF_JMP | BPF_JGT | BPF_K, BGU, BLEU);
+			COND_SEL(BPF_JMP | BPF_JGE | BPF_K, BGEU, BLU);
+			COND_SEL(BPF_JMP | BPF_JEQ | BPF_K, BE, BNE);
+			COND_SEL(BPF_JMP | BPF_JSET | BPF_K, BNE, BE);
+			COND_SEL(BPF_JMP | BPF_JGT | BPF_X, BGU, BLEU);
+			COND_SEL(BPF_JMP | BPF_JGE | BPF_X, BGEU, BLU);
+			COND_SEL(BPF_JMP | BPF_JEQ | BPF_X, BE, BNE);
+			COND_SEL(BPF_JMP | BPF_JSET | BPF_X, BNE, BE);
 
 cond_branch:			f_offset = addrs[i + filter[i].jf];
 				t_offset = addrs[i + filter[i].jt];
@@ -719,20 +711,20 @@ cond_branch:			f_offset = addrs[i + filter[i].jf];
 					break;
 				}
 
-				switch (filter[i].code) {
-				case BPF_S_JMP_JGT_X:
-				case BPF_S_JMP_JGE_X:
-				case BPF_S_JMP_JEQ_X:
+				switch (code) {
+				case BPF_JMP | BPF_JGT | BPF_X:
+				case BPF_JMP | BPF_JGE | BPF_X:
+				case BPF_JMP | BPF_JEQ | BPF_X:
 					seen |= SEEN_XREG;
 					emit_cmp(r_A, r_X);
 					break;
-				case BPF_S_JMP_JSET_X:
+				case BPF_JMP | BPF_JSET | BPF_X:
 					seen |= SEEN_XREG;
 					emit_btst(r_A, r_X);
 					break;
-				case BPF_S_JMP_JEQ_K:
-				case BPF_S_JMP_JGT_K:
-				case BPF_S_JMP_JGE_K:
+				case BPF_JMP | BPF_JEQ | BPF_K:
+				case BPF_JMP | BPF_JGT | BPF_K:
+				case BPF_JMP | BPF_JGE | BPF_K:
 					if (is_simm13(K)) {
 						emit_cmpi(r_A, K);
 					} else {
@@ -740,7 +732,7 @@ cond_branch:			f_offset = addrs[i + filter[i].jf];
 						emit_cmp(r_A, r_TMP);
 					}
 					break;
-				case BPF_S_JMP_JSET_K:
+				case BPF_JMP | BPF_JSET | BPF_K:
 					if (is_simm13(K)) {
 						emit_btsti(r_A, K);
 					} else {
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 625f4de9bdf2..49ef7a298c92 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -197,7 +197,6 @@ int sk_detach_filter(struct sock *sk);
 int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
 int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 		  unsigned int len);
-void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
 
 void sk_filter_charge(struct sock *sk, struct sk_filter *fp);
 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
@@ -205,6 +204,41 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 void bpf_int_jit_compile(struct sk_filter *fp);
 
+#define BPF_ANC		BIT(15)
+
+static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
+{
+	BUG_ON(ftest->code & BPF_ANC);
+
+	switch (ftest->code) {
+	case BPF_LD | BPF_W | BPF_ABS:
+	case BPF_LD | BPF_H | BPF_ABS:
+	case BPF_LD | BPF_B | BPF_ABS:
+#define BPF_ANCILLARY(CODE)	case SKF_AD_OFF + SKF_AD_##CODE:	\
+				return BPF_ANC | SKF_AD_##CODE
+		switch (ftest->k) {
+		BPF_ANCILLARY(PROTOCOL);
+		BPF_ANCILLARY(PKTTYPE);
+		BPF_ANCILLARY(IFINDEX);
+		BPF_ANCILLARY(NLATTR);
+		BPF_ANCILLARY(NLATTR_NEST);
+		BPF_ANCILLARY(MARK);
+		BPF_ANCILLARY(QUEUE);
+		BPF_ANCILLARY(HATYPE);
+		BPF_ANCILLARY(RXHASH);
+		BPF_ANCILLARY(CPU);
+		BPF_ANCILLARY(ALU_XOR_X);
+		BPF_ANCILLARY(VLAN_TAG);
+		BPF_ANCILLARY(VLAN_TAG_PRESENT);
+		BPF_ANCILLARY(PAY_OFFSET);
+		BPF_ANCILLARY(RANDOM);
+		}
+		/* Fallthrough. */
+	default:
+		return ftest->code;
+	}
+}
+
 #ifdef CONFIG_BPF_JIT
 #include <stdarg.h>
 #include <linux/linkage.h>
@@ -224,86 +258,20 @@ static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
 }
 #else
 #include <linux/slab.h>
+
 static inline void bpf_jit_compile(struct sk_filter *fp)
 {
 }
+
 static inline void bpf_jit_free(struct sk_filter *fp)
 {
 	kfree(fp);
 }
-#endif
+#endif /* CONFIG_BPF_JIT */
 
 static inline int bpf_tell_extensions(void)
 {
 	return SKF_AD_MAX;
 }
 
-enum {
-	BPF_S_RET_K = 1,
-	BPF_S_RET_A,
-	BPF_S_ALU_ADD_K,
-	BPF_S_ALU_ADD_X,
-	BPF_S_ALU_SUB_K,
-	BPF_S_ALU_SUB_X,
-	BPF_S_ALU_MUL_K,
-	BPF_S_ALU_MUL_X,
-	BPF_S_ALU_DIV_X,
-	BPF_S_ALU_MOD_K,
-	BPF_S_ALU_MOD_X,
-	BPF_S_ALU_AND_K,
-	BPF_S_ALU_AND_X,
-	BPF_S_ALU_OR_K,
-	BPF_S_ALU_OR_X,
-	BPF_S_ALU_XOR_K,
-	BPF_S_ALU_XOR_X,
-	BPF_S_ALU_LSH_K,
-	BPF_S_ALU_LSH_X,
-	BPF_S_ALU_RSH_K,
-	BPF_S_ALU_RSH_X,
-	BPF_S_ALU_NEG,
-	BPF_S_LD_W_ABS,
-	BPF_S_LD_H_ABS,
-	BPF_S_LD_B_ABS,
-	BPF_S_LD_W_LEN,
-	BPF_S_LD_W_IND,
-	BPF_S_LD_H_IND,
-	BPF_S_LD_B_IND,
-	BPF_S_LD_IMM,
-	BPF_S_LDX_W_LEN,
-	BPF_S_LDX_B_MSH,
-	BPF_S_LDX_IMM,
-	BPF_S_MISC_TAX,
-	BPF_S_MISC_TXA,
-	BPF_S_ALU_DIV_K,
-	BPF_S_LD_MEM,
-	BPF_S_LDX_MEM,
-	BPF_S_ST,
-	BPF_S_STX,
-	BPF_S_JMP_JA,
-	BPF_S_JMP_JEQ_K,
-	BPF_S_JMP_JEQ_X,
-	BPF_S_JMP_JGE_K,
-	BPF_S_JMP_JGE_X,
-	BPF_S_JMP_JGT_K,
-	BPF_S_JMP_JGT_X,
-	BPF_S_JMP_JSET_K,
-	BPF_S_JMP_JSET_X,
-	/* Ancillary data */
-	BPF_S_ANC_PROTOCOL,
-	BPF_S_ANC_PKTTYPE,
-	BPF_S_ANC_IFINDEX,
-	BPF_S_ANC_NLATTR,
-	BPF_S_ANC_NLATTR_NEST,
-	BPF_S_ANC_MARK,
-	BPF_S_ANC_QUEUE,
-	BPF_S_ANC_HATYPE,
-	BPF_S_ANC_RXHASH,
-	BPF_S_ANC_CPU,
-	BPF_S_ANC_ALU_XOR_X,
-	BPF_S_ANC_VLAN_TAG,
-	BPF_S_ANC_VLAN_TAG_PRESENT,
-	BPF_S_ANC_PAY_OFFSET,
-	BPF_S_ANC_RANDOM,
-};
-
 #endif /* __LINUX_FILTER_H__ */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 1036b6f2fded..44e69483b383 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -103,60 +103,59 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
 		u32 k = ftest->k;
 
 		switch (code) {
-		case BPF_S_LD_W_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
 			ftest->code = BPF_LDX | BPF_W | BPF_ABS;
 			/* 32-bit aligned and not out of bounds. */
 			if (k >= sizeof(struct seccomp_data) || k & 3)
 				return -EINVAL;
 			continue;
-		case BPF_S_LD_W_LEN:
+		case BPF_LD | BPF_W | BPF_LEN:
 			ftest->code = BPF_LD | BPF_IMM;
 			ftest->k = sizeof(struct seccomp_data);
 			continue;
-		case BPF_S_LDX_W_LEN:
+		case BPF_LDX | BPF_W | BPF_LEN:
 			ftest->code = BPF_LDX | BPF_IMM;
 			ftest->k = sizeof(struct seccomp_data);
 			continue;
 		/* Explicitly include allowed calls. */
-		case BPF_S_RET_K:
-		case BPF_S_RET_A:
-		case BPF_S_ALU_ADD_K:
-		case BPF_S_ALU_ADD_X:
-		case BPF_S_ALU_SUB_K:
-		case BPF_S_ALU_SUB_X:
-		case BPF_S_ALU_MUL_K:
-		case BPF_S_ALU_MUL_X:
-		case BPF_S_ALU_DIV_X:
-		case BPF_S_ALU_AND_K:
-		case BPF_S_ALU_AND_X:
-		case BPF_S_ALU_OR_K:
-		case BPF_S_ALU_OR_X:
-		case BPF_S_ALU_XOR_K:
-		case BPF_S_ALU_XOR_X:
-		case BPF_S_ALU_LSH_K:
-		case BPF_S_ALU_LSH_X:
-		case BPF_S_ALU_RSH_K:
-		case BPF_S_ALU_RSH_X:
-		case BPF_S_ALU_NEG:
-		case BPF_S_LD_IMM:
-		case BPF_S_LDX_IMM:
-		case BPF_S_MISC_TAX:
-		case BPF_S_MISC_TXA:
-		case BPF_S_ALU_DIV_K:
-		case BPF_S_LD_MEM:
-		case BPF_S_LDX_MEM:
-		case BPF_S_ST:
-		case BPF_S_STX:
-		case BPF_S_JMP_JA:
-		case BPF_S_JMP_JEQ_K:
-		case BPF_S_JMP_JEQ_X:
-		case BPF_S_JMP_JGE_K:
-		case BPF_S_JMP_JGE_X:
-		case BPF_S_JMP_JGT_K:
-		case BPF_S_JMP_JGT_X:
-		case BPF_S_JMP_JSET_K:
-		case BPF_S_JMP_JSET_X:
-			sk_decode_filter(ftest, ftest);
+		case BPF_RET | BPF_K:
+		case BPF_RET | BPF_A:
+		case BPF_ALU | BPF_ADD | BPF_K:
+		case BPF_ALU | BPF_ADD | BPF_X:
+		case BPF_ALU | BPF_SUB | BPF_K:
+		case BPF_ALU | BPF_SUB | BPF_X:
+		case BPF_ALU | BPF_MUL | BPF_K:
+		case BPF_ALU | BPF_MUL | BPF_X:
+		case BPF_ALU | BPF_DIV | BPF_K:
+		case BPF_ALU | BPF_DIV | BPF_X:
+		case BPF_ALU | BPF_AND | BPF_K:
+		case BPF_ALU | BPF_AND | BPF_X:
+		case BPF_ALU | BPF_OR | BPF_K:
+		case BPF_ALU | BPF_OR | BPF_X:
+		case BPF_ALU | BPF_XOR | BPF_K:
+		case BPF_ALU | BPF_XOR | BPF_X:
+		case BPF_ALU | BPF_LSH | BPF_K:
+		case BPF_ALU | BPF_LSH | BPF_X:
+		case BPF_ALU | BPF_RSH | BPF_K:
+		case BPF_ALU | BPF_RSH | BPF_X:
+		case BPF_ALU | BPF_NEG:
+		case BPF_LD | BPF_IMM:
+		case BPF_LDX | BPF_IMM:
+		case BPF_MISC | BPF_TAX:
+		case BPF_MISC | BPF_TXA:
+		case BPF_LD | BPF_MEM:
+		case BPF_LDX | BPF_MEM:
+		case BPF_ST:
+		case BPF_STX:
+		case BPF_JMP | BPF_JA:
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
 			continue;
 		default:
 			return -EINVAL;
diff --git a/net/core/filter.c b/net/core/filter.c
index 2c2d35d9d101..328aaf6ff4d1 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -536,11 +536,13 @@ load_word:
 		 * Output:
 		 *   BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
 		 */
+
 		ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
 		if (likely(ptr != NULL)) {
 			BPF_R0 = get_unaligned_be32(ptr);
 			CONT;
 		}
+
 		return 0;
 	LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */
 		off = K;
@@ -550,6 +552,7 @@ load_half:
 			BPF_R0 = get_unaligned_be16(ptr);
 			CONT;
 		}
+
 		return 0;
 	LD_ABS_B: /* BPF_R0 = *(u8 *) (ctx + K) */
 		off = K;
@@ -559,6 +562,7 @@ load_byte:
 			BPF_R0 = *(u8 *)ptr;
 			CONT;
 		}
+
 		return 0;
 	LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */
 		off = K + X;
@@ -1136,44 +1140,46 @@ err:
  */
 static int check_load_and_stores(struct sock_filter *filter, int flen)
 {
-	u16 *masks, memvalid = 0; /* one bit per cell, 16 cells */
+	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
 	int pc, ret = 0;
 
 	BUILD_BUG_ON(BPF_MEMWORDS > 16);
+
 	masks = kmalloc(flen * sizeof(*masks), GFP_KERNEL);
 	if (!masks)
 		return -ENOMEM;
+
 	memset(masks, 0xff, flen * sizeof(*masks));
 
 	for (pc = 0; pc < flen; pc++) {
 		memvalid &= masks[pc];
 
 		switch (filter[pc].code) {
-		case BPF_S_ST:
-		case BPF_S_STX:
+		case BPF_ST:
+		case BPF_STX:
 			memvalid |= (1 << filter[pc].k);
 			break;
-		case BPF_S_LD_MEM:
-		case BPF_S_LDX_MEM:
+		case BPF_LD | BPF_MEM:
+		case BPF_LDX | BPF_MEM:
 			if (!(memvalid & (1 << filter[pc].k))) {
 				ret = -EINVAL;
 				goto error;
 			}
 			break;
-		case BPF_S_JMP_JA:
-			/* a jump must set masks on target */
+		case BPF_JMP | BPF_JA:
+			/* A jump must set masks on target */
 			masks[pc + 1 + filter[pc].k] &= memvalid;
 			memvalid = ~0;
 			break;
-		case BPF_S_JMP_JEQ_K:
-		case BPF_S_JMP_JEQ_X:
-		case BPF_S_JMP_JGE_K:
-		case BPF_S_JMP_JGE_X:
-		case BPF_S_JMP_JGT_K:
-		case BPF_S_JMP_JGT_X:
-		case BPF_S_JMP_JSET_X:
-		case BPF_S_JMP_JSET_K:
-			/* a jump must set masks on targets */
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
+			/* A jump must set masks on targets */
 			masks[pc + 1 + filter[pc].jt] &= memvalid;
 			masks[pc + 1 + filter[pc].jf] &= memvalid;
 			memvalid = ~0;
@@ -1185,6 +1191,72 @@ error:
 	return ret;
 }
 
+static bool chk_code_allowed(u16 code_to_probe)
+{
+	static const bool codes[] = {
+		/* 32 bit ALU operations */
+		[BPF_ALU | BPF_ADD | BPF_K] = true,
+		[BPF_ALU | BPF_ADD | BPF_X] = true,
+		[BPF_ALU | BPF_SUB | BPF_K] = true,
+		[BPF_ALU | BPF_SUB | BPF_X] = true,
+		[BPF_ALU | BPF_MUL | BPF_K] = true,
+		[BPF_ALU | BPF_MUL | BPF_X] = true,
+		[BPF_ALU | BPF_DIV | BPF_K] = true,
+		[BPF_ALU | BPF_DIV | BPF_X] = true,
+		[BPF_ALU | BPF_MOD | BPF_K] = true,
+		[BPF_ALU | BPF_MOD | BPF_X] = true,
+		[BPF_ALU | BPF_AND | BPF_K] = true,
+		[BPF_ALU | BPF_AND | BPF_X] = true,
+		[BPF_ALU | BPF_OR | BPF_K] = true,
+		[BPF_ALU | BPF_OR | BPF_X] = true,
+		[BPF_ALU | BPF_XOR | BPF_K] = true,
+		[BPF_ALU | BPF_XOR | BPF_X] = true,
+		[BPF_ALU | BPF_LSH | BPF_K] = true,
+		[BPF_ALU | BPF_LSH | BPF_X] = true,
+		[BPF_ALU | BPF_RSH | BPF_K] = true,
+		[BPF_ALU | BPF_RSH | BPF_X] = true,
+		[BPF_ALU | BPF_NEG] = true,
+		/* Load instructions */
+		[BPF_LD | BPF_W | BPF_ABS] = true,
+		[BPF_LD | BPF_H | BPF_ABS] = true,
+		[BPF_LD | BPF_B | BPF_ABS] = true,
+		[BPF_LD | BPF_W | BPF_LEN] = true,
+		[BPF_LD | BPF_W | BPF_IND] = true,
+		[BPF_LD | BPF_H | BPF_IND] = true,
+		[BPF_LD | BPF_B | BPF_IND] = true,
+		[BPF_LD | BPF_IMM] = true,
+		[BPF_LD | BPF_MEM] = true,
+		[BPF_LDX | BPF_W | BPF_LEN] = true,
+		[BPF_LDX | BPF_B | BPF_MSH] = true,
+		[BPF_LDX | BPF_IMM] = true,
+		[BPF_LDX | BPF_MEM] = true,
+		/* Store instructions */
+		[BPF_ST] = true,
+		[BPF_STX] = true,
+		/* Misc instructions */
+		[BPF_MISC | BPF_TAX] = true,
+		[BPF_MISC | BPF_TXA] = true,
+		/* Return instructions */
+		[BPF_RET | BPF_K] = true,
+		[BPF_RET | BPF_A] = true,
+		/* Jump instructions */
+		[BPF_JMP | BPF_JA] = true,
+		[BPF_JMP | BPF_JEQ | BPF_K] = true,
+		[BPF_JMP | BPF_JEQ | BPF_X] = true,
+		[BPF_JMP | BPF_JGE | BPF_K] = true,
+		[BPF_JMP | BPF_JGE | BPF_X] = true,
+		[BPF_JMP | BPF_JGT | BPF_K] = true,
+		[BPF_JMP | BPF_JGT | BPF_X] = true,
+		[BPF_JMP | BPF_JSET | BPF_K] = true,
+		[BPF_JMP | BPF_JSET | BPF_X] = true,
+	};
+
+	if (code_to_probe >= ARRAY_SIZE(codes))
+		return false;
+
+	return codes[code_to_probe];
+}
+
 /**
  *	sk_chk_filter - verify socket filter code
  *	@filter: filter to verify
@@ -1201,154 +1273,76 @@ error:
  */
 int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 {
-	/*
-	 * Valid instructions are initialized to non-0.
-	 * Invalid instructions are initialized to 0.
-	 */
-	static const u8 codes[] = {
-		[BPF_ALU|BPF_ADD|BPF_K]  = BPF_S_ALU_ADD_K,
-		[BPF_ALU|BPF_ADD|BPF_X]  = BPF_S_ALU_ADD_X,
-		[BPF_ALU|BPF_SUB|BPF_K]  = BPF_S_ALU_SUB_K,
-		[BPF_ALU|BPF_SUB|BPF_X]  = BPF_S_ALU_SUB_X,
-		[BPF_ALU|BPF_MUL|BPF_K]  = BPF_S_ALU_MUL_K,
-		[BPF_ALU|BPF_MUL|BPF_X]  = BPF_S_ALU_MUL_X,
-		[BPF_ALU|BPF_DIV|BPF_X]  = BPF_S_ALU_DIV_X,
-		[BPF_ALU|BPF_MOD|BPF_K]  = BPF_S_ALU_MOD_K,
-		[BPF_ALU|BPF_MOD|BPF_X]  = BPF_S_ALU_MOD_X,
-		[BPF_ALU|BPF_AND|BPF_K]  = BPF_S_ALU_AND_K,
-		[BPF_ALU|BPF_AND|BPF_X]  = BPF_S_ALU_AND_X,
-		[BPF_ALU|BPF_OR|BPF_K]   = BPF_S_ALU_OR_K,
-		[BPF_ALU|BPF_OR|BPF_X]   = BPF_S_ALU_OR_X,
-		[BPF_ALU|BPF_XOR|BPF_K]  = BPF_S_ALU_XOR_K,
-		[BPF_ALU|BPF_XOR|BPF_X]  = BPF_S_ALU_XOR_X,
-		[BPF_ALU|BPF_LSH|BPF_K]  = BPF_S_ALU_LSH_K,
-		[BPF_ALU|BPF_LSH|BPF_X]  = BPF_S_ALU_LSH_X,
-		[BPF_ALU|BPF_RSH|BPF_K]  = BPF_S_ALU_RSH_K,
-		[BPF_ALU|BPF_RSH|BPF_X]  = BPF_S_ALU_RSH_X,
-		[BPF_ALU|BPF_NEG]        = BPF_S_ALU_NEG,
-		[BPF_LD|BPF_W|BPF_ABS]   = BPF_S_LD_W_ABS,
-		[BPF_LD|BPF_H|BPF_ABS]   = BPF_S_LD_H_ABS,
-		[BPF_LD|BPF_B|BPF_ABS]   = BPF_S_LD_B_ABS,
-		[BPF_LD|BPF_W|BPF_LEN]   = BPF_S_LD_W_LEN,
-		[BPF_LD|BPF_W|BPF_IND]   = BPF_S_LD_W_IND,
-		[BPF_LD|BPF_H|BPF_IND]   = BPF_S_LD_H_IND,
-		[BPF_LD|BPF_B|BPF_IND]   = BPF_S_LD_B_IND,
-		[BPF_LD|BPF_IMM]         = BPF_S_LD_IMM,
-		[BPF_LDX|BPF_W|BPF_LEN]  = BPF_S_LDX_W_LEN,
-		[BPF_LDX|BPF_B|BPF_MSH]  = BPF_S_LDX_B_MSH,
-		[BPF_LDX|BPF_IMM]        = BPF_S_LDX_IMM,
-		[BPF_MISC|BPF_TAX]       = BPF_S_MISC_TAX,
-		[BPF_MISC|BPF_TXA]       = BPF_S_MISC_TXA,
-		[BPF_RET|BPF_K]          = BPF_S_RET_K,
-		[BPF_RET|BPF_A]          = BPF_S_RET_A,
-		[BPF_ALU|BPF_DIV|BPF_K]  = BPF_S_ALU_DIV_K,
-		[BPF_LD|BPF_MEM]         = BPF_S_LD_MEM,
-		[BPF_LDX|BPF_MEM]        = BPF_S_LDX_MEM,
-		[BPF_ST]                 = BPF_S_ST,
-		[BPF_STX]                = BPF_S_STX,
-		[BPF_JMP|BPF_JA]         = BPF_S_JMP_JA,
-		[BPF_JMP|BPF_JEQ|BPF_K]  = BPF_S_JMP_JEQ_K,
-		[BPF_JMP|BPF_JEQ|BPF_X]  = BPF_S_JMP_JEQ_X,
-		[BPF_JMP|BPF_JGE|BPF_K]  = BPF_S_JMP_JGE_K,
-		[BPF_JMP|BPF_JGE|BPF_X]  = BPF_S_JMP_JGE_X,
-		[BPF_JMP|BPF_JGT|BPF_K]  = BPF_S_JMP_JGT_K,
-		[BPF_JMP|BPF_JGT|BPF_X]  = BPF_S_JMP_JGT_X,
-		[BPF_JMP|BPF_JSET|BPF_K] = BPF_S_JMP_JSET_K,
-		[BPF_JMP|BPF_JSET|BPF_X] = BPF_S_JMP_JSET_X,
-	};
-	int pc;
 	bool anc_found;
+	int pc;
 
 	if (flen == 0 || flen > BPF_MAXINSNS)
 		return -EINVAL;
 
-	/* check the filter code now */
+	/* Check the filter code now */
 	for (pc = 0; pc < flen; pc++) {
 		struct sock_filter *ftest = &filter[pc];
-		u16 code = ftest->code;
 
-		if (code >= ARRAY_SIZE(codes))
-			return -EINVAL;
-		code = codes[code];
-		if (!code)
+		/* May we actually operate on this code? */
+		if (!chk_code_allowed(ftest->code))
 			return -EINVAL;
+
 		/* Some instructions need special checks */
-		switch (code) {
-		case BPF_S_ALU_DIV_K:
-		case BPF_S_ALU_MOD_K:
-			/* check for division by zero */
+		switch (ftest->code) {
+		case BPF_ALU | BPF_DIV | BPF_K:
+		case BPF_ALU | BPF_MOD | BPF_K:
+			/* Check for division by zero */
 			if (ftest->k == 0)
 				return -EINVAL;
 			break;
-		case BPF_S_LD_MEM:
-		case BPF_S_LDX_MEM:
-		case BPF_S_ST:
-		case BPF_S_STX:
-			/* check for invalid memory addresses */
+		case BPF_LD | BPF_MEM:
+		case BPF_LDX | BPF_MEM:
+		case BPF_ST:
+		case BPF_STX:
+			/* Check for invalid memory addresses */
 			if (ftest->k >= BPF_MEMWORDS)
 				return -EINVAL;
 			break;
-		case BPF_S_JMP_JA:
-			/*
-			 * Note, the large ftest->k might cause loops.
+		case BPF_JMP | BPF_JA:
+			/* Note, the large ftest->k might cause loops.
 			 * Compare this with conditional jumps below,
 			 * where offsets are limited. --ANK (981016)
 			 */
-			if (ftest->k >= (unsigned int)(flen-pc-1))
+			if (ftest->k >= (unsigned int)(flen - pc - 1))
 				return -EINVAL;
 			break;
-		case BPF_S_JMP_JEQ_K:
-		case BPF_S_JMP_JEQ_X:
-		case BPF_S_JMP_JGE_K:
-		case BPF_S_JMP_JGE_X:
-		case BPF_S_JMP_JGT_K:
-		case BPF_S_JMP_JGT_X:
-		case BPF_S_JMP_JSET_X:
-		case BPF_S_JMP_JSET_K:
-			/* for conditionals both must be safe */
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
+			/* Both conditionals must be safe */
 			if (pc + ftest->jt + 1 >= flen ||
 			    pc + ftest->jf + 1 >= flen)
 				return -EINVAL;
 			break;
-		case BPF_S_LD_W_ABS:
-		case BPF_S_LD_H_ABS:
-		case BPF_S_LD_B_ABS:
+		case BPF_LD | BPF_W | BPF_ABS:
+		case BPF_LD | BPF_H | BPF_ABS:
+		case BPF_LD | BPF_B | BPF_ABS:
 			anc_found = false;
-#define ANCILLARY(CODE) case SKF_AD_OFF + SKF_AD_##CODE:	\
-				code = BPF_S_ANC_##CODE;	\
-				anc_found = true;		\
-				break
-			switch (ftest->k) {
-			ANCILLARY(PROTOCOL);
-			ANCILLARY(PKTTYPE);
-			ANCILLARY(IFINDEX);
-			ANCILLARY(NLATTR);
-			ANCILLARY(NLATTR_NEST);
-			ANCILLARY(MARK);
-			ANCILLARY(QUEUE);
-			ANCILLARY(HATYPE);
-			ANCILLARY(RXHASH);
-			ANCILLARY(CPU);
-			ANCILLARY(ALU_XOR_X);
-			ANCILLARY(VLAN_TAG);
-			ANCILLARY(VLAN_TAG_PRESENT);
-			ANCILLARY(PAY_OFFSET);
-			ANCILLARY(RANDOM);
-			}
-
-			/* ancillary operation unknown or unsupported */
+			if (bpf_anc_helper(ftest) & BPF_ANC)
+				anc_found = true;
+			/* Ancillary operation unknown or unsupported */
 			if (anc_found == false && ftest->k >= SKF_AD_OFF)
 				return -EINVAL;
 		}
-		ftest->code = code;
 	}
 
-	/* last instruction must be a RET code */
+	/* Last instruction must be a RET code */
 	switch (filter[flen - 1].code) {
-	case BPF_S_RET_K:
-	case BPF_S_RET_A:
+	case BPF_RET | BPF_K:
+	case BPF_RET | BPF_A:
 		return check_load_and_stores(filter, flen);
 	}
+
 	return -EINVAL;
 }
 EXPORT_SYMBOL(sk_chk_filter);
@@ -1448,7 +1442,7 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
 {
 	struct sock_filter *old_prog;
 	struct sk_filter *old_fp;
-	int i, err, new_len, old_len = fp->len;
+	int err, new_len, old_len = fp->len;
 
 	/* We are free to overwrite insns et al right here as it
 	 * won't be used at this point in time anymore internally
@@ -1458,13 +1452,6 @@ static struct sk_filter *__sk_migrate_filter(struct sk_filter *fp,
 	BUILD_BUG_ON(sizeof(struct sock_filter) !=
 		     sizeof(struct sock_filter_int));
 
-	/* For now, we need to unfiddle BPF_S_* identifiers in place.
-	 * This can sooner or later on be subject to removal, e.g. when
-	 * JITs have been converted.
-	 */
-	for (i = 0; i < fp->len; i++)
-		sk_decode_filter(&fp->insns[i], &fp->insns[i]);
-
 	/* Conversion cannot happen on overlapping memory areas,
 	 * so we need to keep the user BPF around until the 2nd
 	 * pass. At this time, the user BPF is stored in fp->insns.
@@ -1706,84 +1693,6 @@ int sk_detach_filter(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(sk_detach_filter);
 
-void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
-{
-	static const u16 decodes[] = {
-		[BPF_S_ALU_ADD_K]	= BPF_ALU|BPF_ADD|BPF_K,
-		[BPF_S_ALU_ADD_X]	= BPF_ALU|BPF_ADD|BPF_X,
-		[BPF_S_ALU_SUB_K]	= BPF_ALU|BPF_SUB|BPF_K,
-		[BPF_S_ALU_SUB_X]	= BPF_ALU|BPF_SUB|BPF_X,
-		[BPF_S_ALU_MUL_K]	= BPF_ALU|BPF_MUL|BPF_K,
-		[BPF_S_ALU_MUL_X]	= BPF_ALU|BPF_MUL|BPF_X,
-		[BPF_S_ALU_DIV_X]	= BPF_ALU|BPF_DIV|BPF_X,
-		[BPF_S_ALU_MOD_K]	= BPF_ALU|BPF_MOD|BPF_K,
-		[BPF_S_ALU_MOD_X]	= BPF_ALU|BPF_MOD|BPF_X,
-		[BPF_S_ALU_AND_K]	= BPF_ALU|BPF_AND|BPF_K,
-		[BPF_S_ALU_AND_X]	= BPF_ALU|BPF_AND|BPF_X,
-		[BPF_S_ALU_OR_K]	= BPF_ALU|BPF_OR|BPF_K,
-		[BPF_S_ALU_OR_X]	= BPF_ALU|BPF_OR|BPF_X,
-		[BPF_S_ALU_XOR_K]	= BPF_ALU|BPF_XOR|BPF_K,
-		[BPF_S_ALU_XOR_X]	= BPF_ALU|BPF_XOR|BPF_X,
-		[BPF_S_ALU_LSH_K]	= BPF_ALU|BPF_LSH|BPF_K,
-		[BPF_S_ALU_LSH_X]	= BPF_ALU|BPF_LSH|BPF_X,
-		[BPF_S_ALU_RSH_K]	= BPF_ALU|BPF_RSH|BPF_K,
-		[BPF_S_ALU_RSH_X]	= BPF_ALU|BPF_RSH|BPF_X,
-		[BPF_S_ALU_NEG]		= BPF_ALU|BPF_NEG,
-		[BPF_S_LD_W_ABS]	= BPF_LD|BPF_W|BPF_ABS,
-		[BPF_S_LD_H_ABS]	= BPF_LD|BPF_H|BPF_ABS,
-		[BPF_S_LD_B_ABS]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_PROTOCOL]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_PKTTYPE]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_IFINDEX]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_NLATTR]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_NLATTR_NEST]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_MARK]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_QUEUE]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_HATYPE]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_RXHASH]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_CPU]		= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_ALU_XOR_X]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_PAY_OFFSET]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_ANC_RANDOM]	= BPF_LD|BPF_B|BPF_ABS,
-		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
-		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
-		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
-		[BPF_S_LD_B_IND]	= BPF_LD|BPF_B|BPF_IND,
-		[BPF_S_LD_IMM]		= BPF_LD|BPF_IMM,
-		[BPF_S_LDX_W_LEN]	= BPF_LDX|BPF_W|BPF_LEN,
-		[BPF_S_LDX_B_MSH]	= BPF_LDX|BPF_B|BPF_MSH,
-		[BPF_S_LDX_IMM]		= BPF_LDX|BPF_IMM,
-		[BPF_S_MISC_TAX]	= BPF_MISC|BPF_TAX,
-		[BPF_S_MISC_TXA]	= BPF_MISC|BPF_TXA,
-		[BPF_S_RET_K]		= BPF_RET|BPF_K,
-		[BPF_S_RET_A]		= BPF_RET|BPF_A,
-		[BPF_S_ALU_DIV_K]	= BPF_ALU|BPF_DIV|BPF_K,
-		[BPF_S_LD_MEM]		= BPF_LD|BPF_MEM,
-		[BPF_S_LDX_MEM]		= BPF_LDX|BPF_MEM,
-		[BPF_S_ST]		= BPF_ST,
-		[BPF_S_STX]		= BPF_STX,
-		[BPF_S_JMP_JA]		= BPF_JMP|BPF_JA,
-		[BPF_S_JMP_JEQ_K]	= BPF_JMP|BPF_JEQ|BPF_K,
-		[BPF_S_JMP_JEQ_X]	= BPF_JMP|BPF_JEQ|BPF_X,
-		[BPF_S_JMP_JGE_K]	= BPF_JMP|BPF_JGE|BPF_K,
-		[BPF_S_JMP_JGE_X]	= BPF_JMP|BPF_JGE|BPF_X,
-		[BPF_S_JMP_JGT_K]	= BPF_JMP|BPF_JGT|BPF_K,
-		[BPF_S_JMP_JGT_X]	= BPF_JMP|BPF_JGT|BPF_X,
-		[BPF_S_JMP_JSET_K]	= BPF_JMP|BPF_JSET|BPF_K,
-		[BPF_S_JMP_JSET_X]	= BPF_JMP|BPF_JSET|BPF_X,
-	};
-	u16 code;
-
-	code = filt->code;
-
-	to->code = decodes[code];
-	to->jt = filt->jt;
-	to->jf = filt->jf;
-	to->k = filt->k;
-}
-
 int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
 		  unsigned int len)
 {
-- 
cgit 


From f8f6d679aaa78b989d9aee8d2935066fbdca2a30 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Thu, 29 May 2014 10:22:51 +0200
Subject: net: filter: improve filter block macros

Commit 9739eef13c92 ("net: filter: make BPF conversion more readable")
started to introduce helper macros similar to BPF_STMT()/BPF_JUMP()
macros from classic BPF.

However, quite a few statements in the filter conversion functions
remained in the old style, which gives a mixture of block macros and
non-block macros in the code. This patch makes the block macros themselves
more readable by using explicit member initialization, and converts
the remaining statements where possible to leave the code in a more consistent state.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 255 +++++++++++++++++++++++++++++++++++++++----------
 net/core/filter.c      | 196 ++++++++++++++-----------------------
 2 files changed, 277 insertions(+), 174 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 49ef7a298c92..f0c2ad43b4af 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -76,56 +76,211 @@ enum {
 /* BPF program can access up to 512 bytes of stack space. */
 #define MAX_BPF_STACK	512
 
-/* bpf_add|sub|...: a += x, bpf_mov: a = x */
-#define BPF_ALU64_REG(op, a, x) \
-	((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_X, a, x, 0, 0})
-#define BPF_ALU32_REG(op, a, x) \
-	((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_X, a, x, 0, 0})
-
-/* bpf_add|sub|...: a += imm, bpf_mov: a = imm */
-#define BPF_ALU64_IMM(op, a, imm) \
-	((struct sock_filter_int) {BPF_ALU64|BPF_OP(op)|BPF_K, a, 0, 0, imm})
-#define BPF_ALU32_IMM(op, a, imm) \
-	((struct sock_filter_int) {BPF_ALU|BPF_OP(op)|BPF_K, a, 0, 0, imm})
-
-/* R0 = *(uint *) (skb->data + off) */
-#define BPF_LD_ABS(size, off) \
-	((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_ABS, 0, 0, 0, off})
-
-/* R0 = *(uint *) (skb->data + x + off) */
-#define BPF_LD_IND(size, x, off) \
-	((struct sock_filter_int) {BPF_LD|BPF_SIZE(size)|BPF_IND, 0, x, 0, off})
-
-/* a = *(uint *) (x + off) */
-#define BPF_LDX_MEM(sz, a, x, off) \
-	((struct sock_filter_int) {BPF_LDX|BPF_SIZE(sz)|BPF_MEM, a, x, off, 0})
-
-/* if (a 'op' x) goto pc+off */
-#define BPF_JMP_REG(op, a, x, off) \
-	((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_X, a, x, off, 0})
-
-/* if (a 'op' imm) goto pc+off */
-#define BPF_JMP_IMM(op, a, imm, off) \
-	((struct sock_filter_int) {BPF_JMP|BPF_OP(op)|BPF_K, a, 0, off, imm})
-
-#define BPF_EXIT_INSN() \
-	((struct sock_filter_int) {BPF_JMP|BPF_EXIT, 0, 0, 0, 0})
-
-static inline int size_to_bpf(int size)
-{
-	switch (size) {
-	case 1:
-		return BPF_B;
-	case 2:
-		return BPF_H;
-	case 4:
-		return BPF_W;
-	case 8:
-		return BPF_DW;
-	default:
-		return -EINVAL;
-	}
-}
+/* Helper macros for filter block array initializers. */
+
+/* ALU ops on registers, bpf_add|sub|...: A += X */
+
+#define BPF_ALU64_REG(OP, A, X)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,	\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+#define BPF_ALU32_REG(OP, A, X)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_OP(OP) | BPF_X,		\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: A += IMM */
+
+#define BPF_ALU64_IMM(OP, A, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
+		.a_reg = A,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+#define BPF_ALU32_IMM(OP, A, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_OP(OP) | BPF_K,		\
+		.a_reg = A,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Endianness conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
+
+#define BPF_ENDIAN(TYPE, A, LEN)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_END | BPF_SRC(TYPE),	\
+		.a_reg = A,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = LEN })
+
+/* Short form of mov, A = X */
+
+#define BPF_MOV64_REG(A, X)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+#define BPF_MOV32_REG(A, X)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+/* Short form of mov, A = IMM */
+
+#define BPF_MOV64_IMM(A, IMM)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_K,		\
+		.a_reg = A,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+#define BPF_MOV32_IMM(A, IMM)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_MOV | BPF_K,		\
+		.a_reg = A,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Short form of mov based on type, BPF_X: A = X,  BPF_K: A = IMM */
+
+#define BPF_MOV64_RAW(TYPE, A, X, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE),	\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+#define BPF_MOV32_RAW(TYPE, A, X, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ALU | BPF_MOV | BPF_SRC(TYPE),	\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = IMM })
+
+/* Direct packet access, R0 = *(uint *) (skb->data + OFF) */
+
+#define BPF_LD_ABS(SIZE, OFF)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,	\
+		.a_reg = 0,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = OFF })
+
+/* Indirect packet access, R0 = *(uint *) (skb->data + X + OFF) */
+
+#define BPF_LD_IND(SIZE, X, OFF)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_IND,	\
+		.a_reg = 0,					\
+		.x_reg = X,					\
+		.off   = 0,					\
+		.imm   = OFF })
+
+/* Memory load, A = *(uint *) (X + OFF), and vice versa (store) */
+
+#define BPF_LDX_MEM(SIZE, A, X, OFF)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,	\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+#define BPF_STX_MEM(SIZE, A, X, OFF)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,	\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Conditional jumps against registers, if (A 'op' X) goto pc + OFF */
+
+#define BPF_JMP_REG(OP, A, X, OFF)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_JMP | BPF_OP(OP) | BPF_X,		\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = OFF,					\
+		.imm   = 0 })
+
+/* Conditional jumps against immediates, if (A 'op' IMM) goto pc + OFF */
+
+#define BPF_JMP_IMM(OP, A, IMM, OFF)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_JMP | BPF_OP(OP) | BPF_K,		\
+		.a_reg = A,					\
+		.x_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
+/* Function call */
+
+#define BPF_EMIT_CALL(FUNC)					\
+	((struct sock_filter_int) {				\
+		.code  = BPF_JMP | BPF_CALL,			\
+		.a_reg = 0,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = ((FUNC) - __bpf_call_base) })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, A, X, OFF, IMM)			\
+	((struct sock_filter_int) {				\
+		.code  = CODE,					\
+		.a_reg = A,					\
+		.x_reg = X,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN()						\
+	((struct sock_filter_int) {				\
+		.code  = BPF_JMP | BPF_EXIT,			\
+		.a_reg = 0,					\
+		.x_reg = 0,					\
+		.off   = 0,					\
+		.imm   = 0 })
+
+#define bytes_to_bpf_size(bytes)				\
+({								\
+	int bpf_size = -EINVAL;					\
+								\
+	if (bytes == sizeof(u8))				\
+		bpf_size = BPF_B;				\
+	else if (bytes == sizeof(u16))				\
+		bpf_size = BPF_H;				\
+	else if (bytes == sizeof(u32))				\
+		bpf_size = BPF_W;				\
+	else if (bytes == sizeof(u64))				\
+		bpf_size = BPF_DW;				\
+								\
+	bpf_size;						\
+})
 
 /* Macro to invoke filter function. */
 #define SK_RUN_FILTER(filter, ctx)  (*filter->bpf_func)(ctx, filter->insnsi)
diff --git a/net/core/filter.c b/net/core/filter.c
index 328aaf6ff4d1..842f8393121d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -672,14 +672,10 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
 
 		/* A = *(u16 *) (ctx + offsetof(protocol)) */
-		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
-				    offsetof(struct sk_buff, protocol));
-		insn++;
-
+		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				      offsetof(struct sk_buff, protocol));
 		/* A = ntohs(A) [emitting a nop or swap16] */
-		insn->code = BPF_ALU | BPF_END | BPF_FROM_BE;
-		insn->a_reg = BPF_REG_A;
-		insn->imm = 16;
+		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
@@ -688,37 +684,27 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		if (insn->off < 0)
 			return false;
 		insn++;
-
 		*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
 		break;
 
 	case SKF_AD_OFF + SKF_AD_IFINDEX:
 	case SKF_AD_OFF + SKF_AD_HATYPE:
-		*insn = BPF_LDX_MEM(size_to_bpf(FIELD_SIZEOF(struct sk_buff, dev)),
-				    BPF_REG_TMP, BPF_REG_CTX,
-				    offsetof(struct sk_buff, dev));
-		insn++;
-
-		/* if (tmp != 0) goto pc+1 */
-		*insn = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
-		insn++;
-
-		*insn = BPF_EXIT_INSN();
-		insn++;
-
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
 		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
-
-		insn->a_reg = BPF_REG_A;
-		insn->x_reg = BPF_REG_TMP;
-
-		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX) {
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->off = offsetof(struct net_device, ifindex);
-		} else {
-			insn->code = BPF_LDX | BPF_MEM | BPF_H;
-			insn->off = offsetof(struct net_device, type);
-		}
+		BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0);
+
+		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+				      BPF_REG_TMP, BPF_REG_CTX,
+				      offsetof(struct sk_buff, dev));
+		/* if (tmp != 0) goto pc + 1 */
+		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
+		*insn++ = BPF_EXIT_INSN();
+		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
+			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
+					    offsetof(struct net_device, ifindex));
+		else
+			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
+					    offsetof(struct net_device, type));
 		break;
 
 	case SKF_AD_OFF + SKF_AD_MARK:
@@ -745,22 +731,17 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-
-		/* A = *(u16 *) (ctx + offsetof(vlan_tci)) */
-		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
-				    offsetof(struct sk_buff, vlan_tci));
-		insn++;
-
 		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
 
+		/* A = *(u16 *) (ctx + offsetof(vlan_tci)) */
+		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				      offsetof(struct sk_buff, vlan_tci));
 		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
 			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
 					      ~VLAN_TAG_PRESENT);
 		} else {
 			/* A >>= 12 */
-			*insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
-			insn++;
-
+			*insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
 			/* A &= 1 */
 			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
 		}
@@ -772,34 +753,27 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_CPU:
 	case SKF_AD_OFF + SKF_AD_RANDOM:
 		/* arg1 = ctx */
-		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG1, BPF_REG_CTX);
-		insn++;
-
+		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
 		/* arg2 = A */
-		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG2, BPF_REG_A);
-		insn++;
-
+		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
 		/* arg3 = X */
-		*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_ARG3, BPF_REG_X);
-		insn++;
-
+		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
 		/* Emit call(ctx, arg2=A, arg3=X) */
-		insn->code = BPF_JMP | BPF_CALL;
 		switch (fp->k) {
 		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
-			insn->imm = __skb_get_pay_offset - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__skb_get_pay_offset);
 			break;
 		case SKF_AD_OFF + SKF_AD_NLATTR:
-			insn->imm = __skb_get_nlattr - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__skb_get_nlattr);
 			break;
 		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
-			insn->imm = __skb_get_nlattr_nest - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
 			break;
 		case SKF_AD_OFF + SKF_AD_CPU:
-			insn->imm = __get_raw_cpu_id - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
 			break;
 		case SKF_AD_OFF + SKF_AD_RANDOM:
-			insn->imm = __get_random_u32 - __bpf_call_base;
+			*insn = BPF_EMIT_CALL(__get_random_u32);
 			break;
 		}
 		break;
@@ -871,9 +845,8 @@ do_pass:
 	new_insn = new_prog;
 	fp = prog;
 
-	if (new_insn) {
-		*new_insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_CTX, BPF_REG_ARG1);
-	}
+	if (new_insn)
+		*new_insn = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
 	new_insn++;
 
 	for (i = 0; i < len; fp++, i++) {
@@ -921,17 +894,16 @@ do_pass:
 			    convert_bpf_extensions(fp, &insn))
 				break;
 
-			insn->code = fp->code;
-			insn->a_reg = BPF_REG_A;
-			insn->x_reg = BPF_REG_X;
-			insn->imm = fp->k;
+			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
 			break;
 
-		/* Jump opcodes map as-is, but offsets need adjustment. */
-		case BPF_JMP | BPF_JA:
-			target = i + fp->k + 1;
-			insn->code = fp->code;
-#define EMIT_JMP							\
+		/* Jump transformation cannot use BPF block macros
+		 * everywhere as offset calculation and target updates
+		 * require a bit more work than the rest, i.e. jump
+		 * opcodes map as-is, but offsets need adjustment.
+		 */
+
+#define BPF_EMIT_JMP							\
 	do {								\
 		if (target >= len || target < 0)			\
 			goto err;					\
@@ -940,7 +912,10 @@ do_pass:
 		insn->off -= insn - tmp_insns;				\
 	} while (0)
 
-			EMIT_JMP;
+		case BPF_JMP | BPF_JA:
+			target = i + fp->k + 1;
+			insn->code = fp->code;
+			BPF_EMIT_JMP;
 			break;
 
 		case BPF_JMP | BPF_JEQ | BPF_K:
@@ -956,10 +931,7 @@ do_pass:
 				 * immediate into tmp register and use it
 				 * in compare insn.
 				 */
-				insn->code = BPF_ALU | BPF_MOV | BPF_K;
-				insn->a_reg = BPF_REG_TMP;
-				insn->imm = fp->k;
-				insn++;
+				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
 
 				insn->a_reg = BPF_REG_A;
 				insn->x_reg = BPF_REG_TMP;
@@ -975,7 +947,7 @@ do_pass:
 			if (fp->jf == 0) {
 				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
 				target = i + fp->jt + 1;
-				EMIT_JMP;
+				BPF_EMIT_JMP;
 				break;
 			}
 
@@ -983,116 +955,94 @@ do_pass:
 			if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) {
 				insn->code = BPF_JMP | BPF_JNE | bpf_src;
 				target = i + fp->jf + 1;
-				EMIT_JMP;
+				BPF_EMIT_JMP;
 				break;
 			}
 
 			/* Other jumps are mapped into two insns: Jxx and JA. */
 			target = i + fp->jt + 1;
 			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
-			EMIT_JMP;
+			BPF_EMIT_JMP;
 			insn++;
 
 			insn->code = BPF_JMP | BPF_JA;
 			target = i + fp->jf + 1;
-			EMIT_JMP;
+			BPF_EMIT_JMP;
 			break;
 
 		/* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
 		case BPF_LDX | BPF_MSH | BPF_B:
 			/* tmp = A */
-			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_TMP, BPF_REG_A);
-			insn++;
-
+			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
 			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
-			*insn = BPF_LD_ABS(BPF_B, fp->k);
-			insn++;
-
+			*insn++ = BPF_LD_ABS(BPF_B, fp->k);
 			/* A &= 0xf */
-			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
-			insn++;
-
+			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
 			/* A <<= 2 */
-			*insn = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
-			insn++;
-
+			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
 			/* X = A */
-			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A);
-			insn++;
-
+			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
 			/* A = tmp */
-			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_TMP);
+			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
 			break;
 
 		/* RET_K, RET_A are remaped into 2 insns. */
 		case BPF_RET | BPF_A:
 		case BPF_RET | BPF_K:
-			insn->code = BPF_ALU | BPF_MOV |
-				     (BPF_RVAL(fp->code) == BPF_K ?
-				      BPF_K : BPF_X);
-			insn->a_reg = 0;
-			insn->x_reg = BPF_REG_A;
-			insn->imm = fp->k;
-			insn++;
-
+			*insn++ = BPF_MOV32_RAW(BPF_RVAL(fp->code) == BPF_K ?
+						BPF_K : BPF_X, BPF_REG_0,
+						BPF_REG_A, fp->k);
 			*insn = BPF_EXIT_INSN();
 			break;
 
 		/* Store to stack. */
 		case BPF_ST:
 		case BPF_STX:
-			insn->code = BPF_STX | BPF_MEM | BPF_W;
-			insn->a_reg = BPF_REG_FP;
-			insn->x_reg = fp->code == BPF_ST ?
-				      BPF_REG_A : BPF_REG_X;
-			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
+					    BPF_ST ? BPF_REG_A : BPF_REG_X,
+					    -(BPF_MEMWORDS - fp->k) * 4);
 			break;
 
 		/* Load from stack. */
 		case BPF_LD | BPF_MEM:
 		case BPF_LDX | BPF_MEM:
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      BPF_REG_A : BPF_REG_X;
-			insn->x_reg = BPF_REG_FP;
-			insn->off = -(BPF_MEMWORDS - fp->k) * 4;
+			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD  ?
+					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
+					    -(BPF_MEMWORDS - fp->k) * 4);
 			break;
 
 		/* A = K or X = K */
 		case BPF_LD | BPF_IMM:
 		case BPF_LDX | BPF_IMM:
-			insn->code = BPF_ALU | BPF_MOV | BPF_K;
-			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      BPF_REG_A : BPF_REG_X;
-			insn->imm = fp->k;
+			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
+					      BPF_REG_A : BPF_REG_X, fp->k);
 			break;
 
 		/* X = A */
 		case BPF_MISC | BPF_TAX:
-			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_X, BPF_REG_A);
+			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
 			break;
 
 		/* A = X */
 		case BPF_MISC | BPF_TXA:
-			*insn = BPF_ALU64_REG(BPF_MOV, BPF_REG_A, BPF_REG_X);
+			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
 			break;
 
 		/* A = skb->len or X = skb->len */
 		case BPF_LD | BPF_W | BPF_LEN:
 		case BPF_LDX | BPF_W | BPF_LEN:
-			insn->code = BPF_LDX | BPF_MEM | BPF_W;
-			insn->a_reg = BPF_CLASS(fp->code) == BPF_LD ?
-				      BPF_REG_A : BPF_REG_X;
-			insn->x_reg = BPF_REG_CTX;
-			insn->off = offsetof(struct sk_buff, len);
+			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
+					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
+					    offsetof(struct sk_buff, len));
 			break;
 
-		/* access seccomp_data fields */
+		/* Access seccomp_data fields. */
 		case BPF_LDX | BPF_ABS | BPF_W:
 			/* A = *(u32 *) (ctx + K) */
 			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
 			break;
 
+		/* Unknown instruction. */
 		default:
 			goto err;
 		}
@@ -1101,7 +1051,6 @@ do_pass:
 		if (new_prog)
 			memcpy(new_insn, tmp_insns,
 			       sizeof(*insn) * (insn - tmp_insns));
-
 		new_insn += insn - tmp_insns;
 	}
 
@@ -1116,7 +1065,6 @@ do_pass:
 		new_flen = new_insn - new_prog;
 		if (pass > 2)
 			goto err;
-
 		goto do_pass;
 	}
 
-- 
cgit 


From 96b2e73c5471542cb9c622c4360716684f8797ed Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 2 Jun 2014 00:18:48 -0700
Subject: Revert "net/mlx4_en: Use affinity hint"

This reverts commit 70a640d0dae3a9b1b222ce673eb5d92c263ddd61.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/main.c              |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_cq.c     |  6 +-----
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 30 --------------------------
 drivers/net/ethernet/mellanox/mlx4/eq.c        | 13 +----------
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  1 -
 include/linux/mlx4/device.h                    |  2 +-
 6 files changed, 4 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 58b1f239ac2b..199c7896f081 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -1897,7 +1897,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
 				 i, j, dev->pdev->bus->name);
 			/* Set IRQ for specific name (per ring) */
 			if (mlx4_assign_eq(dev, name, NULL,
-					   &ibdev->eq_table[eq], NULL)) {
+					   &ibdev->eq_table[eq])) {
 				/* Use legacy (same as mlx4_en driver) */
 				pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
 				ibdev->eq_table[eq] =
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index ea2cd72e5368..636963db598a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -118,15 +118,11 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 	if (cq->is_tx == RX) {
 		if (mdev->dev->caps.comp_pool) {
 			if (!cq->vector) {
-				struct mlx4_en_rx_ring *ring =
-					priv->rx_ring[cq->ring];
-
 				sprintf(name, "%s-%d", priv->dev->name,
 					cq->ring);
 				/* Set IRQ for specific name (per ring) */
 				if (mlx4_assign_eq(mdev->dev, name, rmap,
-						   &cq->vector,
-						   ring->affinity_mask)) {
+						   &cq->vector)) {
 					cq->vector = (cq->ring + 1 + priv->port)
 					    % mdev->dev->caps.num_comp_vectors;
 					mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 05d135572abc..58209bd0c94c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1526,32 +1526,6 @@ static void mlx4_en_linkstate(struct work_struct *work)
 	mutex_unlock(&mdev->state_lock);
 }
 
-static void mlx4_en_init_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
-{
-	struct mlx4_en_rx_ring *ring = priv->rx_ring[ring_idx];
-	int numa_node = priv->mdev->dev->numa_node;
-
-	if (numa_node == -1)
-		return;
-
-	if (!zalloc_cpumask_var(&ring->affinity_mask, GFP_KERNEL)) {
-		en_err(priv, "Failed to allocate core mask\n");
-		return;
-	}
-
-	if (cpumask_set_cpu_local_first(ring_idx, numa_node,
-					ring->affinity_mask)) {
-		en_err(priv, "Failed setting affinity hint\n");
-		free_cpumask_var(ring->affinity_mask);
-		ring->affinity_mask = NULL;
-	}
-}
-
-static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
-{
-	free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask);
-	priv->rx_ring[ring_idx]->affinity_mask = NULL;
-}
 
 int mlx4_en_start_port(struct net_device *dev)
 {
@@ -1593,8 +1567,6 @@ int mlx4_en_start_port(struct net_device *dev)
 
 		mlx4_en_cq_init_lock(cq);
 
-		mlx4_en_init_affinity_hint(priv, i);
-
 		err = mlx4_en_activate_cq(priv, cq, i);
 		if (err) {
 			en_err(priv, "Failed activating Rx CQ\n");
@@ -1875,8 +1847,6 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 			msleep(1);
 		mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
 		mlx4_en_deactivate_cq(priv, cq);
-
-		mlx4_en_free_affinity_hint(priv, i);
 	}
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index f91659e5fa13..d954ec1eac17 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -1376,7 +1376,7 @@ int mlx4_test_interrupts(struct mlx4_dev *dev)
 EXPORT_SYMBOL(mlx4_test_interrupts);
 
 int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
-		   int *vector, cpumask_var_t cpu_hint_mask)
+		   int *vector)
 {
 
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1411,15 +1411,6 @@ int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
 			}
 			mlx4_assign_irq_notifier(priv, dev,
 						 priv->eq_table.eq[vec].irq);
-			if (cpu_hint_mask) {
-				err = irq_set_affinity_hint(
-						priv->eq_table.eq[vec].irq,
-						cpu_hint_mask);
-				if (err) {
-					mlx4_warn(dev, "Failed setting affinity hint\n");
-					/*we dont want to break here*/
-				}
-			}
 
 			eq_set_ci(&priv->eq_table.eq[vec], 1);
 		}
@@ -1450,8 +1441,6 @@ void mlx4_release_eq(struct mlx4_dev *dev, int vec)
 			irq_set_affinity_notifier(
 				priv->eq_table.eq[vec].irq,
 				NULL);
-			irq_set_affinity_hint(priv->eq_table.eq[vec].irq,
-					      NULL);
 			free_irq(priv->eq_table.eq[vec].irq,
 				 &priv->eq_table.eq[vec]);
 			priv->msix_ctl.pool_bm &= ~(1ULL << i);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 0e15295bedd6..b5db1bf361dc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -313,7 +313,6 @@ struct mlx4_en_rx_ring {
 	unsigned long csum_ok;
 	unsigned long csum_none;
 	int hwtstamp_rx_filter;
-	cpumask_var_t affinity_mask;
 };
 
 struct mlx4_en_cq {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b9b70e00e3c1..ca38871a585c 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1161,7 +1161,7 @@ int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr);
 int mlx4_SYNC_TPT(struct mlx4_dev *dev);
 int mlx4_test_interrupts(struct mlx4_dev *dev);
 int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap,
-		   int *vector, cpumask_t *cpu_hint_mask);
+		   int *vector);
 void mlx4_release_eq(struct mlx4_dev *dev, int vec);
 
 int mlx4_get_phys_port_id(struct mlx4_dev *dev);
-- 
cgit 


From 8c3a05b489ef097f86bf87c64192456553f57781 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 20 May 2014 06:45:54 +0200
Subject: mmc: mmci: Enforce DMA configuration through DT

Remove the option to provide DMA configuration as platform data,
enforce it through DT.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Roland Stigge <stigge@antcom.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-lpc32xx/phy3250.c |  3 ---
 drivers/mmc/host/mmci.c         | 24 +-----------------------
 include/linux/amba/mmci.h       | 17 -----------------
 3 files changed, 1 insertion(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c
index 34932e0e31fa..7858d5b6f6ce 100644
--- a/arch/arm/mach-lpc32xx/phy3250.c
+++ b/arch/arm/mach-lpc32xx/phy3250.c
@@ -202,9 +202,6 @@ static struct mmci_platform_data lpc32xx_mmci_data = {
 	.ocr_mask	= MMC_VDD_30_31 | MMC_VDD_31_32 |
 			  MMC_VDD_32_33 | MMC_VDD_33_34,
 	.ios_handler	= mmc_handle_ios,
-	.dma_filter	= NULL,
-	/* No DMA for now since AMBA PL080 dmaengine driver only does scatter
-	 * gather, and the MMCI driver doesn't do it this way */
 };
 
 static struct lpc32xx_slc_platform_data lpc32xx_slc_data = {
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 758efea184c9..a084edd37af5 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -366,7 +366,6 @@ static void mmci_init_sg(struct mmci_host *host, struct mmc_data *data)
 #ifdef CONFIG_DMA_ENGINE
 static void mmci_dma_setup(struct mmci_host *host)
 {
-	struct mmci_platform_data *plat = host->plat;
 	const char *rxname, *txname;
 	dma_cap_mask_t mask;
 
@@ -380,25 +379,6 @@ static void mmci_dma_setup(struct mmci_host *host)
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
 
-	if (plat && plat->dma_filter) {
-		if (!host->dma_rx_channel && plat->dma_rx_param) {
-			host->dma_rx_channel = dma_request_channel(mask,
-							   plat->dma_filter,
-							   plat->dma_rx_param);
-			/* E.g if no DMA hardware is present */
-			if (!host->dma_rx_channel)
-				dev_err(mmc_dev(host->mmc), "no RX DMA channel\n");
-		}
-
-		if (!host->dma_tx_channel && plat->dma_tx_param) {
-			host->dma_tx_channel = dma_request_channel(mask,
-							   plat->dma_filter,
-							   plat->dma_tx_param);
-			if (!host->dma_tx_channel)
-				dev_warn(mmc_dev(host->mmc), "no TX DMA channel\n");
-		}
-	}
-
 	/*
 	 * If only an RX channel is specified, the driver will
 	 * attempt to use it bidirectionally, however if it is
@@ -446,11 +426,9 @@ static void mmci_dma_setup(struct mmci_host *host)
  */
 static inline void mmci_dma_release(struct mmci_host *host)
 {
-	struct mmci_platform_data *plat = host->plat;
-
 	if (host->dma_rx_channel)
 		dma_release_channel(host->dma_rx_channel);
-	if (host->dma_tx_channel && plat->dma_tx_param)
+	if (host->dma_tx_channel)
 		dma_release_channel(host->dma_tx_channel);
 	host->dma_rx_channel = host->dma_tx_channel = NULL;
 }
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index 3f95d32d5277..8c98113069ce 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -6,9 +6,6 @@
 
 #include <linux/mmc/host.h>
 
-/* Just some dummy forwarding */
-struct dma_chan;
-
 /**
  * struct mmci_platform_data - platform configuration for the MMCI
  * (also known as PL180) block.
@@ -26,17 +23,6 @@ struct dma_chan;
  * @gpio_wp: read this GPIO pin to see if the card is write protected
  * @gpio_cd: read this GPIO pin to detect card insertion
  * @cd_invert: true if the gpio_cd pin value is active low
- * @dma_filter: function used to select an appropriate RX and TX
- * DMA channel to be used for DMA, if and only if you're deploying the
- * generic DMA engine
- * @dma_rx_param: parameter passed to the DMA allocation
- * filter in order to select an appropriate RX channel. If
- * there is a bidirectional RX+TX channel, then just specify
- * this and leave dma_tx_param set to NULL
- * @dma_tx_param: parameter passed to the DMA allocation
- * filter in order to select an appropriate TX channel. If this
- * is NULL the driver will attempt to use the RX channel as a
- * bidirectional channel
  */
 struct mmci_platform_data {
 	unsigned int ocr_mask;
@@ -45,9 +31,6 @@ struct mmci_platform_data {
 	int	gpio_wp;
 	int	gpio_cd;
 	bool	cd_invert;
-	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
-	void *dma_rx_param;
-	void *dma_tx_param;
 };
 
 #endif
-- 
cgit 


From 9c5de2c1754c2bb3c69c4d7bf0d0edc0a61d8232 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 2 Jun 2014 15:38:05 +0200
Subject: spi: rspi: Remove unused 16-bit DMA support

The 16-bit DMA support doesn't fit well within the SPI core DMA framework,
as it needs to manage its own double-sized temporary buffers, for handling
the interleaved data.
Remove it, as there is no in-tree board code that sets
rspi_plat_data.dma_width_16bit.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 drivers/spi/spi-rspi.c   | 84 ++++--------------------------------------------
 include/linux/spi/rspi.h |  2 --
 2 files changed, 6 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index 57beda209599..3bd06fd9af47 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -201,7 +201,6 @@ struct rspi_data {
 	struct dma_chan *chan_tx;
 	struct dma_chan *chan_rx;
 
-	unsigned dma_width_16bit:1;
 	unsigned dma_callbacked:1;
 	unsigned byte_access:1;
 };
@@ -475,60 +474,17 @@ static void rspi_dma_unmap_sg(struct scatterlist *sg, struct dma_chan *chan,
 	dma_unmap_sg(chan->device->dev, sg, 1, dir);
 }
 
-static void rspi_memory_to_8bit(void *buf, const void *data, unsigned len)
-{
-	u16 *dst = buf;
-	const u8 *src = data;
-
-	while (len) {
-		*dst++ = (u16)(*src++);
-		len--;
-	}
-}
-
-static void rspi_memory_from_8bit(void *buf, const void *data, unsigned len)
-{
-	u8 *dst = buf;
-	const u16 *src = data;
-
-	while (len) {
-		*dst++ = (u8)*src++;
-		len--;
-	}
-}
-
 static int rspi_send_dma(struct rspi_data *rspi, struct spi_transfer *t)
 {
 	struct scatterlist sg;
-	const void *buf = NULL;
+	const void *buf = t->tx_buf;
 	struct dma_async_tx_descriptor *desc;
-	unsigned int len;
+	unsigned int len = t->len;
 	int ret = 0;
 
-	if (rspi->dma_width_16bit) {
-		void *tmp;
-		/*
-		 * If DMAC bus width is 16-bit, the driver allocates a dummy
-		 * buffer. And, the driver converts original data into the
-		 * DMAC data as the following format:
-		 *  original data: 1st byte, 2nd byte ...
-		 *  DMAC data:     1st byte, dummy, 2nd byte, dummy ...
-		 */
-		len = t->len * 2;
-		tmp = kmalloc(len, GFP_KERNEL);
-		if (!tmp)
-			return -ENOMEM;
-		rspi_memory_to_8bit(tmp, t->tx_buf, t->len);
-		buf = tmp;
-	} else {
-		len = t->len;
-		buf = t->tx_buf;
-	}
+	if (!rspi_dma_map_sg(&sg, buf, len, rspi->chan_tx, DMA_TO_DEVICE))
+		return -EFAULT;
 
-	if (!rspi_dma_map_sg(&sg, buf, len, rspi->chan_tx, DMA_TO_DEVICE)) {
-		ret = -EFAULT;
-		goto end_nomap;
-	}
 	desc = dmaengine_prep_slave_sg(rspi->chan_tx, &sg, 1, DMA_TO_DEVICE,
 				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!desc) {
@@ -563,10 +519,6 @@ static int rspi_send_dma(struct rspi_data *rspi, struct spi_transfer *t)
 
 end:
 	rspi_dma_unmap_sg(&sg, rspi->chan_tx, DMA_TO_DEVICE);
-end_nomap:
-	if (rspi->dma_width_16bit)
-		kfree(buf);
-
 	return ret;
 }
 
@@ -603,28 +555,11 @@ static void qspi_receive_init(const struct rspi_data *rspi)
 static int rspi_receive_dma(struct rspi_data *rspi, struct spi_transfer *t)
 {
 	struct scatterlist sg, sg_dummy;
-	void *dummy = NULL, *rx_buf = NULL;
+	void *dummy = NULL, *rx_buf = t->rx_buf;
 	struct dma_async_tx_descriptor *desc, *desc_dummy;
-	unsigned int len;
+	unsigned int len = t->len;
 	int ret = 0;
 
-	if (rspi->dma_width_16bit) {
-		/*
-		 * If DMAC bus width is 16-bit, the driver allocates a dummy
-		 * buffer. And, finally the driver converts the DMAC data into
-		 * actual data as the following format:
-		 *  DMAC data:   1st byte, dummy, 2nd byte, dummy ...
-		 *  actual data: 1st byte, 2nd byte ...
-		 */
-		len = t->len * 2;
-		rx_buf = kmalloc(len, GFP_KERNEL);
-		if (!rx_buf)
-			return -ENOMEM;
-	 } else {
-		len = t->len;
-		rx_buf = t->rx_buf;
-	}
-
 	/* prepare dummy transfer to generate SPI clocks */
 	dummy = kzalloc(len, GFP_KERNEL);
 	if (!dummy) {
@@ -697,11 +632,6 @@ end:
 end_dummy_mapped:
 	rspi_dma_unmap_sg(&sg_dummy, rspi->chan_tx, DMA_TO_DEVICE);
 end_nomap:
-	if (rspi->dma_width_16bit) {
-		if (!ret)
-			rspi_memory_from_8bit(t->rx_buf, rx_buf, t->len);
-		kfree(rx_buf);
-	}
 	kfree(dummy);
 
 	return ret;
@@ -1073,8 +1003,6 @@ static int rspi_request_dma(struct rspi_data *rspi,
 	if (!res || !rspi_pd)
 		return 0;	/* The driver assumes no error. */
 
-	rspi->dma_width_16bit = rspi_pd->dma_width_16bit;
-
 	/* If the module receives data by DMAC, it also needs TX DMAC */
 	if (rspi_pd->dma_rx_id && rspi_pd->dma_tx_id) {
 		dma_cap_zero(mask);
diff --git a/include/linux/spi/rspi.h b/include/linux/spi/rspi.h
index a25bd6f65e7f..e546b2ceb623 100644
--- a/include/linux/spi/rspi.h
+++ b/include/linux/spi/rspi.h
@@ -25,8 +25,6 @@ struct rspi_plat_data {
 	unsigned int dma_tx_id;
 	unsigned int dma_rx_id;
 
-	unsigned dma_width_16bit:1;	/* DMAC read/write width = 16-bit */
-
 	u16 num_chipselect;
 };
 
-- 
cgit 


From 670e5b8eaf85704742bc3cb1df51fdd3ce08fc15 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Wed, 28 May 2014 18:44:46 -0700
Subject: net: Add support for device specific address syncing

This change provides a function to be used in order to break the
ndo_set_rx_mode call into a set of address add and remove calls.  The code
is based on the implementation of dev_uc_sync/dev_mc_sync.  Since they
essentially do the same thing but with only one dev I simply named my
functions __dev_uc_sync/__dev_mc_sync.

I also implemented an unsync version of the functions as well to allow for
cleanup on close.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 73 ++++++++++++++++++++++++++++++++++++++++
 net/core/dev_addr_lists.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2db1610bf109..774e5391eb8e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3003,6 +3003,15 @@ int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
 		   struct netdev_hw_addr_list *from_list, int addr_len);
 void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
 		      struct netdev_hw_addr_list *from_list, int addr_len);
+int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
+		       struct net_device *dev,
+		       int (*sync)(struct net_device *, const unsigned char *),
+		       int (*unsync)(struct net_device *,
+				     const unsigned char *));
+void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
+			  struct net_device *dev,
+			  int (*unsync)(struct net_device *,
+					const unsigned char *));
 void __hw_addr_init(struct netdev_hw_addr_list *list);
 
 /* Functions used for device addresses handling */
@@ -3023,6 +3032,38 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from);
 void dev_uc_flush(struct net_device *dev);
 void dev_uc_init(struct net_device *dev);
 
+/**
+ *  __dev_uc_sync - Synchronize device's unicast list
+ *  @dev:  device to sync
+ *  @sync: function to call if address should be added
+ *  @unsync: function to call if address should be removed
+ *
+ *  Add newly added addresses to the interface, and release
+ *  addresses that have been deleted.
+ **/
+static inline int __dev_uc_sync(struct net_device *dev,
+				int (*sync)(struct net_device *,
+					    const unsigned char *),
+				int (*unsync)(struct net_device *,
+					      const unsigned char *))
+{
+	return __hw_addr_sync_dev(&dev->uc, dev, sync, unsync);
+}
+
+/**
+ *  __dev_uc_unsync - Remove synchronized addresses from device
+ *  @dev:  device to sync
+ *  @unsync: function to call if address should be removed
+ *
+ *  Remove all addresses that were added to the device by __dev_uc_sync().
+ **/
+static inline void __dev_uc_unsync(struct net_device *dev,
+				   int (*unsync)(struct net_device *,
+						 const unsigned char *))
+{
+	__hw_addr_unsync_dev(&dev->uc, dev, unsync);
+}
+
 /* Functions used for multicast addresses handling */
 int dev_mc_add(struct net_device *dev, const unsigned char *addr);
 int dev_mc_add_global(struct net_device *dev, const unsigned char *addr);
@@ -3035,6 +3076,38 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from);
 void dev_mc_flush(struct net_device *dev);
 void dev_mc_init(struct net_device *dev);
 
+/**
+ *  __dev_mc_sync - Synchronize device's multicast list
+ *  @dev:  device to sync
+ *  @sync: function to call if address should be added
+ *  @unsync: function to call if address should be removed
+ *
+ *  Add newly added addresses to the interface, and release
+ *  addresses that have been deleted.
+ **/
+static inline int __dev_mc_sync(struct net_device *dev,
+				int (*sync)(struct net_device *,
+					    const unsigned char *),
+				int (*unsync)(struct net_device *,
+					      const unsigned char *))
+{
+	return __hw_addr_sync_dev(&dev->mc, dev, sync, unsync);
+}
+
+/**
+ *  __dev_mc_unsync - Remove synchronized addresses from device
+ *  @dev:  device to sync
+ *  @unsync: function to call if address should be removed
+ *
+ *  Remove all addresses that were added to the device by __dev_mc_sync().
+ **/
+static inline void __dev_mc_unsync(struct net_device *dev,
+				   int (*unsync)(struct net_device *,
+						 const unsigned char *))
+{
+	__hw_addr_unsync_dev(&dev->mc, dev, unsync);
+}
+
 /* Functions used for secondary unicast and multicast support */
 void dev_set_rx_mode(struct net_device *dev);
 void __dev_set_rx_mode(struct net_device *dev);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 329d5794e7dc..b6b230600b97 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -225,6 +225,91 @@ void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
 }
 EXPORT_SYMBOL(__hw_addr_unsync);
 
+/**
+ *  __hw_addr_sync_dev - Synchronize device's address list
+ *  @list: address list to synchronize
+ *  @dev:  device to sync
+ *  @sync: function to call if address should be added
+ *  @unsync: function to call if address should be removed
+ *
+ *  This function is intended to be called from the ndo_set_rx_mode
+ *  function of devices that require explicit address add/remove
+ *  notifications.  The unsync function may be NULL in which case
+ *  the addresses requiring removal will simply be removed without
+ *  any notification to the device.
+ **/
+int __hw_addr_sync_dev(struct netdev_hw_addr_list *list,
+		       struct net_device *dev,
+		       int (*sync)(struct net_device *, const unsigned char *),
+		       int (*unsync)(struct net_device *,
+				     const unsigned char *))
+{
+	struct netdev_hw_addr *ha, *tmp;
+	int err;
+
+	/* first go through and flush out any stale entries */
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		if (!ha->sync_cnt || ha->refcount != 1)
+			continue;
+
+		/* if unsync is defined and fails defer unsyncing address */
+		if (unsync && unsync(dev, ha->addr))
+			continue;
+
+		ha->sync_cnt--;
+		__hw_addr_del_entry(list, ha, false, false);
+	}
+
+	/* go through and sync new entries to the list */
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		if (ha->sync_cnt)
+			continue;
+
+		err = sync(dev, ha->addr);
+		if (err)
+			return err;
+
+		ha->sync_cnt++;
+		ha->refcount++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(__hw_addr_sync_dev);
+
+/**
+ *  __hw_addr_unsync_dev - Remove synchronized addresses from device
+ *  @list: address list to remove synchronized addresses from
+ *  @dev:  device to sync
+ *  @unsync: function to call if address should be removed
+ *
+ *  Remove all addresses that were added to the device by __hw_addr_sync_dev().
+ *  This function is intended to be called from the ndo_stop or ndo_open
+ *  functions on devices that require explicit address add/remove
+ *  notifications.  If the unsync function pointer is NULL then this function
+ *  can be used to just reset the sync_cnt for the addresses in the list.
+ **/
+void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list,
+			  struct net_device *dev,
+			  int (*unsync)(struct net_device *,
+					const unsigned char *))
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		if (!ha->sync_cnt)
+			continue;
+
+		/* if unsync is defined and fails defer unsyncing address */
+		if (unsync && unsync(dev, ha->addr))
+			continue;
+
+		ha->sync_cnt--;
+		__hw_addr_del_entry(list, ha, false, false);
+	}
+}
+EXPORT_SYMBOL(__hw_addr_unsync_dev);
+
 static void __hw_addr_flush(struct netdev_hw_addr_list *list)
 {
 	struct netdev_hw_addr *ha, *tmp;
-- 
cgit 


From 1b49dcf3d7c765ad18ca7167a0e441824eb1f7af Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Sun, 16 Mar 2014 14:51:35 -0700
Subject: virtio-scsi.h: Add virtio_scsi_cmd_req_pi + VIRTIO_SCSI_F_T10_PI bits

This patch adds a virtio_scsi_cmd_req_pi header as recommended by
Paolo that contains pi_bytesout + pi_bytesin elements used for
signaling when protection information buffers (in bytes) are
expected to precede the data payload buffers.

Also add new VIRTIO_SCSI_F_T10_PI feature bit to be used to signal
host support.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Sagi Grimberg <sagig@dev.mellanox.co.il>
Cc: H. Peter Anvin <hpa@zytor.com>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/linux/virtio_scsi.h | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_scsi.h b/include/linux/virtio_scsi.h
index 4195b97a3def..de429d1f4357 100644
--- a/include/linux/virtio_scsi.h
+++ b/include/linux/virtio_scsi.h
@@ -35,11 +35,23 @@ struct virtio_scsi_cmd_req {
 	u8 lun[8];		/* Logical Unit Number */
 	u64 tag;		/* Command identifier */
 	u8 task_attr;		/* Task attribute */
-	u8 prio;
+	u8 prio;		/* SAM command priority field */
 	u8 crn;
 	u8 cdb[VIRTIO_SCSI_CDB_SIZE];
 } __packed;
 
+/* SCSI command request, followed by protection information */
+struct virtio_scsi_cmd_req_pi {
+	u8 lun[8];		/* Logical Unit Number */
+	u64 tag;		/* Command identifier */
+	u8 task_attr;		/* Task attribute */
+	u8 prio;		/* SAM command priority field */
+	u8 crn;
+	u32 pi_bytesout;	/* DataOUT PI Number of bytes */
+	u32 pi_bytesin;		/* DataIN PI Number of bytes */
+	u8 cdb[VIRTIO_SCSI_CDB_SIZE];
+} __packed;
+
 /* Response, followed by sense data and data-in */
 struct virtio_scsi_cmd_resp {
 	u32 sense_len;		/* Sense data length */
@@ -97,6 +109,7 @@ struct virtio_scsi_config {
 #define VIRTIO_SCSI_F_INOUT                    0
 #define VIRTIO_SCSI_F_HOTPLUG                  1
 #define VIRTIO_SCSI_F_CHANGE                   2
+#define VIRTIO_SCSI_F_T10_PI                   3
 
 /* Response codes */
 #define VIRTIO_SCSI_S_OK                       0
-- 
cgit 


From 9d0d68faea6962d62dd501cd6e71ce5cc8ed262b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 29 May 2014 20:46:17 +0200
Subject: team: fix mtu setting

Currently it is not possible to set the mtu of a team device which has
a port enslaved to it. The reason is that when team_change_mtu() calls
dev_set_mtu() for a port device, the notifier for the
NETDEV_PRECHANGEMTU event is called and team_device_event() returns
NOTIFY_BAD, forbidding the change. So fix this by returning NOTIFY_DONE
here in case team is changing mtu in team_change_mtu().

Introduced-by: 3d249d4c "net: introduce ethernet teaming device"
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Flavio Leitner <fbl@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 7 ++++++-
 include/linux/if_team.h | 1 +
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 767fe61b5ac9..ce4989be86d9 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1724,6 +1724,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 	 * to traverse list in reverse under rcu_read_lock
 	 */
 	mutex_lock(&team->lock);
+	team->port_mtu_change_allowed = true;
 	list_for_each_entry(port, &team->port_list, list) {
 		err = dev_set_mtu(port->dev, new_mtu);
 		if (err) {
@@ -1732,6 +1733,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 			goto unwind;
 		}
 	}
+	team->port_mtu_change_allowed = false;
 	mutex_unlock(&team->lock);
 
 	dev->mtu = new_mtu;
@@ -1741,6 +1743,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 unwind:
 	list_for_each_entry_continue_reverse(port, &team->port_list, list)
 		dev_set_mtu(port->dev, dev->mtu);
+	team->port_mtu_change_allowed = false;
 	mutex_unlock(&team->lock);
 
 	return err;
@@ -2851,7 +2854,9 @@ static int team_device_event(struct notifier_block *unused,
 		break;
 	case NETDEV_PRECHANGEMTU:
 		/* Forbid to change mtu of underlaying device */
-		return NOTIFY_BAD;
+		if (!port->team->port_mtu_change_allowed)
+			return NOTIFY_BAD;
+		break;
 	case NETDEV_PRE_TYPE_CHANGE:
 		/* Forbid to change type of underlaying device */
 		return NOTIFY_BAD;
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index a899dc24be15..a6aa970758a2 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -194,6 +194,7 @@ struct team {
 	bool user_carrier_enabled;
 	bool queue_override_enabled;
 	struct list_head *qom_lists; /* array of queue override mapping lists */
+	bool port_mtu_change_allowed;
 	struct {
 		unsigned int count;
 		unsigned int interval; /* in ms */
-- 
cgit 


From 40f2287bd583f4df4c602c1a29a48df2730fb6d4 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Sun, 11 May 2014 15:15:12 +0300
Subject: IB/mlx4: Implement IB_QP_CREATE_USE_GFP_NOIO

Modify the various routines used to allocate memory resources which
serve QPs in mlx4 to get an input GFP directive.  Have the Ethernet
driver use GFP_KERNEL in its QP allocations, as done prior to this
commit, and the IB driver use GFP_NOIO when the IB verbs
IB_QP_CREATE_USE_GFP_NOIO QP creation flag is provided.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
---
 drivers/infiniband/hw/mlx4/cq.c                    |  6 ++---
 drivers/infiniband/hw/mlx4/mlx4_ib.h               |  1 +
 drivers/infiniband/hw/mlx4/qp.c                    | 30 ++++++++++++----------
 drivers/infiniband/hw/mlx4/srq.c                   |  7 ++---
 drivers/net/ethernet/mellanox/mlx4/alloc.c         | 27 +++++++++----------
 drivers/net/ethernet/mellanox/mlx4/cq.c            |  4 +--
 drivers/net/ethernet/mellanox/mlx4/en_rx.c         |  6 ++---
 drivers/net/ethernet/mellanox/mlx4/en_tx.c         |  2 +-
 drivers/net/ethernet/mellanox/mlx4/icm.c           |  7 ++---
 drivers/net/ethernet/mellanox/mlx4/icm.h           |  3 ++-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  4 +--
 drivers/net/ethernet/mellanox/mlx4/mr.c            | 17 ++++++------
 drivers/net/ethernet/mellanox/mlx4/qp.c            | 20 +++++++--------
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  |  4 +--
 drivers/net/ethernet/mellanox/mlx4/srq.c           |  4 +--
 include/linux/mlx4/device.h                        | 10 +++++---
 16 files changed, 82 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 5f640814cc81..1066eec854a9 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -102,7 +102,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 	int err;
 
 	err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size,
-			     PAGE_SIZE * 2, &buf->buf);
+			     PAGE_SIZE * 2, &buf->buf, GFP_KERNEL);
 
 	if (err)
 		goto out;
@@ -113,7 +113,7 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
 	if (err)
 		goto err_buf;
 
-	err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf);
+	err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf, GFP_KERNEL);
 	if (err)
 		goto err_mtt;
 
@@ -209,7 +209,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
 
 		uar = &to_mucontext(context)->uar;
 	} else {
-		err = mlx4_db_alloc(dev->dev, &cq->db, 1);
+		err = mlx4_db_alloc(dev->dev, &cq->db, 1, GFP_KERNEL);
 		if (err)
 			goto err_cq;
 
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index f589522fddfd..bb8c9dd442ae 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -156,6 +156,7 @@ enum mlx4_ib_qp_flags {
 	MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
 	MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
 	MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
+	MLX4_IB_QP_CREATE_USE_GFP_NOIO = IB_QP_CREATE_USE_GFP_NOIO,
 	MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
 	MLX4_IB_SRIOV_SQP = 1 << 31,
 };
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 41308af4163c..8710baf60bb9 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -610,7 +610,8 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
 
 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp)
+			    struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
+			    gfp_t gfp)
 {
 	int qpn;
 	int err;
@@ -748,14 +749,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			goto err;
 
 		if (qp_has_rq(init_attr)) {
-			err = mlx4_db_alloc(dev->dev, &qp->db, 0);
+			err = mlx4_db_alloc(dev->dev, &qp->db, 0, gfp);
 			if (err)
 				goto err;
 
 			*qp->db.db = 0;
 		}
 
-		if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf)) {
+		if (mlx4_buf_alloc(dev->dev, qp->buf_size, PAGE_SIZE * 2, &qp->buf, gfp)) {
 			err = -ENOMEM;
 			goto err_db;
 		}
@@ -765,13 +766,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		if (err)
 			goto err_buf;
 
-		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf);
+		err = mlx4_buf_write_mtt(dev->dev, &qp->mtt, &qp->buf, gfp);
 		if (err)
 			goto err_mtt;
 
-		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), GFP_KERNEL);
-
+		qp->sq.wrid  = kmalloc(qp->sq.wqe_cnt * sizeof (u64), gfp);
+		qp->rq.wrid  = kmalloc(qp->rq.wqe_cnt * sizeof (u64), gfp);
 		if (!qp->sq.wrid || !qp->rq.wrid) {
 			err = -ENOMEM;
 			goto err_wrid;
@@ -801,7 +801,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			goto err_proxy;
 	}
 
-	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+	err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
 	if (err)
 		goto err_qpn;
 
@@ -1040,7 +1040,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 	struct mlx4_ib_qp *qp = NULL;
 	int err;
 	u16 xrcdn = 0;
+	gfp_t gfp;
 
+	gfp = (init_attr->create_flags & MLX4_IB_QP_CREATE_USE_GFP_NOIO) ?
+		GFP_NOIO : GFP_KERNEL;
 	/*
 	 * We only support LSO, vendor flag1, and multicast loopback blocking,
 	 * and only for kernel UD QPs.
@@ -1049,7 +1052,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 					MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
 					MLX4_IB_SRIOV_TUNNEL_QP |
 					MLX4_IB_SRIOV_SQP |
-					MLX4_IB_QP_NETIF))
+					MLX4_IB_QP_NETIF |
+					MLX4_IB_QP_CREATE_USE_GFP_NOIO))
 		return ERR_PTR(-EINVAL);
 
 	if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
@@ -1059,7 +1063,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 
 	if (init_attr->create_flags &&
 	    (udata ||
-	     ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
+	     ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
 	      init_attr->qp_type != IB_QPT_UD) ||
 	     ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
 	      init_attr->qp_type > IB_QPT_GSI)))
@@ -1079,7 +1083,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 	case IB_QPT_RAW_PACKET:
-		qp = kzalloc(sizeof *qp, GFP_KERNEL);
+		qp = kzalloc(sizeof *qp, gfp);
 		if (!qp)
 			return ERR_PTR(-ENOMEM);
 		qp->pri.vid = 0xFFFF;
@@ -1088,7 +1092,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 	case IB_QPT_UD:
 	{
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr,
-				       udata, 0, &qp);
+				       udata, 0, &qp, gfp);
 		if (err)
 			return ERR_PTR(err);
 
@@ -1106,7 +1110,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
 				       get_sqp_num(to_mdev(pd->device), init_attr),
-				       &qp);
+				       &qp, gfp);
 		if (err)
 			return ERR_PTR(err);
 
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 60c5fb025fc7..62d9285300af 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -134,13 +134,14 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 		if (err)
 			goto err_mtt;
 	} else {
-		err = mlx4_db_alloc(dev->dev, &srq->db, 0);
+		err = mlx4_db_alloc(dev->dev, &srq->db, 0, GFP_KERNEL);
 		if (err)
 			goto err_srq;
 
 		*srq->db.db = 0;
 
-		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf)) {
+		if (mlx4_buf_alloc(dev->dev, buf_size, PAGE_SIZE * 2, &srq->buf,
+				   GFP_KERNEL)) {
 			err = -ENOMEM;
 			goto err_db;
 		}
@@ -165,7 +166,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 		if (err)
 			goto err_buf;
 
-		err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf);
+		err = mlx4_buf_write_mtt(dev->dev, &srq->mtt, &srq->buf, GFP_KERNEL);
 		if (err)
 			goto err_mtt;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index c3ad464d0627..b0297da50304 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -171,7 +171,7 @@ void mlx4_bitmap_cleanup(struct mlx4_bitmap *bitmap)
  */
 
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-		   struct mlx4_buf *buf)
+		   struct mlx4_buf *buf, gfp_t gfp)
 {
 	dma_addr_t t;
 
@@ -180,7 +180,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		buf->npages       = 1;
 		buf->page_shift   = get_order(size) + PAGE_SHIFT;
 		buf->direct.buf   = dma_alloc_coherent(&dev->pdev->dev,
-						       size, &t, GFP_KERNEL);
+						       size, &t, gfp);
 		if (!buf->direct.buf)
 			return -ENOMEM;
 
@@ -200,14 +200,14 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		buf->npages      = buf->nbufs;
 		buf->page_shift  = PAGE_SHIFT;
 		buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
-					   GFP_KERNEL);
+					   gfp);
 		if (!buf->page_list)
 			return -ENOMEM;
 
 		for (i = 0; i < buf->nbufs; ++i) {
 			buf->page_list[i].buf =
 				dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
-						   &t, GFP_KERNEL);
+						   &t, gfp);
 			if (!buf->page_list[i].buf)
 				goto err_free;
 
@@ -218,7 +218,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 
 		if (BITS_PER_LONG == 64) {
 			struct page **pages;
-			pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL);
+			pages = kmalloc(sizeof *pages * buf->nbufs, gfp);
 			if (!pages)
 				goto err_free;
 			for (i = 0; i < buf->nbufs; ++i)
@@ -260,11 +260,12 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
 }
 EXPORT_SYMBOL_GPL(mlx4_buf_free);
 
-static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device)
+static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device,
+						 gfp_t gfp)
 {
 	struct mlx4_db_pgdir *pgdir;
 
-	pgdir = kzalloc(sizeof *pgdir, GFP_KERNEL);
+	pgdir = kzalloc(sizeof *pgdir, gfp);
 	if (!pgdir)
 		return NULL;
 
@@ -272,7 +273,7 @@ static struct mlx4_db_pgdir *mlx4_alloc_db_pgdir(struct device *dma_device)
 	pgdir->bits[0] = pgdir->order0;
 	pgdir->bits[1] = pgdir->order1;
 	pgdir->db_page = dma_alloc_coherent(dma_device, PAGE_SIZE,
-					    &pgdir->db_dma, GFP_KERNEL);
+					    &pgdir->db_dma, gfp);
 	if (!pgdir->db_page) {
 		kfree(pgdir);
 		return NULL;
@@ -312,7 +313,7 @@ found:
 	return 0;
 }
 
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order)
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_db_pgdir *pgdir;
@@ -324,7 +325,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order)
 		if (!mlx4_alloc_db_from_pgdir(pgdir, db, order))
 			goto out;
 
-	pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev));
+	pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp);
 	if (!pgdir) {
 		ret = -ENOMEM;
 		goto out;
@@ -376,13 +377,13 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
 {
 	int err;
 
-	err = mlx4_db_alloc(dev, &wqres->db, 1);
+	err = mlx4_db_alloc(dev, &wqres->db, 1, GFP_KERNEL);
 	if (err)
 		return err;
 
 	*wqres->db.db = 0;
 
-	err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf);
+	err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL);
 	if (err)
 		goto err_db;
 
@@ -391,7 +392,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
 	if (err)
 		goto err_buf;
 
-	err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf);
+	err = mlx4_buf_write_mtt(dev, &wqres->mtt, &wqres->buf, GFP_KERNEL);
 	if (err)
 		goto err_mtt;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 0487121e4a0f..c90cde5b4aee 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -173,11 +173,11 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
 	if (*cqn == -1)
 		return -ENOMEM;
 
-	err = mlx4_table_get(dev, &cq_table->table, *cqn);
+	err = mlx4_table_get(dev, &cq_table->table, *cqn, GFP_KERNEL);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn);
+	err = mlx4_table_get(dev, &cq_table->cmpt_table, *cqn, GFP_KERNEL);
 	if (err)
 		goto err_put;
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index ba049ae88749..87857a6463eb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -972,7 +972,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
 	if (!context)
 		return -ENOMEM;
 
-	err = mlx4_qp_alloc(mdev->dev, qpn, qp);
+	err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed to allocate qp #%x\n", qpn);
 		goto out;
@@ -1012,7 +1012,7 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
 		en_err(priv, "Failed reserving drop qpn\n");
 		return err;
 	}
-	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp);
+	err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed allocating drop qp\n");
 		mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
@@ -1071,7 +1071,7 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
 	}
 
 	/* Configure RSS indirection qp */
-	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp);
+	err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed to allocate RSS indirection QP\n");
 		goto rss_err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index dd1f6d346459..bc0cc1eb214d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -113,7 +113,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 	       ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map);
 
 	ring->qpn = qpn;
-	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp);
+	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
 		goto err_map;
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 5fbf4924c272..eb1747e1937d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -245,7 +245,8 @@ int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
 			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
+		   int gfp)
 {
 	u32 i = (obj & (table->num_obj - 1)) /
 			(MLX4_TABLE_CHUNK_SIZE / table->obj_size);
@@ -259,7 +260,7 @@ int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj)
 	}
 
 	table->icm[i] = mlx4_alloc_icm(dev, MLX4_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
-				       (table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
+				       (table->lowmem ? gfp : GFP_HIGHUSER) |
 				       __GFP_NOWARN, table->coherent);
 	if (!table->icm[i]) {
 		ret = -ENOMEM;
@@ -356,7 +357,7 @@ int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 	u32 i;
 
 	for (i = start; i <= end; i += inc) {
-		err = mlx4_table_get(dev, table, i);
+		err = mlx4_table_get(dev, table, i, GFP_KERNEL);
 		if (err)
 			goto fail;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h
index dee67fa39107..067e6e0af36c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.h
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.h
@@ -71,7 +71,8 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages,
 				gfp_t gfp_mask, int coherent);
 void mlx4_free_icm(struct mlx4_dev *dev, struct mlx4_icm *icm, int coherent);
 
-int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
+int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj,
+		   int gfp);
 void mlx4_table_put(struct mlx4_dev *dev, struct mlx4_icm_table *table, u32 obj);
 int mlx4_table_get_range(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 			 u32 start, u32 end);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index f9c465101963..627a54ef2955 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -888,7 +888,7 @@ void mlx4_cleanup_cq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
 void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
-int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn);
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp);
 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn);
 int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn);
 void __mlx4_cq_free_icm(struct mlx4_dev *dev, int cqn);
@@ -896,7 +896,7 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn);
 void __mlx4_srq_free_icm(struct mlx4_dev *dev, int srqn);
 int __mlx4_mpt_reserve(struct mlx4_dev *dev);
 void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index);
-int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index);
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp);
 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index);
 u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order);
 void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 24835853b753..4c71dafad217 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -364,14 +364,14 @@ static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
 	__mlx4_mpt_release(dev, index);
 }
 
-int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
+int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
 {
 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
 
-	return mlx4_table_get(dev, &mr_table->dmpt_table, index);
+	return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp);
 }
 
-static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
+static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
 {
 	u64 param = 0;
 
@@ -382,7 +382,7 @@ static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index)
 							MLX4_CMD_TIME_CLASS_A,
 							MLX4_CMD_WRAPPED);
 	}
-	return __mlx4_mpt_alloc_icm(dev, index);
+	return __mlx4_mpt_alloc_icm(dev, index, gfp);
 }
 
 void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
@@ -469,7 +469,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 	struct mlx4_mpt_entry *mpt_entry;
 	int err;
 
-	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key));
+	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL);
 	if (err)
 		return err;
 
@@ -627,13 +627,14 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 EXPORT_SYMBOL_GPL(mlx4_write_mtt);
 
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-		       struct mlx4_buf *buf)
+		       struct mlx4_buf *buf, gfp_t gfp)
 {
 	u64 *page_list;
 	int err;
 	int i;
 
-	page_list = kmalloc(buf->npages * sizeof *page_list, GFP_KERNEL);
+	page_list = kmalloc(buf->npages * sizeof *page_list,
+			    gfp);
 	if (!page_list)
 		return -ENOMEM;
 
@@ -680,7 +681,7 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw)
 	struct mlx4_mpt_entry *mpt_entry;
 	int err;
 
-	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key));
+	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 61d64ebffd56..917f0d0ba7c6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -272,29 +272,29 @@ void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt)
 }
 EXPORT_SYMBOL_GPL(mlx4_qp_release_range);
 
-int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
+int __mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, gfp_t gfp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_qp_table *qp_table = &priv->qp_table;
 	int err;
 
-	err = mlx4_table_get(dev, &qp_table->qp_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->qp_table, qpn, gfp);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &qp_table->auxc_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->auxc_table, qpn, gfp);
 	if (err)
 		goto err_put_qp;
 
-	err = mlx4_table_get(dev, &qp_table->altc_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->altc_table, qpn, gfp);
 	if (err)
 		goto err_put_auxc;
 
-	err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->rdmarc_table, qpn, gfp);
 	if (err)
 		goto err_put_altc;
 
-	err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn);
+	err = mlx4_table_get(dev, &qp_table->cmpt_table, qpn, gfp);
 	if (err)
 		goto err_put_rdmarc;
 
@@ -316,7 +316,7 @@ err_out:
 	return err;
 }
 
-static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
+static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn, int gfp)
 {
 	u64 param = 0;
 
@@ -326,7 +326,7 @@ static int mlx4_qp_alloc_icm(struct mlx4_dev *dev, int qpn)
 				    MLX4_CMD_ALLOC_RES, MLX4_CMD_TIME_CLASS_A,
 				    MLX4_CMD_WRAPPED);
 	}
-	return __mlx4_qp_alloc_icm(dev, qpn);
+	return __mlx4_qp_alloc_icm(dev, qpn, gfp);
 }
 
 void __mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
@@ -355,7 +355,7 @@ static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn)
 		__mlx4_qp_free_icm(dev, qpn);
 }
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_qp_table *qp_table = &priv->qp_table;
@@ -366,7 +366,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
 
 	qp->qpn = qpn;
 
-	err = mlx4_qp_alloc_icm(dev, qpn);
+	err = mlx4_qp_alloc_icm(dev, qpn, gfp);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 1c3fdd4a1f7d..45da913b5679 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1532,7 +1532,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 			return err;
 
 		if (!fw_reserved(dev, qpn)) {
-			err = __mlx4_qp_alloc_icm(dev, qpn);
+			err = __mlx4_qp_alloc_icm(dev, qpn, GFP_KERNEL);
 			if (err) {
 				res_abort_move(dev, slave, RES_QP, qpn);
 				return err;
@@ -1619,7 +1619,7 @@ static int mpt_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 		if (err)
 			return err;
 
-		err = __mlx4_mpt_alloc_icm(dev, mpt->key);
+		err = __mlx4_mpt_alloc_icm(dev, mpt->key, GFP_KERNEL);
 		if (err) {
 			res_abort_move(dev, slave, RES_MPT, id);
 			return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index 98faf870b0b0..67146624eb58 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -103,11 +103,11 @@ int __mlx4_srq_alloc_icm(struct mlx4_dev *dev, int *srqn)
 	if (*srqn == -1)
 		return -ENOMEM;
 
-	err = mlx4_table_get(dev, &srq_table->table, *srqn);
+	err = mlx4_table_get(dev, &srq_table->table, *srqn, GFP_KERNEL);
 	if (err)
 		goto err_out;
 
-	err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn);
+	err = mlx4_table_get(dev, &srq_table->cmpt_table, *srqn, GFP_KERNEL);
 	if (err)
 		goto err_put;
 	return 0;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ba87bd21295a..be60b002bb37 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -837,7 +837,7 @@ static inline int mlx4_is_slave(struct mlx4_dev *dev)
 }
 
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
-		   struct mlx4_buf *buf);
+		   struct mlx4_buf *buf, gfp_t gfp);
 void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf);
 static inline void *mlx4_buf_offset(struct mlx4_buf *buf, int offset)
 {
@@ -874,9 +874,10 @@ int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw);
 int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		   int start_index, int npages, u64 *page_list);
 int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
-		       struct mlx4_buf *buf);
+		       struct mlx4_buf *buf, gfp_t gfp);
 
-int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order);
+int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order,
+		  gfp_t gfp);
 void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db);
 
 int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres,
@@ -892,7 +893,8 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
 int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align, int *base);
 void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
 
-int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
+int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp,
+		  gfp_t gfp);
 void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp);
 
 int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcdn,
-- 
cgit 


From 688cea83f4396fa98b77a126ed278b89daccccdc Mon Sep 17 00:00:00 2001
From: dingtianhong <dingtianhong@huawei.com>
Date: Fri, 30 May 2014 16:00:56 +0800
Subject: macvlan: add netpoll support

Add netpoll support to macvlan devices, based on the netpoll support in
the 802.1q vlan code.

Tested: macvlan works well with netconsole.

Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      | 66 +++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/if_macvlan.h |  3 +++
 2 files changed, 68 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d2dbcfc68ee4..eee9106d1da1 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -33,6 +33,7 @@
 #include <linux/workqueue.h>
 #include <net/rtnetlink.h>
 #include <net/xfrm.h>
+#include <linux/netpoll.h>
 
 #define MACVLAN_HASH_SIZE	(1 << BITS_PER_BYTE)
 
@@ -357,12 +358,26 @@ xmit_world:
 	return dev_queue_xmit(skb);
 }
 
+static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (vlan->netpoll)
+		netpoll_send_skb(vlan->netpoll, skb);
+#else
+	BUG();
+#endif
+	return NETDEV_TX_OK;
+}
+
 static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	unsigned int len = skb->len;
 	int ret;
-	const struct macvlan_dev *vlan = netdev_priv(dev);
+	struct macvlan_dev *vlan = netdev_priv(dev);
+
+	if (unlikely(netpoll_tx_running(dev)))
+		return macvlan_netpoll_send_skb(vlan, skb);
 
 	if (vlan->fwd_priv) {
 		skb->dev = vlan->lowerdev;
@@ -788,6 +803,50 @@ static netdev_features_t macvlan_fix_features(struct net_device *dev,
 	return features;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static void macvlan_dev_poll_controller(struct net_device *dev)
+{
+	return;
+}
+
+static int macvlan_dev_netpoll_setup(struct net_device *dev, struct netpoll_info *npinfo)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct net_device *real_dev = vlan->lowerdev;
+	struct netpoll *netpoll;
+	int err = 0;
+
+	netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL);
+	err = -ENOMEM;
+	if (!netpoll)
+		goto out;
+
+	err = __netpoll_setup(netpoll, real_dev);
+	if (err) {
+		kfree(netpoll);
+		goto out;
+	}
+
+	vlan->netpoll = netpoll;
+
+out:
+	return err;
+}
+
+static void macvlan_dev_netpoll_cleanup(struct net_device *dev)
+{
+	struct macvlan_dev *vlan = netdev_priv(dev);
+	struct netpoll *netpoll = vlan->netpoll;
+
+	if (!netpoll)
+		return;
+
+	vlan->netpoll = NULL;
+
+	__netpoll_free_async(netpoll);
+}
+#endif	/* CONFIG_NET_POLL_CONTROLLER */
+
 static const struct ethtool_ops macvlan_ethtool_ops = {
 	.get_link		= ethtool_op_get_link,
 	.get_settings		= macvlan_ethtool_get_settings,
@@ -813,6 +872,11 @@ static const struct net_device_ops macvlan_netdev_ops = {
 	.ndo_fdb_del		= macvlan_fdb_del,
 	.ndo_fdb_dump		= ndo_dflt_fdb_dump,
 	.ndo_get_lock_subclass  = macvlan_get_nest_level,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller	= macvlan_dev_poll_controller,
+	.ndo_netpoll_setup	= macvlan_dev_netpoll_setup,
+	.ndo_netpoll_cleanup	= macvlan_dev_netpoll_cleanup,
+#endif
 };
 
 void macvlan_common_setup(struct net_device *dev)
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index a9a53b12397b..6b2c7cf352a5 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -57,6 +57,9 @@ struct macvlan_dev {
 	netdev_features_t	tap_features;
 	int			minor;
 	int			nest_level;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	struct netpoll		*netpoll;
+#endif
 };
 
 static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
-- 
cgit 


From 2d7a85f4b06e9c27ff629f07a524c48074f07f81 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 30 May 2014 11:04:00 -0700
Subject: netlink: Only check file credentials for implicit destinations

It was possible to get a setuid root or setcap executable to write to
its stdout or stderr (which has been made a netlink socket) and
inadvertently reconfigure the networking stack.

To prevent this we check that both the creator of the socket and
the current application have permission to reconfigure the network
stack.

Unfortunately this breaks Zebra which always uses sendto/sendmsg
and creates its socket without any privileges.

To keep Zebra working don't bother checking if the creator of the
socket has privilege when a destination address is specified.  Instead
rely exclusively on the privileges of the sender of the socket.

Note from Andy: This is exactly Eric's code except for some comment
clarifications and formatting fixes.  Neither I nor, I think, anyone
else is thrilled with this approach, but I'm hesitant to wait on a
better fix since 3.15 is almost here.

Note to stable maintainers: This is a mess.  An earlier series of
patches in 3.15 fix a rather serious security issue (CVE-2014-0181),
but they did so in a way that breaks Zebra.  The offending series
includes:

    commit aa4cf9452f469f16cea8c96283b641b4576d4a7b
    Author: Eric W. Biederman <ebiederm@xmission.com>
    Date:   Wed Apr 23 14:28:03 2014 -0700

        net: Add variants of capable for use on netlink messages

If a given kernel version is missing that series of fixes, it's
probably worth backporting it and this patch.  If that series is
present, then this fix is critical if you care about Zebra.

Cc: stable@vger.kernel.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  | 7 ++++---
 net/netlink/af_netlink.c | 7 ++++++-
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index f64b01787ddc..034cda789a15 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -16,9 +16,10 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb)
 }
 
 enum netlink_skb_flags {
-	NETLINK_SKB_MMAPED	= 0x1,		/* Packet data is mmaped */
-	NETLINK_SKB_TX		= 0x2,		/* Packet was sent by userspace */
-	NETLINK_SKB_DELIVERED	= 0x4,		/* Packet was delivered */
+	NETLINK_SKB_MMAPED	= 0x1,	/* Packet data is mmaped */
+	NETLINK_SKB_TX		= 0x2,	/* Packet was sent by userspace */
+	NETLINK_SKB_DELIVERED	= 0x4,	/* Packet was delivered */
+	NETLINK_SKB_DST		= 0x8,	/* Dst set in sendto or sendmsg */
 };
 
 struct netlink_skb_parms {
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 81dca96d2be6..f22757a29cd0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1373,7 +1373,9 @@ retry:
 bool __netlink_ns_capable(const struct netlink_skb_parms *nsp,
 			struct user_namespace *user_ns, int cap)
 {
-	return sk_ns_capable(nsp->sk, user_ns, cap);
+	return ((nsp->flags & NETLINK_SKB_DST) ||
+		file_ns_capable(nsp->sk->sk_socket->file, user_ns, cap)) &&
+		ns_capable(user_ns, cap);
 }
 EXPORT_SYMBOL(__netlink_ns_capable);
 
@@ -2293,6 +2295,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	struct sk_buff *skb;
 	int err;
 	struct scm_cookie scm;
+	u32 netlink_skb_flags = 0;
 
 	if (msg->msg_flags&MSG_OOB)
 		return -EOPNOTSUPP;
@@ -2314,6 +2317,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		if ((dst_group || dst_portid) &&
 		    !netlink_allowed(sock, NL_CFG_F_NONROOT_SEND))
 			goto out;
+		netlink_skb_flags |= NETLINK_SKB_DST;
 	} else {
 		dst_portid = nlk->dst_portid;
 		dst_group = nlk->dst_group;
@@ -2343,6 +2347,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 	NETLINK_CB(skb).portid	= nlk->portid;
 	NETLINK_CB(skb).dst_group = dst_group;
 	NETLINK_CB(skb).creds	= siocb->scm->creds;
+	NETLINK_CB(skb).flags	= netlink_skb_flags;
 
 	err = -EFAULT;
 	if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
-- 
cgit 


From fe62d001372388abb15a324148c913f9b43722a8 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Thu, 15 May 2014 01:25:27 +0100
Subject: ethtool: Replace ethtool_ops::{get,set}_rxfh_indir() with
 {get,set}_rxfh()

ETHTOOL_{G,S}RXFHINDIR and ETHTOOL_{G,S}RSSH should work for drivers
regardless of whether they expose the hash key, unless you try to
set a hash key for a driver that doesn't expose it.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Acked-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c | 15 ++++++++-------
 drivers/net/ethernet/broadcom/tg3.c                 |  8 ++++----
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c     |  8 ++++----
 drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c  | 15 +++++++++------
 drivers/net/ethernet/intel/igb/igb_ethtool.c        |  9 +++++----
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c     | 10 +++++-----
 drivers/net/ethernet/sfc/ethtool.c                  | 10 +++++-----
 drivers/net/vmxnet3/vmxnet3_ethtool.c               |  8 ++++----
 include/linux/ethtool.h                             |  6 ------
 net/core/ethtool.c                                  |  8 ++++----
 10 files changed, 48 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 03224090ecf9..af138f8aa361 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -3316,7 +3316,7 @@ static u32 bnx2x_get_rxfh_indir_size(struct net_device *dev)
 	return T_ETH_INDIRECTION_TABLE_SIZE;
 }
 
-static int bnx2x_get_rxfh_indir(struct net_device *dev, u32 *indir)
+static int bnx2x_get_rxfh(struct net_device *dev, u32 *indir, u8 *key)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
@@ -3340,14 +3340,15 @@ static int bnx2x_get_rxfh_indir(struct net_device *dev, u32 *indir)
 	return 0;
 }
 
-static int bnx2x_set_rxfh_indir(struct net_device *dev, const u32 *indir)
+static int bnx2x_set_rxfh(struct net_device *dev, const u32 *indir,
+			  const u8 *key)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	size_t i;
 
 	for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++) {
 		/*
-		 * The same as in bnx2x_get_rxfh_indir: we can't use a memcpy()
+		 * The same as in bnx2x_get_rxfh: we can't use a memcpy()
 		 * as an internal storage of an indirection table is a u8 array
 		 * while indir->ring_index points to an array of u32.
 		 *
@@ -3471,8 +3472,8 @@ static const struct ethtool_ops bnx2x_ethtool_ops = {
 	.get_rxnfc		= bnx2x_get_rxnfc,
 	.set_rxnfc		= bnx2x_set_rxnfc,
 	.get_rxfh_indir_size	= bnx2x_get_rxfh_indir_size,
-	.get_rxfh_indir		= bnx2x_get_rxfh_indir,
-	.set_rxfh_indir		= bnx2x_set_rxfh_indir,
+	.get_rxfh		= bnx2x_get_rxfh,
+	.set_rxfh		= bnx2x_set_rxfh,
 	.get_channels		= bnx2x_get_channels,
 	.set_channels		= bnx2x_set_channels,
 	.get_module_info	= bnx2x_get_module_info,
@@ -3498,8 +3499,8 @@ static const struct ethtool_ops bnx2x_vf_ethtool_ops = {
 	.get_rxnfc		= bnx2x_get_rxnfc,
 	.set_rxnfc		= bnx2x_set_rxnfc,
 	.get_rxfh_indir_size	= bnx2x_get_rxfh_indir_size,
-	.get_rxfh_indir		= bnx2x_get_rxfh_indir,
-	.set_rxfh_indir		= bnx2x_set_rxfh_indir,
+	.get_rxfh		= bnx2x_get_rxfh,
+	.set_rxfh		= bnx2x_set_rxfh,
 	.get_channels		= bnx2x_get_channels,
 	.set_channels		= bnx2x_set_channels,
 };
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index ccd90156aebc..8c2314ed260c 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12501,7 +12501,7 @@ static u32 tg3_get_rxfh_indir_size(struct net_device *dev)
 	return size;
 }
 
-static int tg3_get_rxfh_indir(struct net_device *dev, u32 *indir)
+static int tg3_get_rxfh(struct net_device *dev, u32 *indir, u8 *key)
 {
 	struct tg3 *tp = netdev_priv(dev);
 	int i;
@@ -12512,7 +12512,7 @@ static int tg3_get_rxfh_indir(struct net_device *dev, u32 *indir)
 	return 0;
 }
 
-static int tg3_set_rxfh_indir(struct net_device *dev, const u32 *indir)
+static int tg3_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key)
 {
 	struct tg3 *tp = netdev_priv(dev);
 	size_t i;
@@ -14044,8 +14044,8 @@ static const struct ethtool_ops tg3_ethtool_ops = {
 	.get_sset_count		= tg3_get_sset_count,
 	.get_rxnfc		= tg3_get_rxnfc,
 	.get_rxfh_indir_size    = tg3_get_rxfh_indir_size,
-	.get_rxfh_indir		= tg3_get_rxfh_indir,
-	.set_rxfh_indir		= tg3_set_rxfh_indir,
+	.get_rxfh		= tg3_get_rxfh,
+	.set_rxfh		= tg3_set_rxfh,
 	.get_channels		= tg3_get_channels,
 	.set_channels		= tg3_set_channels,
 	.get_ts_info		= tg3_get_ts_info,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 266a5bc6aedf..8cf6be93f491 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -2739,7 +2739,7 @@ static u32 get_rss_table_size(struct net_device *dev)
 	return pi->rss_size;
 }
 
-static int get_rss_table(struct net_device *dev, u32 *p)
+static int get_rss_table(struct net_device *dev, u32 *p, u8 *key)
 {
 	const struct port_info *pi = netdev_priv(dev);
 	unsigned int n = pi->rss_size;
@@ -2749,7 +2749,7 @@ static int get_rss_table(struct net_device *dev, u32 *p)
 	return 0;
 }
 
-static int set_rss_table(struct net_device *dev, const u32 *p)
+static int set_rss_table(struct net_device *dev, const u32 *p, const u8 *key)
 {
 	unsigned int i;
 	struct port_info *pi = netdev_priv(dev);
@@ -2851,8 +2851,8 @@ static const struct ethtool_ops cxgb_ethtool_ops = {
 	.set_wol           = set_wol,
 	.get_rxnfc         = get_rxnfc,
 	.get_rxfh_indir_size = get_rss_table_size,
-	.get_rxfh_indir    = get_rss_table,
-	.set_rxfh_indir    = set_rss_table,
+	.get_rxfh	   = get_rss_table,
+	.set_rxfh	   = set_rss_table,
 	.flash_device      = set_flash,
 };
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index 77e786d2d0e0..dbc8986c2dae 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -626,13 +626,14 @@ static u32 i40evf_get_rxfh_indir_size(struct net_device *netdev)
 }
 
 /**
- * i40evf_get_rxfh_indir - get the rx flow hash indirection table
+ * i40evf_get_rxfh - get the rx flow hash indirection table
  * @netdev: network interface device structure
  * @indir: indirection table
+ * @key: hash key (will be %NULL until get_rxfh_key_size is implemented)
  *
  * Reads the indirection table directly from the hardware. Always returns 0.
  **/
-static int i40evf_get_rxfh_indir(struct net_device *netdev, u32 *indir)
+static int i40evf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key)
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 	struct i40e_hw *hw = &adapter->hw;
@@ -650,14 +651,16 @@ static int i40evf_get_rxfh_indir(struct net_device *netdev, u32 *indir)
 }
 
 /**
- * i40evf_set_rxfh_indir - set the rx flow hash indirection table
+ * i40evf_set_rxfh - set the rx flow hash indirection table
  * @netdev: network interface device structure
  * @indir: indirection table
+ * @key: hash key (will be %NULL until get_rxfh_key_size is implemented)
  *
  * Returns -EINVAL if the table specifies an inavlid queue id, otherwise
  * returns 0 after programming the table.
  **/
-static int i40evf_set_rxfh_indir(struct net_device *netdev, const u32 *indir)
+static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir,
+			   const u8 *key)
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
 	struct i40e_hw *hw = &adapter->hw;
@@ -691,8 +694,8 @@ static struct ethtool_ops i40evf_ethtool_ops = {
 	.get_rxnfc		= i40evf_get_rxnfc,
 	.set_rxnfc		= i40evf_set_rxnfc,
 	.get_rxfh_indir_size	= i40evf_get_rxfh_indir_size,
-	.get_rxfh_indir		= i40evf_get_rxfh_indir,
-	.set_rxfh_indir		= i40evf_set_rxfh_indir,
+	.get_rxfh		= i40evf_get_rxfh,
+	.set_rxfh		= i40evf_set_rxfh,
 	.get_channels		= i40evf_get_channels,
 };
 
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index a84297c85fb1..d8bbcf1873ca 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -2830,7 +2830,7 @@ static u32 igb_get_rxfh_indir_size(struct net_device *netdev)
 	return IGB_RETA_SIZE;
 }
 
-static int igb_get_rxfh_indir(struct net_device *netdev, u32 *indir)
+static int igb_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	int i;
@@ -2876,7 +2876,8 @@ void igb_write_rss_indir_tbl(struct igb_adapter *adapter)
 	}
 }
 
-static int igb_set_rxfh_indir(struct net_device *netdev, const u32 *indir)
+static int igb_set_rxfh(struct net_device *netdev, const u32 *indir,
+			const u8 *key)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -3025,8 +3026,8 @@ static const struct ethtool_ops igb_ethtool_ops = {
 	.get_module_info	= igb_get_module_info,
 	.get_module_eeprom	= igb_get_module_eeprom,
 	.get_rxfh_indir_size	= igb_get_rxfh_indir_size,
-	.get_rxfh_indir		= igb_get_rxfh_indir,
-	.set_rxfh_indir		= igb_set_rxfh_indir,
+	.get_rxfh		= igb_get_rxfh,
+	.set_rxfh		= igb_set_rxfh,
 	.get_channels		= igb_get_channels,
 	.set_channels		= igb_set_channels,
 	.begin			= igb_ethtool_begin,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index a72d99fd7a2d..263a1c7a3370 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -564,7 +564,7 @@ static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev)
 	return priv->rx_ring_num;
 }
 
-static int mlx4_en_get_rxfh_indir(struct net_device *dev, u32 *ring_index)
+static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
@@ -582,8 +582,8 @@ static int mlx4_en_get_rxfh_indir(struct net_device *dev, u32 *ring_index)
 	return err;
 }
 
-static int mlx4_en_set_rxfh_indir(struct net_device *dev,
-		const u32 *ring_index)
+static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index,
+			    const u8 *key)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
@@ -1224,8 +1224,8 @@ const struct ethtool_ops mlx4_en_ethtool_ops = {
 	.get_rxnfc = mlx4_en_get_rxnfc,
 	.set_rxnfc = mlx4_en_set_rxnfc,
 	.get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size,
-	.get_rxfh_indir = mlx4_en_get_rxfh_indir,
-	.set_rxfh_indir = mlx4_en_set_rxfh_indir,
+	.get_rxfh = mlx4_en_get_rxfh,
+	.set_rxfh = mlx4_en_set_rxfh,
 	.get_channels = mlx4_en_get_channels,
 	.set_channels = mlx4_en_set_channels,
 	.get_ts_info = mlx4_en_get_ts_info,
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 0de8b07c24c2..74739c4b9997 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -1033,7 +1033,7 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
 		0 : ARRAY_SIZE(efx->rx_indir_table));
 }
 
-static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev, u32 *indir)
+static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
@@ -1041,8 +1041,8 @@ static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev, u32 *indir)
 	return 0;
 }
 
-static int efx_ethtool_set_rxfh_indir(struct net_device *net_dev,
-				      const u32 *indir)
+static int efx_ethtool_set_rxfh(struct net_device *net_dev,
+				const u32 *indir, const u8 *key)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
@@ -1125,8 +1125,8 @@ const struct ethtool_ops efx_ethtool_ops = {
 	.get_rxnfc		= efx_ethtool_get_rxnfc,
 	.set_rxnfc		= efx_ethtool_set_rxnfc,
 	.get_rxfh_indir_size	= efx_ethtool_get_rxfh_indir_size,
-	.get_rxfh_indir		= efx_ethtool_get_rxfh_indir,
-	.set_rxfh_indir		= efx_ethtool_set_rxfh_indir,
+	.get_rxfh		= efx_ethtool_get_rxfh,
+	.set_rxfh		= efx_ethtool_set_rxfh,
 	.get_ts_info		= efx_ethtool_get_ts_info,
 	.get_module_info	= efx_ethtool_get_module_info,
 	.get_module_eeprom	= efx_ethtool_get_module_eeprom,
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 00e120296e92..9396cca93b09 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -579,7 +579,7 @@ vmxnet3_get_rss_indir_size(struct net_device *netdev)
 }
 
 static int
-vmxnet3_get_rss_indir(struct net_device *netdev, u32 *p)
+vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	struct UPT1_RSSConf *rssConf = adapter->rss_conf;
@@ -592,7 +592,7 @@ vmxnet3_get_rss_indir(struct net_device *netdev, u32 *p)
 }
 
 static int
-vmxnet3_set_rss_indir(struct net_device *netdev, const u32 *p)
+vmxnet3_set_rss(struct net_device *netdev, const u32 *p, const u8 *key)
 {
 	unsigned int i;
 	unsigned long flags;
@@ -628,8 +628,8 @@ static const struct ethtool_ops vmxnet3_ethtool_ops = {
 	.get_rxnfc         = vmxnet3_get_rxnfc,
 #ifdef VMXNET3_RSS
 	.get_rxfh_indir_size = vmxnet3_get_rss_indir_size,
-	.get_rxfh_indir    = vmxnet3_get_rss_indir,
-	.set_rxfh_indir    = vmxnet3_set_rss_indir,
+	.get_rxfh          = vmxnet3_get_rss,
+	.set_rxfh          = vmxnet3_set_rss,
 #endif
 };
 
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 874fde01d398..e658229fee39 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -158,15 +158,11 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
  *	Returns zero if not supported for this specific device.
  * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table.
  *	Returns zero if not supported for this specific device.
- * @get_rxfh_indir: Get the contents of the RX flow hash indirection table.
- *	Will not be called if @get_rxfh_indir_size returns zero.
  * @get_rxfh: Get the contents of the RX flow hash indirection table and hash
  *	key.
  *	Will only be called if one or both of @get_rxfh_indir_size and
  *	@get_rxfh_key_size are implemented and return non-zero.
  *	Returns a negative error code or zero.
- * @set_rxfh_indir: Set the contents of the RX flow hash indirection table.
- *	Will not be called if @get_rxfh_indir_size returns zero.
  * @set_rxfh: Set the contents of the RX flow hash indirection table and/or
  *	hash key.  In case only the indirection table or hash key is to be
  *	changed, the other argument will be %NULL.
@@ -248,8 +244,6 @@ struct ethtool_ops {
 	int	(*get_rxfh)(struct net_device *, u32 *indir, u8 *key);
 	int	(*set_rxfh)(struct net_device *, const u32 *indir,
 			    const u8 *key);
-	int	(*get_rxfh_indir)(struct net_device *, u32 *);
-	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*set_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*get_dump_flag)(struct net_device *, struct ethtool_dump *);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index b8857348bdf3..8ae452afb545 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -582,7 +582,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
 	int ret;
 
 	if (!dev->ethtool_ops->get_rxfh_indir_size ||
-	    !dev->ethtool_ops->get_rxfh_indir)
+	    !dev->ethtool_ops->get_rxfh)
 		return -EOPNOTSUPP;
 	dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
 	if (dev_size == 0)
@@ -608,7 +608,7 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
 	if (!indir)
 		return -ENOMEM;
 
-	ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
+	ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL);
 	if (ret)
 		goto out;
 
@@ -632,7 +632,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 	int ret;
 	u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
 
-	if (!ops->get_rxfh_indir_size || !ops->set_rxfh_indir ||
+	if (!ops->get_rxfh_indir_size || !ops->set_rxfh ||
 	    !ops->get_rxnfc)
 		return -EOPNOTSUPP;
 
@@ -669,7 +669,7 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 			goto out;
 	}
 
-	ret = ops->set_rxfh_indir(dev, indir);
+	ret = ops->set_rxfh(dev, indir, NULL);
 
 out:
 	kfree(indir);
-- 
cgit 


From 58a9e5b98360e8dcf9c958c0552fb35279e3933f Mon Sep 17 00:00:00 2001
From: Michael Brunner <mibru@gmx.de>
Date: Tue, 8 Apr 2014 08:21:06 +0200
Subject: mfd: Add sysfs attributes for Kontron PLD firmware revision

This patch adds attributes to the Kontron PLD driver to allow
applications to retrieve firmware information.
Additionally the format has been changed to conform with the
representation in other Kontron software.

Signed-off-by: Michael Brunner <michael.brunner@kontron.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/kempld-core.c  | 127 +++++++++++++++++++++++++++++++++++++--------
 include/linux/mfd/kempld.h |   4 ++
 2 files changed, 110 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/kempld-core.c b/drivers/mfd/kempld-core.c
index 07692604e119..25c5ca6797da 100644
--- a/drivers/mfd/kempld-core.c
+++ b/drivers/mfd/kempld-core.c
@@ -288,9 +288,38 @@ EXPORT_SYMBOL_GPL(kempld_release_mutex);
  */
 static int kempld_get_info(struct kempld_device_data *pld)
 {
+	int ret;
 	struct kempld_platform_data *pdata = dev_get_platdata(pld->dev);
+	char major, minor;
+
+	ret = pdata->get_info(pld);
+	if (ret)
+		return ret;
+
+	/* The Kontron PLD firmware version string has the following format:
+	 * Pwxy.zzzz
+	 *   P:    Fixed
+	 *   w:    PLD number    - 1 hex digit
+	 *   x:    Major version - 1 alphanumerical digit (0-9A-V)
+	 *   y:    Minor version - 1 alphanumerical digit (0-9A-V)
+	 *   zzzz: Build number  - 4 zero padded hex digits */
 
-	return pdata->get_info(pld);
+	if (pld->info.major < 10)
+		major = pld->info.major + '0';
+	else
+		major = (pld->info.major - 10) + 'A';
+	if (pld->info.minor < 10)
+		minor = pld->info.minor + '0';
+	else
+		minor = (pld->info.minor - 10) + 'A';
+
+	ret = scnprintf(pld->info.version, sizeof(pld->info.version),
+			"P%X%c%c.%04X", pld->info.number, major, minor,
+			pld->info.buildnr);
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
 
 /*
@@ -307,9 +336,71 @@ static int kempld_register_cells(struct kempld_device_data *pld)
 	return pdata->register_cells(pld);
 }
 
+static const char *kempld_get_type_string(struct kempld_device_data *pld)
+{
+	const char *version_type;
+
+	switch (pld->info.type) {
+	case 0:
+		version_type = "release";
+		break;
+	case 1:
+		version_type = "debug";
+		break;
+	case 2:
+		version_type = "custom";
+		break;
+	default:
+		version_type = "unspecified";
+		break;
+	}
+
+	return version_type;
+}
+
+static ssize_t kempld_version_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct kempld_device_data *pld = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", pld->info.version);
+}
+
+static ssize_t kempld_specification_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct kempld_device_data *pld = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d.%d\n", pld->info.spec_major,
+		       pld->info.spec_minor);
+}
+
+static ssize_t kempld_type_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct kempld_device_data *pld = dev_get_drvdata(dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", kempld_get_type_string(pld));
+}
+
+static DEVICE_ATTR(pld_version, S_IRUGO, kempld_version_show, NULL);
+static DEVICE_ATTR(pld_specification, S_IRUGO, kempld_specification_show,
+		   NULL);
+static DEVICE_ATTR(pld_type, S_IRUGO, kempld_type_show, NULL);
+
+static struct attribute *pld_attributes[] = {
+	&dev_attr_pld_version.attr,
+	&dev_attr_pld_specification.attr,
+	&dev_attr_pld_type.attr,
+	NULL
+};
+
+static const struct attribute_group pld_attr_group = {
+	.attrs = pld_attributes,
+};
+
 static int kempld_detect_device(struct kempld_device_data *pld)
 {
-	char *version_type;
 	u8 index_reg;
 	int ret;
 
@@ -335,27 +426,19 @@ static int kempld_detect_device(struct kempld_device_data *pld)
 	if (ret)
 		return ret;
 
-	switch (pld->info.type) {
-	case 0:
-		version_type = "release";
-		break;
-	case 1:
-		version_type = "debug";
-		break;
-	case 2:
-		version_type = "custom";
-		break;
-	default:
-		version_type = "unspecified";
-	}
+	dev_info(pld->dev, "Found Kontron PLD - %s (%s), spec %d.%d\n",
+		 pld->info.version, kempld_get_type_string(pld),
+		 pld->info.spec_major, pld->info.spec_minor);
+
+	ret = sysfs_create_group(&pld->dev->kobj, &pld_attr_group);
+	if (ret)
+		return ret;
 
-	dev_info(pld->dev, "Found Kontron PLD %d\n", pld->info.number);
-	dev_info(pld->dev, "%s version %d.%d build %d, specification %d.%d\n",
-		 version_type, pld->info.major, pld->info.minor,
-		 pld->info.buildnr, pld->info.spec_major,
-		 pld->info.spec_minor);
+	ret = kempld_register_cells(pld);
+	if (ret)
+		sysfs_remove_group(&pld->dev->kobj, &pld_attr_group);
 
-	return kempld_register_cells(pld);
+	return ret;
 }
 
 static int kempld_probe(struct platform_device *pdev)
@@ -399,6 +482,8 @@ static int kempld_remove(struct platform_device *pdev)
 	struct kempld_device_data *pld = platform_get_drvdata(pdev);
 	struct kempld_platform_data *pdata = dev_get_platdata(pld->dev);
 
+	sysfs_remove_group(&pld->dev->kobj, &pld_attr_group);
+
 	mfd_remove_devices(&pdev->dev);
 	pdata->release_hardware_mutex(pld);
 
diff --git a/include/linux/mfd/kempld.h b/include/linux/mfd/kempld.h
index b911ef3add03..26e0b469e567 100644
--- a/include/linux/mfd/kempld.h
+++ b/include/linux/mfd/kempld.h
@@ -51,6 +51,8 @@
 #define	KEMPLD_TYPE_DEBUG		0x1
 #define	KEMPLD_TYPE_CUSTOM		0x2
 
+#define KEMPLD_VERSION_LEN		10
+
 /**
  * struct kempld_info - PLD device information structure
  * @major:	PLD major revision
@@ -60,6 +62,7 @@
  * @type:	PLD type
  * @spec_major:	PLD FW specification major revision
  * @spec_minor:	PLD FW specification minor revision
+ * @version:	PLD version string
  */
 struct kempld_info {
 	unsigned int major;
@@ -69,6 +72,7 @@ struct kempld_info {
 	unsigned int type;
 	unsigned int spec_major;
 	unsigned int spec_minor;
+	char version[KEMPLD_VERSION_LEN];
 };
 
 /**
-- 
cgit 


From 7abafa0a66414e385d122bcbc655a1d55ecbaecf Mon Sep 17 00:00:00 2001
From: Jay Aurabind <mail@aurabindo.in>
Date: Wed, 21 May 2014 22:49:54 +0530
Subject: mfd: abx500-core: Fix compiler warning larger stack frame
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On systems with CONFIG_FRAME_WARN=1024, the compiler warns about the
allocation of a struct device object on the stack. Allocate it dynamically
to fix the warning. Also change the function's return type to int so as to
account for error handling.

drivers/mfd/abx500-core.c: In function ‘abx500_dump_all_banks’:
drivers/mfd/abx500-core.c:167:1: warning: the frame size of 1032 bytes
is larger than 1024 bytes [-Wframe-larger-than=]

Signed-off-by: Aurabindo J <mail@aurabindo.in>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/abx500-core.c  | 12 ++++++++----
 include/linux/mfd/abx500.h |  2 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/abx500-core.c b/drivers/mfd/abx500-core.c
index f3a15aa54d7b..d6d0ec4d21e4 100644
--- a/drivers/mfd/abx500-core.c
+++ b/drivers/mfd/abx500-core.c
@@ -151,19 +151,23 @@ int abx500_startup_irq_enabled(struct device *dev, unsigned int irq)
 }
 EXPORT_SYMBOL(abx500_startup_irq_enabled);
 
-void abx500_dump_all_banks(void)
+int abx500_dump_all_banks(void)
 {
 	struct abx500_ops *ops;
-	struct device dummy_child = {NULL};
+	struct device *dummy_child;
 	struct abx500_device_entry *dev_entry;
 
+	dummy_child = kzalloc(sizeof(struct device), GFP_KERNEL);
+	if (!dummy_child)
+		return -ENOMEM;
 	list_for_each_entry(dev_entry, &abx500_list, list) {
-		dummy_child.parent = dev_entry->dev;
+		dummy_child->parent = dev_entry->dev;
 		ops = &dev_entry->ops;
 
 		if ((ops != NULL) && (ops->dump_all_banks != NULL))
-			ops->dump_all_banks(&dummy_child);
+			ops->dump_all_banks(dummy_child);
 	}
+	kfree(dummy_child);
 }
 EXPORT_SYMBOL(abx500_dump_all_banks);
 
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index 3301b2031c8d..df2508f7f3d2 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -330,7 +330,7 @@ int abx500_mask_and_set_register_interruptible(struct device *dev, u8 bank,
 int abx500_get_chip_id(struct device *dev);
 int abx500_event_registers_startup_state_get(struct device *dev, u8 *event);
 int abx500_startup_irq_enabled(struct device *dev, unsigned int irq);
-void abx500_dump_all_banks(void);
+int abx500_dump_all_banks(void);
 
 struct abx500_ops {
 	int (*get_chip_id) (struct device *);
-- 
cgit 


From d09b711a31ed891dc372039ccd347cdc8402da04 Mon Sep 17 00:00:00 2001
From: Alexander Shiyan <shc_work@mail.ru>
Date: Wed, 16 Apr 2014 10:13:33 +0400
Subject: mfd: mc13xxx: Move definitions out of structures

Signed-off-by: Alexander Shiyan <shc_work@mail.ru>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/mc13xxx.h | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h
index a326c850f046..d63b1d309106 100644
--- a/include/linux/mfd/mc13xxx.h
+++ b/include/linux/mfd/mc13xxx.h
@@ -117,10 +117,6 @@ struct mc13xxx_led_platform_data {
 
 #define MAX_LED_CONTROL_REGS	6
 
-struct mc13xxx_leds_platform_data {
-	struct mc13xxx_led_platform_data *led;
-	int num_leds;
-
 /* MC13783 LED Control 0 */
 #define MC13783_LED_C0_ENABLE		(1 << 0)
 #define MC13783_LED_C0_TRIODE_MD	(1 << 7)
@@ -169,10 +165,13 @@ struct mc13xxx_leds_platform_data {
 /* MC34708 LED Control 0 */
 #define MC34708_LED_C0_CURRENT_R(x)	(((x) & 0x3) << 9)
 #define MC34708_LED_C0_CURRENT_G(x)	(((x) & 0x3) << 21)
+
+struct mc13xxx_leds_platform_data {
+	struct mc13xxx_led_platform_data *led;
+	int num_leds;
 	u32 led_control[MAX_LED_CONTROL_REGS];
 };
 
-struct mc13xxx_buttons_platform_data {
 #define MC13783_BUTTON_DBNC_0MS		0
 #define MC13783_BUTTON_DBNC_30MS	1
 #define MC13783_BUTTON_DBNC_150MS	2
@@ -180,6 +179,8 @@ struct mc13xxx_buttons_platform_data {
 #define MC13783_BUTTON_ENABLE		(1 << 2)
 #define MC13783_BUTTON_POL_INVERT	(1 << 3)
 #define MC13783_BUTTON_RESET_EN		(1 << 4)
+
+struct mc13xxx_buttons_platform_data {
 	int b1on_flags;
 	unsigned short b1on_key;
 	int b2on_flags;
@@ -188,14 +189,14 @@ struct mc13xxx_buttons_platform_data {
 	unsigned short b3on_key;
 };
 
+#define MC13783_TS_ATO_FIRST	false
+#define MC13783_TS_ATO_EACH	true
+
 struct mc13xxx_ts_platform_data {
 	/* Delay between Touchscreen polarization and ADC Conversion.
 	 * Given in clock ticks of a 32 kHz clock which gives a granularity of
 	 * about 30.5ms */
 	u8 ato;
-
-#define MC13783_TS_ATO_FIRST false
-#define MC13783_TS_ATO_EACH  true
 	/* Use the ATO delay only for the first conversion or for each one */
 	bool atox;
 };
@@ -210,11 +211,12 @@ struct mc13xxx_codec_platform_data {
 	enum mc13783_ssi_port dac_ssi_port;
 };
 
-struct mc13xxx_platform_data {
-#define MC13XXX_USE_TOUCHSCREEN (1 << 0)
+#define MC13XXX_USE_TOUCHSCREEN	(1 << 0)
 #define MC13XXX_USE_CODEC	(1 << 1)
 #define MC13XXX_USE_ADC		(1 << 2)
 #define MC13XXX_USE_RTC		(1 << 3)
+
+struct mc13xxx_platform_data {
 	unsigned int flags;
 
 	struct mc13xxx_regulator_platform_data regulators;
-- 
cgit 


From 3176a521922b8ebcf7a593063cc55344486d2cd7 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Tue, 15 Apr 2014 19:40:09 +0800
Subject: mfd: tps65218: Remove unused *rdev[] from struct tps65218

The *rdev[] is not used since commit 413be59e2f333
"regulator: tps65218: Remove unnecessary regulator_unregister call".

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/tps65218.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h
index d2e357df5a0e..2f9b593246ee 100644
--- a/include/linux/mfd/tps65218.h
+++ b/include/linux/mfd/tps65218.h
@@ -267,7 +267,6 @@ struct tps65218 {
 	u32 irq_mask;
 	struct regmap_irq_chip_data *irq_data;
 	struct regulator_desc desc[TPS65218_NUM_REGULATOR];
-	struct regulator_dev *rdev[TPS65218_NUM_REGULATOR];
 	struct tps_info *info[TPS65218_NUM_REGULATOR];
 	struct regmap *regmap;
 };
-- 
cgit 


From e349c910e2398cbff59d7c58851503191a8e9157 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Mon, 14 Apr 2014 09:40:45 +0200
Subject: mfd/rtc: s5m: Do not allocate RTC I2C dummy and regmap for
 unsupported chipsets

The rtc-s5m driver does not support all of S2M and S5M chipsets
supported by main MFD sec-core driver. For such chipsets unsupported by
rtc-s5m, the MFD sec-core driver initialized regmap with default config.
This config in such cases wouldn't work at all.

The main MFD sec-core driver shouldn't initialize a regmap for child
drivers that do not use it and for which it is not even valid.

Move the allocation of RTC I2C dummy device and initialization of RTC
regmap from main MFD sec-core driver to the rtc-s5m driver. The rtc-s5m
driver will use proper regmap config for supported devices.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Acked-by: Alessandro Zummo <a.zummo@towertech.it>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/sec-core.c           | 53 +---------------------------
 drivers/rtc/rtc-s5m.c            | 75 +++++++++++++++++++++++++++++++++++++---
 include/linux/mfd/samsung/core.h |  3 --
 3 files changed, 71 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c
index 1cf27521fff4..d4682c6cbff5 100644
--- a/drivers/mfd/sec-core.c
+++ b/drivers/mfd/sec-core.c
@@ -25,7 +25,6 @@
 #include <linux/mfd/core.h>
 #include <linux/mfd/samsung/core.h>
 #include <linux/mfd/samsung/irq.h>
-#include <linux/mfd/samsung/rtc.h>
 #include <linux/mfd/samsung/s2mpa01.h>
 #include <linux/mfd/samsung/s2mps11.h>
 #include <linux/mfd/samsung/s2mps14.h>
@@ -196,20 +195,6 @@ static const struct regmap_config s5m8767_regmap_config = {
 	.cache_type = REGCACHE_FLAT,
 };
 
-static const struct regmap_config s5m_rtc_regmap_config = {
-	.reg_bits = 8,
-	.val_bits = 8,
-
-	.max_register = SEC_RTC_REG_MAX,
-};
-
-static const struct regmap_config s2mps14_rtc_regmap_config = {
-	.reg_bits = 8,
-	.val_bits = 8,
-
-	.max_register = S2MPS_RTC_REG_MAX,
-};
-
 #ifdef CONFIG_OF
 /*
  * Only the common platform data elements for s5m8767 are parsed here from the
@@ -264,7 +249,7 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 			    const struct i2c_device_id *id)
 {
 	struct sec_platform_data *pdata = dev_get_platdata(&i2c->dev);
-	const struct regmap_config *regmap, *regmap_rtc;
+	const struct regmap_config *regmap;
 	struct sec_pmic_dev *sec_pmic;
 	int ret;
 
@@ -298,39 +283,21 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 	switch (sec_pmic->device_type) {
 	case S2MPA01:
 		regmap = &s2mpa01_regmap_config;
-		/*
-		 * The rtc-s5m driver does not support S2MPA01 and there
-		 * is no mfd_cell for S2MPA01 RTC device.
-		 * However we must pass something to devm_regmap_init_i2c()
-		 * so use S5M-like regmap config even though it wouldn't work.
-		 */
-		regmap_rtc = &s5m_rtc_regmap_config;
 		break;
 	case S2MPS11X:
 		regmap = &s2mps11_regmap_config;
-		/*
-		 * The rtc-s5m driver does not support S2MPS11 and there
-		 * is no mfd_cell for S2MPS11 RTC device.
-		 * However we must pass something to devm_regmap_init_i2c()
-		 * so use S5M-like regmap config even though it wouldn't work.
-		 */
-		regmap_rtc = &s5m_rtc_regmap_config;
 		break;
 	case S2MPS14X:
 		regmap = &s2mps14_regmap_config;
-		regmap_rtc = &s2mps14_rtc_regmap_config;
 		break;
 	case S5M8763X:
 		regmap = &s5m8763_regmap_config;
-		regmap_rtc = &s5m_rtc_regmap_config;
 		break;
 	case S5M8767X:
 		regmap = &s5m8767_regmap_config;
-		regmap_rtc = &s5m_rtc_regmap_config;
 		break;
 	default:
 		regmap = &sec_regmap_config;
-		regmap_rtc = &s5m_rtc_regmap_config;
 		break;
 	}
 
@@ -342,21 +309,6 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 		return ret;
 	}
 
-	sec_pmic->rtc = i2c_new_dummy(i2c->adapter, RTC_I2C_ADDR);
-	if (!sec_pmic->rtc) {
-		dev_err(&i2c->dev, "Failed to allocate I2C for RTC\n");
-		return -ENODEV;
-	}
-	i2c_set_clientdata(sec_pmic->rtc, sec_pmic);
-
-	sec_pmic->regmap_rtc = devm_regmap_init_i2c(sec_pmic->rtc, regmap_rtc);
-	if (IS_ERR(sec_pmic->regmap_rtc)) {
-		ret = PTR_ERR(sec_pmic->regmap_rtc);
-		dev_err(&i2c->dev, "Failed to allocate RTC register map: %d\n",
-			ret);
-		goto err_regmap_rtc;
-	}
-
 	if (pdata && pdata->cfg_pmic_irq)
 		pdata->cfg_pmic_irq();
 
@@ -403,8 +355,6 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 
 err_mfd:
 	sec_irq_exit(sec_pmic);
-err_regmap_rtc:
-	i2c_unregister_device(sec_pmic->rtc);
 	return ret;
 }
 
@@ -414,7 +364,6 @@ static int sec_pmic_remove(struct i2c_client *i2c)
 
 	mfd_remove_devices(sec_pmic->dev);
 	sec_irq_exit(sec_pmic);
-	i2c_unregister_device(sec_pmic->rtc);
 	return 0;
 }
 
diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index 476af93543f6..8ec2d6a1dbe1 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -40,6 +40,7 @@
 
 struct s5m_rtc_info {
 	struct device *dev;
+	struct i2c_client *i2c;
 	struct sec_pmic_dev *s5m87xx;
 	struct regmap *regmap;
 	struct rtc_device *rtc_dev;
@@ -49,6 +50,20 @@ struct s5m_rtc_info {
 	bool wtsr_smpl;
 };
 
+static const struct regmap_config s5m_rtc_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+
+	.max_register = SEC_RTC_REG_MAX,
+};
+
+static const struct regmap_config s2mps14_rtc_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+
+	.max_register = S2MPS_RTC_REG_MAX,
+};
+
 static void s5m8767_data_to_tm(u8 *data, struct rtc_time *tm,
 			       int rtc_24hr_mode)
 {
@@ -554,6 +569,7 @@ static int s5m_rtc_probe(struct platform_device *pdev)
 	struct sec_pmic_dev *s5m87xx = dev_get_drvdata(pdev->dev.parent);
 	struct sec_platform_data *pdata = s5m87xx->pdata;
 	struct s5m_rtc_info *info;
+	const struct regmap_config *regmap_cfg;
 	int ret;
 
 	if (!pdata) {
@@ -565,9 +581,37 @@ static int s5m_rtc_probe(struct platform_device *pdev)
 	if (!info)
 		return -ENOMEM;
 
+	switch (pdata->device_type) {
+	case S2MPS14X:
+		regmap_cfg = &s2mps14_rtc_regmap_config;
+		break;
+	case S5M8763X:
+		regmap_cfg = &s5m_rtc_regmap_config;
+		break;
+	case S5M8767X:
+		regmap_cfg = &s5m_rtc_regmap_config;
+		break;
+	default:
+		dev_err(&pdev->dev, "Device type is not supported by RTC driver\n");
+		return -ENODEV;
+	}
+
+	info->i2c = i2c_new_dummy(s5m87xx->i2c->adapter, RTC_I2C_ADDR);
+	if (!info->i2c) {
+		dev_err(&pdev->dev, "Failed to allocate I2C for RTC\n");
+		return -ENODEV;
+	}
+
+	info->regmap = devm_regmap_init_i2c(info->i2c, regmap_cfg);
+	if (IS_ERR(info->regmap)) {
+		ret = PTR_ERR(info->regmap);
+		dev_err(&pdev->dev, "Failed to allocate RTC register map: %d\n",
+				ret);
+		goto err;
+	}
+
 	info->dev = &pdev->dev;
 	info->s5m87xx = s5m87xx;
-	info->regmap = s5m87xx->regmap_rtc;
 	info->device_type = s5m87xx->device_type;
 	info->wtsr_smpl = s5m87xx->wtsr_smpl;
 
@@ -585,7 +629,7 @@ static int s5m_rtc_probe(struct platform_device *pdev)
 	default:
 		ret = -EINVAL;
 		dev_err(&pdev->dev, "Unsupported device type: %d\n", ret);
-		return ret;
+		goto err;
 	}
 
 	platform_set_drvdata(pdev, info);
@@ -602,15 +646,24 @@ static int s5m_rtc_probe(struct platform_device *pdev)
 	info->rtc_dev = devm_rtc_device_register(&pdev->dev, "s5m-rtc",
 						 &s5m_rtc_ops, THIS_MODULE);
 
-	if (IS_ERR(info->rtc_dev))
-		return PTR_ERR(info->rtc_dev);
+	if (IS_ERR(info->rtc_dev)) {
+		ret = PTR_ERR(info->rtc_dev);
+		goto err;
+	}
 
 	ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL,
 					s5m_rtc_alarm_irq, 0, "rtc-alarm0",
 					info);
-	if (ret < 0)
+	if (ret < 0) {
 		dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n",
 			info->irq, ret);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	i2c_unregister_device(info->i2c);
 
 	return ret;
 }
@@ -639,6 +692,17 @@ static void s5m_rtc_shutdown(struct platform_device *pdev)
 	s5m_rtc_enable_smpl(info, false);
 }
 
+static int s5m_rtc_remove(struct platform_device *pdev)
+{
+	struct s5m_rtc_info *info = platform_get_drvdata(pdev);
+
+	/* Perform also all shutdown steps when removing */
+	s5m_rtc_shutdown(pdev);
+	i2c_unregister_device(info->i2c);
+
+	return 0;
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int s5m_rtc_resume(struct device *dev)
 {
@@ -676,6 +740,7 @@ static struct platform_driver s5m_rtc_driver = {
 		.pm	= &s5m_rtc_pm_ops,
 	},
 	.probe		= s5m_rtc_probe,
+	.remove		= s5m_rtc_remove,
 	.shutdown	= s5m_rtc_shutdown,
 	.id_table	= s5m_rtc_id,
 };
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 157e32b6ca28..84aaf6c25794 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -28,7 +28,6 @@ enum sec_device_type {
  * @dev: master device of the chip (can be used to access platform data)
  * @pdata: pointer to private data used to pass platform data to child
  * @i2c: i2c client private data for regulator
- * @rtc: i2c client private data for rtc
  * @iolock: mutex for serializing io access
  * @irqlock: mutex for buslock
  * @irq_base: base IRQ number for sec-pmic, required for IRQs
@@ -42,9 +41,7 @@ struct sec_pmic_dev {
 	struct device *dev;
 	struct sec_platform_data *pdata;
 	struct regmap *regmap_pmic;
-	struct regmap *regmap_rtc;
 	struct i2c_client *i2c;
-	struct i2c_client *rtc;
 
 	int device_type;
 	int irq_base;
-- 
cgit 


From 51f1f1cb24b870db44edcab56ffd89ecf8ce09e8 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Thu, 24 Apr 2014 10:05:38 +0200
Subject: mfd: sec-core: Update sec_pmic documentation

Update the documentation for sec_pmic state container structure to
reflect current code.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/samsung/core.h | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 84aaf6c25794..1c66a6462887 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -24,18 +24,23 @@ enum sec_device_type {
 };
 
 /**
- * struct sec_pmic_dev - s5m87xx master device for sub-drivers
- * @dev: master device of the chip (can be used to access platform data)
- * @pdata: pointer to private data used to pass platform data to child
- * @i2c: i2c client private data for regulator
- * @iolock: mutex for serializing io access
- * @irqlock: mutex for buslock
- * @irq_base: base IRQ number for sec-pmic, required for IRQs
- * @irq: generic IRQ number for s5m87xx
- * @ono: power onoff IRQ number for s5m87xx
- * @irq_masks_cur: currently active value
- * @irq_masks_cache: cached hardware value
- * @type: indicate which s5m87xx "variant" is used
+ * struct sec_pmic_dev - s2m/s5m master device for sub-drivers
+ * @dev:		Master device of the chip
+ * @pdata:		Platform data populated with data from DTS
+ *			or board files
+ * @regmap_pmic:	Regmap associated with PMIC's I2C address
+ * @i2c:		I2C client of the main driver
+ * @device_type:	Type of device, matches enum sec_device_type
+ * @irq_base:		Base IRQ number for device, required for IRQs
+ * @irq:		Generic IRQ number for device
+ * @irq_data:		Runtime data structure for IRQ controller
+ * @ono:		Power onoff IRQ number for s5m87xx
+ * @wakeup:		Whether or not this is a wakeup device
+ * @wtsr_smpl:		Whether or not to enable in RTC driver the Watchdog
+ *			Timer Software Reset (registers set to default value
+ *			after PWRHOLD falling) and Sudden Momentary Power Loss
+ *			(PMIC will enter power on sequence after short drop in
+ *			VBATT voltage).
  */
 struct sec_pmic_dev {
 	struct device *dev;
-- 
cgit 


From 1ec93b9b176b3c4e065c326ccf40458fcc01e6c0 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Fri, 25 Apr 2014 09:24:21 +0800
Subject: mfd: rdc321x: Fix off-by-one for ngpio setting

The valid gpio is GPIO0 ~ GPIO58, so ngpio should be 59.
This patch also renames RDC321X_MAX_GPIO to RDC321X_NUM_GPIO because it
actually means the number of available GPIOs.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/rdc321x-southbridge.c | 2 +-
 include/linux/mfd/rdc321x.h       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rdc321x-southbridge.c b/drivers/mfd/rdc321x-southbridge.c
index c79569750be9..6575585f1d1f 100644
--- a/drivers/mfd/rdc321x-southbridge.c
+++ b/drivers/mfd/rdc321x-southbridge.c
@@ -38,7 +38,7 @@ static struct resource rdc321x_wdt_resource[] = {
 };
 
 static struct rdc321x_gpio_pdata rdc321x_gpio_pdata = {
-	.max_gpios	= RDC321X_MAX_GPIO,
+	.max_gpios	= RDC321X_NUM_GPIO,
 };
 
 static struct resource rdc321x_gpio_resources[] = {
diff --git a/include/linux/mfd/rdc321x.h b/include/linux/mfd/rdc321x.h
index 4bdf19c8eedf..442743a8f915 100644
--- a/include/linux/mfd/rdc321x.h
+++ b/include/linux/mfd/rdc321x.h
@@ -12,7 +12,7 @@
 #define RDC321X_GPIO_CTRL_REG2	0x84
 #define RDC321X_GPIO_DATA_REG2	0x88
 
-#define RDC321X_MAX_GPIO	58
+#define RDC321X_NUM_GPIO	59
 
 struct rdc321x_gpio_pdata {
 	struct pci_dev *sb_pdev;
-- 
cgit 


From 11e38e11afcdd598d0978746924a001e3e7cb723 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Tue, 1 Apr 2014 16:44:59 +0300
Subject: mfd: twl6040: Select i2c fast mode as default with regmap patch

All boards using twl6040 configure the i2c bus to 400KHz, while twl6040
defaults to normal mode (100KHz). Although twl6040 has so far had no
problems with i2c communication in this configuration, it is safer to
select fast i2c mode.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/twl6040.c       | 9 +++++++--
 include/linux/mfd/twl6040.h | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c
index 03dbff3597a2..cb37bb8f8e41 100644
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -87,8 +87,13 @@ static struct reg_default twl6040_defaults[] = {
 };
 
 static struct reg_default twl6040_patch[] = {
-	/* Select I2C bus access to dual access registers */
-	{ TWL6040_REG_ACCCTL, 0x09 },
+	/*
+	 * Select I2C bus access to dual access registers
+	 * Interrupt register is cleared on read
+	 * Select fast mode for i2c (400KHz)
+	 */
+	{ TWL6040_REG_ACCCTL,
+		TWL6040_I2CSEL | TWL6040_INTCLRMODE | TWL6040_I2CMODE(1) },
 };
 
 
diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h
index 81f639bc1ae6..a69d16b30c18 100644
--- a/include/linux/mfd/twl6040.h
+++ b/include/linux/mfd/twl6040.h
@@ -157,6 +157,7 @@
 #define TWL6040_I2CSEL			0x01
 #define TWL6040_RESETSPLIT		0x04
 #define TWL6040_INTCLRMODE		0x08
+#define TWL6040_I2CMODE(x)		((x & 0x3) << 4)
 
 /* STATUS (0x2E) fields */
 
-- 
cgit 


From 68bab8662f49b9e158f1d32f11becd4e48c04079 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Thu, 3 Apr 2014 13:54:41 +0300
Subject: mfd: twl6040: Optional clk32k clock handling

In certain boards the source for the clk32k clock can be gated. In these
boards the clk32k clock can be provided to the driver and it is going to be
enabled/disabled when it is needed.
If the clk32k clock is not provided the driver will assume that it is always
running.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 Documentation/devicetree/bindings/mfd/twl6040.txt |  2 ++
 drivers/mfd/twl6040.c                             | 10 ++++++++++
 include/linux/mfd/twl6040.h                       |  2 ++
 3 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mfd/twl6040.txt b/Documentation/devicetree/bindings/mfd/twl6040.txt
index 0f5dd709d752..a41157b5d930 100644
--- a/Documentation/devicetree/bindings/mfd/twl6040.txt
+++ b/Documentation/devicetree/bindings/mfd/twl6040.txt
@@ -19,6 +19,8 @@ Required properties:
 
 Optional properties, nodes:
 - enable-active-high: To power on the twl6040 during boot.
+- clocks: phandle to the clk32k clock provider
+- clock-names: Must be "clk32k"
 
 Vibra functionality
 Required properties:
diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c
index cb37bb8f8e41..574774d7f826 100644
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -291,6 +291,8 @@ int twl6040_power(struct twl6040 *twl6040, int on)
 		if (twl6040->power_count++)
 			goto out;
 
+		clk_prepare_enable(twl6040->clk32k);
+
 		/* Allow writes to the chip */
 		regcache_cache_only(twl6040->regmap, false);
 
@@ -346,6 +348,8 @@ int twl6040_power(struct twl6040 *twl6040, int on)
 
 		twl6040->sysclk = 0;
 		twl6040->mclk = 0;
+
+		clk_disable_unprepare(twl6040->clk32k);
 	}
 
 out:
@@ -644,6 +648,12 @@ static int twl6040_probe(struct i2c_client *client,
 
 	i2c_set_clientdata(client, twl6040);
 
+	twl6040->clk32k = devm_clk_get(&client->dev, "clk32k");
+	if (IS_ERR(twl6040->clk32k)) {
+		dev_info(&client->dev, "clk32k is not handled\n");
+		twl6040->clk32k = NULL;
+	}
+
 	twl6040->supplies[0].supply = "vio";
 	twl6040->supplies[1].supply = "v2v1";
 	ret = devm_regulator_bulk_get(&client->dev, TWL6040_NUM_SUPPLIES,
diff --git a/include/linux/mfd/twl6040.h b/include/linux/mfd/twl6040.h
index a69d16b30c18..8f9fc3d26e6d 100644
--- a/include/linux/mfd/twl6040.h
+++ b/include/linux/mfd/twl6040.h
@@ -28,6 +28,7 @@
 #include <linux/interrupt.h>
 #include <linux/mfd/core.h>
 #include <linux/regulator/consumer.h>
+#include <linux/clk.h>
 
 #define TWL6040_REG_ASICID		0x01
 #define TWL6040_REG_ASICREV		0x02
@@ -223,6 +224,7 @@ struct twl6040 {
 	struct regmap *regmap;
 	struct regmap_irq_chip_data *irq_data;
 	struct regulator_bulk_data supplies[2]; /* supplies for vio, v2v1 */
+	struct clk *clk32k;
 	struct mutex mutex;
 	struct mutex irq_mutex;
 	struct mfd_cell cells[TWL6040_CELLS];
-- 
cgit 


From 9549b5ff001a8904372370d10be9a2f05e10eca5 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Wed, 23 Apr 2014 16:13:05 +0200
Subject: mfd: sec-core: Remove duplicated device type from sec_pmic_dev

The device type was stored in sec_pmic_dev state container twice:
 - unsigned long type (initialized from of_device_id or i2c_device_id)
 - int device_type (initialized as above or from board files when there
   is no DTS)

The 'type' field was never used outside of probe so it can be safely
removed.

Change also the device_type in sec_pmic_dev and sec_platform_data to
unsigned long to avoid any casts.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/sec-core.c           | 5 +++--
 drivers/mfd/sec-irq.c            | 2 +-
 include/linux/mfd/samsung/core.h | 3 +--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c
index d4682c6cbff5..09fd256abcf6 100644
--- a/drivers/mfd/sec-core.c
+++ b/drivers/mfd/sec-core.c
@@ -251,6 +251,7 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 	struct sec_platform_data *pdata = dev_get_platdata(&i2c->dev);
 	const struct regmap_config *regmap;
 	struct sec_pmic_dev *sec_pmic;
+	unsigned long device_type;
 	int ret;
 
 	sec_pmic = devm_kzalloc(&i2c->dev, sizeof(struct sec_pmic_dev),
@@ -262,7 +263,7 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 	sec_pmic->dev = &i2c->dev;
 	sec_pmic->i2c = i2c;
 	sec_pmic->irq = i2c->irq;
-	sec_pmic->type = sec_i2c_get_driver_data(i2c, id);
+	device_type = sec_i2c_get_driver_data(i2c, id);
 
 	if (sec_pmic->dev->of_node) {
 		pdata = sec_pmic_i2c_parse_dt_pdata(sec_pmic->dev);
@@ -270,7 +271,7 @@ static int sec_pmic_probe(struct i2c_client *i2c,
 			ret = PTR_ERR(pdata);
 			return ret;
 		}
-		pdata->device_type = sec_pmic->type;
+		pdata->device_type = device_type;
 	}
 	if (pdata) {
 		sec_pmic->device_type = pdata->device_type;
diff --git a/drivers/mfd/sec-irq.c b/drivers/mfd/sec-irq.c
index 64e7913aadc6..654e2c1dbf7a 100644
--- a/drivers/mfd/sec-irq.c
+++ b/drivers/mfd/sec-irq.c
@@ -385,7 +385,7 @@ int sec_irq_init(struct sec_pmic_dev *sec_pmic)
 				  &sec_pmic->irq_data);
 		break;
 	default:
-		dev_err(sec_pmic->dev, "Unknown device type %d\n",
+		dev_err(sec_pmic->dev, "Unknown device type %lu\n",
 			sec_pmic->device_type);
 		return -EINVAL;
 	}
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 1c66a6462887..47d84242940b 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -48,13 +48,12 @@ struct sec_pmic_dev {
 	struct regmap *regmap_pmic;
 	struct i2c_client *i2c;
 
-	int device_type;
+	unsigned long device_type;
 	int irq_base;
 	int irq;
 	struct regmap_irq_chip_data *irq_data;
 
 	int ono;
-	unsigned long type;
 	bool wakeup;
 	bool wtsr_smpl;
 };
-- 
cgit 


From 3e87933a68dce6a27bf1006964f8c850e13140b5 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <sboyd@codeaurora.org>
Date: Tue, 8 Apr 2014 17:14:15 -0700
Subject: mfd: pm8921: Remove pm8xxx API now that sub-devices use regmap

The pm8xxx read/write wrappers are no longer necessary now that
all the sub-device drivers are using the regmap API. Remove it.

Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/pm8921-core.c       | 123 +---------------------------------------
 include/linux/mfd/pm8xxx/core.h |  81 --------------------------
 2 files changed, 2 insertions(+), 202 deletions(-)
 delete mode 100644 include/linux/mfd/pm8xxx/core.h

(limited to 'include/linux')

diff --git a/drivers/mfd/pm8921-core.c b/drivers/mfd/pm8921-core.c
index b97a97187ae9..959513803542 100644
--- a/drivers/mfd/pm8921-core.c
+++ b/drivers/mfd/pm8921-core.c
@@ -26,7 +26,6 @@
 #include <linux/regmap.h>
 #include <linux/of_platform.h>
 #include <linux/mfd/core.h>
-#include <linux/mfd/pm8xxx/core.h>
 
 #define	SSBI_REG_ADDR_IRQ_BASE		0x1BB
 
@@ -57,7 +56,6 @@
 #define PM8921_NR_IRQS		256
 
 struct pm_irq_chip {
-	struct device		*dev;
 	struct regmap		*regmap;
 	spinlock_t		pm_irq_lock;
 	struct irq_domain	*irqdomain;
@@ -67,11 +65,6 @@ struct pm_irq_chip {
 	u8			config[0];
 };
 
-struct pm8921 {
-	struct device			*dev;
-	struct pm_irq_chip		*irq_chip;
-};
-
 static int pm8xxx_read_block_irq(struct pm_irq_chip *chip, unsigned int bp,
 				 unsigned int *ip)
 {
@@ -255,55 +248,6 @@ static struct irq_chip pm8xxx_irq_chip = {
 	.flags		= IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_SKIP_SET_WAKE,
 };
 
-/**
- * pm8xxx_get_irq_stat - get the status of the irq line
- * @chip: pointer to identify a pmic irq controller
- * @irq: the irq number
- *
- * The pm8xxx gpio and mpp rely on the interrupt block to read
- * the values on their pins. This function is to facilitate reading
- * the status of a gpio or an mpp line. The caller has to convert the
- * gpio number to irq number.
- *
- * RETURNS:
- * an int indicating the value read on that line
- */
-static int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq)
-{
-	int pmirq, rc;
-	unsigned int  block, bits, bit;
-	unsigned long flags;
-	struct irq_data *irq_data = irq_get_irq_data(irq);
-
-	pmirq = irq_data->hwirq;
-
-	block = pmirq / 8;
-	bit = pmirq % 8;
-
-	spin_lock_irqsave(&chip->pm_irq_lock, flags);
-
-	rc = regmap_write(chip->regmap, SSBI_REG_ADDR_IRQ_BLK_SEL, block);
-	if (rc) {
-		pr_err("Failed Selecting block irq=%d pmirq=%d blk=%d rc=%d\n",
-			irq, pmirq, block, rc);
-		goto bail_out;
-	}
-
-	rc = regmap_read(chip->regmap, SSBI_REG_ADDR_IRQ_RT_STATUS, &bits);
-	if (rc) {
-		pr_err("Failed Configuring irq=%d pmirq=%d blk=%d rc=%d\n",
-			irq, pmirq, block, rc);
-		goto bail_out;
-	}
-
-	rc = (bits & (1 << bit)) ? 1 : 0;
-
-bail_out:
-	spin_unlock_irqrestore(&chip->pm_irq_lock, flags);
-
-	return rc;
-}
-
 static int pm8xxx_irq_domain_map(struct irq_domain *d, unsigned int irq,
 				   irq_hw_number_t hwirq)
 {
@@ -324,56 +268,6 @@ static const struct irq_domain_ops pm8xxx_irq_domain_ops = {
 	.map = pm8xxx_irq_domain_map,
 };
 
-static int pm8921_readb(const struct device *dev, u16 addr, u8 *val)
-{
-	const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev);
-	const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data;
-
-	return ssbi_read(pmic->dev->parent, addr, val, 1);
-}
-
-static int pm8921_writeb(const struct device *dev, u16 addr, u8 val)
-{
-	const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev);
-	const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data;
-
-	return ssbi_write(pmic->dev->parent, addr, &val, 1);
-}
-
-static int pm8921_read_buf(const struct device *dev, u16 addr, u8 *buf,
-									int cnt)
-{
-	const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev);
-	const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data;
-
-	return ssbi_read(pmic->dev->parent, addr, buf, cnt);
-}
-
-static int pm8921_write_buf(const struct device *dev, u16 addr, u8 *buf,
-									int cnt)
-{
-	const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev);
-	const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data;
-
-	return ssbi_write(pmic->dev->parent, addr, buf, cnt);
-}
-
-static int pm8921_read_irq_stat(const struct device *dev, int irq)
-{
-	const struct pm8xxx_drvdata *pm8921_drvdata = dev_get_drvdata(dev);
-	const struct pm8921 *pmic = pm8921_drvdata->pm_chip_data;
-
-	return pm8xxx_get_irq_stat(pmic->irq_chip, irq);
-}
-
-static struct pm8xxx_drvdata pm8921_drvdata = {
-	.pmic_readb		= pm8921_readb,
-	.pmic_writeb		= pm8921_writeb,
-	.pmic_read_buf		= pm8921_read_buf,
-	.pmic_write_buf		= pm8921_write_buf,
-	.pmic_read_irq_stat	= pm8921_read_irq_stat,
-};
-
 static const struct regmap_config ssbi_regmap_config = {
 	.reg_bits = 16,
 	.val_bits = 8,
@@ -392,7 +286,6 @@ MODULE_DEVICE_TABLE(of, pm8921_id_table);
 
 static int pm8921_probe(struct platform_device *pdev)
 {
-	struct pm8921 *pmic;
 	struct regmap *regmap;
 	int irq, rc;
 	unsigned int val;
@@ -404,12 +297,6 @@ static int pm8921_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	pmic = devm_kzalloc(&pdev->dev, sizeof(struct pm8921), GFP_KERNEL);
-	if (!pmic) {
-		pr_err("Cannot alloc pm8921 struct\n");
-		return -ENOMEM;
-	}
-
 	regmap = devm_regmap_init(&pdev->dev, NULL, pdev->dev.parent,
 				  &ssbi_regmap_config);
 	if (IS_ERR(regmap))
@@ -434,18 +321,13 @@ static int pm8921_probe(struct platform_device *pdev)
 	pr_info("PMIC revision 2: %02X\n", val);
 	rev |= val << BITS_PER_BYTE;
 
-	pmic->dev = &pdev->dev;
-	pm8921_drvdata.pm_chip_data = pmic;
-	platform_set_drvdata(pdev, &pm8921_drvdata);
-
 	chip = devm_kzalloc(&pdev->dev, sizeof(*chip) +
 					sizeof(chip->config[0]) * nirqs,
 					GFP_KERNEL);
 	if (!chip)
 		return -ENOMEM;
 
-	pmic->irq_chip = chip;
-	chip->dev = &pdev->dev;
+	platform_set_drvdata(pdev, chip);
 	chip->regmap = regmap;
 	chip->num_irqs = nirqs;
 	chip->num_blocks = DIV_ROUND_UP(chip->num_irqs, 8);
@@ -481,8 +363,7 @@ static int pm8921_remove_child(struct device *dev, void *unused)
 static int pm8921_remove(struct platform_device *pdev)
 {
 	int irq = platform_get_irq(pdev, 0);
-	struct pm8921 *pmic = pm8921_drvdata.pm_chip_data;
-	struct pm_irq_chip *chip = pmic->irq_chip;
+	struct pm_irq_chip *chip = platform_get_drvdata(pdev);
 
 	device_for_each_child(&pdev->dev, NULL, pm8921_remove_child);
 	irq_set_chained_handler(irq, NULL);
diff --git a/include/linux/mfd/pm8xxx/core.h b/include/linux/mfd/pm8xxx/core.h
deleted file mode 100644
index bd2f4f64e931..000000000000
--- a/include/linux/mfd/pm8xxx/core.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2011, Code Aurora Forum. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-/*
- * Qualcomm PMIC 8xxx driver header file
- *
- */
-
-#ifndef __MFD_PM8XXX_CORE_H
-#define __MFD_PM8XXX_CORE_H
-
-#include <linux/mfd/core.h>
-
-struct pm8xxx_drvdata {
-	int	(*pmic_readb) (const struct device *dev, u16 addr, u8 *val);
-	int	(*pmic_writeb) (const struct device *dev, u16 addr, u8 val);
-	int	(*pmic_read_buf) (const struct device *dev, u16 addr, u8 *buf,
-									int n);
-	int	(*pmic_write_buf) (const struct device *dev, u16 addr, u8 *buf,
-									int n);
-	int	(*pmic_read_irq_stat) (const struct device *dev, int irq);
-	void	*pm_chip_data;
-};
-
-static inline int pm8xxx_readb(const struct device *dev, u16 addr, u8 *val)
-{
-	struct pm8xxx_drvdata *dd = dev_get_drvdata(dev);
-
-	if (!dd)
-		return -EINVAL;
-	return dd->pmic_readb(dev, addr, val);
-}
-
-static inline int pm8xxx_writeb(const struct device *dev, u16 addr, u8 val)
-{
-	struct pm8xxx_drvdata *dd = dev_get_drvdata(dev);
-
-	if (!dd)
-		return -EINVAL;
-	return dd->pmic_writeb(dev, addr, val);
-}
-
-static inline int pm8xxx_read_buf(const struct device *dev, u16 addr, u8 *buf,
-									int n)
-{
-	struct pm8xxx_drvdata *dd = dev_get_drvdata(dev);
-
-	if (!dd)
-		return -EINVAL;
-	return dd->pmic_read_buf(dev, addr, buf, n);
-}
-
-static inline int pm8xxx_write_buf(const struct device *dev, u16 addr, u8 *buf,
-									int n)
-{
-	struct pm8xxx_drvdata *dd = dev_get_drvdata(dev);
-
-	if (!dd)
-		return -EINVAL;
-	return dd->pmic_write_buf(dev, addr, buf, n);
-}
-
-static inline int pm8xxx_read_irq_stat(const struct device *dev, int irq)
-{
-	struct pm8xxx_drvdata *dd = dev_get_drvdata(dev);
-
-	if (!dd)
-		return -EINVAL;
-	return dd->pmic_read_irq_stat(dev, irq);
-}
-
-#endif
-- 
cgit 


From cfb61a419630a810033f2777aba724ab6b1272b3 Mon Sep 17 00:00:00 2001
From: Carlo Caione <carlo@caione.org>
Date: Thu, 1 May 2014 14:29:27 +0200
Subject: mfd: AXP20x: Add mfd driver for AXP20x PMIC

This patch introduces preliminary support for the X-Powers AXP202 and
AXP209 PMICs. The AXP209 and AXP202 are the PMUs (Power Management Units)
used by the A10, A13 and A20 SoCs and developed by X-Powers, a sister
company of Allwinner.

The core enables support for two subsystems:
- PEK (Power Enable Key)
- Regulators

Signed-off-by: Carlo Caione <carlo@caione.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig        |  12 +++
 drivers/mfd/Makefile       |   1 +
 drivers/mfd/axp20x.c       | 258 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/axp20x.h | 180 +++++++++++++++++++++++++++++++
 4 files changed, 451 insertions(+)
 create mode 100644 drivers/mfd/axp20x.c
 create mode 100644 include/linux/mfd/axp20x.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index e166d7176d7a..c681741ce492 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -67,6 +67,18 @@ config MFD_BCM590XX
 	help
 	  Support for the BCM590xx PMUs from Broadcom
 
+config MFD_AXP20X
+	bool "X-Powers AXP20X"
+	select MFD_CORE
+	select REGMAP_I2C
+	select REGMAP_IRQ
+	depends on I2C=y
+	help
+	  If you say Y here you get support for the X-Powers AXP202 and AXP209.
+	  This driver includes only the core APIs. You have to select individual
+	  components like regulators or the PEK (Power Enable Key) under the
+	  corresponding menus.
+
 config MFD_CROS_EC
 	tristate "ChromeOS Embedded Controller"
 	select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 2851275e2656..1efecf2793ae 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_PMIC_DA9052)	+= da9052-irq.o
 obj-$(CONFIG_PMIC_DA9052)	+= da9052-core.o
 obj-$(CONFIG_MFD_DA9052_SPI)	+= da9052-spi.o
 obj-$(CONFIG_MFD_DA9052_I2C)	+= da9052-i2c.o
+obj-$(CONFIG_MFD_AXP20X)	+= axp20x.o
 
 obj-$(CONFIG_MFD_LP3943)	+= lp3943.o
 obj-$(CONFIG_MFD_LP8788)	+= lp8788.o lp8788-irq.o
diff --git a/drivers/mfd/axp20x.c b/drivers/mfd/axp20x.c
new file mode 100644
index 000000000000..dee653989e3a
--- /dev/null
+++ b/drivers/mfd/axp20x.c
@@ -0,0 +1,258 @@
+/*
+ * axp20x.c - MFD core driver for the X-Powers AXP202 and AXP209
+ *
+ * AXP20x comprises an adaptive USB-Compatible PWM charger, 2 BUCK DC-DC
+ * converters, 5 LDOs, multiple 12-bit ADCs of voltage, current and temperature
+ * as well as 4 configurable GPIOs.
+ *
+ * Author: Carlo Caione <carlo@caione.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pm_runtime.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/regulator/consumer.h>
+#include <linux/mfd/axp20x.h>
+#include <linux/mfd/core.h>
+#include <linux/of_device.h>
+#include <linux/of_irq.h>
+
+#define AXP20X_OFF	0x80
+
+static const struct regmap_range axp20x_writeable_ranges[] = {
+	regmap_reg_range(AXP20X_DATACACHE(0), AXP20X_IRQ5_STATE),
+	regmap_reg_range(AXP20X_DCDC_MODE, AXP20X_FG_RES),
+};
+
+static const struct regmap_range axp20x_volatile_ranges[] = {
+	regmap_reg_range(AXP20X_IRQ1_EN, AXP20X_IRQ5_STATE),
+};
+
+static const struct regmap_access_table axp20x_writeable_table = {
+	.yes_ranges	= axp20x_writeable_ranges,
+	.n_yes_ranges	= ARRAY_SIZE(axp20x_writeable_ranges),
+};
+
+static const struct regmap_access_table axp20x_volatile_table = {
+	.yes_ranges	= axp20x_volatile_ranges,
+	.n_yes_ranges	= ARRAY_SIZE(axp20x_volatile_ranges),
+};
+
+static struct resource axp20x_pek_resources[] = {
+	{
+		.name	= "PEK_DBR",
+		.start	= AXP20X_IRQ_PEK_RIS_EDGE,
+		.end	= AXP20X_IRQ_PEK_RIS_EDGE,
+		.flags	= IORESOURCE_IRQ,
+	}, {
+		.name	= "PEK_DBF",
+		.start	= AXP20X_IRQ_PEK_FAL_EDGE,
+		.end	= AXP20X_IRQ_PEK_FAL_EDGE,
+		.flags	= IORESOURCE_IRQ,
+	},
+};
+
+static const struct regmap_config axp20x_regmap_config = {
+	.reg_bits	= 8,
+	.val_bits	= 8,
+	.wr_table	= &axp20x_writeable_table,
+	.volatile_table	= &axp20x_volatile_table,
+	.max_register	= AXP20X_FG_RES,
+	.cache_type	= REGCACHE_RBTREE,
+};
+
+#define AXP20X_IRQ(_irq, _off, _mask) \
+	[AXP20X_IRQ_##_irq] = { .reg_offset = (_off), .mask = BIT(_mask) }
+
+static const struct regmap_irq axp20x_regmap_irqs[] = {
+	AXP20X_IRQ(ACIN_OVER_V,		0, 7),
+	AXP20X_IRQ(ACIN_PLUGIN,		0, 6),
+	AXP20X_IRQ(ACIN_REMOVAL,	0, 5),
+	AXP20X_IRQ(VBUS_OVER_V,		0, 4),
+	AXP20X_IRQ(VBUS_PLUGIN,		0, 3),
+	AXP20X_IRQ(VBUS_REMOVAL,	0, 2),
+	AXP20X_IRQ(VBUS_V_LOW,		0, 1),
+	AXP20X_IRQ(BATT_PLUGIN,		1, 7),
+	AXP20X_IRQ(BATT_REMOVAL,	1, 6),
+	AXP20X_IRQ(BATT_ENT_ACT_MODE,	1, 5),
+	AXP20X_IRQ(BATT_EXIT_ACT_MODE,	1, 4),
+	AXP20X_IRQ(CHARG,		1, 3),
+	AXP20X_IRQ(CHARG_DONE,		1, 2),
+	AXP20X_IRQ(BATT_TEMP_HIGH,	1, 1),
+	AXP20X_IRQ(BATT_TEMP_LOW,	1, 0),
+	AXP20X_IRQ(DIE_TEMP_HIGH,	2, 7),
+	AXP20X_IRQ(CHARG_I_LOW,		2, 6),
+	AXP20X_IRQ(DCDC1_V_LONG,	2, 5),
+	AXP20X_IRQ(DCDC2_V_LONG,	2, 4),
+	AXP20X_IRQ(DCDC3_V_LONG,	2, 3),
+	AXP20X_IRQ(PEK_SHORT,		2, 1),
+	AXP20X_IRQ(PEK_LONG,		2, 0),
+	AXP20X_IRQ(N_OE_PWR_ON,		3, 7),
+	AXP20X_IRQ(N_OE_PWR_OFF,	3, 6),
+	AXP20X_IRQ(VBUS_VALID,		3, 5),
+	AXP20X_IRQ(VBUS_NOT_VALID,	3, 4),
+	AXP20X_IRQ(VBUS_SESS_VALID,	3, 3),
+	AXP20X_IRQ(VBUS_SESS_END,	3, 2),
+	AXP20X_IRQ(LOW_PWR_LVL1,	3, 1),
+	AXP20X_IRQ(LOW_PWR_LVL2,	3, 0),
+	AXP20X_IRQ(TIMER,		4, 7),
+	AXP20X_IRQ(PEK_RIS_EDGE,	4, 6),
+	AXP20X_IRQ(PEK_FAL_EDGE,	4, 5),
+	AXP20X_IRQ(GPIO3_INPUT,		4, 3),
+	AXP20X_IRQ(GPIO2_INPUT,		4, 2),
+	AXP20X_IRQ(GPIO1_INPUT,		4, 1),
+	AXP20X_IRQ(GPIO0_INPUT,		4, 0),
+};
+
+static const struct of_device_id axp20x_of_match[] = {
+	{ .compatible = "x-powers,axp202", .data = (void *) AXP202_ID },
+	{ .compatible = "x-powers,axp209", .data = (void *) AXP209_ID },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, axp20x_of_match);
+
+/*
+ * This is useless for OF-enabled devices, but it is needed by the I2C subsystem
+ */
+static const struct i2c_device_id axp20x_i2c_id[] = {
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, axp20x_i2c_id);
+
+static const struct regmap_irq_chip axp20x_regmap_irq_chip = {
+	.name			= "axp20x_irq_chip",
+	.status_base		= AXP20X_IRQ1_STATE,
+	.ack_base		= AXP20X_IRQ1_STATE,
+	.mask_base		= AXP20X_IRQ1_EN,
+	.num_regs		= 5,
+	.irqs			= axp20x_regmap_irqs,
+	.num_irqs		= ARRAY_SIZE(axp20x_regmap_irqs),
+	.mask_invert		= true,
+	.init_ack_masked	= true,
+};
+
+static const char * const axp20x_supplies[] = {
+	"acin",
+	"vin2",
+	"vin3",
+	"ldo24in",
+	"ldo3in",
+	"ldo5in",
+};
+
+static struct mfd_cell axp20x_cells[] = {
+	{
+		.name			= "axp20x-pek",
+		.num_resources		= ARRAY_SIZE(axp20x_pek_resources),
+		.resources		= axp20x_pek_resources,
+	}, {
+		.name			= "axp20x-regulator",
+		.parent_supplies	= axp20x_supplies,
+		.num_parent_supplies	= ARRAY_SIZE(axp20x_supplies),
+	},
+};
+
+static struct axp20x_dev *axp20x_pm_power_off;
+static void axp20x_power_off(void)
+{
+	regmap_write(axp20x_pm_power_off->regmap, AXP20X_OFF_CTRL,
+		     AXP20X_OFF);
+}
+
+static int axp20x_i2c_probe(struct i2c_client *i2c,
+			 const struct i2c_device_id *id)
+{
+	struct axp20x_dev *axp20x;
+	const struct of_device_id *of_id;
+	int ret;
+
+	axp20x = devm_kzalloc(&i2c->dev, sizeof(*axp20x), GFP_KERNEL);
+	if (!axp20x)
+		return -ENOMEM;
+
+	of_id = of_match_device(axp20x_of_match, &i2c->dev);
+	if (!of_id) {
+		dev_err(&i2c->dev, "Unable to setup AXP20X data\n");
+		return -ENODEV;
+	}
+	axp20x->variant = (long) of_id->data;
+
+	axp20x->i2c_client = i2c;
+	axp20x->dev = &i2c->dev;
+	dev_set_drvdata(axp20x->dev, axp20x);
+
+	axp20x->regmap = devm_regmap_init_i2c(i2c, &axp20x_regmap_config);
+	if (IS_ERR(axp20x->regmap)) {
+		ret = PTR_ERR(axp20x->regmap);
+		dev_err(&i2c->dev, "regmap init failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = regmap_add_irq_chip(axp20x->regmap, i2c->irq,
+				  IRQF_ONESHOT | IRQF_SHARED, -1,
+				  &axp20x_regmap_irq_chip,
+				  &axp20x->regmap_irqc);
+	if (ret) {
+		dev_err(&i2c->dev, "failed to add irq chip: %d\n", ret);
+		return ret;
+	}
+
+	ret = mfd_add_devices(axp20x->dev, -1, axp20x_cells,
+			      ARRAY_SIZE(axp20x_cells), NULL, 0, NULL);
+
+	if (ret) {
+		dev_err(&i2c->dev, "failed to add MFD devices: %d\n", ret);
+		regmap_del_irq_chip(i2c->irq, axp20x->regmap_irqc);
+		return ret;
+	}
+
+	if (!pm_power_off) {
+		axp20x_pm_power_off = axp20x;
+		pm_power_off = axp20x_power_off;
+	}
+
+	dev_info(&i2c->dev, "AXP20X driver loaded\n");
+
+	return 0;
+}
+
+static int axp20x_i2c_remove(struct i2c_client *i2c)
+{
+	struct axp20x_dev *axp20x = i2c_get_clientdata(i2c);
+
+	if (axp20x == axp20x_pm_power_off) {
+		axp20x_pm_power_off = NULL;
+		pm_power_off = NULL;
+	}
+
+	mfd_remove_devices(axp20x->dev);
+	regmap_del_irq_chip(axp20x->i2c_client->irq, axp20x->regmap_irqc);
+
+	return 0;
+}
+
+static struct i2c_driver axp20x_i2c_driver = {
+	.driver = {
+		.name	= "axp20x",
+		.owner	= THIS_MODULE,
+		.of_match_table	= of_match_ptr(axp20x_of_match),
+	},
+	.probe		= axp20x_i2c_probe,
+	.remove		= axp20x_i2c_remove,
+	.id_table	= axp20x_i2c_id,
+};
+
+module_i2c_driver(axp20x_i2c_driver);
+
+MODULE_DESCRIPTION("PMIC MFD core driver for AXP20X");
+MODULE_AUTHOR("Carlo Caione <carlo@caione.org>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h
new file mode 100644
index 000000000000..d0e31a2287ac
--- /dev/null
+++ b/include/linux/mfd/axp20x.h
@@ -0,0 +1,180 @@
+/*
+ * Functions and registers to access AXP20X power management chip.
+ *
+ * Copyright (C) 2013, Carlo Caione <carlo@caione.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_MFD_AXP20X_H
+#define __LINUX_MFD_AXP20X_H
+
+enum {
+	AXP202_ID = 0,
+	AXP209_ID,
+};
+
+#define AXP20X_DATACACHE(m)		(0x04 + (m))
+
+/* Power supply */
+#define AXP20X_PWR_INPUT_STATUS		0x00
+#define AXP20X_PWR_OP_MODE		0x01
+#define AXP20X_USB_OTG_STATUS		0x02
+#define AXP20X_PWR_OUT_CTRL		0x12
+#define AXP20X_DCDC2_V_OUT		0x23
+#define AXP20X_DCDC2_LDO3_V_SCAL	0x25
+#define AXP20X_DCDC3_V_OUT		0x27
+#define AXP20X_LDO24_V_OUT		0x28
+#define AXP20X_LDO3_V_OUT		0x29
+#define AXP20X_VBUS_IPSOUT_MGMT		0x30
+#define AXP20X_V_OFF			0x31
+#define AXP20X_OFF_CTRL			0x32
+#define AXP20X_CHRG_CTRL1		0x33
+#define AXP20X_CHRG_CTRL2		0x34
+#define AXP20X_CHRG_BAK_CTRL		0x35
+#define AXP20X_PEK_KEY			0x36
+#define AXP20X_DCDC_FREQ		0x37
+#define AXP20X_V_LTF_CHRG		0x38
+#define AXP20X_V_HTF_CHRG		0x39
+#define AXP20X_APS_WARN_L1		0x3a
+#define AXP20X_APS_WARN_L2		0x3b
+#define AXP20X_V_LTF_DISCHRG		0x3c
+#define AXP20X_V_HTF_DISCHRG		0x3d
+
+/* Interrupt */
+#define AXP20X_IRQ1_EN			0x40
+#define AXP20X_IRQ2_EN			0x41
+#define AXP20X_IRQ3_EN			0x42
+#define AXP20X_IRQ4_EN			0x43
+#define AXP20X_IRQ5_EN			0x44
+#define AXP20X_IRQ1_STATE		0x48
+#define AXP20X_IRQ2_STATE		0x49
+#define AXP20X_IRQ3_STATE		0x4a
+#define AXP20X_IRQ4_STATE		0x4b
+#define AXP20X_IRQ5_STATE		0x4c
+
+/* ADC */
+#define AXP20X_ACIN_V_ADC_H		0x56
+#define AXP20X_ACIN_V_ADC_L		0x57
+#define AXP20X_ACIN_I_ADC_H		0x58
+#define AXP20X_ACIN_I_ADC_L		0x59
+#define AXP20X_VBUS_V_ADC_H		0x5a
+#define AXP20X_VBUS_V_ADC_L		0x5b
+#define AXP20X_VBUS_I_ADC_H		0x5c
+#define AXP20X_VBUS_I_ADC_L		0x5d
+#define AXP20X_TEMP_ADC_H		0x5e
+#define AXP20X_TEMP_ADC_L		0x5f
+#define AXP20X_TS_IN_H			0x62
+#define AXP20X_TS_IN_L			0x63
+#define AXP20X_GPIO0_V_ADC_H		0x64
+#define AXP20X_GPIO0_V_ADC_L		0x65
+#define AXP20X_GPIO1_V_ADC_H		0x66
+#define AXP20X_GPIO1_V_ADC_L		0x67
+#define AXP20X_PWR_BATT_H		0x70
+#define AXP20X_PWR_BATT_M		0x71
+#define AXP20X_PWR_BATT_L		0x72
+#define AXP20X_BATT_V_H			0x78
+#define AXP20X_BATT_V_L			0x79
+#define AXP20X_BATT_CHRG_I_H		0x7a
+#define AXP20X_BATT_CHRG_I_L		0x7b
+#define AXP20X_BATT_DISCHRG_I_H		0x7c
+#define AXP20X_BATT_DISCHRG_I_L		0x7d
+#define AXP20X_IPSOUT_V_HIGH_H		0x7e
+#define AXP20X_IPSOUT_V_HIGH_L		0x7f
+
+/* Power supply */
+#define AXP20X_DCDC_MODE		0x80
+#define AXP20X_ADC_EN1			0x82
+#define AXP20X_ADC_EN2			0x83
+#define AXP20X_ADC_RATE			0x84
+#define AXP20X_GPIO10_IN_RANGE		0x85
+#define AXP20X_GPIO1_ADC_IRQ_RIS	0x86
+#define AXP20X_GPIO1_ADC_IRQ_FAL	0x87
+#define AXP20X_TIMER_CTRL		0x8a
+#define AXP20X_VBUS_MON			0x8b
+#define AXP20X_OVER_TMP			0x8f
+
+/* GPIO */
+#define AXP20X_GPIO0_CTRL		0x90
+#define AXP20X_LDO5_V_OUT		0x91
+#define AXP20X_GPIO1_CTRL		0x92
+#define AXP20X_GPIO2_CTRL		0x93
+#define AXP20X_GPIO20_SS		0x94
+#define AXP20X_GPIO3_CTRL		0x95
+
+/* Battery */
+#define AXP20X_CHRG_CC_31_24		0xb0
+#define AXP20X_CHRG_CC_23_16		0xb1
+#define AXP20X_CHRG_CC_15_8		0xb2
+#define AXP20X_CHRG_CC_7_0		0xb3
+#define AXP20X_DISCHRG_CC_31_24		0xb4
+#define AXP20X_DISCHRG_CC_23_16		0xb5
+#define AXP20X_DISCHRG_CC_15_8		0xb6
+#define AXP20X_DISCHRG_CC_7_0		0xb7
+#define AXP20X_CC_CTRL			0xb8
+#define AXP20X_FG_RES			0xb9
+
+/* Regulators IDs */
+enum {
+	AXP20X_LDO1 = 0,
+	AXP20X_LDO2,
+	AXP20X_LDO3,
+	AXP20X_LDO4,
+	AXP20X_LDO5,
+	AXP20X_DCDC2,
+	AXP20X_DCDC3,
+	AXP20X_REG_ID_MAX,
+};
+
+/* IRQs */
+enum {
+	AXP20X_IRQ_ACIN_OVER_V = 1,
+	AXP20X_IRQ_ACIN_PLUGIN,
+	AXP20X_IRQ_ACIN_REMOVAL,
+	AXP20X_IRQ_VBUS_OVER_V,
+	AXP20X_IRQ_VBUS_PLUGIN,
+	AXP20X_IRQ_VBUS_REMOVAL,
+	AXP20X_IRQ_VBUS_V_LOW,
+	AXP20X_IRQ_BATT_PLUGIN,
+	AXP20X_IRQ_BATT_REMOVAL,
+	AXP20X_IRQ_BATT_ENT_ACT_MODE,
+	AXP20X_IRQ_BATT_EXIT_ACT_MODE,
+	AXP20X_IRQ_CHARG,
+	AXP20X_IRQ_CHARG_DONE,
+	AXP20X_IRQ_BATT_TEMP_HIGH,
+	AXP20X_IRQ_BATT_TEMP_LOW,
+	AXP20X_IRQ_DIE_TEMP_HIGH,
+	AXP20X_IRQ_CHARG_I_LOW,
+	AXP20X_IRQ_DCDC1_V_LONG,
+	AXP20X_IRQ_DCDC2_V_LONG,
+	AXP20X_IRQ_DCDC3_V_LONG,
+	AXP20X_IRQ_PEK_SHORT = 22,
+	AXP20X_IRQ_PEK_LONG,
+	AXP20X_IRQ_N_OE_PWR_ON,
+	AXP20X_IRQ_N_OE_PWR_OFF,
+	AXP20X_IRQ_VBUS_VALID,
+	AXP20X_IRQ_VBUS_NOT_VALID,
+	AXP20X_IRQ_VBUS_SESS_VALID,
+	AXP20X_IRQ_VBUS_SESS_END,
+	AXP20X_IRQ_LOW_PWR_LVL1,
+	AXP20X_IRQ_LOW_PWR_LVL2,
+	AXP20X_IRQ_TIMER,
+	AXP20X_IRQ_PEK_RIS_EDGE,
+	AXP20X_IRQ_PEK_FAL_EDGE,
+	AXP20X_IRQ_GPIO3_INPUT,
+	AXP20X_IRQ_GPIO2_INPUT,
+	AXP20X_IRQ_GPIO1_INPUT,
+	AXP20X_IRQ_GPIO0_INPUT,
+};
+
+struct axp20x_dev {
+	struct device			*dev;
+	struct i2c_client		*i2c_client;
+	struct regmap			*regmap;
+	struct regmap_irq_chip_data	*regmap_irqc;
+	long				variant;
+};
+
+#endif /* __LINUX_MFD_AXP20X_H */
-- 
cgit 


From 3d2379909374ef2de6bc57ed8966c7ca8c9dfb82 Mon Sep 17 00:00:00 2001
From: Tushar Behera <tushar.behera@linaro.org>
Date: Fri, 9 May 2014 16:37:40 +0530
Subject: mfd: syscon: Include linux/err.h to fix build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit df73de9b0d412 ("mfd: syscon: Return -ENOSYS if CONFIG_MFD_SYSCON
is not enabled") introduced fallbacks for APIs, but missed out on adding
the header file. This would work only if linux/err.h is also included
in the source code from where this file is included. It would be better
to include linux/err.h in this file to avoid possible build errors.

Without this patch, we get the following and similar build errors if this
header file is included in some source file and CONFIG_MFD_SYSCON is
not enabled.

include/linux/mfd/syscon.h: In function ‘syscon_node_to_regmap’:
include/linux/mfd/syscon.h:30:2: error: implicit declaration of function ‘ERR_PTR’ [-Werror=implicit-function-declaration]
  return ERR_PTR(-ENOSYS);
  ^
include/linux/mfd/syscon.h:30:18: error: ‘ENOSYS’ undeclared (first use in this function)
  return ERR_PTR(-ENOSYS);
                  ^
Signed-off-by: Tushar Behera <tushar.behera@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/syscon.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/syscon.h b/include/linux/mfd/syscon.h
index 8789fa3c7fd9..75e543b78f53 100644
--- a/include/linux/mfd/syscon.h
+++ b/include/linux/mfd/syscon.h
@@ -15,6 +15,8 @@
 #ifndef __LINUX_MFD_SYSCON_H__
 #define __LINUX_MFD_SYSCON_H__
 
+#include <linux/err.h>
+
 struct device_node;
 
 #ifdef CONFIG_MFD_SYSCON
-- 
cgit 


From dcc21cc09e3c22d0ede4e105afa8884eba293b58 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 3 Apr 2014 17:45:15 +0200
Subject: mfd: Add driver for Atmel Microcontroller on iPaq h3xxx

This adds a driver for the Atmel Microcontroller found on the
iPAQ h3xxx series. This device handles some keys, the
touchscreen, and the battery monitoring.

This is a port of a driver from handhelds.org 2.6.21 kernel,
written by Alessandro Gardich based on Andrew Christian's
original HAL-driver. It has been heavily cleaned and
converted to mfd-core by Dmitry Artamonow and rewritten
again for the v3.x series kernels by Linus Walleij,
bringing back some of the functionality lost from Andrew's
original driver.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Alessandro Gardich <gremlin@gremlin.it>
Signed-off-by: Dmitry Artamonow <mad_soft@inbox.ru>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/Kconfig            |  10 +
 drivers/mfd/Makefile           |   1 +
 drivers/mfd/ipaq-micro.c       | 482 +++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/ipaq-micro.h | 148 +++++++++++++
 4 files changed, 641 insertions(+)
 create mode 100644 drivers/mfd/ipaq-micro.c
 create mode 100644 include/linux/mfd/ipaq-micro.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index e56fb3749bca..60cef41b0af4 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -262,6 +262,16 @@ config MFD_INTEL_MSIC
 	  Passage) chip. This chip embeds audio, battery, GPIO, etc.
 	  devices used in Intel Medfield platforms.
 
+config MFD_IPAQ_MICRO
+	bool "Atmel Micro ASIC (iPAQ h3100/h3600/h3700) Support"
+	depends on SA1100_H3100 || SA1100_H3600
+	select MFD_CORE
+	help
+	  Select this to get support for the Microcontroller found in
+	  the Compaq iPAQ handheld computers. This is an Atmel
+	  AT90LS8535 microcontroller flashed with a special iPAQ
+	  firmware using the custom protocol implemented in this driver.
+
 config MFD_JANZ_CMODIO
 	tristate "Janz CMOD-IO PCI MODULbus Carrier Board"
 	select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index df7823cae5af..5dec445ab139 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -168,3 +168,4 @@ obj-$(CONFIG_MFD_RETU)		+= retu-mfd.o
 obj-$(CONFIG_MFD_AS3711)	+= as3711.o
 obj-$(CONFIG_MFD_AS3722)	+= as3722.o
 obj-$(CONFIG_MFD_STW481X)	+= stw481x.o
+obj-$(CONFIG_MFD_IPAQ_MICRO)	+= ipaq-micro.o
diff --git a/drivers/mfd/ipaq-micro.c b/drivers/mfd/ipaq-micro.c
new file mode 100644
index 000000000000..1763d6db346e
--- /dev/null
+++ b/drivers/mfd/ipaq-micro.c
@@ -0,0 +1,482 @@
+/*
+ * Compaq iPAQ h3xxx Atmel microcontroller companion support
+ *
+ * This is an Atmel AT90LS8535 with a special flashed-in firmware that
+ * implements the special protocol used by this driver.
+ *
+ * based on previous kernel 2.4 version by Andrew Christian
+ * Author : Alessandro Gardich <gremlin@gremlin.it>
+ * Author : Dmitry Artamonow <mad_soft@inbox.ru>
+ * Author : Linus Walleij <linus.walleij@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/pm.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/mfd/core.h>
+#include <linux/mfd/ipaq-micro.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+
+#include <mach/hardware.h>
+
+static void ipaq_micro_trigger_tx(struct ipaq_micro *micro)
+{
+	struct ipaq_micro_txdev *tx = &micro->tx;
+	struct ipaq_micro_msg *msg = micro->msg;
+	int i, bp;
+	u8 checksum;
+	u32 val;
+
+	bp = 0;
+	tx->buf[bp++] = CHAR_SOF;
+
+	checksum = ((msg->id & 0x0f) << 4) | (msg->tx_len & 0x0f);
+	tx->buf[bp++] = checksum;
+
+	for (i = 0; i < msg->tx_len; i++) {
+		tx->buf[bp++] = msg->tx_data[i];
+		checksum += msg->tx_data[i];
+	}
+
+	tx->buf[bp++] = checksum;
+	tx->len = bp;
+	tx->index = 0;
+	print_hex_dump(KERN_DEBUG, "data: ", DUMP_PREFIX_OFFSET, 16, 1,
+		       tx->buf, tx->len, true);
+
+	/* Enable interrupt */
+	val = readl(micro->base + UTCR3);
+	val |= UTCR3_TIE;
+	writel(val, micro->base + UTCR3);
+}
+
+int ipaq_micro_tx_msg(struct ipaq_micro *micro, struct ipaq_micro_msg *msg)
+{
+	unsigned long flags;
+
+	dev_dbg(micro->dev, "TX msg: %02x, %d bytes\n", msg->id, msg->tx_len);
+
+	spin_lock_irqsave(&micro->lock, flags);
+	if (micro->msg) {
+		list_add_tail(&msg->node, &micro->queue);
+		spin_unlock_irqrestore(&micro->lock, flags);
+		return 0;
+	}
+	micro->msg = msg;
+	ipaq_micro_trigger_tx(micro);
+	spin_unlock_irqrestore(&micro->lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(ipaq_micro_tx_msg);
+
+static void micro_rx_msg(struct ipaq_micro *micro, u8 id, int len, u8 *data)
+{
+	int i;
+
+	dev_dbg(micro->dev, "RX msg: %02x, %d bytes\n", id, len);
+
+	spin_lock(&micro->lock);
+	switch (id) {
+	case MSG_VERSION:
+	case MSG_EEPROM_READ:
+	case MSG_EEPROM_WRITE:
+	case MSG_BACKLIGHT:
+	case MSG_NOTIFY_LED:
+	case MSG_THERMAL_SENSOR:
+	case MSG_BATTERY:
+		/* Handle synchronous messages */
+		if (micro->msg && micro->msg->id == id) {
+			struct ipaq_micro_msg *msg = micro->msg;
+
+			memcpy(msg->rx_data, data, len);
+			msg->rx_len = len;
+			complete(&micro->msg->ack);
+			if (!list_empty(&micro->queue)) {
+				micro->msg = list_entry(micro->queue.next,
+							struct ipaq_micro_msg,
+							node);
+				list_del_init(&micro->msg->node);
+				ipaq_micro_trigger_tx(micro);
+			} else
+				micro->msg = NULL;
+			dev_dbg(micro->dev, "OK RX message 0x%02x\n", id);
+		} else {
+			dev_err(micro->dev,
+				"out of band RX message 0x%02x\n", id);
+			if(!micro->msg)
+				dev_info(micro->dev, "no message queued\n");
+			else
+				dev_info(micro->dev, "expected message %02x\n",
+					 micro->msg->id);
+		}
+		break;
+	case MSG_KEYBOARD:
+		if (micro->key)
+			micro->key(micro->key_data, len, data);
+		else
+			dev_dbg(micro->dev, "key message ignored, no handle \n");
+		break;
+	case MSG_TOUCHSCREEN:
+		if (micro->ts)
+			micro->ts(micro->ts_data, len, data);
+		else
+			dev_dbg(micro->dev, "touchscreen message ignored, no handle \n");
+		break;
+	default:
+		dev_err(micro->dev,
+			"unknown msg %d [%d] ", id, len);
+		for (i = 0; i < len; ++i)
+			pr_cont("0x%02x ", data[i]);
+		pr_cont("\n");
+	}
+	spin_unlock(&micro->lock);
+}
+
+static void micro_process_char(struct ipaq_micro *micro, u8 ch)
+{
+	struct ipaq_micro_rxdev *rx = &micro->rx;
+
+	switch (rx->state) {
+	case STATE_SOF:	/* Looking for SOF */
+		if (ch == CHAR_SOF)
+			rx->state = STATE_ID; /* Next byte is the id and len */
+		break;
+	case STATE_ID: /* Looking for id and len byte */
+		rx->id = (ch & 0xf0) >> 4 ;
+		rx->len = (ch & 0x0f);
+		rx->index = 0;
+		rx->chksum = ch;
+		rx->state = (rx->len > 0) ? STATE_DATA : STATE_CHKSUM;
+		break;
+	case STATE_DATA: /* Looking for 'len' data bytes */
+		rx->chksum += ch;
+		rx->buf[rx->index] = ch;
+		if (++rx->index == rx->len)
+			rx->state = STATE_CHKSUM;
+		break;
+	case STATE_CHKSUM: /* Looking for the checksum */
+		if (ch == rx->chksum)
+			micro_rx_msg(micro, rx->id, rx->len, rx->buf);
+		rx->state = STATE_SOF;
+		break;
+	}
+}
+
+static void micro_rx_chars(struct ipaq_micro *micro)
+{
+	u32 status, ch;
+
+	while ((status = readl(micro->base + UTSR1)) & UTSR1_RNE) {
+		ch = readl(micro->base + UTDR);
+		if (status & UTSR1_PRE)
+			dev_err(micro->dev, "rx: parity error\n");
+		else if (status & UTSR1_FRE)
+			dev_err(micro->dev, "rx: framing error\n");
+		else if (status & UTSR1_ROR)
+			dev_err(micro->dev, "rx: overrun error\n");
+		micro_process_char(micro, ch);
+	}
+}
+
+static void ipaq_micro_get_version(struct ipaq_micro *micro)
+{
+	struct ipaq_micro_msg msg = {
+		.id = MSG_VERSION,
+	};
+
+	ipaq_micro_tx_msg_sync(micro, &msg);
+	if (msg.rx_len == 4) {
+		memcpy(micro->version, msg.rx_data, 4);
+		micro->version[4] = '\0';
+	} else if (msg.rx_len == 9) {
+		memcpy(micro->version, msg.rx_data, 4);
+		micro->version[4] = '\0';
+		/* Bytes 4-7 are "pack", byte 8 is "boot type" */
+	} else {
+		dev_err(micro->dev,
+			"illegal version message %d bytes\n", msg.rx_len);
+	}
+}
+
+static void ipaq_micro_eeprom_read(struct ipaq_micro *micro,
+				   u8 address, u8 len, u8 *data)
+{
+	struct ipaq_micro_msg msg = {
+		.id = MSG_EEPROM_READ,
+	};
+	u8 i;
+
+	for (i = 0; i < len; i++) {
+		msg.tx_data[0] = address + i;
+		msg.tx_data[1] = 1;
+		msg.tx_len = 2;
+		ipaq_micro_tx_msg_sync(micro, &msg);
+		memcpy(data + (i * 2), msg.rx_data, 2);
+	}
+}
+
+static char *ipaq_micro_str(u8 *wchar, u8 len)
+{
+	char retstr[256];
+	u8 i;
+
+	for (i = 0; i < len / 2; i++)
+		retstr[i] = wchar[i * 2];
+	return kstrdup(retstr, GFP_KERNEL);
+}
+
+static u16 ipaq_micro_to_u16(u8 *data)
+{
+	return data[1] << 8 | data[0];
+}
+
+static void ipaq_micro_eeprom_dump(struct ipaq_micro *micro)
+{
+	u8 dump[256];
+	char *str;
+
+	ipaq_micro_eeprom_read(micro, 0, 128, dump);
+	str = ipaq_micro_str(dump, 10);
+	if (str) {
+		dev_info(micro->dev, "HM version %s\n", str);
+		kfree(str);
+	}
+	str = ipaq_micro_str(dump+10, 40);
+	if (str) {
+		dev_info(micro->dev, "serial number: %s\n", str);
+		/* Feed the random pool with this */
+		add_device_randomness(str, strlen(str));
+		kfree(str);
+	}
+	str = ipaq_micro_str(dump+50, 20);
+	if (str) {
+		dev_info(micro->dev, "module ID: %s\n", str);
+		kfree(str);
+	}
+	str = ipaq_micro_str(dump+70, 10);
+	if (str) {
+		dev_info(micro->dev, "product revision: %s\n", str);
+		kfree(str);
+	}
+	dev_info(micro->dev, "product ID: %u\n", ipaq_micro_to_u16(dump+80));
+	dev_info(micro->dev, "frame rate: %u fps\n",
+		 ipaq_micro_to_u16(dump+82));
+	dev_info(micro->dev, "page mode: %u\n", ipaq_micro_to_u16(dump+84));
+	dev_info(micro->dev, "country ID: %u\n", ipaq_micro_to_u16(dump+86));
+	dev_info(micro->dev, "color display: %s\n",
+		 ipaq_micro_to_u16(dump+88) ? "yes" : "no");
+	dev_info(micro->dev, "ROM size: %u MiB\n", ipaq_micro_to_u16(dump+90));
+	dev_info(micro->dev, "RAM size: %u KiB\n", ipaq_micro_to_u16(dump+92));
+	dev_info(micro->dev, "screen: %u x %u\n",
+		 ipaq_micro_to_u16(dump+94), ipaq_micro_to_u16(dump+96));
+	print_hex_dump(KERN_DEBUG, "eeprom: ", DUMP_PREFIX_OFFSET, 16, 1,
+		       dump, 256, true);
+
+}
+
+static void micro_tx_chars(struct ipaq_micro *micro)
+{
+	struct ipaq_micro_txdev *tx = &micro->tx;
+	u32 val;
+
+	while ((tx->index < tx->len) &&
+	       (readl(micro->base + UTSR1) & UTSR1_TNF)) {
+		writel(tx->buf[tx->index], micro->base + UTDR);
+		tx->index++;
+	}
+
+	/* Stop interrupts */
+	val = readl(micro->base + UTCR3);
+	val &= ~UTCR3_TIE;
+	writel(val, micro->base + UTCR3);
+}
+
+static void micro_reset_comm(struct ipaq_micro *micro)
+{
+	struct ipaq_micro_rxdev *rx = &micro->rx;
+	u32 val;
+
+	if (micro->msg)
+		complete(&micro->msg->ack);
+
+	/* Initialize Serial channel protocol frame */
+	rx->state = STATE_SOF;  /* Reset the state machine */
+
+	/* Set up interrupts */
+	writel(0x01, micro->sdlc + 0x0); /* Select UART mode */
+
+	/* Clean up CR3 */
+	writel(0x0, micro->base + UTCR3);
+
+	/* Format: 8N1 */
+	writel(UTCR0_8BitData | UTCR0_1StpBit, micro->base + UTCR0);
+
+	/* Baud rate: 115200 */
+	writel(0x0, micro->base + UTCR1);
+	writel(0x1, micro->base + UTCR2);
+
+	/* Clear SR0 */
+	writel(0xff, micro->base + UTSR0);
+
+	/* Enable RX int, disable TX int */
+	writel(UTCR3_TXE | UTCR3_RXE | UTCR3_RIE, micro->base + UTCR3);
+	val = readl(micro->base + UTCR3);
+	val &= ~UTCR3_TIE;
+	writel(val, micro->base + UTCR3);
+}
+
+static irqreturn_t micro_serial_isr(int irq, void *dev_id)
+{
+	struct ipaq_micro *micro = dev_id;
+	struct ipaq_micro_txdev *tx = &micro->tx;
+	u32 status;
+
+	status = readl(micro->base + UTSR0);
+	do {
+		if (status & (UTSR0_RID | UTSR0_RFS)) {
+			if (status & UTSR0_RID)
+				/* Clear the Receiver IDLE bit */
+				writel(UTSR0_RID, micro->base + UTSR0);
+			micro_rx_chars(micro);
+		}
+
+		/* Clear break bits */
+		if (status & (UTSR0_RBB | UTSR0_REB))
+			writel(status & (UTSR0_RBB | UTSR0_REB),
+			       micro->base + UTSR0);
+
+		if (status & UTSR0_TFS)
+			micro_tx_chars(micro);
+
+		status = readl(micro->base + UTSR0);
+
+	} while (((tx->index < tx->len) && (status & UTSR0_TFS)) ||
+		 (status & (UTSR0_RFS | UTSR0_RID)));
+
+	return IRQ_HANDLED;
+}
+
+static struct mfd_cell micro_cells[] = {
+	{ .name = "ipaq-micro-backlight", },
+	{ .name = "ipaq-micro-battery", },
+	{ .name = "ipaq-micro-keys", },
+	{ .name = "ipaq-micro-ts", },
+	{ .name = "ipaq-micro-leds", },
+};
+
+static int micro_resume(struct device *dev)
+{
+	struct ipaq_micro *micro = dev_get_drvdata(dev);
+
+	micro_reset_comm(micro);
+	mdelay(10);
+
+	return 0;
+}
+
+static int micro_probe(struct platform_device *pdev)
+{
+	struct ipaq_micro *micro;
+	struct resource *res;
+	int ret;
+	int irq;
+
+	micro = devm_kzalloc(&pdev->dev, sizeof(*micro), GFP_KERNEL);
+	if (!micro)
+		return -ENOMEM;
+
+	micro->dev = &pdev->dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
+
+	micro->base = devm_request_and_ioremap(&pdev->dev, res);
+	if (!micro->base)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res)
+		return -EINVAL;
+
+	micro->sdlc = devm_request_and_ioremap(&pdev->dev, res);
+	if (!micro->sdlc)
+		return -ENOMEM;
+
+	micro_reset_comm(micro);
+
+	irq = platform_get_irq(pdev, 0);
+	if (!irq)
+		return -EINVAL;
+	ret = devm_request_irq(&pdev->dev, irq, micro_serial_isr,
+			       IRQF_SHARED, "ipaq-micro",
+			       micro);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to grab serial port IRQ\n");
+		return ret;
+	} else
+		dev_info(&pdev->dev, "grabbed serial port IRQ\n");
+
+	spin_lock_init(&micro->lock);
+	INIT_LIST_HEAD(&micro->queue);
+	platform_set_drvdata(pdev, micro);
+
+	ret = mfd_add_devices(&pdev->dev, pdev->id, micro_cells,
+			      ARRAY_SIZE(micro_cells), NULL, 0, NULL);
+	if (ret) {
+		dev_err(&pdev->dev, "error adding MFD cells");
+		return ret;
+	}
+
+	/* Check version */
+	ipaq_micro_get_version(micro);
+	dev_info(&pdev->dev, "Atmel micro ASIC version %s\n", micro->version);
+	ipaq_micro_eeprom_dump(micro);
+
+	return 0;
+}
+
+static int micro_remove(struct platform_device *pdev)
+{
+	struct ipaq_micro *micro = platform_get_drvdata(pdev);
+	u32 val;
+
+	mfd_remove_devices(&pdev->dev);
+
+	val = readl(micro->base + UTCR3);
+	val &= ~(UTCR3_RXE | UTCR3_RIE); /* disable receive interrupt */
+	val &= ~(UTCR3_TXE | UTCR3_TIE); /* disable transmit interrupt */
+	writel(val, micro->base + UTCR3);
+
+	return 0;
+}
+
+static const struct dev_pm_ops micro_dev_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(NULL, micro_resume)
+};
+
+static struct platform_driver micro_device_driver = {
+	.driver   = {
+		.name	= "ipaq-h3xxx-micro",
+		.pm	= &micro_dev_pm_ops,
+	},
+	.probe    = micro_probe,
+	.remove   = micro_remove,
+	/* .shutdown = micro_suspend, // FIXME */
+};
+module_platform_driver(micro_device_driver);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("driver for iPAQ Atmel micro core and backlight");
diff --git a/include/linux/mfd/ipaq-micro.h b/include/linux/mfd/ipaq-micro.h
new file mode 100644
index 000000000000..5c4d29f6674f
--- /dev/null
+++ b/include/linux/mfd/ipaq-micro.h
@@ -0,0 +1,148 @@
+/*
+ * Header file for the compaq Micro MFD
+ */
+
+#ifndef _MFD_IPAQ_MICRO_H_
+#define _MFD_IPAQ_MICRO_H_
+
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+#include <linux/list.h>
+
+#define TX_BUF_SIZE	32
+#define RX_BUF_SIZE	16
+#define CHAR_SOF	0x02
+
+/*
+ * These are the different messages that can be sent to the microcontroller
+ * to control various aspects.
+ */
+#define MSG_VERSION		0x0
+#define MSG_KEYBOARD		0x2
+#define MSG_TOUCHSCREEN		0x3
+#define MSG_EEPROM_READ		0x4
+#define MSG_EEPROM_WRITE	0x5
+#define MSG_THERMAL_SENSOR	0x6
+#define MSG_NOTIFY_LED		0x8
+#define MSG_BATTERY		0x9
+#define MSG_SPI_READ		0xb
+#define MSG_SPI_WRITE		0xc
+#define MSG_BACKLIGHT		0xd /* H3600 only */
+#define MSG_CODEC_CTRL		0xe /* H3100 only */
+#define MSG_DISPLAY_CTRL	0xf /* H3100 only */
+
+/* state of receiver parser */
+enum rx_state {
+	STATE_SOF = 0,     /* Next byte should be start of frame */
+	STATE_ID,          /* Next byte is ID & message length   */
+	STATE_DATA,        /* Next byte is a data byte           */
+	STATE_CHKSUM       /* Next byte should be checksum       */
+};
+
+/**
+ * struct ipaq_micro_txdev - TX state
+ * @len: length of message in TX buffer
+ * @index: current index into TX buffer
+ * @buf: TX buffer
+ */
+struct ipaq_micro_txdev {
+	u8 len;
+	u8 index;
+	u8 buf[TX_BUF_SIZE];
+};
+
+/**
+ * struct ipaq_micro_rxdev - RX state
+ * @state: context of RX state machine
+ * @chksum: calculated checksum
+ * @id: message ID from packet
+ * @len: RX buffer length
+ * @index: RX buffer index
+ * @buf: RX buffer
+ */
+struct ipaq_micro_rxdev {
+	enum rx_state state;
+	unsigned char chksum;
+	u8            id;
+	unsigned int  len;
+	unsigned int  index;
+	u8            buf[RX_BUF_SIZE];
+};
+
+/**
+ * struct ipaq_micro_msg - message to the iPAQ microcontroller
+ * @id: 4-bit ID of the message
+ * @tx_len: length of TX data
+ * @tx_data: TX data to send
+ * @rx_len: length of received RX data
+ * @rx_data: buffer for the received RX data
+ * @ack: a completion that will be completed when RX is complete
+ * @node: list node if message gets queued
+ */
+struct ipaq_micro_msg {
+	u8 id;
+	u8 tx_len;
+	u8 tx_data[TX_BUF_SIZE];
+	u8 rx_len;
+	u8 rx_data[RX_BUF_SIZE];
+	struct completion ack;
+	struct list_head node;
+};
+
+/**
+ * struct ipaq_micro - iPAQ microcontroller state
+ * @dev: corresponding platform device
+ * @base: virtual memory base for underlying serial device
+ * @sdlc: virtual memory base for Synchronous Data Link Controller
+ * @version: version string
+ * @tx: TX state
+ * @rx: RX state
+ * @lock: lock for this state container
+ * @msg: current message
+ * @queue: message queue
+ * @key: callback for asynchronous key events
+ * @key_data: data to pass along with key events
+ * @ts: callback for asynchronous touchscreen events
+ * @ts_data: data to pass along with touchscreen events
+ */
+struct ipaq_micro {
+	struct device *dev;
+	void __iomem *base;
+	void __iomem *sdlc;
+	char version[5];
+	struct ipaq_micro_txdev tx;	/* transmit ISR state */
+	struct ipaq_micro_rxdev rx;	/* receive ISR state */
+	spinlock_t lock;
+	struct ipaq_micro_msg *msg;
+	struct list_head queue;
+	void (*key) (void *data, int len, unsigned char *rxdata);
+	void *key_data;
+	void (*ts) (void *data, int len, unsigned char *rxdata);
+	void *ts_data;
+};
+
+extern int
+ipaq_micro_tx_msg(struct ipaq_micro *micro, struct ipaq_micro_msg *msg);
+
+static inline int
+ipaq_micro_tx_msg_sync(struct ipaq_micro *micro,
+		       struct ipaq_micro_msg *msg)
+{
+	int ret;
+
+	init_completion(&msg->ack);
+	ret = ipaq_micro_tx_msg(micro, msg);
+	wait_for_completion(&msg->ack); /* block until msg->ack completes */
+	/* NOTE(review): waits even when ret != 0; confirm ack always fires */
+	return ret;
+}
+
+static inline int
+ipaq_micro_tx_msg_async(struct ipaq_micro *micro,
+			struct ipaq_micro_msg *msg)
+{
+	init_completion(&msg->ack); /* initialised, but not waited on here */
+	return ipaq_micro_tx_msg(micro, msg);
+}
+
+#endif /* _MFD_IPAQ_MICRO_H_ */
-- 
cgit 


From 5271db29d7199fe0ffb303ca4bbbb1485bba28c3 Mon Sep 17 00:00:00 2001
From: Bill Richardson <wfrichar@chromium.org>
Date: Wed, 30 Apr 2014 10:44:08 -0700
Subject: mfd: cros_ec: Sync to the latest cros_ec_commands.h from EC sources

This just updates include/linux/mfd/cros_ec_commands.h to match the
latest EC version (which is the One True Source for such things).  See
<https://chromium.googlesource.com/chromiumos/platform/ec>

[dianders: took today's ToT version from the Chromium OS EC; deleted
references to cros_ec_dev and cros_ec_lpc since those aren't upstream
yet]

Signed-off-by: Bill Richardson <wfrichar@chromium.org>
Signed-off-by: Doug Anderson <dianders@chromium.org>
Reviewed-by: Simon Glass <sjg@chromium.org>
Tested-by: Andrew Bresticker <abrestic@chromium.org>
Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/cros_ec.c                |    2 +-
 include/linux/mfd/cros_ec.h          |    4 +-
 include/linux/mfd/cros_ec_commands.h | 1128 +++++++++++++++++++++++++++++++---
 3 files changed, 1059 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/cros_ec.c b/drivers/mfd/cros_ec.c
index fae69b1db5b1..8b6fb34c9b31 100644
--- a/drivers/mfd/cros_ec.c
+++ b/drivers/mfd/cros_ec.c
@@ -30,7 +30,7 @@ int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
 	uint8_t *out;
 	int csum, i;
 
-	BUG_ON(msg->out_len > EC_HOST_PARAM_SIZE);
+	BUG_ON(msg->out_len > EC_PROTO2_MAX_PARAM_SIZE);
 	out = ec_dev->dout;
 	out[0] = EC_CMD_VERSION0 + msg->version;
 	out[1] = msg->cmd;
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 032af7fc5b2e..887ef4f7bef7 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -29,8 +29,8 @@ enum {
 	EC_MSG_RX_PROTO_BYTES	= 3,
 
 	/* Max length of messages */
-	EC_MSG_BYTES		= EC_HOST_PARAM_SIZE + EC_MSG_TX_PROTO_BYTES,
-
+	EC_MSG_BYTES		= EC_PROTO2_MAX_PARAM_SIZE +
+					EC_MSG_TX_PROTO_BYTES,
 };
 
 /**
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 86fd06953bcd..7853a6410d14 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -24,25 +24,12 @@
 #define __CROS_EC_COMMANDS_H
 
 /*
- * Protocol overview
+ * Current version of this protocol
  *
- * request:  CMD [ P0 P1 P2 ... Pn S ]
- * response: ERR [ P0 P1 P2 ... Pn S ]
- *
- * where the bytes are defined as follow :
- *      - CMD is the command code. (defined by EC_CMD_ constants)
- *      - ERR is the error code. (defined by EC_RES_ constants)
- *      - Px is the optional payload.
- *        it is not sent if the error code is not success.
- *        (defined by ec_params_ and ec_response_ structures)
- *      - S is the checksum which is the sum of all payload bytes.
- *
- * On LPC, CMD and ERR are sent/received at EC_LPC_ADDR_KERNEL|USER_CMD
- * and the payloads are sent/received at EC_LPC_ADDR_KERNEL|USER_PARAM.
- * On I2C, all bytes are sent serially in the same message.
+ * TODO(crosbug.com/p/11223): This is effectively useless; protocol is
+ * determined in other ways.  Remove this once the kernel code no longer
+ * depends on it.
  */
-
-/* Current version of this protocol */
 #define EC_PROTO_VERSION          0x00000002
 
 /* Command version mask */
@@ -57,13 +44,19 @@
 #define EC_LPC_ADDR_HOST_CMD   0x204
 
 /* I/O addresses for host command args and params */
-#define EC_LPC_ADDR_HOST_ARGS  0x800
-#define EC_LPC_ADDR_HOST_PARAM 0x804
-#define EC_HOST_PARAM_SIZE     0x0fc  /* Size of param area in bytes */
-
-/* I/O addresses for host command params, old interface */
-#define EC_LPC_ADDR_OLD_PARAM  0x880
-#define EC_OLD_PARAM_SIZE      0x080  /* Size of param area in bytes */
+/* Protocol version 2 */
+#define EC_LPC_ADDR_HOST_ARGS    0x800  /* And 0x801, 0x802, 0x803 */
+#define EC_LPC_ADDR_HOST_PARAM   0x804  /* For version 2 params; size is
+					 * EC_PROTO2_MAX_PARAM_SIZE */
+/* Protocol version 3 */
+#define EC_LPC_ADDR_HOST_PACKET  0x800  /* Offset of version 3 packet */
+#define EC_LPC_HOST_PACKET_SIZE  0x100  /* Max size of version 3 packet */
+
+/* The actual block is 0x800-0x8ff, but some BIOSes think it's 0x880-0x8ff
+ * and they tell the kernel that so we have to think of it as two parts. */
+#define EC_HOST_CMD_REGION0    0x800
+#define EC_HOST_CMD_REGION1    0x880
+#define EC_HOST_CMD_REGION_SIZE 0x80
 
 /* EC command register bit functions */
 #define EC_LPC_CMDR_DATA	(1 << 0)  /* Data ready for host to read */
@@ -79,18 +72,22 @@
 #define EC_MEMMAP_TEXT_MAX     8   /* Size of a string in the memory map */
 
 /* The offset address of each type of data in mapped memory. */
-#define EC_MEMMAP_TEMP_SENSOR      0x00 /* Temp sensors */
-#define EC_MEMMAP_FAN              0x10 /* Fan speeds */
-#define EC_MEMMAP_TEMP_SENSOR_B    0x18 /* Temp sensors (second set) */
-#define EC_MEMMAP_ID               0x20 /* 'E' 'C' */
+#define EC_MEMMAP_TEMP_SENSOR      0x00 /* Temp sensors 0x00 - 0x0f */
+#define EC_MEMMAP_FAN              0x10 /* Fan speeds 0x10 - 0x17 */
+#define EC_MEMMAP_TEMP_SENSOR_B    0x18 /* More temp sensors 0x18 - 0x1f */
+#define EC_MEMMAP_ID               0x20 /* 0x20 == 'E', 0x21 == 'C' */
 #define EC_MEMMAP_ID_VERSION       0x22 /* Version of data in 0x20 - 0x2f */
 #define EC_MEMMAP_THERMAL_VERSION  0x23 /* Version of data in 0x00 - 0x1f */
 #define EC_MEMMAP_BATTERY_VERSION  0x24 /* Version of data in 0x40 - 0x7f */
 #define EC_MEMMAP_SWITCHES_VERSION 0x25 /* Version of data in 0x30 - 0x33 */
 #define EC_MEMMAP_EVENTS_VERSION   0x26 /* Version of data in 0x34 - 0x3f */
-#define EC_MEMMAP_HOST_CMD_FLAGS   0x27 /* Host command interface flags */
-#define EC_MEMMAP_SWITCHES         0x30
-#define EC_MEMMAP_HOST_EVENTS      0x34
+#define EC_MEMMAP_HOST_CMD_FLAGS   0x27 /* Host cmd interface flags (8 bits) */
+/* Unused 0x28 - 0x2f */
+#define EC_MEMMAP_SWITCHES         0x30	/* 8 bits */
+/* Unused 0x31 - 0x33 */
+#define EC_MEMMAP_HOST_EVENTS      0x34 /* 32 bits */
+/* Reserve 0x38 - 0x3f for additional host event-related stuff */
+/* Battery values are all 32 bits */
 #define EC_MEMMAP_BATT_VOLT        0x40 /* Battery Present Voltage */
 #define EC_MEMMAP_BATT_RATE        0x44 /* Battery Present Rate */
 #define EC_MEMMAP_BATT_CAP         0x48 /* Battery Remaining Capacity */
@@ -99,10 +96,24 @@
 #define EC_MEMMAP_BATT_DVLT        0x54 /* Battery Design Voltage */
 #define EC_MEMMAP_BATT_LFCC        0x58 /* Battery Last Full Charge Capacity */
 #define EC_MEMMAP_BATT_CCNT        0x5c /* Battery Cycle Count */
+/* Strings are all 8 bytes (EC_MEMMAP_TEXT_MAX) */
 #define EC_MEMMAP_BATT_MFGR        0x60 /* Battery Manufacturer String */
 #define EC_MEMMAP_BATT_MODEL       0x68 /* Battery Model Number String */
 #define EC_MEMMAP_BATT_SERIAL      0x70 /* Battery Serial Number String */
 #define EC_MEMMAP_BATT_TYPE        0x78 /* Battery Type String */
+#define EC_MEMMAP_ALS              0x80 /* ALS readings in lux (2 X 16 bits) */
+/* Unused 0x84 - 0x8f */
+#define EC_MEMMAP_ACC_STATUS       0x90 /* Accelerometer status (8 bits )*/
+/* Unused 0x91 */
+#define EC_MEMMAP_ACC_DATA         0x92 /* Accelerometer data 0x92 - 0x9f */
+#define EC_MEMMAP_GYRO_DATA        0xa0 /* Gyroscope data 0xa0 - 0xa5 */
+/* Unused 0xa6 - 0xfe (remember, 0xff is NOT part of the memmap region) */
+
+
+/* Define the format of the accelerometer mapped memory status byte. */
+#define EC_MEMMAP_ACC_STATUS_SAMPLE_ID_MASK  0x0f
+#define EC_MEMMAP_ACC_STATUS_BUSY_BIT        (1 << 4)
+#define EC_MEMMAP_ACC_STATUS_PRESENCE_BIT    (1 << 7)
 
 /* Number of temp sensors at EC_MEMMAP_TEMP_SENSOR */
 #define EC_TEMP_SENSOR_ENTRIES     16
@@ -112,6 +123,8 @@
  * Valid only if EC_MEMMAP_THERMAL_VERSION returns >= 2.
  */
 #define EC_TEMP_SENSOR_B_ENTRIES      8
+
+/* Special values for mapped temperature sensors */
 #define EC_TEMP_SENSOR_NOT_PRESENT    0xff
 #define EC_TEMP_SENSOR_ERROR          0xfe
 #define EC_TEMP_SENSOR_NOT_POWERED    0xfd
@@ -122,6 +135,18 @@
  */
 #define EC_TEMP_SENSOR_OFFSET      200
 
+/*
+ * Number of ALS readings at EC_MEMMAP_ALS
+ */
+#define EC_ALS_ENTRIES             2
+
+/*
+ * The default value a temperature sensor will return when it is present but
+ * has not been read this boot.  This is a reasonable number to avoid
+ * triggering alarms on the host.
+ */
+#define EC_TEMP_SENSOR_DEFAULT     (296 - EC_TEMP_SENSOR_OFFSET)
+
 #define EC_FAN_SPEED_ENTRIES       4       /* Number of fans at EC_MEMMAP_FAN */
 #define EC_FAN_SPEED_NOT_PRESENT   0xffff  /* Entry not present */
 #define EC_FAN_SPEED_STALLED       0xfffe  /* Fan stalled */
@@ -137,8 +162,8 @@
 #define EC_SWITCH_LID_OPEN               0x01
 #define EC_SWITCH_POWER_BUTTON_PRESSED   0x02
 #define EC_SWITCH_WRITE_PROTECT_DISABLED 0x04
-/* Recovery requested via keyboard */
-#define EC_SWITCH_KEYBOARD_RECOVERY      0x08
+/* Was recovery requested via keyboard; now unused. */
+#define EC_SWITCH_IGNORE1		 0x08
 /* Recovery requested via dedicated signal (from servo board) */
 #define EC_SWITCH_DEDICATED_RECOVERY     0x10
 /* Was fake developer mode switch; now unused.  Remove in next refactor. */
@@ -147,10 +172,15 @@
 /* Host command interface flags */
 /* Host command interface supports LPC args (LPC interface only) */
 #define EC_HOST_CMD_FLAG_LPC_ARGS_SUPPORTED  0x01
+/* Host command interface supports version 3 protocol */
+#define EC_HOST_CMD_FLAG_VERSION_3   0x02
 
 /* Wireless switch flags */
-#define EC_WIRELESS_SWITCH_WLAN      0x01
-#define EC_WIRELESS_SWITCH_BLUETOOTH 0x02
+#define EC_WIRELESS_SWITCH_ALL       ~0x00  /* All flags */
+#define EC_WIRELESS_SWITCH_WLAN       0x01  /* WLAN radio */
+#define EC_WIRELESS_SWITCH_BLUETOOTH  0x02  /* Bluetooth radio */
+#define EC_WIRELESS_SWITCH_WWAN       0x04  /* WWAN power */
+#define EC_WIRELESS_SWITCH_WLAN_POWER 0x08  /* WLAN power */
 
 /*
  * This header file is used in coreboot both in C and ACPI code.  The ACPI code
@@ -159,6 +189,14 @@
  */
 #ifndef __ACPI__
 
+/*
+ * Define __packed if someone hasn't beat us to it.  Linux kernel style
+ * checking prefers __packed over __attribute__((packed)).
+ */
+#ifndef __packed
+#define __packed __attribute__((packed))
+#endif
+
 /* LPC command status byte masks */
 /* EC has written a byte in the data register and host hasn't read it yet */
 #define EC_LPC_STATUS_TO_HOST     0x01
@@ -198,6 +236,9 @@ enum ec_status {
 	EC_RES_UNAVAILABLE = 9,		/* No response available */
 	EC_RES_TIMEOUT = 10,		/* We got a timeout */
 	EC_RES_OVERFLOW = 11,		/* Table / data overflow */
+	EC_RES_INVALID_HEADER = 12,     /* Header contains invalid data */
+	EC_RES_REQUEST_TRUNCATED = 13,  /* Didn't get the entire request */
+	EC_RES_RESPONSE_TOO_BIG = 14    /* Response was too big to handle */
 };
 
 /*
@@ -235,6 +276,16 @@ enum host_event_code {
 	/* Shutdown due to battery level too low */
 	EC_HOST_EVENT_BATTERY_SHUTDOWN = 17,
 
+	/* Suggest that the AP throttle itself */
+	EC_HOST_EVENT_THROTTLE_START = 18,
+	/* Suggest that the AP resume normal speed */
+	EC_HOST_EVENT_THROTTLE_STOP = 19,
+
+	/* Hang detect logic detected a hang and host event timeout expired */
+	EC_HOST_EVENT_HANG_DETECT = 20,
+	/* Hang detect logic detected a hang and warm rebooted the AP */
+	EC_HOST_EVENT_HANG_REBOOT = 21,
+
 	/*
 	 * The high bit of the event mask is not used as a host event code.  If
 	 * it reads back as set, then the entire event mask should be
@@ -279,6 +330,188 @@ struct ec_lpc_host_args {
  */
 #define EC_HOST_ARGS_FLAG_TO_HOST   0x02
 
+/*****************************************************************************/
+/*
+ * Byte codes returned by EC over SPI interface.
+ *
+ * These can be used by the AP to debug the EC interface, and to determine
+ * when the EC is not in a state where it will ever get around to responding
+ * to the AP.
+ *
+ * Example of sequence of bytes read from EC for a current good transfer:
+ *   1. -                  - AP asserts chip select (CS#)
+ *   2. EC_SPI_OLD_READY   - AP sends first byte(s) of request
+ *   3. -                  - EC starts handling CS# interrupt
+ *   4. EC_SPI_RECEIVING   - AP sends remaining byte(s) of request
+ *   5. EC_SPI_PROCESSING  - EC starts processing request; AP is clocking in
+ *                           bytes looking for EC_SPI_FRAME_START
+ *   6. -                  - EC finishes processing and sets up response
+ *   7. EC_SPI_FRAME_START - AP reads frame byte
+ *   8. (response packet)  - AP reads response packet
+ *   9. EC_SPI_PAST_END    - Any additional bytes read by AP
+ *   10 -                  - AP deasserts chip select
+ *   11 -                  - EC processes CS# interrupt and sets up DMA for
+ *                           next request
+ *
+ * If the AP is waiting for EC_SPI_FRAME_START and sees any value other than
+ * the following byte values:
+ *   EC_SPI_OLD_READY
+ *   EC_SPI_RX_READY
+ *   EC_SPI_RECEIVING
+ *   EC_SPI_PROCESSING
+ *
+ * Then the EC found an error in the request, or was not ready for the request
+ * and lost data.  The AP should give up waiting for EC_SPI_FRAME_START,
+ * because the EC is unable to tell when the AP is done sending its request.
+ */
+
+/*
+ * Framing byte which precedes a response packet from the EC.  After sending a
+ * request, the AP will clock in bytes until it sees the framing byte, then
+ * clock in the response packet.
+ */
+#define EC_SPI_FRAME_START    0xec
+
+/*
+ * Padding bytes which are clocked out after the end of a response packet.
+ */
+#define EC_SPI_PAST_END       0xed
+
+/*
+ * EC is ready to receive, and has ignored the byte sent by the AP.  EC expects
+ * that the AP will send a valid packet header (starting with
+ * EC_COMMAND_PROTOCOL_3) in the next 32 bytes.
+ */
+#define EC_SPI_RX_READY       0xf8
+
+/*
+ * EC has started receiving the request from the AP, but hasn't started
+ * processing it yet.
+ */
+#define EC_SPI_RECEIVING      0xf9
+
+/* EC has received the entire request from the AP and is processing it. */
+#define EC_SPI_PROCESSING     0xfa
+
+/*
+ * EC received bad data from the AP, such as a packet header with an invalid
+ * length.  EC will ignore all data until chip select deasserts.
+ */
+#define EC_SPI_RX_BAD_DATA    0xfb
+
+/*
+ * EC received data from the AP before it was ready.  That is, the AP asserted
+ * chip select and started clocking data before the EC was ready to receive it.
+ * EC will ignore all data until chip select deasserts.
+ */
+#define EC_SPI_NOT_READY      0xfc
+
+/*
+ * EC was ready to receive a request from the AP.  EC has treated the byte sent
+ * by the AP as part of a request packet, or (for old-style ECs) is processing
+ * a fully received packet but is not ready to respond yet.
+ */
+#define EC_SPI_OLD_READY      0xfd
+
+/*****************************************************************************/
+
+/*
+ * Protocol version 2 for I2C and SPI send a request this way:
+ *
+ *	0	EC_CMD_VERSION0 + (command version)
+ *	1	Command number
+ *	2	Length of params = N
+ *	3..N+2	Params, if any
+ *	N+3	8-bit checksum of bytes 0..N+2
+ *
+ * The corresponding response is:
+ *
+ *	0	Result code (EC_RES_*)
+ *	1	Length of params = M
+ *	2..M+1	Params, if any
+ *	M+2	8-bit checksum of bytes 0..M+1
+ */
+#define EC_PROTO2_REQUEST_HEADER_BYTES 3
+#define EC_PROTO2_REQUEST_TRAILER_BYTES 1
+#define EC_PROTO2_REQUEST_OVERHEAD (EC_PROTO2_REQUEST_HEADER_BYTES +	\
+				    EC_PROTO2_REQUEST_TRAILER_BYTES)
+
+#define EC_PROTO2_RESPONSE_HEADER_BYTES 2
+#define EC_PROTO2_RESPONSE_TRAILER_BYTES 1
+#define EC_PROTO2_RESPONSE_OVERHEAD (EC_PROTO2_RESPONSE_HEADER_BYTES +	\
+				     EC_PROTO2_RESPONSE_TRAILER_BYTES)
+
+/* Parameter length was limited by the LPC interface */
+#define EC_PROTO2_MAX_PARAM_SIZE 0xfc
+
+/* Maximum request and response packet sizes for protocol version 2 */
+#define EC_PROTO2_MAX_REQUEST_SIZE (EC_PROTO2_REQUEST_OVERHEAD +	\
+				    EC_PROTO2_MAX_PARAM_SIZE)
+#define EC_PROTO2_MAX_RESPONSE_SIZE (EC_PROTO2_RESPONSE_OVERHEAD +	\
+				     EC_PROTO2_MAX_PARAM_SIZE)
+
+/*****************************************************************************/
+
+/*
+ * Value written to legacy command port / prefix byte to indicate protocol
+ * 3+ structs are being used.  Usage is bus-dependent.
+ */
+#define EC_COMMAND_PROTOCOL_3 0xda
+
+#define EC_HOST_REQUEST_VERSION 3
+
+/* Version 3 request from host */
+struct ec_host_request {
+	/* Struct version (=3)
+	 *
+	 * EC will return EC_RES_INVALID_HEADER if it receives a header with a
+	 * version it doesn't know how to parse.
+	 */
+	uint8_t struct_version;
+
+	/*
+	 * Checksum of request and data; sum of all bytes including checksum
+	 * should total to 0.
+	 */
+	uint8_t checksum;
+
+	/* Command code */
+	uint16_t command;
+
+	/* Command version */
+	uint8_t command_version;
+
+	/* Unused byte in current protocol version; set to 0 */
+	uint8_t reserved;
+
+	/* Length of data which follows this header */
+	uint16_t data_len;
+} __packed;
+
+#define EC_HOST_RESPONSE_VERSION 3
+
+/* Version 3 response from EC */
+struct ec_host_response {
+	/* Struct version (=3) */
+	uint8_t struct_version;
+
+	/*
+	 * Checksum of response and data; sum of all bytes including checksum
+	 * should total to 0.
+	 */
+	uint8_t checksum;
+
+	/* Result code (EC_RES_*) */
+	uint16_t result;
+
+	/* Length of data which follows this header */
+	uint16_t data_len;
+
+	/* Unused bytes in current protocol version; set to 0 */
+	uint16_t reserved;
+} __packed;
+
+/*****************************************************************************/
 /*
  * Notes on commands:
  *
@@ -418,6 +651,68 @@ struct ec_response_get_comms_status {
 	uint32_t flags;		/* Mask of enum ec_comms_status */
 } __packed;
 
+/* Fake a variety of responses, purely for testing purposes. */
+#define EC_CMD_TEST_PROTOCOL		0x0a
+
+/* Tell the EC what to send back to us. */
+struct ec_params_test_protocol {
+	uint32_t ec_result;
+	uint32_t ret_len;
+	uint8_t buf[32];
+} __packed;
+
+/* Here it comes... */
+struct ec_response_test_protocol {
+	uint8_t buf[32];
+} __packed;
+
+/* Get protocol information */
+#define EC_CMD_GET_PROTOCOL_INFO	0x0b
+
+/* Flags for ec_response_get_protocol_info.flags */
+/* EC_RES_IN_PROGRESS may be returned if a command is slow */
+#define EC_PROTOCOL_INFO_IN_PROGRESS_SUPPORTED (1 << 0)
+
+struct ec_response_get_protocol_info {
+	/* Fields which exist if at least protocol version 3 supported */
+
+	/* Bitmask of protocol versions supported (1 << n means version n)*/
+	uint32_t protocol_versions;
+
+	/* Maximum request packet size, in bytes */
+	uint16_t max_request_packet_size;
+
+	/* Maximum response packet size, in bytes */
+	uint16_t max_response_packet_size;
+
+	/* Flags; see EC_PROTOCOL_INFO_* */
+	uint32_t flags;
+} __packed;
+
+
+/*****************************************************************************/
+/* Get/Set miscellaneous values */
+
+/* The upper byte of .flags tells what to do (nothing means "get") */
+#define EC_GSV_SET        0x80000000
+
+/* The lower three bytes of .flags identify the parameter, if that has
+   meaning for an individual command. */
+#define EC_GSV_PARAM_MASK 0x00ffffff
+
+struct ec_params_get_set_value {
+	uint32_t flags;
+	uint32_t value;
+} __packed;
+
+struct ec_response_get_set_value {
+	uint32_t flags;
+	uint32_t value;
+} __packed;
+
+/* More than one command can use these structs to get/set parameters. */
+#define EC_CMD_GSV_PAUSE_IN_S5	0x0c
+
 
 /*****************************************************************************/
 /* Flash commands */
@@ -425,6 +720,7 @@ struct ec_response_get_comms_status {
 /* Get flash info */
 #define EC_CMD_FLASH_INFO 0x10
 
+/* Version 0 returns these fields */
 struct ec_response_flash_info {
 	/* Usable flash size, in bytes */
 	uint32_t flash_size;
@@ -445,6 +741,37 @@ struct ec_response_flash_info {
 	uint32_t protect_block_size;
 } __packed;
 
+/* Flags for version 1+ flash info command */
+/* EC flash erases bits to 0 instead of 1 */
+#define EC_FLASH_INFO_ERASE_TO_0 (1 << 0)
+
+/*
+ * Version 1 returns the same initial fields as version 0, with additional
+ * fields following.
+ *
+ * gcc anonymous structs don't seem to get along with the __packed directive;
+ * if they did we'd define the version 0 struct as a sub-struct of this one.
+ */
+struct ec_response_flash_info_1 {
+	/* Version 0 fields; see above for description */
+	uint32_t flash_size;
+	uint32_t write_block_size;
+	uint32_t erase_block_size;
+	uint32_t protect_block_size;
+
+	/* Version 1 adds these fields: */
+	/*
+	 * Ideal write size in bytes.  Writes will be fastest if size is
+	 * exactly this and offset is a multiple of this.  For example, an EC
+	 * may have a write buffer which can do half-page operations if data is
+	 * aligned, and a slower word-at-a-time write mode.
+	 */
+	uint32_t write_ideal_size;
+
+	/* Flags; see EC_FLASH_INFO_* */
+	uint32_t flags;
+} __packed;
+
 /*
  * Read flash
  *
@@ -459,15 +786,15 @@ struct ec_params_flash_read {
 
 /* Write flash */
 #define EC_CMD_FLASH_WRITE 0x12
+#define EC_VER_FLASH_WRITE 1
+
+/* Version 0 of the flash command supported only 64 bytes of data */
+#define EC_FLASH_WRITE_VER0_SIZE 64
 
 struct ec_params_flash_write {
 	uint32_t offset;   /* Byte offset to write */
 	uint32_t size;     /* Size to write in bytes */
-	/*
-	 * Data to write.  Could really use EC_PARAM_SIZE - 8, but tidiest to
-	 * use a power of 2 so writes stay aligned.
-	 */
-	uint8_t data[64];
+	/* Followed by data to write */
 } __packed;
 
 /* Erase flash */
@@ -543,7 +870,7 @@ struct ec_response_flash_protect {
 
 enum ec_flash_region {
 	/* Region which holds read-only EC image */
-	EC_FLASH_REGION_RO,
+	EC_FLASH_REGION_RO = 0,
 	/* Region which holds rewritable EC image */
 	EC_FLASH_REGION_RW,
 	/*
@@ -551,6 +878,8 @@ enum ec_flash_region {
 	 * EC_FLASH_REGION_RO)
 	 */
 	EC_FLASH_REGION_WP_RO,
+	/* Number of regions */
+	EC_FLASH_REGION_COUNT,
 };
 
 struct ec_params_flash_region_info {
@@ -639,15 +968,15 @@ struct rgb_s {
  */
 struct lightbar_params {
 	/* Timing */
-	int google_ramp_up;
-	int google_ramp_down;
-	int s3s0_ramp_up;
-	int s0_tick_delay[2];			/* AC=0/1 */
-	int s0a_tick_delay[2];			/* AC=0/1 */
-	int s0s3_ramp_down;
-	int s3_sleep_for;
-	int s3_ramp_up;
-	int s3_ramp_down;
+	int32_t google_ramp_up;
+	int32_t google_ramp_down;
+	int32_t s3s0_ramp_up;
+	int32_t s0_tick_delay[2];		/* AC=0/1 */
+	int32_t s0a_tick_delay[2];		/* AC=0/1 */
+	int32_t s0s3_ramp_down;
+	int32_t s3_sleep_for;
+	int32_t s3_ramp_up;
+	int32_t s3_ramp_down;
 
 	/* Oscillation */
 	uint8_t new_s0;
@@ -676,7 +1005,7 @@ struct ec_params_lightbar {
 	union {
 		struct {
 			/* no args */
-		} dump, off, on, init, get_seq, get_params;
+		} dump, off, on, init, get_seq, get_params, version;
 
 		struct num {
 			uint8_t num;
@@ -710,6 +1039,11 @@ struct ec_response_lightbar {
 
 		struct lightbar_params get_params;
 
+		struct version {
+			uint32_t num;
+			uint32_t flags;
+		} version;
+
 		struct {
 			/* no return params */
 		} off, on, init, brightness, seq, reg, rgb, demo, set_params;
@@ -730,9 +1064,61 @@ enum lightbar_command {
 	LIGHTBAR_CMD_DEMO = 9,
 	LIGHTBAR_CMD_GET_PARAMS = 10,
 	LIGHTBAR_CMD_SET_PARAMS = 11,
+	LIGHTBAR_CMD_VERSION = 12,
 	LIGHTBAR_NUM_CMDS
 };
 
+/*****************************************************************************/
+/* LED control commands */
+
+#define EC_CMD_LED_CONTROL 0x29
+
+enum ec_led_id {
+	/* LED to indicate battery state of charge */
+	EC_LED_ID_BATTERY_LED = 0,
+	/*
+	 * LED to indicate system power state (on or in suspend).
+	 * May be on power button or on C-panel.
+	 */
+	EC_LED_ID_POWER_LED,
+	/* LED on power adapter or its plug */
+	EC_LED_ID_ADAPTER_LED,
+
+	EC_LED_ID_COUNT
+};
+
+/* LED control flags */
+#define EC_LED_FLAGS_QUERY (1 << 0) /* Query LED capability only */
+#define EC_LED_FLAGS_AUTO  (1 << 1) /* Switch LED back to automatic control */
+
+enum ec_led_colors {
+	EC_LED_COLOR_RED = 0,
+	EC_LED_COLOR_GREEN,
+	EC_LED_COLOR_BLUE,
+	EC_LED_COLOR_YELLOW,
+	EC_LED_COLOR_WHITE,
+
+	EC_LED_COLOR_COUNT
+};
+
+struct ec_params_led_control {
+	uint8_t led_id;     /* Which LED to control */
+	uint8_t flags;      /* Control flags */
+
+	uint8_t brightness[EC_LED_COLOR_COUNT];
+} __packed;
+
+struct ec_response_led_control {
+	/*
+	 * Available brightness value range.
+	 *
+	 * Range 0 means color channel not present.
+	 * Range 1 means on/off control.
+	 * Other values mean the LED is controlled by PWM.
+	 */
+	uint8_t brightness_range[EC_LED_COLOR_COUNT];
+} __packed;
+
 /*****************************************************************************/
 /* Verified boot commands */
 
@@ -789,6 +1175,181 @@ enum ec_vboot_hash_status {
 #define EC_VBOOT_HASH_OFFSET_RO 0xfffffffe
 #define EC_VBOOT_HASH_OFFSET_RW 0xfffffffd
 
+/*****************************************************************************/
+/*
+ * Motion sense commands. We'll make separate structs for sub-commands with
+ * different input args, so that we know how much to expect.
+ */
+#define EC_CMD_MOTION_SENSE_CMD 0x2B
+
+/* Motion sense commands */
+enum motionsense_command {
+	/*
+	 * Dump command returns all motion sensor data including motion sense
+	 * module flags and individual sensor flags.
+	 */
+	MOTIONSENSE_CMD_DUMP = 0,
+
+	/*
+	 * Info command returns data describing the details of a given sensor,
+	 * including enum motionsensor_type, enum motionsensor_location, and
+	 * enum motionsensor_chip.
+	 */
+	MOTIONSENSE_CMD_INFO = 1,
+
+	/*
+	 * EC Rate command is a setter/getter command for the EC sampling rate
+	 * of all motion sensors in milliseconds.
+	 */
+	MOTIONSENSE_CMD_EC_RATE = 2,
+
+	/*
+	 * Sensor ODR command is a setter/getter command for the output data
+	 * rate of a specific motion sensor in millihertz.
+	 */
+	MOTIONSENSE_CMD_SENSOR_ODR = 3,
+
+	/*
+	 * Sensor range command is a setter/getter command for the range of
+	 * a specified motion sensor in +/-G's or +/- deg/s.
+	 */
+	MOTIONSENSE_CMD_SENSOR_RANGE = 4,
+
+	/*
+	 * Setter/getter command for the keyboard wake angle. When the lid
+	 * angle is greater than this value, keyboard wake is disabled in S3,
+	 * and when the lid angle goes less than this value, keyboard wake is
+	 * enabled. Note, the lid angle measurement is an approximate,
+	 * un-calibrated value, hence the wake angle isn't exact.
+	 */
+	MOTIONSENSE_CMD_KB_WAKE_ANGLE = 5,
+
+	/* Number of motionsense sub-commands. */
+	MOTIONSENSE_NUM_CMDS
+};
+
+enum motionsensor_id {
+	EC_MOTION_SENSOR_ACCEL_BASE = 0,
+	EC_MOTION_SENSOR_ACCEL_LID = 1,
+	EC_MOTION_SENSOR_GYRO = 2,
+
+	/*
+	 * Note, if more sensors are added and this count changes, the padding
+	 * in ec_response_motion_sense dump command must be modified.
+	 */
+	EC_MOTION_SENSOR_COUNT = 3
+};
+
+/* List of motion sensor types. */
+enum motionsensor_type {
+	MOTIONSENSE_TYPE_ACCEL = 0,
+	MOTIONSENSE_TYPE_GYRO = 1,
+};
+
+/* List of motion sensor locations. */
+enum motionsensor_location {
+	MOTIONSENSE_LOC_BASE = 0,
+	MOTIONSENSE_LOC_LID = 1,
+};
+
+/* List of motion sensor chips. */
+enum motionsensor_chip {
+	MOTIONSENSE_CHIP_KXCJ9 = 0,
+};
+
+/* Module flag masks used for the dump sub-command. */
+#define MOTIONSENSE_MODULE_FLAG_ACTIVE (1<<0)
+
+/* Sensor flag masks used for the dump sub-command. */
+#define MOTIONSENSE_SENSOR_FLAG_PRESENT (1<<0)
+
+/*
+ * Send this value for the data element to only perform a read. If you
+ * send any other value, the EC will interpret it as data to set and will
+ * return the actual value set.
+ */
+#define EC_MOTION_SENSE_NO_VALUE -1
+
+struct ec_params_motion_sense {
+	uint8_t cmd;
+	union {
+		/* Used for MOTIONSENSE_CMD_DUMP. */
+		struct {
+			/* no args */
+		} dump;
+
+		/*
+		 * Used for MOTIONSENSE_CMD_EC_RATE and
+		 * MOTIONSENSE_CMD_KB_WAKE_ANGLE.
+		 */
+		struct {
+			/* Data to set or EC_MOTION_SENSE_NO_VALUE to read. */
+			int16_t data;
+		} ec_rate, kb_wake_angle;
+
+		/* Used for MOTIONSENSE_CMD_INFO. */
+		struct {
+			/* Should be element of enum motionsensor_id. */
+			uint8_t sensor_num;
+		} info;
+
+		/*
+		 * Used for MOTIONSENSE_CMD_SENSOR_ODR and
+		 * MOTIONSENSE_CMD_SENSOR_RANGE.
+		 */
+		struct {
+			/* Should be element of enum motionsensor_id. */
+			uint8_t sensor_num;
+
+			/* Rounding flag, true for round-up, false for down. */
+			uint8_t roundup;
+
+			uint16_t reserved;
+
+			/* Data to set or EC_MOTION_SENSE_NO_VALUE to read. */
+			int32_t data;
+		} sensor_odr, sensor_range;
+	};
+} __packed;
+
+struct ec_response_motion_sense {
+	union {
+		/* Used for MOTIONSENSE_CMD_DUMP. */
+		struct {
+			/* Flags representing the motion sensor module. */
+			uint8_t module_flags;
+
+			/* Flags for each sensor in enum motionsensor_id. */
+			uint8_t sensor_flags[EC_MOTION_SENSOR_COUNT];
+
+			/* Array of all sensor data. Each sensor is 3-axis. */
+			int16_t data[3*EC_MOTION_SENSOR_COUNT];
+		} dump;
+
+		/* Used for MOTIONSENSE_CMD_INFO. */
+		struct {
+			/* Should be element of enum motionsensor_type. */
+			uint8_t type;
+
+			/* Should be element of enum motionsensor_location. */
+			uint8_t location;
+
+			/* Should be element of enum motionsensor_chip. */
+			uint8_t chip;
+		} info;
+
+		/*
+		 * Used for MOTIONSENSE_CMD_EC_RATE, MOTIONSENSE_CMD_SENSOR_ODR,
+		 * MOTIONSENSE_CMD_SENSOR_RANGE, and
+		 * MOTIONSENSE_CMD_KB_WAKE_ANGLE.
+		 */
+		struct {
+			/* Current value of the parameter queried. */
+			int32_t ret;
+		} ec_rate, sensor_odr, sensor_range, kb_wake_angle;
+	};
+} __packed;
+
 /*****************************************************************************/
 /* USB charging control commands */
 
@@ -868,20 +1429,27 @@ struct ec_response_port80_last_boot {
 } __packed;
 
 /*****************************************************************************/
-/* Thermal engine commands */
+/* Thermal engine commands. Note that there are two implementations. We'll
+ * reuse the command number, but the data and behavior are incompatible.
+ * Version 0 is what originally shipped on Link.
+ * Version 1 separates the CPU thermal limits from the fan control.
+ */
 
-/* Set thershold value */
 #define EC_CMD_THERMAL_SET_THRESHOLD 0x50
+#define EC_CMD_THERMAL_GET_THRESHOLD 0x51
+
+/* The version 0 structs are opaque. You have to know what they are for
+ * the get/set commands to make any sense.
+ */
 
+/* Version 0 - set */
 struct ec_params_thermal_set_threshold {
 	uint8_t sensor_type;
 	uint8_t threshold_id;
 	uint16_t value;
 } __packed;
 
-/* Get threshold value */
-#define EC_CMD_THERMAL_GET_THRESHOLD 0x51
-
+/* Version 0 - get */
 struct ec_params_thermal_get_threshold {
 	uint8_t sensor_type;
 	uint8_t threshold_id;
@@ -891,6 +1459,41 @@ struct ec_response_thermal_get_threshold {
 	uint16_t value;
 } __packed;
 
+
+/* The version 1 structs are visible. */
+enum ec_temp_thresholds {
+	EC_TEMP_THRESH_WARN = 0,
+	EC_TEMP_THRESH_HIGH,
+	EC_TEMP_THRESH_HALT,
+
+	EC_TEMP_THRESH_COUNT
+};
+
+/* Thermal configuration for one temperature sensor. Temps are in degrees K.
+ * Zero values will be silently ignored by the thermal task.
+ */
+struct ec_thermal_config {
+	uint32_t temp_host[EC_TEMP_THRESH_COUNT]; /* levels of hotness */
+	uint32_t temp_fan_off;		/* no active cooling needed */
+	uint32_t temp_fan_max;		/* max active cooling needed */
+} __packed;
+
+/* Version 1 - get config for one sensor. */
+struct ec_params_thermal_get_threshold_v1 {
+	uint32_t sensor_num;
+} __packed;
+/* This returns a struct ec_thermal_config */
+
+/* Version 1 - set config for one sensor.
+ * Use read-modify-write for best results! */
+struct ec_params_thermal_set_threshold_v1 {
+	uint32_t sensor_num;
+	struct ec_thermal_config cfg;
+} __packed;
+/* This returns no data */
+
+/****************************************************************************/
+
 /* Toggle automatic fan control */
 #define EC_CMD_THERMAL_AUTO_FAN_CTRL 0x52
 
@@ -920,6 +1523,18 @@ struct ec_params_tmp006_set_calibration {
 	float b2;
 } __packed;
 
+/* Read raw TMP006 data */
+#define EC_CMD_TMP006_GET_RAW 0x55
+
+struct ec_params_tmp006_get_raw {
+	uint8_t index;
+} __packed;
+
+struct ec_response_tmp006_get_raw {
+	int32_t t;  /* In 1/100 K */
+	int32_t v;  /* In nV */
+};
+
 /*****************************************************************************/
 /* MKBP - Matrix KeyBoard Protocol */
 
@@ -1118,11 +1733,41 @@ struct ec_params_switch_enable_backlight {
 
 /* Enable/disable WLAN/Bluetooth */
 #define EC_CMD_SWITCH_ENABLE_WIRELESS 0x91
+#define EC_VER_SWITCH_ENABLE_WIRELESS 1
 
-struct ec_params_switch_enable_wireless {
+/* Version 0 params; no response */
+struct ec_params_switch_enable_wireless_v0 {
 	uint8_t enabled;
 } __packed;
 
+/* Version 1 params */
+struct ec_params_switch_enable_wireless_v1 {
+	/* Flags to enable now */
+	uint8_t now_flags;
+
+	/* Which flags to copy from now_flags */
+	uint8_t now_mask;
+
+	/*
+	 * Flags to leave enabled in S3, if they're on at the S0->S3
+	 * transition.  (Other flags will be disabled by the S0->S3
+	 * transition.)
+	 */
+	uint8_t suspend_flags;
+
+	/* Which flags to copy from suspend_flags */
+	uint8_t suspend_mask;
+} __packed;
+
+/* Version 1 response */
+struct ec_response_switch_enable_wireless_v1 {
+	/* Flags to enable now */
+	uint8_t now_flags;
+
+	/* Flags to leave enabled in S3 */
+	uint8_t suspend_flags;
+} __packed;
+
 /*****************************************************************************/
 /* GPIO commands. Only available on EC if write protect has been disabled. */
 
@@ -1147,11 +1792,16 @@ struct ec_response_gpio_get {
 /*****************************************************************************/
 /* I2C commands. Only available when flash write protect is unlocked. */
 
+/*
+ * TODO(crosbug.com/p/23570): These commands are deprecated, and will be
+ * removed soon.  Use EC_CMD_I2C_XFER instead.
+ */
+
 /* Read I2C bus */
 #define EC_CMD_I2C_READ 0x94
 
 struct ec_params_i2c_read {
-	uint16_t addr;
+	uint16_t addr; /* 8-bit address (7-bit shifted << 1) */
 	uint8_t read_size; /* Either 8 or 16. */
 	uint8_t port;
 	uint8_t offset;
@@ -1165,7 +1815,7 @@ struct ec_response_i2c_read {
 
 struct ec_params_i2c_write {
 	uint16_t data;
-	uint16_t addr;
+	uint16_t addr; /* 8-bit address (7-bit shifted << 1) */
 	uint8_t write_size; /* Either 8 or 16. */
 	uint8_t port;
 	uint8_t offset;
@@ -1174,11 +1824,20 @@ struct ec_params_i2c_write {
 /*****************************************************************************/
 /* Charge state commands. Only available when flash write protect unlocked. */
 
-/* Force charge state machine to stop in idle mode */
-#define EC_CMD_CHARGE_FORCE_IDLE 0x96
+/* Force charge state machine to stop charging the battery or force it to
+ * discharge the battery.
+ */
+#define EC_CMD_CHARGE_CONTROL 0x96
+#define EC_VER_CHARGE_CONTROL 1
 
-struct ec_params_force_idle {
-	uint8_t enabled;
+enum ec_charge_control_mode {
+	CHARGE_CONTROL_NORMAL = 0,
+	CHARGE_CONTROL_IDLE,
+	CHARGE_CONTROL_DISCHARGE,
+};
+
+struct ec_params_charge_control {
+	uint32_t mode;  /* enum ec_charge_control_mode */
 } __packed;
 
 /*****************************************************************************/
@@ -1206,14 +1865,231 @@ struct ec_params_force_idle {
 #define EC_CMD_BATTERY_CUT_OFF 0x99
 
 /*****************************************************************************/
-/* Temporary debug commands. TODO: remove this crosbug.com/p/13849 */
+/* USB port mux control. */
 
 /*
- * Dump charge state machine context.
- *
- * Response is a binary dump of charge state machine context.
+ * Switch USB mux or return to automatic switching.
+ */
+#define EC_CMD_USB_MUX 0x9a
+
+struct ec_params_usb_mux {
+	uint8_t mux;
+} __packed;
+
+/*****************************************************************************/
+/* LDOs / FETs control. */
+
+enum ec_ldo_state {
+	EC_LDO_STATE_OFF = 0,	/* the LDO / FET is shut down */
+	EC_LDO_STATE_ON = 1,	/* the LDO / FET is ON / providing power */
+};
+
+/*
+ * Switch on/off a LDO.
+ */
+#define EC_CMD_LDO_SET 0x9b
+
+struct ec_params_ldo_set {
+	uint8_t index;
+	uint8_t state;
+} __packed;
+
+/*
+ * Get LDO state.
+ */
+#define EC_CMD_LDO_GET 0x9c
+
+struct ec_params_ldo_get {
+	uint8_t index;
+} __packed;
+
+struct ec_response_ldo_get {
+	uint8_t state;
+} __packed;
+
+/*****************************************************************************/
+/* Power info. */
+
+/*
+ * Get power info.
+ */
+#define EC_CMD_POWER_INFO 0x9d
+
+struct ec_response_power_info {
+	uint32_t usb_dev_type;
+	uint16_t voltage_ac;
+	uint16_t voltage_system;
+	uint16_t current_system;
+	uint16_t usb_current_limit;
+} __packed;
+
+/*****************************************************************************/
+/* I2C passthru command */
+
+#define EC_CMD_I2C_PASSTHRU 0x9e
+
+/* Slave address is 10 (not 7) bit */
+#define EC_I2C_FLAG_10BIT	(1 << 16)
+
+/* Read data; if not present, message is a write */
+#define EC_I2C_FLAG_READ	(1 << 15)
+
+/* Mask for address */
+#define EC_I2C_ADDR_MASK	0x3ff
+
+#define EC_I2C_STATUS_NAK	(1 << 0) /* Transfer was not acknowledged */
+#define EC_I2C_STATUS_TIMEOUT	(1 << 1) /* Timeout during transfer */
+
+/* Any error */
+#define EC_I2C_STATUS_ERROR	(EC_I2C_STATUS_NAK | EC_I2C_STATUS_TIMEOUT)
+
+struct ec_params_i2c_passthru_msg {
+	uint16_t addr_flags;	/* I2C slave address (7 or 10 bits) and flags */
+	uint16_t len;		/* Number of bytes to read or write */
+} __packed;
+
+struct ec_params_i2c_passthru {
+	uint8_t port;		/* I2C port number */
+	uint8_t num_msgs;	/* Number of messages */
+	struct ec_params_i2c_passthru_msg msg[];
+	/* Data to write for all messages is concatenated here */
+} __packed;
+
+struct ec_response_i2c_passthru {
+	uint8_t i2c_status;	/* Status flags (EC_I2C_STATUS_...) */
+	uint8_t num_msgs;	/* Number of messages processed */
+	uint8_t data[];		/* Data read by messages concatenated here */
+} __packed;
+
+/*****************************************************************************/
+/* Power button hang detect */
+
+#define EC_CMD_HANG_DETECT 0x9f
+
+/* Reasons to start hang detection timer */
+/* Power button pressed */
+#define EC_HANG_START_ON_POWER_PRESS  (1 << 0)
+
+/* Lid closed */
+#define EC_HANG_START_ON_LID_CLOSE    (1 << 1)
+
+ /* Lid opened */
+#define EC_HANG_START_ON_LID_OPEN     (1 << 2)
+
+/* Start of AP S3->S0 transition (booting or resuming from suspend) */
+#define EC_HANG_START_ON_RESUME       (1 << 3)
+
+/* Reasons to cancel hang detection */
+
+/* Power button released */
+#define EC_HANG_STOP_ON_POWER_RELEASE (1 << 8)
+
+/* Any host command from AP received */
+#define EC_HANG_STOP_ON_HOST_COMMAND  (1 << 9)
+
+/* Stop on end of AP S0->S3 transition (suspending or shutting down) */
+#define EC_HANG_STOP_ON_SUSPEND       (1 << 10)
+
+/*
+ * If this flag is set, all the other fields are ignored, and the hang detect
+ * timer is started.  This provides the AP a way to start the hang timer
+ * without reconfiguring any of the other hang detect settings.  Note that
+ * you must previously have configured the timeouts.
+ */
+#define EC_HANG_START_NOW             (1 << 30)
+
+/*
+ * If this flag is set, all the other fields are ignored (including
+ * EC_HANG_START_NOW).  This provides the AP a way to stop the hang timer
+ * without reconfiguring any of the other hang detect settings.
  */
-#define EC_CMD_CHARGE_DUMP 0xa0
+#define EC_HANG_STOP_NOW              (1 << 31)
+
+struct ec_params_hang_detect {
+	/* Flags; see EC_HANG_* */
+	uint32_t flags;
+
+	/* Timeout in msec before generating host event, if enabled */
+	uint16_t host_event_timeout_msec;
+
+	/* Timeout in msec before generating warm reboot, if enabled */
+	uint16_t warm_reboot_timeout_msec;
+} __packed;
+
+/*****************************************************************************/
+/* Commands for battery charging */
+
+/*
+ * This is the single catch-all host command to exchange data regarding the
+ * charge state machine (v2 and up).
+ */
+#define EC_CMD_CHARGE_STATE 0xa0
+
+/* Subcommands for this host command */
+enum charge_state_command {
+	CHARGE_STATE_CMD_GET_STATE,
+	CHARGE_STATE_CMD_GET_PARAM,
+	CHARGE_STATE_CMD_SET_PARAM,
+	CHARGE_STATE_NUM_CMDS
+};
+
+/*
+ * Known param numbers are defined here. Ranges are reserved for board-specific
+ * params, which are handled by the particular implementations.
+ */
+enum charge_state_params {
+	CS_PARAM_CHG_VOLTAGE,	      /* charger voltage limit */
+	CS_PARAM_CHG_CURRENT,	      /* charger current limit */
+	CS_PARAM_CHG_INPUT_CURRENT,   /* charger input current limit */
+	CS_PARAM_CHG_STATUS,	      /* charger-specific status */
+	CS_PARAM_CHG_OPTION,	      /* charger-specific options */
+	/* How many so far? */
+	CS_NUM_BASE_PARAMS,
+
+	/* Range for CONFIG_CHARGER_PROFILE_OVERRIDE params */
+	CS_PARAM_CUSTOM_PROFILE_MIN = 0x10000,
+	CS_PARAM_CUSTOM_PROFILE_MAX = 0x1ffff,
+
+	/* Other custom param ranges go here... */
+};
+
+struct ec_params_charge_state {
+	uint8_t cmd;				/* enum charge_state_command */
+	union {
+		struct {
+			/* no args */
+		} get_state;
+
+		struct {
+			uint32_t param;		/* enum charge_state_params */
+		} get_param;
+
+		struct {
+			uint32_t param;		/* param to set */
+			uint32_t value;		/* value to set */
+		} set_param;
+	};
+} __packed;
+
+struct ec_response_charge_state {
+	union {
+		struct {
+			int ac;
+			int chg_voltage;
+			int chg_current;
+			int chg_input_current;
+			int batt_state_of_charge;
+		} get_state;
+
+		struct {
+			uint32_t value;
+		} get_param;
+		struct {
+			/* no return values */
+		} set_param;
+	};
+} __packed;
+
 
 /*
  * Set maximum battery charging current.
@@ -1221,15 +2097,59 @@ struct ec_params_force_idle {
 #define EC_CMD_CHARGE_CURRENT_LIMIT 0xa1
 
 struct ec_params_current_limit {
-	uint32_t limit;
+	uint32_t limit; /* in mA */
+} __packed;
+
+/*
+ * Set maximum external power current.
+ */
+#define EC_CMD_EXT_POWER_CURRENT_LIMIT 0xa2
+
+struct ec_params_ext_power_current_limit {
+	uint32_t limit; /* in mA */
+} __packed;
+
+/*****************************************************************************/
+/* Smart battery pass-through */
+
+/* Get / Set 16-bit smart battery registers */
+#define EC_CMD_SB_READ_WORD   0xb0
+#define EC_CMD_SB_WRITE_WORD  0xb1
+
+/* Get / Set string smart battery parameters
+ * formatted as SMBUS "block".
+ */
+#define EC_CMD_SB_READ_BLOCK  0xb2
+#define EC_CMD_SB_WRITE_BLOCK 0xb3
+
+struct ec_params_sb_rd {
+	uint8_t reg;
+} __packed;
+
+struct ec_response_sb_rd_word {
+	uint16_t value;
+} __packed;
+
+struct ec_params_sb_wr_word {
+	uint8_t reg;
+	uint16_t value;
+} __packed;
+
+struct ec_response_sb_rd_block {
+	uint8_t data[32];
+} __packed;
+
+struct ec_params_sb_wr_block {
+	uint8_t reg;
+	uint16_t data[32];
 } __packed;
 
 /*****************************************************************************/
 /* System commands */
 
 /*
- * TODO: this is a confusing name, since it doesn't necessarily reboot the EC.
- * Rename to "set image" or something similar.
+ * TODO(crosbug.com/p/23747): This is a confusing name, since it doesn't
+ * necessarily reboot the EC.  Rename to "image" or something similar?
  */
 #define EC_CMD_REBOOT_EC 0xd2
 
@@ -1308,6 +2228,7 @@ struct ec_params_reboot_ec {
 #define EC_CMD_ACPI_QUERY_EVENT 0x84
 
 /* Valid addresses in ACPI memory space, for read/write commands */
+
 /* Memory space version; set to EC_ACPI_MEM_VERSION_CURRENT */
 #define EC_ACPI_MEM_VERSION            0x00
 /*
@@ -1317,8 +2238,60 @@ struct ec_params_reboot_ec {
 #define EC_ACPI_MEM_TEST               0x01
 /* Test compliment; writes here are ignored. */
 #define EC_ACPI_MEM_TEST_COMPLIMENT    0x02
+
 /* Keyboard backlight brightness percent (0 - 100) */
 #define EC_ACPI_MEM_KEYBOARD_BACKLIGHT 0x03
+/* DPTF Target Fan Duty (0-100, 0xff for auto/none) */
+#define EC_ACPI_MEM_FAN_DUTY           0x04
+
+/*
+ * DPTF temp thresholds. Any of the EC's temp sensors can have up to two
+ * independent thresholds attached to them. The current value of the ID
+ * register determines which sensor is affected by the THRESHOLD and COMMIT
+ * registers. The THRESHOLD register uses the same EC_TEMP_SENSOR_OFFSET scheme
+ * as the memory-mapped sensors. The COMMIT register applies those settings.
+ *
+ * The spec does not mandate any way to read back the threshold settings
+ * themselves, but when a threshold is crossed the AP needs a way to determine
+ * which sensor(s) are responsible. Each reading of the ID register clears and
+ * returns one sensor ID that has crossed one of its thresholds (in either
+ * direction) since the last read. A value of 0xFF means "no new thresholds
+ * have tripped". Setting or enabling the thresholds for a sensor will clear
+ * the unread event count for that sensor.
+ */
+#define EC_ACPI_MEM_TEMP_ID            0x05
+#define EC_ACPI_MEM_TEMP_THRESHOLD     0x06
+#define EC_ACPI_MEM_TEMP_COMMIT        0x07
+/*
+ * Here are the bits for the COMMIT register:
+ *   bit 0 selects the threshold index for the chosen sensor (0/1)
+ *   bit 1 enables/disables the selected threshold (0 = off, 1 = on)
+ * Each write to the commit register affects one threshold.
+ */
+#define EC_ACPI_MEM_TEMP_COMMIT_SELECT_MASK (1 << 0)
+#define EC_ACPI_MEM_TEMP_COMMIT_ENABLE_MASK (1 << 1)
+/*
+ * Example:
+ *
+ * Set the thresholds for sensor 2 to 50 C and 60 C:
+ *   write 2 to [0x05]      --  select temp sensor 2
+ *   write 0x7b to [0x06]   --  C_TO_K(50) - EC_TEMP_SENSOR_OFFSET
+ *   write 0x2 to [0x07]    --  enable threshold 0 with this value
+ *   write 0x85 to [0x06]   --  C_TO_K(60) - EC_TEMP_SENSOR_OFFSET
+ *   write 0x3 to [0x07]    --  enable threshold 1 with this value
+ *
+ * Disable the 60 C threshold, leaving the 50 C threshold unchanged:
+ *   write 2 to [0x05]      --  select temp sensor 2
+ *   write 0x1 to [0x07]    --  disable threshold 1
+ */
+
+/* DPTF battery charging current limit */
+#define EC_ACPI_MEM_CHARGING_LIMIT     0x08
+
+/* Charging limit is specified in 64 mA steps */
+#define EC_ACPI_MEM_CHARGING_LIMIT_STEP_MA   64
+/* Value to disable DPTF battery charging limit */
+#define EC_ACPI_MEM_CHARGING_LIMIT_DISABLED  0xff
 
 /* Current version of ACPI memory address space */
 #define EC_ACPI_MEM_VERSION_CURRENT 1
@@ -1360,10 +2333,21 @@ struct ec_params_reboot_ec {
  * Header bytes greater than this indicate a later version. For example,
  * EC_CMD_VERSION0 + 1 means we are using version 1.
  *
- * The old EC interface must not use commands 0dc or higher.
+ * The old EC interface must not use commands 0xdc or higher.
  */
 #define EC_CMD_VERSION0 0xdc
 
 #endif  /* !__ACPI__ */
 
+/*****************************************************************************/
+/*
+ * Deprecated constants. These constants have been renamed for clarity. The
+ * meaning and size have not changed. Programs that use the old names should
+ * switch to the new names soon, as the old names may not be carried forward
+ * forever.
+ */
+#define EC_HOST_PARAM_SIZE      EC_PROTO2_MAX_PARAM_SIZE
+#define EC_LPC_ADDR_OLD_PARAM   EC_HOST_CMD_REGION1
+#define EC_OLD_PARAM_SIZE       EC_HOST_CMD_REGION_SIZE
+
 #endif  /* __CROS_EC_COMMANDS_H */
-- 
cgit 


From a9cd92acabcb8aca8431647005ed868b8c7644c9 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.wolfsonmicro.com>
Date: Tue, 20 May 2014 13:48:51 +0100
Subject: mfd: arizona: Correct addresses of always-on trigger registers

Update the addresses and names to match current silicon.
The WM8997 regmap tables have been adjusted to match the new
names.
Missing registers have been added to WM5110 default value table.

Signed-off-by: Richard Fitzgerald <rf@opensource.wolfsonmicro.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/wm5102-tables.c           |  2 ++
 drivers/mfd/wm5110-tables.c           | 12 ++++++++----
 drivers/mfd/wm8997-tables.c           | 12 ++++++------
 include/linux/mfd/arizona/registers.h | 14 ++++++++------
 4 files changed, 24 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/wm5102-tables.c b/drivers/mfd/wm5102-tables.c
index 070f8cfbbd7a..ada3286c68e9 100644
--- a/drivers/mfd/wm5102-tables.c
+++ b/drivers/mfd/wm5102-tables.c
@@ -1037,6 +1037,8 @@ static bool wm5102_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_4:
 	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_5:
 	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_6:
+	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_7:
+	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_8:
 	case ARIZONA_COMFORT_NOISE_GENERATOR:
 	case ARIZONA_HAPTICS_CONTROL_1:
 	case ARIZONA_HAPTICS_CONTROL_2:
diff --git a/drivers/mfd/wm5110-tables.c b/drivers/mfd/wm5110-tables.c
index 58bc2be6e6a5..41a7f6fb7802 100644
--- a/drivers/mfd/wm5110-tables.c
+++ b/drivers/mfd/wm5110-tables.c
@@ -468,10 +468,12 @@ static const struct reg_default wm5110_reg_default[] = {
 	{ 0x00000062, 0x01FF },    /* R98    - Sample Rate Sequence Select 2 */
 	{ 0x00000063, 0x01FF },    /* R99    - Sample Rate Sequence Select 3 */
 	{ 0x00000064, 0x01FF },    /* R100   - Sample Rate Sequence Select 4 */
-	{ 0x00000068, 0x01FF },    /* R104   - Always On Triggers Sequence Select 1 */
-	{ 0x00000069, 0x01FF },    /* R105   - Always On Triggers Sequence Select 2 */
-	{ 0x0000006A, 0x01FF },    /* R106   - Always On Triggers Sequence Select 3 */
-	{ 0x0000006B, 0x01FF },    /* R107   - Always On Triggers Sequence Select 4 */
+	{ 0x00000066, 0x01FF },    /* R102   - Always On Triggers Sequence Select 1 */
+	{ 0x00000067, 0x01FF },    /* R103   - Always On Triggers Sequence Select 2 */
+	{ 0x00000068, 0x01FF },    /* R104   - Always On Triggers Sequence Select 3 */
+	{ 0x00000069, 0x01FF },    /* R105   - Always On Triggers Sequence Select 4 */
+	{ 0x0000006A, 0x01FF },    /* R106   - Always On Triggers Sequence Select 5 */
+	{ 0x0000006B, 0x01FF },    /* R107   - Always On Triggers Sequence Select 6 */
 	{ 0x00000070, 0x0000 },    /* R112   - Comfort Noise Generator */
 	{ 0x00000090, 0x0000 },    /* R144   - Haptics Control 1 */
 	{ 0x00000091, 0x7FFF },    /* R145   - Haptics Control 2 */
@@ -1499,6 +1501,8 @@ static bool wm5110_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_2:
 	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_3:
 	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_4:
+	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_5:
+	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_6:
 	case ARIZONA_COMFORT_NOISE_GENERATOR:
 	case ARIZONA_HAPTICS_CONTROL_1:
 	case ARIZONA_HAPTICS_CONTROL_2:
diff --git a/drivers/mfd/wm8997-tables.c b/drivers/mfd/wm8997-tables.c
index c9c65197bb69..c7a81da64ee1 100644
--- a/drivers/mfd/wm8997-tables.c
+++ b/drivers/mfd/wm8997-tables.c
@@ -174,10 +174,10 @@ static const struct reg_default wm8997_reg_default[] = {
 	{ 0x00000062, 0x01FF },    /* R98    - Sample Rate Sequence Select 2 */
 	{ 0x00000063, 0x01FF },    /* R99    - Sample Rate Sequence Select 3 */
 	{ 0x00000064, 0x01FF },    /* R100   - Sample Rate Sequence Select 4 */
-	{ 0x00000068, 0x01FF },    /* R104   - Always On Triggers Sequence Select 1 */
-	{ 0x00000069, 0x01FF },    /* R105   - Always On Triggers Sequence Select 2 */
-	{ 0x0000006A, 0x01FF },    /* R106   - Always On Triggers Sequence Select 3 */
-	{ 0x0000006B, 0x01FF },    /* R107   - Always On Triggers Sequence Select 4 */
+	{ 0x00000068, 0x01FF },    /* R104   - Always On Triggers Sequence Select 3 */
+	{ 0x00000069, 0x01FF },    /* R105   - Always On Triggers Sequence Select 4 */
+	{ 0x0000006A, 0x01FF },    /* R106   - Always On Triggers Sequence Select 5 */
+	{ 0x0000006B, 0x01FF },    /* R107   - Always On Triggers Sequence Select 6 */
 	{ 0x00000070, 0x0000 },    /* R112   - Comfort Noise Generator */
 	{ 0x00000090, 0x0000 },    /* R144   - Haptics Control 1 */
 	{ 0x00000091, 0x7FFF },    /* R145   - Haptics Control 2 */
@@ -814,10 +814,10 @@ static bool wm8997_readable_register(struct device *dev, unsigned int reg)
 	case ARIZONA_SAMPLE_RATE_SEQUENCE_SELECT_2:
 	case ARIZONA_SAMPLE_RATE_SEQUENCE_SELECT_3:
 	case ARIZONA_SAMPLE_RATE_SEQUENCE_SELECT_4:
-	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_1:
-	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_2:
 	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_3:
 	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_4:
+	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_5:
+	case ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_6:
 	case ARIZONA_COMFORT_NOISE_GENERATOR:
 	case ARIZONA_HAPTICS_CONTROL_1:
 	case ARIZONA_HAPTICS_CONTROL_2:
diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h
index 7b35c21170d5..7204d8138b24 100644
--- a/include/linux/mfd/arizona/registers.h
+++ b/include/linux/mfd/arizona/registers.h
@@ -42,12 +42,14 @@
 #define ARIZONA_SAMPLE_RATE_SEQUENCE_SELECT_2    0x62
 #define ARIZONA_SAMPLE_RATE_SEQUENCE_SELECT_3    0x63
 #define ARIZONA_SAMPLE_RATE_SEQUENCE_SELECT_4    0x64
-#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_1 0x68
-#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_2 0x69
-#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_3 0x6A
-#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_4 0x6B
-#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_5 0x6C
-#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_6 0x6D
+#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_1 0x66
+#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_2 0x67
+#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_3 0x68
+#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_4 0x69
+#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_5 0x6A
+#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_6 0x6B
+#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_7 0x6C
+#define ARIZONA_ALWAYS_ON_TRIGGERS_SEQUENCE_SELECT_8 0x6D
 #define ARIZONA_COMFORT_NOISE_GENERATOR          0x70
 #define ARIZONA_HAPTICS_CONTROL_1                0x90
 #define ARIZONA_HAPTICS_CONTROL_2                0x91
-- 
cgit 


From a22c514c6a8bf21663b2e0a5339cc461be2f01a2 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee.jones@linaro.org>
Date: Thu, 22 May 2014 09:50:57 +0100
Subject: mfd: abx500-core: Remove unused function abx500_dump_all_banks()

abx500_dump_all_banks() has no callers in the kernel, so it's probably
safe to remove it.

Cc: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/abx500-core.c  | 20 --------------------
 include/linux/mfd/abx500.h |  1 -
 2 files changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/abx500-core.c b/drivers/mfd/abx500-core.c
index d6d0ec4d21e4..fe418995108c 100644
--- a/drivers/mfd/abx500-core.c
+++ b/drivers/mfd/abx500-core.c
@@ -151,26 +151,6 @@ int abx500_startup_irq_enabled(struct device *dev, unsigned int irq)
 }
 EXPORT_SYMBOL(abx500_startup_irq_enabled);
 
-int abx500_dump_all_banks(void)
-{
-	struct abx500_ops *ops;
-	struct device *dummy_child;
-	struct abx500_device_entry *dev_entry;
-
-	dummy_child = kzalloc(sizeof(struct device), GFP_KERNEL);
-	if (!dummy_child)
-		return -ENOMEM;
-	list_for_each_entry(dev_entry, &abx500_list, list) {
-		dummy_child->parent = dev_entry->dev;
-		ops = &dev_entry->ops;
-
-		if ((ops != NULL) && (ops->dump_all_banks != NULL))
-			ops->dump_all_banks(dummy_child);
-	}
-	kfree(dummy_child);
-}
-EXPORT_SYMBOL(abx500_dump_all_banks);
-
 MODULE_AUTHOR("Mattias Wallin <mattias.wallin@stericsson.com>");
 MODULE_DESCRIPTION("ABX500 core driver");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/abx500.h b/include/linux/mfd/abx500.h
index df2508f7f3d2..552cc1d61cc7 100644
--- a/include/linux/mfd/abx500.h
+++ b/include/linux/mfd/abx500.h
@@ -330,7 +330,6 @@ int abx500_mask_and_set_register_interruptible(struct device *dev, u8 bank,
 int abx500_get_chip_id(struct device *dev);
 int abx500_event_registers_startup_state_get(struct device *dev, u8 *event);
 int abx500_startup_irq_enabled(struct device *dev, unsigned int irq);
-int abx500_dump_all_banks(void);
 
 struct abx500_ops {
 	int (*get_chip_id) (struct device *);
-- 
cgit 


From 45ac60c0bc93f64c3fe64de8308c8e4bd67ac165 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Thu, 22 May 2014 14:48:30 +0530
Subject: mfd: palmas: Format the header file

Formats the palmas header file. Convert all
the offset values to hexadecimal.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/palmas.h | 2166 ++++++++++++++++++++++----------------------
 1 file changed, 1083 insertions(+), 1083 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index 9974e387e483..ccbb21f718ed 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -482,10 +482,10 @@ enum usb_irq_events {
 
 /* helper macro to get correct slave number */
 #define PALMAS_BASE_TO_SLAVE(x)		((x >> 8) - 1)
-#define PALMAS_BASE_TO_REG(x, y)	((x & 0xff) + y)
+#define PALMAS_BASE_TO_REG(x, y)	((x & 0xFF) + y)
 
 /* Base addresses of IP blocks in Palmas */
-#define PALMAS_SMPS_DVS_BASE					0x20
+#define PALMAS_SMPS_DVS_BASE					0x020
 #define PALMAS_RTC_BASE						0x100
 #define PALMAS_VALIDITY_BASE					0x118
 #define PALMAS_SMPS_BASE					0x120
@@ -504,19 +504,19 @@ enum usb_irq_events {
 #define PALMAS_TRIM_GPADC_BASE					0x3CD
 
 /* Registers for function RTC */
-#define PALMAS_SECONDS_REG					0x0
-#define PALMAS_MINUTES_REG					0x1
-#define PALMAS_HOURS_REG					0x2
-#define PALMAS_DAYS_REG						0x3
-#define PALMAS_MONTHS_REG					0x4
-#define PALMAS_YEARS_REG					0x5
-#define PALMAS_WEEKS_REG					0x6
-#define PALMAS_ALARM_SECONDS_REG				0x8
-#define PALMAS_ALARM_MINUTES_REG				0x9
-#define PALMAS_ALARM_HOURS_REG					0xA
-#define PALMAS_ALARM_DAYS_REG					0xB
-#define PALMAS_ALARM_MONTHS_REG					0xC
-#define PALMAS_ALARM_YEARS_REG					0xD
+#define PALMAS_SECONDS_REG					0x00
+#define PALMAS_MINUTES_REG					0x01
+#define PALMAS_HOURS_REG					0x02
+#define PALMAS_DAYS_REG						0x03
+#define PALMAS_MONTHS_REG					0x04
+#define PALMAS_YEARS_REG					0x05
+#define PALMAS_WEEKS_REG					0x06
+#define PALMAS_ALARM_SECONDS_REG				0x08
+#define PALMAS_ALARM_MINUTES_REG				0x09
+#define PALMAS_ALARM_HOURS_REG					0x0A
+#define PALMAS_ALARM_DAYS_REG					0x0B
+#define PALMAS_ALARM_MONTHS_REG					0x0C
+#define PALMAS_ALARM_YEARS_REG					0x0D
 #define PALMAS_RTC_CTRL_REG					0x10
 #define PALMAS_RTC_STATUS_REG					0x11
 #define PALMAS_RTC_INTERRUPTS_REG				0x12
@@ -527,201 +527,201 @@ enum usb_irq_events {
 
 /* Bit definitions for SECONDS_REG */
 #define PALMAS_SECONDS_REG_SEC1_MASK				0x70
-#define PALMAS_SECONDS_REG_SEC1_SHIFT				4
-#define PALMAS_SECONDS_REG_SEC0_MASK				0x0f
-#define PALMAS_SECONDS_REG_SEC0_SHIFT				0
+#define PALMAS_SECONDS_REG_SEC1_SHIFT				0x04
+#define PALMAS_SECONDS_REG_SEC0_MASK				0x0F
+#define PALMAS_SECONDS_REG_SEC0_SHIFT				0x00
 
 /* Bit definitions for MINUTES_REG */
 #define PALMAS_MINUTES_REG_MIN1_MASK				0x70
-#define PALMAS_MINUTES_REG_MIN1_SHIFT				4
-#define PALMAS_MINUTES_REG_MIN0_MASK				0x0f
-#define PALMAS_MINUTES_REG_MIN0_SHIFT				0
+#define PALMAS_MINUTES_REG_MIN1_SHIFT				0x04
+#define PALMAS_MINUTES_REG_MIN0_MASK				0x0F
+#define PALMAS_MINUTES_REG_MIN0_SHIFT				0x00
 
 /* Bit definitions for HOURS_REG */
 #define PALMAS_HOURS_REG_PM_NAM					0x80
-#define PALMAS_HOURS_REG_PM_NAM_SHIFT				7
+#define PALMAS_HOURS_REG_PM_NAM_SHIFT				0x07
 #define PALMAS_HOURS_REG_HOUR1_MASK				0x30
-#define PALMAS_HOURS_REG_HOUR1_SHIFT				4
-#define PALMAS_HOURS_REG_HOUR0_MASK				0x0f
-#define PALMAS_HOURS_REG_HOUR0_SHIFT				0
+#define PALMAS_HOURS_REG_HOUR1_SHIFT				0x04
+#define PALMAS_HOURS_REG_HOUR0_MASK				0x0F
+#define PALMAS_HOURS_REG_HOUR0_SHIFT				0x00
 
 /* Bit definitions for DAYS_REG */
 #define PALMAS_DAYS_REG_DAY1_MASK				0x30
-#define PALMAS_DAYS_REG_DAY1_SHIFT				4
-#define PALMAS_DAYS_REG_DAY0_MASK				0x0f
-#define PALMAS_DAYS_REG_DAY0_SHIFT				0
+#define PALMAS_DAYS_REG_DAY1_SHIFT				0x04
+#define PALMAS_DAYS_REG_DAY0_MASK				0x0F
+#define PALMAS_DAYS_REG_DAY0_SHIFT				0x00
 
 /* Bit definitions for MONTHS_REG */
 #define PALMAS_MONTHS_REG_MONTH1				0x10
-#define PALMAS_MONTHS_REG_MONTH1_SHIFT				4
-#define PALMAS_MONTHS_REG_MONTH0_MASK				0x0f
-#define PALMAS_MONTHS_REG_MONTH0_SHIFT				0
+#define PALMAS_MONTHS_REG_MONTH1_SHIFT				0x04
+#define PALMAS_MONTHS_REG_MONTH0_MASK				0x0F
+#define PALMAS_MONTHS_REG_MONTH0_SHIFT				0x00
 
 /* Bit definitions for YEARS_REG */
 #define PALMAS_YEARS_REG_YEAR1_MASK				0xf0
-#define PALMAS_YEARS_REG_YEAR1_SHIFT				4
-#define PALMAS_YEARS_REG_YEAR0_MASK				0x0f
-#define PALMAS_YEARS_REG_YEAR0_SHIFT				0
+#define PALMAS_YEARS_REG_YEAR1_SHIFT				0x04
+#define PALMAS_YEARS_REG_YEAR0_MASK				0x0F
+#define PALMAS_YEARS_REG_YEAR0_SHIFT				0x00
 
 /* Bit definitions for WEEKS_REG */
 #define PALMAS_WEEKS_REG_WEEK_MASK				0x07
-#define PALMAS_WEEKS_REG_WEEK_SHIFT				0
+#define PALMAS_WEEKS_REG_WEEK_SHIFT				0x00
 
 /* Bit definitions for ALARM_SECONDS_REG */
 #define PALMAS_ALARM_SECONDS_REG_ALARM_SEC1_MASK		0x70
-#define PALMAS_ALARM_SECONDS_REG_ALARM_SEC1_SHIFT		4
-#define PALMAS_ALARM_SECONDS_REG_ALARM_SEC0_MASK		0x0f
-#define PALMAS_ALARM_SECONDS_REG_ALARM_SEC0_SHIFT		0
+#define PALMAS_ALARM_SECONDS_REG_ALARM_SEC1_SHIFT		0x04
+#define PALMAS_ALARM_SECONDS_REG_ALARM_SEC0_MASK		0x0F
+#define PALMAS_ALARM_SECONDS_REG_ALARM_SEC0_SHIFT		0x00
 
 /* Bit definitions for ALARM_MINUTES_REG */
 #define PALMAS_ALARM_MINUTES_REG_ALARM_MIN1_MASK		0x70
-#define PALMAS_ALARM_MINUTES_REG_ALARM_MIN1_SHIFT		4
-#define PALMAS_ALARM_MINUTES_REG_ALARM_MIN0_MASK		0x0f
-#define PALMAS_ALARM_MINUTES_REG_ALARM_MIN0_SHIFT		0
+#define PALMAS_ALARM_MINUTES_REG_ALARM_MIN1_SHIFT		0x04
+#define PALMAS_ALARM_MINUTES_REG_ALARM_MIN0_MASK		0x0F
+#define PALMAS_ALARM_MINUTES_REG_ALARM_MIN0_SHIFT		0x00
 
 /* Bit definitions for ALARM_HOURS_REG */
 #define PALMAS_ALARM_HOURS_REG_ALARM_PM_NAM			0x80
-#define PALMAS_ALARM_HOURS_REG_ALARM_PM_NAM_SHIFT		7
+#define PALMAS_ALARM_HOURS_REG_ALARM_PM_NAM_SHIFT		0x07
 #define PALMAS_ALARM_HOURS_REG_ALARM_HOUR1_MASK			0x30
-#define PALMAS_ALARM_HOURS_REG_ALARM_HOUR1_SHIFT		4
-#define PALMAS_ALARM_HOURS_REG_ALARM_HOUR0_MASK			0x0f
-#define PALMAS_ALARM_HOURS_REG_ALARM_HOUR0_SHIFT		0
+#define PALMAS_ALARM_HOURS_REG_ALARM_HOUR1_SHIFT		0x04
+#define PALMAS_ALARM_HOURS_REG_ALARM_HOUR0_MASK			0x0F
+#define PALMAS_ALARM_HOURS_REG_ALARM_HOUR0_SHIFT		0x00
 
 /* Bit definitions for ALARM_DAYS_REG */
 #define PALMAS_ALARM_DAYS_REG_ALARM_DAY1_MASK			0x30
-#define PALMAS_ALARM_DAYS_REG_ALARM_DAY1_SHIFT			4
-#define PALMAS_ALARM_DAYS_REG_ALARM_DAY0_MASK			0x0f
-#define PALMAS_ALARM_DAYS_REG_ALARM_DAY0_SHIFT			0
+#define PALMAS_ALARM_DAYS_REG_ALARM_DAY1_SHIFT			0x04
+#define PALMAS_ALARM_DAYS_REG_ALARM_DAY0_MASK			0x0F
+#define PALMAS_ALARM_DAYS_REG_ALARM_DAY0_SHIFT			0x00
 
 /* Bit definitions for ALARM_MONTHS_REG */
 #define PALMAS_ALARM_MONTHS_REG_ALARM_MONTH1			0x10
-#define PALMAS_ALARM_MONTHS_REG_ALARM_MONTH1_SHIFT		4
-#define PALMAS_ALARM_MONTHS_REG_ALARM_MONTH0_MASK		0x0f
-#define PALMAS_ALARM_MONTHS_REG_ALARM_MONTH0_SHIFT		0
+#define PALMAS_ALARM_MONTHS_REG_ALARM_MONTH1_SHIFT		0x04
+#define PALMAS_ALARM_MONTHS_REG_ALARM_MONTH0_MASK		0x0F
+#define PALMAS_ALARM_MONTHS_REG_ALARM_MONTH0_SHIFT		0x00
 
 /* Bit definitions for ALARM_YEARS_REG */
 #define PALMAS_ALARM_YEARS_REG_ALARM_YEAR1_MASK			0xf0
-#define PALMAS_ALARM_YEARS_REG_ALARM_YEAR1_SHIFT		4
-#define PALMAS_ALARM_YEARS_REG_ALARM_YEAR0_MASK			0x0f
-#define PALMAS_ALARM_YEARS_REG_ALARM_YEAR0_SHIFT		0
+#define PALMAS_ALARM_YEARS_REG_ALARM_YEAR1_SHIFT		0x04
+#define PALMAS_ALARM_YEARS_REG_ALARM_YEAR0_MASK			0x0F
+#define PALMAS_ALARM_YEARS_REG_ALARM_YEAR0_SHIFT		0x00
 
 /* Bit definitions for RTC_CTRL_REG */
 #define PALMAS_RTC_CTRL_REG_RTC_V_OPT				0x80
-#define PALMAS_RTC_CTRL_REG_RTC_V_OPT_SHIFT			7
+#define PALMAS_RTC_CTRL_REG_RTC_V_OPT_SHIFT			0x07
 #define PALMAS_RTC_CTRL_REG_GET_TIME				0x40
-#define PALMAS_RTC_CTRL_REG_GET_TIME_SHIFT			6
+#define PALMAS_RTC_CTRL_REG_GET_TIME_SHIFT			0x06
 #define PALMAS_RTC_CTRL_REG_SET_32_COUNTER			0x20
-#define PALMAS_RTC_CTRL_REG_SET_32_COUNTER_SHIFT		5
+#define PALMAS_RTC_CTRL_REG_SET_32_COUNTER_SHIFT		0x05
 #define PALMAS_RTC_CTRL_REG_TEST_MODE				0x10
-#define PALMAS_RTC_CTRL_REG_TEST_MODE_SHIFT			4
+#define PALMAS_RTC_CTRL_REG_TEST_MODE_SHIFT			0x04
 #define PALMAS_RTC_CTRL_REG_MODE_12_24				0x08
-#define PALMAS_RTC_CTRL_REG_MODE_12_24_SHIFT			3
+#define PALMAS_RTC_CTRL_REG_MODE_12_24_SHIFT			0x03
 #define PALMAS_RTC_CTRL_REG_AUTO_COMP				0x04
-#define PALMAS_RTC_CTRL_REG_AUTO_COMP_SHIFT			2
+#define PALMAS_RTC_CTRL_REG_AUTO_COMP_SHIFT			0x02
 #define PALMAS_RTC_CTRL_REG_ROUND_30S				0x02
-#define PALMAS_RTC_CTRL_REG_ROUND_30S_SHIFT			1
+#define PALMAS_RTC_CTRL_REG_ROUND_30S_SHIFT			0x01
 #define PALMAS_RTC_CTRL_REG_STOP_RTC				0x01
-#define PALMAS_RTC_CTRL_REG_STOP_RTC_SHIFT			0
+#define PALMAS_RTC_CTRL_REG_STOP_RTC_SHIFT			0x00
 
 /* Bit definitions for RTC_STATUS_REG */
 #define PALMAS_RTC_STATUS_REG_POWER_UP				0x80
-#define PALMAS_RTC_STATUS_REG_POWER_UP_SHIFT			7
+#define PALMAS_RTC_STATUS_REG_POWER_UP_SHIFT			0x07
 #define PALMAS_RTC_STATUS_REG_ALARM				0x40
-#define PALMAS_RTC_STATUS_REG_ALARM_SHIFT			6
+#define PALMAS_RTC_STATUS_REG_ALARM_SHIFT			0x06
 #define PALMAS_RTC_STATUS_REG_EVENT_1D				0x20
-#define PALMAS_RTC_STATUS_REG_EVENT_1D_SHIFT			5
+#define PALMAS_RTC_STATUS_REG_EVENT_1D_SHIFT			0x05
 #define PALMAS_RTC_STATUS_REG_EVENT_1H				0x10
-#define PALMAS_RTC_STATUS_REG_EVENT_1H_SHIFT			4
+#define PALMAS_RTC_STATUS_REG_EVENT_1H_SHIFT			0x04
 #define PALMAS_RTC_STATUS_REG_EVENT_1M				0x08
-#define PALMAS_RTC_STATUS_REG_EVENT_1M_SHIFT			3
+#define PALMAS_RTC_STATUS_REG_EVENT_1M_SHIFT			0x03
 #define PALMAS_RTC_STATUS_REG_EVENT_1S				0x04
-#define PALMAS_RTC_STATUS_REG_EVENT_1S_SHIFT			2
+#define PALMAS_RTC_STATUS_REG_EVENT_1S_SHIFT			0x02
 #define PALMAS_RTC_STATUS_REG_RUN				0x02
-#define PALMAS_RTC_STATUS_REG_RUN_SHIFT				1
+#define PALMAS_RTC_STATUS_REG_RUN_SHIFT				0x01
 
 /* Bit definitions for RTC_INTERRUPTS_REG */
 #define PALMAS_RTC_INTERRUPTS_REG_IT_SLEEP_MASK_EN		0x10
-#define PALMAS_RTC_INTERRUPTS_REG_IT_SLEEP_MASK_EN_SHIFT	4
+#define PALMAS_RTC_INTERRUPTS_REG_IT_SLEEP_MASK_EN_SHIFT	0x04
 #define PALMAS_RTC_INTERRUPTS_REG_IT_ALARM			0x08
-#define PALMAS_RTC_INTERRUPTS_REG_IT_ALARM_SHIFT		3
+#define PALMAS_RTC_INTERRUPTS_REG_IT_ALARM_SHIFT		0x03
 #define PALMAS_RTC_INTERRUPTS_REG_IT_TIMER			0x04
-#define PALMAS_RTC_INTERRUPTS_REG_IT_TIMER_SHIFT		2
+#define PALMAS_RTC_INTERRUPTS_REG_IT_TIMER_SHIFT		0x02
 #define PALMAS_RTC_INTERRUPTS_REG_EVERY_MASK			0x03
-#define PALMAS_RTC_INTERRUPTS_REG_EVERY_SHIFT			0
+#define PALMAS_RTC_INTERRUPTS_REG_EVERY_SHIFT			0x00
 
 /* Bit definitions for RTC_COMP_LSB_REG */
-#define PALMAS_RTC_COMP_LSB_REG_RTC_COMP_LSB_MASK		0xff
-#define PALMAS_RTC_COMP_LSB_REG_RTC_COMP_LSB_SHIFT		0
+#define PALMAS_RTC_COMP_LSB_REG_RTC_COMP_LSB_MASK		0xFF
+#define PALMAS_RTC_COMP_LSB_REG_RTC_COMP_LSB_SHIFT		0x00
 
 /* Bit definitions for RTC_COMP_MSB_REG */
-#define PALMAS_RTC_COMP_MSB_REG_RTC_COMP_MSB_MASK		0xff
-#define PALMAS_RTC_COMP_MSB_REG_RTC_COMP_MSB_SHIFT		0
+#define PALMAS_RTC_COMP_MSB_REG_RTC_COMP_MSB_MASK		0xFF
+#define PALMAS_RTC_COMP_MSB_REG_RTC_COMP_MSB_SHIFT		0x00
 
 /* Bit definitions for RTC_RES_PROG_REG */
-#define PALMAS_RTC_RES_PROG_REG_SW_RES_PROG_MASK		0x3f
-#define PALMAS_RTC_RES_PROG_REG_SW_RES_PROG_SHIFT		0
+#define PALMAS_RTC_RES_PROG_REG_SW_RES_PROG_MASK		0x3F
+#define PALMAS_RTC_RES_PROG_REG_SW_RES_PROG_SHIFT		0x00
 
 /* Bit definitions for RTC_RESET_STATUS_REG */
 #define PALMAS_RTC_RESET_STATUS_REG_RESET_STATUS		0x01
-#define PALMAS_RTC_RESET_STATUS_REG_RESET_STATUS_SHIFT		0
+#define PALMAS_RTC_RESET_STATUS_REG_RESET_STATUS_SHIFT		0x00
 
 /* Registers for function BACKUP */
-#define PALMAS_BACKUP0						0x0
-#define PALMAS_BACKUP1						0x1
-#define PALMAS_BACKUP2						0x2
-#define PALMAS_BACKUP3						0x3
-#define PALMAS_BACKUP4						0x4
-#define PALMAS_BACKUP5						0x5
-#define PALMAS_BACKUP6						0x6
-#define PALMAS_BACKUP7						0x7
+#define PALMAS_BACKUP0						0x00
+#define PALMAS_BACKUP1						0x01
+#define PALMAS_BACKUP2						0x02
+#define PALMAS_BACKUP3						0x03
+#define PALMAS_BACKUP4						0x04
+#define PALMAS_BACKUP5						0x05
+#define PALMAS_BACKUP6						0x06
+#define PALMAS_BACKUP7						0x07
 
 /* Bit definitions for BACKUP0 */
-#define PALMAS_BACKUP0_BACKUP_MASK				0xff
-#define PALMAS_BACKUP0_BACKUP_SHIFT				0
+#define PALMAS_BACKUP0_BACKUP_MASK				0xFF
+#define PALMAS_BACKUP0_BACKUP_SHIFT				0x00
 
 /* Bit definitions for BACKUP1 */
-#define PALMAS_BACKUP1_BACKUP_MASK				0xff
-#define PALMAS_BACKUP1_BACKUP_SHIFT				0
+#define PALMAS_BACKUP1_BACKUP_MASK				0xFF
+#define PALMAS_BACKUP1_BACKUP_SHIFT				0x00
 
 /* Bit definitions for BACKUP2 */
-#define PALMAS_BACKUP2_BACKUP_MASK				0xff
-#define PALMAS_BACKUP2_BACKUP_SHIFT				0
+#define PALMAS_BACKUP2_BACKUP_MASK				0xFF
+#define PALMAS_BACKUP2_BACKUP_SHIFT				0x00
 
 /* Bit definitions for BACKUP3 */
-#define PALMAS_BACKUP3_BACKUP_MASK				0xff
-#define PALMAS_BACKUP3_BACKUP_SHIFT				0
+#define PALMAS_BACKUP3_BACKUP_MASK				0xFF
+#define PALMAS_BACKUP3_BACKUP_SHIFT				0x00
 
 /* Bit definitions for BACKUP4 */
-#define PALMAS_BACKUP4_BACKUP_MASK				0xff
-#define PALMAS_BACKUP4_BACKUP_SHIFT				0
+#define PALMAS_BACKUP4_BACKUP_MASK				0xFF
+#define PALMAS_BACKUP4_BACKUP_SHIFT				0x00
 
 /* Bit definitions for BACKUP5 */
-#define PALMAS_BACKUP5_BACKUP_MASK				0xff
-#define PALMAS_BACKUP5_BACKUP_SHIFT				0
+#define PALMAS_BACKUP5_BACKUP_MASK				0xFF
+#define PALMAS_BACKUP5_BACKUP_SHIFT				0x00
 
 /* Bit definitions for BACKUP6 */
-#define PALMAS_BACKUP6_BACKUP_MASK				0xff
-#define PALMAS_BACKUP6_BACKUP_SHIFT				0
+#define PALMAS_BACKUP6_BACKUP_MASK				0xFF
+#define PALMAS_BACKUP6_BACKUP_SHIFT				0x00
 
 /* Bit definitions for BACKUP7 */
-#define PALMAS_BACKUP7_BACKUP_MASK				0xff
-#define PALMAS_BACKUP7_BACKUP_SHIFT				0
+#define PALMAS_BACKUP7_BACKUP_MASK				0xFF
+#define PALMAS_BACKUP7_BACKUP_SHIFT				0x00
 
 /* Registers for function SMPS */
-#define PALMAS_SMPS12_CTRL					0x0
-#define PALMAS_SMPS12_TSTEP					0x1
-#define PALMAS_SMPS12_FORCE					0x2
-#define PALMAS_SMPS12_VOLTAGE					0x3
-#define PALMAS_SMPS3_CTRL					0x4
-#define PALMAS_SMPS3_VOLTAGE					0x7
-#define PALMAS_SMPS45_CTRL					0x8
-#define PALMAS_SMPS45_TSTEP					0x9
-#define PALMAS_SMPS45_FORCE					0xA
-#define PALMAS_SMPS45_VOLTAGE					0xB
-#define PALMAS_SMPS6_CTRL					0xC
-#define PALMAS_SMPS6_TSTEP					0xD
-#define PALMAS_SMPS6_FORCE					0xE
-#define PALMAS_SMPS6_VOLTAGE					0xF
+#define PALMAS_SMPS12_CTRL					0x00
+#define PALMAS_SMPS12_TSTEP					0x01
+#define PALMAS_SMPS12_FORCE					0x02
+#define PALMAS_SMPS12_VOLTAGE					0x03
+#define PALMAS_SMPS3_CTRL					0x04
+#define PALMAS_SMPS3_VOLTAGE					0x07
+#define PALMAS_SMPS45_CTRL					0x08
+#define PALMAS_SMPS45_TSTEP					0x09
+#define PALMAS_SMPS45_FORCE					0x0A
+#define PALMAS_SMPS45_VOLTAGE					0x0B
+#define PALMAS_SMPS6_CTRL					0x0C
+#define PALMAS_SMPS6_TSTEP					0x0D
+#define PALMAS_SMPS6_FORCE					0x0E
+#define PALMAS_SMPS6_VOLTAGE					0x0F
 #define PALMAS_SMPS7_CTRL					0x10
 #define PALMAS_SMPS7_VOLTAGE					0x13
 #define PALMAS_SMPS8_CTRL					0x14
@@ -744,303 +744,303 @@ enum usb_irq_events {
 
 /* Bit definitions for SMPS12_CTRL */
 #define PALMAS_SMPS12_CTRL_WR_S					0x80
-#define PALMAS_SMPS12_CTRL_WR_S_SHIFT				7
+#define PALMAS_SMPS12_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_SMPS12_CTRL_ROOF_FLOOR_EN			0x40
-#define PALMAS_SMPS12_CTRL_ROOF_FLOOR_EN_SHIFT			6
+#define PALMAS_SMPS12_CTRL_ROOF_FLOOR_EN_SHIFT			0x06
 #define PALMAS_SMPS12_CTRL_STATUS_MASK				0x30
-#define PALMAS_SMPS12_CTRL_STATUS_SHIFT				4
+#define PALMAS_SMPS12_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_SMPS12_CTRL_MODE_SLEEP_MASK			0x0c
-#define PALMAS_SMPS12_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_SMPS12_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_SMPS12_CTRL_MODE_ACTIVE_MASK			0x03
-#define PALMAS_SMPS12_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_SMPS12_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for SMPS12_TSTEP */
 #define PALMAS_SMPS12_TSTEP_TSTEP_MASK				0x03
-#define PALMAS_SMPS12_TSTEP_TSTEP_SHIFT				0
+#define PALMAS_SMPS12_TSTEP_TSTEP_SHIFT				0x00
 
 /* Bit definitions for SMPS12_FORCE */
 #define PALMAS_SMPS12_FORCE_CMD					0x80
-#define PALMAS_SMPS12_FORCE_CMD_SHIFT				7
-#define PALMAS_SMPS12_FORCE_VSEL_MASK				0x7f
-#define PALMAS_SMPS12_FORCE_VSEL_SHIFT				0
+#define PALMAS_SMPS12_FORCE_CMD_SHIFT				0x07
+#define PALMAS_SMPS12_FORCE_VSEL_MASK				0x7F
+#define PALMAS_SMPS12_FORCE_VSEL_SHIFT				0x00
 
 /* Bit definitions for SMPS12_VOLTAGE */
 #define PALMAS_SMPS12_VOLTAGE_RANGE				0x80
-#define PALMAS_SMPS12_VOLTAGE_RANGE_SHIFT			7
-#define PALMAS_SMPS12_VOLTAGE_VSEL_MASK				0x7f
-#define PALMAS_SMPS12_VOLTAGE_VSEL_SHIFT			0
+#define PALMAS_SMPS12_VOLTAGE_RANGE_SHIFT			0x07
+#define PALMAS_SMPS12_VOLTAGE_VSEL_MASK				0x7F
+#define PALMAS_SMPS12_VOLTAGE_VSEL_SHIFT			0x00
 
 /* Bit definitions for SMPS3_CTRL */
 #define PALMAS_SMPS3_CTRL_WR_S					0x80
-#define PALMAS_SMPS3_CTRL_WR_S_SHIFT				7
+#define PALMAS_SMPS3_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_SMPS3_CTRL_STATUS_MASK				0x30
-#define PALMAS_SMPS3_CTRL_STATUS_SHIFT				4
+#define PALMAS_SMPS3_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_SMPS3_CTRL_MODE_SLEEP_MASK			0x0c
-#define PALMAS_SMPS3_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_SMPS3_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_SMPS3_CTRL_MODE_ACTIVE_MASK			0x03
-#define PALMAS_SMPS3_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_SMPS3_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for SMPS3_VOLTAGE */
 #define PALMAS_SMPS3_VOLTAGE_RANGE				0x80
-#define PALMAS_SMPS3_VOLTAGE_RANGE_SHIFT			7
-#define PALMAS_SMPS3_VOLTAGE_VSEL_MASK				0x7f
-#define PALMAS_SMPS3_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_SMPS3_VOLTAGE_RANGE_SHIFT			0x07
+#define PALMAS_SMPS3_VOLTAGE_VSEL_MASK				0x7F
+#define PALMAS_SMPS3_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for SMPS45_CTRL */
 #define PALMAS_SMPS45_CTRL_WR_S					0x80
-#define PALMAS_SMPS45_CTRL_WR_S_SHIFT				7
+#define PALMAS_SMPS45_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_SMPS45_CTRL_ROOF_FLOOR_EN			0x40
-#define PALMAS_SMPS45_CTRL_ROOF_FLOOR_EN_SHIFT			6
+#define PALMAS_SMPS45_CTRL_ROOF_FLOOR_EN_SHIFT			0x06
 #define PALMAS_SMPS45_CTRL_STATUS_MASK				0x30
-#define PALMAS_SMPS45_CTRL_STATUS_SHIFT				4
+#define PALMAS_SMPS45_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_SMPS45_CTRL_MODE_SLEEP_MASK			0x0c
-#define PALMAS_SMPS45_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_SMPS45_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_SMPS45_CTRL_MODE_ACTIVE_MASK			0x03
-#define PALMAS_SMPS45_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_SMPS45_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for SMPS45_TSTEP */
 #define PALMAS_SMPS45_TSTEP_TSTEP_MASK				0x03
-#define PALMAS_SMPS45_TSTEP_TSTEP_SHIFT				0
+#define PALMAS_SMPS45_TSTEP_TSTEP_SHIFT				0x00
 
 /* Bit definitions for SMPS45_FORCE */
 #define PALMAS_SMPS45_FORCE_CMD					0x80
-#define PALMAS_SMPS45_FORCE_CMD_SHIFT				7
-#define PALMAS_SMPS45_FORCE_VSEL_MASK				0x7f
-#define PALMAS_SMPS45_FORCE_VSEL_SHIFT				0
+#define PALMAS_SMPS45_FORCE_CMD_SHIFT				0x07
+#define PALMAS_SMPS45_FORCE_VSEL_MASK				0x7F
+#define PALMAS_SMPS45_FORCE_VSEL_SHIFT				0x00
 
 /* Bit definitions for SMPS45_VOLTAGE */
 #define PALMAS_SMPS45_VOLTAGE_RANGE				0x80
-#define PALMAS_SMPS45_VOLTAGE_RANGE_SHIFT			7
-#define PALMAS_SMPS45_VOLTAGE_VSEL_MASK				0x7f
-#define PALMAS_SMPS45_VOLTAGE_VSEL_SHIFT			0
+#define PALMAS_SMPS45_VOLTAGE_RANGE_SHIFT			0x07
+#define PALMAS_SMPS45_VOLTAGE_VSEL_MASK				0x7F
+#define PALMAS_SMPS45_VOLTAGE_VSEL_SHIFT			0x00
 
 /* Bit definitions for SMPS6_CTRL */
 #define PALMAS_SMPS6_CTRL_WR_S					0x80
-#define PALMAS_SMPS6_CTRL_WR_S_SHIFT				7
+#define PALMAS_SMPS6_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_SMPS6_CTRL_ROOF_FLOOR_EN				0x40
-#define PALMAS_SMPS6_CTRL_ROOF_FLOOR_EN_SHIFT			6
+#define PALMAS_SMPS6_CTRL_ROOF_FLOOR_EN_SHIFT			0x06
 #define PALMAS_SMPS6_CTRL_STATUS_MASK				0x30
-#define PALMAS_SMPS6_CTRL_STATUS_SHIFT				4
+#define PALMAS_SMPS6_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_SMPS6_CTRL_MODE_SLEEP_MASK			0x0c
-#define PALMAS_SMPS6_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_SMPS6_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_SMPS6_CTRL_MODE_ACTIVE_MASK			0x03
-#define PALMAS_SMPS6_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_SMPS6_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for SMPS6_TSTEP */
 #define PALMAS_SMPS6_TSTEP_TSTEP_MASK				0x03
-#define PALMAS_SMPS6_TSTEP_TSTEP_SHIFT				0
+#define PALMAS_SMPS6_TSTEP_TSTEP_SHIFT				0x00
 
 /* Bit definitions for SMPS6_FORCE */
 #define PALMAS_SMPS6_FORCE_CMD					0x80
-#define PALMAS_SMPS6_FORCE_CMD_SHIFT				7
-#define PALMAS_SMPS6_FORCE_VSEL_MASK				0x7f
-#define PALMAS_SMPS6_FORCE_VSEL_SHIFT				0
+#define PALMAS_SMPS6_FORCE_CMD_SHIFT				0x07
+#define PALMAS_SMPS6_FORCE_VSEL_MASK				0x7F
+#define PALMAS_SMPS6_FORCE_VSEL_SHIFT				0x00
 
 /* Bit definitions for SMPS6_VOLTAGE */
 #define PALMAS_SMPS6_VOLTAGE_RANGE				0x80
-#define PALMAS_SMPS6_VOLTAGE_RANGE_SHIFT			7
-#define PALMAS_SMPS6_VOLTAGE_VSEL_MASK				0x7f
-#define PALMAS_SMPS6_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_SMPS6_VOLTAGE_RANGE_SHIFT			0x07
+#define PALMAS_SMPS6_VOLTAGE_VSEL_MASK				0x7F
+#define PALMAS_SMPS6_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for SMPS7_CTRL */
 #define PALMAS_SMPS7_CTRL_WR_S					0x80
-#define PALMAS_SMPS7_CTRL_WR_S_SHIFT				7
+#define PALMAS_SMPS7_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_SMPS7_CTRL_STATUS_MASK				0x30
-#define PALMAS_SMPS7_CTRL_STATUS_SHIFT				4
+#define PALMAS_SMPS7_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_SMPS7_CTRL_MODE_SLEEP_MASK			0x0c
-#define PALMAS_SMPS7_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_SMPS7_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_SMPS7_CTRL_MODE_ACTIVE_MASK			0x03
-#define PALMAS_SMPS7_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_SMPS7_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for SMPS7_VOLTAGE */
 #define PALMAS_SMPS7_VOLTAGE_RANGE				0x80
-#define PALMAS_SMPS7_VOLTAGE_RANGE_SHIFT			7
-#define PALMAS_SMPS7_VOLTAGE_VSEL_MASK				0x7f
-#define PALMAS_SMPS7_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_SMPS7_VOLTAGE_RANGE_SHIFT			0x07
+#define PALMAS_SMPS7_VOLTAGE_VSEL_MASK				0x7F
+#define PALMAS_SMPS7_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for SMPS8_CTRL */
 #define PALMAS_SMPS8_CTRL_WR_S					0x80
-#define PALMAS_SMPS8_CTRL_WR_S_SHIFT				7
+#define PALMAS_SMPS8_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_SMPS8_CTRL_ROOF_FLOOR_EN				0x40
-#define PALMAS_SMPS8_CTRL_ROOF_FLOOR_EN_SHIFT			6
+#define PALMAS_SMPS8_CTRL_ROOF_FLOOR_EN_SHIFT			0x06
 #define PALMAS_SMPS8_CTRL_STATUS_MASK				0x30
-#define PALMAS_SMPS8_CTRL_STATUS_SHIFT				4
+#define PALMAS_SMPS8_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_SMPS8_CTRL_MODE_SLEEP_MASK			0x0c
-#define PALMAS_SMPS8_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_SMPS8_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_SMPS8_CTRL_MODE_ACTIVE_MASK			0x03
-#define PALMAS_SMPS8_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_SMPS8_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for SMPS8_TSTEP */
 #define PALMAS_SMPS8_TSTEP_TSTEP_MASK				0x03
-#define PALMAS_SMPS8_TSTEP_TSTEP_SHIFT				0
+#define PALMAS_SMPS8_TSTEP_TSTEP_SHIFT				0x00
 
 /* Bit definitions for SMPS8_FORCE */
 #define PALMAS_SMPS8_FORCE_CMD					0x80
-#define PALMAS_SMPS8_FORCE_CMD_SHIFT				7
-#define PALMAS_SMPS8_FORCE_VSEL_MASK				0x7f
-#define PALMAS_SMPS8_FORCE_VSEL_SHIFT				0
+#define PALMAS_SMPS8_FORCE_CMD_SHIFT				0x07
+#define PALMAS_SMPS8_FORCE_VSEL_MASK				0x7F
+#define PALMAS_SMPS8_FORCE_VSEL_SHIFT				0x00
 
 /* Bit definitions for SMPS8_VOLTAGE */
 #define PALMAS_SMPS8_VOLTAGE_RANGE				0x80
-#define PALMAS_SMPS8_VOLTAGE_RANGE_SHIFT			7
-#define PALMAS_SMPS8_VOLTAGE_VSEL_MASK				0x7f
-#define PALMAS_SMPS8_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_SMPS8_VOLTAGE_RANGE_SHIFT			0x07
+#define PALMAS_SMPS8_VOLTAGE_VSEL_MASK				0x7F
+#define PALMAS_SMPS8_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for SMPS9_CTRL */
 #define PALMAS_SMPS9_CTRL_WR_S					0x80
-#define PALMAS_SMPS9_CTRL_WR_S_SHIFT				7
+#define PALMAS_SMPS9_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_SMPS9_CTRL_STATUS_MASK				0x30
-#define PALMAS_SMPS9_CTRL_STATUS_SHIFT				4
+#define PALMAS_SMPS9_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_SMPS9_CTRL_MODE_SLEEP_MASK			0x0c
-#define PALMAS_SMPS9_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_SMPS9_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_SMPS9_CTRL_MODE_ACTIVE_MASK			0x03
-#define PALMAS_SMPS9_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_SMPS9_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for SMPS9_VOLTAGE */
 #define PALMAS_SMPS9_VOLTAGE_RANGE				0x80
-#define PALMAS_SMPS9_VOLTAGE_RANGE_SHIFT			7
-#define PALMAS_SMPS9_VOLTAGE_VSEL_MASK				0x7f
-#define PALMAS_SMPS9_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_SMPS9_VOLTAGE_RANGE_SHIFT			0x07
+#define PALMAS_SMPS9_VOLTAGE_VSEL_MASK				0x7F
+#define PALMAS_SMPS9_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for SMPS10_CTRL */
 #define PALMAS_SMPS10_CTRL_MODE_SLEEP_MASK			0xf0
-#define PALMAS_SMPS10_CTRL_MODE_SLEEP_SHIFT			4
-#define PALMAS_SMPS10_CTRL_MODE_ACTIVE_MASK			0x0f
-#define PALMAS_SMPS10_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_SMPS10_CTRL_MODE_SLEEP_SHIFT			0x04
+#define PALMAS_SMPS10_CTRL_MODE_ACTIVE_MASK			0x0F
+#define PALMAS_SMPS10_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for SMPS10_STATUS */
-#define PALMAS_SMPS10_STATUS_STATUS_MASK			0x0f
-#define PALMAS_SMPS10_STATUS_STATUS_SHIFT			0
+#define PALMAS_SMPS10_STATUS_STATUS_MASK			0x0F
+#define PALMAS_SMPS10_STATUS_STATUS_SHIFT			0x00
 
 /* Bit definitions for SMPS_CTRL */
 #define PALMAS_SMPS_CTRL_SMPS45_SMPS457_EN			0x20
-#define PALMAS_SMPS_CTRL_SMPS45_SMPS457_EN_SHIFT		5
+#define PALMAS_SMPS_CTRL_SMPS45_SMPS457_EN_SHIFT		0x05
 #define PALMAS_SMPS_CTRL_SMPS12_SMPS123_EN			0x10
-#define PALMAS_SMPS_CTRL_SMPS12_SMPS123_EN_SHIFT		4
+#define PALMAS_SMPS_CTRL_SMPS12_SMPS123_EN_SHIFT		0x04
 #define PALMAS_SMPS_CTRL_SMPS45_PHASE_CTRL_MASK			0x0c
-#define PALMAS_SMPS_CTRL_SMPS45_PHASE_CTRL_SHIFT		2
+#define PALMAS_SMPS_CTRL_SMPS45_PHASE_CTRL_SHIFT		0x02
 #define PALMAS_SMPS_CTRL_SMPS123_PHASE_CTRL_MASK		0x03
-#define PALMAS_SMPS_CTRL_SMPS123_PHASE_CTRL_SHIFT		0
+#define PALMAS_SMPS_CTRL_SMPS123_PHASE_CTRL_SHIFT		0x00
 
 /* Bit definitions for SMPS_PD_CTRL */
 #define PALMAS_SMPS_PD_CTRL_SMPS9				0x40
-#define PALMAS_SMPS_PD_CTRL_SMPS9_SHIFT				6
+#define PALMAS_SMPS_PD_CTRL_SMPS9_SHIFT				0x06
 #define PALMAS_SMPS_PD_CTRL_SMPS8				0x20
-#define PALMAS_SMPS_PD_CTRL_SMPS8_SHIFT				5
+#define PALMAS_SMPS_PD_CTRL_SMPS8_SHIFT				0x05
 #define PALMAS_SMPS_PD_CTRL_SMPS7				0x10
-#define PALMAS_SMPS_PD_CTRL_SMPS7_SHIFT				4
+#define PALMAS_SMPS_PD_CTRL_SMPS7_SHIFT				0x04
 #define PALMAS_SMPS_PD_CTRL_SMPS6				0x08
-#define PALMAS_SMPS_PD_CTRL_SMPS6_SHIFT				3
+#define PALMAS_SMPS_PD_CTRL_SMPS6_SHIFT				0x03
 #define PALMAS_SMPS_PD_CTRL_SMPS45				0x04
-#define PALMAS_SMPS_PD_CTRL_SMPS45_SHIFT			2
+#define PALMAS_SMPS_PD_CTRL_SMPS45_SHIFT			0x02
 #define PALMAS_SMPS_PD_CTRL_SMPS3				0x02
-#define PALMAS_SMPS_PD_CTRL_SMPS3_SHIFT				1
+#define PALMAS_SMPS_PD_CTRL_SMPS3_SHIFT				0x01
 #define PALMAS_SMPS_PD_CTRL_SMPS12				0x01
-#define PALMAS_SMPS_PD_CTRL_SMPS12_SHIFT			0
+#define PALMAS_SMPS_PD_CTRL_SMPS12_SHIFT			0x00
 
 /* Bit definitions for SMPS_THERMAL_EN */
 #define PALMAS_SMPS_THERMAL_EN_SMPS9				0x40
-#define PALMAS_SMPS_THERMAL_EN_SMPS9_SHIFT			6
+#define PALMAS_SMPS_THERMAL_EN_SMPS9_SHIFT			0x06
 #define PALMAS_SMPS_THERMAL_EN_SMPS8				0x20
-#define PALMAS_SMPS_THERMAL_EN_SMPS8_SHIFT			5
+#define PALMAS_SMPS_THERMAL_EN_SMPS8_SHIFT			0x05
 #define PALMAS_SMPS_THERMAL_EN_SMPS6				0x08
-#define PALMAS_SMPS_THERMAL_EN_SMPS6_SHIFT			3
+#define PALMAS_SMPS_THERMAL_EN_SMPS6_SHIFT			0x03
 #define PALMAS_SMPS_THERMAL_EN_SMPS457				0x04
-#define PALMAS_SMPS_THERMAL_EN_SMPS457_SHIFT			2
+#define PALMAS_SMPS_THERMAL_EN_SMPS457_SHIFT			0x02
 #define PALMAS_SMPS_THERMAL_EN_SMPS123				0x01
-#define PALMAS_SMPS_THERMAL_EN_SMPS123_SHIFT			0
+#define PALMAS_SMPS_THERMAL_EN_SMPS123_SHIFT			0x00
 
 /* Bit definitions for SMPS_THERMAL_STATUS */
 #define PALMAS_SMPS_THERMAL_STATUS_SMPS9			0x40
-#define PALMAS_SMPS_THERMAL_STATUS_SMPS9_SHIFT			6
+#define PALMAS_SMPS_THERMAL_STATUS_SMPS9_SHIFT			0x06
 #define PALMAS_SMPS_THERMAL_STATUS_SMPS8			0x20
-#define PALMAS_SMPS_THERMAL_STATUS_SMPS8_SHIFT			5
+#define PALMAS_SMPS_THERMAL_STATUS_SMPS8_SHIFT			0x05
 #define PALMAS_SMPS_THERMAL_STATUS_SMPS6			0x08
-#define PALMAS_SMPS_THERMAL_STATUS_SMPS6_SHIFT			3
+#define PALMAS_SMPS_THERMAL_STATUS_SMPS6_SHIFT			0x03
 #define PALMAS_SMPS_THERMAL_STATUS_SMPS457			0x04
-#define PALMAS_SMPS_THERMAL_STATUS_SMPS457_SHIFT		2
+#define PALMAS_SMPS_THERMAL_STATUS_SMPS457_SHIFT		0x02
 #define PALMAS_SMPS_THERMAL_STATUS_SMPS123			0x01
-#define PALMAS_SMPS_THERMAL_STATUS_SMPS123_SHIFT		0
+#define PALMAS_SMPS_THERMAL_STATUS_SMPS123_SHIFT		0x00
 
 /* Bit definitions for SMPS_SHORT_STATUS */
 #define PALMAS_SMPS_SHORT_STATUS_SMPS10				0x80
-#define PALMAS_SMPS_SHORT_STATUS_SMPS10_SHIFT			7
+#define PALMAS_SMPS_SHORT_STATUS_SMPS10_SHIFT			0x07
 #define PALMAS_SMPS_SHORT_STATUS_SMPS9				0x40
-#define PALMAS_SMPS_SHORT_STATUS_SMPS9_SHIFT			6
+#define PALMAS_SMPS_SHORT_STATUS_SMPS9_SHIFT			0x06
 #define PALMAS_SMPS_SHORT_STATUS_SMPS8				0x20
-#define PALMAS_SMPS_SHORT_STATUS_SMPS8_SHIFT			5
+#define PALMAS_SMPS_SHORT_STATUS_SMPS8_SHIFT			0x05
 #define PALMAS_SMPS_SHORT_STATUS_SMPS7				0x10
-#define PALMAS_SMPS_SHORT_STATUS_SMPS7_SHIFT			4
+#define PALMAS_SMPS_SHORT_STATUS_SMPS7_SHIFT			0x04
 #define PALMAS_SMPS_SHORT_STATUS_SMPS6				0x08
-#define PALMAS_SMPS_SHORT_STATUS_SMPS6_SHIFT			3
+#define PALMAS_SMPS_SHORT_STATUS_SMPS6_SHIFT			0x03
 #define PALMAS_SMPS_SHORT_STATUS_SMPS45				0x04
-#define PALMAS_SMPS_SHORT_STATUS_SMPS45_SHIFT			2
+#define PALMAS_SMPS_SHORT_STATUS_SMPS45_SHIFT			0x02
 #define PALMAS_SMPS_SHORT_STATUS_SMPS3				0x02
-#define PALMAS_SMPS_SHORT_STATUS_SMPS3_SHIFT			1
+#define PALMAS_SMPS_SHORT_STATUS_SMPS3_SHIFT			0x01
 #define PALMAS_SMPS_SHORT_STATUS_SMPS12				0x01
-#define PALMAS_SMPS_SHORT_STATUS_SMPS12_SHIFT			0
+#define PALMAS_SMPS_SHORT_STATUS_SMPS12_SHIFT			0x00
 
 /* Bit definitions for SMPS_NEGATIVE_CURRENT_LIMIT_EN */
 #define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS9		0x40
-#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS9_SHIFT	6
+#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS9_SHIFT	0x06
 #define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS8		0x20
-#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS8_SHIFT	5
+#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS8_SHIFT	0x05
 #define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS7		0x10
-#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS7_SHIFT	4
+#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS7_SHIFT	0x04
 #define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS6		0x08
-#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS6_SHIFT	3
+#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS6_SHIFT	0x03
 #define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS45		0x04
-#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS45_SHIFT	2
+#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS45_SHIFT	0x02
 #define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS3		0x02
-#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS3_SHIFT	1
+#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS3_SHIFT	0x01
 #define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS12		0x01
-#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS12_SHIFT	0
+#define PALMAS_SMPS_NEGATIVE_CURRENT_LIMIT_EN_SMPS12_SHIFT	0x00
 
 /* Bit definitions for SMPS_POWERGOOD_MASK1 */
 #define PALMAS_SMPS_POWERGOOD_MASK1_SMPS10			0x80
-#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS10_SHIFT		7
+#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS10_SHIFT		0x07
 #define PALMAS_SMPS_POWERGOOD_MASK1_SMPS9			0x40
-#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS9_SHIFT			6
+#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS9_SHIFT			0x06
 #define PALMAS_SMPS_POWERGOOD_MASK1_SMPS8			0x20
-#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS8_SHIFT			5
+#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS8_SHIFT			0x05
 #define PALMAS_SMPS_POWERGOOD_MASK1_SMPS7			0x10
-#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS7_SHIFT			4
+#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS7_SHIFT			0x04
 #define PALMAS_SMPS_POWERGOOD_MASK1_SMPS6			0x08
-#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS6_SHIFT			3
+#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS6_SHIFT			0x03
 #define PALMAS_SMPS_POWERGOOD_MASK1_SMPS45			0x04
-#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS45_SHIFT		2
+#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS45_SHIFT		0x02
 #define PALMAS_SMPS_POWERGOOD_MASK1_SMPS3			0x02
-#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS3_SHIFT			1
+#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS3_SHIFT			0x01
 #define PALMAS_SMPS_POWERGOOD_MASK1_SMPS12			0x01
-#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS12_SHIFT		0
+#define PALMAS_SMPS_POWERGOOD_MASK1_SMPS12_SHIFT		0x00
 
 /* Bit definitions for SMPS_POWERGOOD_MASK2 */
 #define PALMAS_SMPS_POWERGOOD_MASK2_POWERGOOD_TYPE_SELECT	0x80
-#define PALMAS_SMPS_POWERGOOD_MASK2_POWERGOOD_TYPE_SELECT_SHIFT	7
+#define PALMAS_SMPS_POWERGOOD_MASK2_POWERGOOD_TYPE_SELECT_SHIFT	0x07
 #define PALMAS_SMPS_POWERGOOD_MASK2_GPIO_7			0x04
-#define PALMAS_SMPS_POWERGOOD_MASK2_GPIO_7_SHIFT		2
+#define PALMAS_SMPS_POWERGOOD_MASK2_GPIO_7_SHIFT		0x02
 #define PALMAS_SMPS_POWERGOOD_MASK2_VBUS			0x02
-#define PALMAS_SMPS_POWERGOOD_MASK2_VBUS_SHIFT			1
+#define PALMAS_SMPS_POWERGOOD_MASK2_VBUS_SHIFT			0x01
 #define PALMAS_SMPS_POWERGOOD_MASK2_ACOK			0x01
-#define PALMAS_SMPS_POWERGOOD_MASK2_ACOK_SHIFT			0
+#define PALMAS_SMPS_POWERGOOD_MASK2_ACOK_SHIFT			0x00
 
 /* Registers for function LDO */
-#define PALMAS_LDO1_CTRL					0x0
-#define PALMAS_LDO1_VOLTAGE					0x1
-#define PALMAS_LDO2_CTRL					0x2
-#define PALMAS_LDO2_VOLTAGE					0x3
-#define PALMAS_LDO3_CTRL					0x4
-#define PALMAS_LDO3_VOLTAGE					0x5
-#define PALMAS_LDO4_CTRL					0x6
-#define PALMAS_LDO4_VOLTAGE					0x7
-#define PALMAS_LDO5_CTRL					0x8
-#define PALMAS_LDO5_VOLTAGE					0x9
-#define PALMAS_LDO6_CTRL					0xA
-#define PALMAS_LDO6_VOLTAGE					0xB
-#define PALMAS_LDO7_CTRL					0xC
-#define PALMAS_LDO7_VOLTAGE					0xD
-#define PALMAS_LDO8_CTRL					0xE
-#define PALMAS_LDO8_VOLTAGE					0xF
+#define PALMAS_LDO1_CTRL					0x00
+#define PALMAS_LDO1_VOLTAGE					0x01
+#define PALMAS_LDO2_CTRL					0x02
+#define PALMAS_LDO2_VOLTAGE					0x03
+#define PALMAS_LDO3_CTRL					0x04
+#define PALMAS_LDO3_VOLTAGE					0x05
+#define PALMAS_LDO4_CTRL					0x06
+#define PALMAS_LDO4_VOLTAGE					0x07
+#define PALMAS_LDO5_CTRL					0x08
+#define PALMAS_LDO5_VOLTAGE					0x09
+#define PALMAS_LDO6_CTRL					0x0A
+#define PALMAS_LDO6_VOLTAGE					0x0B
+#define PALMAS_LDO7_CTRL					0x0C
+#define PALMAS_LDO7_VOLTAGE					0x0D
+#define PALMAS_LDO8_CTRL					0x0E
+#define PALMAS_LDO8_VOLTAGE					0x0F
 #define PALMAS_LDO9_CTRL					0x10
 #define PALMAS_LDO9_VOLTAGE					0x11
 #define PALMAS_LDOLN_CTRL					0x12
@@ -1055,236 +1055,236 @@ enum usb_irq_events {
 
 /* Bit definitions for LDO1_CTRL */
 #define PALMAS_LDO1_CTRL_WR_S					0x80
-#define PALMAS_LDO1_CTRL_WR_S_SHIFT				7
+#define PALMAS_LDO1_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_LDO1_CTRL_STATUS					0x10
-#define PALMAS_LDO1_CTRL_STATUS_SHIFT				4
+#define PALMAS_LDO1_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_LDO1_CTRL_MODE_SLEEP				0x04
-#define PALMAS_LDO1_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_LDO1_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_LDO1_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_LDO1_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_LDO1_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for LDO1_VOLTAGE */
-#define PALMAS_LDO1_VOLTAGE_VSEL_MASK				0x3f
-#define PALMAS_LDO1_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_LDO1_VOLTAGE_VSEL_MASK				0x3F
+#define PALMAS_LDO1_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for LDO2_CTRL */
 #define PALMAS_LDO2_CTRL_WR_S					0x80
-#define PALMAS_LDO2_CTRL_WR_S_SHIFT				7
+#define PALMAS_LDO2_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_LDO2_CTRL_STATUS					0x10
-#define PALMAS_LDO2_CTRL_STATUS_SHIFT				4
+#define PALMAS_LDO2_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_LDO2_CTRL_MODE_SLEEP				0x04
-#define PALMAS_LDO2_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_LDO2_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_LDO2_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_LDO2_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_LDO2_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for LDO2_VOLTAGE */
-#define PALMAS_LDO2_VOLTAGE_VSEL_MASK				0x3f
-#define PALMAS_LDO2_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_LDO2_VOLTAGE_VSEL_MASK				0x3F
+#define PALMAS_LDO2_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for LDO3_CTRL */
 #define PALMAS_LDO3_CTRL_WR_S					0x80
-#define PALMAS_LDO3_CTRL_WR_S_SHIFT				7
+#define PALMAS_LDO3_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_LDO3_CTRL_STATUS					0x10
-#define PALMAS_LDO3_CTRL_STATUS_SHIFT				4
+#define PALMAS_LDO3_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_LDO3_CTRL_MODE_SLEEP				0x04
-#define PALMAS_LDO3_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_LDO3_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_LDO3_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_LDO3_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_LDO3_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for LDO3_VOLTAGE */
-#define PALMAS_LDO3_VOLTAGE_VSEL_MASK				0x3f
-#define PALMAS_LDO3_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_LDO3_VOLTAGE_VSEL_MASK				0x3F
+#define PALMAS_LDO3_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for LDO4_CTRL */
 #define PALMAS_LDO4_CTRL_WR_S					0x80
-#define PALMAS_LDO4_CTRL_WR_S_SHIFT				7
+#define PALMAS_LDO4_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_LDO4_CTRL_STATUS					0x10
-#define PALMAS_LDO4_CTRL_STATUS_SHIFT				4
+#define PALMAS_LDO4_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_LDO4_CTRL_MODE_SLEEP				0x04
-#define PALMAS_LDO4_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_LDO4_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_LDO4_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_LDO4_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_LDO4_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for LDO4_VOLTAGE */
-#define PALMAS_LDO4_VOLTAGE_VSEL_MASK				0x3f
-#define PALMAS_LDO4_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_LDO4_VOLTAGE_VSEL_MASK				0x3F
+#define PALMAS_LDO4_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for LDO5_CTRL */
 #define PALMAS_LDO5_CTRL_WR_S					0x80
-#define PALMAS_LDO5_CTRL_WR_S_SHIFT				7
+#define PALMAS_LDO5_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_LDO5_CTRL_STATUS					0x10
-#define PALMAS_LDO5_CTRL_STATUS_SHIFT				4
+#define PALMAS_LDO5_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_LDO5_CTRL_MODE_SLEEP				0x04
-#define PALMAS_LDO5_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_LDO5_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_LDO5_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_LDO5_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_LDO5_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for LDO5_VOLTAGE */
-#define PALMAS_LDO5_VOLTAGE_VSEL_MASK				0x3f
-#define PALMAS_LDO5_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_LDO5_VOLTAGE_VSEL_MASK				0x3F
+#define PALMAS_LDO5_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for LDO6_CTRL */
 #define PALMAS_LDO6_CTRL_WR_S					0x80
-#define PALMAS_LDO6_CTRL_WR_S_SHIFT				7
+#define PALMAS_LDO6_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_LDO6_CTRL_LDO_VIB_EN				0x40
-#define PALMAS_LDO6_CTRL_LDO_VIB_EN_SHIFT			6
+#define PALMAS_LDO6_CTRL_LDO_VIB_EN_SHIFT			0x06
 #define PALMAS_LDO6_CTRL_STATUS					0x10
-#define PALMAS_LDO6_CTRL_STATUS_SHIFT				4
+#define PALMAS_LDO6_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_LDO6_CTRL_MODE_SLEEP				0x04
-#define PALMAS_LDO6_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_LDO6_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_LDO6_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_LDO6_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_LDO6_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for LDO6_VOLTAGE */
-#define PALMAS_LDO6_VOLTAGE_VSEL_MASK				0x3f
-#define PALMAS_LDO6_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_LDO6_VOLTAGE_VSEL_MASK				0x3F
+#define PALMAS_LDO6_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for LDO7_CTRL */
 #define PALMAS_LDO7_CTRL_WR_S					0x80
-#define PALMAS_LDO7_CTRL_WR_S_SHIFT				7
+#define PALMAS_LDO7_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_LDO7_CTRL_STATUS					0x10
-#define PALMAS_LDO7_CTRL_STATUS_SHIFT				4
+#define PALMAS_LDO7_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_LDO7_CTRL_MODE_SLEEP				0x04
-#define PALMAS_LDO7_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_LDO7_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_LDO7_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_LDO7_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_LDO7_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for LDO7_VOLTAGE */
-#define PALMAS_LDO7_VOLTAGE_VSEL_MASK				0x3f
-#define PALMAS_LDO7_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_LDO7_VOLTAGE_VSEL_MASK				0x3F
+#define PALMAS_LDO7_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for LDO8_CTRL */
 #define PALMAS_LDO8_CTRL_WR_S					0x80
-#define PALMAS_LDO8_CTRL_WR_S_SHIFT				7
+#define PALMAS_LDO8_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_LDO8_CTRL_LDO_TRACKING_EN			0x40
-#define PALMAS_LDO8_CTRL_LDO_TRACKING_EN_SHIFT			6
+#define PALMAS_LDO8_CTRL_LDO_TRACKING_EN_SHIFT			0x06
 #define PALMAS_LDO8_CTRL_STATUS					0x10
-#define PALMAS_LDO8_CTRL_STATUS_SHIFT				4
+#define PALMAS_LDO8_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_LDO8_CTRL_MODE_SLEEP				0x04
-#define PALMAS_LDO8_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_LDO8_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_LDO8_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_LDO8_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_LDO8_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for LDO8_VOLTAGE */
-#define PALMAS_LDO8_VOLTAGE_VSEL_MASK				0x3f
-#define PALMAS_LDO8_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_LDO8_VOLTAGE_VSEL_MASK				0x3F
+#define PALMAS_LDO8_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for LDO9_CTRL */
 #define PALMAS_LDO9_CTRL_WR_S					0x80
-#define PALMAS_LDO9_CTRL_WR_S_SHIFT				7
+#define PALMAS_LDO9_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_LDO9_CTRL_LDO_BYPASS_EN				0x40
-#define PALMAS_LDO9_CTRL_LDO_BYPASS_EN_SHIFT			6
+#define PALMAS_LDO9_CTRL_LDO_BYPASS_EN_SHIFT			0x06
 #define PALMAS_LDO9_CTRL_STATUS					0x10
-#define PALMAS_LDO9_CTRL_STATUS_SHIFT				4
+#define PALMAS_LDO9_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_LDO9_CTRL_MODE_SLEEP				0x04
-#define PALMAS_LDO9_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_LDO9_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_LDO9_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_LDO9_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_LDO9_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for LDO9_VOLTAGE */
-#define PALMAS_LDO9_VOLTAGE_VSEL_MASK				0x3f
-#define PALMAS_LDO9_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_LDO9_VOLTAGE_VSEL_MASK				0x3F
+#define PALMAS_LDO9_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for LDOLN_CTRL */
 #define PALMAS_LDOLN_CTRL_WR_S					0x80
-#define PALMAS_LDOLN_CTRL_WR_S_SHIFT				7
+#define PALMAS_LDOLN_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_LDOLN_CTRL_STATUS				0x10
-#define PALMAS_LDOLN_CTRL_STATUS_SHIFT				4
+#define PALMAS_LDOLN_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_LDOLN_CTRL_MODE_SLEEP				0x04
-#define PALMAS_LDOLN_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_LDOLN_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_LDOLN_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_LDOLN_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_LDOLN_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for LDOLN_VOLTAGE */
-#define PALMAS_LDOLN_VOLTAGE_VSEL_MASK				0x3f
-#define PALMAS_LDOLN_VOLTAGE_VSEL_SHIFT				0
+#define PALMAS_LDOLN_VOLTAGE_VSEL_MASK				0x3F
+#define PALMAS_LDOLN_VOLTAGE_VSEL_SHIFT				0x00
 
 /* Bit definitions for LDOUSB_CTRL */
 #define PALMAS_LDOUSB_CTRL_WR_S					0x80
-#define PALMAS_LDOUSB_CTRL_WR_S_SHIFT				7
+#define PALMAS_LDOUSB_CTRL_WR_S_SHIFT				0x07
 #define PALMAS_LDOUSB_CTRL_STATUS				0x10
-#define PALMAS_LDOUSB_CTRL_STATUS_SHIFT				4
+#define PALMAS_LDOUSB_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_LDOUSB_CTRL_MODE_SLEEP				0x04
-#define PALMAS_LDOUSB_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_LDOUSB_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_LDOUSB_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_LDOUSB_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_LDOUSB_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for LDOUSB_VOLTAGE */
-#define PALMAS_LDOUSB_VOLTAGE_VSEL_MASK				0x3f
-#define PALMAS_LDOUSB_VOLTAGE_VSEL_SHIFT			0
+#define PALMAS_LDOUSB_VOLTAGE_VSEL_MASK				0x3F
+#define PALMAS_LDOUSB_VOLTAGE_VSEL_SHIFT			0x00
 
 /* Bit definitions for LDO_CTRL */
 #define PALMAS_LDO_CTRL_LDOUSB_ON_VBUS_VSYS			0x01
-#define PALMAS_LDO_CTRL_LDOUSB_ON_VBUS_VSYS_SHIFT		0
+#define PALMAS_LDO_CTRL_LDOUSB_ON_VBUS_VSYS_SHIFT		0x00
 
 /* Bit definitions for LDO_PD_CTRL1 */
 #define PALMAS_LDO_PD_CTRL1_LDO8				0x80
-#define PALMAS_LDO_PD_CTRL1_LDO8_SHIFT				7
+#define PALMAS_LDO_PD_CTRL1_LDO8_SHIFT				0x07
 #define PALMAS_LDO_PD_CTRL1_LDO7				0x40
-#define PALMAS_LDO_PD_CTRL1_LDO7_SHIFT				6
+#define PALMAS_LDO_PD_CTRL1_LDO7_SHIFT				0x06
 #define PALMAS_LDO_PD_CTRL1_LDO6				0x20
-#define PALMAS_LDO_PD_CTRL1_LDO6_SHIFT				5
+#define PALMAS_LDO_PD_CTRL1_LDO6_SHIFT				0x05
 #define PALMAS_LDO_PD_CTRL1_LDO5				0x10
-#define PALMAS_LDO_PD_CTRL1_LDO5_SHIFT				4
+#define PALMAS_LDO_PD_CTRL1_LDO5_SHIFT				0x04
 #define PALMAS_LDO_PD_CTRL1_LDO4				0x08
-#define PALMAS_LDO_PD_CTRL1_LDO4_SHIFT				3
+#define PALMAS_LDO_PD_CTRL1_LDO4_SHIFT				0x03
 #define PALMAS_LDO_PD_CTRL1_LDO3				0x04
-#define PALMAS_LDO_PD_CTRL1_LDO3_SHIFT				2
+#define PALMAS_LDO_PD_CTRL1_LDO3_SHIFT				0x02
 #define PALMAS_LDO_PD_CTRL1_LDO2				0x02
-#define PALMAS_LDO_PD_CTRL1_LDO2_SHIFT				1
+#define PALMAS_LDO_PD_CTRL1_LDO2_SHIFT				0x01
 #define PALMAS_LDO_PD_CTRL1_LDO1				0x01
-#define PALMAS_LDO_PD_CTRL1_LDO1_SHIFT				0
+#define PALMAS_LDO_PD_CTRL1_LDO1_SHIFT				0x00
 
 /* Bit definitions for LDO_PD_CTRL2 */
 #define PALMAS_LDO_PD_CTRL2_LDOUSB				0x04
-#define PALMAS_LDO_PD_CTRL2_LDOUSB_SHIFT			2
+#define PALMAS_LDO_PD_CTRL2_LDOUSB_SHIFT			0x02
 #define PALMAS_LDO_PD_CTRL2_LDOLN				0x02
-#define PALMAS_LDO_PD_CTRL2_LDOLN_SHIFT				1
+#define PALMAS_LDO_PD_CTRL2_LDOLN_SHIFT				0x01
 #define PALMAS_LDO_PD_CTRL2_LDO9				0x01
-#define PALMAS_LDO_PD_CTRL2_LDO9_SHIFT				0
+#define PALMAS_LDO_PD_CTRL2_LDO9_SHIFT				0x00
 
 /* Bit definitions for LDO_SHORT_STATUS1 */
 #define PALMAS_LDO_SHORT_STATUS1_LDO8				0x80
-#define PALMAS_LDO_SHORT_STATUS1_LDO8_SHIFT			7
+#define PALMAS_LDO_SHORT_STATUS1_LDO8_SHIFT			0x07
 #define PALMAS_LDO_SHORT_STATUS1_LDO7				0x40
-#define PALMAS_LDO_SHORT_STATUS1_LDO7_SHIFT			6
+#define PALMAS_LDO_SHORT_STATUS1_LDO7_SHIFT			0x06
 #define PALMAS_LDO_SHORT_STATUS1_LDO6				0x20
-#define PALMAS_LDO_SHORT_STATUS1_LDO6_SHIFT			5
+#define PALMAS_LDO_SHORT_STATUS1_LDO6_SHIFT			0x05
 #define PALMAS_LDO_SHORT_STATUS1_LDO5				0x10
-#define PALMAS_LDO_SHORT_STATUS1_LDO5_SHIFT			4
+#define PALMAS_LDO_SHORT_STATUS1_LDO5_SHIFT			0x04
 #define PALMAS_LDO_SHORT_STATUS1_LDO4				0x08
-#define PALMAS_LDO_SHORT_STATUS1_LDO4_SHIFT			3
+#define PALMAS_LDO_SHORT_STATUS1_LDO4_SHIFT			0x03
 #define PALMAS_LDO_SHORT_STATUS1_LDO3				0x04
-#define PALMAS_LDO_SHORT_STATUS1_LDO3_SHIFT			2
+#define PALMAS_LDO_SHORT_STATUS1_LDO3_SHIFT			0x02
 #define PALMAS_LDO_SHORT_STATUS1_LDO2				0x02
-#define PALMAS_LDO_SHORT_STATUS1_LDO2_SHIFT			1
+#define PALMAS_LDO_SHORT_STATUS1_LDO2_SHIFT			0x01
 #define PALMAS_LDO_SHORT_STATUS1_LDO1				0x01
-#define PALMAS_LDO_SHORT_STATUS1_LDO1_SHIFT			0
+#define PALMAS_LDO_SHORT_STATUS1_LDO1_SHIFT			0x00
 
 /* Bit definitions for LDO_SHORT_STATUS2 */
 #define PALMAS_LDO_SHORT_STATUS2_LDOVANA			0x08
-#define PALMAS_LDO_SHORT_STATUS2_LDOVANA_SHIFT			3
+#define PALMAS_LDO_SHORT_STATUS2_LDOVANA_SHIFT			0x03
 #define PALMAS_LDO_SHORT_STATUS2_LDOUSB				0x04
-#define PALMAS_LDO_SHORT_STATUS2_LDOUSB_SHIFT			2
+#define PALMAS_LDO_SHORT_STATUS2_LDOUSB_SHIFT			0x02
 #define PALMAS_LDO_SHORT_STATUS2_LDOLN				0x02
-#define PALMAS_LDO_SHORT_STATUS2_LDOLN_SHIFT			1
+#define PALMAS_LDO_SHORT_STATUS2_LDOLN_SHIFT			0x01
 #define PALMAS_LDO_SHORT_STATUS2_LDO9				0x01
-#define PALMAS_LDO_SHORT_STATUS2_LDO9_SHIFT			0
+#define PALMAS_LDO_SHORT_STATUS2_LDO9_SHIFT			0x00
 
 /* Registers for function PMU_CONTROL */
-#define PALMAS_DEV_CTRL						0x0
-#define PALMAS_POWER_CTRL					0x1
-#define PALMAS_VSYS_LO						0x2
-#define PALMAS_VSYS_MON						0x3
-#define PALMAS_VBAT_MON						0x4
-#define PALMAS_WATCHDOG						0x5
-#define PALMAS_BOOT_STATUS					0x6
-#define PALMAS_BATTERY_BOUNCE					0x7
-#define PALMAS_BACKUP_BATTERY_CTRL				0x8
-#define PALMAS_LONG_PRESS_KEY					0x9
-#define PALMAS_OSC_THERM_CTRL					0xA
-#define PALMAS_BATDEBOUNCING					0xB
-#define PALMAS_SWOFF_HWRST					0xF
+#define PALMAS_DEV_CTRL						0x00
+#define PALMAS_POWER_CTRL					0x01
+#define PALMAS_VSYS_LO						0x02
+#define PALMAS_VSYS_MON						0x03
+#define PALMAS_VBAT_MON						0x04
+#define PALMAS_WATCHDOG						0x05
+#define PALMAS_BOOT_STATUS					0x06
+#define PALMAS_BATTERY_BOUNCE					0x07
+#define PALMAS_BACKUP_BATTERY_CTRL				0x08
+#define PALMAS_LONG_PRESS_KEY					0x09
+#define PALMAS_OSC_THERM_CTRL					0x0A
+#define PALMAS_BATDEBOUNCING					0x0B
+#define PALMAS_SWOFF_HWRST					0x0F
 #define PALMAS_SWOFF_COLDRST					0x10
 #define PALMAS_SWOFF_STATUS					0x11
 #define PALMAS_PMU_CONFIG					0x12
@@ -1296,668 +1296,668 @@ enum usb_irq_events {
 
 /* Bit definitions for DEV_CTRL */
 #define PALMAS_DEV_CTRL_DEV_STATUS_MASK				0x0c
-#define PALMAS_DEV_CTRL_DEV_STATUS_SHIFT			2
+#define PALMAS_DEV_CTRL_DEV_STATUS_SHIFT			0x02
 #define PALMAS_DEV_CTRL_SW_RST					0x02
-#define PALMAS_DEV_CTRL_SW_RST_SHIFT				1
+#define PALMAS_DEV_CTRL_SW_RST_SHIFT				0x01
 #define PALMAS_DEV_CTRL_DEV_ON					0x01
-#define PALMAS_DEV_CTRL_DEV_ON_SHIFT				0
+#define PALMAS_DEV_CTRL_DEV_ON_SHIFT				0x00
 
 /* Bit definitions for POWER_CTRL */
 #define PALMAS_POWER_CTRL_ENABLE2_MASK				0x04
-#define PALMAS_POWER_CTRL_ENABLE2_MASK_SHIFT			2
+#define PALMAS_POWER_CTRL_ENABLE2_MASK_SHIFT			0x02
 #define PALMAS_POWER_CTRL_ENABLE1_MASK				0x02
-#define PALMAS_POWER_CTRL_ENABLE1_MASK_SHIFT			1
+#define PALMAS_POWER_CTRL_ENABLE1_MASK_SHIFT			0x01
 #define PALMAS_POWER_CTRL_NSLEEP_MASK				0x01
-#define PALMAS_POWER_CTRL_NSLEEP_MASK_SHIFT			0
+#define PALMAS_POWER_CTRL_NSLEEP_MASK_SHIFT			0x00
 
 /* Bit definitions for VSYS_LO */
-#define PALMAS_VSYS_LO_THRESHOLD_MASK				0x1f
-#define PALMAS_VSYS_LO_THRESHOLD_SHIFT				0
+#define PALMAS_VSYS_LO_THRESHOLD_MASK				0x1F
+#define PALMAS_VSYS_LO_THRESHOLD_SHIFT				0x00
 
 /* Bit definitions for VSYS_MON */
 #define PALMAS_VSYS_MON_ENABLE					0x80
-#define PALMAS_VSYS_MON_ENABLE_SHIFT				7
-#define PALMAS_VSYS_MON_THRESHOLD_MASK				0x3f
-#define PALMAS_VSYS_MON_THRESHOLD_SHIFT				0
+#define PALMAS_VSYS_MON_ENABLE_SHIFT				0x07
+#define PALMAS_VSYS_MON_THRESHOLD_MASK				0x3F
+#define PALMAS_VSYS_MON_THRESHOLD_SHIFT				0x00
 
 /* Bit definitions for VBAT_MON */
 #define PALMAS_VBAT_MON_ENABLE					0x80
-#define PALMAS_VBAT_MON_ENABLE_SHIFT				7
-#define PALMAS_VBAT_MON_THRESHOLD_MASK				0x3f
-#define PALMAS_VBAT_MON_THRESHOLD_SHIFT				0
+#define PALMAS_VBAT_MON_ENABLE_SHIFT				0x07
+#define PALMAS_VBAT_MON_THRESHOLD_MASK				0x3F
+#define PALMAS_VBAT_MON_THRESHOLD_SHIFT				0x00
 
 /* Bit definitions for WATCHDOG */
 #define PALMAS_WATCHDOG_LOCK					0x20
-#define PALMAS_WATCHDOG_LOCK_SHIFT				5
+#define PALMAS_WATCHDOG_LOCK_SHIFT				0x05
 #define PALMAS_WATCHDOG_ENABLE					0x10
-#define PALMAS_WATCHDOG_ENABLE_SHIFT				4
+#define PALMAS_WATCHDOG_ENABLE_SHIFT				0x04
 #define PALMAS_WATCHDOG_MODE					0x08
-#define PALMAS_WATCHDOG_MODE_SHIFT				3
+#define PALMAS_WATCHDOG_MODE_SHIFT				0x03
 #define PALMAS_WATCHDOG_TIMER_MASK				0x07
-#define PALMAS_WATCHDOG_TIMER_SHIFT				0
+#define PALMAS_WATCHDOG_TIMER_SHIFT				0x00
 
 /* Bit definitions for BOOT_STATUS */
 #define PALMAS_BOOT_STATUS_BOOT1				0x02
-#define PALMAS_BOOT_STATUS_BOOT1_SHIFT				1
+#define PALMAS_BOOT_STATUS_BOOT1_SHIFT				0x01
 #define PALMAS_BOOT_STATUS_BOOT0				0x01
-#define PALMAS_BOOT_STATUS_BOOT0_SHIFT				0
+#define PALMAS_BOOT_STATUS_BOOT0_SHIFT				0x00
 
 /* Bit definitions for BATTERY_BOUNCE */
-#define PALMAS_BATTERY_BOUNCE_BB_DELAY_MASK			0x3f
-#define PALMAS_BATTERY_BOUNCE_BB_DELAY_SHIFT			0
+#define PALMAS_BATTERY_BOUNCE_BB_DELAY_MASK			0x3F
+#define PALMAS_BATTERY_BOUNCE_BB_DELAY_SHIFT			0x00
 
 /* Bit definitions for BACKUP_BATTERY_CTRL */
 #define PALMAS_BACKUP_BATTERY_CTRL_VRTC_18_15			0x80
-#define PALMAS_BACKUP_BATTERY_CTRL_VRTC_18_15_SHIFT		7
+#define PALMAS_BACKUP_BATTERY_CTRL_VRTC_18_15_SHIFT		0x07
 #define PALMAS_BACKUP_BATTERY_CTRL_VRTC_EN_SLP			0x40
-#define PALMAS_BACKUP_BATTERY_CTRL_VRTC_EN_SLP_SHIFT		6
+#define PALMAS_BACKUP_BATTERY_CTRL_VRTC_EN_SLP_SHIFT		0x06
 #define PALMAS_BACKUP_BATTERY_CTRL_VRTC_EN_OFF			0x20
-#define PALMAS_BACKUP_BATTERY_CTRL_VRTC_EN_OFF_SHIFT		5
+#define PALMAS_BACKUP_BATTERY_CTRL_VRTC_EN_OFF_SHIFT		0x05
 #define PALMAS_BACKUP_BATTERY_CTRL_VRTC_PWEN			0x10
-#define PALMAS_BACKUP_BATTERY_CTRL_VRTC_PWEN_SHIFT		4
+#define PALMAS_BACKUP_BATTERY_CTRL_VRTC_PWEN_SHIFT		0x04
 #define PALMAS_BACKUP_BATTERY_CTRL_BBS_BBC_LOW_ICHRG		0x08
-#define PALMAS_BACKUP_BATTERY_CTRL_BBS_BBC_LOW_ICHRG_SHIFT	3
+#define PALMAS_BACKUP_BATTERY_CTRL_BBS_BBC_LOW_ICHRG_SHIFT	0x03
 #define PALMAS_BACKUP_BATTERY_CTRL_BB_SEL_MASK			0x06
-#define PALMAS_BACKUP_BATTERY_CTRL_BB_SEL_SHIFT			1
+#define PALMAS_BACKUP_BATTERY_CTRL_BB_SEL_SHIFT			0x01
 #define PALMAS_BACKUP_BATTERY_CTRL_BB_CHG_EN			0x01
-#define PALMAS_BACKUP_BATTERY_CTRL_BB_CHG_EN_SHIFT		0
+#define PALMAS_BACKUP_BATTERY_CTRL_BB_CHG_EN_SHIFT		0x00
 
 /* Bit definitions for LONG_PRESS_KEY */
 #define PALMAS_LONG_PRESS_KEY_LPK_LOCK				0x80
-#define PALMAS_LONG_PRESS_KEY_LPK_LOCK_SHIFT			7
+#define PALMAS_LONG_PRESS_KEY_LPK_LOCK_SHIFT			0x07
 #define PALMAS_LONG_PRESS_KEY_LPK_INT_CLR			0x10
-#define PALMAS_LONG_PRESS_KEY_LPK_INT_CLR_SHIFT			4
+#define PALMAS_LONG_PRESS_KEY_LPK_INT_CLR_SHIFT			0x04
 #define PALMAS_LONG_PRESS_KEY_LPK_TIME_MASK			0x0c
-#define PALMAS_LONG_PRESS_KEY_LPK_TIME_SHIFT			2
+#define PALMAS_LONG_PRESS_KEY_LPK_TIME_SHIFT			0x02
 #define PALMAS_LONG_PRESS_KEY_PWRON_DEBOUNCE_MASK		0x03
-#define PALMAS_LONG_PRESS_KEY_PWRON_DEBOUNCE_SHIFT		0
+#define PALMAS_LONG_PRESS_KEY_PWRON_DEBOUNCE_SHIFT		0x00
 
 /* Bit definitions for OSC_THERM_CTRL */
 #define PALMAS_OSC_THERM_CTRL_VANA_ON_IN_SLEEP			0x80
-#define PALMAS_OSC_THERM_CTRL_VANA_ON_IN_SLEEP_SHIFT		7
+#define PALMAS_OSC_THERM_CTRL_VANA_ON_IN_SLEEP_SHIFT		0x07
 #define PALMAS_OSC_THERM_CTRL_INT_MASK_IN_SLEEP			0x40
-#define PALMAS_OSC_THERM_CTRL_INT_MASK_IN_SLEEP_SHIFT		6
+#define PALMAS_OSC_THERM_CTRL_INT_MASK_IN_SLEEP_SHIFT		0x06
 #define PALMAS_OSC_THERM_CTRL_RC15MHZ_ON_IN_SLEEP		0x20
-#define PALMAS_OSC_THERM_CTRL_RC15MHZ_ON_IN_SLEEP_SHIFT		5
+#define PALMAS_OSC_THERM_CTRL_RC15MHZ_ON_IN_SLEEP_SHIFT		0x05
 #define PALMAS_OSC_THERM_CTRL_THERM_OFF_IN_SLEEP		0x10
-#define PALMAS_OSC_THERM_CTRL_THERM_OFF_IN_SLEEP_SHIFT		4
+#define PALMAS_OSC_THERM_CTRL_THERM_OFF_IN_SLEEP_SHIFT		0x04
 #define PALMAS_OSC_THERM_CTRL_THERM_HD_SEL_MASK			0x0c
-#define PALMAS_OSC_THERM_CTRL_THERM_HD_SEL_SHIFT		2
+#define PALMAS_OSC_THERM_CTRL_THERM_HD_SEL_SHIFT		0x02
 #define PALMAS_OSC_THERM_CTRL_OSC_BYPASS			0x02
-#define PALMAS_OSC_THERM_CTRL_OSC_BYPASS_SHIFT			1
+#define PALMAS_OSC_THERM_CTRL_OSC_BYPASS_SHIFT			0x01
 #define PALMAS_OSC_THERM_CTRL_OSC_HPMODE			0x01
-#define PALMAS_OSC_THERM_CTRL_OSC_HPMODE_SHIFT			0
+#define PALMAS_OSC_THERM_CTRL_OSC_HPMODE_SHIFT			0x00
 
 /* Bit definitions for BATDEBOUNCING */
 #define PALMAS_BATDEBOUNCING_BAT_DEB_BYPASS			0x80
-#define PALMAS_BATDEBOUNCING_BAT_DEB_BYPASS_SHIFT		7
+#define PALMAS_BATDEBOUNCING_BAT_DEB_BYPASS_SHIFT		0x07
 #define PALMAS_BATDEBOUNCING_BINS_DEB_MASK			0x78
-#define PALMAS_BATDEBOUNCING_BINS_DEB_SHIFT			3
+#define PALMAS_BATDEBOUNCING_BINS_DEB_SHIFT			0x03
 #define PALMAS_BATDEBOUNCING_BEXT_DEB_MASK			0x07
-#define PALMAS_BATDEBOUNCING_BEXT_DEB_SHIFT			0
+#define PALMAS_BATDEBOUNCING_BEXT_DEB_SHIFT			0x00
 
 /* Bit definitions for SWOFF_HWRST */
 #define PALMAS_SWOFF_HWRST_PWRON_LPK				0x80
-#define PALMAS_SWOFF_HWRST_PWRON_LPK_SHIFT			7
+#define PALMAS_SWOFF_HWRST_PWRON_LPK_SHIFT			0x07
 #define PALMAS_SWOFF_HWRST_PWRDOWN				0x40
-#define PALMAS_SWOFF_HWRST_PWRDOWN_SHIFT			6
+#define PALMAS_SWOFF_HWRST_PWRDOWN_SHIFT			0x06
 #define PALMAS_SWOFF_HWRST_WTD					0x20
-#define PALMAS_SWOFF_HWRST_WTD_SHIFT				5
+#define PALMAS_SWOFF_HWRST_WTD_SHIFT				0x05
 #define PALMAS_SWOFF_HWRST_TSHUT				0x10
-#define PALMAS_SWOFF_HWRST_TSHUT_SHIFT				4
+#define PALMAS_SWOFF_HWRST_TSHUT_SHIFT				0x04
 #define PALMAS_SWOFF_HWRST_RESET_IN				0x08
-#define PALMAS_SWOFF_HWRST_RESET_IN_SHIFT			3
+#define PALMAS_SWOFF_HWRST_RESET_IN_SHIFT			0x03
 #define PALMAS_SWOFF_HWRST_SW_RST				0x04
-#define PALMAS_SWOFF_HWRST_SW_RST_SHIFT				2
+#define PALMAS_SWOFF_HWRST_SW_RST_SHIFT				0x02
 #define PALMAS_SWOFF_HWRST_VSYS_LO				0x02
-#define PALMAS_SWOFF_HWRST_VSYS_LO_SHIFT			1
+#define PALMAS_SWOFF_HWRST_VSYS_LO_SHIFT			0x01
 #define PALMAS_SWOFF_HWRST_GPADC_SHUTDOWN			0x01
-#define PALMAS_SWOFF_HWRST_GPADC_SHUTDOWN_SHIFT			0
+#define PALMAS_SWOFF_HWRST_GPADC_SHUTDOWN_SHIFT			0x00
 
 /* Bit definitions for SWOFF_COLDRST */
 #define PALMAS_SWOFF_COLDRST_PWRON_LPK				0x80
-#define PALMAS_SWOFF_COLDRST_PWRON_LPK_SHIFT			7
+#define PALMAS_SWOFF_COLDRST_PWRON_LPK_SHIFT			0x07
 #define PALMAS_SWOFF_COLDRST_PWRDOWN				0x40
-#define PALMAS_SWOFF_COLDRST_PWRDOWN_SHIFT			6
+#define PALMAS_SWOFF_COLDRST_PWRDOWN_SHIFT			0x06
 #define PALMAS_SWOFF_COLDRST_WTD				0x20
-#define PALMAS_SWOFF_COLDRST_WTD_SHIFT				5
+#define PALMAS_SWOFF_COLDRST_WTD_SHIFT				0x05
 #define PALMAS_SWOFF_COLDRST_TSHUT				0x10
-#define PALMAS_SWOFF_COLDRST_TSHUT_SHIFT			4
+#define PALMAS_SWOFF_COLDRST_TSHUT_SHIFT			0x04
 #define PALMAS_SWOFF_COLDRST_RESET_IN				0x08
-#define PALMAS_SWOFF_COLDRST_RESET_IN_SHIFT			3
+#define PALMAS_SWOFF_COLDRST_RESET_IN_SHIFT			0x03
 #define PALMAS_SWOFF_COLDRST_SW_RST				0x04
-#define PALMAS_SWOFF_COLDRST_SW_RST_SHIFT			2
+#define PALMAS_SWOFF_COLDRST_SW_RST_SHIFT			0x02
 #define PALMAS_SWOFF_COLDRST_VSYS_LO				0x02
-#define PALMAS_SWOFF_COLDRST_VSYS_LO_SHIFT			1
+#define PALMAS_SWOFF_COLDRST_VSYS_LO_SHIFT			0x01
 #define PALMAS_SWOFF_COLDRST_GPADC_SHUTDOWN			0x01
-#define PALMAS_SWOFF_COLDRST_GPADC_SHUTDOWN_SHIFT		0
+#define PALMAS_SWOFF_COLDRST_GPADC_SHUTDOWN_SHIFT		0x00
 
 /* Bit definitions for SWOFF_STATUS */
 #define PALMAS_SWOFF_STATUS_PWRON_LPK				0x80
-#define PALMAS_SWOFF_STATUS_PWRON_LPK_SHIFT			7
+#define PALMAS_SWOFF_STATUS_PWRON_LPK_SHIFT			0x07
 #define PALMAS_SWOFF_STATUS_PWRDOWN				0x40
-#define PALMAS_SWOFF_STATUS_PWRDOWN_SHIFT			6
+#define PALMAS_SWOFF_STATUS_PWRDOWN_SHIFT			0x06
 #define PALMAS_SWOFF_STATUS_WTD					0x20
-#define PALMAS_SWOFF_STATUS_WTD_SHIFT				5
+#define PALMAS_SWOFF_STATUS_WTD_SHIFT				0x05
 #define PALMAS_SWOFF_STATUS_TSHUT				0x10
-#define PALMAS_SWOFF_STATUS_TSHUT_SHIFT				4
+#define PALMAS_SWOFF_STATUS_TSHUT_SHIFT				0x04
 #define PALMAS_SWOFF_STATUS_RESET_IN				0x08
-#define PALMAS_SWOFF_STATUS_RESET_IN_SHIFT			3
+#define PALMAS_SWOFF_STATUS_RESET_IN_SHIFT			0x03
 #define PALMAS_SWOFF_STATUS_SW_RST				0x04
-#define PALMAS_SWOFF_STATUS_SW_RST_SHIFT			2
+#define PALMAS_SWOFF_STATUS_SW_RST_SHIFT			0x02
 #define PALMAS_SWOFF_STATUS_VSYS_LO				0x02
-#define PALMAS_SWOFF_STATUS_VSYS_LO_SHIFT			1
+#define PALMAS_SWOFF_STATUS_VSYS_LO_SHIFT			0x01
 #define PALMAS_SWOFF_STATUS_GPADC_SHUTDOWN			0x01
-#define PALMAS_SWOFF_STATUS_GPADC_SHUTDOWN_SHIFT		0
+#define PALMAS_SWOFF_STATUS_GPADC_SHUTDOWN_SHIFT		0x00
 
 /* Bit definitions for PMU_CONFIG */
 #define PALMAS_PMU_CONFIG_MULTI_CELL_EN				0x40
-#define PALMAS_PMU_CONFIG_MULTI_CELL_EN_SHIFT			6
+#define PALMAS_PMU_CONFIG_MULTI_CELL_EN_SHIFT			0x06
 #define PALMAS_PMU_CONFIG_SPARE_MASK				0x30
-#define PALMAS_PMU_CONFIG_SPARE_SHIFT				4
+#define PALMAS_PMU_CONFIG_SPARE_SHIFT				0x04
 #define PALMAS_PMU_CONFIG_SWOFF_DLY_MASK			0x0c
-#define PALMAS_PMU_CONFIG_SWOFF_DLY_SHIFT			2
+#define PALMAS_PMU_CONFIG_SWOFF_DLY_SHIFT			0x02
 #define PALMAS_PMU_CONFIG_GATE_RESET_OUT			0x02
-#define PALMAS_PMU_CONFIG_GATE_RESET_OUT_SHIFT			1
+#define PALMAS_PMU_CONFIG_GATE_RESET_OUT_SHIFT			0x01
 #define PALMAS_PMU_CONFIG_AUTODEVON				0x01
-#define PALMAS_PMU_CONFIG_AUTODEVON_SHIFT			0
+#define PALMAS_PMU_CONFIG_AUTODEVON_SHIFT			0x00
 
 /* Bit definitions for SPARE */
 #define PALMAS_SPARE_SPARE_MASK					0xf8
-#define PALMAS_SPARE_SPARE_SHIFT				3
+#define PALMAS_SPARE_SPARE_SHIFT				0x03
 #define PALMAS_SPARE_REGEN3_OD					0x04
-#define PALMAS_SPARE_REGEN3_OD_SHIFT				2
+#define PALMAS_SPARE_REGEN3_OD_SHIFT				0x02
 #define PALMAS_SPARE_REGEN2_OD					0x02
-#define PALMAS_SPARE_REGEN2_OD_SHIFT				1
+#define PALMAS_SPARE_REGEN2_OD_SHIFT				0x01
 #define PALMAS_SPARE_REGEN1_OD					0x01
-#define PALMAS_SPARE_REGEN1_OD_SHIFT				0
+#define PALMAS_SPARE_REGEN1_OD_SHIFT				0x00
 
 /* Bit definitions for PMU_SECONDARY_INT */
 #define PALMAS_PMU_SECONDARY_INT_VBUS_OVV_INT_SRC		0x80
-#define PALMAS_PMU_SECONDARY_INT_VBUS_OVV_INT_SRC_SHIFT		7
+#define PALMAS_PMU_SECONDARY_INT_VBUS_OVV_INT_SRC_SHIFT		0x07
 #define PALMAS_PMU_SECONDARY_INT_CHARG_DET_N_INT_SRC		0x40
-#define PALMAS_PMU_SECONDARY_INT_CHARG_DET_N_INT_SRC_SHIFT	6
+#define PALMAS_PMU_SECONDARY_INT_CHARG_DET_N_INT_SRC_SHIFT	0x06
 #define PALMAS_PMU_SECONDARY_INT_BB_INT_SRC			0x20
-#define PALMAS_PMU_SECONDARY_INT_BB_INT_SRC_SHIFT		5
+#define PALMAS_PMU_SECONDARY_INT_BB_INT_SRC_SHIFT		0x05
 #define PALMAS_PMU_SECONDARY_INT_FBI_INT_SRC			0x10
-#define PALMAS_PMU_SECONDARY_INT_FBI_INT_SRC_SHIFT		4
+#define PALMAS_PMU_SECONDARY_INT_FBI_INT_SRC_SHIFT		0x04
 #define PALMAS_PMU_SECONDARY_INT_VBUS_OVV_MASK			0x08
-#define PALMAS_PMU_SECONDARY_INT_VBUS_OVV_MASK_SHIFT		3
+#define PALMAS_PMU_SECONDARY_INT_VBUS_OVV_MASK_SHIFT		0x03
 #define PALMAS_PMU_SECONDARY_INT_CHARG_DET_N_MASK		0x04
-#define PALMAS_PMU_SECONDARY_INT_CHARG_DET_N_MASK_SHIFT		2
+#define PALMAS_PMU_SECONDARY_INT_CHARG_DET_N_MASK_SHIFT		0x02
 #define PALMAS_PMU_SECONDARY_INT_BB_MASK			0x02
-#define PALMAS_PMU_SECONDARY_INT_BB_MASK_SHIFT			1
+#define PALMAS_PMU_SECONDARY_INT_BB_MASK_SHIFT			0x01
 #define PALMAS_PMU_SECONDARY_INT_FBI_MASK			0x01
-#define PALMAS_PMU_SECONDARY_INT_FBI_MASK_SHIFT			0
+#define PALMAS_PMU_SECONDARY_INT_FBI_MASK_SHIFT			0x00
 
 /* Bit definitions for SW_REVISION */
-#define PALMAS_SW_REVISION_SW_REVISION_MASK			0xff
-#define PALMAS_SW_REVISION_SW_REVISION_SHIFT			0
+#define PALMAS_SW_REVISION_SW_REVISION_MASK			0xFF
+#define PALMAS_SW_REVISION_SW_REVISION_SHIFT			0x00
 
 /* Bit definitions for EXT_CHRG_CTRL */
 #define PALMAS_EXT_CHRG_CTRL_VBUS_OVV_STATUS			0x80
-#define PALMAS_EXT_CHRG_CTRL_VBUS_OVV_STATUS_SHIFT		7
+#define PALMAS_EXT_CHRG_CTRL_VBUS_OVV_STATUS_SHIFT		0x07
 #define PALMAS_EXT_CHRG_CTRL_CHARG_DET_N_STATUS			0x40
-#define PALMAS_EXT_CHRG_CTRL_CHARG_DET_N_STATUS_SHIFT		6
+#define PALMAS_EXT_CHRG_CTRL_CHARG_DET_N_STATUS_SHIFT		0x06
 #define PALMAS_EXT_CHRG_CTRL_VSYS_DEBOUNCE_DELAY		0x08
-#define PALMAS_EXT_CHRG_CTRL_VSYS_DEBOUNCE_DELAY_SHIFT		3
+#define PALMAS_EXT_CHRG_CTRL_VSYS_DEBOUNCE_DELAY_SHIFT		0x03
 #define PALMAS_EXT_CHRG_CTRL_CHRG_DET_N				0x04
-#define PALMAS_EXT_CHRG_CTRL_CHRG_DET_N_SHIFT			2
+#define PALMAS_EXT_CHRG_CTRL_CHRG_DET_N_SHIFT			0x02
 #define PALMAS_EXT_CHRG_CTRL_AUTO_ACA_EN			0x02
-#define PALMAS_EXT_CHRG_CTRL_AUTO_ACA_EN_SHIFT			1
+#define PALMAS_EXT_CHRG_CTRL_AUTO_ACA_EN_SHIFT			0x01
 #define PALMAS_EXT_CHRG_CTRL_AUTO_LDOUSB_EN			0x01
-#define PALMAS_EXT_CHRG_CTRL_AUTO_LDOUSB_EN_SHIFT		0
+#define PALMAS_EXT_CHRG_CTRL_AUTO_LDOUSB_EN_SHIFT		0x00
 
 /* Bit definitions for PMU_SECONDARY_INT2 */
 #define PALMAS_PMU_SECONDARY_INT2_DVFS2_INT_SRC			0x20
-#define PALMAS_PMU_SECONDARY_INT2_DVFS2_INT_SRC_SHIFT		5
+#define PALMAS_PMU_SECONDARY_INT2_DVFS2_INT_SRC_SHIFT		0x05
 #define PALMAS_PMU_SECONDARY_INT2_DVFS1_INT_SRC			0x10
-#define PALMAS_PMU_SECONDARY_INT2_DVFS1_INT_SRC_SHIFT		4
+#define PALMAS_PMU_SECONDARY_INT2_DVFS1_INT_SRC_SHIFT		0x04
 #define PALMAS_PMU_SECONDARY_INT2_DVFS2_MASK			0x02
-#define PALMAS_PMU_SECONDARY_INT2_DVFS2_MASK_SHIFT		1
+#define PALMAS_PMU_SECONDARY_INT2_DVFS2_MASK_SHIFT		0x01
 #define PALMAS_PMU_SECONDARY_INT2_DVFS1_MASK			0x01
-#define PALMAS_PMU_SECONDARY_INT2_DVFS1_MASK_SHIFT		0
+#define PALMAS_PMU_SECONDARY_INT2_DVFS1_MASK_SHIFT		0x00
 
 /* Registers for function RESOURCE */
-#define PALMAS_CLK32KG_CTRL					0x0
-#define PALMAS_CLK32KGAUDIO_CTRL				0x1
-#define PALMAS_REGEN1_CTRL					0x2
-#define PALMAS_REGEN2_CTRL					0x3
-#define PALMAS_SYSEN1_CTRL					0x4
-#define PALMAS_SYSEN2_CTRL					0x5
-#define PALMAS_NSLEEP_RES_ASSIGN				0x6
-#define PALMAS_NSLEEP_SMPS_ASSIGN				0x7
-#define PALMAS_NSLEEP_LDO_ASSIGN1				0x8
-#define PALMAS_NSLEEP_LDO_ASSIGN2				0x9
-#define PALMAS_ENABLE1_RES_ASSIGN				0xA
-#define PALMAS_ENABLE1_SMPS_ASSIGN				0xB
-#define PALMAS_ENABLE1_LDO_ASSIGN1				0xC
-#define PALMAS_ENABLE1_LDO_ASSIGN2				0xD
-#define PALMAS_ENABLE2_RES_ASSIGN				0xE
-#define PALMAS_ENABLE2_SMPS_ASSIGN				0xF
+#define PALMAS_CLK32KG_CTRL					0x00
+#define PALMAS_CLK32KGAUDIO_CTRL				0x01
+#define PALMAS_REGEN1_CTRL					0x02
+#define PALMAS_REGEN2_CTRL					0x03
+#define PALMAS_SYSEN1_CTRL					0x04
+#define PALMAS_SYSEN2_CTRL					0x05
+#define PALMAS_NSLEEP_RES_ASSIGN				0x06
+#define PALMAS_NSLEEP_SMPS_ASSIGN				0x07
+#define PALMAS_NSLEEP_LDO_ASSIGN1				0x08
+#define PALMAS_NSLEEP_LDO_ASSIGN2				0x09
+#define PALMAS_ENABLE1_RES_ASSIGN				0x0A
+#define PALMAS_ENABLE1_SMPS_ASSIGN				0x0B
+#define PALMAS_ENABLE1_LDO_ASSIGN1				0x0C
+#define PALMAS_ENABLE1_LDO_ASSIGN2				0x0D
+#define PALMAS_ENABLE2_RES_ASSIGN				0x0E
+#define PALMAS_ENABLE2_SMPS_ASSIGN				0x0F
 #define PALMAS_ENABLE2_LDO_ASSIGN1				0x10
 #define PALMAS_ENABLE2_LDO_ASSIGN2				0x11
 #define PALMAS_REGEN3_CTRL					0x12
 
 /* Bit definitions for CLK32KG_CTRL */
 #define PALMAS_CLK32KG_CTRL_STATUS				0x10
-#define PALMAS_CLK32KG_CTRL_STATUS_SHIFT			4
+#define PALMAS_CLK32KG_CTRL_STATUS_SHIFT			0x04
 #define PALMAS_CLK32KG_CTRL_MODE_SLEEP				0x04
-#define PALMAS_CLK32KG_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_CLK32KG_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_CLK32KG_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_CLK32KG_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_CLK32KG_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for CLK32KGAUDIO_CTRL */
 #define PALMAS_CLK32KGAUDIO_CTRL_STATUS				0x10
-#define PALMAS_CLK32KGAUDIO_CTRL_STATUS_SHIFT			4
+#define PALMAS_CLK32KGAUDIO_CTRL_STATUS_SHIFT			0x04
 #define PALMAS_CLK32KGAUDIO_CTRL_RESERVED3			0x08
-#define PALMAS_CLK32KGAUDIO_CTRL_RESERVED3_SHIFT		3
+#define PALMAS_CLK32KGAUDIO_CTRL_RESERVED3_SHIFT		0x03
 #define PALMAS_CLK32KGAUDIO_CTRL_MODE_SLEEP			0x04
-#define PALMAS_CLK32KGAUDIO_CTRL_MODE_SLEEP_SHIFT		2
+#define PALMAS_CLK32KGAUDIO_CTRL_MODE_SLEEP_SHIFT		0x02
 #define PALMAS_CLK32KGAUDIO_CTRL_MODE_ACTIVE			0x01
-#define PALMAS_CLK32KGAUDIO_CTRL_MODE_ACTIVE_SHIFT		0
+#define PALMAS_CLK32KGAUDIO_CTRL_MODE_ACTIVE_SHIFT		0x00
 
 /* Bit definitions for REGEN1_CTRL */
 #define PALMAS_REGEN1_CTRL_STATUS				0x10
-#define PALMAS_REGEN1_CTRL_STATUS_SHIFT				4
+#define PALMAS_REGEN1_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_REGEN1_CTRL_MODE_SLEEP				0x04
-#define PALMAS_REGEN1_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_REGEN1_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_REGEN1_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_REGEN1_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_REGEN1_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for REGEN2_CTRL */
 #define PALMAS_REGEN2_CTRL_STATUS				0x10
-#define PALMAS_REGEN2_CTRL_STATUS_SHIFT				4
+#define PALMAS_REGEN2_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_REGEN2_CTRL_MODE_SLEEP				0x04
-#define PALMAS_REGEN2_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_REGEN2_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_REGEN2_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_REGEN2_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_REGEN2_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for SYSEN1_CTRL */
 #define PALMAS_SYSEN1_CTRL_STATUS				0x10
-#define PALMAS_SYSEN1_CTRL_STATUS_SHIFT				4
+#define PALMAS_SYSEN1_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_SYSEN1_CTRL_MODE_SLEEP				0x04
-#define PALMAS_SYSEN1_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_SYSEN1_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_SYSEN1_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_SYSEN1_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_SYSEN1_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for SYSEN2_CTRL */
 #define PALMAS_SYSEN2_CTRL_STATUS				0x10
-#define PALMAS_SYSEN2_CTRL_STATUS_SHIFT				4
+#define PALMAS_SYSEN2_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_SYSEN2_CTRL_MODE_SLEEP				0x04
-#define PALMAS_SYSEN2_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_SYSEN2_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_SYSEN2_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_SYSEN2_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_SYSEN2_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Bit definitions for NSLEEP_RES_ASSIGN */
 #define PALMAS_NSLEEP_RES_ASSIGN_REGEN3				0x40
-#define PALMAS_NSLEEP_RES_ASSIGN_REGEN3_SHIFT			6
+#define PALMAS_NSLEEP_RES_ASSIGN_REGEN3_SHIFT			0x06
 #define PALMAS_NSLEEP_RES_ASSIGN_CLK32KGAUDIO			0x20
-#define PALMAS_NSLEEP_RES_ASSIGN_CLK32KGAUDIO_SHIFT		5
+#define PALMAS_NSLEEP_RES_ASSIGN_CLK32KGAUDIO_SHIFT		0x05
 #define PALMAS_NSLEEP_RES_ASSIGN_CLK32KG			0x10
-#define PALMAS_NSLEEP_RES_ASSIGN_CLK32KG_SHIFT			4
+#define PALMAS_NSLEEP_RES_ASSIGN_CLK32KG_SHIFT			0x04
 #define PALMAS_NSLEEP_RES_ASSIGN_SYSEN2				0x08
-#define PALMAS_NSLEEP_RES_ASSIGN_SYSEN2_SHIFT			3
+#define PALMAS_NSLEEP_RES_ASSIGN_SYSEN2_SHIFT			0x03
 #define PALMAS_NSLEEP_RES_ASSIGN_SYSEN1				0x04
-#define PALMAS_NSLEEP_RES_ASSIGN_SYSEN1_SHIFT			2
+#define PALMAS_NSLEEP_RES_ASSIGN_SYSEN1_SHIFT			0x02
 #define PALMAS_NSLEEP_RES_ASSIGN_REGEN2				0x02
-#define PALMAS_NSLEEP_RES_ASSIGN_REGEN2_SHIFT			1
+#define PALMAS_NSLEEP_RES_ASSIGN_REGEN2_SHIFT			0x01
 #define PALMAS_NSLEEP_RES_ASSIGN_REGEN1				0x01
-#define PALMAS_NSLEEP_RES_ASSIGN_REGEN1_SHIFT			0
+#define PALMAS_NSLEEP_RES_ASSIGN_REGEN1_SHIFT			0x00
 
 /* Bit definitions for NSLEEP_SMPS_ASSIGN */
 #define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS10			0x80
-#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS10_SHIFT			7
+#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS10_SHIFT			0x07
 #define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS9				0x40
-#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS9_SHIFT			6
+#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS9_SHIFT			0x06
 #define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS8				0x20
-#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS8_SHIFT			5
+#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS8_SHIFT			0x05
 #define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS7				0x10
-#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS7_SHIFT			4
+#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS7_SHIFT			0x04
 #define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS6				0x08
-#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS6_SHIFT			3
+#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS6_SHIFT			0x03
 #define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS45			0x04
-#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS45_SHIFT			2
+#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS45_SHIFT			0x02
 #define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS3				0x02
-#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS3_SHIFT			1
+#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS3_SHIFT			0x01
 #define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS12			0x01
-#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS12_SHIFT			0
+#define PALMAS_NSLEEP_SMPS_ASSIGN_SMPS12_SHIFT			0x00
 
 /* Bit definitions for NSLEEP_LDO_ASSIGN1 */
 #define PALMAS_NSLEEP_LDO_ASSIGN1_LDO8				0x80
-#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO8_SHIFT			7
+#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO8_SHIFT			0x07
 #define PALMAS_NSLEEP_LDO_ASSIGN1_LDO7				0x40
-#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO7_SHIFT			6
+#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO7_SHIFT			0x06
 #define PALMAS_NSLEEP_LDO_ASSIGN1_LDO6				0x20
-#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO6_SHIFT			5
+#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO6_SHIFT			0x05
 #define PALMAS_NSLEEP_LDO_ASSIGN1_LDO5				0x10
-#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO5_SHIFT			4
+#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO5_SHIFT			0x04
 #define PALMAS_NSLEEP_LDO_ASSIGN1_LDO4				0x08
-#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO4_SHIFT			3
+#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO4_SHIFT			0x03
 #define PALMAS_NSLEEP_LDO_ASSIGN1_LDO3				0x04
-#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO3_SHIFT			2
+#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO3_SHIFT			0x02
 #define PALMAS_NSLEEP_LDO_ASSIGN1_LDO2				0x02
-#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO2_SHIFT			1
+#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO2_SHIFT			0x01
 #define PALMAS_NSLEEP_LDO_ASSIGN1_LDO1				0x01
-#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO1_SHIFT			0
+#define PALMAS_NSLEEP_LDO_ASSIGN1_LDO1_SHIFT			0x00
 
 /* Bit definitions for NSLEEP_LDO_ASSIGN2 */
 #define PALMAS_NSLEEP_LDO_ASSIGN2_LDOUSB			0x04
-#define PALMAS_NSLEEP_LDO_ASSIGN2_LDOUSB_SHIFT			2
+#define PALMAS_NSLEEP_LDO_ASSIGN2_LDOUSB_SHIFT			0x02
 #define PALMAS_NSLEEP_LDO_ASSIGN2_LDOLN				0x02
-#define PALMAS_NSLEEP_LDO_ASSIGN2_LDOLN_SHIFT			1
+#define PALMAS_NSLEEP_LDO_ASSIGN2_LDOLN_SHIFT			0x01
 #define PALMAS_NSLEEP_LDO_ASSIGN2_LDO9				0x01
-#define PALMAS_NSLEEP_LDO_ASSIGN2_LDO9_SHIFT			0
+#define PALMAS_NSLEEP_LDO_ASSIGN2_LDO9_SHIFT			0x00
 
 /* Bit definitions for ENABLE1_RES_ASSIGN */
 #define PALMAS_ENABLE1_RES_ASSIGN_REGEN3			0x40
-#define PALMAS_ENABLE1_RES_ASSIGN_REGEN3_SHIFT			6
+#define PALMAS_ENABLE1_RES_ASSIGN_REGEN3_SHIFT			0x06
 #define PALMAS_ENABLE1_RES_ASSIGN_CLK32KGAUDIO			0x20
-#define PALMAS_ENABLE1_RES_ASSIGN_CLK32KGAUDIO_SHIFT		5
+#define PALMAS_ENABLE1_RES_ASSIGN_CLK32KGAUDIO_SHIFT		0x05
 #define PALMAS_ENABLE1_RES_ASSIGN_CLK32KG			0x10
-#define PALMAS_ENABLE1_RES_ASSIGN_CLK32KG_SHIFT			4
+#define PALMAS_ENABLE1_RES_ASSIGN_CLK32KG_SHIFT			0x04
 #define PALMAS_ENABLE1_RES_ASSIGN_SYSEN2			0x08
-#define PALMAS_ENABLE1_RES_ASSIGN_SYSEN2_SHIFT			3
+#define PALMAS_ENABLE1_RES_ASSIGN_SYSEN2_SHIFT			0x03
 #define PALMAS_ENABLE1_RES_ASSIGN_SYSEN1			0x04
-#define PALMAS_ENABLE1_RES_ASSIGN_SYSEN1_SHIFT			2
+#define PALMAS_ENABLE1_RES_ASSIGN_SYSEN1_SHIFT			0x02
 #define PALMAS_ENABLE1_RES_ASSIGN_REGEN2			0x02
-#define PALMAS_ENABLE1_RES_ASSIGN_REGEN2_SHIFT			1
+#define PALMAS_ENABLE1_RES_ASSIGN_REGEN2_SHIFT			0x01
 #define PALMAS_ENABLE1_RES_ASSIGN_REGEN1			0x01
-#define PALMAS_ENABLE1_RES_ASSIGN_REGEN1_SHIFT			0
+#define PALMAS_ENABLE1_RES_ASSIGN_REGEN1_SHIFT			0x00
 
 /* Bit definitions for ENABLE1_SMPS_ASSIGN */
 #define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS10			0x80
-#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS10_SHIFT			7
+#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS10_SHIFT			0x07
 #define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS9			0x40
-#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS9_SHIFT			6
+#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS9_SHIFT			0x06
 #define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS8			0x20
-#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS8_SHIFT			5
+#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS8_SHIFT			0x05
 #define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS7			0x10
-#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS7_SHIFT			4
+#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS7_SHIFT			0x04
 #define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS6			0x08
-#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS6_SHIFT			3
+#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS6_SHIFT			0x03
 #define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS45			0x04
-#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS45_SHIFT			2
+#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS45_SHIFT			0x02
 #define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS3			0x02
-#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS3_SHIFT			1
+#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS3_SHIFT			0x01
 #define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS12			0x01
-#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS12_SHIFT			0
+#define PALMAS_ENABLE1_SMPS_ASSIGN_SMPS12_SHIFT			0x00
 
 /* Bit definitions for ENABLE1_LDO_ASSIGN1 */
 #define PALMAS_ENABLE1_LDO_ASSIGN1_LDO8				0x80
-#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO8_SHIFT			7
+#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO8_SHIFT			0x07
 #define PALMAS_ENABLE1_LDO_ASSIGN1_LDO7				0x40
-#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO7_SHIFT			6
+#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO7_SHIFT			0x06
 #define PALMAS_ENABLE1_LDO_ASSIGN1_LDO6				0x20
-#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO6_SHIFT			5
+#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO6_SHIFT			0x05
 #define PALMAS_ENABLE1_LDO_ASSIGN1_LDO5				0x10
-#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO5_SHIFT			4
+#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO5_SHIFT			0x04
 #define PALMAS_ENABLE1_LDO_ASSIGN1_LDO4				0x08
-#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO4_SHIFT			3
+#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO4_SHIFT			0x03
 #define PALMAS_ENABLE1_LDO_ASSIGN1_LDO3				0x04
-#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO3_SHIFT			2
+#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO3_SHIFT			0x02
 #define PALMAS_ENABLE1_LDO_ASSIGN1_LDO2				0x02
-#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO2_SHIFT			1
+#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO2_SHIFT			0x01
 #define PALMAS_ENABLE1_LDO_ASSIGN1_LDO1				0x01
-#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO1_SHIFT			0
+#define PALMAS_ENABLE1_LDO_ASSIGN1_LDO1_SHIFT			0x00
 
 /* Bit definitions for ENABLE1_LDO_ASSIGN2 */
 #define PALMAS_ENABLE1_LDO_ASSIGN2_LDOUSB			0x04
-#define PALMAS_ENABLE1_LDO_ASSIGN2_LDOUSB_SHIFT			2
+#define PALMAS_ENABLE1_LDO_ASSIGN2_LDOUSB_SHIFT			0x02
 #define PALMAS_ENABLE1_LDO_ASSIGN2_LDOLN			0x02
-#define PALMAS_ENABLE1_LDO_ASSIGN2_LDOLN_SHIFT			1
+#define PALMAS_ENABLE1_LDO_ASSIGN2_LDOLN_SHIFT			0x01
 #define PALMAS_ENABLE1_LDO_ASSIGN2_LDO9				0x01
-#define PALMAS_ENABLE1_LDO_ASSIGN2_LDO9_SHIFT			0
+#define PALMAS_ENABLE1_LDO_ASSIGN2_LDO9_SHIFT			0x00
 
 /* Bit definitions for ENABLE2_RES_ASSIGN */
 #define PALMAS_ENABLE2_RES_ASSIGN_REGEN3			0x40
-#define PALMAS_ENABLE2_RES_ASSIGN_REGEN3_SHIFT			6
+#define PALMAS_ENABLE2_RES_ASSIGN_REGEN3_SHIFT			0x06
 #define PALMAS_ENABLE2_RES_ASSIGN_CLK32KGAUDIO			0x20
-#define PALMAS_ENABLE2_RES_ASSIGN_CLK32KGAUDIO_SHIFT		5
+#define PALMAS_ENABLE2_RES_ASSIGN_CLK32KGAUDIO_SHIFT		0x05
 #define PALMAS_ENABLE2_RES_ASSIGN_CLK32KG			0x10
-#define PALMAS_ENABLE2_RES_ASSIGN_CLK32KG_SHIFT			4
+#define PALMAS_ENABLE2_RES_ASSIGN_CLK32KG_SHIFT			0x04
 #define PALMAS_ENABLE2_RES_ASSIGN_SYSEN2			0x08
-#define PALMAS_ENABLE2_RES_ASSIGN_SYSEN2_SHIFT			3
+#define PALMAS_ENABLE2_RES_ASSIGN_SYSEN2_SHIFT			0x03
 #define PALMAS_ENABLE2_RES_ASSIGN_SYSEN1			0x04
-#define PALMAS_ENABLE2_RES_ASSIGN_SYSEN1_SHIFT			2
+#define PALMAS_ENABLE2_RES_ASSIGN_SYSEN1_SHIFT			0x02
 #define PALMAS_ENABLE2_RES_ASSIGN_REGEN2			0x02
-#define PALMAS_ENABLE2_RES_ASSIGN_REGEN2_SHIFT			1
+#define PALMAS_ENABLE2_RES_ASSIGN_REGEN2_SHIFT			0x01
 #define PALMAS_ENABLE2_RES_ASSIGN_REGEN1			0x01
-#define PALMAS_ENABLE2_RES_ASSIGN_REGEN1_SHIFT			0
+#define PALMAS_ENABLE2_RES_ASSIGN_REGEN1_SHIFT			0x00
 
 /* Bit definitions for ENABLE2_SMPS_ASSIGN */
 #define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS10			0x80
-#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS10_SHIFT			7
+#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS10_SHIFT			0x07
 #define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS9			0x40
-#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS9_SHIFT			6
+#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS9_SHIFT			0x06
 #define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS8			0x20
-#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS8_SHIFT			5
+#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS8_SHIFT			0x05
 #define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS7			0x10
-#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS7_SHIFT			4
+#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS7_SHIFT			0x04
 #define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS6			0x08
-#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS6_SHIFT			3
+#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS6_SHIFT			0x03
 #define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS45			0x04
-#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS45_SHIFT			2
+#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS45_SHIFT			0x02
 #define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS3			0x02
-#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS3_SHIFT			1
+#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS3_SHIFT			0x01
 #define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS12			0x01
-#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS12_SHIFT			0
+#define PALMAS_ENABLE2_SMPS_ASSIGN_SMPS12_SHIFT			0x00
 
 /* Bit definitions for ENABLE2_LDO_ASSIGN1 */
 #define PALMAS_ENABLE2_LDO_ASSIGN1_LDO8				0x80
-#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO8_SHIFT			7
+#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO8_SHIFT			0x07
 #define PALMAS_ENABLE2_LDO_ASSIGN1_LDO7				0x40
-#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO7_SHIFT			6
+#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO7_SHIFT			0x06
 #define PALMAS_ENABLE2_LDO_ASSIGN1_LDO6				0x20
-#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO6_SHIFT			5
+#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO6_SHIFT			0x05
 #define PALMAS_ENABLE2_LDO_ASSIGN1_LDO5				0x10
-#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO5_SHIFT			4
+#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO5_SHIFT			0x04
 #define PALMAS_ENABLE2_LDO_ASSIGN1_LDO4				0x08
-#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO4_SHIFT			3
+#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO4_SHIFT			0x03
 #define PALMAS_ENABLE2_LDO_ASSIGN1_LDO3				0x04
-#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO3_SHIFT			2
+#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO3_SHIFT			0x02
 #define PALMAS_ENABLE2_LDO_ASSIGN1_LDO2				0x02
-#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO2_SHIFT			1
+#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO2_SHIFT			0x01
 #define PALMAS_ENABLE2_LDO_ASSIGN1_LDO1				0x01
-#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO1_SHIFT			0
+#define PALMAS_ENABLE2_LDO_ASSIGN1_LDO1_SHIFT			0x00
 
 /* Bit definitions for ENABLE2_LDO_ASSIGN2 */
 #define PALMAS_ENABLE2_LDO_ASSIGN2_LDOUSB			0x04
-#define PALMAS_ENABLE2_LDO_ASSIGN2_LDOUSB_SHIFT			2
+#define PALMAS_ENABLE2_LDO_ASSIGN2_LDOUSB_SHIFT			0x02
 #define PALMAS_ENABLE2_LDO_ASSIGN2_LDOLN			0x02
-#define PALMAS_ENABLE2_LDO_ASSIGN2_LDOLN_SHIFT			1
+#define PALMAS_ENABLE2_LDO_ASSIGN2_LDOLN_SHIFT			0x01
 #define PALMAS_ENABLE2_LDO_ASSIGN2_LDO9				0x01
-#define PALMAS_ENABLE2_LDO_ASSIGN2_LDO9_SHIFT			0
+#define PALMAS_ENABLE2_LDO_ASSIGN2_LDO9_SHIFT			0x00
 
 /* Bit definitions for REGEN3_CTRL */
 #define PALMAS_REGEN3_CTRL_STATUS				0x10
-#define PALMAS_REGEN3_CTRL_STATUS_SHIFT				4
+#define PALMAS_REGEN3_CTRL_STATUS_SHIFT				0x04
 #define PALMAS_REGEN3_CTRL_MODE_SLEEP				0x04
-#define PALMAS_REGEN3_CTRL_MODE_SLEEP_SHIFT			2
+#define PALMAS_REGEN3_CTRL_MODE_SLEEP_SHIFT			0x02
 #define PALMAS_REGEN3_CTRL_MODE_ACTIVE				0x01
-#define PALMAS_REGEN3_CTRL_MODE_ACTIVE_SHIFT			0
+#define PALMAS_REGEN3_CTRL_MODE_ACTIVE_SHIFT			0x00
 
 /* Registers for function PAD_CONTROL */
-#define PALMAS_OD_OUTPUT_CTRL2					0x2
-#define PALMAS_POLARITY_CTRL2					0x3
-#define PALMAS_PU_PD_INPUT_CTRL1				0x4
-#define PALMAS_PU_PD_INPUT_CTRL2				0x5
-#define PALMAS_PU_PD_INPUT_CTRL3				0x6
-#define PALMAS_PU_PD_INPUT_CTRL5				0x7
-#define PALMAS_OD_OUTPUT_CTRL					0x8
-#define PALMAS_POLARITY_CTRL					0x9
-#define PALMAS_PRIMARY_SECONDARY_PAD1				0xA
-#define PALMAS_PRIMARY_SECONDARY_PAD2				0xB
-#define PALMAS_I2C_SPI						0xC
-#define PALMAS_PU_PD_INPUT_CTRL4				0xD
-#define PALMAS_PRIMARY_SECONDARY_PAD3				0xE
-#define PALMAS_PRIMARY_SECONDARY_PAD4				0xF
+#define PALMAS_OD_OUTPUT_CTRL2					0x02
+#define PALMAS_POLARITY_CTRL2					0x03
+#define PALMAS_PU_PD_INPUT_CTRL1				0x04
+#define PALMAS_PU_PD_INPUT_CTRL2				0x05
+#define PALMAS_PU_PD_INPUT_CTRL3				0x06
+#define PALMAS_PU_PD_INPUT_CTRL5				0x07
+#define PALMAS_OD_OUTPUT_CTRL					0x08
+#define PALMAS_POLARITY_CTRL					0x09
+#define PALMAS_PRIMARY_SECONDARY_PAD1				0x0A
+#define PALMAS_PRIMARY_SECONDARY_PAD2				0x0B
+#define PALMAS_I2C_SPI						0x0C
+#define PALMAS_PU_PD_INPUT_CTRL4				0x0D
+#define PALMAS_PRIMARY_SECONDARY_PAD3				0x0E
+#define PALMAS_PRIMARY_SECONDARY_PAD4				0x0F
 
 /* Bit definitions for PU_PD_INPUT_CTRL1 */
 #define PALMAS_PU_PD_INPUT_CTRL1_RESET_IN_PD			0x40
-#define PALMAS_PU_PD_INPUT_CTRL1_RESET_IN_PD_SHIFT		6
+#define PALMAS_PU_PD_INPUT_CTRL1_RESET_IN_PD_SHIFT		0x06
 #define PALMAS_PU_PD_INPUT_CTRL1_GPADC_START_PU			0x20
-#define PALMAS_PU_PD_INPUT_CTRL1_GPADC_START_PU_SHIFT		5
+#define PALMAS_PU_PD_INPUT_CTRL1_GPADC_START_PU_SHIFT		0x05
 #define PALMAS_PU_PD_INPUT_CTRL1_GPADC_START_PD			0x10
-#define PALMAS_PU_PD_INPUT_CTRL1_GPADC_START_PD_SHIFT		4
+#define PALMAS_PU_PD_INPUT_CTRL1_GPADC_START_PD_SHIFT		0x04
 #define PALMAS_PU_PD_INPUT_CTRL1_PWRDOWN_PD			0x04
-#define PALMAS_PU_PD_INPUT_CTRL1_PWRDOWN_PD_SHIFT		2
+#define PALMAS_PU_PD_INPUT_CTRL1_PWRDOWN_PD_SHIFT		0x02
 #define PALMAS_PU_PD_INPUT_CTRL1_NRESWARM_PU			0x02
-#define PALMAS_PU_PD_INPUT_CTRL1_NRESWARM_PU_SHIFT		1
+#define PALMAS_PU_PD_INPUT_CTRL1_NRESWARM_PU_SHIFT		0x01
 
 /* Bit definitions for PU_PD_INPUT_CTRL2 */
 #define PALMAS_PU_PD_INPUT_CTRL2_ENABLE2_PU			0x20
-#define PALMAS_PU_PD_INPUT_CTRL2_ENABLE2_PU_SHIFT		5
+#define PALMAS_PU_PD_INPUT_CTRL2_ENABLE2_PU_SHIFT		0x05
 #define PALMAS_PU_PD_INPUT_CTRL2_ENABLE2_PD			0x10
-#define PALMAS_PU_PD_INPUT_CTRL2_ENABLE2_PD_SHIFT		4
+#define PALMAS_PU_PD_INPUT_CTRL2_ENABLE2_PD_SHIFT		0x04
 #define PALMAS_PU_PD_INPUT_CTRL2_ENABLE1_PU			0x08
-#define PALMAS_PU_PD_INPUT_CTRL2_ENABLE1_PU_SHIFT		3
+#define PALMAS_PU_PD_INPUT_CTRL2_ENABLE1_PU_SHIFT		0x03
 #define PALMAS_PU_PD_INPUT_CTRL2_ENABLE1_PD			0x04
-#define PALMAS_PU_PD_INPUT_CTRL2_ENABLE1_PD_SHIFT		2
+#define PALMAS_PU_PD_INPUT_CTRL2_ENABLE1_PD_SHIFT		0x02
 #define PALMAS_PU_PD_INPUT_CTRL2_NSLEEP_PU			0x02
-#define PALMAS_PU_PD_INPUT_CTRL2_NSLEEP_PU_SHIFT		1
+#define PALMAS_PU_PD_INPUT_CTRL2_NSLEEP_PU_SHIFT		0x01
 #define PALMAS_PU_PD_INPUT_CTRL2_NSLEEP_PD			0x01
-#define PALMAS_PU_PD_INPUT_CTRL2_NSLEEP_PD_SHIFT		0
+#define PALMAS_PU_PD_INPUT_CTRL2_NSLEEP_PD_SHIFT		0x00
 
 /* Bit definitions for PU_PD_INPUT_CTRL3 */
 #define PALMAS_PU_PD_INPUT_CTRL3_ACOK_PD			0x40
-#define PALMAS_PU_PD_INPUT_CTRL3_ACOK_PD_SHIFT			6
+#define PALMAS_PU_PD_INPUT_CTRL3_ACOK_PD_SHIFT			0x06
 #define PALMAS_PU_PD_INPUT_CTRL3_CHRG_DET_N_PD			0x10
-#define PALMAS_PU_PD_INPUT_CTRL3_CHRG_DET_N_PD_SHIFT		4
+#define PALMAS_PU_PD_INPUT_CTRL3_CHRG_DET_N_PD_SHIFT		0x04
 #define PALMAS_PU_PD_INPUT_CTRL3_POWERHOLD_PD			0x04
-#define PALMAS_PU_PD_INPUT_CTRL3_POWERHOLD_PD_SHIFT		2
+#define PALMAS_PU_PD_INPUT_CTRL3_POWERHOLD_PD_SHIFT		0x02
 #define PALMAS_PU_PD_INPUT_CTRL3_MSECURE_PD			0x01
-#define PALMAS_PU_PD_INPUT_CTRL3_MSECURE_PD_SHIFT		0
+#define PALMAS_PU_PD_INPUT_CTRL3_MSECURE_PD_SHIFT		0x00
 
 /* Bit definitions for OD_OUTPUT_CTRL */
 #define PALMAS_OD_OUTPUT_CTRL_PWM_2_OD				0x80
-#define PALMAS_OD_OUTPUT_CTRL_PWM_2_OD_SHIFT			7
+#define PALMAS_OD_OUTPUT_CTRL_PWM_2_OD_SHIFT			0x07
 #define PALMAS_OD_OUTPUT_CTRL_VBUSDET_OD			0x40
-#define PALMAS_OD_OUTPUT_CTRL_VBUSDET_OD_SHIFT			6
+#define PALMAS_OD_OUTPUT_CTRL_VBUSDET_OD_SHIFT			0x06
 #define PALMAS_OD_OUTPUT_CTRL_PWM_1_OD				0x20
-#define PALMAS_OD_OUTPUT_CTRL_PWM_1_OD_SHIFT			5
+#define PALMAS_OD_OUTPUT_CTRL_PWM_1_OD_SHIFT			0x05
 #define PALMAS_OD_OUTPUT_CTRL_INT_OD				0x08
-#define PALMAS_OD_OUTPUT_CTRL_INT_OD_SHIFT			3
+#define PALMAS_OD_OUTPUT_CTRL_INT_OD_SHIFT			0x03
 
 /* Bit definitions for POLARITY_CTRL */
 #define PALMAS_POLARITY_CTRL_INT_POLARITY			0x80
-#define PALMAS_POLARITY_CTRL_INT_POLARITY_SHIFT			7
+#define PALMAS_POLARITY_CTRL_INT_POLARITY_SHIFT			0x07
 #define PALMAS_POLARITY_CTRL_ENABLE2_POLARITY			0x40
-#define PALMAS_POLARITY_CTRL_ENABLE2_POLARITY_SHIFT		6
+#define PALMAS_POLARITY_CTRL_ENABLE2_POLARITY_SHIFT		0x06
 #define PALMAS_POLARITY_CTRL_ENABLE1_POLARITY			0x20
-#define PALMAS_POLARITY_CTRL_ENABLE1_POLARITY_SHIFT		5
+#define PALMAS_POLARITY_CTRL_ENABLE1_POLARITY_SHIFT		0x05
 #define PALMAS_POLARITY_CTRL_NSLEEP_POLARITY			0x10
-#define PALMAS_POLARITY_CTRL_NSLEEP_POLARITY_SHIFT		4
+#define PALMAS_POLARITY_CTRL_NSLEEP_POLARITY_SHIFT		0x04
 #define PALMAS_POLARITY_CTRL_RESET_IN_POLARITY			0x08
-#define PALMAS_POLARITY_CTRL_RESET_IN_POLARITY_SHIFT		3
+#define PALMAS_POLARITY_CTRL_RESET_IN_POLARITY_SHIFT		0x03
 #define PALMAS_POLARITY_CTRL_GPIO_3_CHRG_DET_N_POLARITY		0x04
-#define PALMAS_POLARITY_CTRL_GPIO_3_CHRG_DET_N_POLARITY_SHIFT	2
+#define PALMAS_POLARITY_CTRL_GPIO_3_CHRG_DET_N_POLARITY_SHIFT	0x02
 #define PALMAS_POLARITY_CTRL_POWERGOOD_USB_PSEL_POLARITY	0x02
-#define PALMAS_POLARITY_CTRL_POWERGOOD_USB_PSEL_POLARITY_SHIFT	1
+#define PALMAS_POLARITY_CTRL_POWERGOOD_USB_PSEL_POLARITY_SHIFT	0x01
 #define PALMAS_POLARITY_CTRL_PWRDOWN_POLARITY			0x01
-#define PALMAS_POLARITY_CTRL_PWRDOWN_POLARITY_SHIFT		0
+#define PALMAS_POLARITY_CTRL_PWRDOWN_POLARITY_SHIFT		0x00
 
 /* Bit definitions for PRIMARY_SECONDARY_PAD1 */
 #define PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_3			0x80
-#define PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_3_SHIFT		7
+#define PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_3_SHIFT		0x07
 #define PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_2_MASK		0x60
-#define PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_2_SHIFT		5
+#define PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_2_SHIFT		0x05
 #define PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_1_MASK		0x18
-#define PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_1_SHIFT		3
+#define PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_1_SHIFT		0x03
 #define PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_0			0x04
-#define PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_0_SHIFT		2
+#define PALMAS_PRIMARY_SECONDARY_PAD1_GPIO_0_SHIFT		0x02
 #define PALMAS_PRIMARY_SECONDARY_PAD1_VAC			0x02
-#define PALMAS_PRIMARY_SECONDARY_PAD1_VAC_SHIFT			1
+#define PALMAS_PRIMARY_SECONDARY_PAD1_VAC_SHIFT			0x01
 #define PALMAS_PRIMARY_SECONDARY_PAD1_POWERGOOD			0x01
-#define PALMAS_PRIMARY_SECONDARY_PAD1_POWERGOOD_SHIFT		0
+#define PALMAS_PRIMARY_SECONDARY_PAD1_POWERGOOD_SHIFT		0x00
 
 /* Bit definitions for PRIMARY_SECONDARY_PAD2 */
 #define PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_7_MASK		0x30
-#define PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_7_SHIFT		4
+#define PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_7_SHIFT		0x04
 #define PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_6			0x08
-#define PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_6_SHIFT		3
+#define PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_6_SHIFT		0x03
 #define PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_5_MASK		0x06
-#define PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_5_SHIFT		1
+#define PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_5_SHIFT		0x01
 #define PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_4			0x01
-#define PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_4_SHIFT		0
+#define PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_4_SHIFT		0x00
 
 /* Bit definitions for I2C_SPI */
 #define PALMAS_I2C_SPI_I2C2OTP_EN				0x80
-#define PALMAS_I2C_SPI_I2C2OTP_EN_SHIFT				7
+#define PALMAS_I2C_SPI_I2C2OTP_EN_SHIFT				0x07
 #define PALMAS_I2C_SPI_I2C2OTP_PAGESEL				0x40
-#define PALMAS_I2C_SPI_I2C2OTP_PAGESEL_SHIFT			6
+#define PALMAS_I2C_SPI_I2C2OTP_PAGESEL_SHIFT			0x06
 #define PALMAS_I2C_SPI_ID_I2C2					0x20
-#define PALMAS_I2C_SPI_ID_I2C2_SHIFT				5
+#define PALMAS_I2C_SPI_ID_I2C2_SHIFT				0x05
 #define PALMAS_I2C_SPI_I2C_SPI					0x10
-#define PALMAS_I2C_SPI_I2C_SPI_SHIFT				4
-#define PALMAS_I2C_SPI_ID_I2C1_MASK				0x0f
-#define PALMAS_I2C_SPI_ID_I2C1_SHIFT				0
+#define PALMAS_I2C_SPI_I2C_SPI_SHIFT				0x04
+#define PALMAS_I2C_SPI_ID_I2C1_MASK				0x0F
+#define PALMAS_I2C_SPI_ID_I2C1_SHIFT				0x00
 
 /* Bit definitions for PU_PD_INPUT_CTRL4 */
 #define PALMAS_PU_PD_INPUT_CTRL4_DVFS2_DAT_PD			0x40
-#define PALMAS_PU_PD_INPUT_CTRL4_DVFS2_DAT_PD_SHIFT		6
+#define PALMAS_PU_PD_INPUT_CTRL4_DVFS2_DAT_PD_SHIFT		0x06
 #define PALMAS_PU_PD_INPUT_CTRL4_DVFS2_CLK_PD			0x10
-#define PALMAS_PU_PD_INPUT_CTRL4_DVFS2_CLK_PD_SHIFT		4
+#define PALMAS_PU_PD_INPUT_CTRL4_DVFS2_CLK_PD_SHIFT		0x04
 #define PALMAS_PU_PD_INPUT_CTRL4_DVFS1_DAT_PD			0x04
-#define PALMAS_PU_PD_INPUT_CTRL4_DVFS1_DAT_PD_SHIFT		2
+#define PALMAS_PU_PD_INPUT_CTRL4_DVFS1_DAT_PD_SHIFT		0x02
 #define PALMAS_PU_PD_INPUT_CTRL4_DVFS1_CLK_PD			0x01
-#define PALMAS_PU_PD_INPUT_CTRL4_DVFS1_CLK_PD_SHIFT		0
+#define PALMAS_PU_PD_INPUT_CTRL4_DVFS1_CLK_PD_SHIFT		0x00
 
 /* Bit definitions for PRIMARY_SECONDARY_PAD3 */
 #define PALMAS_PRIMARY_SECONDARY_PAD3_DVFS2			0x02
-#define PALMAS_PRIMARY_SECONDARY_PAD3_DVFS2_SHIFT		1
+#define PALMAS_PRIMARY_SECONDARY_PAD3_DVFS2_SHIFT		0x01
 #define PALMAS_PRIMARY_SECONDARY_PAD3_DVFS1			0x01
-#define PALMAS_PRIMARY_SECONDARY_PAD3_DVFS1_SHIFT		0
+#define PALMAS_PRIMARY_SECONDARY_PAD3_DVFS1_SHIFT		0x00
 
 /* Registers for function LED_PWM */
-#define PALMAS_LED_PERIOD_CTRL					0x0
-#define PALMAS_LED_CTRL						0x1
-#define PALMAS_PWM_CTRL1					0x2
-#define PALMAS_PWM_CTRL2					0x3
+#define PALMAS_LED_PERIOD_CTRL					0x00
+#define PALMAS_LED_CTRL						0x01
+#define PALMAS_PWM_CTRL1					0x02
+#define PALMAS_PWM_CTRL2					0x03
 
 /* Bit definitions for LED_PERIOD_CTRL */
 #define PALMAS_LED_PERIOD_CTRL_LED_2_PERIOD_MASK		0x38
-#define PALMAS_LED_PERIOD_CTRL_LED_2_PERIOD_SHIFT		3
+#define PALMAS_LED_PERIOD_CTRL_LED_2_PERIOD_SHIFT		0x03
 #define PALMAS_LED_PERIOD_CTRL_LED_1_PERIOD_MASK		0x07
-#define PALMAS_LED_PERIOD_CTRL_LED_1_PERIOD_SHIFT		0
+#define PALMAS_LED_PERIOD_CTRL_LED_1_PERIOD_SHIFT		0x00
 
 /* Bit definitions for LED_CTRL */
 #define PALMAS_LED_CTRL_LED_2_SEQ				0x20
-#define PALMAS_LED_CTRL_LED_2_SEQ_SHIFT				5
+#define PALMAS_LED_CTRL_LED_2_SEQ_SHIFT				0x05
 #define PALMAS_LED_CTRL_LED_1_SEQ				0x10
-#define PALMAS_LED_CTRL_LED_1_SEQ_SHIFT				4
+#define PALMAS_LED_CTRL_LED_1_SEQ_SHIFT				0x04
 #define PALMAS_LED_CTRL_LED_2_ON_TIME_MASK			0x0c
-#define PALMAS_LED_CTRL_LED_2_ON_TIME_SHIFT			2
+#define PALMAS_LED_CTRL_LED_2_ON_TIME_SHIFT			0x02
 #define PALMAS_LED_CTRL_LED_1_ON_TIME_MASK			0x03
-#define PALMAS_LED_CTRL_LED_1_ON_TIME_SHIFT			0
+#define PALMAS_LED_CTRL_LED_1_ON_TIME_SHIFT			0x00
 
 /* Bit definitions for PWM_CTRL1 */
 #define PALMAS_PWM_CTRL1_PWM_FREQ_EN				0x02
-#define PALMAS_PWM_CTRL1_PWM_FREQ_EN_SHIFT			1
+#define PALMAS_PWM_CTRL1_PWM_FREQ_EN_SHIFT			0x01
 #define PALMAS_PWM_CTRL1_PWM_FREQ_SEL				0x01
-#define PALMAS_PWM_CTRL1_PWM_FREQ_SEL_SHIFT			0
+#define PALMAS_PWM_CTRL1_PWM_FREQ_SEL_SHIFT			0x00
 
 /* Bit definitions for PWM_CTRL2 */
-#define PALMAS_PWM_CTRL2_PWM_DUTY_SEL_MASK			0xff
-#define PALMAS_PWM_CTRL2_PWM_DUTY_SEL_SHIFT			0
+#define PALMAS_PWM_CTRL2_PWM_DUTY_SEL_MASK			0xFF
+#define PALMAS_PWM_CTRL2_PWM_DUTY_SEL_SHIFT			0x00
 
 /* Registers for function INTERRUPT */
-#define PALMAS_INT1_STATUS					0x0
-#define PALMAS_INT1_MASK					0x1
-#define PALMAS_INT1_LINE_STATE					0x2
-#define PALMAS_INT1_EDGE_DETECT1_RESERVED			0x3
-#define PALMAS_INT1_EDGE_DETECT2_RESERVED			0x4
-#define PALMAS_INT2_STATUS					0x5
-#define PALMAS_INT2_MASK					0x6
-#define PALMAS_INT2_LINE_STATE					0x7
-#define PALMAS_INT2_EDGE_DETECT1_RESERVED			0x8
-#define PALMAS_INT2_EDGE_DETECT2_RESERVED			0x9
-#define PALMAS_INT3_STATUS					0xA
-#define PALMAS_INT3_MASK					0xB
-#define PALMAS_INT3_LINE_STATE					0xC
-#define PALMAS_INT3_EDGE_DETECT1_RESERVED			0xD
-#define PALMAS_INT3_EDGE_DETECT2_RESERVED			0xE
-#define PALMAS_INT4_STATUS					0xF
+#define PALMAS_INT1_STATUS					0x00
+#define PALMAS_INT1_MASK					0x01
+#define PALMAS_INT1_LINE_STATE					0x02
+#define PALMAS_INT1_EDGE_DETECT1_RESERVED			0x03
+#define PALMAS_INT1_EDGE_DETECT2_RESERVED			0x04
+#define PALMAS_INT2_STATUS					0x05
+#define PALMAS_INT2_MASK					0x06
+#define PALMAS_INT2_LINE_STATE					0x07
+#define PALMAS_INT2_EDGE_DETECT1_RESERVED			0x08
+#define PALMAS_INT2_EDGE_DETECT2_RESERVED			0x09
+#define PALMAS_INT3_STATUS					0x0A
+#define PALMAS_INT3_MASK					0x0B
+#define PALMAS_INT3_LINE_STATE					0x0C
+#define PALMAS_INT3_EDGE_DETECT1_RESERVED			0x0D
+#define PALMAS_INT3_EDGE_DETECT2_RESERVED			0x0E
+#define PALMAS_INT4_STATUS					0x0F
 #define PALMAS_INT4_MASK					0x10
 #define PALMAS_INT4_LINE_STATE					0x11
 #define PALMAS_INT4_EDGE_DETECT1				0x12
@@ -1966,276 +1966,276 @@ enum usb_irq_events {
 
 /* Bit definitions for INT1_STATUS */
 #define PALMAS_INT1_STATUS_VBAT_MON				0x80
-#define PALMAS_INT1_STATUS_VBAT_MON_SHIFT			7
+#define PALMAS_INT1_STATUS_VBAT_MON_SHIFT			0x07
 #define PALMAS_INT1_STATUS_VSYS_MON				0x40
-#define PALMAS_INT1_STATUS_VSYS_MON_SHIFT			6
+#define PALMAS_INT1_STATUS_VSYS_MON_SHIFT			0x06
 #define PALMAS_INT1_STATUS_HOTDIE				0x20
-#define PALMAS_INT1_STATUS_HOTDIE_SHIFT				5
+#define PALMAS_INT1_STATUS_HOTDIE_SHIFT				0x05
 #define PALMAS_INT1_STATUS_PWRDOWN				0x10
-#define PALMAS_INT1_STATUS_PWRDOWN_SHIFT			4
+#define PALMAS_INT1_STATUS_PWRDOWN_SHIFT			0x04
 #define PALMAS_INT1_STATUS_RPWRON				0x08
-#define PALMAS_INT1_STATUS_RPWRON_SHIFT				3
+#define PALMAS_INT1_STATUS_RPWRON_SHIFT				0x03
 #define PALMAS_INT1_STATUS_LONG_PRESS_KEY			0x04
-#define PALMAS_INT1_STATUS_LONG_PRESS_KEY_SHIFT			2
+#define PALMAS_INT1_STATUS_LONG_PRESS_KEY_SHIFT			0x02
 #define PALMAS_INT1_STATUS_PWRON				0x02
-#define PALMAS_INT1_STATUS_PWRON_SHIFT				1
+#define PALMAS_INT1_STATUS_PWRON_SHIFT				0x01
 #define PALMAS_INT1_STATUS_CHARG_DET_N_VBUS_OVV			0x01
-#define PALMAS_INT1_STATUS_CHARG_DET_N_VBUS_OVV_SHIFT		0
+#define PALMAS_INT1_STATUS_CHARG_DET_N_VBUS_OVV_SHIFT		0x00
 
 /* Bit definitions for INT1_MASK */
 #define PALMAS_INT1_MASK_VBAT_MON				0x80
-#define PALMAS_INT1_MASK_VBAT_MON_SHIFT				7
+#define PALMAS_INT1_MASK_VBAT_MON_SHIFT				0x07
 #define PALMAS_INT1_MASK_VSYS_MON				0x40
-#define PALMAS_INT1_MASK_VSYS_MON_SHIFT				6
+#define PALMAS_INT1_MASK_VSYS_MON_SHIFT				0x06
 #define PALMAS_INT1_MASK_HOTDIE					0x20
-#define PALMAS_INT1_MASK_HOTDIE_SHIFT				5
+#define PALMAS_INT1_MASK_HOTDIE_SHIFT				0x05
 #define PALMAS_INT1_MASK_PWRDOWN				0x10
-#define PALMAS_INT1_MASK_PWRDOWN_SHIFT				4
+#define PALMAS_INT1_MASK_PWRDOWN_SHIFT				0x04
 #define PALMAS_INT1_MASK_RPWRON					0x08
-#define PALMAS_INT1_MASK_RPWRON_SHIFT				3
+#define PALMAS_INT1_MASK_RPWRON_SHIFT				0x03
 #define PALMAS_INT1_MASK_LONG_PRESS_KEY				0x04
-#define PALMAS_INT1_MASK_LONG_PRESS_KEY_SHIFT			2
+#define PALMAS_INT1_MASK_LONG_PRESS_KEY_SHIFT			0x02
 #define PALMAS_INT1_MASK_PWRON					0x02
-#define PALMAS_INT1_MASK_PWRON_SHIFT				1
+#define PALMAS_INT1_MASK_PWRON_SHIFT				0x01
 #define PALMAS_INT1_MASK_CHARG_DET_N_VBUS_OVV			0x01
-#define PALMAS_INT1_MASK_CHARG_DET_N_VBUS_OVV_SHIFT		0
+#define PALMAS_INT1_MASK_CHARG_DET_N_VBUS_OVV_SHIFT		0x00
 
 /* Bit definitions for INT1_LINE_STATE */
 #define PALMAS_INT1_LINE_STATE_VBAT_MON				0x80
-#define PALMAS_INT1_LINE_STATE_VBAT_MON_SHIFT			7
+#define PALMAS_INT1_LINE_STATE_VBAT_MON_SHIFT			0x07
 #define PALMAS_INT1_LINE_STATE_VSYS_MON				0x40
-#define PALMAS_INT1_LINE_STATE_VSYS_MON_SHIFT			6
+#define PALMAS_INT1_LINE_STATE_VSYS_MON_SHIFT			0x06
 #define PALMAS_INT1_LINE_STATE_HOTDIE				0x20
-#define PALMAS_INT1_LINE_STATE_HOTDIE_SHIFT			5
+#define PALMAS_INT1_LINE_STATE_HOTDIE_SHIFT			0x05
 #define PALMAS_INT1_LINE_STATE_PWRDOWN				0x10
-#define PALMAS_INT1_LINE_STATE_PWRDOWN_SHIFT			4
+#define PALMAS_INT1_LINE_STATE_PWRDOWN_SHIFT			0x04
 #define PALMAS_INT1_LINE_STATE_RPWRON				0x08
-#define PALMAS_INT1_LINE_STATE_RPWRON_SHIFT			3
+#define PALMAS_INT1_LINE_STATE_RPWRON_SHIFT			0x03
 #define PALMAS_INT1_LINE_STATE_LONG_PRESS_KEY			0x04
-#define PALMAS_INT1_LINE_STATE_LONG_PRESS_KEY_SHIFT		2
+#define PALMAS_INT1_LINE_STATE_LONG_PRESS_KEY_SHIFT		0x02
 #define PALMAS_INT1_LINE_STATE_PWRON				0x02
-#define PALMAS_INT1_LINE_STATE_PWRON_SHIFT			1
+#define PALMAS_INT1_LINE_STATE_PWRON_SHIFT			0x01
 #define PALMAS_INT1_LINE_STATE_CHARG_DET_N_VBUS_OVV		0x01
-#define PALMAS_INT1_LINE_STATE_CHARG_DET_N_VBUS_OVV_SHIFT	0
+#define PALMAS_INT1_LINE_STATE_CHARG_DET_N_VBUS_OVV_SHIFT	0x00
 
 /* Bit definitions for INT2_STATUS */
 #define PALMAS_INT2_STATUS_VAC_ACOK				0x80
-#define PALMAS_INT2_STATUS_VAC_ACOK_SHIFT			7
+#define PALMAS_INT2_STATUS_VAC_ACOK_SHIFT			0x07
 #define PALMAS_INT2_STATUS_SHORT				0x40
-#define PALMAS_INT2_STATUS_SHORT_SHIFT				6
+#define PALMAS_INT2_STATUS_SHORT_SHIFT				0x06
 #define PALMAS_INT2_STATUS_FBI_BB				0x20
-#define PALMAS_INT2_STATUS_FBI_BB_SHIFT				5
+#define PALMAS_INT2_STATUS_FBI_BB_SHIFT				0x05
 #define PALMAS_INT2_STATUS_RESET_IN				0x10
-#define PALMAS_INT2_STATUS_RESET_IN_SHIFT			4
+#define PALMAS_INT2_STATUS_RESET_IN_SHIFT			0x04
 #define PALMAS_INT2_STATUS_BATREMOVAL				0x08
-#define PALMAS_INT2_STATUS_BATREMOVAL_SHIFT			3
+#define PALMAS_INT2_STATUS_BATREMOVAL_SHIFT			0x03
 #define PALMAS_INT2_STATUS_WDT					0x04
-#define PALMAS_INT2_STATUS_WDT_SHIFT				2
+#define PALMAS_INT2_STATUS_WDT_SHIFT				0x02
 #define PALMAS_INT2_STATUS_RTC_TIMER				0x02
-#define PALMAS_INT2_STATUS_RTC_TIMER_SHIFT			1
+#define PALMAS_INT2_STATUS_RTC_TIMER_SHIFT			0x01
 #define PALMAS_INT2_STATUS_RTC_ALARM				0x01
-#define PALMAS_INT2_STATUS_RTC_ALARM_SHIFT			0
+#define PALMAS_INT2_STATUS_RTC_ALARM_SHIFT			0x00
 
 /* Bit definitions for INT2_MASK */
 #define PALMAS_INT2_MASK_VAC_ACOK				0x80
-#define PALMAS_INT2_MASK_VAC_ACOK_SHIFT				7
+#define PALMAS_INT2_MASK_VAC_ACOK_SHIFT				0x07
 #define PALMAS_INT2_MASK_SHORT					0x40
-#define PALMAS_INT2_MASK_SHORT_SHIFT				6
+#define PALMAS_INT2_MASK_SHORT_SHIFT				0x06
 #define PALMAS_INT2_MASK_FBI_BB					0x20
-#define PALMAS_INT2_MASK_FBI_BB_SHIFT				5
+#define PALMAS_INT2_MASK_FBI_BB_SHIFT				0x05
 #define PALMAS_INT2_MASK_RESET_IN				0x10
-#define PALMAS_INT2_MASK_RESET_IN_SHIFT				4
+#define PALMAS_INT2_MASK_RESET_IN_SHIFT				0x04
 #define PALMAS_INT2_MASK_BATREMOVAL				0x08
-#define PALMAS_INT2_MASK_BATREMOVAL_SHIFT			3
+#define PALMAS_INT2_MASK_BATREMOVAL_SHIFT			0x03
 #define PALMAS_INT2_MASK_WDT					0x04
-#define PALMAS_INT2_MASK_WDT_SHIFT				2
+#define PALMAS_INT2_MASK_WDT_SHIFT				0x02
 #define PALMAS_INT2_MASK_RTC_TIMER				0x02
-#define PALMAS_INT2_MASK_RTC_TIMER_SHIFT			1
+#define PALMAS_INT2_MASK_RTC_TIMER_SHIFT			0x01
 #define PALMAS_INT2_MASK_RTC_ALARM				0x01
-#define PALMAS_INT2_MASK_RTC_ALARM_SHIFT			0
+#define PALMAS_INT2_MASK_RTC_ALARM_SHIFT			0x00
 
 /* Bit definitions for INT2_LINE_STATE */
 #define PALMAS_INT2_LINE_STATE_VAC_ACOK				0x80
-#define PALMAS_INT2_LINE_STATE_VAC_ACOK_SHIFT			7
+#define PALMAS_INT2_LINE_STATE_VAC_ACOK_SHIFT			0x07
 #define PALMAS_INT2_LINE_STATE_SHORT				0x40
-#define PALMAS_INT2_LINE_STATE_SHORT_SHIFT			6
+#define PALMAS_INT2_LINE_STATE_SHORT_SHIFT			0x06
 #define PALMAS_INT2_LINE_STATE_FBI_BB				0x20
-#define PALMAS_INT2_LINE_STATE_FBI_BB_SHIFT			5
+#define PALMAS_INT2_LINE_STATE_FBI_BB_SHIFT			0x05
 #define PALMAS_INT2_LINE_STATE_RESET_IN				0x10
-#define PALMAS_INT2_LINE_STATE_RESET_IN_SHIFT			4
+#define PALMAS_INT2_LINE_STATE_RESET_IN_SHIFT			0x04
 #define PALMAS_INT2_LINE_STATE_BATREMOVAL			0x08
-#define PALMAS_INT2_LINE_STATE_BATREMOVAL_SHIFT			3
+#define PALMAS_INT2_LINE_STATE_BATREMOVAL_SHIFT			0x03
 #define PALMAS_INT2_LINE_STATE_WDT				0x04
-#define PALMAS_INT2_LINE_STATE_WDT_SHIFT			2
+#define PALMAS_INT2_LINE_STATE_WDT_SHIFT			0x02
 #define PALMAS_INT2_LINE_STATE_RTC_TIMER			0x02
-#define PALMAS_INT2_LINE_STATE_RTC_TIMER_SHIFT			1
+#define PALMAS_INT2_LINE_STATE_RTC_TIMER_SHIFT			0x01
 #define PALMAS_INT2_LINE_STATE_RTC_ALARM			0x01
-#define PALMAS_INT2_LINE_STATE_RTC_ALARM_SHIFT			0
+#define PALMAS_INT2_LINE_STATE_RTC_ALARM_SHIFT			0x00
 
 /* Bit definitions for INT3_STATUS */
 #define PALMAS_INT3_STATUS_VBUS					0x80
-#define PALMAS_INT3_STATUS_VBUS_SHIFT				7
+#define PALMAS_INT3_STATUS_VBUS_SHIFT				0x07
 #define PALMAS_INT3_STATUS_VBUS_OTG				0x40
-#define PALMAS_INT3_STATUS_VBUS_OTG_SHIFT			6
+#define PALMAS_INT3_STATUS_VBUS_OTG_SHIFT			0x06
 #define PALMAS_INT3_STATUS_ID					0x20
-#define PALMAS_INT3_STATUS_ID_SHIFT				5
+#define PALMAS_INT3_STATUS_ID_SHIFT				0x05
 #define PALMAS_INT3_STATUS_ID_OTG				0x10
-#define PALMAS_INT3_STATUS_ID_OTG_SHIFT				4
+#define PALMAS_INT3_STATUS_ID_OTG_SHIFT				0x04
 #define PALMAS_INT3_STATUS_GPADC_EOC_RT				0x08
-#define PALMAS_INT3_STATUS_GPADC_EOC_RT_SHIFT			3
+#define PALMAS_INT3_STATUS_GPADC_EOC_RT_SHIFT			0x03
 #define PALMAS_INT3_STATUS_GPADC_EOC_SW				0x04
-#define PALMAS_INT3_STATUS_GPADC_EOC_SW_SHIFT			2
+#define PALMAS_INT3_STATUS_GPADC_EOC_SW_SHIFT			0x02
 #define PALMAS_INT3_STATUS_GPADC_AUTO_1				0x02
-#define PALMAS_INT3_STATUS_GPADC_AUTO_1_SHIFT			1
+#define PALMAS_INT3_STATUS_GPADC_AUTO_1_SHIFT			0x01
 #define PALMAS_INT3_STATUS_GPADC_AUTO_0				0x01
-#define PALMAS_INT3_STATUS_GPADC_AUTO_0_SHIFT			0
+#define PALMAS_INT3_STATUS_GPADC_AUTO_0_SHIFT			0x00
 
 /* Bit definitions for INT3_MASK */
 #define PALMAS_INT3_MASK_VBUS					0x80
-#define PALMAS_INT3_MASK_VBUS_SHIFT				7
+#define PALMAS_INT3_MASK_VBUS_SHIFT				0x07
 #define PALMAS_INT3_MASK_VBUS_OTG				0x40
-#define PALMAS_INT3_MASK_VBUS_OTG_SHIFT				6
+#define PALMAS_INT3_MASK_VBUS_OTG_SHIFT				0x06
 #define PALMAS_INT3_MASK_ID					0x20
-#define PALMAS_INT3_MASK_ID_SHIFT				5
+#define PALMAS_INT3_MASK_ID_SHIFT				0x05
 #define PALMAS_INT3_MASK_ID_OTG					0x10
-#define PALMAS_INT3_MASK_ID_OTG_SHIFT				4
+#define PALMAS_INT3_MASK_ID_OTG_SHIFT				0x04
 #define PALMAS_INT3_MASK_GPADC_EOC_RT				0x08
-#define PALMAS_INT3_MASK_GPADC_EOC_RT_SHIFT			3
+#define PALMAS_INT3_MASK_GPADC_EOC_RT_SHIFT			0x03
 #define PALMAS_INT3_MASK_GPADC_EOC_SW				0x04
-#define PALMAS_INT3_MASK_GPADC_EOC_SW_SHIFT			2
+#define PALMAS_INT3_MASK_GPADC_EOC_SW_SHIFT			0x02
 #define PALMAS_INT3_MASK_GPADC_AUTO_1				0x02
-#define PALMAS_INT3_MASK_GPADC_AUTO_1_SHIFT			1
+#define PALMAS_INT3_MASK_GPADC_AUTO_1_SHIFT			0x01
 #define PALMAS_INT3_MASK_GPADC_AUTO_0				0x01
-#define PALMAS_INT3_MASK_GPADC_AUTO_0_SHIFT			0
+#define PALMAS_INT3_MASK_GPADC_AUTO_0_SHIFT			0x00
 
 /* Bit definitions for INT3_LINE_STATE */
 #define PALMAS_INT3_LINE_STATE_VBUS				0x80
-#define PALMAS_INT3_LINE_STATE_VBUS_SHIFT			7
+#define PALMAS_INT3_LINE_STATE_VBUS_SHIFT			0x07
 #define PALMAS_INT3_LINE_STATE_VBUS_OTG				0x40
-#define PALMAS_INT3_LINE_STATE_VBUS_OTG_SHIFT			6
+#define PALMAS_INT3_LINE_STATE_VBUS_OTG_SHIFT			0x06
 #define PALMAS_INT3_LINE_STATE_ID				0x20
-#define PALMAS_INT3_LINE_STATE_ID_SHIFT				5
+#define PALMAS_INT3_LINE_STATE_ID_SHIFT				0x05
 #define PALMAS_INT3_LINE_STATE_ID_OTG				0x10
-#define PALMAS_INT3_LINE_STATE_ID_OTG_SHIFT			4
+#define PALMAS_INT3_LINE_STATE_ID_OTG_SHIFT			0x04
 #define PALMAS_INT3_LINE_STATE_GPADC_EOC_RT			0x08
-#define PALMAS_INT3_LINE_STATE_GPADC_EOC_RT_SHIFT		3
+#define PALMAS_INT3_LINE_STATE_GPADC_EOC_RT_SHIFT		0x03
 #define PALMAS_INT3_LINE_STATE_GPADC_EOC_SW			0x04
-#define PALMAS_INT3_LINE_STATE_GPADC_EOC_SW_SHIFT		2
+#define PALMAS_INT3_LINE_STATE_GPADC_EOC_SW_SHIFT		0x02
 #define PALMAS_INT3_LINE_STATE_GPADC_AUTO_1			0x02
-#define PALMAS_INT3_LINE_STATE_GPADC_AUTO_1_SHIFT		1
+#define PALMAS_INT3_LINE_STATE_GPADC_AUTO_1_SHIFT		0x01
 #define PALMAS_INT3_LINE_STATE_GPADC_AUTO_0			0x01
-#define PALMAS_INT3_LINE_STATE_GPADC_AUTO_0_SHIFT		0
+#define PALMAS_INT3_LINE_STATE_GPADC_AUTO_0_SHIFT		0x00
 
 /* Bit definitions for INT4_STATUS */
 #define PALMAS_INT4_STATUS_GPIO_7				0x80
-#define PALMAS_INT4_STATUS_GPIO_7_SHIFT				7
+#define PALMAS_INT4_STATUS_GPIO_7_SHIFT				0x07
 #define PALMAS_INT4_STATUS_GPIO_6				0x40
-#define PALMAS_INT4_STATUS_GPIO_6_SHIFT				6
+#define PALMAS_INT4_STATUS_GPIO_6_SHIFT				0x06
 #define PALMAS_INT4_STATUS_GPIO_5				0x20
-#define PALMAS_INT4_STATUS_GPIO_5_SHIFT				5
+#define PALMAS_INT4_STATUS_GPIO_5_SHIFT				0x05
 #define PALMAS_INT4_STATUS_GPIO_4				0x10
-#define PALMAS_INT4_STATUS_GPIO_4_SHIFT				4
+#define PALMAS_INT4_STATUS_GPIO_4_SHIFT				0x04
 #define PALMAS_INT4_STATUS_GPIO_3				0x08
-#define PALMAS_INT4_STATUS_GPIO_3_SHIFT				3
+#define PALMAS_INT4_STATUS_GPIO_3_SHIFT				0x03
 #define PALMAS_INT4_STATUS_GPIO_2				0x04
-#define PALMAS_INT4_STATUS_GPIO_2_SHIFT				2
+#define PALMAS_INT4_STATUS_GPIO_2_SHIFT				0x02
 #define PALMAS_INT4_STATUS_GPIO_1				0x02
-#define PALMAS_INT4_STATUS_GPIO_1_SHIFT				1
+#define PALMAS_INT4_STATUS_GPIO_1_SHIFT				0x01
 #define PALMAS_INT4_STATUS_GPIO_0				0x01
-#define PALMAS_INT4_STATUS_GPIO_0_SHIFT				0
+#define PALMAS_INT4_STATUS_GPIO_0_SHIFT				0x00
 
 /* Bit definitions for INT4_MASK */
 #define PALMAS_INT4_MASK_GPIO_7					0x80
-#define PALMAS_INT4_MASK_GPIO_7_SHIFT				7
+#define PALMAS_INT4_MASK_GPIO_7_SHIFT				0x07
 #define PALMAS_INT4_MASK_GPIO_6					0x40
-#define PALMAS_INT4_MASK_GPIO_6_SHIFT				6
+#define PALMAS_INT4_MASK_GPIO_6_SHIFT				0x06
 #define PALMAS_INT4_MASK_GPIO_5					0x20
-#define PALMAS_INT4_MASK_GPIO_5_SHIFT				5
+#define PALMAS_INT4_MASK_GPIO_5_SHIFT				0x05
 #define PALMAS_INT4_MASK_GPIO_4					0x10
-#define PALMAS_INT4_MASK_GPIO_4_SHIFT				4
+#define PALMAS_INT4_MASK_GPIO_4_SHIFT				0x04
 #define PALMAS_INT4_MASK_GPIO_3					0x08
-#define PALMAS_INT4_MASK_GPIO_3_SHIFT				3
+#define PALMAS_INT4_MASK_GPIO_3_SHIFT				0x03
 #define PALMAS_INT4_MASK_GPIO_2					0x04
-#define PALMAS_INT4_MASK_GPIO_2_SHIFT				2
+#define PALMAS_INT4_MASK_GPIO_2_SHIFT				0x02
 #define PALMAS_INT4_MASK_GPIO_1					0x02
-#define PALMAS_INT4_MASK_GPIO_1_SHIFT				1
+#define PALMAS_INT4_MASK_GPIO_1_SHIFT				0x01
 #define PALMAS_INT4_MASK_GPIO_0					0x01
-#define PALMAS_INT4_MASK_GPIO_0_SHIFT				0
+#define PALMAS_INT4_MASK_GPIO_0_SHIFT				0x00
 
 /* Bit definitions for INT4_LINE_STATE */
 #define PALMAS_INT4_LINE_STATE_GPIO_7				0x80
-#define PALMAS_INT4_LINE_STATE_GPIO_7_SHIFT			7
+#define PALMAS_INT4_LINE_STATE_GPIO_7_SHIFT			0x07
 #define PALMAS_INT4_LINE_STATE_GPIO_6				0x40
-#define PALMAS_INT4_LINE_STATE_GPIO_6_SHIFT			6
+#define PALMAS_INT4_LINE_STATE_GPIO_6_SHIFT			0x06
 #define PALMAS_INT4_LINE_STATE_GPIO_5				0x20
-#define PALMAS_INT4_LINE_STATE_GPIO_5_SHIFT			5
+#define PALMAS_INT4_LINE_STATE_GPIO_5_SHIFT			0x05
 #define PALMAS_INT4_LINE_STATE_GPIO_4				0x10
-#define PALMAS_INT4_LINE_STATE_GPIO_4_SHIFT			4
+#define PALMAS_INT4_LINE_STATE_GPIO_4_SHIFT			0x04
 #define PALMAS_INT4_LINE_STATE_GPIO_3				0x08
-#define PALMAS_INT4_LINE_STATE_GPIO_3_SHIFT			3
+#define PALMAS_INT4_LINE_STATE_GPIO_3_SHIFT			0x03
 #define PALMAS_INT4_LINE_STATE_GPIO_2				0x04
-#define PALMAS_INT4_LINE_STATE_GPIO_2_SHIFT			2
+#define PALMAS_INT4_LINE_STATE_GPIO_2_SHIFT			0x02
 #define PALMAS_INT4_LINE_STATE_GPIO_1				0x02
-#define PALMAS_INT4_LINE_STATE_GPIO_1_SHIFT			1
+#define PALMAS_INT4_LINE_STATE_GPIO_1_SHIFT			0x01
 #define PALMAS_INT4_LINE_STATE_GPIO_0				0x01
-#define PALMAS_INT4_LINE_STATE_GPIO_0_SHIFT			0
+#define PALMAS_INT4_LINE_STATE_GPIO_0_SHIFT			0x00
 
 /* Bit definitions for INT4_EDGE_DETECT1 */
 #define PALMAS_INT4_EDGE_DETECT1_GPIO_3_RISING			0x80
-#define PALMAS_INT4_EDGE_DETECT1_GPIO_3_RISING_SHIFT		7
+#define PALMAS_INT4_EDGE_DETECT1_GPIO_3_RISING_SHIFT		0x07
 #define PALMAS_INT4_EDGE_DETECT1_GPIO_3_FALLING			0x40
-#define PALMAS_INT4_EDGE_DETECT1_GPIO_3_FALLING_SHIFT		6
+#define PALMAS_INT4_EDGE_DETECT1_GPIO_3_FALLING_SHIFT		0x06
 #define PALMAS_INT4_EDGE_DETECT1_GPIO_2_RISING			0x20
-#define PALMAS_INT4_EDGE_DETECT1_GPIO_2_RISING_SHIFT		5
+#define PALMAS_INT4_EDGE_DETECT1_GPIO_2_RISING_SHIFT		0x05
 #define PALMAS_INT4_EDGE_DETECT1_GPIO_2_FALLING			0x10
-#define PALMAS_INT4_EDGE_DETECT1_GPIO_2_FALLING_SHIFT		4
+#define PALMAS_INT4_EDGE_DETECT1_GPIO_2_FALLING_SHIFT		0x04
 #define PALMAS_INT4_EDGE_DETECT1_GPIO_1_RISING			0x08
-#define PALMAS_INT4_EDGE_DETECT1_GPIO_1_RISING_SHIFT		3
+#define PALMAS_INT4_EDGE_DETECT1_GPIO_1_RISING_SHIFT		0x03
 #define PALMAS_INT4_EDGE_DETECT1_GPIO_1_FALLING			0x04
-#define PALMAS_INT4_EDGE_DETECT1_GPIO_1_FALLING_SHIFT		2
+#define PALMAS_INT4_EDGE_DETECT1_GPIO_1_FALLING_SHIFT		0x02
 #define PALMAS_INT4_EDGE_DETECT1_GPIO_0_RISING			0x02
-#define PALMAS_INT4_EDGE_DETECT1_GPIO_0_RISING_SHIFT		1
+#define PALMAS_INT4_EDGE_DETECT1_GPIO_0_RISING_SHIFT		0x01
 #define PALMAS_INT4_EDGE_DETECT1_GPIO_0_FALLING			0x01
-#define PALMAS_INT4_EDGE_DETECT1_GPIO_0_FALLING_SHIFT		0
+#define PALMAS_INT4_EDGE_DETECT1_GPIO_0_FALLING_SHIFT		0x00
 
 /* Bit definitions for INT4_EDGE_DETECT2 */
 #define PALMAS_INT4_EDGE_DETECT2_GPIO_7_RISING			0x80
-#define PALMAS_INT4_EDGE_DETECT2_GPIO_7_RISING_SHIFT		7
+#define PALMAS_INT4_EDGE_DETECT2_GPIO_7_RISING_SHIFT		0x07
 #define PALMAS_INT4_EDGE_DETECT2_GPIO_7_FALLING			0x40
-#define PALMAS_INT4_EDGE_DETECT2_GPIO_7_FALLING_SHIFT		6
+#define PALMAS_INT4_EDGE_DETECT2_GPIO_7_FALLING_SHIFT		0x06
 #define PALMAS_INT4_EDGE_DETECT2_GPIO_6_RISING			0x20
-#define PALMAS_INT4_EDGE_DETECT2_GPIO_6_RISING_SHIFT		5
+#define PALMAS_INT4_EDGE_DETECT2_GPIO_6_RISING_SHIFT		0x05
 #define PALMAS_INT4_EDGE_DETECT2_GPIO_6_FALLING			0x10
-#define PALMAS_INT4_EDGE_DETECT2_GPIO_6_FALLING_SHIFT		4
+#define PALMAS_INT4_EDGE_DETECT2_GPIO_6_FALLING_SHIFT		0x04
 #define PALMAS_INT4_EDGE_DETECT2_GPIO_5_RISING			0x08
-#define PALMAS_INT4_EDGE_DETECT2_GPIO_5_RISING_SHIFT		3
+#define PALMAS_INT4_EDGE_DETECT2_GPIO_5_RISING_SHIFT		0x03
 #define PALMAS_INT4_EDGE_DETECT2_GPIO_5_FALLING			0x04
-#define PALMAS_INT4_EDGE_DETECT2_GPIO_5_FALLING_SHIFT		2
+#define PALMAS_INT4_EDGE_DETECT2_GPIO_5_FALLING_SHIFT		0x02
 #define PALMAS_INT4_EDGE_DETECT2_GPIO_4_RISING			0x02
-#define PALMAS_INT4_EDGE_DETECT2_GPIO_4_RISING_SHIFT		1
+#define PALMAS_INT4_EDGE_DETECT2_GPIO_4_RISING_SHIFT		0x01
 #define PALMAS_INT4_EDGE_DETECT2_GPIO_4_FALLING			0x01
-#define PALMAS_INT4_EDGE_DETECT2_GPIO_4_FALLING_SHIFT		0
+#define PALMAS_INT4_EDGE_DETECT2_GPIO_4_FALLING_SHIFT		0x00
 
 /* Bit definitions for INT_CTRL */
 #define PALMAS_INT_CTRL_INT_PENDING				0x04
-#define PALMAS_INT_CTRL_INT_PENDING_SHIFT			2
+#define PALMAS_INT_CTRL_INT_PENDING_SHIFT			0x02
 #define PALMAS_INT_CTRL_INT_CLEAR				0x01
-#define PALMAS_INT_CTRL_INT_CLEAR_SHIFT				0
+#define PALMAS_INT_CTRL_INT_CLEAR_SHIFT				0x00
 
 /* Registers for function USB_OTG */
-#define PALMAS_USB_WAKEUP					0x3
-#define PALMAS_USB_VBUS_CTRL_SET				0x4
-#define PALMAS_USB_VBUS_CTRL_CLR				0x5
-#define PALMAS_USB_ID_CTRL_SET					0x6
-#define PALMAS_USB_ID_CTRL_CLEAR				0x7
-#define PALMAS_USB_VBUS_INT_SRC					0x8
-#define PALMAS_USB_VBUS_INT_LATCH_SET				0x9
-#define PALMAS_USB_VBUS_INT_LATCH_CLR				0xA
-#define PALMAS_USB_VBUS_INT_EN_LO_SET				0xB
-#define PALMAS_USB_VBUS_INT_EN_LO_CLR				0xC
-#define PALMAS_USB_VBUS_INT_EN_HI_SET				0xD
-#define PALMAS_USB_VBUS_INT_EN_HI_CLR				0xE
-#define PALMAS_USB_ID_INT_SRC					0xF
+#define PALMAS_USB_WAKEUP					0x03
+#define PALMAS_USB_VBUS_CTRL_SET				0x04
+#define PALMAS_USB_VBUS_CTRL_CLR				0x05
+#define PALMAS_USB_ID_CTRL_SET					0x06
+#define PALMAS_USB_ID_CTRL_CLEAR				0x07
+#define PALMAS_USB_VBUS_INT_SRC					0x08
+#define PALMAS_USB_VBUS_INT_LATCH_SET				0x09
+#define PALMAS_USB_VBUS_INT_LATCH_CLR				0x0A
+#define PALMAS_USB_VBUS_INT_EN_LO_SET				0x0B
+#define PALMAS_USB_VBUS_INT_EN_LO_CLR				0x0C
+#define PALMAS_USB_VBUS_INT_EN_HI_SET				0x0D
+#define PALMAS_USB_VBUS_INT_EN_HI_CLR				0x0E
+#define PALMAS_USB_ID_INT_SRC					0x0F
 #define PALMAS_USB_ID_INT_LATCH_SET				0x10
 #define PALMAS_USB_ID_INT_LATCH_CLR				0x11
 #define PALMAS_USB_ID_INT_EN_LO_SET				0x12
@@ -2250,306 +2250,306 @@ enum usb_irq_events {
 
 /* Bit definitions for USB_WAKEUP */
 #define PALMAS_USB_WAKEUP_ID_WK_UP_COMP				0x01
-#define PALMAS_USB_WAKEUP_ID_WK_UP_COMP_SHIFT			0
+#define PALMAS_USB_WAKEUP_ID_WK_UP_COMP_SHIFT			0x00
 
 /* Bit definitions for USB_VBUS_CTRL_SET */
 #define PALMAS_USB_VBUS_CTRL_SET_VBUS_CHRG_VSYS			0x80
-#define PALMAS_USB_VBUS_CTRL_SET_VBUS_CHRG_VSYS_SHIFT		7
+#define PALMAS_USB_VBUS_CTRL_SET_VBUS_CHRG_VSYS_SHIFT		0x07
 #define PALMAS_USB_VBUS_CTRL_SET_VBUS_DISCHRG			0x20
-#define PALMAS_USB_VBUS_CTRL_SET_VBUS_DISCHRG_SHIFT		5
+#define PALMAS_USB_VBUS_CTRL_SET_VBUS_DISCHRG_SHIFT		0x05
 #define PALMAS_USB_VBUS_CTRL_SET_VBUS_IADP_SRC			0x10
-#define PALMAS_USB_VBUS_CTRL_SET_VBUS_IADP_SRC_SHIFT		4
+#define PALMAS_USB_VBUS_CTRL_SET_VBUS_IADP_SRC_SHIFT		0x04
 #define PALMAS_USB_VBUS_CTRL_SET_VBUS_IADP_SINK			0x08
-#define PALMAS_USB_VBUS_CTRL_SET_VBUS_IADP_SINK_SHIFT		3
+#define PALMAS_USB_VBUS_CTRL_SET_VBUS_IADP_SINK_SHIFT		0x03
 #define PALMAS_USB_VBUS_CTRL_SET_VBUS_ACT_COMP			0x04
-#define PALMAS_USB_VBUS_CTRL_SET_VBUS_ACT_COMP_SHIFT		2
+#define PALMAS_USB_VBUS_CTRL_SET_VBUS_ACT_COMP_SHIFT		0x02
 
 /* Bit definitions for USB_VBUS_CTRL_CLR */
 #define PALMAS_USB_VBUS_CTRL_CLR_VBUS_CHRG_VSYS			0x80
-#define PALMAS_USB_VBUS_CTRL_CLR_VBUS_CHRG_VSYS_SHIFT		7
+#define PALMAS_USB_VBUS_CTRL_CLR_VBUS_CHRG_VSYS_SHIFT		0x07
 #define PALMAS_USB_VBUS_CTRL_CLR_VBUS_DISCHRG			0x20
-#define PALMAS_USB_VBUS_CTRL_CLR_VBUS_DISCHRG_SHIFT		5
+#define PALMAS_USB_VBUS_CTRL_CLR_VBUS_DISCHRG_SHIFT		0x05
 #define PALMAS_USB_VBUS_CTRL_CLR_VBUS_IADP_SRC			0x10
-#define PALMAS_USB_VBUS_CTRL_CLR_VBUS_IADP_SRC_SHIFT		4
+#define PALMAS_USB_VBUS_CTRL_CLR_VBUS_IADP_SRC_SHIFT		0x04
 #define PALMAS_USB_VBUS_CTRL_CLR_VBUS_IADP_SINK			0x08
-#define PALMAS_USB_VBUS_CTRL_CLR_VBUS_IADP_SINK_SHIFT		3
+#define PALMAS_USB_VBUS_CTRL_CLR_VBUS_IADP_SINK_SHIFT		0x03
 #define PALMAS_USB_VBUS_CTRL_CLR_VBUS_ACT_COMP			0x04
-#define PALMAS_USB_VBUS_CTRL_CLR_VBUS_ACT_COMP_SHIFT		2
+#define PALMAS_USB_VBUS_CTRL_CLR_VBUS_ACT_COMP_SHIFT		0x02
 
 /* Bit definitions for USB_ID_CTRL_SET */
 #define PALMAS_USB_ID_CTRL_SET_ID_PU_220K			0x80
-#define PALMAS_USB_ID_CTRL_SET_ID_PU_220K_SHIFT			7
+#define PALMAS_USB_ID_CTRL_SET_ID_PU_220K_SHIFT			0x07
 #define PALMAS_USB_ID_CTRL_SET_ID_PU_100K			0x40
-#define PALMAS_USB_ID_CTRL_SET_ID_PU_100K_SHIFT			6
+#define PALMAS_USB_ID_CTRL_SET_ID_PU_100K_SHIFT			0x06
 #define PALMAS_USB_ID_CTRL_SET_ID_GND_DRV			0x20
-#define PALMAS_USB_ID_CTRL_SET_ID_GND_DRV_SHIFT			5
+#define PALMAS_USB_ID_CTRL_SET_ID_GND_DRV_SHIFT			0x05
 #define PALMAS_USB_ID_CTRL_SET_ID_SRC_16U			0x10
-#define PALMAS_USB_ID_CTRL_SET_ID_SRC_16U_SHIFT			4
+#define PALMAS_USB_ID_CTRL_SET_ID_SRC_16U_SHIFT			0x04
 #define PALMAS_USB_ID_CTRL_SET_ID_SRC_5U			0x08
-#define PALMAS_USB_ID_CTRL_SET_ID_SRC_5U_SHIFT			3
+#define PALMAS_USB_ID_CTRL_SET_ID_SRC_5U_SHIFT			0x03
 #define PALMAS_USB_ID_CTRL_SET_ID_ACT_COMP			0x04
-#define PALMAS_USB_ID_CTRL_SET_ID_ACT_COMP_SHIFT		2
+#define PALMAS_USB_ID_CTRL_SET_ID_ACT_COMP_SHIFT		0x02
 
 /* Bit definitions for USB_ID_CTRL_CLEAR */
 #define PALMAS_USB_ID_CTRL_CLEAR_ID_PU_220K			0x80
-#define PALMAS_USB_ID_CTRL_CLEAR_ID_PU_220K_SHIFT		7
+#define PALMAS_USB_ID_CTRL_CLEAR_ID_PU_220K_SHIFT		0x07
 #define PALMAS_USB_ID_CTRL_CLEAR_ID_PU_100K			0x40
-#define PALMAS_USB_ID_CTRL_CLEAR_ID_PU_100K_SHIFT		6
+#define PALMAS_USB_ID_CTRL_CLEAR_ID_PU_100K_SHIFT		0x06
 #define PALMAS_USB_ID_CTRL_CLEAR_ID_GND_DRV			0x20
-#define PALMAS_USB_ID_CTRL_CLEAR_ID_GND_DRV_SHIFT		5
+#define PALMAS_USB_ID_CTRL_CLEAR_ID_GND_DRV_SHIFT		0x05
 #define PALMAS_USB_ID_CTRL_CLEAR_ID_SRC_16U			0x10
-#define PALMAS_USB_ID_CTRL_CLEAR_ID_SRC_16U_SHIFT		4
+#define PALMAS_USB_ID_CTRL_CLEAR_ID_SRC_16U_SHIFT		0x04
 #define PALMAS_USB_ID_CTRL_CLEAR_ID_SRC_5U			0x08
-#define PALMAS_USB_ID_CTRL_CLEAR_ID_SRC_5U_SHIFT		3
+#define PALMAS_USB_ID_CTRL_CLEAR_ID_SRC_5U_SHIFT		0x03
 #define PALMAS_USB_ID_CTRL_CLEAR_ID_ACT_COMP			0x04
-#define PALMAS_USB_ID_CTRL_CLEAR_ID_ACT_COMP_SHIFT		2
+#define PALMAS_USB_ID_CTRL_CLEAR_ID_ACT_COMP_SHIFT		0x02
 
 /* Bit definitions for USB_VBUS_INT_SRC */
 #define PALMAS_USB_VBUS_INT_SRC_VOTG_SESS_VLD			0x80
-#define PALMAS_USB_VBUS_INT_SRC_VOTG_SESS_VLD_SHIFT		7
+#define PALMAS_USB_VBUS_INT_SRC_VOTG_SESS_VLD_SHIFT		0x07
 #define PALMAS_USB_VBUS_INT_SRC_VADP_PRB			0x40
-#define PALMAS_USB_VBUS_INT_SRC_VADP_PRB_SHIFT			6
+#define PALMAS_USB_VBUS_INT_SRC_VADP_PRB_SHIFT			0x06
 #define PALMAS_USB_VBUS_INT_SRC_VADP_SNS			0x20
-#define PALMAS_USB_VBUS_INT_SRC_VADP_SNS_SHIFT			5
+#define PALMAS_USB_VBUS_INT_SRC_VADP_SNS_SHIFT			0x05
 #define PALMAS_USB_VBUS_INT_SRC_VA_VBUS_VLD			0x08
-#define PALMAS_USB_VBUS_INT_SRC_VA_VBUS_VLD_SHIFT		3
+#define PALMAS_USB_VBUS_INT_SRC_VA_VBUS_VLD_SHIFT		0x03
 #define PALMAS_USB_VBUS_INT_SRC_VA_SESS_VLD			0x04
-#define PALMAS_USB_VBUS_INT_SRC_VA_SESS_VLD_SHIFT		2
+#define PALMAS_USB_VBUS_INT_SRC_VA_SESS_VLD_SHIFT		0x02
 #define PALMAS_USB_VBUS_INT_SRC_VB_SESS_VLD			0x02
-#define PALMAS_USB_VBUS_INT_SRC_VB_SESS_VLD_SHIFT		1
+#define PALMAS_USB_VBUS_INT_SRC_VB_SESS_VLD_SHIFT		0x01
 #define PALMAS_USB_VBUS_INT_SRC_VB_SESS_END			0x01
-#define PALMAS_USB_VBUS_INT_SRC_VB_SESS_END_SHIFT		0
+#define PALMAS_USB_VBUS_INT_SRC_VB_SESS_END_SHIFT		0x00
 
 /* Bit definitions for USB_VBUS_INT_LATCH_SET */
 #define PALMAS_USB_VBUS_INT_LATCH_SET_VOTG_SESS_VLD		0x80
-#define PALMAS_USB_VBUS_INT_LATCH_SET_VOTG_SESS_VLD_SHIFT	7
+#define PALMAS_USB_VBUS_INT_LATCH_SET_VOTG_SESS_VLD_SHIFT	0x07
 #define PALMAS_USB_VBUS_INT_LATCH_SET_VADP_PRB			0x40
-#define PALMAS_USB_VBUS_INT_LATCH_SET_VADP_PRB_SHIFT		6
+#define PALMAS_USB_VBUS_INT_LATCH_SET_VADP_PRB_SHIFT		0x06
 #define PALMAS_USB_VBUS_INT_LATCH_SET_VADP_SNS			0x20
-#define PALMAS_USB_VBUS_INT_LATCH_SET_VADP_SNS_SHIFT		5
+#define PALMAS_USB_VBUS_INT_LATCH_SET_VADP_SNS_SHIFT		0x05
 #define PALMAS_USB_VBUS_INT_LATCH_SET_ADP			0x10
-#define PALMAS_USB_VBUS_INT_LATCH_SET_ADP_SHIFT			4
+#define PALMAS_USB_VBUS_INT_LATCH_SET_ADP_SHIFT			0x04
 #define PALMAS_USB_VBUS_INT_LATCH_SET_VA_VBUS_VLD		0x08
-#define PALMAS_USB_VBUS_INT_LATCH_SET_VA_VBUS_VLD_SHIFT		3
+#define PALMAS_USB_VBUS_INT_LATCH_SET_VA_VBUS_VLD_SHIFT		0x03
 #define PALMAS_USB_VBUS_INT_LATCH_SET_VA_SESS_VLD		0x04
-#define PALMAS_USB_VBUS_INT_LATCH_SET_VA_SESS_VLD_SHIFT		2
+#define PALMAS_USB_VBUS_INT_LATCH_SET_VA_SESS_VLD_SHIFT		0x02
 #define PALMAS_USB_VBUS_INT_LATCH_SET_VB_SESS_VLD		0x02
-#define PALMAS_USB_VBUS_INT_LATCH_SET_VB_SESS_VLD_SHIFT		1
+#define PALMAS_USB_VBUS_INT_LATCH_SET_VB_SESS_VLD_SHIFT		0x01
 #define PALMAS_USB_VBUS_INT_LATCH_SET_VB_SESS_END		0x01
-#define PALMAS_USB_VBUS_INT_LATCH_SET_VB_SESS_END_SHIFT		0
+#define PALMAS_USB_VBUS_INT_LATCH_SET_VB_SESS_END_SHIFT		0x00
 
 /* Bit definitions for USB_VBUS_INT_LATCH_CLR */
 #define PALMAS_USB_VBUS_INT_LATCH_CLR_VOTG_SESS_VLD		0x80
-#define PALMAS_USB_VBUS_INT_LATCH_CLR_VOTG_SESS_VLD_SHIFT	7
+#define PALMAS_USB_VBUS_INT_LATCH_CLR_VOTG_SESS_VLD_SHIFT	0x07
 #define PALMAS_USB_VBUS_INT_LATCH_CLR_VADP_PRB			0x40
-#define PALMAS_USB_VBUS_INT_LATCH_CLR_VADP_PRB_SHIFT		6
+#define PALMAS_USB_VBUS_INT_LATCH_CLR_VADP_PRB_SHIFT		0x06
 #define PALMAS_USB_VBUS_INT_LATCH_CLR_VADP_SNS			0x20
-#define PALMAS_USB_VBUS_INT_LATCH_CLR_VADP_SNS_SHIFT		5
+#define PALMAS_USB_VBUS_INT_LATCH_CLR_VADP_SNS_SHIFT		0x05
 #define PALMAS_USB_VBUS_INT_LATCH_CLR_ADP			0x10
-#define PALMAS_USB_VBUS_INT_LATCH_CLR_ADP_SHIFT			4
+#define PALMAS_USB_VBUS_INT_LATCH_CLR_ADP_SHIFT			0x04
 #define PALMAS_USB_VBUS_INT_LATCH_CLR_VA_VBUS_VLD		0x08
-#define PALMAS_USB_VBUS_INT_LATCH_CLR_VA_VBUS_VLD_SHIFT		3
+#define PALMAS_USB_VBUS_INT_LATCH_CLR_VA_VBUS_VLD_SHIFT		0x03
 #define PALMAS_USB_VBUS_INT_LATCH_CLR_VA_SESS_VLD		0x04
-#define PALMAS_USB_VBUS_INT_LATCH_CLR_VA_SESS_VLD_SHIFT		2
+#define PALMAS_USB_VBUS_INT_LATCH_CLR_VA_SESS_VLD_SHIFT		0x02
 #define PALMAS_USB_VBUS_INT_LATCH_CLR_VB_SESS_VLD		0x02
-#define PALMAS_USB_VBUS_INT_LATCH_CLR_VB_SESS_VLD_SHIFT		1
+#define PALMAS_USB_VBUS_INT_LATCH_CLR_VB_SESS_VLD_SHIFT		0x01
 #define PALMAS_USB_VBUS_INT_LATCH_CLR_VB_SESS_END		0x01
-#define PALMAS_USB_VBUS_INT_LATCH_CLR_VB_SESS_END_SHIFT		0
+#define PALMAS_USB_VBUS_INT_LATCH_CLR_VB_SESS_END_SHIFT		0x00
 
 /* Bit definitions for USB_VBUS_INT_EN_LO_SET */
 #define PALMAS_USB_VBUS_INT_EN_LO_SET_VOTG_SESS_VLD		0x80
-#define PALMAS_USB_VBUS_INT_EN_LO_SET_VOTG_SESS_VLD_SHIFT	7
+#define PALMAS_USB_VBUS_INT_EN_LO_SET_VOTG_SESS_VLD_SHIFT	0x07
 #define PALMAS_USB_VBUS_INT_EN_LO_SET_VADP_PRB			0x40
-#define PALMAS_USB_VBUS_INT_EN_LO_SET_VADP_PRB_SHIFT		6
+#define PALMAS_USB_VBUS_INT_EN_LO_SET_VADP_PRB_SHIFT		0x06
 #define PALMAS_USB_VBUS_INT_EN_LO_SET_VADP_SNS			0x20
-#define PALMAS_USB_VBUS_INT_EN_LO_SET_VADP_SNS_SHIFT		5
+#define PALMAS_USB_VBUS_INT_EN_LO_SET_VADP_SNS_SHIFT		0x05
 #define PALMAS_USB_VBUS_INT_EN_LO_SET_VA_VBUS_VLD		0x08
-#define PALMAS_USB_VBUS_INT_EN_LO_SET_VA_VBUS_VLD_SHIFT		3
+#define PALMAS_USB_VBUS_INT_EN_LO_SET_VA_VBUS_VLD_SHIFT		0x03
 #define PALMAS_USB_VBUS_INT_EN_LO_SET_VA_SESS_VLD		0x04
-#define PALMAS_USB_VBUS_INT_EN_LO_SET_VA_SESS_VLD_SHIFT		2
+#define PALMAS_USB_VBUS_INT_EN_LO_SET_VA_SESS_VLD_SHIFT		0x02
 #define PALMAS_USB_VBUS_INT_EN_LO_SET_VB_SESS_VLD		0x02
-#define PALMAS_USB_VBUS_INT_EN_LO_SET_VB_SESS_VLD_SHIFT		1
+#define PALMAS_USB_VBUS_INT_EN_LO_SET_VB_SESS_VLD_SHIFT		0x01
 #define PALMAS_USB_VBUS_INT_EN_LO_SET_VB_SESS_END		0x01
-#define PALMAS_USB_VBUS_INT_EN_LO_SET_VB_SESS_END_SHIFT		0
+#define PALMAS_USB_VBUS_INT_EN_LO_SET_VB_SESS_END_SHIFT		0x00
 
 /* Bit definitions for USB_VBUS_INT_EN_LO_CLR */
 #define PALMAS_USB_VBUS_INT_EN_LO_CLR_VOTG_SESS_VLD		0x80
-#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VOTG_SESS_VLD_SHIFT	7
+#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VOTG_SESS_VLD_SHIFT	0x07
 #define PALMAS_USB_VBUS_INT_EN_LO_CLR_VADP_PRB			0x40
-#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VADP_PRB_SHIFT		6
+#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VADP_PRB_SHIFT		0x06
 #define PALMAS_USB_VBUS_INT_EN_LO_CLR_VADP_SNS			0x20
-#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VADP_SNS_SHIFT		5
+#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VADP_SNS_SHIFT		0x05
 #define PALMAS_USB_VBUS_INT_EN_LO_CLR_VA_VBUS_VLD		0x08
-#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VA_VBUS_VLD_SHIFT		3
+#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VA_VBUS_VLD_SHIFT		0x03
 #define PALMAS_USB_VBUS_INT_EN_LO_CLR_VA_SESS_VLD		0x04
-#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VA_SESS_VLD_SHIFT		2
+#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VA_SESS_VLD_SHIFT		0x02
 #define PALMAS_USB_VBUS_INT_EN_LO_CLR_VB_SESS_VLD		0x02
-#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VB_SESS_VLD_SHIFT		1
+#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VB_SESS_VLD_SHIFT		0x01
 #define PALMAS_USB_VBUS_INT_EN_LO_CLR_VB_SESS_END		0x01
-#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VB_SESS_END_SHIFT		0
+#define PALMAS_USB_VBUS_INT_EN_LO_CLR_VB_SESS_END_SHIFT		0x00
 
 /* Bit definitions for USB_VBUS_INT_EN_HI_SET */
 #define PALMAS_USB_VBUS_INT_EN_HI_SET_VOTG_SESS_VLD		0x80
-#define PALMAS_USB_VBUS_INT_EN_HI_SET_VOTG_SESS_VLD_SHIFT	7
+#define PALMAS_USB_VBUS_INT_EN_HI_SET_VOTG_SESS_VLD_SHIFT	0x07
 #define PALMAS_USB_VBUS_INT_EN_HI_SET_VADP_PRB			0x40
-#define PALMAS_USB_VBUS_INT_EN_HI_SET_VADP_PRB_SHIFT		6
+#define PALMAS_USB_VBUS_INT_EN_HI_SET_VADP_PRB_SHIFT		0x06
 #define PALMAS_USB_VBUS_INT_EN_HI_SET_VADP_SNS			0x20
-#define PALMAS_USB_VBUS_INT_EN_HI_SET_VADP_SNS_SHIFT		5
+#define PALMAS_USB_VBUS_INT_EN_HI_SET_VADP_SNS_SHIFT		0x05
 #define PALMAS_USB_VBUS_INT_EN_HI_SET_ADP			0x10
-#define PALMAS_USB_VBUS_INT_EN_HI_SET_ADP_SHIFT			4
+#define PALMAS_USB_VBUS_INT_EN_HI_SET_ADP_SHIFT			0x04
 #define PALMAS_USB_VBUS_INT_EN_HI_SET_VA_VBUS_VLD		0x08
-#define PALMAS_USB_VBUS_INT_EN_HI_SET_VA_VBUS_VLD_SHIFT		3
+#define PALMAS_USB_VBUS_INT_EN_HI_SET_VA_VBUS_VLD_SHIFT		0x03
 #define PALMAS_USB_VBUS_INT_EN_HI_SET_VA_SESS_VLD		0x04
-#define PALMAS_USB_VBUS_INT_EN_HI_SET_VA_SESS_VLD_SHIFT		2
+#define PALMAS_USB_VBUS_INT_EN_HI_SET_VA_SESS_VLD_SHIFT		0x02
 #define PALMAS_USB_VBUS_INT_EN_HI_SET_VB_SESS_VLD		0x02
-#define PALMAS_USB_VBUS_INT_EN_HI_SET_VB_SESS_VLD_SHIFT		1
+#define PALMAS_USB_VBUS_INT_EN_HI_SET_VB_SESS_VLD_SHIFT		0x01
 #define PALMAS_USB_VBUS_INT_EN_HI_SET_VB_SESS_END		0x01
-#define PALMAS_USB_VBUS_INT_EN_HI_SET_VB_SESS_END_SHIFT		0
+#define PALMAS_USB_VBUS_INT_EN_HI_SET_VB_SESS_END_SHIFT		0x00
 
 /* Bit definitions for USB_VBUS_INT_EN_HI_CLR */
 #define PALMAS_USB_VBUS_INT_EN_HI_CLR_VOTG_SESS_VLD		0x80
-#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VOTG_SESS_VLD_SHIFT	7
+#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VOTG_SESS_VLD_SHIFT	0x07
 #define PALMAS_USB_VBUS_INT_EN_HI_CLR_VADP_PRB			0x40
-#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VADP_PRB_SHIFT		6
+#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VADP_PRB_SHIFT		0x06
 #define PALMAS_USB_VBUS_INT_EN_HI_CLR_VADP_SNS			0x20
-#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VADP_SNS_SHIFT		5
+#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VADP_SNS_SHIFT		0x05
 #define PALMAS_USB_VBUS_INT_EN_HI_CLR_ADP			0x10
-#define PALMAS_USB_VBUS_INT_EN_HI_CLR_ADP_SHIFT			4
+#define PALMAS_USB_VBUS_INT_EN_HI_CLR_ADP_SHIFT			0x04
 #define PALMAS_USB_VBUS_INT_EN_HI_CLR_VA_VBUS_VLD		0x08
-#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VA_VBUS_VLD_SHIFT		3
+#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VA_VBUS_VLD_SHIFT		0x03
 #define PALMAS_USB_VBUS_INT_EN_HI_CLR_VA_SESS_VLD		0x04
-#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VA_SESS_VLD_SHIFT		2
+#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VA_SESS_VLD_SHIFT		0x02
 #define PALMAS_USB_VBUS_INT_EN_HI_CLR_VB_SESS_VLD		0x02
-#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VB_SESS_VLD_SHIFT		1
+#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VB_SESS_VLD_SHIFT		0x01
 #define PALMAS_USB_VBUS_INT_EN_HI_CLR_VB_SESS_END		0x01
-#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VB_SESS_END_SHIFT		0
+#define PALMAS_USB_VBUS_INT_EN_HI_CLR_VB_SESS_END_SHIFT		0x00
 
 /* Bit definitions for USB_ID_INT_SRC */
 #define PALMAS_USB_ID_INT_SRC_ID_FLOAT				0x10
-#define PALMAS_USB_ID_INT_SRC_ID_FLOAT_SHIFT			4
+#define PALMAS_USB_ID_INT_SRC_ID_FLOAT_SHIFT			0x04
 #define PALMAS_USB_ID_INT_SRC_ID_A				0x08
-#define PALMAS_USB_ID_INT_SRC_ID_A_SHIFT			3
+#define PALMAS_USB_ID_INT_SRC_ID_A_SHIFT			0x03
 #define PALMAS_USB_ID_INT_SRC_ID_B				0x04
-#define PALMAS_USB_ID_INT_SRC_ID_B_SHIFT			2
+#define PALMAS_USB_ID_INT_SRC_ID_B_SHIFT			0x02
 #define PALMAS_USB_ID_INT_SRC_ID_C				0x02
-#define PALMAS_USB_ID_INT_SRC_ID_C_SHIFT			1
+#define PALMAS_USB_ID_INT_SRC_ID_C_SHIFT			0x01
 #define PALMAS_USB_ID_INT_SRC_ID_GND				0x01
-#define PALMAS_USB_ID_INT_SRC_ID_GND_SHIFT			0
+#define PALMAS_USB_ID_INT_SRC_ID_GND_SHIFT			0x00
 
 /* Bit definitions for USB_ID_INT_LATCH_SET */
 #define PALMAS_USB_ID_INT_LATCH_SET_ID_FLOAT			0x10
-#define PALMAS_USB_ID_INT_LATCH_SET_ID_FLOAT_SHIFT		4
+#define PALMAS_USB_ID_INT_LATCH_SET_ID_FLOAT_SHIFT		0x04
 #define PALMAS_USB_ID_INT_LATCH_SET_ID_A			0x08
-#define PALMAS_USB_ID_INT_LATCH_SET_ID_A_SHIFT			3
+#define PALMAS_USB_ID_INT_LATCH_SET_ID_A_SHIFT			0x03
 #define PALMAS_USB_ID_INT_LATCH_SET_ID_B			0x04
-#define PALMAS_USB_ID_INT_LATCH_SET_ID_B_SHIFT			2
+#define PALMAS_USB_ID_INT_LATCH_SET_ID_B_SHIFT			0x02
 #define PALMAS_USB_ID_INT_LATCH_SET_ID_C			0x02
-#define PALMAS_USB_ID_INT_LATCH_SET_ID_C_SHIFT			1
+#define PALMAS_USB_ID_INT_LATCH_SET_ID_C_SHIFT			0x01
 #define PALMAS_USB_ID_INT_LATCH_SET_ID_GND			0x01
-#define PALMAS_USB_ID_INT_LATCH_SET_ID_GND_SHIFT		0
+#define PALMAS_USB_ID_INT_LATCH_SET_ID_GND_SHIFT		0x00
 
 /* Bit definitions for USB_ID_INT_LATCH_CLR */
 #define PALMAS_USB_ID_INT_LATCH_CLR_ID_FLOAT			0x10
-#define PALMAS_USB_ID_INT_LATCH_CLR_ID_FLOAT_SHIFT		4
+#define PALMAS_USB_ID_INT_LATCH_CLR_ID_FLOAT_SHIFT		0x04
 #define PALMAS_USB_ID_INT_LATCH_CLR_ID_A			0x08
-#define PALMAS_USB_ID_INT_LATCH_CLR_ID_A_SHIFT			3
+#define PALMAS_USB_ID_INT_LATCH_CLR_ID_A_SHIFT			0x03
 #define PALMAS_USB_ID_INT_LATCH_CLR_ID_B			0x04
-#define PALMAS_USB_ID_INT_LATCH_CLR_ID_B_SHIFT			2
+#define PALMAS_USB_ID_INT_LATCH_CLR_ID_B_SHIFT			0x02
 #define PALMAS_USB_ID_INT_LATCH_CLR_ID_C			0x02
-#define PALMAS_USB_ID_INT_LATCH_CLR_ID_C_SHIFT			1
+#define PALMAS_USB_ID_INT_LATCH_CLR_ID_C_SHIFT			0x01
 #define PALMAS_USB_ID_INT_LATCH_CLR_ID_GND			0x01
-#define PALMAS_USB_ID_INT_LATCH_CLR_ID_GND_SHIFT		0
+#define PALMAS_USB_ID_INT_LATCH_CLR_ID_GND_SHIFT		0x00
 
 /* Bit definitions for USB_ID_INT_EN_LO_SET */
 #define PALMAS_USB_ID_INT_EN_LO_SET_ID_FLOAT			0x10
-#define PALMAS_USB_ID_INT_EN_LO_SET_ID_FLOAT_SHIFT		4
+#define PALMAS_USB_ID_INT_EN_LO_SET_ID_FLOAT_SHIFT		0x04
 #define PALMAS_USB_ID_INT_EN_LO_SET_ID_A			0x08
-#define PALMAS_USB_ID_INT_EN_LO_SET_ID_A_SHIFT			3
+#define PALMAS_USB_ID_INT_EN_LO_SET_ID_A_SHIFT			0x03
 #define PALMAS_USB_ID_INT_EN_LO_SET_ID_B			0x04
-#define PALMAS_USB_ID_INT_EN_LO_SET_ID_B_SHIFT			2
+#define PALMAS_USB_ID_INT_EN_LO_SET_ID_B_SHIFT			0x02
 #define PALMAS_USB_ID_INT_EN_LO_SET_ID_C			0x02
-#define PALMAS_USB_ID_INT_EN_LO_SET_ID_C_SHIFT			1
+#define PALMAS_USB_ID_INT_EN_LO_SET_ID_C_SHIFT			0x01
 #define PALMAS_USB_ID_INT_EN_LO_SET_ID_GND			0x01
-#define PALMAS_USB_ID_INT_EN_LO_SET_ID_GND_SHIFT		0
+#define PALMAS_USB_ID_INT_EN_LO_SET_ID_GND_SHIFT		0x00
 
 /* Bit definitions for USB_ID_INT_EN_LO_CLR */
 #define PALMAS_USB_ID_INT_EN_LO_CLR_ID_FLOAT			0x10
-#define PALMAS_USB_ID_INT_EN_LO_CLR_ID_FLOAT_SHIFT		4
+#define PALMAS_USB_ID_INT_EN_LO_CLR_ID_FLOAT_SHIFT		0x04
 #define PALMAS_USB_ID_INT_EN_LO_CLR_ID_A			0x08
-#define PALMAS_USB_ID_INT_EN_LO_CLR_ID_A_SHIFT			3
+#define PALMAS_USB_ID_INT_EN_LO_CLR_ID_A_SHIFT			0x03
 #define PALMAS_USB_ID_INT_EN_LO_CLR_ID_B			0x04
-#define PALMAS_USB_ID_INT_EN_LO_CLR_ID_B_SHIFT			2
+#define PALMAS_USB_ID_INT_EN_LO_CLR_ID_B_SHIFT			0x02
 #define PALMAS_USB_ID_INT_EN_LO_CLR_ID_C			0x02
-#define PALMAS_USB_ID_INT_EN_LO_CLR_ID_C_SHIFT			1
+#define PALMAS_USB_ID_INT_EN_LO_CLR_ID_C_SHIFT			0x01
 #define PALMAS_USB_ID_INT_EN_LO_CLR_ID_GND			0x01
-#define PALMAS_USB_ID_INT_EN_LO_CLR_ID_GND_SHIFT		0
+#define PALMAS_USB_ID_INT_EN_LO_CLR_ID_GND_SHIFT		0x00
 
 /* Bit definitions for USB_ID_INT_EN_HI_SET */
 #define PALMAS_USB_ID_INT_EN_HI_SET_ID_FLOAT			0x10
-#define PALMAS_USB_ID_INT_EN_HI_SET_ID_FLOAT_SHIFT		4
+#define PALMAS_USB_ID_INT_EN_HI_SET_ID_FLOAT_SHIFT		0x04
 #define PALMAS_USB_ID_INT_EN_HI_SET_ID_A			0x08
-#define PALMAS_USB_ID_INT_EN_HI_SET_ID_A_SHIFT			3
+#define PALMAS_USB_ID_INT_EN_HI_SET_ID_A_SHIFT			0x03
 #define PALMAS_USB_ID_INT_EN_HI_SET_ID_B			0x04
-#define PALMAS_USB_ID_INT_EN_HI_SET_ID_B_SHIFT			2
+#define PALMAS_USB_ID_INT_EN_HI_SET_ID_B_SHIFT			0x02
 #define PALMAS_USB_ID_INT_EN_HI_SET_ID_C			0x02
-#define PALMAS_USB_ID_INT_EN_HI_SET_ID_C_SHIFT			1
+#define PALMAS_USB_ID_INT_EN_HI_SET_ID_C_SHIFT			0x01
 #define PALMAS_USB_ID_INT_EN_HI_SET_ID_GND			0x01
-#define PALMAS_USB_ID_INT_EN_HI_SET_ID_GND_SHIFT		0
+#define PALMAS_USB_ID_INT_EN_HI_SET_ID_GND_SHIFT		0x00
 
 /* Bit definitions for USB_ID_INT_EN_HI_CLR */
 #define PALMAS_USB_ID_INT_EN_HI_CLR_ID_FLOAT			0x10
-#define PALMAS_USB_ID_INT_EN_HI_CLR_ID_FLOAT_SHIFT		4
+#define PALMAS_USB_ID_INT_EN_HI_CLR_ID_FLOAT_SHIFT		0x04
 #define PALMAS_USB_ID_INT_EN_HI_CLR_ID_A			0x08
-#define PALMAS_USB_ID_INT_EN_HI_CLR_ID_A_SHIFT			3
+#define PALMAS_USB_ID_INT_EN_HI_CLR_ID_A_SHIFT			0x03
 #define PALMAS_USB_ID_INT_EN_HI_CLR_ID_B			0x04
-#define PALMAS_USB_ID_INT_EN_HI_CLR_ID_B_SHIFT			2
+#define PALMAS_USB_ID_INT_EN_HI_CLR_ID_B_SHIFT			0x02
 #define PALMAS_USB_ID_INT_EN_HI_CLR_ID_C			0x02
-#define PALMAS_USB_ID_INT_EN_HI_CLR_ID_C_SHIFT			1
+#define PALMAS_USB_ID_INT_EN_HI_CLR_ID_C_SHIFT			0x01
 #define PALMAS_USB_ID_INT_EN_HI_CLR_ID_GND			0x01
-#define PALMAS_USB_ID_INT_EN_HI_CLR_ID_GND_SHIFT		0
+#define PALMAS_USB_ID_INT_EN_HI_CLR_ID_GND_SHIFT		0x00
 
 /* Bit definitions for USB_OTG_ADP_CTRL */
 #define PALMAS_USB_OTG_ADP_CTRL_ADP_EN				0x04
-#define PALMAS_USB_OTG_ADP_CTRL_ADP_EN_SHIFT			2
+#define PALMAS_USB_OTG_ADP_CTRL_ADP_EN_SHIFT			0x02
 #define PALMAS_USB_OTG_ADP_CTRL_ADP_MODE_MASK			0x03
-#define PALMAS_USB_OTG_ADP_CTRL_ADP_MODE_SHIFT			0
+#define PALMAS_USB_OTG_ADP_CTRL_ADP_MODE_SHIFT			0x00
 
 /* Bit definitions for USB_OTG_ADP_HIGH */
-#define PALMAS_USB_OTG_ADP_HIGH_T_ADP_HIGH_MASK			0xff
-#define PALMAS_USB_OTG_ADP_HIGH_T_ADP_HIGH_SHIFT		0
+#define PALMAS_USB_OTG_ADP_HIGH_T_ADP_HIGH_MASK			0xFF
+#define PALMAS_USB_OTG_ADP_HIGH_T_ADP_HIGH_SHIFT		0x00
 
 /* Bit definitions for USB_OTG_ADP_LOW */
-#define PALMAS_USB_OTG_ADP_LOW_T_ADP_LOW_MASK			0xff
-#define PALMAS_USB_OTG_ADP_LOW_T_ADP_LOW_SHIFT			0
+#define PALMAS_USB_OTG_ADP_LOW_T_ADP_LOW_MASK			0xFF
+#define PALMAS_USB_OTG_ADP_LOW_T_ADP_LOW_SHIFT			0x00
 
 /* Bit definitions for USB_OTG_ADP_RISE */
-#define PALMAS_USB_OTG_ADP_RISE_T_ADP_RISE_MASK			0xff
-#define PALMAS_USB_OTG_ADP_RISE_T_ADP_RISE_SHIFT		0
+#define PALMAS_USB_OTG_ADP_RISE_T_ADP_RISE_MASK			0xFF
+#define PALMAS_USB_OTG_ADP_RISE_T_ADP_RISE_SHIFT		0x00
 
 /* Bit definitions for USB_OTG_REVISION */
 #define PALMAS_USB_OTG_REVISION_OTG_REV				0x01
-#define PALMAS_USB_OTG_REVISION_OTG_REV_SHIFT			0
+#define PALMAS_USB_OTG_REVISION_OTG_REV_SHIFT			0x00
 
 /* Registers for function VIBRATOR */
-#define PALMAS_VIBRA_CTRL					0x0
+#define PALMAS_VIBRA_CTRL					0x00
 
 /* Bit definitions for VIBRA_CTRL */
 #define PALMAS_VIBRA_CTRL_PWM_DUTY_SEL_MASK			0x06
-#define PALMAS_VIBRA_CTRL_PWM_DUTY_SEL_SHIFT			1
+#define PALMAS_VIBRA_CTRL_PWM_DUTY_SEL_SHIFT			0x01
 #define PALMAS_VIBRA_CTRL_PWM_FREQ_SEL				0x01
-#define PALMAS_VIBRA_CTRL_PWM_FREQ_SEL_SHIFT			0
+#define PALMAS_VIBRA_CTRL_PWM_FREQ_SEL_SHIFT			0x00
 
 /* Registers for function GPIO */
-#define PALMAS_GPIO_DATA_IN					0x0
-#define PALMAS_GPIO_DATA_DIR					0x1
-#define PALMAS_GPIO_DATA_OUT					0x2
-#define PALMAS_GPIO_DEBOUNCE_EN					0x3
-#define PALMAS_GPIO_CLEAR_DATA_OUT				0x4
-#define PALMAS_GPIO_SET_DATA_OUT				0x5
-#define PALMAS_PU_PD_GPIO_CTRL1					0x6
-#define PALMAS_PU_PD_GPIO_CTRL2					0x7
-#define PALMAS_OD_OUTPUT_GPIO_CTRL				0x8
-#define PALMAS_GPIO_DATA_IN2					0x9
+#define PALMAS_GPIO_DATA_IN					0x00
+#define PALMAS_GPIO_DATA_DIR					0x01
+#define PALMAS_GPIO_DATA_OUT					0x02
+#define PALMAS_GPIO_DEBOUNCE_EN					0x03
+#define PALMAS_GPIO_CLEAR_DATA_OUT				0x04
+#define PALMAS_GPIO_SET_DATA_OUT				0x05
+#define PALMAS_PU_PD_GPIO_CTRL1					0x06
+#define PALMAS_PU_PD_GPIO_CTRL2					0x07
+#define PALMAS_OD_OUTPUT_GPIO_CTRL				0x08
+#define PALMAS_GPIO_DATA_IN2					0x09
 #define PALMAS_GPIO_DATA_DIR2					0x0A
 #define PALMAS_GPIO_DATA_OUT2					0x0B
 #define PALMAS_GPIO_DEBOUNCE_EN2				0x0C
@@ -2561,167 +2561,167 @@ enum usb_irq_events {
 
 /* Bit definitions for GPIO_DATA_IN */
 #define PALMAS_GPIO_DATA_IN_GPIO_7_IN				0x80
-#define PALMAS_GPIO_DATA_IN_GPIO_7_IN_SHIFT			7
+#define PALMAS_GPIO_DATA_IN_GPIO_7_IN_SHIFT			0x07
 #define PALMAS_GPIO_DATA_IN_GPIO_6_IN				0x40
-#define PALMAS_GPIO_DATA_IN_GPIO_6_IN_SHIFT			6
+#define PALMAS_GPIO_DATA_IN_GPIO_6_IN_SHIFT			0x06
 #define PALMAS_GPIO_DATA_IN_GPIO_5_IN				0x20
-#define PALMAS_GPIO_DATA_IN_GPIO_5_IN_SHIFT			5
+#define PALMAS_GPIO_DATA_IN_GPIO_5_IN_SHIFT			0x05
 #define PALMAS_GPIO_DATA_IN_GPIO_4_IN				0x10
-#define PALMAS_GPIO_DATA_IN_GPIO_4_IN_SHIFT			4
+#define PALMAS_GPIO_DATA_IN_GPIO_4_IN_SHIFT			0x04
 #define PALMAS_GPIO_DATA_IN_GPIO_3_IN				0x08
-#define PALMAS_GPIO_DATA_IN_GPIO_3_IN_SHIFT			3
+#define PALMAS_GPIO_DATA_IN_GPIO_3_IN_SHIFT			0x03
 #define PALMAS_GPIO_DATA_IN_GPIO_2_IN				0x04
-#define PALMAS_GPIO_DATA_IN_GPIO_2_IN_SHIFT			2
+#define PALMAS_GPIO_DATA_IN_GPIO_2_IN_SHIFT			0x02
 #define PALMAS_GPIO_DATA_IN_GPIO_1_IN				0x02
-#define PALMAS_GPIO_DATA_IN_GPIO_1_IN_SHIFT			1
+#define PALMAS_GPIO_DATA_IN_GPIO_1_IN_SHIFT			0x01
 #define PALMAS_GPIO_DATA_IN_GPIO_0_IN				0x01
-#define PALMAS_GPIO_DATA_IN_GPIO_0_IN_SHIFT			0
+#define PALMAS_GPIO_DATA_IN_GPIO_0_IN_SHIFT			0x00
 
 /* Bit definitions for GPIO_DATA_DIR */
 #define PALMAS_GPIO_DATA_DIR_GPIO_7_DIR				0x80
-#define PALMAS_GPIO_DATA_DIR_GPIO_7_DIR_SHIFT			7
+#define PALMAS_GPIO_DATA_DIR_GPIO_7_DIR_SHIFT			0x07
 #define PALMAS_GPIO_DATA_DIR_GPIO_6_DIR				0x40
-#define PALMAS_GPIO_DATA_DIR_GPIO_6_DIR_SHIFT			6
+#define PALMAS_GPIO_DATA_DIR_GPIO_6_DIR_SHIFT			0x06
 #define PALMAS_GPIO_DATA_DIR_GPIO_5_DIR				0x20
-#define PALMAS_GPIO_DATA_DIR_GPIO_5_DIR_SHIFT			5
+#define PALMAS_GPIO_DATA_DIR_GPIO_5_DIR_SHIFT			0x05
 #define PALMAS_GPIO_DATA_DIR_GPIO_4_DIR				0x10
-#define PALMAS_GPIO_DATA_DIR_GPIO_4_DIR_SHIFT			4
+#define PALMAS_GPIO_DATA_DIR_GPIO_4_DIR_SHIFT			0x04
 #define PALMAS_GPIO_DATA_DIR_GPIO_3_DIR				0x08
-#define PALMAS_GPIO_DATA_DIR_GPIO_3_DIR_SHIFT			3
+#define PALMAS_GPIO_DATA_DIR_GPIO_3_DIR_SHIFT			0x03
 #define PALMAS_GPIO_DATA_DIR_GPIO_2_DIR				0x04
-#define PALMAS_GPIO_DATA_DIR_GPIO_2_DIR_SHIFT			2
+#define PALMAS_GPIO_DATA_DIR_GPIO_2_DIR_SHIFT			0x02
 #define PALMAS_GPIO_DATA_DIR_GPIO_1_DIR				0x02
-#define PALMAS_GPIO_DATA_DIR_GPIO_1_DIR_SHIFT			1
+#define PALMAS_GPIO_DATA_DIR_GPIO_1_DIR_SHIFT			0x01
 #define PALMAS_GPIO_DATA_DIR_GPIO_0_DIR				0x01
-#define PALMAS_GPIO_DATA_DIR_GPIO_0_DIR_SHIFT			0
+#define PALMAS_GPIO_DATA_DIR_GPIO_0_DIR_SHIFT			0x00
 
 /* Bit definitions for GPIO_DATA_OUT */
 #define PALMAS_GPIO_DATA_OUT_GPIO_7_OUT				0x80
-#define PALMAS_GPIO_DATA_OUT_GPIO_7_OUT_SHIFT			7
+#define PALMAS_GPIO_DATA_OUT_GPIO_7_OUT_SHIFT			0x07
 #define PALMAS_GPIO_DATA_OUT_GPIO_6_OUT				0x40
-#define PALMAS_GPIO_DATA_OUT_GPIO_6_OUT_SHIFT			6
+#define PALMAS_GPIO_DATA_OUT_GPIO_6_OUT_SHIFT			0x06
 #define PALMAS_GPIO_DATA_OUT_GPIO_5_OUT				0x20
-#define PALMAS_GPIO_DATA_OUT_GPIO_5_OUT_SHIFT			5
+#define PALMAS_GPIO_DATA_OUT_GPIO_5_OUT_SHIFT			0x05
 #define PALMAS_GPIO_DATA_OUT_GPIO_4_OUT				0x10
-#define PALMAS_GPIO_DATA_OUT_GPIO_4_OUT_SHIFT			4
+#define PALMAS_GPIO_DATA_OUT_GPIO_4_OUT_SHIFT			0x04
 #define PALMAS_GPIO_DATA_OUT_GPIO_3_OUT				0x08
-#define PALMAS_GPIO_DATA_OUT_GPIO_3_OUT_SHIFT			3
+#define PALMAS_GPIO_DATA_OUT_GPIO_3_OUT_SHIFT			0x03
 #define PALMAS_GPIO_DATA_OUT_GPIO_2_OUT				0x04
-#define PALMAS_GPIO_DATA_OUT_GPIO_2_OUT_SHIFT			2
+#define PALMAS_GPIO_DATA_OUT_GPIO_2_OUT_SHIFT			0x02
 #define PALMAS_GPIO_DATA_OUT_GPIO_1_OUT				0x02
-#define PALMAS_GPIO_DATA_OUT_GPIO_1_OUT_SHIFT			1
+#define PALMAS_GPIO_DATA_OUT_GPIO_1_OUT_SHIFT			0x01
 #define PALMAS_GPIO_DATA_OUT_GPIO_0_OUT				0x01
-#define PALMAS_GPIO_DATA_OUT_GPIO_0_OUT_SHIFT			0
+#define PALMAS_GPIO_DATA_OUT_GPIO_0_OUT_SHIFT			0x00
 
 /* Bit definitions for GPIO_DEBOUNCE_EN */
 #define PALMAS_GPIO_DEBOUNCE_EN_GPIO_7_DEBOUNCE_EN		0x80
-#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_7_DEBOUNCE_EN_SHIFT	7
+#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_7_DEBOUNCE_EN_SHIFT	0x07
 #define PALMAS_GPIO_DEBOUNCE_EN_GPIO_6_DEBOUNCE_EN		0x40
-#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_6_DEBOUNCE_EN_SHIFT	6
+#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_6_DEBOUNCE_EN_SHIFT	0x06
 #define PALMAS_GPIO_DEBOUNCE_EN_GPIO_5_DEBOUNCE_EN		0x20
-#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_5_DEBOUNCE_EN_SHIFT	5
+#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_5_DEBOUNCE_EN_SHIFT	0x05
 #define PALMAS_GPIO_DEBOUNCE_EN_GPIO_4_DEBOUNCE_EN		0x10
-#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_4_DEBOUNCE_EN_SHIFT	4
+#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_4_DEBOUNCE_EN_SHIFT	0x04
 #define PALMAS_GPIO_DEBOUNCE_EN_GPIO_3_DEBOUNCE_EN		0x08
-#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_3_DEBOUNCE_EN_SHIFT	3
+#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_3_DEBOUNCE_EN_SHIFT	0x03
 #define PALMAS_GPIO_DEBOUNCE_EN_GPIO_2_DEBOUNCE_EN		0x04
-#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_2_DEBOUNCE_EN_SHIFT	2
+#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_2_DEBOUNCE_EN_SHIFT	0x02
 #define PALMAS_GPIO_DEBOUNCE_EN_GPIO_1_DEBOUNCE_EN		0x02
-#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_1_DEBOUNCE_EN_SHIFT	1
+#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_1_DEBOUNCE_EN_SHIFT	0x01
 #define PALMAS_GPIO_DEBOUNCE_EN_GPIO_0_DEBOUNCE_EN		0x01
-#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_0_DEBOUNCE_EN_SHIFT	0
+#define PALMAS_GPIO_DEBOUNCE_EN_GPIO_0_DEBOUNCE_EN_SHIFT	0x00
 
 /* Bit definitions for GPIO_CLEAR_DATA_OUT */
 #define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_7_CLEAR_DATA_OUT	0x80
-#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_7_CLEAR_DATA_OUT_SHIFT	7
+#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_7_CLEAR_DATA_OUT_SHIFT	0x07
 #define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_6_CLEAR_DATA_OUT	0x40
-#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_6_CLEAR_DATA_OUT_SHIFT	6
+#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_6_CLEAR_DATA_OUT_SHIFT	0x06
 #define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_5_CLEAR_DATA_OUT	0x20
-#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_5_CLEAR_DATA_OUT_SHIFT	5
+#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_5_CLEAR_DATA_OUT_SHIFT	0x05
 #define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_4_CLEAR_DATA_OUT	0x10
-#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_4_CLEAR_DATA_OUT_SHIFT	4
+#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_4_CLEAR_DATA_OUT_SHIFT	0x04
 #define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_3_CLEAR_DATA_OUT	0x08
-#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_3_CLEAR_DATA_OUT_SHIFT	3
+#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_3_CLEAR_DATA_OUT_SHIFT	0x03
 #define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_2_CLEAR_DATA_OUT	0x04
-#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_2_CLEAR_DATA_OUT_SHIFT	2
+#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_2_CLEAR_DATA_OUT_SHIFT	0x02
 #define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_1_CLEAR_DATA_OUT	0x02
-#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_1_CLEAR_DATA_OUT_SHIFT	1
+#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_1_CLEAR_DATA_OUT_SHIFT	0x01
 #define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_0_CLEAR_DATA_OUT	0x01
-#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_0_CLEAR_DATA_OUT_SHIFT	0
+#define PALMAS_GPIO_CLEAR_DATA_OUT_GPIO_0_CLEAR_DATA_OUT_SHIFT	0x00
 
 /* Bit definitions for GPIO_SET_DATA_OUT */
 #define PALMAS_GPIO_SET_DATA_OUT_GPIO_7_SET_DATA_OUT		0x80
-#define PALMAS_GPIO_SET_DATA_OUT_GPIO_7_SET_DATA_OUT_SHIFT	7
+#define PALMAS_GPIO_SET_DATA_OUT_GPIO_7_SET_DATA_OUT_SHIFT	0x07
 #define PALMAS_GPIO_SET_DATA_OUT_GPIO_6_SET_DATA_OUT		0x40
-#define PALMAS_GPIO_SET_DATA_OUT_GPIO_6_SET_DATA_OUT_SHIFT	6
+#define PALMAS_GPIO_SET_DATA_OUT_GPIO_6_SET_DATA_OUT_SHIFT	0x06
 #define PALMAS_GPIO_SET_DATA_OUT_GPIO_5_SET_DATA_OUT		0x20
-#define PALMAS_GPIO_SET_DATA_OUT_GPIO_5_SET_DATA_OUT_SHIFT	5
+#define PALMAS_GPIO_SET_DATA_OUT_GPIO_5_SET_DATA_OUT_SHIFT	0x05
 #define PALMAS_GPIO_SET_DATA_OUT_GPIO_4_SET_DATA_OUT		0x10
-#define PALMAS_GPIO_SET_DATA_OUT_GPIO_4_SET_DATA_OUT_SHIFT	4
+#define PALMAS_GPIO_SET_DATA_OUT_GPIO_4_SET_DATA_OUT_SHIFT	0x04
 #define PALMAS_GPIO_SET_DATA_OUT_GPIO_3_SET_DATA_OUT		0x08
-#define PALMAS_GPIO_SET_DATA_OUT_GPIO_3_SET_DATA_OUT_SHIFT	3
+#define PALMAS_GPIO_SET_DATA_OUT_GPIO_3_SET_DATA_OUT_SHIFT	0x03
 #define PALMAS_GPIO_SET_DATA_OUT_GPIO_2_SET_DATA_OUT		0x04
-#define PALMAS_GPIO_SET_DATA_OUT_GPIO_2_SET_DATA_OUT_SHIFT	2
+#define PALMAS_GPIO_SET_DATA_OUT_GPIO_2_SET_DATA_OUT_SHIFT	0x02
 #define PALMAS_GPIO_SET_DATA_OUT_GPIO_1_SET_DATA_OUT		0x02
-#define PALMAS_GPIO_SET_DATA_OUT_GPIO_1_SET_DATA_OUT_SHIFT	1
+#define PALMAS_GPIO_SET_DATA_OUT_GPIO_1_SET_DATA_OUT_SHIFT	0x01
 #define PALMAS_GPIO_SET_DATA_OUT_GPIO_0_SET_DATA_OUT		0x01
-#define PALMAS_GPIO_SET_DATA_OUT_GPIO_0_SET_DATA_OUT_SHIFT	0
+#define PALMAS_GPIO_SET_DATA_OUT_GPIO_0_SET_DATA_OUT_SHIFT	0x00
 
 /* Bit definitions for PU_PD_GPIO_CTRL1 */
 #define PALMAS_PU_PD_GPIO_CTRL1_GPIO_3_PD			0x40
-#define PALMAS_PU_PD_GPIO_CTRL1_GPIO_3_PD_SHIFT			6
+#define PALMAS_PU_PD_GPIO_CTRL1_GPIO_3_PD_SHIFT			0x06
 #define PALMAS_PU_PD_GPIO_CTRL1_GPIO_2_PU			0x20
-#define PALMAS_PU_PD_GPIO_CTRL1_GPIO_2_PU_SHIFT			5
+#define PALMAS_PU_PD_GPIO_CTRL1_GPIO_2_PU_SHIFT			0x05
 #define PALMAS_PU_PD_GPIO_CTRL1_GPIO_2_PD			0x10
-#define PALMAS_PU_PD_GPIO_CTRL1_GPIO_2_PD_SHIFT			4
+#define PALMAS_PU_PD_GPIO_CTRL1_GPIO_2_PD_SHIFT			0x04
 #define PALMAS_PU_PD_GPIO_CTRL1_GPIO_1_PU			0x08
-#define PALMAS_PU_PD_GPIO_CTRL1_GPIO_1_PU_SHIFT			3
+#define PALMAS_PU_PD_GPIO_CTRL1_GPIO_1_PU_SHIFT			0x03
 #define PALMAS_PU_PD_GPIO_CTRL1_GPIO_1_PD			0x04
-#define PALMAS_PU_PD_GPIO_CTRL1_GPIO_1_PD_SHIFT			2
+#define PALMAS_PU_PD_GPIO_CTRL1_GPIO_1_PD_SHIFT			0x02
 #define PALMAS_PU_PD_GPIO_CTRL1_GPIO_0_PD			0x01
-#define PALMAS_PU_PD_GPIO_CTRL1_GPIO_0_PD_SHIFT			0
+#define PALMAS_PU_PD_GPIO_CTRL1_GPIO_0_PD_SHIFT			0x00
 
 /* Bit definitions for PU_PD_GPIO_CTRL2 */
 #define PALMAS_PU_PD_GPIO_CTRL2_GPIO_7_PD			0x40
-#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_7_PD_SHIFT			6
+#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_7_PD_SHIFT			0x06
 #define PALMAS_PU_PD_GPIO_CTRL2_GPIO_6_PU			0x20
-#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_6_PU_SHIFT			5
+#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_6_PU_SHIFT			0x05
 #define PALMAS_PU_PD_GPIO_CTRL2_GPIO_6_PD			0x10
-#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_6_PD_SHIFT			4
+#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_6_PD_SHIFT			0x04
 #define PALMAS_PU_PD_GPIO_CTRL2_GPIO_5_PU			0x08
-#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_5_PU_SHIFT			3
+#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_5_PU_SHIFT			0x03
 #define PALMAS_PU_PD_GPIO_CTRL2_GPIO_5_PD			0x04
-#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_5_PD_SHIFT			2
+#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_5_PD_SHIFT			0x02
 #define PALMAS_PU_PD_GPIO_CTRL2_GPIO_4_PU			0x02
-#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_4_PU_SHIFT			1
+#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_4_PU_SHIFT			0x01
 #define PALMAS_PU_PD_GPIO_CTRL2_GPIO_4_PD			0x01
-#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_4_PD_SHIFT			0
+#define PALMAS_PU_PD_GPIO_CTRL2_GPIO_4_PD_SHIFT			0x00
 
 /* Bit definitions for OD_OUTPUT_GPIO_CTRL */
 #define PALMAS_OD_OUTPUT_GPIO_CTRL_GPIO_5_OD			0x20
-#define PALMAS_OD_OUTPUT_GPIO_CTRL_GPIO_5_OD_SHIFT		5
+#define PALMAS_OD_OUTPUT_GPIO_CTRL_GPIO_5_OD_SHIFT		0x05
 #define PALMAS_OD_OUTPUT_GPIO_CTRL_GPIO_2_OD			0x04
-#define PALMAS_OD_OUTPUT_GPIO_CTRL_GPIO_2_OD_SHIFT		2
+#define PALMAS_OD_OUTPUT_GPIO_CTRL_GPIO_2_OD_SHIFT		0x02
 #define PALMAS_OD_OUTPUT_GPIO_CTRL_GPIO_1_OD			0x02
-#define PALMAS_OD_OUTPUT_GPIO_CTRL_GPIO_1_OD_SHIFT		1
+#define PALMAS_OD_OUTPUT_GPIO_CTRL_GPIO_1_OD_SHIFT		0x01
 
 /* Registers for function GPADC */
-#define PALMAS_GPADC_CTRL1					0x0
-#define PALMAS_GPADC_CTRL2					0x1
-#define PALMAS_GPADC_RT_CTRL					0x2
-#define PALMAS_GPADC_AUTO_CTRL					0x3
-#define PALMAS_GPADC_STATUS					0x4
-#define PALMAS_GPADC_RT_SELECT					0x5
-#define PALMAS_GPADC_RT_CONV0_LSB				0x6
-#define PALMAS_GPADC_RT_CONV0_MSB				0x7
-#define PALMAS_GPADC_AUTO_SELECT				0x8
-#define PALMAS_GPADC_AUTO_CONV0_LSB				0x9
-#define PALMAS_GPADC_AUTO_CONV0_MSB				0xA
-#define PALMAS_GPADC_AUTO_CONV1_LSB				0xB
-#define PALMAS_GPADC_AUTO_CONV1_MSB				0xC
-#define PALMAS_GPADC_SW_SELECT					0xD
-#define PALMAS_GPADC_SW_CONV0_LSB				0xE
-#define PALMAS_GPADC_SW_CONV0_MSB				0xF
+#define PALMAS_GPADC_CTRL1					0x00
+#define PALMAS_GPADC_CTRL2					0x01
+#define PALMAS_GPADC_RT_CTRL					0x02
+#define PALMAS_GPADC_AUTO_CTRL					0x03
+#define PALMAS_GPADC_STATUS					0x04
+#define PALMAS_GPADC_RT_SELECT					0x05
+#define PALMAS_GPADC_RT_CONV0_LSB				0x06
+#define PALMAS_GPADC_RT_CONV0_MSB				0x07
+#define PALMAS_GPADC_AUTO_SELECT				0x08
+#define PALMAS_GPADC_AUTO_CONV0_LSB				0x09
+#define PALMAS_GPADC_AUTO_CONV0_MSB				0x0A
+#define PALMAS_GPADC_AUTO_CONV1_LSB				0x0B
+#define PALMAS_GPADC_AUTO_CONV1_MSB				0x0C
+#define PALMAS_GPADC_SW_SELECT					0x0D
+#define PALMAS_GPADC_SW_CONV0_LSB				0x0E
+#define PALMAS_GPADC_SW_CONV0_MSB				0x0F
 #define PALMAS_GPADC_THRES_CONV0_LSB				0x10
 #define PALMAS_GPADC_THRES_CONV0_MSB				0x11
 #define PALMAS_GPADC_THRES_CONV1_LSB				0x12
@@ -2731,150 +2731,150 @@ enum usb_irq_events {
 
 /* Bit definitions for GPADC_CTRL1 */
 #define PALMAS_GPADC_CTRL1_RESERVED_MASK			0xc0
-#define PALMAS_GPADC_CTRL1_RESERVED_SHIFT			6
+#define PALMAS_GPADC_CTRL1_RESERVED_SHIFT			0x06
 #define PALMAS_GPADC_CTRL1_CURRENT_SRC_CH3_MASK			0x30
-#define PALMAS_GPADC_CTRL1_CURRENT_SRC_CH3_SHIFT		4
+#define PALMAS_GPADC_CTRL1_CURRENT_SRC_CH3_SHIFT		0x04
 #define PALMAS_GPADC_CTRL1_CURRENT_SRC_CH0_MASK			0x0c
-#define PALMAS_GPADC_CTRL1_CURRENT_SRC_CH0_SHIFT		2
+#define PALMAS_GPADC_CTRL1_CURRENT_SRC_CH0_SHIFT		0x02
 #define PALMAS_GPADC_CTRL1_BAT_REMOVAL_DET			0x02
-#define PALMAS_GPADC_CTRL1_BAT_REMOVAL_DET_SHIFT		1
+#define PALMAS_GPADC_CTRL1_BAT_REMOVAL_DET_SHIFT		0x01
 #define PALMAS_GPADC_CTRL1_GPADC_FORCE				0x01
-#define PALMAS_GPADC_CTRL1_GPADC_FORCE_SHIFT			0
+#define PALMAS_GPADC_CTRL1_GPADC_FORCE_SHIFT			0x00
 
 /* Bit definitions for GPADC_CTRL2 */
 #define PALMAS_GPADC_CTRL2_RESERVED_MASK			0x06
-#define PALMAS_GPADC_CTRL2_RESERVED_SHIFT			1
+#define PALMAS_GPADC_CTRL2_RESERVED_SHIFT			0x01
 
 /* Bit definitions for GPADC_RT_CTRL */
 #define PALMAS_GPADC_RT_CTRL_EXTEND_DELAY			0x02
-#define PALMAS_GPADC_RT_CTRL_EXTEND_DELAY_SHIFT			1
+#define PALMAS_GPADC_RT_CTRL_EXTEND_DELAY_SHIFT			0x01
 #define PALMAS_GPADC_RT_CTRL_START_POLARITY			0x01
-#define PALMAS_GPADC_RT_CTRL_START_POLARITY_SHIFT		0
+#define PALMAS_GPADC_RT_CTRL_START_POLARITY_SHIFT		0x00
 
 /* Bit definitions for GPADC_AUTO_CTRL */
 #define PALMAS_GPADC_AUTO_CTRL_SHUTDOWN_CONV1			0x80
-#define PALMAS_GPADC_AUTO_CTRL_SHUTDOWN_CONV1_SHIFT		7
+#define PALMAS_GPADC_AUTO_CTRL_SHUTDOWN_CONV1_SHIFT		0x07
 #define PALMAS_GPADC_AUTO_CTRL_SHUTDOWN_CONV0			0x40
-#define PALMAS_GPADC_AUTO_CTRL_SHUTDOWN_CONV0_SHIFT		6
+#define PALMAS_GPADC_AUTO_CTRL_SHUTDOWN_CONV0_SHIFT		0x06
 #define PALMAS_GPADC_AUTO_CTRL_AUTO_CONV1_EN			0x20
-#define PALMAS_GPADC_AUTO_CTRL_AUTO_CONV1_EN_SHIFT		5
+#define PALMAS_GPADC_AUTO_CTRL_AUTO_CONV1_EN_SHIFT		0x05
 #define PALMAS_GPADC_AUTO_CTRL_AUTO_CONV0_EN			0x10
-#define PALMAS_GPADC_AUTO_CTRL_AUTO_CONV0_EN_SHIFT		4
-#define PALMAS_GPADC_AUTO_CTRL_COUNTER_CONV_MASK		0x0f
-#define PALMAS_GPADC_AUTO_CTRL_COUNTER_CONV_SHIFT		0
+#define PALMAS_GPADC_AUTO_CTRL_AUTO_CONV0_EN_SHIFT		0x04
+#define PALMAS_GPADC_AUTO_CTRL_COUNTER_CONV_MASK		0x0F
+#define PALMAS_GPADC_AUTO_CTRL_COUNTER_CONV_SHIFT		0x00
 
 /* Bit definitions for GPADC_STATUS */
 #define PALMAS_GPADC_STATUS_GPADC_AVAILABLE			0x10
-#define PALMAS_GPADC_STATUS_GPADC_AVAILABLE_SHIFT		4
+#define PALMAS_GPADC_STATUS_GPADC_AVAILABLE_SHIFT		0x04
 
 /* Bit definitions for GPADC_RT_SELECT */
 #define PALMAS_GPADC_RT_SELECT_RT_CONV_EN			0x80
-#define PALMAS_GPADC_RT_SELECT_RT_CONV_EN_SHIFT			7
-#define PALMAS_GPADC_RT_SELECT_RT_CONV0_SEL_MASK		0x0f
-#define PALMAS_GPADC_RT_SELECT_RT_CONV0_SEL_SHIFT		0
+#define PALMAS_GPADC_RT_SELECT_RT_CONV_EN_SHIFT			0x07
+#define PALMAS_GPADC_RT_SELECT_RT_CONV0_SEL_MASK		0x0F
+#define PALMAS_GPADC_RT_SELECT_RT_CONV0_SEL_SHIFT		0x00
 
 /* Bit definitions for GPADC_RT_CONV0_LSB */
-#define PALMAS_GPADC_RT_CONV0_LSB_RT_CONV0_LSB_MASK		0xff
-#define PALMAS_GPADC_RT_CONV0_LSB_RT_CONV0_LSB_SHIFT		0
+#define PALMAS_GPADC_RT_CONV0_LSB_RT_CONV0_LSB_MASK		0xFF
+#define PALMAS_GPADC_RT_CONV0_LSB_RT_CONV0_LSB_SHIFT		0x00
 
 /* Bit definitions for GPADC_RT_CONV0_MSB */
-#define PALMAS_GPADC_RT_CONV0_MSB_RT_CONV0_MSB_MASK		0x0f
-#define PALMAS_GPADC_RT_CONV0_MSB_RT_CONV0_MSB_SHIFT		0
+#define PALMAS_GPADC_RT_CONV0_MSB_RT_CONV0_MSB_MASK		0x0F
+#define PALMAS_GPADC_RT_CONV0_MSB_RT_CONV0_MSB_SHIFT		0x00
 
 /* Bit definitions for GPADC_AUTO_SELECT */
-#define PALMAS_GPADC_AUTO_SELECT_AUTO_CONV1_SEL_MASK		0xf0
-#define PALMAS_GPADC_AUTO_SELECT_AUTO_CONV1_SEL_SHIFT		4
-#define PALMAS_GPADC_AUTO_SELECT_AUTO_CONV0_SEL_MASK		0x0f
-#define PALMAS_GPADC_AUTO_SELECT_AUTO_CONV0_SEL_SHIFT		0
+#define PALMAS_GPADC_AUTO_SELECT_AUTO_CONV1_SEL_MASK		0xF0
+#define PALMAS_GPADC_AUTO_SELECT_AUTO_CONV1_SEL_SHIFT		0x04
+#define PALMAS_GPADC_AUTO_SELECT_AUTO_CONV0_SEL_MASK		0x0F
+#define PALMAS_GPADC_AUTO_SELECT_AUTO_CONV0_SEL_SHIFT		0x00
 
 /* Bit definitions for GPADC_AUTO_CONV0_LSB */
-#define PALMAS_GPADC_AUTO_CONV0_LSB_AUTO_CONV0_LSB_MASK		0xff
-#define PALMAS_GPADC_AUTO_CONV0_LSB_AUTO_CONV0_LSB_SHIFT	0
+#define PALMAS_GPADC_AUTO_CONV0_LSB_AUTO_CONV0_LSB_MASK		0xFF
+#define PALMAS_GPADC_AUTO_CONV0_LSB_AUTO_CONV0_LSB_SHIFT	0x00
 
 /* Bit definitions for GPADC_AUTO_CONV0_MSB */
-#define PALMAS_GPADC_AUTO_CONV0_MSB_AUTO_CONV0_MSB_MASK		0x0f
-#define PALMAS_GPADC_AUTO_CONV0_MSB_AUTO_CONV0_MSB_SHIFT	0
+#define PALMAS_GPADC_AUTO_CONV0_MSB_AUTO_CONV0_MSB_MASK		0x0F
+#define PALMAS_GPADC_AUTO_CONV0_MSB_AUTO_CONV0_MSB_SHIFT	0x00
 
 /* Bit definitions for GPADC_AUTO_CONV1_LSB */
-#define PALMAS_GPADC_AUTO_CONV1_LSB_AUTO_CONV1_LSB_MASK		0xff
-#define PALMAS_GPADC_AUTO_CONV1_LSB_AUTO_CONV1_LSB_SHIFT	0
+#define PALMAS_GPADC_AUTO_CONV1_LSB_AUTO_CONV1_LSB_MASK		0xFF
+#define PALMAS_GPADC_AUTO_CONV1_LSB_AUTO_CONV1_LSB_SHIFT	0x00
 
 /* Bit definitions for GPADC_AUTO_CONV1_MSB */
-#define PALMAS_GPADC_AUTO_CONV1_MSB_AUTO_CONV1_MSB_MASK		0x0f
-#define PALMAS_GPADC_AUTO_CONV1_MSB_AUTO_CONV1_MSB_SHIFT	0
+#define PALMAS_GPADC_AUTO_CONV1_MSB_AUTO_CONV1_MSB_MASK		0x0F
+#define PALMAS_GPADC_AUTO_CONV1_MSB_AUTO_CONV1_MSB_SHIFT	0x00
 
 /* Bit definitions for GPADC_SW_SELECT */
 #define PALMAS_GPADC_SW_SELECT_SW_CONV_EN			0x80
-#define PALMAS_GPADC_SW_SELECT_SW_CONV_EN_SHIFT			7
+#define PALMAS_GPADC_SW_SELECT_SW_CONV_EN_SHIFT			0x07
 #define PALMAS_GPADC_SW_SELECT_SW_START_CONV0			0x10
-#define PALMAS_GPADC_SW_SELECT_SW_START_CONV0_SHIFT		4
-#define PALMAS_GPADC_SW_SELECT_SW_CONV0_SEL_MASK		0x0f
-#define PALMAS_GPADC_SW_SELECT_SW_CONV0_SEL_SHIFT		0
+#define PALMAS_GPADC_SW_SELECT_SW_START_CONV0_SHIFT		0x04
+#define PALMAS_GPADC_SW_SELECT_SW_CONV0_SEL_MASK		0x0F
+#define PALMAS_GPADC_SW_SELECT_SW_CONV0_SEL_SHIFT		0x00
 
 /* Bit definitions for GPADC_SW_CONV0_LSB */
-#define PALMAS_GPADC_SW_CONV0_LSB_SW_CONV0_LSB_MASK		0xff
-#define PALMAS_GPADC_SW_CONV0_LSB_SW_CONV0_LSB_SHIFT		0
+#define PALMAS_GPADC_SW_CONV0_LSB_SW_CONV0_LSB_MASK		0xFF
+#define PALMAS_GPADC_SW_CONV0_LSB_SW_CONV0_LSB_SHIFT		0x00
 
 /* Bit definitions for GPADC_SW_CONV0_MSB */
-#define PALMAS_GPADC_SW_CONV0_MSB_SW_CONV0_MSB_MASK		0x0f
-#define PALMAS_GPADC_SW_CONV0_MSB_SW_CONV0_MSB_SHIFT		0
+#define PALMAS_GPADC_SW_CONV0_MSB_SW_CONV0_MSB_MASK		0x0F
+#define PALMAS_GPADC_SW_CONV0_MSB_SW_CONV0_MSB_SHIFT		0x00
 
 /* Bit definitions for GPADC_THRES_CONV0_LSB */
-#define PALMAS_GPADC_THRES_CONV0_LSB_THRES_CONV0_LSB_MASK	0xff
-#define PALMAS_GPADC_THRES_CONV0_LSB_THRES_CONV0_LSB_SHIFT	0
+#define PALMAS_GPADC_THRES_CONV0_LSB_THRES_CONV0_LSB_MASK	0xFF
+#define PALMAS_GPADC_THRES_CONV0_LSB_THRES_CONV0_LSB_SHIFT	0x00
 
 /* Bit definitions for GPADC_THRES_CONV0_MSB */
 #define PALMAS_GPADC_THRES_CONV0_MSB_THRES_CONV0_POL		0x80
-#define PALMAS_GPADC_THRES_CONV0_MSB_THRES_CONV0_POL_SHIFT	7
-#define PALMAS_GPADC_THRES_CONV0_MSB_THRES_CONV0_MSB_MASK	0x0f
-#define PALMAS_GPADC_THRES_CONV0_MSB_THRES_CONV0_MSB_SHIFT	0
+#define PALMAS_GPADC_THRES_CONV0_MSB_THRES_CONV0_POL_SHIFT	0x07
+#define PALMAS_GPADC_THRES_CONV0_MSB_THRES_CONV0_MSB_MASK	0x0F
+#define PALMAS_GPADC_THRES_CONV0_MSB_THRES_CONV0_MSB_SHIFT	0x00
 
 /* Bit definitions for GPADC_THRES_CONV1_LSB */
-#define PALMAS_GPADC_THRES_CONV1_LSB_THRES_CONV1_LSB_MASK	0xff
-#define PALMAS_GPADC_THRES_CONV1_LSB_THRES_CONV1_LSB_SHIFT	0
+#define PALMAS_GPADC_THRES_CONV1_LSB_THRES_CONV1_LSB_MASK	0xFF
+#define PALMAS_GPADC_THRES_CONV1_LSB_THRES_CONV1_LSB_SHIFT	0x00
 
 /* Bit definitions for GPADC_THRES_CONV1_MSB */
 #define PALMAS_GPADC_THRES_CONV1_MSB_THRES_CONV1_POL		0x80
-#define PALMAS_GPADC_THRES_CONV1_MSB_THRES_CONV1_POL_SHIFT	7
-#define PALMAS_GPADC_THRES_CONV1_MSB_THRES_CONV1_MSB_MASK	0x0f
-#define PALMAS_GPADC_THRES_CONV1_MSB_THRES_CONV1_MSB_SHIFT	0
+#define PALMAS_GPADC_THRES_CONV1_MSB_THRES_CONV1_POL_SHIFT	0x07
+#define PALMAS_GPADC_THRES_CONV1_MSB_THRES_CONV1_MSB_MASK	0x0F
+#define PALMAS_GPADC_THRES_CONV1_MSB_THRES_CONV1_MSB_SHIFT	0x00
 
 /* Bit definitions for GPADC_SMPS_ILMONITOR_EN */
 #define PALMAS_GPADC_SMPS_ILMONITOR_EN_SMPS_ILMON_EN		0x20
-#define PALMAS_GPADC_SMPS_ILMONITOR_EN_SMPS_ILMON_EN_SHIFT	5
+#define PALMAS_GPADC_SMPS_ILMONITOR_EN_SMPS_ILMON_EN_SHIFT	0x05
 #define PALMAS_GPADC_SMPS_ILMONITOR_EN_SMPS_ILMON_REXT		0x10
-#define PALMAS_GPADC_SMPS_ILMONITOR_EN_SMPS_ILMON_REXT_SHIFT	4
-#define PALMAS_GPADC_SMPS_ILMONITOR_EN_SMPS_ILMON_SEL_MASK	0x0f
-#define PALMAS_GPADC_SMPS_ILMONITOR_EN_SMPS_ILMON_SEL_SHIFT	0
+#define PALMAS_GPADC_SMPS_ILMONITOR_EN_SMPS_ILMON_REXT_SHIFT	0x04
+#define PALMAS_GPADC_SMPS_ILMONITOR_EN_SMPS_ILMON_SEL_MASK	0x0F
+#define PALMAS_GPADC_SMPS_ILMONITOR_EN_SMPS_ILMON_SEL_SHIFT	0x00
 
 /* Bit definitions for GPADC_SMPS_VSEL_MONITORING */
 #define PALMAS_GPADC_SMPS_VSEL_MONITORING_ACTIVE_PHASE		0x80
-#define PALMAS_GPADC_SMPS_VSEL_MONITORING_ACTIVE_PHASE_SHIFT	7
-#define PALMAS_GPADC_SMPS_VSEL_MONITORING_SMPS_VSEL_MONITORING_MASK	0x7f
-#define PALMAS_GPADC_SMPS_VSEL_MONITORING_SMPS_VSEL_MONITORING_SHIFT	0
+#define PALMAS_GPADC_SMPS_VSEL_MONITORING_ACTIVE_PHASE_SHIFT	0x07
+#define PALMAS_GPADC_SMPS_VSEL_MONITORING_SMPS_VSEL_MONITORING_MASK	0x7F
+#define PALMAS_GPADC_SMPS_VSEL_MONITORING_SMPS_VSEL_MONITORING_SHIFT	0x00
 
 /* Registers for function GPADC */
-#define PALMAS_GPADC_TRIM1					0x0
-#define PALMAS_GPADC_TRIM2					0x1
-#define PALMAS_GPADC_TRIM3					0x2
-#define PALMAS_GPADC_TRIM4					0x3
-#define PALMAS_GPADC_TRIM5					0x4
-#define PALMAS_GPADC_TRIM6					0x5
-#define PALMAS_GPADC_TRIM7					0x6
-#define PALMAS_GPADC_TRIM8					0x7
-#define PALMAS_GPADC_TRIM9					0x8
-#define PALMAS_GPADC_TRIM10					0x9
-#define PALMAS_GPADC_TRIM11					0xA
-#define PALMAS_GPADC_TRIM12					0xB
-#define PALMAS_GPADC_TRIM13					0xC
-#define PALMAS_GPADC_TRIM14					0xD
-#define PALMAS_GPADC_TRIM15					0xE
-#define PALMAS_GPADC_TRIM16					0xF
+#define PALMAS_GPADC_TRIM1					0x00
+#define PALMAS_GPADC_TRIM2					0x01
+#define PALMAS_GPADC_TRIM3					0x02
+#define PALMAS_GPADC_TRIM4					0x03
+#define PALMAS_GPADC_TRIM5					0x04
+#define PALMAS_GPADC_TRIM6					0x05
+#define PALMAS_GPADC_TRIM7					0x06
+#define PALMAS_GPADC_TRIM8					0x07
+#define PALMAS_GPADC_TRIM9					0x08
+#define PALMAS_GPADC_TRIM10					0x09
+#define PALMAS_GPADC_TRIM11					0x0A
+#define PALMAS_GPADC_TRIM12					0x0B
+#define PALMAS_GPADC_TRIM13					0x0C
+#define PALMAS_GPADC_TRIM14					0x0D
+#define PALMAS_GPADC_TRIM15					0x0E
+#define PALMAS_GPADC_TRIM16					0x0F
 
 static inline int palmas_read(struct palmas *palmas, unsigned int base,
 		unsigned int reg, unsigned int *val)
 {
-	unsigned int addr =  PALMAS_BASE_TO_REG(base, reg);
+	unsigned int addr = PALMAS_BASE_TO_REG(base, reg);
 	int slave_id = PALMAS_BASE_TO_SLAVE(base);
 
 	return regmap_read(palmas->regmap[slave_id], addr, val);
-- 
cgit 


From 368c96640d10a145da5f258f2d2833668d4f3629 Mon Sep 17 00:00:00 2001
From: Ping Cheng <pinglinux@gmail.com>
Date: Mon, 2 Jun 2014 17:19:52 -0700
Subject: HID: core: add two new usages for digitizer

On Feb 17, 2014, two new usages were approved for HID Usage Table 18 -
Digitizer Page:

5A	Secondary Barrel Switch		MC	16.4
5B	Transducer Serial Number	SV	16.3.1

This patch adds relevant definitions to hid/input. It also removes
outdated comments in hid.h.

Signed-off-by: Ping Cheng <pingc@wacom.com>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-debug.c | 2 ++
 drivers/hid/hid-input.c | 5 +++++
 include/linux/hid.h     | 7 ++-----
 3 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index c2537df10f47..84c3cb15ccdd 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -165,6 +165,8 @@ static const struct hid_usage_entry hid_usage_table[] = {
     {0, 0x53, "DeviceIndex"},
     {0, 0x54, "ContactCount"},
     {0, 0x55, "ContactMaximumNumber"},
+    {0, 0x5A, "SecondaryBarrelSwitch"},
+    {0, 0x5B, "TransducerSerialNumber"},
   { 15, 0, "PhysicalInterfaceDevice" },
     {0, 0x00, "Undefined"},
     {0, 0x01, "Physical_Interface_Device"},
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 9f2076acffb1..2619f7f4517a 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -684,9 +684,14 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 			break;
 
 		case 0x46: /* TabletPick */
+		case 0x5a: /* SecondaryBarrelSwitch */
 			map_key_clear(BTN_STYLUS2);
 			break;
 
+		case 0x5b: /* TransducerSerialNumber */
+			set_bit(MSC_SERIAL, input->mscbit);
+			break;
+
 		default:  goto unknown;
 		}
 		break;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 720e3a10608c..a468ec8cc4fe 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -233,11 +233,6 @@ struct hid_item {
 #define HID_DG_BARRELSWITCH	0x000d0044
 #define HID_DG_ERASER		0x000d0045
 #define HID_DG_TABLETPICK	0x000d0046
-/*
- * as of May 20, 2009 the usages below are not yet in the official USB spec
- * but are being pushed by Microsft as described in their paper "Digitizer
- * Drivers for Windows Touch and Pen-Based Computers"
- */
 #define HID_DG_CONFIDENCE	0x000d0047
 #define HID_DG_WIDTH		0x000d0048
 #define HID_DG_HEIGHT		0x000d0049
@@ -246,6 +241,8 @@ struct hid_item {
 #define HID_DG_DEVICEINDEX	0x000d0053
 #define HID_DG_CONTACTCOUNT	0x000d0054
 #define HID_DG_CONTACTMAX	0x000d0055
+#define HID_DG_BARRELSWITCH2	0x000d005a
+#define HID_DG_TOOLSERIALNUMBER	0x000d005b
 
 /*
  * HID report types --- Ouch! HID spec says 1 2 3!
-- 
cgit 


From c9482a5bdcc09be9096f40e858c5fe39c389cd52 Mon Sep 17 00:00:00 2001
From: Jianyu Zhan <nasa4836@gmail.com>
Date: Sat, 26 Apr 2014 15:40:28 +0800
Subject: kernfs: move the last knowledge of sysfs out from kernfs

There is still one residue of sysfs remaining: the sb_magic
SYSFS_MAGIC. However this should be kernfs user specific,
so this patch moves it out. Kernfs users should specify their
magic number while mounting.

Signed-off-by: Jianyu Zhan <nasa4836@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/kernfs/mount.c      | 11 ++++++-----
 fs/sysfs/mount.c       |  4 +++-
 include/linux/kernfs.h | 13 ++++++++-----
 kernel/cgroup.c        |  4 +++-
 4 files changed, 20 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 6a5f04ac8704..95dcd1d558bb 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -62,7 +62,7 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
 	return NULL;
 }
 
-static int kernfs_fill_super(struct super_block *sb)
+static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
 {
 	struct kernfs_super_info *info = kernfs_info(sb);
 	struct inode *inode;
@@ -70,7 +70,7 @@ static int kernfs_fill_super(struct super_block *sb)
 
 	sb->s_blocksize = PAGE_CACHE_SIZE;
 	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
-	sb->s_magic = SYSFS_MAGIC;
+	sb->s_magic = magic;
 	sb->s_op = &kernfs_sops;
 	sb->s_time_gran = 1;
 
@@ -131,6 +131,7 @@ const void *kernfs_super_ns(struct super_block *sb)
  * @fs_type: file_system_type of the fs being mounted
  * @flags: mount flags specified for the mount
  * @root: kernfs_root of the hierarchy being mounted
+ * @magic: file system specific magic number
  * @new_sb_created: tell the caller if we allocated a new superblock
  * @ns: optional namespace tag of the mount
  *
@@ -142,8 +143,8 @@ const void *kernfs_super_ns(struct super_block *sb)
  * The return value can be passed to the vfs layer verbatim.
  */
 struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
-			       struct kernfs_root *root, bool *new_sb_created,
-			       const void *ns)
+				struct kernfs_root *root, unsigned long magic,
+				bool *new_sb_created, const void *ns)
 {
 	struct super_block *sb;
 	struct kernfs_super_info *info;
@@ -166,7 +167,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
 		*new_sb_created = !sb->s_root;
 
 	if (!sb->s_root) {
-		error = kernfs_fill_super(sb);
+		error = kernfs_fill_super(sb, magic);
 		if (error) {
 			deactivate_locked_super(sb);
 			return ERR_PTR(error);
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 8794423f7efb..8a49486bf30c 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -13,6 +13,7 @@
 #define DEBUG
 
 #include <linux/fs.h>
+#include <linux/magic.h>
 #include <linux/mount.h>
 #include <linux/init.h>
 #include <linux/user_namespace.h>
@@ -38,7 +39,8 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type,
 	}
 
 	ns = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
-	root = kernfs_mount_ns(fs_type, flags, sysfs_root, &new_sb, ns);
+	root = kernfs_mount_ns(fs_type, flags, sysfs_root,
+				SYSFS_MAGIC, &new_sb, ns);
 	if (IS_ERR(root) || !new_sb)
 		kobj_ns_drop(KOBJ_NS_TYPE_NET, ns);
 	return root;
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index ca1be5c9136c..52bf5677db0b 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -297,8 +297,8 @@ void kernfs_notify(struct kernfs_node *kn);
 
 const void *kernfs_super_ns(struct super_block *sb);
 struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
-			       struct kernfs_root *root, bool *new_sb_created,
-			       const void *ns);
+			       struct kernfs_root *root, unsigned long magic,
+			       bool *new_sb_created, const void *ns);
 void kernfs_kill_sb(struct super_block *sb);
 
 void kernfs_init(void);
@@ -391,7 +391,8 @@ static inline const void *kernfs_super_ns(struct super_block *sb)
 
 static inline struct dentry *
 kernfs_mount_ns(struct file_system_type *fs_type, int flags,
-		struct kernfs_root *root, bool *new_sb_created, const void *ns)
+		struct kernfs_root *root, unsigned long magic,
+		bool *new_sb_created, const void *ns)
 { return ERR_PTR(-ENOSYS); }
 
 static inline void kernfs_kill_sb(struct super_block *sb) { }
@@ -449,9 +450,11 @@ static inline int kernfs_rename(struct kernfs_node *kn,
 
 static inline struct dentry *
 kernfs_mount(struct file_system_type *fs_type, int flags,
-	     struct kernfs_root *root, bool *new_sb_created)
+		struct kernfs_root *root, unsigned long magic,
+		bool *new_sb_created)
 {
-	return kernfs_mount_ns(fs_type, flags, root, new_sb_created, NULL);
+	return kernfs_mount_ns(fs_type, flags, root,
+				magic, new_sb_created, NULL);
 }
 
 #endif	/* __LINUX_KERNFS_H */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3f1ca934a237..ceee0c54c6a4 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -33,6 +33,7 @@
 #include <linux/init_task.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/magic.h>
 #include <linux/mm.h>
 #include <linux/mutex.h>
 #include <linux/mount.h>
@@ -1604,7 +1605,8 @@ out_unlock:
 	if (ret)
 		return ERR_PTR(ret);
 
-	dentry = kernfs_mount(fs_type, flags, root->kf_root, &new_sb);
+	dentry = kernfs_mount(fs_type, flags, root->kf_root,
+				CGROUP_SUPER_MAGIC, &new_sb);
 	if (IS_ERR(dentry) || !new_sb)
 		cgroup_put(&root->cgrp);
 	return dentry;
-- 
cgit 


From 1dd40c3ecd9b8a4ab91dbf2e6ce10b82a3b5ae63 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 14 Mar 2014 18:41:24 -0400
Subject: dm: introduce dm_accept_partial_bio

The function dm_accept_partial_bio allows the target to specify how many
sectors of the current bio it will process.  If the target only wants to
accept part of the bio, it calls dm_accept_partial_bio and the DM core
sends the rest of the data in the next bio.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm.c               | 59 +++++++++++++++++++++++++++++++++++++------
 include/linux/device-mapper.h |  2 ++
 2 files changed, 53 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 368a20dd85c2..97940fc8c302 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1110,6 +1110,46 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
 }
 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
 
+/*
+ * A target may call dm_accept_partial_bio only from the map routine.  It is
+ * allowed for all bio types except REQ_FLUSH.
+ *
+ * dm_accept_partial_bio informs the dm that the target only wants to process
+ * additional n_sectors sectors of the bio and the rest of the data should be
+ * sent in a next bio.
+ *
+ * A diagram that explains the arithmetics:
+ * +--------------------+---------------+-------+
+ * |         1          |       2       |   3   |
+ * +--------------------+---------------+-------+
+ *
+ * <-------------- *tio->len_ptr --------------->
+ *                      <------- bi_size ------->
+ *                      <-- n_sectors -->
+ *
+ * Region 1 was already iterated over with bio_advance or similar function.
+ *	(it may be empty if the target doesn't use bio_advance)
+ * Region 2 is the remaining bio size that the target wants to process.
+ *	(it may be empty if region 1 is non-empty, although there is no reason
+ *	 to make it empty)
+ * The target requires that region 3 is to be sent in the next bio.
+ *
+ * If the target wants to receive multiple copies of the bio (via num_*bios, etc),
+ * the partially processed part (the sum of regions 1+2) must be the same for all
+ * copies of the bio.
+ */
+void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
+{
+	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
+	unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
+	BUG_ON(bio->bi_rw & REQ_FLUSH);
+	BUG_ON(bi_size > *tio->len_ptr);
+	BUG_ON(n_sectors > bi_size);
+	*tio->len_ptr -= bi_size - n_sectors;
+	bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT;
+}
+EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
+
 static void __map_bio(struct dm_target_io *tio)
 {
 	int r;
@@ -1200,11 +1240,13 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci,
 
 static void __clone_and_map_simple_bio(struct clone_info *ci,
 				       struct dm_target *ti,
-				       unsigned target_bio_nr, unsigned len)
+				       unsigned target_bio_nr, unsigned *len)
 {
 	struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr);
 	struct bio *clone = &tio->clone;
 
+	tio->len_ptr = len;
+
 	/*
 	 * Discard requests require the bio's inline iovecs be initialized.
 	 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
@@ -1212,13 +1254,13 @@ static void __clone_and_map_simple_bio(struct clone_info *ci,
 	 */
 	 __bio_clone_fast(clone, ci->bio);
 	if (len)
-		bio_setup_sector(clone, ci->sector, len);
+		bio_setup_sector(clone, ci->sector, *len);
 
 	__map_bio(tio);
 }
 
 static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
-				  unsigned num_bios, unsigned len)
+				  unsigned num_bios, unsigned *len)
 {
 	unsigned target_bio_nr;
 
@@ -1233,13 +1275,13 @@ static int __send_empty_flush(struct clone_info *ci)
 
 	BUG_ON(bio_has_data(ci->bio));
 	while ((ti = dm_table_get_target(ci->map, target_nr++)))
-		__send_duplicate_bios(ci, ti, ti->num_flush_bios, 0);
+		__send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
 
 	return 0;
 }
 
 static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
-				     sector_t sector, unsigned len)
+				     sector_t sector, unsigned *len)
 {
 	struct bio *bio = ci->bio;
 	struct dm_target_io *tio;
@@ -1254,7 +1296,8 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti
 
 	for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
 		tio = alloc_tio(ci, ti, 0, target_bio_nr);
-		clone_bio(tio, bio, sector, len);
+		tio->len_ptr = len;
+		clone_bio(tio, bio, sector, *len);
 		__map_bio(tio);
 	}
 }
@@ -1306,7 +1349,7 @@ static int __send_changing_extent_only(struct clone_info *ci,
 		else
 			len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
 
-		__send_duplicate_bios(ci, ti, num_bios, len);
+		__send_duplicate_bios(ci, ti, num_bios, &len);
 
 		ci->sector += len;
 	} while (ci->sector_count -= len);
@@ -1345,7 +1388,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
 
 	len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
 
-	__clone_and_map_data_bio(ci, ti, ci->sector, len);
+	__clone_and_map_data_bio(ci, ti, ci->sector, &len);
 
 	ci->sector += len;
 	ci->sector_count -= len;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 63da56ed9796..0adca299f238 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -291,6 +291,7 @@ struct dm_target_io {
 	struct dm_io *io;
 	struct dm_target *ti;
 	unsigned target_bio_nr;
+	unsigned *len_ptr;
 	struct bio clone;
 };
 
@@ -401,6 +402,7 @@ int dm_copy_name_and_uuid(struct mapped_device *md, char *name, char *uuid);
 struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct dm_target *ti);
 int dm_noflush_suspending(struct dm_target *ti);
+void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors);
 union map_info *dm_get_rq_mapinfo(struct request *rq);
 
 struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
-- 
cgit 


From bd67608a6127c994e897c49cc4f72d9095925301 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Tue, 3 Jun 2014 23:04:30 -0400
Subject: NVMe: Rename io_timeout to nvme_io_timeout

It's positively immoral to have a global variable called 'io_timeout'.
Keep the module parameter called io_timeout, though.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme-core.c | 4 ++--
 include/linux/nvme.h      | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index bb6ce311ad44..2af079e571fc 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -54,8 +54,8 @@ static unsigned char admin_timeout = 60;
 module_param(admin_timeout, byte, 0644);
 MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
 
-unsigned char io_timeout = 30;
-module_param(io_timeout, byte, 0644);
+unsigned char nvme_io_timeout = 30;
+module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
 MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
 
 static unsigned char retry_time = 30;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 1813cfdb7e80..8541dd920bb7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -62,8 +62,8 @@ enum {
 
 #define NVME_VS(major, minor)	(major << 16 | minor)
 
-extern unsigned char io_timeout;
-#define NVME_IO_TIMEOUT	(io_timeout * HZ)
+extern unsigned char nvme_io_timeout;
+#define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)
 
 /*
  * Represents an NVM Express device.  Each nvme_dev is a PCI function.
-- 
cgit 


From e6cdb0929fe6726ba5203fc5529b74564d98a9e9 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Tue, 3 Jun 2014 11:24:06 +0800
Subject: blk-mq: fix sparse warning on missed __percpu annotation

'struct blk_mq_ctx' is  __percpu, so add the annotation
and fix the sparse warning reported from Fengguang:

	[block:for-linus 2/3] block/blk-mq.h:75:16: sparse: incorrect
	type in initializer (different address spaces)

Reported-by: kbuild test robot <fengguang.wu@intel.com>
Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 2 +-
 include/linux/blkdev.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 43eb3156e110..3bb4cfec276b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1767,7 +1767,7 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 {
 	struct blk_mq_hw_ctx **hctxs;
-	struct blk_mq_ctx *ctx;
+	struct blk_mq_ctx __percpu *ctx;
 	struct request_queue *q;
 	unsigned int *map;
 	int i;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8aba35f46f87..5c6f836afa1b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -335,7 +335,7 @@ struct request_queue {
 	unsigned int		*mq_map;
 
 	/* sw queues */
-	struct blk_mq_ctx	*queue_ctx;
+	struct blk_mq_ctx __percpu	*queue_ctx;
 	unsigned int		nr_queues;
 
 	/* hw dispatch queues */
-- 
cgit 


From bfec07d0f8ed78b10df3ca3bc23e27de1166ea45 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao2.yu@samsung.com>
Date: Wed, 28 May 2014 08:56:09 +0800
Subject: f2fs: avoid overflow when large directory feathure is enabled

When the large directory feature is enabled, there is one case which could
cause an overflow in dir_buckets(), as follows:
special case: level + dir_level >= 32 and level < MAX_DIR_HASH_DEPTH / 2.

Here we define MAX_DIR_BUCKETS to limit the return value when the condition
could trigger potential overflow.

Changes from V1
 o modify description of calculation in f2fs.txt suggested by Changman Lee.

Suggested-by: Changman Lee <cm224.lee@samsung.com>
Signed-off-by: Chao Yu <chao2.yu@samsung.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/filesystems/f2fs.txt | 8 ++++----
 fs/f2fs/dir.c                      | 4 ++--
 include/linux/f2fs_fs.h            | 3 +++
 3 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 25311e113e75..51afba17bbae 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -461,11 +461,11 @@ The number of blocks and buckets are determined by,
   # of blocks in level #n = |
                             `- 4, Otherwise
 
-                             ,- 2^ (n + dir_level),
-			     |            if n < MAX_DIR_HASH_DEPTH / 2,
+                             ,- 2^(n + dir_level),
+			     |        if n + dir_level < MAX_DIR_HASH_DEPTH / 2,
   # of buckets in level #n = |
-                             `- 2^((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1),
-			                  Otherwise
+                             `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1),
+			              Otherwise
 
 When F2FS finds a file name in a directory, at first a hash value of the file
 name is calculated. Then, F2FS scans the hash table in level #0 to find the
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index c3f148555c37..966acb039e3b 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -23,10 +23,10 @@ static unsigned long dir_blocks(struct inode *inode)
 
 static unsigned int dir_buckets(unsigned int level, int dir_level)
 {
-	if (level < MAX_DIR_HASH_DEPTH / 2)
+	if (level + dir_level < MAX_DIR_HASH_DEPTH / 2)
 		return 1 << (level + dir_level);
 	else
-		return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1);
+		return MAX_DIR_BUCKETS;
 }
 
 static unsigned int bucket_blocks(unsigned int level)
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 8c03f71307c6..ba6f3127738f 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -394,6 +394,9 @@ typedef __le32	f2fs_hash_t;
 /* MAX level for dir lookup */
 #define MAX_DIR_HASH_DEPTH	63
 
+/* MAX buckets in one level of dir */
+#define MAX_DIR_BUCKETS		(1 << ((MAX_DIR_HASH_DEPTH / 2) - 1))
+
 #define SIZE_OF_DIR_ENTRY	11	/* by byte */
 #define SIZE_OF_DENTRY_BITMAP	((NR_DENTRY_IN_BLOCK + BITS_PER_BYTE - 1) / \
 					BITS_PER_BYTE)
-- 
cgit 


From 1dbe4152168d44fa164edbdc9f1243de70b98f7a Mon Sep 17 00:00:00 2001
From: Changman Lee <cm224.lee@samsung.com>
Date: Mon, 12 May 2014 12:27:43 +0900
Subject: f2fs: large volume support

f2fs's cp has one page which consists of struct f2fs_checkpoint and
version bitmap of sit and nat. To support lots of segments, we need more
blocks for sit bitmap. So let's arrange sit bitmap as following:
+-----------------+------------+
| f2fs_checkpoint | sit bitmap |
| + nat bitmap    |            |
+-----------------+------------+
0                 4k        N blocks

Signed-off-by: Changman Lee <cm224.lee@samsung.com>
[Jaegeuk Kim: simple code change for readability]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c    | 45 ++++++++++++++++++++++++++++++++++++++++-----
 fs/f2fs/f2fs.h          | 13 +++++++++++--
 include/linux/f2fs_fs.h |  2 ++
 3 files changed, 53 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index fe968c7bfc90..ecba8da3308b 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -371,7 +371,9 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi)
 		return;
 
 	sbi->por_doing = true;
-	start_blk = __start_cp_addr(sbi) + 1;
+
+	start_blk = __start_cp_addr(sbi) + 1 +
+		le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
 	orphan_blkaddr = __start_sum_addr(sbi) - 1;
 
 	ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
@@ -512,8 +514,11 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
 	unsigned long blk_size = sbi->blocksize;
 	unsigned long long cp1_version = 0, cp2_version = 0;
 	unsigned long long cp_start_blk_no;
+	unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
+	block_t cp_blk_no;
+	int i;
 
-	sbi->ckpt = kzalloc(blk_size, GFP_KERNEL);
+	sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
 	if (!sbi->ckpt)
 		return -ENOMEM;
 	/*
@@ -544,6 +549,23 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
 	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
 	memcpy(sbi->ckpt, cp_block, blk_size);
 
+	if (cp_blks <= 1)
+		goto done;
+
+	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
+	if (cur_page == cp2)
+		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
+
+	for (i = 1; i < cp_blks; i++) {
+		void *sit_bitmap_ptr;
+		unsigned char *ckpt = (unsigned char *)sbi->ckpt;
+
+		cur_page = get_meta_page(sbi, cp_blk_no + i);
+		sit_bitmap_ptr = page_address(cur_page);
+		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
+		f2fs_put_page(cur_page, 1);
+	}
+done:
 	f2fs_put_page(cp1, 1);
 	f2fs_put_page(cp2, 1);
 	return 0;
@@ -736,6 +758,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	__u32 crc32 = 0;
 	void *kaddr;
 	int i;
+	int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
 
 	/*
 	 * This avoids to conduct wrong roll-forward operations and uses
@@ -786,16 +809,19 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 
 	orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
 					/ F2FS_ORPHANS_PER_BLOCK;
-	ckpt->cp_pack_start_sum = cpu_to_le32(1 + orphan_blocks);
+	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
+			orphan_blocks);
 
 	if (is_umount) {
 		set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
-			data_sum_blocks + orphan_blocks + NR_CURSEG_NODE_TYPE);
+				cp_payload_blks + data_sum_blocks +
+				orphan_blocks + NR_CURSEG_NODE_TYPE);
 	} else {
 		clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 		ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
-			data_sum_blocks + orphan_blocks);
+				cp_payload_blks + data_sum_blocks +
+				orphan_blocks);
 	}
 
 	if (sbi->n_orphans)
@@ -821,6 +847,15 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
 	set_page_dirty(cp_page);
 	f2fs_put_page(cp_page, 1);
 
+	for (i = 1; i < 1 + cp_payload_blks; i++) {
+		cp_page = grab_meta_page(sbi, start_blk++);
+		kaddr = page_address(cp_page);
+		memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
+				(1 << sbi->log_blocksize));
+		set_page_dirty(cp_page);
+		f2fs_put_page(cp_page, 1);
+	}
+
 	if (sbi->n_orphans) {
 		write_orphan_inodes(sbi, start_blk);
 		start_blk += orphan_blocks;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 676a2c6ccec7..9684b1f77a7d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -764,9 +764,18 @@ static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag)
 static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
-	int offset = (flag == NAT_BITMAP) ?
+	int offset;
+
+	if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload) > 0) {
+		if (flag == NAT_BITMAP)
+			return &ckpt->sit_nat_version_bitmap;
+		else
+			return ((unsigned char *)ckpt + F2FS_BLKSIZE);
+	} else {
+		offset = (flag == NAT_BITMAP) ?
 			le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
-	return &ckpt->sit_nat_version_bitmap + offset;
+		return &ckpt->sit_nat_version_bitmap + offset;
+	}
 }
 
 static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi)
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index ba6f3127738f..6ff0b0b42d47 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -19,6 +19,7 @@
 #define F2FS_LOG_SECTORS_PER_BLOCK	3	/* 4KB: F2FS_BLKSIZE */
 #define F2FS_BLKSIZE			4096	/* support only 4KB block */
 #define F2FS_MAX_EXTENSION		64	/* # of extension entries */
+#define F2FS_BLK_ALIGN(x)	(((x) + F2FS_BLKSIZE - 1) / F2FS_BLKSIZE)
 
 #define NULL_ADDR		((block_t)0)	/* used as block_t addresses */
 #define NEW_ADDR		((block_t)-1)	/* used as block_t addresses */
@@ -75,6 +76,7 @@ struct f2fs_super_block {
 	__le16 volume_name[512];	/* volume name */
 	__le32 extension_count;		/* # of extensions below */
 	__u8 extension_list[F2FS_MAX_EXTENSION][8];	/* extension array */
+	__le32 cp_payload;
 } __packed;
 
 /*
-- 
cgit 


From 64c5c759084e153272eb05f4103de3e0adf5a88a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 4 Jun 2014 04:40:19 -0500
Subject: of/irq: provide more wrappers for !CONFIG_OF

The pci-rcar driver is enabled for compile tests, and this has
now shown that the driver cannot build without CONFIG_OF,
following the inclusion of f8f2fe7355fb "PCI: rcar: Use new OF
interrupt mapping when possible":

drivers/built-in.o: In function `rcar_pci_map_irq':
:(.text+0x1cc7c): undefined reference to `of_irq_parse_and_map_pci'
pci/host/pcie-rcar.c: In function 'pci_dma_range_parser_init':
pci/host/pcie-rcar.c:875:2: error: implicit declaration of function 'of_n_addr_cells' [-Werror=implicit-function-declaration]

As pointed out by Ben Dooks and Geert Uytterhoeven, this is actually
supposed to build fine, which we can achieve if we make the
declaration of of_irq_parse_and_map_pci conditional on CONFIG_OF
and provide an empty inline function otherwise, as we do for
a lot of other OF interfaces.

This lets us build the rcar_pci driver again without CONFIG_OF
for build testing. All platforms using this driver select OF,
so this doesn't change anything for the users.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: devicetree@vger.kernel.org
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Lucas Stach <l.stach@pengutronix.de>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Magnus Damm <damm@opensource.se>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ben Dooks <ben.dooks@codethink.co.uk>
Cc: linux-pci@vger.kernel.org
Cc: linux-sh@vger.kernel.org
[robh: drop wrappers for of_n_addr_cells and of_n_size_cells which are
low-level functions that should not be used for !OF]
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of_pci.h | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index 1a1f5ffd5288..dde3a4a0fa5d 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -6,14 +6,44 @@
 
 struct pci_dev;
 struct of_phandle_args;
-int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq);
-int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
-
 struct device_node;
+
+#ifdef CONFIG_OF
+int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq);
 struct device_node *of_pci_find_child_device(struct device_node *parent,
 					     unsigned int devfn);
 int of_pci_get_devfn(struct device_node *np);
+int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin);
 int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
+#else
+static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq)
+{
+	return 0;
+}
+
+static inline struct device_node *of_pci_find_child_device(struct device_node *parent,
+					     unsigned int devfn)
+{
+	return NULL;
+}
+
+static inline int of_pci_get_devfn(struct device_node *np)
+{
+	return -EINVAL;
+}
+
+static inline int
+of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+	return 0;
+}
+
+static inline int
+of_pci_parse_bus_range(struct device_node *node, struct resource *res)
+{
+	return -EINVAL;
+}
+#endif
 
 #if defined(CONFIG_OF) && defined(CONFIG_PCI_MSI)
 int of_pci_msi_chip_add(struct msi_chip *chip);
-- 
cgit 


From 4f4cf5ad6fc1b16dc8dc9d750bb80b35eba5e98d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 28 May 2014 10:34:49 -0400
Subject: SUNRPC: Move congestion window constants to header file

I would like to use one of the RPC client's congestion algorithm
constants in transport-specific code.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 include/linux/sunrpc/xprt.h |  6 ++++++
 net/sunrpc/xprt.c           | 28 +++++++++-------------------
 2 files changed, 15 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3e5efb2b236e..5903d2c0ab4d 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -24,6 +24,12 @@
 #define RPC_MAX_SLOT_TABLE_LIMIT	(65536U)
 #define RPC_MAX_SLOT_TABLE	RPC_MAX_SLOT_TABLE_LIMIT
 
+#define RPC_CWNDSHIFT		(8U)
+#define RPC_CWNDSCALE		(1U << RPC_CWNDSHIFT)
+#define RPC_INITCWND		RPC_CWNDSCALE
+#define RPC_MAXCWND(xprt)	((xprt)->max_reqs << RPC_CWNDSHIFT)
+#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
+
 /*
  * This describes a timeout strategy
  */
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d173f79947c6..2d1d5a643b95 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -71,24 +71,6 @@ static void	 xprt_destroy(struct rpc_xprt *xprt);
 static DEFINE_SPINLOCK(xprt_list_lock);
 static LIST_HEAD(xprt_list);
 
-/*
- * The transport code maintains an estimate on the maximum number of out-
- * standing RPC requests, using a smoothed version of the congestion
- * avoidance implemented in 44BSD. This is basically the Van Jacobson
- * congestion algorithm: If a retransmit occurs, the congestion window is
- * halved; otherwise, it is incremented by 1/cwnd when
- *
- *	-	a reply is received and
- *	-	a full number of requests are outstanding and
- *	-	the congestion window hasn't been updated recently.
- */
-#define RPC_CWNDSHIFT		(8U)
-#define RPC_CWNDSCALE		(1U << RPC_CWNDSHIFT)
-#define RPC_INITCWND		RPC_CWNDSCALE
-#define RPC_MAXCWND(xprt)	((xprt)->max_reqs << RPC_CWNDSHIFT)
-
-#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
-
 /**
  * xprt_register_transport - register a transport implementation
  * @transport: transport to register
@@ -446,7 +428,15 @@ EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
  * @task: recently completed RPC request used to adjust window
  * @result: result code of completed RPC request
  *
- * We use a time-smoothed congestion estimator to avoid heavy oscillation.
+ * The transport code maintains an estimate on the maximum number of out-
+ * standing RPC requests, using a smoothed version of the congestion
+ * avoidance implemented in 44BSD. This is basically the Van Jacobson
+ * congestion algorithm: If a retransmit occurs, the congestion window is
+ * halved; otherwise, it is incremented by 1/cwnd when
+ *
+ *	-	a reply is received and
+ *	-	a full number of requests are outstanding and
+ *	-	the congestion window hasn't been updated recently.
  */
 void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result)
 {
-- 
cgit 


From 11f0431be2f99c574a65c6dfc0ca205511500f29 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Tue, 3 Jun 2014 10:30:28 -0400
Subject: dm: remove symbol export for dm_set_device_limits

There is no need for code other than DM core to use dm_set_device_limits,
so remove its EXPORT_SYMBOL_GPL.  Also, clean up a couple of whitespace nits.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-table.c         | 5 ++---
 drivers/md/dm.c               | 1 -
 include/linux/device-mapper.h | 8 +-------
 3 files changed, 3 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 50601ec7017a..5f59f1e3e5b1 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -465,8 +465,8 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
 }
 EXPORT_SYMBOL(dm_get_device);
 
-int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
-			 sector_t start, sector_t len, void *data)
+static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
+				sector_t start, sector_t len, void *data)
 {
 	struct queue_limits *limits = data;
 	struct block_device *bdev = dev->bdev;
@@ -499,7 +499,6 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
 					  (unsigned int) (PAGE_SIZE >> 9));
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dm_set_device_limits);
 
 /*
  * Decrement a device's use count and remove it if necessary.
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3234a753a80d..bf1a1eaad9a9 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1498,7 +1498,6 @@ static int dm_merge_bvec(struct request_queue *q,
 	 * just one page.
 	 */
 	else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
-
 		max_size = 0;
 
 out:
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 0adca299f238..e1707de043ae 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -115,12 +115,6 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
 
 void dm_error(const char *message);
 
-/*
- * Combine device limits.
- */
-int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
-			 sector_t start, sector_t len, void *data);
-
 struct dm_dev {
 	struct block_device *bdev;
 	fmode_t mode;
@@ -132,7 +126,7 @@ struct dm_dev {
  * are opened/closed correctly.
  */
 int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
-						 struct dm_dev **result);
+		  struct dm_dev **result);
 void dm_put_device(struct dm_target *ti, struct dm_dev *d);
 
 /*
-- 
cgit 


From b14903e10a06347234b387f7364f86aa07252d9f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 4 Jun 2014 16:46:00 +0200
Subject: regulator: add regulator_can_change_voltage stub

When CONFIG_REGULATOR is not set, we cannot call
regulator_can_change_voltage() from a device driver, which results
in a build error like

video/fbdev/omap2/dss/hdmi5.c: In function 'hdmi_init_regulator':
video/fbdev/omap2/dss/hdmi5.c:149:2: error: implicit declaration of function 'regulator_can_change_voltage' [-Werror=implicit-function-declaration]

even for drivers that don't require the regulator API normally.
Such a use was recently added in the omap2+ hdmi driver.

This avoids the problem by adding a static inline function
stub in the API header, as we have for most of the other
regulator functions as well.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Mark Brown <broonie@kernel.org>
Cc: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Mark Brown <broonie@linaro.org>
---
 include/linux/regulator/consumer.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h
index e530681bea70..d60b92a7fc25 100644
--- a/include/linux/regulator/consumer.h
+++ b/include/linux/regulator/consumer.h
@@ -391,6 +391,11 @@ static inline void regulator_bulk_free(int num_consumers,
 {
 }
 
+static inline int regulator_can_change_voltage(struct regulator *regulator)
+{
+	return 0;
+}
+
 static inline int regulator_set_voltage(struct regulator *regulator,
 					int min_uV, int max_uV)
 {
-- 
cgit 


From 0c36b390a546055b6815d4b93a2c9fed4d980ffb Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Wed, 4 Jun 2014 15:58:24 +0200
Subject: percpu-refcount: fix usage of this_cpu_ops

The percpu-refcount infrastructure uses the underscore variants of
this_cpu_ops (e.g. __this_cpu_inc()) in order to modify percpu reference
counters.

However the underscore variants do not atomically update the percpu
variable, instead they may be implemented using read-modify-write
semantics (more than one instruction).  Therefore it is only safe to
use the underscore variant if the context is always the same (process,
softirq, or hardirq). Otherwise it is possible to lose updates.

Sebastian has seen this problem within the aio subsystem, which uses
percpu refcounters in both process and softirq context, leading to
reference counts that never dropped to zero even though the number of
"get" and "put" calls matched.

Fix this by using the non-underscore this_cpu_ops variant which
provides correct per cpu atomic semantics and fixes the corrupted
reference counts.

Cc: Kent Overstreet <kmo@daterainc.com>
Cc: <stable@vger.kernel.org> # v3.11+
Reported-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
References: http://lkml.kernel.org/g/alpine.LFD.2.11.1406041540520.21183@denkbrett
---
 include/linux/percpu-refcount.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 95961f0bf62d..0afb48fd449d 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -110,7 +110,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref)
 	pcpu_count = ACCESS_ONCE(ref->pcpu_count);
 
 	if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR))
-		__this_cpu_inc(*pcpu_count);
+		this_cpu_inc(*pcpu_count);
 	else
 		atomic_inc(&ref->count);
 
@@ -139,7 +139,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 	pcpu_count = ACCESS_ONCE(ref->pcpu_count);
 
 	if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) {
-		__this_cpu_inc(*pcpu_count);
+		this_cpu_inc(*pcpu_count);
 		ret = true;
 	}
 
@@ -164,7 +164,7 @@ static inline void percpu_ref_put(struct percpu_ref *ref)
 	pcpu_count = ACCESS_ONCE(ref->pcpu_count);
 
 	if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR))
-		__this_cpu_dec(*pcpu_count);
+		this_cpu_dec(*pcpu_count);
 	else if (unlikely(atomic_dec_and_test(&ref->count)))
 		ref->release(ref);
 
-- 
cgit 


From 0e62f51f8753b048f391ee2d7f2af1f7297b0be5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Wed, 4 Jun 2014 10:23:49 -0600
Subject: blk-mq: let blk_mq_tag_to_rq() take blk_mq_tags as the main parameter

We currently pass in the hardware queue, and get the tags from there.
But from scsi-mq, with a shared tag space, it's a lot more convenient
to pass in the blk_mq_tags instead as the hardware queue isn't always
directly available. So instead of having to re-map to a given
hardware queue from rq->mq_ctx, just pass in the tags structure.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c                    | 19 ++++++++++++-------
 drivers/block/mtip32xx/mtip32xx.c |  4 +++-
 include/linux/blk-mq.h            |  2 +-
 3 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4e8e8cf00815..4e4cd6208052 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -529,15 +529,20 @@ void blk_mq_kick_requeue_list(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
-struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag)
+static inline bool is_flush_request(struct request *rq, unsigned int tag)
 {
-	struct request_queue *q = hctx->queue;
+	return ((rq->cmd_flags & REQ_FLUSH_SEQ) &&
+			rq->q->flush_rq->tag == tag);
+}
 
-	if ((q->flush_rq->cmd_flags & REQ_FLUSH_SEQ) &&
-	    q->flush_rq->tag == tag)
-		return q->flush_rq;
+struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
+{
+	struct request *rq = tags->rqs[tag];
+
+	if (!is_flush_request(rq, tag))
+		return rq;
 
-	return hctx->tags->rqs[tag];
+	return rq->q->flush_rq;
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
 
@@ -566,7 +571,7 @@ static void blk_mq_timeout_check(void *__data, unsigned long *free_tags)
 		if (tag >= hctx->tags->nr_tags)
 			break;
 
-		rq = blk_mq_tag_to_rq(hctx, tag++);
+		rq = blk_mq_tag_to_rq(hctx->tags, tag++);
 		if (rq->q != hctx->queue)
 			continue;
 		if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index abc858b3528b..74abd49fabdc 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -193,7 +193,9 @@ static void mtip_put_int_command(struct driver_data *dd, struct mtip_cmd *cmd)
 static struct request *mtip_rq_from_tag(struct driver_data *dd,
 					unsigned int tag)
 {
-	return blk_mq_tag_to_rq(dd->queue->queue_hw_ctx[0], tag);
+	struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0];
+
+	return blk_mq_tag_to_rq(hctx->tags, tag);
 }
 
 static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd,
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index c15128833100..0feedebfde48 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -155,7 +155,7 @@ void blk_mq_free_request(struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 		gfp_t gfp, bool reserved);
-struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag);
+struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
-- 
cgit 


From c177c81e09e517bbf75b67762cdab1b83aba6976 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Wed, 4 Jun 2014 16:05:35 -0700
Subject: hugetlb: restrict hugepage_migration_support() to x86_64

Currently hugepage migration is available for all archs which support
pmd-level hugepages, but it has only been tested on x86_64 and there
are bugs on other archs.  So to avoid breaking such archs, this patch
limits the availability strictly to x86_64 until developers of other
archs get interested in enabling this feature.

Simply disabling hugepage migration on non-x86_64 archs is not enough to
fix the reported problem where sys_move_pages() hits the BUG_ON() in
follow_page(FOLL_GET), so let's fix this by checking if hugepage
migration is supported in vma_migratable().

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: James Hogan <james.hogan@imgtec.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: David Miller <davem@davemloft.net>
Cc: <stable@vger.kernel.org>	[3.12+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/hugetlbpage.c     |  5 -----
 arch/arm64/mm/hugetlbpage.c   |  5 -----
 arch/ia64/mm/hugetlbpage.c    |  5 -----
 arch/metag/mm/hugetlbpage.c   |  5 -----
 arch/mips/mm/hugetlbpage.c    |  5 -----
 arch/powerpc/mm/hugetlbpage.c | 10 ----------
 arch/s390/mm/hugetlbpage.c    |  5 -----
 arch/sh/mm/hugetlbpage.c      |  5 -----
 arch/sparc/mm/hugetlbpage.c   |  5 -----
 arch/tile/mm/hugetlbpage.c    |  5 -----
 arch/x86/Kconfig              |  4 ++++
 arch/x86/mm/hugetlbpage.c     | 10 ----------
 include/linux/hugetlb.h       | 13 +++++--------
 include/linux/mempolicy.h     |  6 ++++++
 mm/Kconfig                    |  3 +++
 15 files changed, 18 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mm/hugetlbpage.c b/arch/arm/mm/hugetlbpage.c
index 54ee6163c181..66781bf34077 100644
--- a/arch/arm/mm/hugetlbpage.c
+++ b/arch/arm/mm/hugetlbpage.c
@@ -56,8 +56,3 @@ int pmd_huge(pmd_t pmd)
 {
 	return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
 }
-
-int pmd_huge_support(void)
-{
-	return 1;
-}
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index 31eb959e9aa8..023747bf4dd7 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -58,11 +58,6 @@ int pud_huge(pud_t pud)
 #endif
 }
 
-int pmd_huge_support(void)
-{
-	return 1;
-}
-
 static __init int setup_hugepagesz(char *opt)
 {
 	unsigned long ps = memparse(opt, &opt);
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 68232db98baa..76069c18ee42 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -114,11 +114,6 @@ int pud_huge(pud_t pud)
 	return 0;
 }
 
-int pmd_huge_support(void)
-{
-	return 0;
-}
-
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
 {
diff --git a/arch/metag/mm/hugetlbpage.c b/arch/metag/mm/hugetlbpage.c
index 042431509b56..3c52fa6d0f8e 100644
--- a/arch/metag/mm/hugetlbpage.c
+++ b/arch/metag/mm/hugetlbpage.c
@@ -110,11 +110,6 @@ int pud_huge(pud_t pud)
 	return 0;
 }
 
-int pmd_huge_support(void)
-{
-	return 1;
-}
-
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 			     pmd_t *pmd, int write)
 {
diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c
index 77e0ae036e7c..4ec8ee10d371 100644
--- a/arch/mips/mm/hugetlbpage.c
+++ b/arch/mips/mm/hugetlbpage.c
@@ -84,11 +84,6 @@ int pud_huge(pud_t pud)
 	return (pud_val(pud) & _PAGE_HUGE) != 0;
 }
 
-int pmd_huge_support(void)
-{
-	return 1;
-}
-
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 		pmd_t *pmd, int write)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index eb923654ba80..7e70ae968e5f 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -86,11 +86,6 @@ int pgd_huge(pgd_t pgd)
 	 */
 	return ((pgd_val(pgd) & 0x3) != 0x0);
 }
-
-int pmd_huge_support(void)
-{
-	return 1;
-}
 #else
 int pmd_huge(pmd_t pmd)
 {
@@ -106,11 +101,6 @@ int pgd_huge(pgd_t pgd)
 {
 	return 0;
 }
-
-int pmd_huge_support(void)
-{
-	return 0;
-}
 #endif
 
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 0727a55d87d9..0ff66a7e29bb 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -220,11 +220,6 @@ int pud_huge(pud_t pud)
 	return 0;
 }
 
-int pmd_huge_support(void)
-{
-	return 1;
-}
-
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 			     pmd_t *pmdp, int write)
 {
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 0d676a41081e..d7762349ea48 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -83,11 +83,6 @@ int pud_huge(pud_t pud)
 	return 0;
 }
 
-int pmd_huge_support(void)
-{
-	return 0;
-}
-
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 			     pmd_t *pmd, int write)
 {
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c
index 9bd9ce80bf77..d329537739c6 100644
--- a/arch/sparc/mm/hugetlbpage.c
+++ b/arch/sparc/mm/hugetlbpage.c
@@ -231,11 +231,6 @@ int pud_huge(pud_t pud)
 	return 0;
 }
 
-int pmd_huge_support(void)
-{
-	return 0;
-}
-
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 			     pmd_t *pmd, int write)
 {
diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c
index 0cb3bbaa580c..e514899e1100 100644
--- a/arch/tile/mm/hugetlbpage.c
+++ b/arch/tile/mm/hugetlbpage.c
@@ -166,11 +166,6 @@ int pud_huge(pud_t pud)
 	return !!(pud_val(pud) & _PAGE_HUGE_PAGE);
 }
 
-int pmd_huge_support(void)
-{
-	return 1;
-}
-
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 			     pmd_t *pmd, int write)
 {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7d5feb5908dd..e41b258ad040 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1873,6 +1873,10 @@ config ARCH_ENABLE_SPLIT_PMD_PTLOCK
 	def_bool y
 	depends on X86_64 || X86_PAE
 
+config ARCH_ENABLE_HUGEPAGE_MIGRATION
+	def_bool y
+	depends on X86_64 && HUGETLB_PAGE && MIGRATION
+
 menu "Power management and ACPI options"
 
 config ARCH_HIBERNATION_HEADER
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8c9f647ff9e1..8b977ebf9388 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -58,11 +58,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 {
 	return NULL;
 }
-
-int pmd_huge_support(void)
-{
-	return 0;
-}
 #else
 
 struct page *
@@ -80,11 +75,6 @@ int pud_huge(pud_t pud)
 {
 	return !!(pud_val(pud) & _PAGE_PSE);
 }
-
-int pmd_huge_support(void)
-{
-	return 1;
-}
 #endif
 
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index b65166de1d9d..d0bad1a8b0bd 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -392,15 +392,13 @@ static inline pgoff_t basepage_index(struct page *page)
 
 extern void dissolve_free_huge_pages(unsigned long start_pfn,
 				     unsigned long end_pfn);
-int pmd_huge_support(void);
-/*
- * Currently hugepage migration is enabled only for pmd-based hugepage.
- * This function will be updated when hugepage migration is more widely
- * supported.
- */
 static inline int hugepage_migration_support(struct hstate *h)
 {
-	return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT);
+#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
+	return huge_page_shift(h) == PMD_SHIFT;
+#else
+	return 0;
+#endif
 }
 
 static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
@@ -450,7 +448,6 @@ static inline pgoff_t basepage_index(struct page *page)
 	return page->index;
 }
 #define dissolve_free_huge_pages(s, e)	do {} while (0)
-#define pmd_huge_support()	0
 #define hugepage_migration_support(h)	0
 
 static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 3c1b968da0ca..f230a978e6ba 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -175,6 +175,12 @@ static inline int vma_migratable(struct vm_area_struct *vma)
 {
 	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
 		return 0;
+
+#ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
+	if (vma->vm_flags & VM_HUGETLB)
+		return 0;
+#endif
+
 	/*
 	 * Migration allocates pages in the highest zone. If we cannot
 	 * do so then migration (at least from node to node) is not
diff --git a/mm/Kconfig b/mm/Kconfig
index 28cec518f4d4..75ac479cbacd 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -267,6 +267,9 @@ config MIGRATION
 	  pages as migration can relocate pages to satisfy a huge page
 	  allocation instead of reclaiming.
 
+config ARCH_ENABLE_HUGEPAGE_MIGRATION
+	boolean
+
 config PHYS_ADDR_T_64BIT
 	def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT
 
-- 
cgit 


From ac13a829f6adb674015ab399594c089990104af7 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Wed, 4 Jun 2014 16:06:27 -0700
Subject: fs/libfs.c: add generic data flush to fsync

Description by Jan Kara:
 "A lot of older filesystems don't properly flush volatile disk caches
  on fsync(2) which can lead to loss of fsynced data after power failure.

This patch makes generic_file_fsync() issue proper cache flush to fix the
problem.  Sysadmin can use /sys/devices/.../cache_type to tell the system
it should not send the cache flush."

[akpm@linux-foundation.org: nuke ifdef]
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Fabian Frederick <fabf@skynet.be>
Suggested-by: Jan Kara <jack@suse.cz>
Suggested-by: Christoph Hellwig <hch@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/libfs.c             | 34 +++++++++++++++++++++++++++++++---
 include/linux/blkdev.h |  9 +++++++++
 include/linux/fs.h     |  1 +
 3 files changed, 41 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/libfs.c b/fs/libfs.c
index a1844244246f..88e3e00e2eca 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -3,6 +3,7 @@
  *	Library for filesystems writers.
  */
 
+#include <linux/blkdev.h>
 #include <linux/export.h>
 #include <linux/pagemap.h>
 #include <linux/slab.h>
@@ -923,16 +924,19 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
 EXPORT_SYMBOL_GPL(generic_fh_to_parent);
 
 /**
- * generic_file_fsync - generic fsync implementation for simple filesystems
+ * __generic_file_fsync - generic fsync implementation for simple filesystems
+ *
  * @file:	file to synchronize
+ * @start:	start offset in bytes
+ * @end:	end offset in bytes (inclusive)
  * @datasync:	only synchronize essential metadata if true
  *
  * This is a generic implementation of the fsync method for simple
  * filesystems which track all non-inode metadata in the buffers list
  * hanging off the address_space structure.
  */
-int generic_file_fsync(struct file *file, loff_t start, loff_t end,
-		       int datasync)
+int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
+				 int datasync)
 {
 	struct inode *inode = file->f_mapping->host;
 	int err;
@@ -952,10 +956,34 @@ int generic_file_fsync(struct file *file, loff_t start, loff_t end,
 	err = sync_inode_metadata(inode, 1);
 	if (ret == 0)
 		ret = err;
+
 out:
 	mutex_unlock(&inode->i_mutex);
 	return ret;
 }
+EXPORT_SYMBOL(__generic_file_fsync);
+
+/**
+ * generic_file_fsync - generic fsync implementation for simple filesystems
+ *			with flush
+ * @file:	file to synchronize
+ * @start:	start offset in bytes
+ * @end:	end offset in bytes (inclusive)
+ * @datasync:	only synchronize essential metadata if true
+ *
+ */
+
+int generic_file_fsync(struct file *file, loff_t start, loff_t end,
+		       int datasync)
+{
+	struct inode *inode = file->f_mapping->host;
+	int err;
+
+	err = __generic_file_fsync(file, start, end, datasync);
+	if (err)
+		return err;
+	return blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+}
 EXPORT_SYMBOL(generic_file_fsync);
 
 /**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8aba35f46f87..45cf6e537c83 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1607,6 +1607,9 @@ struct block_device_operations {
 extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
 				 unsigned long);
 #else /* CONFIG_BLOCK */
+
+struct block_device;
+
 /*
  * stubs for when the block layer is configured out
  */
@@ -1642,6 +1645,12 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 	return false;
 }
 
+static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
+				     sector_t *error_sector)
+{
+	return 0;
+}
+
 #endif /* CONFIG_BLOCK */
 
 #endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 878031227c57..c3f46e499dd0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2590,6 +2590,7 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
 extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
 		const void __user *from, size_t count);
 
+extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
 extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
 
 extern int generic_check_addressable(unsigned, u64);
-- 
cgit 


From c46a7c817e662a820373bb76b88d0ad67d6abe5d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 4 Jun 2014 16:06:30 -0700
Subject: x86: define _PAGE_NUMA by reusing software bits on the PMD and PTE
 levels

_PAGE_NUMA is currently an alias of _PAGE_PROTNONE to trap NUMA hinting
faults on x86.  Care is taken such that _PAGE_NUMA is used only in
situations where the VMA flags distinguish between NUMA hinting faults
and prot_none faults.  This decision was x86-specific and is conceptually
difficult, requiring special casing to distinguish between PROTNONE
and NUMA ptes based on context.

Fundamentally, we only need the _PAGE_NUMA bit to tell the difference
between an entry that is really unmapped and a page that is protected
for NUMA hinting faults, because a fault will be trapped anyway if the
PTE is not present.

Swap PTEs on x86-64 use the bits after _PAGE_GLOBAL for the offset.
This patch shrinks the maximum possible swap size (from 16TB to 8TB) and
uses the freed bit, the first one after _PAGE_GLOBAL, to uniquely
distinguish between NUMA hinting ptes and swap ptes.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Anvin <hpa@zytor.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Steven Noonan <steven@uplinklabs.net>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/include/asm/pgtable.h   |  6 ++++
 arch/x86/include/asm/pgtable.h       | 15 +++++---
 arch/x86/include/asm/pgtable_64.h    |  8 +++++
 arch/x86/include/asm/pgtable_types.h | 66 +++++++++++++++++++-----------------
 arch/x86/mm/pageattr-test.c          |  2 +-
 include/asm-generic/pgtable.h        |  8 +++--
 include/linux/swapops.h              |  2 +-
 mm/memory.c                          | 17 ++++------
 8 files changed, 75 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 3ebb188c3ff5..d98c1ecc3266 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -44,6 +44,12 @@ static inline int pte_present(pte_t pte)
 	return pte_val(pte) & (_PAGE_PRESENT | _PAGE_NUMA);
 }
 
+#define pte_present_nonuma pte_present_nonuma
+static inline int pte_present_nonuma(pte_t pte)
+{
+	return pte_val(pte) & (_PAGE_PRESENT);
+}
+
 #define pte_numa pte_numa
 static inline int pte_numa(pte_t pte)
 {
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index b459ddf27d64..66276c1d23bb 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -131,7 +131,8 @@ static inline int pte_exec(pte_t pte)
 
 static inline int pte_special(pte_t pte)
 {
-	return pte_flags(pte) & _PAGE_SPECIAL;
+	return (pte_flags(pte) & (_PAGE_PRESENT|_PAGE_SPECIAL)) ==
+				 (_PAGE_PRESENT|_PAGE_SPECIAL);
 }
 
 static inline unsigned long pte_pfn(pte_t pte)
@@ -452,6 +453,12 @@ static inline int pte_present(pte_t a)
 			       _PAGE_NUMA);
 }
 
+#define pte_present_nonuma pte_present_nonuma
+static inline int pte_present_nonuma(pte_t a)
+{
+	return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
+}
+
 #define pte_accessible pte_accessible
 static inline bool pte_accessible(struct mm_struct *mm, pte_t a)
 {
@@ -860,19 +867,19 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 
 static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
 {
-	VM_BUG_ON(pte_present(pte));
+	VM_BUG_ON(pte_present_nonuma(pte));
 	return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
 }
 
 static inline int pte_swp_soft_dirty(pte_t pte)
 {
-	VM_BUG_ON(pte_present(pte));
+	VM_BUG_ON(pte_present_nonuma(pte));
 	return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
 }
 
 static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 {
-	VM_BUG_ON(pte_present(pte));
+	VM_BUG_ON(pte_present_nonuma(pte));
 	return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
 }
 
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index e22c1dbf7feb..6d6ecd09883c 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -145,8 +145,16 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
 #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
+#ifdef CONFIG_NUMA_BALANCING
+/* Automatic NUMA balancing needs to be distinguishable from swap entries */
+#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 2)
+#else
 #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1)
+#endif
 #else
+#ifdef CONFIG_NUMA_BALANCING
+#error Incompatible format for automatic NUMA balancing
+#endif
 #define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1)
 #define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
 #endif
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index eb3d44945133..f216963760e5 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -16,15 +16,26 @@
 #define _PAGE_BIT_PSE		7	/* 4 MB (or 2MB) page */
 #define _PAGE_BIT_PAT		7	/* on 4KB pages */
 #define _PAGE_BIT_GLOBAL	8	/* Global TLB entry PPro+ */
-#define _PAGE_BIT_UNUSED1	9	/* available for programmer */
-#define _PAGE_BIT_IOMAP		10	/* flag used to indicate IO mapping */
-#define _PAGE_BIT_HIDDEN	11	/* hidden by kmemcheck */
+#define _PAGE_BIT_SOFTW1	9	/* available for programmer */
+#define _PAGE_BIT_SOFTW2	10	/* " */
+#define _PAGE_BIT_SOFTW3	11	/* " */
 #define _PAGE_BIT_PAT_LARGE	12	/* On 2MB or 1GB pages */
-#define _PAGE_BIT_SPECIAL	_PAGE_BIT_UNUSED1
-#define _PAGE_BIT_CPA_TEST	_PAGE_BIT_UNUSED1
-#define _PAGE_BIT_SPLITTING	_PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */
+#define _PAGE_BIT_SPECIAL	_PAGE_BIT_SOFTW1
+#define _PAGE_BIT_CPA_TEST	_PAGE_BIT_SOFTW1
+#define _PAGE_BIT_SPLITTING	_PAGE_BIT_SOFTW2 /* only valid on a PSE pmd */
+#define _PAGE_BIT_IOMAP		_PAGE_BIT_SOFTW2 /* flag used to indicate IO mapping */
+#define _PAGE_BIT_HIDDEN	_PAGE_BIT_SOFTW3 /* hidden by kmemcheck */
+#define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_SOFTW3 /* software dirty tracking */
 #define _PAGE_BIT_NX           63       /* No execute: only valid after cpuid check */
 
+/*
+ * Swap offsets on configurations that allow automatic NUMA balancing use the
+ * bits after _PAGE_BIT_GLOBAL. To uniquely distinguish NUMA hinting PTEs from
+ * swap entries, we use the first bit after _PAGE_BIT_GLOBAL and shrink the
+ * maximum possible swap space from 16TB to 8TB.
+ */
+#define _PAGE_BIT_NUMA		(_PAGE_BIT_GLOBAL+1)
+
 /* If _PAGE_BIT_PRESENT is clear, we use these: */
 /* - if the user mapped it with PROT_NONE; pte_present gives true */
 #define _PAGE_BIT_PROTNONE	_PAGE_BIT_GLOBAL
@@ -40,7 +51,7 @@
 #define _PAGE_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
 #define _PAGE_PSE	(_AT(pteval_t, 1) << _PAGE_BIT_PSE)
 #define _PAGE_GLOBAL	(_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
-#define _PAGE_UNUSED1	(_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
+#define _PAGE_SOFTW1	(_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
 #define _PAGE_IOMAP	(_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
 #define _PAGE_PAT	(_AT(pteval_t, 1) << _PAGE_BIT_PAT)
 #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
@@ -61,14 +72,27 @@
  * they do not conflict with each other.
  */
 
-#define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_HIDDEN
-
 #ifdef CONFIG_MEM_SOFT_DIRTY
 #define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY)
 #else
 #define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 0))
 #endif
 
+/*
+ * _PAGE_NUMA distinguishes between a numa hinting minor fault and a page
+ * that is not present. The hinting fault gathers numa placement statistics
+ * (see pte_numa()). The bit is always zero when the PTE is not present.
+ *
+ * The bit picked must be always zero when the pmd is present and not
+ * present, so that we don't lose information when we set it while
+ * atomically clearing the present bit.
+ */
+#ifdef CONFIG_NUMA_BALANCING
+#define _PAGE_NUMA	(_AT(pteval_t, 1) << _PAGE_BIT_NUMA)
+#else
+#define _PAGE_NUMA	(_AT(pteval_t, 0))
+#endif
+
 /*
  * Tracking soft dirty bit when a page goes to a swap is tricky.
  * We need a bit which can be stored in pte _and_ not conflict
@@ -94,26 +118,6 @@
 #define _PAGE_FILE	(_AT(pteval_t, 1) << _PAGE_BIT_FILE)
 #define _PAGE_PROTNONE	(_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
 
-/*
- * _PAGE_NUMA indicates that this page will trigger a numa hinting
- * minor page fault to gather numa placement statistics (see
- * pte_numa()). The bit picked (8) is within the range between
- * _PAGE_FILE (6) and _PAGE_PROTNONE (8) bits. Therefore, it doesn't
- * require changes to the swp entry format because that bit is always
- * zero when the pte is not present.
- *
- * The bit picked must be always zero when the pmd is present and not
- * present, so that we don't lose information when we set it while
- * atomically clearing the present bit.
- *
- * Because we shared the same bit (8) with _PAGE_PROTNONE this can be
- * interpreted as _PAGE_NUMA only in places that _PAGE_PROTNONE
- * couldn't reach, like handle_mm_fault() (see access_error in
- * arch/x86/mm/fault.c, the vma protection must not be PROT_NONE for
- * handle_mm_fault() to be invoked).
- */
-#define _PAGE_NUMA	_PAGE_PROTNONE
-
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\
 			 _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |	\
@@ -122,8 +126,8 @@
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |		\
 			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |	\
-			 _PAGE_SOFT_DIRTY)
-#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
+			 _PAGE_SOFT_DIRTY | _PAGE_NUMA)
+#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_NUMA)
 
 #define _PAGE_CACHE_MASK	(_PAGE_PCD | _PAGE_PWT)
 #define _PAGE_CACHE_WB		(0)
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 461bc8289024..6629f397b467 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -35,7 +35,7 @@ enum {
 
 static int pte_testbit(pte_t pte)
 {
-	return pte_flags(pte) & _PAGE_UNUSED1;
+	return pte_flags(pte) & _PAGE_SOFTW1;
 }
 
 struct split_state {
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index a8015a7a55bb..53b2acc38213 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -233,6 +233,10 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
 # define pte_accessible(mm, pte)	((void)(pte), 1)
 #endif
 
+#ifndef pte_present_nonuma
+#define pte_present_nonuma(pte) pte_present(pte)
+#endif
+
 #ifndef flush_tlb_fix_spurious_fault
 #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address)
 #endif
@@ -670,7 +674,7 @@ static inline int pmd_trans_unstable(pmd_t *pmd)
 static inline int pte_numa(pte_t pte)
 {
 	return (pte_flags(pte) &
-		(_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+		(_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_NUMA;
 }
 #endif
 
@@ -678,7 +682,7 @@ static inline int pte_numa(pte_t pte)
 static inline int pmd_numa(pmd_t pmd)
 {
 	return (pmd_flags(pmd) &
-		(_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
+		(_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_NUMA;
 }
 #endif
 
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index c0f75261a728..6adfb7bfbf44 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -54,7 +54,7 @@ static inline pgoff_t swp_offset(swp_entry_t entry)
 /* check whether a pte points to a swap entry */
 static inline int is_swap_pte(pte_t pte)
 {
-	return !pte_none(pte) && !pte_present(pte) && !pte_file(pte);
+	return !pte_none(pte) && !pte_present_nonuma(pte) && !pte_file(pte);
 }
 #endif
 
diff --git a/mm/memory.c b/mm/memory.c
index e302ae1dcce0..0897830011f3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -756,7 +756,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 	unsigned long pfn = pte_pfn(pte);
 
 	if (HAVE_PTE_SPECIAL) {
-		if (likely(!pte_special(pte)))
+		if (likely(!pte_special(pte) || pte_numa(pte)))
 			goto check_pfn;
 		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
 			return NULL;
@@ -782,14 +782,15 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 		}
 	}
 
-	if (is_zero_pfn(pfn))
-		return NULL;
 check_pfn:
 	if (unlikely(pfn > highest_memmap_pfn)) {
 		print_bad_pte(vma, addr, pte, NULL);
 		return NULL;
 	}
 
+	if (is_zero_pfn(pfn))
+		return NULL;
+
 	/*
 	 * NOTE! We still have PageReserved() pages in the page tables.
 	 * eg. VDSO mappings can cause them to exist.
@@ -1722,13 +1723,9 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
 
 	/*
-	 * If FOLL_FORCE and FOLL_NUMA are both set, handle_mm_fault
-	 * would be called on PROT_NONE ranges. We must never invoke
-	 * handle_mm_fault on PROT_NONE ranges or the NUMA hinting
-	 * page faults would unprotect the PROT_NONE ranges if
-	 * _PAGE_NUMA and _PAGE_PROTNONE are sharing the same pte/pmd
-	 * bitflag. So to avoid that, don't set FOLL_NUMA if
-	 * FOLL_FORCE is set.
+	 * If FOLL_FORCE is set then do not force a full fault as the hinting
+	 * fault information is unrelated to the reference behaviour of a task
+	 * using the address space
 	 */
 	if (!(gup_flags & FOLL_FORCE))
 		gup_flags |= FOLL_NUMA;
-- 
cgit 


From 5dfb417509921eb90ee123a4d1525e8916b4ace4 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Wed, 4 Jun 2014 16:06:38 -0700
Subject: sl[au]b: charge slabs to kmemcg explicitly

We have only a few places where we actually want to charge kmem so
instead of intruding into the general page allocation path with
__GFP_KMEMCG it's better to explicitly charge kmem there.  All kmem
charges will be easier to follow that way.

This is a step towards removing __GFP_KMEMCG.  It removes __GFP_KMEMCG
from memcg caches' allocflags.  Instead it makes slab allocation path
call memcg_charge_kmem directly getting memcg to charge from the cache's
memcg params.

This also eliminates any possibility of misaccounting an allocation
going from one memcg's cache to another memcg, because now we always
charge slabs against the memcg the cache belongs to.  That's why this
patch removes the big comment to memcg_kmem_get_cache.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 15 ++++-----------
 mm/memcontrol.c            |  4 ++--
 mm/slab.c                  |  7 ++++++-
 mm/slab.h                  | 29 +++++++++++++++++++++++++++++
 mm/slab_common.c           |  6 +-----
 mm/slub.c                  | 24 +++++++++++++++++-------
 6 files changed, 59 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b569b8be5c5a..96e5d2573eb0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -506,6 +506,9 @@ void memcg_update_array_size(int num_groups);
 struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
+int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size);
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size);
+
 void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
@@ -583,17 +586,7 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order)
  * @cachep: the original global kmem cache
  * @gfp: allocation flags.
  *
- * This function assumes that the task allocating, which determines the memcg
- * in the page allocator, belongs to the same cgroup throughout the whole
- * process.  Misacounting can happen if the task calls memcg_kmem_get_cache()
- * while belonging to a cgroup, and later on changes. This is considered
- * acceptable, and should only happen upon task migration.
- *
- * Before the cache is created by the memcg core, there is also a possible
- * imbalance: the task belongs to a memcg, but the cache being allocated from
- * is the global cache, since the child cache is not yet guaranteed to be
- * ready. This case is also fine, since in this case the GFP_KMEMCG will not be
- * passed and the page allocator will not attempt any cgroup accounting.
+ * All memory allocated from a per-memcg cache is charged to the owner memcg.
  */
 static __always_inline struct kmem_cache *
 memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5177c6d4a2dd..56a768b3d5a8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2953,7 +2953,7 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
 }
 #endif
 
-static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
+int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 {
 	struct res_counter *fail_res;
 	int ret = 0;
@@ -2991,7 +2991,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 	return ret;
 }
 
-static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
 {
 	res_counter_uncharge(&memcg->res, size);
 	if (do_swap_account)
diff --git a/mm/slab.c b/mm/slab.c
index 5c846d25c17d..944ac58cfcf8 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1688,8 +1688,12 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		flags |= __GFP_RECLAIMABLE;
 
+	if (memcg_charge_slab(cachep, flags, cachep->gfporder))
+		return NULL;
+
 	page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
 	if (!page) {
+		memcg_uncharge_slab(cachep, cachep->gfporder);
 		slab_out_of_memory(cachep, flags, nodeid);
 		return NULL;
 	}
@@ -1747,7 +1751,8 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
 	memcg_release_pages(cachep, cachep->gfporder);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
-	__free_memcg_kmem_pages(page, cachep->gfporder);
+	__free_pages(page, cachep->gfporder);
+	memcg_uncharge_slab(cachep, cachep->gfporder);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
diff --git a/mm/slab.h b/mm/slab.h
index 6bd4c353704f..863e67b8c8c9 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -192,6 +192,26 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
 		return s;
 	return s->memcg_params->root_cache;
 }
+
+static __always_inline int memcg_charge_slab(struct kmem_cache *s,
+					     gfp_t gfp, int order)
+{
+	if (!memcg_kmem_enabled())
+		return 0;
+	if (is_root_cache(s))
+		return 0;
+	return memcg_charge_kmem(s->memcg_params->memcg, gfp,
+				 PAGE_SIZE << order);
+}
+
+static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
+{
+	if (!memcg_kmem_enabled())
+		return;
+	if (is_root_cache(s))
+		return;
+	memcg_uncharge_kmem(s->memcg_params->memcg, PAGE_SIZE << order);
+}
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
 {
@@ -227,6 +247,15 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
 {
 	return s;
 }
+
+static inline int memcg_charge_slab(struct kmem_cache *s, gfp_t gfp, int order)
+{
+	return 0;
+}
+
+static inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
+{
+}
 #endif
 
 static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 102cc6fca3d3..06f0c6125632 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -290,12 +290,8 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
 				 root_cache->size, root_cache->align,
 				 root_cache->flags, root_cache->ctor,
 				 memcg, root_cache);
-	if (IS_ERR(s)) {
+	if (IS_ERR(s))
 		kfree(cache_name);
-		goto out_unlock;
-	}
-
-	s->allocflags |= __GFP_KMEMCG;
 
 out_unlock:
 	mutex_unlock(&slab_mutex);
diff --git a/mm/slub.c b/mm/slub.c
index d05a5483106d..fc9831851be6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1312,17 +1312,26 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
 /*
  * Slab allocation and freeing
  */
-static inline struct page *alloc_slab_page(gfp_t flags, int node,
-					struct kmem_cache_order_objects oo)
+static inline struct page *alloc_slab_page(struct kmem_cache *s,
+		gfp_t flags, int node, struct kmem_cache_order_objects oo)
 {
+	struct page *page;
 	int order = oo_order(oo);
 
 	flags |= __GFP_NOTRACK;
 
+	if (memcg_charge_slab(s, flags, order))
+		return NULL;
+
 	if (node == NUMA_NO_NODE)
-		return alloc_pages(flags, order);
+		page = alloc_pages(flags, order);
 	else
-		return alloc_pages_exact_node(node, flags, order);
+		page = alloc_pages_exact_node(node, flags, order);
+
+	if (!page)
+		memcg_uncharge_slab(s, order);
+
+	return page;
 }
 
 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -1344,7 +1353,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	 */
 	alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL;
 
-	page = alloc_slab_page(alloc_gfp, node, oo);
+	page = alloc_slab_page(s, alloc_gfp, node, oo);
 	if (unlikely(!page)) {
 		oo = s->min;
 		alloc_gfp = flags;
@@ -1352,7 +1361,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 		 * Allocation may have failed due to fragmentation.
 		 * Try a lower order alloc if possible
 		 */
-		page = alloc_slab_page(alloc_gfp, node, oo);
+		page = alloc_slab_page(s, alloc_gfp, node, oo);
 
 		if (page)
 			stat(s, ORDER_FALLBACK);
@@ -1468,7 +1477,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	page_mapcount_reset(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
-	__free_memcg_kmem_pages(page, order);
+	__free_pages(page, order);
+	memcg_uncharge_slab(s, order);
 }
 
 #define need_reserve_slab_rcu						\
-- 
cgit 


From 52383431b37cdbec63944e953ffc2698a7ad9722 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Wed, 4 Jun 2014 16:06:39 -0700
Subject: mm: get rid of __GFP_KMEMCG

Currently to allocate a page that should be charged to kmemcg (e.g.
threadinfo), we pass __GFP_KMEMCG flag to the page allocator.  The page
allocated is then to be freed by free_memcg_kmem_pages.  Apart from
looking asymmetrical, this also requires intrusion to the general
allocation path.  So let's introduce separate functions that will
alloc/free pages charged to kmemcg.

The new functions are called alloc_kmem_pages and free_kmem_pages.  They
should be used when the caller actually would like to use kmalloc, but
has to fall back to the page allocator because the allocation is large.
They only differ from alloc_pages and free_pages in that besides
allocating or freeing pages they also charge them to the kmem resource
counter of the current memory cgroup.

[sfr@canb.auug.org.au: export kmalloc_order() to modules]
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h             | 10 +++++---
 include/linux/memcontrol.h      |  2 +-
 include/linux/slab.h            | 11 +-------
 include/linux/thread_info.h     |  2 --
 include/trace/events/gfpflags.h |  1 -
 kernel/fork.c                   |  6 ++---
 mm/memcontrol.c                 | 11 ++++----
 mm/page_alloc.c                 | 56 +++++++++++++++++++++++++----------------
 mm/slab_common.c                | 13 ++++++++++
 mm/slub.c                       |  6 ++---
 10 files changed, 68 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 39b81dc7d01a..d382db71e300 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -31,7 +31,6 @@ struct vm_area_struct;
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
 #define ___GFP_RECLAIMABLE	0x80000u
-#define ___GFP_KMEMCG		0x100000u
 #define ___GFP_NOTRACK		0x200000u
 #define ___GFP_NO_KSWAPD	0x400000u
 #define ___GFP_OTHER_NODE	0x800000u
@@ -91,7 +90,6 @@ struct vm_area_struct;
 
 #define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
-#define __GFP_KMEMCG	((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
 #define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
 
 /*
@@ -353,6 +351,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
 	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
 
+extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
+extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
+					  unsigned int order);
+
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 
@@ -372,8 +374,8 @@ extern void free_pages(unsigned long addr, unsigned int order);
 extern void free_hot_cold_page(struct page *page, int cold);
 extern void free_hot_cold_page_list(struct list_head *list, int cold);
 
-extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
-extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);
+extern void __free_kmem_pages(struct page *page, unsigned int order);
+extern void free_kmem_pages(unsigned long addr, unsigned int order);
 
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 96e5d2573eb0..5155d09e749d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -537,7 +537,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 	 * res_counter_charge_nofail, but we hope those allocations are rare,
 	 * and won't be worth the trouble.
 	 */
-	if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL))
+	if (gfp & __GFP_NOFAIL)
 		return true;
 	if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD))
 		return true;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 307bfbe62387..a6aab2c0dfc5 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -369,16 +369,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
 #include <linux/slub_def.h>
 #endif
 
-static __always_inline void *
-kmalloc_order(size_t size, gfp_t flags, unsigned int order)
-{
-	void *ret;
-
-	flags |= (__GFP_COMP | __GFP_KMEMCG);
-	ret = (void *) __get_free_pages(flags, order);
-	kmemleak_alloc(ret, size, 1, flags);
-	return ret;
-}
+extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order);
 
 #ifdef CONFIG_TRACING
 extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index cb0cec94fda3..ff307b548ed3 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm);
 # define THREADINFO_GFP		(GFP_KERNEL | __GFP_NOTRACK)
 #endif
 
-#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG)
-
 /*
  * flag set/clear/test wrappers
  * - pass TIF_xxxx constants to these functions
diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
index 1eddbf1557f2..d6fd8e5b14b7 100644
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -34,7 +34,6 @@
 	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\
 	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\
 	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
-	{(unsigned long)__GFP_KMEMCG,		"GFP_KMEMCG"},		\
 	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
 	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
 	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
diff --git a/kernel/fork.c b/kernel/fork.c
index 54a8d26f612f..59e3dcc5b8f2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -150,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti)
 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 						  int node)
 {
-	struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
-					     THREAD_SIZE_ORDER);
+	struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP,
+						  THREAD_SIZE_ORDER);
 
 	return page ? page_address(page) : NULL;
 }
 
 static inline void free_thread_info(struct thread_info *ti)
 {
-	free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+	free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
 # else
 static struct kmem_cache *thread_info_cache;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 56a768b3d5a8..7bab1de50f48 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3540,11 +3540,12 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
 	/*
 	 * Disabling accounting is only relevant for some specific memcg
 	 * internal allocations. Therefore we would initially not have such
-	 * check here, since direct calls to the page allocator that are marked
-	 * with GFP_KMEMCG only happen outside memcg core. We are mostly
-	 * concerned with cache allocations, and by having this test at
-	 * memcg_kmem_get_cache, we are already able to relay the allocation to
-	 * the root cache and bypass the memcg cache altogether.
+	 * check here, since direct calls to the page allocator that are
+	 * accounted to kmemcg (alloc_kmem_pages and friends) only happen
+	 * outside memcg core. We are mostly concerned with cache allocations,
+	 * and by having this test at memcg_kmem_get_cache, we are already able
+	 * to relay the allocation to the root cache and bypass the memcg cache
+	 * altogether.
 	 *
 	 * There is one exception, though: the SLUB allocator does not create
 	 * large order caches, but rather service large kmallocs directly from
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5dba2933c9c0..7cfdcd808f52 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2697,7 +2697,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	int migratetype = allocflags_to_migratetype(gfp_mask);
 	unsigned int cpuset_mems_cookie;
 	int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
-	struct mem_cgroup *memcg = NULL;
 
 	gfp_mask &= gfp_allowed_mask;
 
@@ -2716,13 +2715,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	if (unlikely(!zonelist->_zonerefs->zone))
 		return NULL;
 
-	/*
-	 * Will only have any effect when __GFP_KMEMCG is set.  This is
-	 * verified in the (always inline) callee
-	 */
-	if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
-		return NULL;
-
 retry_cpuset:
 	cpuset_mems_cookie = read_mems_allowed_begin();
 
@@ -2782,8 +2774,6 @@ out:
 	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
 		goto retry_cpuset;
 
-	memcg_kmem_commit_charge(page, memcg, order);
-
 	return page;
 }
 EXPORT_SYMBOL(__alloc_pages_nodemask);
@@ -2837,27 +2827,51 @@ void free_pages(unsigned long addr, unsigned int order)
 EXPORT_SYMBOL(free_pages);
 
 /*
- * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
- * pages allocated with __GFP_KMEMCG.
+ * alloc_kmem_pages charges newly allocated pages to the kmem resource counter
+ * of the current memory cgroup.
  *
- * Those pages are accounted to a particular memcg, embedded in the
- * corresponding page_cgroup. To avoid adding a hit in the allocator to search
- * for that information only to find out that it is NULL for users who have no
- * interest in that whatsoever, we provide these functions.
- *
- * The caller knows better which flags it relies on.
+ * It should be used when the caller would like to use kmalloc, but since the
+ * allocation is large, it has to fall back to the page allocator.
+ */
+struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order)
+{
+	struct page *page;
+	struct mem_cgroup *memcg = NULL;
+
+	if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+		return NULL;
+	page = alloc_pages(gfp_mask, order);
+	memcg_kmem_commit_charge(page, memcg, order);
+	return page;
+}
+
+struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
+{
+	struct page *page;
+	struct mem_cgroup *memcg = NULL;
+
+	if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
+		return NULL;
+	page = alloc_pages_node(nid, gfp_mask, order);
+	memcg_kmem_commit_charge(page, memcg, order);
+	return page;
+}
+
+/*
+ * __free_kmem_pages and free_kmem_pages will free pages allocated with
+ * alloc_kmem_pages.
  */
-void __free_memcg_kmem_pages(struct page *page, unsigned int order)
+void __free_kmem_pages(struct page *page, unsigned int order)
 {
 	memcg_kmem_uncharge_pages(page, order);
 	__free_pages(page, order);
 }
 
-void free_memcg_kmem_pages(unsigned long addr, unsigned int order)
+void free_kmem_pages(unsigned long addr, unsigned int order)
 {
 	if (addr != 0) {
 		VM_BUG_ON(!virt_addr_valid((void *)addr));
-		__free_memcg_kmem_pages(virt_to_page((void *)addr), order);
+		__free_kmem_pages(virt_to_page((void *)addr), order);
 	}
 }
 
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 06f0c6125632..1950c8f4d1a6 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -582,6 +582,19 @@ void __init create_kmalloc_caches(unsigned long flags)
 }
 #endif /* !CONFIG_SLOB */
 
+void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
+{
+	void *ret;
+	struct page *page;
+
+	flags |= __GFP_COMP;
+	page = alloc_kmem_pages(flags, order);
+	ret = page ? page_address(page) : NULL;
+	kmemleak_alloc(ret, size, 1, flags);
+	return ret;
+}
+EXPORT_SYMBOL(kmalloc_order);
+
 #ifdef CONFIG_TRACING
 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 {
diff --git a/mm/slub.c b/mm/slub.c
index fc9831851be6..ddb60795f373 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3311,8 +3311,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 	struct page *page;
 	void *ptr = NULL;
 
-	flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
-	page = alloc_pages_node(node, flags, get_order(size));
+	flags |= __GFP_COMP | __GFP_NOTRACK;
+	page = alloc_kmem_pages_node(node, flags, get_order(size));
 	if (page)
 		ptr = page_address(page);
 
@@ -3381,7 +3381,7 @@ void kfree(const void *x)
 	if (unlikely(!PageSlab(page))) {
 		BUG_ON(!PageCompound(page));
 		kfree_hook(x);
-		__free_memcg_kmem_pages(page, compound_order(page));
+		__free_kmem_pages(page, compound_order(page));
 		return;
 	}
 	slab_free(page->slab_cache, page, object, _RET_IP_);
-- 
cgit 


From 4f115147ff802267d0aa41e361c5aa5bd933d896 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <davidlohr@hp.com>
Date: Wed, 4 Jun 2014 16:06:46 -0700
Subject: mm,vmacache: add debug data

Introduce a CONFIG_DEBUG_VM_VMACACHE option to enable counting the cache
hit rate -- exported in /proc/vmstat.

Any update to the caching scheme needs this kind of data, thus it can
save some work re-implementing the counting all the time.

Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vm_event_item.h |  4 ++++
 include/linux/vmstat.h        |  6 ++++++
 lib/Kconfig.debug             | 10 ++++++++++
 mm/vmacache.c                 | 12 ++++++++++--
 mm/vmstat.c                   |  4 ++++
 5 files changed, 34 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 486c3972c0be..ced92345c963 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -80,6 +80,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		NR_TLB_LOCAL_FLUSH_ALL,
 		NR_TLB_LOCAL_FLUSH_ONE,
 #endif /* CONFIG_DEBUG_TLBFLUSH */
+#ifdef CONFIG_DEBUG_VM_VMACACHE
+		VMACACHE_FIND_CALLS,
+		VMACACHE_FIND_HITS,
+#endif
 		NR_VM_EVENT_ITEMS
 };
 
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 45c9cd1daf7a..82e7db7f7100 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -95,6 +95,12 @@ static inline void vm_events_fold_cpu(int cpu)
 #define count_vm_tlb_events(x, y) do { (void)(y); } while (0)
 #endif
 
+#ifdef CONFIG_DEBUG_VM_VMACACHE
+#define count_vm_vmacache_event(x) count_vm_event(x)
+#else
+#define count_vm_vmacache_event(x) do {} while (0)
+#endif
+
 #define __count_zone_vm_events(item, zone, delta) \
 		__count_vm_events(item##_NORMAL - ZONE_NORMAL + \
 		zone_idx(zone), delta)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 99c8bfee1b00..c2de65045a40 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -501,6 +501,16 @@ config DEBUG_VM
 
 	  If unsure, say N.
 
+config DEBUG_VM_VMACACHE
+	bool "Debug VMA caching"
+	depends on DEBUG_VM
+	help
+	  Enable this to turn on VMA caching debug information. Doing so
+	  can cause significant overhead, so only enable it in non-production
+	  environments.
+
+	  If unsure, say N.
+
 config DEBUG_VM_RB
 	bool "Debug VM red-black trees"
 	depends on DEBUG_VM
diff --git a/mm/vmacache.c b/mm/vmacache.c
index 1037a3bab505..658ed3b3e38d 100644
--- a/mm/vmacache.c
+++ b/mm/vmacache.c
@@ -78,6 +78,8 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
 	if (!vmacache_valid(mm))
 		return NULL;
 
+	count_vm_vmacache_event(VMACACHE_FIND_CALLS);
+
 	for (i = 0; i < VMACACHE_SIZE; i++) {
 		struct vm_area_struct *vma = current->vmacache[i];
 
@@ -85,8 +87,10 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
 			continue;
 		if (WARN_ON_ONCE(vma->vm_mm != mm))
 			break;
-		if (vma->vm_start <= addr && vma->vm_end > addr)
+		if (vma->vm_start <= addr && vma->vm_end > addr) {
+			count_vm_vmacache_event(VMACACHE_FIND_HITS);
 			return vma;
+		}
 	}
 
 	return NULL;
@@ -102,11 +106,15 @@ struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
 	if (!vmacache_valid(mm))
 		return NULL;
 
+	count_vm_vmacache_event(VMACACHE_FIND_CALLS);
+
 	for (i = 0; i < VMACACHE_SIZE; i++) {
 		struct vm_area_struct *vma = current->vmacache[i];
 
-		if (vma && vma->vm_start == start && vma->vm_end == end)
+		if (vma && vma->vm_start == start && vma->vm_end == end) {
+			count_vm_vmacache_event(VMACACHE_FIND_HITS);
 			return vma;
+		}
 	}
 
 	return NULL;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 302dd076b8bf..82ce17ce58c4 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -866,6 +866,10 @@ const char * const vmstat_text[] = {
 	"nr_tlb_local_flush_one",
 #endif /* CONFIG_DEBUG_TLBFLUSH */
 
+#ifdef CONFIG_DEBUG_VM_VMACACHE
+	"vmacache_find_calls",
+	"vmacache_find_hits",
+#endif
 #endif /* CONFIG_VM_EVENTS_COUNTERS */
 };
 #endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA */
-- 
cgit 


From 9c5a3621427da68afe6a078cadf807d2c8cc1d12 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 4 Jun 2014 16:06:50 -0700
Subject: x86: enable DMA CMA with swiotlb

The DMA Contiguous Memory Allocator support on x86 is disabled when the
swiotlb config option is enabled.  So DMA CMA is always disabled on
x86_64 because swiotlb is always enabled.  This patch adds support for
DMA CMA with the swiotlb config option enabled.

The contiguous memory allocator on x86 is integrated in the function
dma_generic_alloc_coherent() which is .alloc callback in nommu_dma_ops
for dma_alloc_coherent().

x86_swiotlb_alloc_coherent(), which is the .alloc callback in
swiotlb_dma_ops, first tries to allocate with
dma_generic_alloc_coherent() and then calls swiotlb_alloc_coherent()
as a fallback.

The main part of supporting DMA CMA with swiotlb is changing
x86_swiotlb_free_coherent(), which is the .free callback in
swiotlb_dma_ops for dma_free_coherent(), so that it can distinguish
memory allocated by dma_generic_alloc_coherent() from memory allocated
by swiotlb_alloc_coherent() and release the former with
dma_generic_free_coherent(), which can handle contiguous memory.  This
change requires making is_swiotlb_buffer() a global function.

This also requires changing the .free callback in the dma_map_ops for
amd_gart and sta2x11, because these dma_ops are also using
dma_generic_alloc_coherent().

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig               | 2 +-
 arch/x86/include/asm/swiotlb.h | 7 +++++++
 arch/x86/kernel/amd_gart_64.c  | 2 +-
 arch/x86/kernel/pci-swiotlb.c  | 9 ++++++---
 arch/x86/pci/sta2x11-fixup.c   | 6 ++----
 include/linux/swiotlb.h        | 2 ++
 lib/swiotlb.c                  | 2 +-
 7 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 896a411a4584..4a0137f6f032 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -41,7 +41,7 @@ config X86
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_FRAME_POINTERS
 	select HAVE_DMA_ATTRS
-	select HAVE_DMA_CONTIGUOUS if !SWIOTLB
+	select HAVE_DMA_CONTIGUOUS
 	select HAVE_KRETPROBES
 	select GENERIC_EARLY_IOREMAP
 	select HAVE_OPTPROBES
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h
index 977f1761a25d..ab05d73e2bb7 100644
--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -29,4 +29,11 @@ static inline void pci_swiotlb_late_init(void)
 
 static inline void dma_mark_clean(void *addr, size_t size) {}
 
+extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+					dma_addr_t *dma_handle, gfp_t flags,
+					struct dma_attrs *attrs);
+extern void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+					void *vaddr, dma_addr_t dma_addr,
+					struct dma_attrs *attrs);
+
 #endif /* _ASM_X86_SWIOTLB_H */
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index b574b295a2f9..8e3842fc8bea 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -512,7 +512,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
 		   dma_addr_t dma_addr, struct dma_attrs *attrs)
 {
 	gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL);
-	free_pages((unsigned long)vaddr, get_order(size));
+	dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
 }
 
 static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 6c483ba98b9c..77dd0ad58be4 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -14,7 +14,7 @@
 #include <asm/iommu_table.h>
 int swiotlb __read_mostly;
 
-static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
+void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 					dma_addr_t *dma_handle, gfp_t flags,
 					struct dma_attrs *attrs)
 {
@@ -28,11 +28,14 @@ static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags);
 }
 
-static void x86_swiotlb_free_coherent(struct device *dev, size_t size,
+void x86_swiotlb_free_coherent(struct device *dev, size_t size,
 				      void *vaddr, dma_addr_t dma_addr,
 				      struct dma_attrs *attrs)
 {
-	swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+	if (is_swiotlb_buffer(dma_to_phys(dev, dma_addr)))
+		swiotlb_free_coherent(dev, size, vaddr, dma_addr);
+	else
+		dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
 }
 
 static struct dma_map_ops swiotlb_dma_ops = {
diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c
index 9d8a509c9730..5ceda85b8687 100644
--- a/arch/x86/pci/sta2x11-fixup.c
+++ b/arch/x86/pci/sta2x11-fixup.c
@@ -173,9 +173,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
 {
 	void *vaddr;
 
-	vaddr = dma_generic_alloc_coherent(dev, size, dma_handle, flags, attrs);
-	if (!vaddr)
-		vaddr = swiotlb_alloc_coherent(dev, size, dma_handle, flags);
+	vaddr = x86_swiotlb_alloc_coherent(dev, size, dma_handle, flags, attrs);
 	*dma_handle = p2a(*dma_handle, to_pci_dev(dev));
 	return vaddr;
 }
@@ -183,7 +181,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev,
 /* We have our own dma_ops: the same as swiotlb but from alloc (above) */
 static struct dma_map_ops sta2x11_dma_ops = {
 	.alloc = sta2x11_swiotlb_alloc_coherent,
-	.free = swiotlb_free_coherent,
+	.free = x86_swiotlb_free_coherent,
 	.map_page = swiotlb_map_page,
 	.unmap_page = swiotlb_unmap_page,
 	.map_sg = swiotlb_map_sg_attrs,
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index a5ffd32642fd..e7a018eaf3a2 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -116,4 +116,6 @@ static inline void swiotlb_free(void) { }
 #endif
 
 extern void swiotlb_print_info(void);
+extern int is_swiotlb_buffer(phys_addr_t paddr);
+
 #endif /* __LINUX_SWIOTLB_H */
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index b604b831f4d1..649d097853a1 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -374,7 +374,7 @@ void __init swiotlb_free(void)
 	io_tlb_nslabs = 0;
 }
 
-static int is_swiotlb_buffer(phys_addr_t paddr)
+int is_swiotlb_buffer(phys_addr_t paddr)
 {
 	return paddr >= io_tlb_start && paddr < io_tlb_end;
 }
-- 
cgit 


From 2bfc2862c4fe38379a2fb2cfba33fad32ccb4ff4 Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 4 Jun 2014 16:06:53 -0700
Subject: memblock: introduce memblock_alloc_range()

This introduces memblock_alloc_range(), which allocates memory from
memblock within the specified physical address range.  I would like to
use this function to specify the location of CMA.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h |  2 ++
 mm/memblock.c            | 21 +++++++++++++++++----
 2 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 73dc382e72d8..b660e05b63d4 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -272,6 +272,8 @@ static inline bool memblock_bottom_up(void) { return false; }
 #define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
 #define MEMBLOCK_ALLOC_ACCESSIBLE	0
 
+phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
+					phys_addr_t start, phys_addr_t end);
 phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align,
 				phys_addr_t max_addr);
 phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align,
diff --git a/mm/memblock.c b/mm/memblock.c
index a810ba923cdd..146736411318 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1033,22 +1033,35 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
 }
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
-					phys_addr_t align, phys_addr_t max_addr,
-					int nid)
+static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
+					phys_addr_t align, phys_addr_t start,
+					phys_addr_t end, int nid)
 {
 	phys_addr_t found;
 
 	if (!align)
 		align = SMP_CACHE_BYTES;
 
-	found = memblock_find_in_range_node(size, align, 0, max_addr, nid);
+	found = memblock_find_in_range_node(size, align, start, end, nid);
 	if (found && !memblock_reserve(found, size))
 		return found;
 
 	return 0;
 }
 
+phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
+					phys_addr_t start, phys_addr_t end)
+{
+	return memblock_alloc_range_nid(size, align, start, end, NUMA_NO_NODE);
+}
+
+static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
+					phys_addr_t align, phys_addr_t max_addr,
+					int nid)
+{
+	return memblock_alloc_range_nid(size, align, 0, max_addr, nid);
+}
+
 phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
 	return memblock_alloc_base_nid(size, align, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
-- 
cgit 


From 5ea3b1b2f8ad9162684431ce6188102ca4c64b7a Mon Sep 17 00:00:00 2001
From: Akinobu Mita <akinobu.mita@gmail.com>
Date: Wed, 4 Jun 2014 16:06:54 -0700
Subject: cma: add placement specifier for "cma=" kernel parameter

Currently, "cma=" kernel parameter is used to specify the size of CMA,
but we can't specify where it is located.  We want to locate CMA below
4GB for devices only supporting 32-bit addressing on 64-bit systems
without iommu.

This patch makes it possible to specify the placement of CMA by
extending the "cma=" kernel parameter.

Examples:
 1. locate 64MB CMA below 4GB by "cma=64M@0-4G"
 2. locate 64MB CMA exactly at 512MB by "cma=64M@512M"

Note that the DMA contiguous memory allocator on x86 assumes that
page_address() works for the pages it allocates.  So this change
requires limiting the end address of the contiguous memory area to
max_pfn_mapped, via the argument of dma_contiguous_reserve(), to
prevent the area from being located in highmem.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt |  7 +++++--
 arch/x86/kernel/setup.c             |  2 +-
 drivers/base/dma-contiguous.c       | 42 ++++++++++++++++++++++++++++---------
 include/linux/dma-contiguous.h      |  9 +++++---
 4 files changed, 44 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index af55e13ace8f..adea3a22fa00 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -630,8 +630,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Also note the kernel might malfunction if you disable
 			some critical bits.
 
-	cma=nn[MG]	[ARM,KNL]
-			Sets the size of kernel global memory area for contiguous
+	cma=nn[MG]@[start[MG][-end[MG]]]
+			[ARM,X86,KNL]
+			Sets the size of kernel global memory area for
+			contiguous memory allocations and optionally the
+			placement constraint by the physical address range of
 			memory allocations. For more information, see
 			include/linux/dma-contiguous.h
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 09c76d265550..78a0e6298922 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1119,7 +1119,7 @@ void __init setup_arch(char **cmdline_p)
 	setup_real_mode();
 
 	memblock_set_current_limit(get_max_mapped());
-	dma_contiguous_reserve(0);
+	dma_contiguous_reserve(max_pfn_mapped << PAGE_SHIFT);
 
 	/*
 	 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index c34ec3364243..83969f8c5727 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -60,11 +60,22 @@ struct cma *dma_contiguous_default_area;
  */
 static const phys_addr_t size_bytes = CMA_SIZE_MBYTES * SZ_1M;
 static phys_addr_t size_cmdline = -1;
+static phys_addr_t base_cmdline;
+static phys_addr_t limit_cmdline;
 
 static int __init early_cma(char *p)
 {
 	pr_debug("%s(%s)\n", __func__, p);
 	size_cmdline = memparse(p, &p);
+	if (*p != '@')
+		return 0;
+	base_cmdline = memparse(p + 1, &p);
+	if (*p != '-') {
+		limit_cmdline = base_cmdline + size_cmdline;
+		return 0;
+	}
+	limit_cmdline = memparse(p + 1, &p);
+
 	return 0;
 }
 early_param("cma", early_cma);
@@ -108,11 +119,18 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void)
 void __init dma_contiguous_reserve(phys_addr_t limit)
 {
 	phys_addr_t selected_size = 0;
+	phys_addr_t selected_base = 0;
+	phys_addr_t selected_limit = limit;
+	bool fixed = false;
 
 	pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
 
 	if (size_cmdline != -1) {
 		selected_size = size_cmdline;
+		selected_base = base_cmdline;
+		selected_limit = min_not_zero(limit_cmdline, limit);
+		if (base_cmdline + size_cmdline == limit_cmdline)
+			fixed = true;
 	} else {
 #ifdef CONFIG_CMA_SIZE_SEL_MBYTES
 		selected_size = size_bytes;
@@ -129,10 +147,12 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
 		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
 			 (unsigned long)selected_size / SZ_1M);
 
-		dma_contiguous_reserve_area(selected_size, 0, limit,
-					    &dma_contiguous_default_area);
+		dma_contiguous_reserve_area(selected_size, selected_base,
+					    selected_limit,
+					    &dma_contiguous_default_area,
+					    fixed);
 	}
-};
+}
 
 static DEFINE_MUTEX(cma_mutex);
 
@@ -189,15 +209,20 @@ core_initcall(cma_init_reserved_areas);
  * @base: Base address of the reserved area optional, use 0 for any
  * @limit: End address of the reserved memory (optional, 0 for any).
  * @res_cma: Pointer to store the created cma region.
+ * @fixed: hint about where to place the reserved area
  *
  * This function reserves memory from early allocator. It should be
  * called by arch specific code once the early allocator (memblock or bootmem)
  * has been activated and all other subsystems have already allocated/reserved
  * memory. This function allows to create custom reserved areas for specific
  * devices.
+ *
+ * If @fixed is true, reserve contiguous area at exactly @base.  If false,
+ * reserve in range from @base to @limit.
  */
 int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
-				       phys_addr_t limit, struct cma **res_cma)
+				       phys_addr_t limit, struct cma **res_cma,
+				       bool fixed)
 {
 	struct cma *cma = &cma_areas[cma_area_count];
 	phys_addr_t alignment;
@@ -223,18 +248,15 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
 	limit &= ~(alignment - 1);
 
 	/* Reserve memory */
-	if (base) {
+	if (base && fixed) {
 		if (memblock_is_region_reserved(base, size) ||
 		    memblock_reserve(base, size) < 0) {
 			ret = -EBUSY;
 			goto err;
 		}
 	} else {
-		/*
-		 * Use __memblock_alloc_base() since
-		 * memblock_alloc_base() panic()s.
-		 */
-		phys_addr_t addr = __memblock_alloc_base(size, alignment, limit);
+		phys_addr_t addr = memblock_alloc_range(size, alignment, base,
+							limit);
 		if (!addr) {
 			ret = -ENOMEM;
 			goto err;
diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h
index 3b28f937d959..772eab5d524a 100644
--- a/include/linux/dma-contiguous.h
+++ b/include/linux/dma-contiguous.h
@@ -88,7 +88,8 @@ static inline void dma_contiguous_set_default(struct cma *cma)
 void dma_contiguous_reserve(phys_addr_t addr_limit);
 
 int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
-				       phys_addr_t limit, struct cma **res_cma);
+				       phys_addr_t limit, struct cma **res_cma,
+				       bool fixed);
 
 /**
  * dma_declare_contiguous() - reserve area for contiguous memory handling
@@ -108,7 +109,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size,
 {
 	struct cma *cma;
 	int ret;
-	ret = dma_contiguous_reserve_area(size, base, limit, &cma);
+	ret = dma_contiguous_reserve_area(size, base, limit, &cma, true);
 	if (ret == 0)
 		dev_set_cma_area(dev, cma);
 
@@ -136,7 +137,9 @@ static inline void dma_contiguous_set_default(struct cma *cma) { }
 static inline void dma_contiguous_reserve(phys_addr_t limit) { }
 
 static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
-				       phys_addr_t limit, struct cma **res_cma) {
+				       phys_addr_t limit, struct cma **res_cma,
+				       bool fixed)
+{
 	return -ENOSYS;
 }
 
-- 
cgit 


From 02a8efeda894d3541c7143ed818b25b299504190 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 4 Jun 2014 16:06:59 -0700
Subject: include/linux/mmdebug.h: add VM_WARN_ON() and VM_WARN_ON_ONCE()

Add VM_WARN_ON() and VM_WARN_ON_ONCE(): variants of WARN_ON() and
WARN_ON_ONCE() that are dependent on CONFIG_DEBUG_VM.

Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmdebug.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 2d57efa64cc1..a3499d7b0e8a 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -11,9 +11,13 @@ extern void dump_page_badflags(struct page *page, const char *reason,
 #define VM_BUG_ON(cond) BUG_ON(cond)
 #define VM_BUG_ON_PAGE(cond, page) \
 	do { if (unlikely(cond)) { dump_page(page, NULL); BUG(); } } while (0)
+#define VM_WARN_ON(cond) WARN_ON(cond)
+#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
 #else
 #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
+#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
 #endif
 
 #ifdef CONFIG_DEBUG_VIRTUAL
-- 
cgit 


From e4f674229ce63dac60be0c4ddfb5ef8d1225d30d Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Wed, 4 Jun 2014 16:07:02 -0700
Subject: mm: pass VM_BUG_ON() reason to dump_page()

I recently added a patch to let folks pass a "reason" string to
dump_page() which gets dumped out along with the page's data.  This
essentially saves the bug-reader a trip into the source to figure out
why we BUG_ON()'d.

The new VM_BUG_ON_PAGE() passes in NULL for "reason".  It seems like we
might as well pass the BUG_ON() condition if we have it.  This will
bloat kernels a bit with ~160 new strings, but this is all under a
debugging option anyway.

	page:ffffea0008560280 count:1 mapcount:0 mapping:(null) index:0x0
	page flags: 0xbfffc0000000001(locked)
	page dumped because: VM_BUG_ON_PAGE(PageLocked(page))
	------------[ cut here ]------------
	kernel BUG at /home/davehans/linux.git/mm/filemap.c:464!
	invalid opcode: 0000 [#1] SMP
	CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.14.0+ #251
	Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
	...

[akpm@linux-foundation.org: include stringify.h]
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Davidlohr Bueso <davidlohr@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmdebug.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index a3499d7b0e8a..edd82a105220 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -1,6 +1,8 @@
 #ifndef LINUX_MM_DEBUG_H
 #define LINUX_MM_DEBUG_H 1
 
+#include <linux/stringify.h>
+
 struct page;
 
 extern void dump_page(struct page *page, const char *reason);
@@ -9,8 +11,13 @@ extern void dump_page_badflags(struct page *page, const char *reason,
 
 #ifdef CONFIG_DEBUG_VM
 #define VM_BUG_ON(cond) BUG_ON(cond)
-#define VM_BUG_ON_PAGE(cond, page) \
-	do { if (unlikely(cond)) { dump_page(page, NULL); BUG(); } } while (0)
+#define VM_BUG_ON_PAGE(cond, page)					\
+	do {								\
+		if (unlikely(cond)) {					\
+			dump_page(page, "VM_BUG_ON_PAGE(" __stringify(cond)")");\
+			BUG();						\
+		}							\
+	} while (0)
 #define VM_WARN_ON(cond) WARN_ON(cond)
 #define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond)
 #else
-- 
cgit 


From bae7f4ae14d47008a11b4358b167cb0ae186c06a Mon Sep 17 00:00:00 2001
From: Luiz Capitulino <lcapitulino@redhat.com>
Date: Wed, 4 Jun 2014 16:07:08 -0700
Subject: hugetlb: add hstate_is_gigantic()

Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Reviewed-by: Davidlohr Bueso <davidlohr@hp.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  5 +++++
 mm/hugetlb.c            | 28 ++++++++++++++--------------
 2 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index d0bad1a8b0bd..35786ee36f06 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -343,6 +343,11 @@ static inline unsigned huge_page_shift(struct hstate *h)
 	return h->order + PAGE_SHIFT;
 }
 
+static inline bool hstate_is_gigantic(struct hstate *h)
+{
+	return huge_page_order(h) >= MAX_ORDER;
+}
+
 static inline unsigned int pages_per_huge_page(struct hstate *h)
 {
 	return 1 << h->order;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5d54d4b8df01..a66310586894 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -611,7 +611,7 @@ static void update_and_free_page(struct hstate *h, struct page *page)
 {
 	int i;
 
-	VM_BUG_ON(h->order >= MAX_ORDER);
+	VM_BUG_ON(hstate_is_gigantic(h));
 
 	h->nr_huge_pages--;
 	h->nr_huge_pages_node[page_to_nid(page)]--;
@@ -664,7 +664,7 @@ static void free_huge_page(struct page *page)
 	if (restore_reserve)
 		h->resv_huge_pages++;
 
-	if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) {
+	if (h->surplus_huge_pages_node[nid] && !hstate_is_gigantic(h)) {
 		/* remove the page from active list */
 		list_del(&page->lru);
 		update_and_free_page(h, page);
@@ -768,7 +768,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 {
 	struct page *page;
 
-	if (h->order >= MAX_ORDER)
+	if (hstate_is_gigantic(h))
 		return NULL;
 
 	page = alloc_pages_exact_node(nid,
@@ -962,7 +962,7 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 	struct page *page;
 	unsigned int r_nid;
 
-	if (h->order >= MAX_ORDER)
+	if (hstate_is_gigantic(h))
 		return NULL;
 
 	/*
@@ -1155,7 +1155,7 @@ static void return_unused_surplus_pages(struct hstate *h,
 	h->resv_huge_pages -= unused_resv_pages;
 
 	/* Cannot return gigantic pages currently */
-	if (h->order >= MAX_ORDER)
+	if (hstate_is_gigantic(h))
 		return;
 
 	nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
@@ -1355,7 +1355,7 @@ static void __init gather_bootmem_prealloc(void)
 		 * fix confusing memory reports from free(1) and another
 		 * side-effects, like CommitLimit going negative.
 		 */
-		if (h->order > (MAX_ORDER - 1))
+		if (hstate_is_gigantic(h))
 			adjust_managed_page_count(page, 1 << h->order);
 	}
 }
@@ -1365,7 +1365,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 	unsigned long i;
 
 	for (i = 0; i < h->max_huge_pages; ++i) {
-		if (h->order >= MAX_ORDER) {
+		if (hstate_is_gigantic(h)) {
 			if (!alloc_bootmem_huge_page(h))
 				break;
 		} else if (!alloc_fresh_huge_page(h,
@@ -1381,7 +1381,7 @@ static void __init hugetlb_init_hstates(void)
 
 	for_each_hstate(h) {
 		/* oversize hugepages were init'ed in early boot */
-		if (h->order < MAX_ORDER)
+		if (!hstate_is_gigantic(h))
 			hugetlb_hstate_alloc_pages(h);
 	}
 }
@@ -1415,7 +1415,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
 {
 	int i;
 
-	if (h->order >= MAX_ORDER)
+	if (hstate_is_gigantic(h))
 		return;
 
 	for_each_node_mask(i, *nodes_allowed) {
@@ -1478,7 +1478,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
 {
 	unsigned long min_count, ret;
 
-	if (h->order >= MAX_ORDER)
+	if (hstate_is_gigantic(h))
 		return h->max_huge_pages;
 
 	/*
@@ -1605,7 +1605,7 @@ static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
 		goto out;
 
 	h = kobj_to_hstate(kobj, &nid);
-	if (h->order >= MAX_ORDER) {
+	if (hstate_is_gigantic(h)) {
 		err = -EINVAL;
 		goto out;
 	}
@@ -1688,7 +1688,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
 	unsigned long input;
 	struct hstate *h = kobj_to_hstate(kobj, NULL);
 
-	if (h->order >= MAX_ORDER)
+	if (hstate_is_gigantic(h))
 		return -EINVAL;
 
 	err = kstrtoul(buf, 10, &input);
@@ -2112,7 +2112,7 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
 
 	tmp = h->max_huge_pages;
 
-	if (write && h->order >= MAX_ORDER)
+	if (write && hstate_is_gigantic(h))
 		return -EINVAL;
 
 	table->data = &tmp;
@@ -2168,7 +2168,7 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 
 	tmp = h->nr_overcommit_huge_pages;
 
-	if (write && h->order >= MAX_ORDER)
+	if (write && hstate_is_gigantic(h))
 		return -EINVAL;
 
 	table->data = &tmp;
-- 
cgit 


From 4f9b16a64753d0bb607454347036dc997fd03b82 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 4 Jun 2014 16:07:14 -0700
Subject: mm: disable zone_reclaim_mode by default

When it was introduced, zone_reclaim_mode made sense as NUMA distances
were punishing and workloads were generally partitioned to fit into a
NUMA node.  NUMA machines are now common but few of the workloads are
NUMA-aware and it's routine to see major performance degradation due to
zone_reclaim_mode being enabled, yet relatively few users can identify
the problem.

Those that require zone_reclaim_mode are likely to be able to detect
when it needs to be enabled and tune appropriately, so let's have a
sensible default for the bulk of users.

This patch (of 2):

zone_reclaim_mode causes processes to prefer reclaiming memory from
local node instead of spilling over to other nodes.  This made sense
initially when NUMA machines were almost exclusively HPC and the
workload was partitioned into nodes.  The NUMA penalties were
sufficiently high to justify reclaiming the memory.  On current machines
and workloads it is often the case that zone_reclaim_mode destroys
performance but not all users know how to detect this.  Favour the
common case and disable it by default.  Users that are sophisticated
enough to know they need zone_reclaim_mode will detect it.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/vm.txt         | 17 +++++++++--------
 arch/ia64/include/asm/topology.h    |  3 ++-
 arch/powerpc/include/asm/topology.h |  8 ++------
 include/linux/topology.h            |  3 ++-
 mm/page_alloc.c                     |  2 --
 5 files changed, 15 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index dd9d0e33b443..5b6da0fb5fbf 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -772,16 +772,17 @@ This is value ORed together of
 2	= Zone reclaim writes dirty pages out
 4	= Zone reclaim swaps pages
 
-zone_reclaim_mode is set during bootup to 1 if it is determined that pages
-from remote zones will cause a measurable performance reduction. The
-page allocator will then reclaim easily reusable pages (those page
-cache pages that are currently not used) before allocating off node pages.
-
-It may be beneficial to switch off zone reclaim if the system is
-used for a file server and all of memory should be used for caching files
-from disk. In that case the caching effect is more important than
+zone_reclaim_mode is disabled by default.  For file servers or workloads
+that benefit from having their data cached, zone_reclaim_mode should be
+left disabled as the caching effect is likely to be more important than
 data locality.
 
+zone_reclaim may be enabled if it's known that the workload is partitioned
+such that each partition fits within a NUMA node and that accessing remote
+memory would cause a measurable performance reduction.  The page allocator
+will then reclaim easily reusable pages (those page cache pages that are
+currently not used) before allocating off node pages.
+
 Allowing zone reclaim to write out pages stops processes that are
 writing large amounts of data from dirtying pages on other nodes. Zone
 reclaim will write out dirty pages if a zone fills up and so effectively
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 3202aa74e0d6..6437ca21f61b 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -21,7 +21,8 @@
 #define PENALTY_FOR_NODE_WITH_CPUS 255
 
 /*
- * Distance above which we begin to use zone reclaim
+ * Nodes within this distance are eligible for reclaim by zone_reclaim() when
+ * zone_reclaim_mode is enabled.
  */
 #define RECLAIM_DISTANCE 15
 
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index c9202151079f..6c8a8c5a37a1 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -9,12 +9,8 @@ struct device_node;
 #ifdef CONFIG_NUMA
 
 /*
- * Before going off node we want the VM to try and reclaim from the local
- * node. It does this if the remote distance is larger than RECLAIM_DISTANCE.
- * With the default REMOTE_DISTANCE of 20 and the default RECLAIM_DISTANCE of
- * 20, we never reclaim and go off node straight away.
- *
- * To fix this we choose a smaller value of RECLAIM_DISTANCE.
+ * If zone_reclaim_mode is enabled, a RECLAIM_DISTANCE of 10 will mean that
+ * all zones on all nodes will be eligible for zone_reclaim().
  */
 #define RECLAIM_DISTANCE 10
 
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 973671ff9e7d..dda6ee521e74 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -58,7 +58,8 @@ int arch_update_cpu_topology(void);
 /*
  * If the distance between nodes in a system is larger than RECLAIM_DISTANCE
  * (in whatever arch specific measurement units returned by node_distance())
- * then switch on zone reclaim on boot.
+ * and zone_reclaim_mode is enabled then the VM will only call zone_reclaim()
+ * on nodes within this distance.
  */
 #define RECLAIM_DISTANCE 30
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7cfdcd808f52..dfe954fbb48a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1860,8 +1860,6 @@ static void __paginginit init_zone_allows_reclaim(int nid)
 	for_each_node_state(i, N_MEMORY)
 		if (node_distance(nid, i) <= RECLAIM_DISTANCE)
 			node_set(i, NODE_DATA(nid)->reclaim_nodes);
-		else
-			zone_reclaim_mode = 1;
 }
 
 #else	/* CONFIG_NUMA */
-- 
cgit 


From 5f7a75acdb24c7b9c436b3a0a66eec12e101d19c Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 4 Jun 2014 16:07:15 -0700
Subject: mm: page_alloc: do not cache reclaim distances

pgdat->reclaim_nodes tracks if a remote node is allowed to be reclaimed
by zone_reclaim due to its distance.  As it is expected that
zone_reclaim_mode will be rarely enabled it is unreasonable for all
machines to take a penalty.  Fortunately, the zone_reclaim_mode path is
already slow and it is the path that takes the hit.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  1 -
 mm/page_alloc.c        | 17 ++---------------
 2 files changed, 2 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fac5509c18f0..c1dbe0ba9f82 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -763,7 +763,6 @@ typedef struct pglist_data {
 	unsigned long node_spanned_pages; /* total size of physical page
 					     range, including holes */
 	int node_id;
-	nodemask_t reclaim_nodes;	/* Nodes allowed to reclaim from */
 	wait_queue_head_t kswapd_wait;
 	wait_queue_head_t pfmemalloc_wait;
 	struct task_struct *kswapd;	/* Protected by lock_memory_hotplug() */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dfe954fbb48a..9f13bcfb6762 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1850,16 +1850,8 @@ static bool zone_local(struct zone *local_zone, struct zone *zone)
 
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 {
-	return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes);
-}
-
-static void __paginginit init_zone_allows_reclaim(int nid)
-{
-	int i;
-
-	for_each_node_state(i, N_MEMORY)
-		if (node_distance(nid, i) <= RECLAIM_DISTANCE)
-			node_set(i, NODE_DATA(nid)->reclaim_nodes);
+	return node_distance(zone_to_nid(local_zone), zone_to_nid(zone)) <
+				RECLAIM_DISTANCE;
 }
 
 #else	/* CONFIG_NUMA */
@@ -1893,9 +1885,6 @@ static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 	return true;
 }
 
-static inline void init_zone_allows_reclaim(int nid)
-{
-}
 #endif	/* CONFIG_NUMA */
 
 /*
@@ -4933,8 +4922,6 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
 
 	pgdat->node_id = nid;
 	pgdat->node_start_pfn = node_start_pfn;
-	if (node_state(nid, N_MEMORY))
-		init_zone_allows_reclaim(nid);
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 	get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 #endif
-- 
cgit 


From bfc8c90139ebd049b9801a951db3b9a4a00bed9c Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Wed, 4 Jun 2014 16:07:18 -0700
Subject: mem-hotplug: implement get/put_online_mems

kmem_cache_{create,destroy,shrink} need to get a stable value of
cpu/node online mask, because they init/destroy/access per-cpu/node
kmem_cache parts, which can be allocated or destroyed on cpu/mem
hotplug.  To protect against cpu hotplug, these functions use
{get,put}_online_cpus.  However, they do nothing to synchronize with
memory hotplug - taking the slab_mutex does not eliminate the
possibility of race as described in patch 2.

What we need there is something like get_online_cpus, but for memory.
We already have lock_memory_hotplug, which serves for the purpose, but
it's a bit of a hammer right now, because it's backed by a mutex.  As a
result, it imposes some limitations to locking order, which are not
desirable, and can't be used just like get_online_cpus.  That's why in
patch 1 I substitute it with get/put_online_mems, which work exactly
like get/put_online_cpus except they block not cpu, but memory hotplug.

[ v1 can be found at https://lkml.org/lkml/2014/4/6/68.  I NAK'ed it
  myself, because it used an rw semaphore for get/put_online_mems,
  making them deadlock prone.  ]

This patch (of 2):

{un}lock_memory_hotplug, which is used to synchronize against memory
hotplug, is currently backed by a mutex, which makes it a bit of a
hammer - threads that only want to get a stable value of online nodes
mask won't be able to proceed concurrently.  Also, it imposes some
strong locking ordering rules on it, which narrows down the set of its
usage scenarios.

This patch introduces get/put_online_mems, which are the same as
get/put_online_cpus, but for memory hotplug, i.e.  executing code
inside a get/put_online_mems section guarantees a stable value of
online nodes, present pages, etc.

lock_memory_hotplug()/unlock_memory_hotplug() are removed altogether.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h |  14 ++--
 include/linux/mmzone.h         |   8 +--
 mm/kmemleak.c                  |   4 +-
 mm/memory-failure.c            |   8 +--
 mm/memory_hotplug.c            | 142 +++++++++++++++++++++++++++++------------
 mm/slub.c                      |   4 +-
 mm/vmscan.c                    |   2 +-
 7 files changed, 116 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 4ca3d951fe91..010d125bffbf 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -187,14 +187,8 @@ extern void put_page_bootmem(struct page *page);
 extern void get_page_bootmem(unsigned long ingo, struct page *page,
 			     unsigned long type);
 
-/*
- * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug
- * notifier will be called under this. 2) offline/online/add/remove memory
- * will not run simultaneously.
- */
-
-void lock_memory_hotplug(void);
-void unlock_memory_hotplug(void);
+void get_online_mems(void);
+void put_online_mems(void);
 
 #else /* ! CONFIG_MEMORY_HOTPLUG */
 /*
@@ -232,8 +226,8 @@ static inline int try_online_node(int nid)
 	return 0;
 }
 
-static inline void lock_memory_hotplug(void) {}
-static inline void unlock_memory_hotplug(void) {}
+static inline void get_online_mems(void) {}
+static inline void put_online_mems(void) {}
 
 #endif /* ! CONFIG_MEMORY_HOTPLUG */
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c1dbe0ba9f82..ae693e1ad0f9 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -481,9 +481,8 @@ struct zone {
 	 * give them a chance of being in the same cacheline.
 	 *
 	 * Write access to present_pages at runtime should be protected by
-	 * lock_memory_hotplug()/unlock_memory_hotplug().  Any reader who can't
-	 * tolerant drift of present_pages should hold memory hotplug lock to
-	 * get a stable value.
+	 * mem_hotplug_begin/end(). Any reader who can't tolerant drift of
+	 * present_pages should get_online_mems() to get a stable value.
 	 *
 	 * Read access to managed_pages should be safe because it's unsigned
 	 * long. Write access to zone->managed_pages and totalram_pages are
@@ -765,7 +764,8 @@ typedef struct pglist_data {
 	int node_id;
 	wait_queue_head_t kswapd_wait;
 	wait_queue_head_t pfmemalloc_wait;
-	struct task_struct *kswapd;	/* Protected by lock_memory_hotplug() */
+	struct task_struct *kswapd;	/* Protected by
+					   mem_hotplug_begin/end() */
 	int kswapd_max_order;
 	enum zone_type classzone_idx;
 #ifdef CONFIG_NUMA_BALANCING
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 8d2fcdfeff7f..736ade31d1dc 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1300,7 +1300,7 @@ static void kmemleak_scan(void)
 	/*
 	 * Struct page scanning for each node.
 	 */
-	lock_memory_hotplug();
+	get_online_mems();
 	for_each_online_node(i) {
 		unsigned long start_pfn = node_start_pfn(i);
 		unsigned long end_pfn = node_end_pfn(i);
@@ -1318,7 +1318,7 @@ static void kmemleak_scan(void)
 			scan_block(page, page + 1, NULL, 1);
 		}
 	}
-	unlock_memory_hotplug();
+	put_online_mems();
 
 	/*
 	 * Scanning the task stacks (may introduce false negatives).
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9ccef39a9de2..6917f799412b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1664,11 +1664,7 @@ int soft_offline_page(struct page *page, int flags)
 		}
 	}
 
-	/*
-	 * The lock_memory_hotplug prevents a race with memory hotplug.
-	 * This is a big hammer, a better would be nicer.
-	 */
-	lock_memory_hotplug();
+	get_online_mems();
 
 	/*
 	 * Isolate the page, so that it doesn't get reallocated if it
@@ -1679,7 +1675,7 @@ int soft_offline_page(struct page *page, int flags)
 		set_migratetype_isolate(page, true);
 
 	ret = get_any_page(page, pfn, flags);
-	unlock_memory_hotplug();
+	put_online_mems();
 	if (ret > 0) { /* for in-use pages */
 		if (PageHuge(page))
 			ret = soft_offline_huge_page(page, flags);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a650db29606f..2906873a1502 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -46,19 +46,84 @@
 static void generic_online_page(struct page *page);
 
 static online_page_callback_t online_page_callback = generic_online_page;
+static DEFINE_MUTEX(online_page_callback_lock);
 
-DEFINE_MUTEX(mem_hotplug_mutex);
+/* The same as the cpu_hotplug lock, but for memory hotplug. */
+static struct {
+	struct task_struct *active_writer;
+	struct mutex lock; /* Synchronizes accesses to refcount, */
+	/*
+	 * Also blocks the new readers during
+	 * an ongoing mem hotplug operation.
+	 */
+	int refcount;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lockdep_map dep_map;
+#endif
+} mem_hotplug = {
+	.active_writer = NULL,
+	.lock = __MUTEX_INITIALIZER(mem_hotplug.lock),
+	.refcount = 0,
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	.dep_map = {.name = "mem_hotplug.lock" },
+#endif
+};
+
+/* Lockdep annotations for get/put_online_mems() and mem_hotplug_begin/end() */
+#define memhp_lock_acquire_read() lock_map_acquire_read(&mem_hotplug.dep_map)
+#define memhp_lock_acquire()      lock_map_acquire(&mem_hotplug.dep_map)
+#define memhp_lock_release()      lock_map_release(&mem_hotplug.dep_map)
+
+void get_online_mems(void)
+{
+	might_sleep();
+	if (mem_hotplug.active_writer == current)
+		return;
+	memhp_lock_acquire_read();
+	mutex_lock(&mem_hotplug.lock);
+	mem_hotplug.refcount++;
+	mutex_unlock(&mem_hotplug.lock);
+
+}
 
-void lock_memory_hotplug(void)
+void put_online_mems(void)
 {
-	mutex_lock(&mem_hotplug_mutex);
+	if (mem_hotplug.active_writer == current)
+		return;
+	mutex_lock(&mem_hotplug.lock);
+
+	if (WARN_ON(!mem_hotplug.refcount))
+		mem_hotplug.refcount++; /* try to fix things up */
+
+	if (!--mem_hotplug.refcount && unlikely(mem_hotplug.active_writer))
+		wake_up_process(mem_hotplug.active_writer);
+	mutex_unlock(&mem_hotplug.lock);
+	memhp_lock_release();
+
 }
 
-void unlock_memory_hotplug(void)
+static void mem_hotplug_begin(void)
 {
-	mutex_unlock(&mem_hotplug_mutex);
+	mem_hotplug.active_writer = current;
+
+	memhp_lock_acquire();
+	for (;;) {
+		mutex_lock(&mem_hotplug.lock);
+		if (likely(!mem_hotplug.refcount))
+			break;
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+		mutex_unlock(&mem_hotplug.lock);
+		schedule();
+	}
 }
 
+static void mem_hotplug_done(void)
+{
+	mem_hotplug.active_writer = NULL;
+	mutex_unlock(&mem_hotplug.lock);
+	memhp_lock_release();
+}
 
 /* add this memory to iomem resource */
 static struct resource *register_memory_resource(u64 start, u64 size)
@@ -727,14 +792,16 @@ int set_online_page_callback(online_page_callback_t callback)
 {
 	int rc = -EINVAL;
 
-	lock_memory_hotplug();
+	get_online_mems();
+	mutex_lock(&online_page_callback_lock);
 
 	if (online_page_callback == generic_online_page) {
 		online_page_callback = callback;
 		rc = 0;
 	}
 
-	unlock_memory_hotplug();
+	mutex_unlock(&online_page_callback_lock);
+	put_online_mems();
 
 	return rc;
 }
@@ -744,14 +811,16 @@ int restore_online_page_callback(online_page_callback_t callback)
 {
 	int rc = -EINVAL;
 
-	lock_memory_hotplug();
+	get_online_mems();
+	mutex_lock(&online_page_callback_lock);
 
 	if (online_page_callback == callback) {
 		online_page_callback = generic_online_page;
 		rc = 0;
 	}
 
-	unlock_memory_hotplug();
+	mutex_unlock(&online_page_callback_lock);
+	put_online_mems();
 
 	return rc;
 }
@@ -899,7 +968,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 	int ret;
 	struct memory_notify arg;
 
-	lock_memory_hotplug();
+	mem_hotplug_begin();
 	/*
 	 * This doesn't need a lock to do pfn_to_page().
 	 * The section can't be removed here because of the
@@ -907,23 +976,18 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 	 */
 	zone = page_zone(pfn_to_page(pfn));
 
+	ret = -EINVAL;
 	if ((zone_idx(zone) > ZONE_NORMAL || online_type == ONLINE_MOVABLE) &&
-	    !can_online_high_movable(zone)) {
-		unlock_memory_hotplug();
-		return -EINVAL;
-	}
+	    !can_online_high_movable(zone))
+		goto out;
 
 	if (online_type == ONLINE_KERNEL && zone_idx(zone) == ZONE_MOVABLE) {
-		if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages)) {
-			unlock_memory_hotplug();
-			return -EINVAL;
-		}
+		if (move_pfn_range_left(zone - 1, zone, pfn, pfn + nr_pages))
+			goto out;
 	}
 	if (online_type == ONLINE_MOVABLE && zone_idx(zone) == ZONE_MOVABLE - 1) {
-		if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages)) {
-			unlock_memory_hotplug();
-			return -EINVAL;
-		}
+		if (move_pfn_range_right(zone, zone + 1, pfn, pfn + nr_pages))
+			goto out;
 	}
 
 	/* Previous code may changed the zone of the pfn range */
@@ -939,8 +1003,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 	ret = notifier_to_errno(ret);
 	if (ret) {
 		memory_notify(MEM_CANCEL_ONLINE, &arg);
-		unlock_memory_hotplug();
-		return ret;
+		goto out;
 	}
 	/*
 	 * If this zone is not populated, then it is not in zonelist.
@@ -964,8 +1027,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 		       (((unsigned long long) pfn + nr_pages)
 			    << PAGE_SHIFT) - 1);
 		memory_notify(MEM_CANCEL_ONLINE, &arg);
-		unlock_memory_hotplug();
-		return ret;
+		goto out;
 	}
 
 	zone->present_pages += onlined_pages;
@@ -995,9 +1057,9 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ
 
 	if (onlined_pages)
 		memory_notify(MEM_ONLINE, &arg);
-	unlock_memory_hotplug();
-
-	return 0;
+out:
+	mem_hotplug_done();
+	return ret;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
@@ -1055,7 +1117,7 @@ int try_online_node(int nid)
 	if (node_online(nid))
 		return 0;
 
-	lock_memory_hotplug();
+	mem_hotplug_begin();
 	pgdat = hotadd_new_pgdat(nid, 0);
 	if (!pgdat) {
 		pr_err("Cannot online node %d due to NULL pgdat\n", nid);
@@ -1073,7 +1135,7 @@ int try_online_node(int nid)
 	}
 
 out:
-	unlock_memory_hotplug();
+	mem_hotplug_done();
 	return ret;
 }
 
@@ -1117,7 +1179,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
 		new_pgdat = !p;
 	}
 
-	lock_memory_hotplug();
+	mem_hotplug_begin();
 
 	new_node = !node_online(nid);
 	if (new_node) {
@@ -1158,7 +1220,7 @@ error:
 	release_memory_resource(res);
 
 out:
-	unlock_memory_hotplug();
+	mem_hotplug_done();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_memory);
@@ -1565,7 +1627,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
 	if (!test_pages_in_a_zone(start_pfn, end_pfn))
 		return -EINVAL;
 
-	lock_memory_hotplug();
+	mem_hotplug_begin();
 
 	zone = page_zone(pfn_to_page(start_pfn));
 	node = zone_to_nid(zone);
@@ -1672,7 +1734,7 @@ repeat:
 	writeback_set_ratelimit();
 
 	memory_notify(MEM_OFFLINE, &arg);
-	unlock_memory_hotplug();
+	mem_hotplug_done();
 	return 0;
 
 failed_removal:
@@ -1684,7 +1746,7 @@ failed_removal:
 	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
 
 out:
-	unlock_memory_hotplug();
+	mem_hotplug_done();
 	return ret;
 }
 
@@ -1888,7 +1950,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
 
 	BUG_ON(check_hotplug_memory_range(start, size));
 
-	lock_memory_hotplug();
+	mem_hotplug_begin();
 
 	/*
 	 * All memory blocks must be offlined before removing memory.  Check
@@ -1897,10 +1959,8 @@ void __ref remove_memory(int nid, u64 start, u64 size)
 	 */
 	ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL,
 				check_memblock_offlined_cb);
-	if (ret) {
-		unlock_memory_hotplug();
+	if (ret)
 		BUG();
-	}
 
 	/* remove memmap entry */
 	firmware_map_remove(start, start + size, "System RAM");
@@ -1909,7 +1969,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
 
 	try_offline_node(nid);
 
-	unlock_memory_hotplug();
+	mem_hotplug_done();
 }
 EXPORT_SYMBOL_GPL(remove_memory);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
diff --git a/mm/slub.c b/mm/slub.c
index ddb60795f373..9cb2501a2960 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4332,7 +4332,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 		}
 	}
 
-	lock_memory_hotplug();
+	get_online_mems();
 #ifdef CONFIG_SLUB_DEBUG
 	if (flags & SO_ALL) {
 		for_each_node_state(node, N_NORMAL_MEMORY) {
@@ -4372,7 +4372,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 			x += sprintf(buf + x, " N%d=%lu",
 					node, nodes[node]);
 #endif
-	unlock_memory_hotplug();
+	put_online_mems();
 	kfree(nodes);
 	return x + sprintf(buf + x, "\n");
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7901cb749e17..fbcf46076c4f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3434,7 +3434,7 @@ int kswapd_run(int nid)
 
 /*
  * Called by memory hotplug when all memory in a node is offlined.  Caller must
- * hold lock_memory_hotplug().
+ * hold mem_hotplug_begin/end().
  */
 void kswapd_stop(int nid)
 {
-- 
cgit 


From 2329d3751b082b4fd354f334a88662d72abac52d Mon Sep 17 00:00:00 2001
From: Jianyu Zhan <nasa4836@gmail.com>
Date: Wed, 4 Jun 2014 16:07:31 -0700
Subject: mm/swap.c: clean up *lru_cache_add* functions

In mm/swap.c, __lru_cache_add() is exported, but actually there are no
users outside this file.

This patch unexports __lru_cache_add(), and makes it static.  It also
exports lru_cache_add_file(), as it is used by cifs and fuse, which can
be loaded as modules.

Signed-off-by: Jianyu Zhan <nasa4836@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Seth Jennings <sjenning@linux.vnet.ibm.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Rafael Aquini <aquini@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Khalid Aziz <khalid.aziz@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 19 ++-----------------
 mm/swap.c            | 31 +++++++++++++++++++++++--------
 2 files changed, 25 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 350711560753..5a14b928164e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -308,8 +308,9 @@ extern unsigned long nr_free_pagecache_pages(void);
 
 
 /* linux/mm/swap.c */
-extern void __lru_cache_add(struct page *);
 extern void lru_cache_add(struct page *);
+extern void lru_cache_add_anon(struct page *page);
+extern void lru_cache_add_file(struct page *page);
 extern void lru_add_page_tail(struct page *page, struct page *page_tail,
 			 struct lruvec *lruvec, struct list_head *head);
 extern void activate_page(struct page *);
@@ -323,22 +324,6 @@ extern void swap_setup(void);
 
 extern void add_page_to_unevictable_list(struct page *page);
 
-/**
- * lru_cache_add: add a page to the page lists
- * @page: the page to add
- */
-static inline void lru_cache_add_anon(struct page *page)
-{
-	ClearPageActive(page);
-	__lru_cache_add(page);
-}
-
-static inline void lru_cache_add_file(struct page *page)
-{
-	ClearPageActive(page);
-	__lru_cache_add(page);
-}
-
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
diff --git a/mm/swap.c b/mm/swap.c
index 9ce43ba4498b..c0ed4d65438f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -582,13 +582,7 @@ void mark_page_accessed(struct page *page)
 }
 EXPORT_SYMBOL(mark_page_accessed);
 
-/*
- * Queue the page for addition to the LRU via pagevec. The decision on whether
- * to add the page to the [in]active [file|anon] list is deferred until the
- * pagevec is drained. This gives a chance for the caller of __lru_cache_add()
- * have the page added to the active list using mark_page_accessed().
- */
-void __lru_cache_add(struct page *page)
+static void __lru_cache_add(struct page *page)
 {
 	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
 
@@ -598,11 +592,32 @@ void __lru_cache_add(struct page *page)
 	pagevec_add(pvec, page);
 	put_cpu_var(lru_add_pvec);
 }
-EXPORT_SYMBOL(__lru_cache_add);
+
+/**
+ * lru_cache_add: add a page to the page lists
+ * @page: the page to add
+ */
+void lru_cache_add_anon(struct page *page)
+{
+	ClearPageActive(page);
+	__lru_cache_add(page);
+}
+
+void lru_cache_add_file(struct page *page)
+{
+	ClearPageActive(page);
+	__lru_cache_add(page);
+}
+EXPORT_SYMBOL(lru_cache_add_file);
 
 /**
  * lru_cache_add - add a page to a page list
  * @page: the page to be added to the LRU.
+ *
+ * Queue the page for addition to the LRU via pagevec. The decision on whether
+ * to add the page to the [in]active [file|anon] list is deferred until the
+ * pagevec is drained. This gives a chance for the caller of lru_cache_add()
+ * have the page added to the active list using mark_page_accessed().
  */
 void lru_cache_add(struct page *page)
 {
-- 
cgit 


From f98bafa06a28fdfdd5c49f820f4d6560f636fc46 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 4 Jun 2014 16:07:34 -0700
Subject: memcg: kill CONFIG_MM_OWNER

CONFIG_MM_OWNER makes no sense.  It is not user-selectable, it is only
selected by CONFIG_MEMCG automatically.  So we can kill this option in
init/Kconfig and do s/CONFIG_MM_OWNER/CONFIG_MEMCG/ globally.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 2 +-
 include/linux/sched.h    | 4 ++--
 init/Kconfig             | 7 -------
 kernel/exit.c            | 4 ++--
 kernel/fork.c            | 4 ++--
 5 files changed, 7 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8967e20cbe57..de1627232af0 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -406,7 +406,7 @@ struct mm_struct {
 	spinlock_t			ioctx_lock;
 	struct kioctx_table __rcu	*ioctx_table;
 #endif
-#ifdef CONFIG_MM_OWNER
+#ifdef CONFIG_MEMCG
 	/*
 	 * "owner" points to a task that is regarded as the canonical
 	 * user/owner of this mm. All of the following must be true in
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 70f67e4e6156..2f2dd7d932a2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2967,7 +2967,7 @@ static inline void inc_syscw(struct task_struct *tsk)
 #define TASK_SIZE_OF(tsk)	TASK_SIZE
 #endif
 
-#ifdef CONFIG_MM_OWNER
+#ifdef CONFIG_MEMCG
 extern void mm_update_next_owner(struct mm_struct *mm);
 extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p);
 #else
@@ -2978,7 +2978,7 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 {
 }
-#endif /* CONFIG_MM_OWNER */
+#endif /* CONFIG_MEMCG */
 
 static inline unsigned long task_rlimit(const struct task_struct *tsk,
 		unsigned int limit)
diff --git a/init/Kconfig b/init/Kconfig
index 4a1822a1a680..0a2f09a80e90 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -933,7 +933,6 @@ config RESOURCE_COUNTERS
 config MEMCG
 	bool "Memory Resource Controller for Control Groups"
 	depends on RESOURCE_COUNTERS
-	select MM_OWNER
 	select EVENTFD
 	help
 	  Provides a memory resource controller that manages both anonymous
@@ -951,9 +950,6 @@ config MEMCG
 	  disable memory resource controller and you can avoid overheads.
 	  (and lose benefits of memory resource controller)
 
-	  This config option also selects MM_OWNER config option, which
-	  could in turn add some fork/exit overhead.
-
 config MEMCG_SWAP
 	bool "Memory Resource Controller Swap Extension"
 	depends on MEMCG && SWAP
@@ -1179,9 +1175,6 @@ config SCHED_AUTOGROUP
 	  desktop applications.  Task group autogeneration is currently based
 	  upon task session.
 
-config MM_OWNER
-	bool
-
 config SYSFS_DEPRECATED
 	bool "Enable deprecated sysfs features to support old userspace tools"
 	depends on SYSFS
diff --git a/kernel/exit.c b/kernel/exit.c
index 6ed6a1d552b5..da1b838de8a6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -352,7 +352,7 @@ int disallow_signal(int sig)
 
 EXPORT_SYMBOL(disallow_signal);
 
-#ifdef CONFIG_MM_OWNER
+#ifdef CONFIG_MEMCG
 /*
  * A task is exiting.   If it owned this mm, find a new owner for the mm.
  */
@@ -434,7 +434,7 @@ assign_new_owner:
 	task_unlock(c);
 	put_task_struct(c);
 }
-#endif /* CONFIG_MM_OWNER */
+#endif /* CONFIG_MEMCG */
 
 /*
  * Turn us into a lazy TLB process if we
diff --git a/kernel/fork.c b/kernel/fork.c
index 59e3dcc5b8f2..0d53eb0dfb6f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1099,12 +1099,12 @@ static void rt_mutex_init_task(struct task_struct *p)
 #endif
 }
 
-#ifdef CONFIG_MM_OWNER
+#ifdef CONFIG_MEMCG
 void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 {
 	mm->owner = p;
 }
-#endif /* CONFIG_MM_OWNER */
+#endif /* CONFIG_MEMCG */
 
 /*
  * Initialize POSIX timer handling for a single task.
-- 
cgit 


From 1e32e77f95d60b121b6072e3e3a650a7f93068f9 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Wed, 4 Jun 2014 16:07:37 -0700
Subject: memcg, slab: do not schedule cache destruction when last page goes
 away

This patchset is part of the preparations for kmemcg re-parenting.  It
aims at simplifying kmemcg work-flows and synchronization.

First, it removes async per memcg cache destruction (see patches 1, 2).
Now caches are only destroyed on memcg offline.  That means the caches
that are not empty on memcg offline will be leaked.  However, they are
already leaked, because memcg_cache_params::nr_pages normally never drops
to 0, so the destruction work is never scheduled unless kmem_cache_shrink
is called explicitly.  In the future I'm planning to reap such dead caches
on vmpressure or periodically.

Second, it substitutes per memcg slab_caches_mutex's with the global
memcg_slab_mutex, which should be taken during the whole per memcg cache
creation/destruction path before the slab_mutex (see patch 3).  This
greatly simplifies synchronization among various per memcg cache
creation/destruction paths.

I'm still not quite sure about the end picture, in particular I don't know
whether we should reap dead memcgs' kmem caches periodically or try to
merge them with their parents (see https://lkml.org/lkml/2014/4/20/38 for
more details), but whichever way we choose, this set looks like a
reasonable change to me, because it greatly simplifies kmemcg work-flows
and eases further development.

This patch (of 3):

After a memcg is offlined, we mark its kmem caches that cannot be deleted
right now due to pending objects as dead by setting the
memcg_cache_params::dead flag, so that memcg_release_pages will schedule
cache destruction (memcg_cache_params::destroy) as soon as the last slab
of the cache is freed (memcg_cache_params::nr_pages drops to zero).

I guess the idea was to destroy the caches as soon as possible, i.e.
immediately after freeing the last object.  However, it just doesn't work
that way, because kmem caches always preserve some pages for the sake of
performance, so that nr_pages never gets to zero unless the cache is
shrunk explicitly using kmem_cache_shrink.  Of course, we could account
the total number of objects on the cache or check if all the slabs
allocated for the cache are empty on kmem_cache_free and schedule
destruction if so, but that would be too costly.

Thus we have a piece of code that works only when we explicitly call
kmem_cache_shrink, but complicates the whole picture a lot.  Moreover,
it's racy in fact.  For instance, kmem_cache_shrink may free the last slab
and thus schedule cache destruction before it finishes checking that the
cache is empty, which can lead to use-after-free.

So I propose to remove this async cache destruction from
memcg_release_pages, and check if the cache is empty explicitly after
calling kmem_cache_shrink instead.  This will simplify things a lot w/o
introducing any functional changes.

And regarding dead memcg caches (i.e.  those that are left hanging around
after memcg offline because they still have objects), I suppose we should reap them
either periodically or on vmpressure as Glauber suggested initially.  I'm
going to implement this later.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  1 -
 include/linux/slab.h       |  2 --
 mm/memcontrol.c            | 63 ++--------------------------------------------
 mm/slab.h                  |  7 ++----
 4 files changed, 4 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5155d09e749d..087a45314181 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -509,7 +509,6 @@ __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size);
 void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size);
 
-void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
 /**
diff --git a/include/linux/slab.h b/include/linux/slab.h
index a6aab2c0dfc5..905541dd3778 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -524,7 +524,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
  * @memcg: pointer to the memcg this cache belongs to
  * @list: list_head for the list of all caches in this memcg
  * @root_cache: pointer to the global, root cache, this cache was derived from
- * @dead: set to true after the memcg dies; the cache may still be around.
  * @nr_pages: number of pages that belongs to this cache.
  * @destroy: worker to be called whenever we are ready, or believe we may be
  *           ready, to destroy this cache.
@@ -540,7 +539,6 @@ struct memcg_cache_params {
 			struct mem_cgroup *memcg;
 			struct list_head list;
 			struct kmem_cache *root_cache;
-			bool dead;
 			atomic_t nr_pages;
 			struct work_struct destroy;
 		};
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9f4ff49c6add..6b1c45ced733 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3277,60 +3277,11 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)
 
 	cachep = memcg_params_to_cache(p);
 
-	/*
-	 * If we get down to 0 after shrink, we could delete right away.
-	 * However, memcg_release_pages() already puts us back in the workqueue
-	 * in that case. If we proceed deleting, we'll get a dangling
-	 * reference, and removing the object from the workqueue in that case
-	 * is unnecessary complication. We are not a fast path.
-	 *
-	 * Note that this case is fundamentally different from racing with
-	 * shrink_slab(): if memcg_cgroup_destroy_cache() is called in
-	 * kmem_cache_shrink, not only we would be reinserting a dead cache
-	 * into the queue, but doing so from inside the worker racing to
-	 * destroy it.
-	 *
-	 * So if we aren't down to zero, we'll just schedule a worker and try
-	 * again
-	 */
-	if (atomic_read(&cachep->memcg_params->nr_pages) != 0)
-		kmem_cache_shrink(cachep);
-	else
+	kmem_cache_shrink(cachep);
+	if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
 		kmem_cache_destroy(cachep);
 }
 
-void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
-{
-	if (!cachep->memcg_params->dead)
-		return;
-
-	/*
-	 * There are many ways in which we can get here.
-	 *
-	 * We can get to a memory-pressure situation while the delayed work is
-	 * still pending to run. The vmscan shrinkers can then release all
-	 * cache memory and get us to destruction. If this is the case, we'll
-	 * be executed twice, which is a bug (the second time will execute over
-	 * bogus data). In this case, cancelling the work should be fine.
-	 *
-	 * But we can also get here from the worker itself, if
-	 * kmem_cache_shrink is enough to shake all the remaining objects and
-	 * get the page count to 0. In this case, we'll deadlock if we try to
-	 * cancel the work (the worker runs with an internal lock held, which
-	 * is the same lock we would hold for cancel_work_sync().)
-	 *
-	 * Since we can't possibly know who got us here, just refrain from
-	 * running if there is already work pending
-	 */
-	if (work_pending(&cachep->memcg_params->destroy))
-		return;
-	/*
-	 * We have to defer the actual destroying to a workqueue, because
-	 * we might currently be in a context that cannot sleep.
-	 */
-	schedule_work(&cachep->memcg_params->destroy);
-}
-
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 {
 	struct kmem_cache *c;
@@ -3356,16 +3307,7 @@ int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 		 * We will now manually delete the caches, so to avoid races
 		 * we need to cancel all pending destruction workers and
 		 * proceed with destruction ourselves.
-		 *
-		 * kmem_cache_destroy() will call kmem_cache_shrink internally,
-		 * and that could spawn the workers again: it is likely that
-		 * the cache still have active pages until this very moment.
-		 * This would lead us back to mem_cgroup_destroy_cache.
-		 *
-		 * But that will not execute at all if the "dead" flag is not
-		 * set, so flip it down to guarantee we are in control.
 		 */
-		c->memcg_params->dead = false;
 		cancel_work_sync(&c->memcg_params->destroy);
 		kmem_cache_destroy(c);
 
@@ -3387,7 +3329,6 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
 	mutex_lock(&memcg->slab_caches_mutex);
 	list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
 		cachep = memcg_params_to_cache(params);
-		cachep->memcg_params->dead = true;
 		schedule_work(&cachep->memcg_params->destroy);
 	}
 	mutex_unlock(&memcg->slab_caches_mutex);
diff --git a/mm/slab.h b/mm/slab.h
index d85d59803d5f..b59447ac4533 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -129,11 +129,8 @@ static inline void memcg_bind_pages(struct kmem_cache *s, int order)
 
 static inline void memcg_release_pages(struct kmem_cache *s, int order)
 {
-	if (is_root_cache(s))
-		return;
-
-	if (atomic_sub_and_test((1 << order), &s->memcg_params->nr_pages))
-		mem_cgroup_destroy_cache(s);
+	if (!is_root_cache(s))
+		atomic_sub(1 << order, &s->memcg_params->nr_pages);
 }
 
 static inline bool slab_equal_or_root(struct kmem_cache *s,
-- 
cgit 


From c67a8a685a6e9abbaf0235e084168f15a721ae39 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Wed, 4 Jun 2014 16:07:39 -0700
Subject: memcg, slab: merge memcg_{bind,release}_pages to
 memcg_{un}charge_slab

Currently we have two pairs of kmemcg-related functions that are called on
slab alloc/free.  The first is memcg_{bind,release}_pages that count the
total number of pages allocated on a kmem cache.  The second is
memcg_{un}charge_slab that {un}charge slab pages to kmemcg resource
counter.  Let's just merge them to keep the code clean.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  4 ++--
 mm/memcontrol.c            | 22 ++++++++++++++++++++--
 mm/slab.c                  |  2 --
 mm/slab.h                  | 25 ++-----------------------
 mm/slub.c                  |  2 --
 5 files changed, 24 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 087a45314181..d38d190f4cec 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -506,8 +506,8 @@ void memcg_update_array_size(int num_groups);
 struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 
-int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size);
-void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size);
+int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order);
+void __memcg_uncharge_slab(struct kmem_cache *cachep, int order);
 
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6b1c45ced733..86a2078805e5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2954,7 +2954,7 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
 }
 #endif
 
-int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
+static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 {
 	struct res_counter *fail_res;
 	int ret = 0;
@@ -2992,7 +2992,7 @@ int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 	return ret;
 }
 
-void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
+static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size)
 {
 	res_counter_uncharge(&memcg->res, size);
 	if (do_swap_account)
@@ -3390,6 +3390,24 @@ static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
 	__memcg_create_cache_enqueue(memcg, cachep);
 	memcg_resume_kmem_account();
 }
+
+int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order)
+{
+	int res;
+
+	res = memcg_charge_kmem(cachep->memcg_params->memcg, gfp,
+				PAGE_SIZE << order);
+	if (!res)
+		atomic_add(1 << order, &cachep->memcg_params->nr_pages);
+	return res;
+}
+
+void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
+{
+	memcg_uncharge_kmem(cachep->memcg_params->memcg, PAGE_SIZE << order);
+	atomic_sub(1 << order, &cachep->memcg_params->nr_pages);
+}
+
 /*
  * Return the kmem_cache we're supposed to use for a slab allocation.
  * We try to use the current memcg's version of the cache.
diff --git a/mm/slab.c b/mm/slab.c
index 7067ea7f3927..9ca3b87edabc 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1712,7 +1712,6 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
 	__SetPageSlab(page);
 	if (page->pfmemalloc)
 		SetPageSlabPfmemalloc(page);
-	memcg_bind_pages(cachep, cachep->gfporder);
 
 	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
 		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
@@ -1748,7 +1747,6 @@ static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
 	page_mapcount_reset(page);
 	page->mapping = NULL;
 
-	memcg_release_pages(cachep, cachep->gfporder);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
 	__free_pages(page, cachep->gfporder);
diff --git a/mm/slab.h b/mm/slab.h
index b59447ac4533..961a3fb1f5a2 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -121,18 +121,6 @@ static inline bool is_root_cache(struct kmem_cache *s)
 	return !s->memcg_params || s->memcg_params->is_root_cache;
 }
 
-static inline void memcg_bind_pages(struct kmem_cache *s, int order)
-{
-	if (!is_root_cache(s))
-		atomic_add(1 << order, &s->memcg_params->nr_pages);
-}
-
-static inline void memcg_release_pages(struct kmem_cache *s, int order)
-{
-	if (!is_root_cache(s))
-		atomic_sub(1 << order, &s->memcg_params->nr_pages);
-}
-
 static inline bool slab_equal_or_root(struct kmem_cache *s,
 					struct kmem_cache *p)
 {
@@ -198,8 +186,7 @@ static __always_inline int memcg_charge_slab(struct kmem_cache *s,
 		return 0;
 	if (is_root_cache(s))
 		return 0;
-	return memcg_charge_kmem(s->memcg_params->memcg, gfp,
-				 PAGE_SIZE << order);
+	return __memcg_charge_slab(s, gfp, order);
 }
 
 static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
@@ -208,7 +195,7 @@ static __always_inline void memcg_uncharge_slab(struct kmem_cache *s, int order)
 		return;
 	if (is_root_cache(s))
 		return;
-	memcg_uncharge_kmem(s->memcg_params->memcg, PAGE_SIZE << order);
+	__memcg_uncharge_slab(s, order);
 }
 #else
 static inline bool is_root_cache(struct kmem_cache *s)
@@ -216,14 +203,6 @@ static inline bool is_root_cache(struct kmem_cache *s)
 	return true;
 }
 
-static inline void memcg_bind_pages(struct kmem_cache *s, int order)
-{
-}
-
-static inline void memcg_release_pages(struct kmem_cache *s, int order)
-{
-}
-
 static inline bool slab_equal_or_root(struct kmem_cache *s,
 				      struct kmem_cache *p)
 {
diff --git a/mm/slub.c b/mm/slub.c
index 5d1b653183ab..9e288d7c5e6a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1422,7 +1422,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 
 	order = compound_order(page);
 	inc_slabs_node(s, page_to_nid(page), page->objects);
-	memcg_bind_pages(s, order);
 	page->slab_cache = s;
 	__SetPageSlab(page);
 	if (page->pfmemalloc)
@@ -1473,7 +1472,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 	__ClearPageSlabPfmemalloc(page);
 	__ClearPageSlab(page);
 
-	memcg_release_pages(s, order);
 	page_mapcount_reset(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
-- 
cgit 


From bd67314586a3d5725e60f2f6587b4cb0f659bb67 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Wed, 4 Jun 2014 16:07:40 -0700
Subject: memcg, slab: simplify synchronization scheme

At present, we have the following mutexes protecting data related to per
memcg kmem caches:

 - slab_mutex.  This one is held during the whole kmem cache creation
   and destruction paths.  We also take it when updating per root cache
   memcg_caches arrays (see memcg_update_all_caches).  As a result, taking
   it guarantees there will be no changes to any kmem cache (including per
   memcg).  Why do we need something else then?  The point is it is
   private to slab implementation and has some internal dependencies with
   other mutexes (get_online_cpus).  So we just don't want to rely upon it
   and prefer to introduce additional mutexes instead.

 - activate_kmem_mutex.  Initially it was added to synchronize
   initializing kmem limit (memcg_activate_kmem).  However, since we can
   grow per root cache memcg_caches arrays only on kmem limit
   initialization (see memcg_update_all_caches), we also employ it to
   protect against memcg_caches arrays relocation (e.g.  see
   __kmem_cache_destroy_memcg_children).

 - We have a convention not to take slab_mutex in memcontrol.c, but we
   want to walk over per memcg memcg_slab_caches lists there (e.g.  for
   destroying all memcg caches on offline).  So we have per memcg
   slab_caches_mutex's protecting those lists.

The mutexes are taken in the following order:

   activate_kmem_mutex -> slab_mutex -> memcg::slab_caches_mutex

Such a synchronization scheme has a number of flaws, for instance:

 - We can't call kmem_cache_{destroy,shrink} while walking over a
   memcg::memcg_slab_caches list due to locking order.  As a result, in
   mem_cgroup_destroy_all_caches we schedule the
   memcg_cache_params::destroy work shrinking and destroying the cache.

 - We don't have a mutex to synchronize per memcg caches destruction
   between memcg offline (mem_cgroup_destroy_all_caches) and root cache
   destruction (__kmem_cache_destroy_memcg_children).  Currently we just
   don't bother about it.

This patch simplifies it by substituting per memcg slab_caches_mutex's
with the global memcg_slab_mutex.  It will be held whenever a new per
memcg cache is created or destroyed, so it protects per root cache
memcg_caches arrays and per memcg memcg_slab_caches lists.  The locking
order is as follows:

   activate_kmem_mutex -> memcg_slab_mutex -> slab_mutex

This allows us to call kmem_cache_{create,shrink,destroy} under the
memcg_slab_mutex.  As a result, we don't need memcg_cache_params::destroy
work any more - we can simply destroy caches while iterating over a per
memcg slab caches list.

Also using the global mutex simplifies synchronization between concurrent
per memcg caches creation/destruction, e.g.  mem_cgroup_destroy_all_caches
vs __kmem_cache_destroy_memcg_children.

The downside of this is that we substitute per-memcg slab_caches_mutex's
with a hammer-like global mutex, but since we already take either the
slab_mutex or the cgroup_mutex along with a memcg::slab_caches_mutex, it
shouldn't hurt concurrency a lot.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  10 ---
 include/linux/slab.h       |   6 +-
 mm/memcontrol.c            | 150 ++++++++++++++++++---------------------------
 mm/slab_common.c           |  23 +++----
 4 files changed, 69 insertions(+), 120 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d38d190f4cec..1fa23244fe37 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -497,8 +497,6 @@ char *memcg_create_cache_name(struct mem_cgroup *memcg,
 int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
 			     struct kmem_cache *root_cache);
 void memcg_free_cache_params(struct kmem_cache *s);
-void memcg_register_cache(struct kmem_cache *s);
-void memcg_unregister_cache(struct kmem_cache *s);
 
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
 void memcg_update_array_size(int num_groups);
@@ -640,14 +638,6 @@ static inline void memcg_free_cache_params(struct kmem_cache *s)
 {
 }
 
-static inline void memcg_register_cache(struct kmem_cache *s)
-{
-}
-
-static inline void memcg_unregister_cache(struct kmem_cache *s)
-{
-}
-
 static inline struct kmem_cache *
 memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 905541dd3778..ecbec9ccb80d 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -116,7 +116,8 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			unsigned long,
 			void (*)(void *));
 #ifdef CONFIG_MEMCG_KMEM
-void kmem_cache_create_memcg(struct mem_cgroup *, struct kmem_cache *);
+struct kmem_cache *kmem_cache_create_memcg(struct mem_cgroup *,
+					   struct kmem_cache *);
 #endif
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
@@ -525,8 +526,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
  * @list: list_head for the list of all caches in this memcg
  * @root_cache: pointer to the global, root cache, this cache was derived from
  * @nr_pages: number of pages that belongs to this cache.
- * @destroy: worker to be called whenever we are ready, or believe we may be
- *           ready, to destroy this cache.
  */
 struct memcg_cache_params {
 	bool is_root_cache;
@@ -540,7 +539,6 @@ struct memcg_cache_params {
 			struct list_head list;
 			struct kmem_cache *root_cache;
 			atomic_t nr_pages;
-			struct work_struct destroy;
 		};
 	};
 };
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 86a2078805e5..6b448881422b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -357,10 +357,9 @@ struct mem_cgroup {
 	struct cg_proto tcp_mem;
 #endif
 #if defined(CONFIG_MEMCG_KMEM)
-	/* analogous to slab_common's slab_caches list. per-memcg */
+	/* analogous to slab_common's slab_caches list, but per-memcg;
+	 * protected by memcg_slab_mutex */
 	struct list_head memcg_slab_caches;
-	/* Not a spinlock, we can take a lot of time walking the list */
-	struct mutex slab_caches_mutex;
         /* Index in the kmem_cache->memcg_params->memcg_caches array */
 	int kmemcg_id;
 #endif
@@ -2913,6 +2912,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 static DEFINE_MUTEX(set_limit_mutex);
 
 #ifdef CONFIG_MEMCG_KMEM
+/*
+ * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or
+ * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists.
+ */
+static DEFINE_MUTEX(memcg_slab_mutex);
+
 static DEFINE_MUTEX(activate_kmem_mutex);
 
 static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
@@ -2945,10 +2950,10 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
 
 	print_slabinfo_header(m);
 
-	mutex_lock(&memcg->slab_caches_mutex);
+	mutex_lock(&memcg_slab_mutex);
 	list_for_each_entry(params, &memcg->memcg_slab_caches, list)
 		cache_show(memcg_params_to_cache(params), m);
-	mutex_unlock(&memcg->slab_caches_mutex);
+	mutex_unlock(&memcg_slab_mutex);
 
 	return 0;
 }
@@ -3050,8 +3055,6 @@ void memcg_update_array_size(int num)
 		memcg_limited_groups_array_size = memcg_caches_array_size(num);
 }
 
-static void kmem_cache_destroy_work_func(struct work_struct *w);
-
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 {
 	struct memcg_cache_params *cur_params = s->memcg_params;
@@ -3148,8 +3151,6 @@ int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
 	if (memcg) {
 		s->memcg_params->memcg = memcg;
 		s->memcg_params->root_cache = root_cache;
-		INIT_WORK(&s->memcg_params->destroy,
-				kmem_cache_destroy_work_func);
 		css_get(&memcg->css);
 	} else
 		s->memcg_params->is_root_cache = true;
@@ -3166,24 +3167,34 @@ void memcg_free_cache_params(struct kmem_cache *s)
 	kfree(s->memcg_params);
 }
 
-void memcg_register_cache(struct kmem_cache *s)
+static void memcg_kmem_create_cache(struct mem_cgroup *memcg,
+				    struct kmem_cache *root_cache)
 {
-	struct kmem_cache *root;
-	struct mem_cgroup *memcg;
+	struct kmem_cache *cachep;
 	int id;
 
-	if (is_root_cache(s))
+	lockdep_assert_held(&memcg_slab_mutex);
+
+	id = memcg_cache_id(memcg);
+
+	/*
+	 * Since per-memcg caches are created asynchronously on first
+	 * allocation (see memcg_kmem_get_cache()), several threads can try to
+	 * create the same cache, but only one of them may succeed.
+	 */
+	if (cache_from_memcg_idx(root_cache, id))
 		return;
 
+	cachep = kmem_cache_create_memcg(memcg, root_cache);
 	/*
-	 * Holding the slab_mutex assures nobody will touch the memcg_caches
-	 * array while we are modifying it.
+	 * If we could not create a memcg cache, do not complain, because
+	 * that's not critical at all as we can always proceed with the root
+	 * cache.
 	 */
-	lockdep_assert_held(&slab_mutex);
+	if (!cachep)
+		return;
 
-	root = s->memcg_params->root_cache;
-	memcg = s->memcg_params->memcg;
-	id = memcg_cache_id(memcg);
+	list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
 
 	/*
 	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
@@ -3192,49 +3203,30 @@ void memcg_register_cache(struct kmem_cache *s)
 	 */
 	smp_wmb();
 
-	/*
-	 * Initialize the pointer to this cache in its parent's memcg_params
-	 * before adding it to the memcg_slab_caches list, otherwise we can
-	 * fail to convert memcg_params_to_cache() while traversing the list.
-	 */
-	VM_BUG_ON(root->memcg_params->memcg_caches[id]);
-	root->memcg_params->memcg_caches[id] = s;
-
-	mutex_lock(&memcg->slab_caches_mutex);
-	list_add(&s->memcg_params->list, &memcg->memcg_slab_caches);
-	mutex_unlock(&memcg->slab_caches_mutex);
+	BUG_ON(root_cache->memcg_params->memcg_caches[id]);
+	root_cache->memcg_params->memcg_caches[id] = cachep;
 }
 
-void memcg_unregister_cache(struct kmem_cache *s)
+static void memcg_kmem_destroy_cache(struct kmem_cache *cachep)
 {
-	struct kmem_cache *root;
+	struct kmem_cache *root_cache;
 	struct mem_cgroup *memcg;
 	int id;
 
-	if (is_root_cache(s))
-		return;
+	lockdep_assert_held(&memcg_slab_mutex);
 
-	/*
-	 * Holding the slab_mutex assures nobody will touch the memcg_caches
-	 * array while we are modifying it.
-	 */
-	lockdep_assert_held(&slab_mutex);
+	BUG_ON(is_root_cache(cachep));
 
-	root = s->memcg_params->root_cache;
-	memcg = s->memcg_params->memcg;
+	root_cache = cachep->memcg_params->root_cache;
+	memcg = cachep->memcg_params->memcg;
 	id = memcg_cache_id(memcg);
 
-	mutex_lock(&memcg->slab_caches_mutex);
-	list_del(&s->memcg_params->list);
-	mutex_unlock(&memcg->slab_caches_mutex);
+	BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep);
+	root_cache->memcg_params->memcg_caches[id] = NULL;
 
-	/*
-	 * Clear the pointer to this cache in its parent's memcg_params only
-	 * after removing it from the memcg_slab_caches list, otherwise we can
-	 * fail to convert memcg_params_to_cache() while traversing the list.
-	 */
-	VM_BUG_ON(root->memcg_params->memcg_caches[id] != s);
-	root->memcg_params->memcg_caches[id] = NULL;
+	list_del(&cachep->memcg_params->list);
+
+	kmem_cache_destroy(cachep);
 }
 
 /*
@@ -3268,70 +3260,42 @@ static inline void memcg_resume_kmem_account(void)
 	current->memcg_kmem_skip_account--;
 }
 
-static void kmem_cache_destroy_work_func(struct work_struct *w)
-{
-	struct kmem_cache *cachep;
-	struct memcg_cache_params *p;
-
-	p = container_of(w, struct memcg_cache_params, destroy);
-
-	cachep = memcg_params_to_cache(p);
-
-	kmem_cache_shrink(cachep);
-	if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
-		kmem_cache_destroy(cachep);
-}
-
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 {
 	struct kmem_cache *c;
 	int i, failed = 0;
 
-	/*
-	 * If the cache is being destroyed, we trust that there is no one else
-	 * requesting objects from it. Even if there are, the sanity checks in
-	 * kmem_cache_destroy should caught this ill-case.
-	 *
-	 * Still, we don't want anyone else freeing memcg_caches under our
-	 * noses, which can happen if a new memcg comes to life. As usual,
-	 * we'll take the activate_kmem_mutex to protect ourselves against
-	 * this.
-	 */
-	mutex_lock(&activate_kmem_mutex);
+	mutex_lock(&memcg_slab_mutex);
 	for_each_memcg_cache_index(i) {
 		c = cache_from_memcg_idx(s, i);
 		if (!c)
 			continue;
 
-		/*
-		 * We will now manually delete the caches, so to avoid races
-		 * we need to cancel all pending destruction workers and
-		 * proceed with destruction ourselves.
-		 */
-		cancel_work_sync(&c->memcg_params->destroy);
-		kmem_cache_destroy(c);
+		memcg_kmem_destroy_cache(c);
 
 		if (cache_from_memcg_idx(s, i))
 			failed++;
 	}
-	mutex_unlock(&activate_kmem_mutex);
+	mutex_unlock(&memcg_slab_mutex);
 	return failed;
 }
 
 static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
 {
 	struct kmem_cache *cachep;
-	struct memcg_cache_params *params;
+	struct memcg_cache_params *params, *tmp;
 
 	if (!memcg_kmem_is_active(memcg))
 		return;
 
-	mutex_lock(&memcg->slab_caches_mutex);
-	list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
+	mutex_lock(&memcg_slab_mutex);
+	list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
 		cachep = memcg_params_to_cache(params);
-		schedule_work(&cachep->memcg_params->destroy);
+		kmem_cache_shrink(cachep);
+		if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
+			memcg_kmem_destroy_cache(cachep);
 	}
-	mutex_unlock(&memcg->slab_caches_mutex);
+	mutex_unlock(&memcg_slab_mutex);
 }
 
 struct create_work {
@@ -3346,7 +3310,10 @@ static void memcg_create_cache_work_func(struct work_struct *w)
 	struct mem_cgroup *memcg = cw->memcg;
 	struct kmem_cache *cachep = cw->cachep;
 
-	kmem_cache_create_memcg(memcg, cachep);
+	mutex_lock(&memcg_slab_mutex);
+	memcg_kmem_create_cache(memcg, cachep);
+	mutex_unlock(&memcg_slab_mutex);
+
 	css_put(&memcg->css);
 	kfree(cw);
 }
@@ -5022,13 +4989,14 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg,
 	 * Make sure we have enough space for this cgroup in each root cache's
 	 * memcg_params.
 	 */
+	mutex_lock(&memcg_slab_mutex);
 	err = memcg_update_all_caches(memcg_id + 1);
+	mutex_unlock(&memcg_slab_mutex);
 	if (err)
 		goto out_rmid;
 
 	memcg->kmemcg_id = memcg_id;
 	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-	mutex_init(&memcg->slab_caches_mutex);
 
 	/*
 	 * We couldn't have accounted to this cgroup, because it hasn't got the
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2dd920dc3776..7e348cff814d 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -160,7 +160,6 @@ do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align,
 
 	s->refcount = 1;
 	list_add(&s->list, &slab_caches);
-	memcg_register_cache(s);
 out:
 	if (err)
 		return ERR_PTR(err);
@@ -270,9 +269,10 @@ EXPORT_SYMBOL(kmem_cache_create);
  * requests going from @memcg to @root_cache. The new cache inherits properties
  * from its parent.
  */
-void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_cache)
+struct kmem_cache *kmem_cache_create_memcg(struct mem_cgroup *memcg,
+					   struct kmem_cache *root_cache)
 {
-	struct kmem_cache *s;
+	struct kmem_cache *s = NULL;
 	char *cache_name;
 
 	get_online_cpus();
@@ -280,14 +280,6 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
 
 	mutex_lock(&slab_mutex);
 
-	/*
-	 * Since per-memcg caches are created asynchronously on first
-	 * allocation (see memcg_kmem_get_cache()), several threads can try to
-	 * create the same cache, but only one of them may succeed.
-	 */
-	if (cache_from_memcg_idx(root_cache, memcg_cache_id(memcg)))
-		goto out_unlock;
-
 	cache_name = memcg_create_cache_name(memcg, root_cache);
 	if (!cache_name)
 		goto out_unlock;
@@ -296,14 +288,18 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
 				 root_cache->size, root_cache->align,
 				 root_cache->flags, root_cache->ctor,
 				 memcg, root_cache);
-	if (IS_ERR(s))
+	if (IS_ERR(s)) {
 		kfree(cache_name);
+		s = NULL;
+	}
 
 out_unlock:
 	mutex_unlock(&slab_mutex);
 
 	put_online_mems();
 	put_online_cpus();
+
+	return s;
 }
 
 static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
@@ -348,11 +344,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
 		goto out_unlock;
 
 	list_del(&s->list);
-	memcg_unregister_cache(s);
-
 	if (__kmem_cache_shutdown(s) != 0) {
 		list_add(&s->list, &slab_caches);
-		memcg_register_cache(s);
 		printk(KERN_ERR "kmem_cache_destroy %s: "
 		       "Slab cache still has objects\n", s->name);
 		dump_stack();
-- 
cgit 


From 1b938c0827478df268d2336469ec48d400a2eb3e Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Wed, 4 Jun 2014 16:07:43 -0700
Subject: fs/buffer.c: remove block_write_full_page_endio()

The last in-tree caller of block_write_full_page_endio() was removed in
January 2013.  It's time to remove the EXPORT_SYMBOL, which leaves
block_write_full_page() as the only caller of
block_write_full_page_endio(), so inline block_write_full_page_endio()
into block_write_full_page().

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dheeraj Reddy <dheeraj.reddy@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c                 | 21 +++++----------------
 fs/ext4/page-io.c           |  2 +-
 fs/ocfs2/file.c             |  2 +-
 include/linux/buffer_head.h |  2 --
 4 files changed, 7 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index 6a8110c03a47..e33f8d5452ad 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2879,10 +2879,9 @@ EXPORT_SYMBOL(block_truncate_page);
 
 /*
  * The generic ->writepage function for buffer-backed address_spaces
- * this form passes in the end_io handler used to finish the IO.
  */
-int block_write_full_page_endio(struct page *page, get_block_t *get_block,
-			struct writeback_control *wbc, bh_end_io_t *handler)
+int block_write_full_page(struct page *page, get_block_t *get_block,
+			struct writeback_control *wbc)
 {
 	struct inode * const inode = page->mapping->host;
 	loff_t i_size = i_size_read(inode);
@@ -2892,7 +2891,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block,
 	/* Is the page fully inside i_size? */
 	if (page->index < end_index)
 		return __block_write_full_page(inode, page, get_block, wbc,
-					       handler);
+					       end_buffer_async_write);
 
 	/* Is the page fully outside i_size? (truncate in progress) */
 	offset = i_size & (PAGE_CACHE_SIZE-1);
@@ -2915,18 +2914,8 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block,
 	 * writes to that region are not written out to the file."
 	 */
 	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
-	return __block_write_full_page(inode, page, get_block, wbc, handler);
-}
-EXPORT_SYMBOL(block_write_full_page_endio);
-
-/*
- * The generic ->writepage function for buffer-backed address_spaces
- */
-int block_write_full_page(struct page *page, get_block_t *get_block,
-			struct writeback_control *wbc)
-{
-	return block_write_full_page_endio(page, get_block, wbc,
-					   end_buffer_async_write);
+	return __block_write_full_page(inode, page, get_block, wbc,
+							end_buffer_async_write);
 }
 EXPORT_SYMBOL(block_write_full_page);
 
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index c18d95b50540..1a64e7a52b84 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -429,7 +429,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 		block_start = bh_offset(bh);
 		if (block_start >= len) {
 			/*
-			 * Comments copied from block_write_full_page_endio:
+			 * Comments copied from block_write_full_page:
 			 *
 			 * The page straddles i_size.  It must be zeroed out on
 			 * each and every writepage invocation because it may
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8970dcf74de5..8eb6e5732d3b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -828,7 +828,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
 		/*
 		 * fs-writeback will release the dirty pages without page lock
 		 * whose offset are over inode size, the release happens at
-		 * block_write_full_page_endio().
+		 * block_write_full_page().
 		 */
 		i_size_write(inode, abs_to);
 		inode->i_blocks = ocfs2_inode_sector_count(inode);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 7cbf837a279c..324329ceea1e 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -207,8 +207,6 @@ void block_invalidatepage(struct page *page, unsigned int offset,
 			  unsigned int length);
 int block_write_full_page(struct page *page, get_block_t *get_block,
 				struct writeback_control *wbc);
-int block_write_full_page_endio(struct page *page, get_block_t *get_block,
-			struct writeback_control *wbc, bh_end_io_t *handler);
 int block_read_full_page(struct page*, get_block_t*);
 int block_is_partially_uptodate(struct page *page, unsigned long from,
 				unsigned long count);
-- 
cgit 


From 57d998456ae8680ed446aa1993f45f4d8a9a5973 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Wed, 4 Jun 2014 16:07:45 -0700
Subject: fs/mpage.c: factor page_endio() out of mpage_end_io()

page_endio() takes care of updating all the appropriate page flags once
I/O has finished to a page.  Switch to using mapping_set_error() instead
of setting AS_EIO directly; this will handle thin-provisioned devices
correctly.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dheeraj Reddy <dheeraj.reddy@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/mpage.c              | 18 +-----------------
 include/linux/pagemap.h |  2 ++
 mm/filemap.c            | 25 +++++++++++++++++++++++++
 3 files changed, 28 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/mpage.c b/fs/mpage.c
index 4cc9c5d079f7..10da0da73017 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -48,23 +48,7 @@ static void mpage_end_io(struct bio *bio, int err)
 
 	bio_for_each_segment_all(bv, bio, i) {
 		struct page *page = bv->bv_page;
-
-		if (bio_data_dir(bio) == READ) {
-			if (!err) {
-				SetPageUptodate(page);
-			} else {
-				ClearPageUptodate(page);
-				SetPageError(page);
-			}
-			unlock_page(page);
-		} else { /* bio_data_dir(bio) == WRITE */
-			if (err) {
-				SetPageError(page);
-				if (page->mapping)
-					set_bit(AS_EIO, &page->mapping->flags);
-			}
-			end_page_writeback(page);
-		}
+		page_endio(page, bio_data_dir(bio), err);
 	}
 
 	bio_put(bio);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 45598f1e9aa3..718214c5584e 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -425,6 +425,8 @@ static inline void wait_on_page_writeback(struct page *page)
 extern void end_page_writeback(struct page *page);
 void wait_for_stable_page(struct page *page);
 
+void page_endio(struct page *page, int rw, int err);
+
 /*
  * Add an arbitrary waiter to a page's wait queue
  */
diff --git a/mm/filemap.c b/mm/filemap.c
index 021056c324e6..47d235b357a7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -764,6 +764,31 @@ void end_page_writeback(struct page *page)
 }
 EXPORT_SYMBOL(end_page_writeback);
 
+/*
+ * After completing I/O on a page, call this routine to update the page
+ * flags appropriately
+ */
+void page_endio(struct page *page, int rw, int err)
+{
+	if (rw == READ) {
+		if (!err) {
+			SetPageUptodate(page);
+		} else {
+			ClearPageUptodate(page);
+			SetPageError(page);
+		}
+		unlock_page(page);
+	} else { /* rw == WRITE */
+		if (err) {
+			SetPageError(page);
+			if (page->mapping)
+				mapping_set_error(page->mapping, err);
+		}
+		end_page_writeback(page);
+	}
+}
+EXPORT_SYMBOL_GPL(page_endio);
+
 /**
  * __lock_page - get a lock on the page, assuming we need to sleep to get it
  * @page: the page to lock
-- 
cgit 


From 47a191fd38ebddb1bd1510ec2bc1085c578c8868 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
Date: Wed, 4 Jun 2014 16:07:46 -0700
Subject: fs/block_dev.c: add bdev_read_page() and bdev_write_page()

A block device driver may choose to provide a rw_page operation.  These
will be called when the filesystem is attempting to do page sized I/O to
page cache pages (ie not for direct I/O).  This does preclude I/Os that
are larger than page size, so this may only be a performance gain for
some devices.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Tested-by: Dheeraj Reddy <dheeraj.reddy@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/block_dev.c         | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/mpage.c             | 12 ++++++++++
 include/linux/blkdev.h |  4 ++++
 3 files changed, 79 insertions(+)

(limited to 'include/linux')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 552a8d13bc32..83fba15cc394 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -363,6 +363,69 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 }
 EXPORT_SYMBOL(blkdev_fsync);
 
+/**
+ * bdev_read_page() - Start reading a page from a block device
+ * @bdev: The device to read the page from
+ * @sector: The offset on the device to read the page to (need not be aligned)
+ * @page: The page to read
+ *
+ * On entry, the page should be locked.  It will be unlocked when the page
+ * has been read.  If the block driver implements rw_page synchronously,
+ * that will be true on exit from this function, but it need not be.
+ *
+ * Errors returned by this function are usually "soft", eg out of memory, or
+ * queue full; callers should try a different route to read this page rather
+ * than propagate an error back up the stack.
+ *
+ * Return: negative errno if an error occurs, 0 if submission was successful.
+ */
+int bdev_read_page(struct block_device *bdev, sector_t sector,
+			struct page *page)
+{
+	const struct block_device_operations *ops = bdev->bd_disk->fops;
+	if (!ops->rw_page)
+		return -EOPNOTSUPP;
+	return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+}
+EXPORT_SYMBOL_GPL(bdev_read_page);
+
+/**
+ * bdev_write_page() - Start writing a page to a block device
+ * @bdev: The device to write the page to
+ * @sector: The offset on the device to write the page to (need not be aligned)
+ * @page: The page to write
+ * @wbc: The writeback_control for the write
+ *
+ * On entry, the page should be locked and not currently under writeback.
+ * On exit, if the write started successfully, the page will be unlocked and
+ * under writeback.  If the write failed already (eg the driver failed to
+ * queue the page to the device), the page will still be locked.  If the
+ * caller is a ->writepage implementation, it will need to unlock the page.
+ *
+ * Errors returned by this function are usually "soft", eg out of memory, or
+ * queue full; callers should try a different route to write this page rather
+ * than propagate an error back up the stack.
+ *
+ * Return: negative errno if an error occurs, 0 if submission was successful.
+ */
+int bdev_write_page(struct block_device *bdev, sector_t sector,
+			struct page *page, struct writeback_control *wbc)
+{
+	int result;
+	int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
+	const struct block_device_operations *ops = bdev->bd_disk->fops;
+	if (!ops->rw_page)
+		return -EOPNOTSUPP;
+	set_page_writeback(page);
+	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
+	if (result)
+		end_page_writeback(page);
+	else
+		unlock_page(page);
+	return result;
+}
+EXPORT_SYMBOL_GPL(bdev_write_page);
+
 /*
  * pseudo-fs
  */
diff --git a/fs/mpage.c b/fs/mpage.c
index 10da0da73017..5f9ed622274f 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -269,6 +269,11 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 
 alloc_new:
 	if (bio == NULL) {
+		if (first_hole == blocks_per_page) {
+			if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
+								page))
+				goto out;
+		}
 		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
 			  	min_t(int, nr_pages, bio_get_nr_vecs(bdev)),
 				GFP_KERNEL);
@@ -587,6 +592,13 @@ page_is_mapped:
 
 alloc_new:
 	if (bio == NULL) {
+		if (first_unmapped == blocks_per_page) {
+			if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
+								page, wbc)) {
+				clean_buffers(page, first_unmapped);
+				goto out;
+			}
+		}
 		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
 				bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH);
 		if (bio == NULL)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 45cf6e537c83..2f3886e6cc78 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1588,6 +1588,7 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g)
 struct block_device_operations {
 	int (*open) (struct block_device *, fmode_t);
 	void (*release) (struct gendisk *, fmode_t);
+	int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*direct_access) (struct block_device *, sector_t,
@@ -1606,6 +1607,9 @@ struct block_device_operations {
 
 extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
 				 unsigned long);
+extern int bdev_read_page(struct block_device *, sector_t, struct page *);
+extern int bdev_write_page(struct block_device *, sector_t, struct page *,
+						struct writeback_control *);
 #else /* CONFIG_BLOCK */
 
 struct block_device;
-- 
cgit 


From f7f28ca98b9a7a99fc55df2dddcf49857ab004f0 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 4 Jun 2014 16:07:57 -0700
Subject: mm: constify nmask argument to mbind()

The nmask argument to mbind() is const according to the userspace header
numaif.h, and since the kernel does indeed not modify it, it might as well
be declared const in the kernel.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/syscalls.h | 2 +-
 mm/mempolicy.c           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index a4a0588c5397..bfef0be279dd 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -723,7 +723,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages,
 				int flags);
 asmlinkage long sys_mbind(unsigned long start, unsigned long len,
 				unsigned long mode,
-				unsigned long __user *nmask,
+				const unsigned long __user *nmask,
 				unsigned long maxnode,
 				unsigned flags);
 asmlinkage long sys_get_mempolicy(int __user *policy,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 78e1472933ea..727187f1155b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1362,7 +1362,7 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
 }
 
 SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
-		unsigned long, mode, unsigned long __user *, nmask,
+		unsigned long, mode, const unsigned long __user *, nmask,
 		unsigned long, maxnode, unsigned, flags)
 {
 	nodemask_t nodes;
-- 
cgit 


From 23c8902d403ef9a04cdc367d0b76a3ed6d83f5c5 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Wed, 4 Jun 2014 16:07:58 -0700
Subject: mm: constify nmask argument to set_mempolicy()

The nmask argument to set_mempolicy() is const according to the user-space
header numaif.h, and since the kernel does indeed not modify it, it might
as well be declared const in the kernel.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/syscalls.h | 2 +-
 mm/mempolicy.c           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index bfef0be279dd..b0881a0ed322 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -711,7 +711,7 @@ asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3,
 
 asmlinkage long sys_ioprio_set(int which, int who, int ioprio);
 asmlinkage long sys_ioprio_get(int which, int who);
-asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
+asmlinkage long sys_set_mempolicy(int mode, const unsigned long __user *nmask,
 				unsigned long maxnode);
 asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
 				const unsigned long __user *from,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 727187f1155b..b09586d8316b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1383,7 +1383,7 @@ SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
 }
 
 /* Set the process memory policy */
-SYSCALL_DEFINE3(set_mempolicy, int, mode, unsigned long __user *, nmask,
+SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask,
 		unsigned long, maxnode)
 {
 	int err;
-- 
cgit 


From d2ee40eae98d8a41ff27dcdd13b1b656c4c1ad00 Mon Sep 17 00:00:00 2001
From: Jianyu Zhan <nasa4836@gmail.com>
Date: Wed, 4 Jun 2014 16:08:02 -0700
Subject: mm: introdule compound_head_by_tail()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, in put_compound_page(), we have

======
if (likely(!PageTail(page))) {                  <------  (1)
        if (put_page_testzero(page)) {
                 /*
                  * By the time all refcounts have been released
                  * split_huge_page cannot run anymore from under us.
                  */
                 if (PageHead(page))
                         __put_compound_page(page);
                 else
                         __put_single_page(page);
         }
         return;
}

/* __split_huge_page_refcount can run under us */
page_head = compound_head(page);        <------------ (2)
======

If at (1) we fail the check, this means page is *likely* a tail page.

Then at (2), as compound_head(page) is inlined, it is:

======
static inline struct page *compound_head(struct page *page)
{
          if (unlikely(PageTail(page))) {           <----------- (3)
              struct page *head = page->first_page;

                smp_rmb();
                if (likely(PageTail(page)))
                        return head;
        }
        return page;
}
======

Here, the unlikely() at (3) is a negative hint in this case, because the
page is *likely* a tail page.  So the check at (3) is not good here, and I
introduce a helper for this case.

So this patch introduces compound_head_by_tail(), which deals with a
possible tail page (though it could be split by a racy thread), and makes
compound_head() a wrapper on it.

This patch has no functional change, and it reduces the object
size slightly:
   text    data     bss     dec     hex  filename
  11003    1328      16   12347    303b  mm/swap.o.orig
  10971    1328      16   12315    301b  mm/swap.o.patched

I've run "perf top -e branch-miss" to observe branch misses in this case.
As Michael points out, it's a slow path, so this case happens only very
rarely.  But I grep'ed the code base and found there are still some other
call sites that could benefit from this helper.  And given that it bloats
the source by only 5 lines while reducing the object size, I still believe
this helper deserves to exist.

Signed-off-by: Jianyu Zhan <nasa4836@gmail.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 29 +++++++++++++++++------------
 mm/swap.c          |  2 +-
 2 files changed, 18 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d6777060449f..368600628d14 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -407,20 +407,25 @@ static inline void compound_unlock_irqrestore(struct page *page,
 #endif
 }
 
+static inline struct page *compound_head_by_tail(struct page *tail)
+{
+	struct page *head = tail->first_page;
+
+	/*
+	 * page->first_page may be a dangling pointer to an old
+	 * compound page, so recheck that it is still a tail
+	 * page before returning.
+	 */
+	smp_rmb();
+	if (likely(PageTail(tail)))
+		return head;
+	return tail;
+}
+
 static inline struct page *compound_head(struct page *page)
 {
-	if (unlikely(PageTail(page))) {
-		struct page *head = page->first_page;
-
-		/*
-		 * page->first_page may be a dangling pointer to an old
-		 * compound page, so recheck that it is still a tail
-		 * page before returning.
-		 */
-		smp_rmb();
-		if (likely(PageTail(page)))
-			return head;
-	}
+	if (unlikely(PageTail(page)))
+		return compound_head_by_tail(page);
 	return page;
 }
 
diff --git a/mm/swap.c b/mm/swap.c
index d089c5a0cf98..c8d6df556ce6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -253,7 +253,7 @@ static void put_compound_page(struct page *page)
 	 *  Case 3 is possible, as we may race with
 	 *  __split_huge_page_refcount tearing down a THP page.
 	 */
-	page_head = compound_head(page);
+	page_head = compound_head_by_tail(page);
 	if (!__compound_tail_refcounted(page_head))
 		put_unrefcounted_compound_page(page_head, page);
 	else
-- 
cgit 


From 1754e44e8291c92b9d981a6eca59f28dd25f03ab Mon Sep 17 00:00:00 2001
From: Wang Sheng-Hui <shhuiw@gmail.com>
Date: Wed, 4 Jun 2014 16:08:04 -0700
Subject: include/linux/bootmem.h: cleanup the comment for BOOTMEM_ flags

Use BOOTMEM_DEFAULT instead of 0 in the comment.

Signed-off-by: Wang Sheng-Hui <shhuiw@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index db51fe4fe317..4e2bd4c95b66 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -58,9 +58,9 @@ extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
  * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
  * the architecture-specific code should honor this).
  *
- * If flags is 0, then the return value is always 0 (success). If
- * flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the
- * memory already was reserved.
+ * If flags is BOOTMEM_DEFAULT, then the return value is always 0 (success).
+ * If flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the memory
+ * already was reserved.
  */
 #define BOOTMEM_DEFAULT		0
 #define BOOTMEM_EXCLUSIVE	(1<<0)
-- 
cgit 


From ac7695012a6f3269acd80d6c2b2218a6769edbf3 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 4 Jun 2014 16:08:17 -0700
Subject: mm/rmap.c: make page_referenced_one() and try_to_unmap_one() static

KSM was converted to use rmap_walk() and now nobody uses these functions
outside mm/rmap.c.

Let's convert them back to static.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h | 4 ----
 mm/rmap.c            | 4 ++--
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b66c2110cb1f..9be55c7617da 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -183,14 +183,10 @@ static inline void page_dup_rmap(struct page *page)
  */
 int page_referenced(struct page *, int is_locked,
 			struct mem_cgroup *memcg, unsigned long *vm_flags);
-int page_referenced_one(struct page *, struct vm_area_struct *,
-	unsigned long address, void *arg);
 
 #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
 
 int try_to_unmap(struct page *, enum ttu_flags flags);
-int try_to_unmap_one(struct page *, struct vm_area_struct *,
-			unsigned long address, void *arg);
 
 /*
  * Called from mm/filemap_xip.c to unmap empty zero page
diff --git a/mm/rmap.c b/mm/rmap.c
index 7da400d5d98e..8754e1fa83b6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -671,7 +671,7 @@ struct page_referenced_arg {
 /*
  * arg: page_referenced_arg will be passed
  */
-int page_referenced_one(struct page *page, struct vm_area_struct *vma,
+static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 			unsigned long address, void *arg)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -1114,7 +1114,7 @@ out:
 /*
  * @arg: enum ttu_flags will be passed to this argument
  */
-int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
+static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		     unsigned long address, void *arg)
 {
 	struct mm_struct *mm = vma->vm_mm;
-- 
cgit 


From 3fb1c8dcfcda2f5bfb7d79d8b08bf2f04b1eed8f Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 4 Jun 2014 16:08:20 -0700
Subject: mm: update comment for DEFAULT_MAX_MAP_COUNT

With ELF extended numbering, the 16-bit bound is not a hard limit any more.

[akpm@linux-foundation.org: fix typo]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/sysctl.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 8045a554cafb..596a0e007c62 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -25,6 +25,10 @@ enum { sysctl_hung_task_timeout_secs = 0 };
  * Because the kernel adds some informative sections to a image of program at
  * generating coredump, we need some margin. The number of extra sections is
  * 1-3 now and depends on arch. We use "5" as safe margin, here.
+ *
+ * ELF extended numbering allows more than 65535 sections, so 16-bit bound is
+ * not a hard limit any more. Although some userspace tools can be surprised by
+ * that.
  */
 #define MAPCOUNT_ELF_CORE_MARGIN	(5)
 #define DEFAULT_MAX_MAP_COUNT	(USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN)
-- 
cgit 


From 073ee1c6cd11cd190f4d0da84d9b4ba79d7b9e70 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Wed, 4 Jun 2014 16:08:23 -0700
Subject: memcg: get rid of memcg_create_cache_name

Instead of calling back to memcontrol.c from kmem_cache_create_memcg in
order to just create the name of a per memcg cache, let's allocate it in
place.  We only need to pass the memcg name to kmem_cache_create_memcg for
that - everything else can be done in slab_common.c.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  2 --
 include/linux/slab.h       |  3 ++-
 mm/memcontrol.c            | 33 +++++++++------------------------
 mm/slab_common.c           |  7 +++++--
 4 files changed, 16 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1fa23244fe37..dfc2929a3877 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -492,8 +492,6 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order);
 
 int memcg_cache_id(struct mem_cgroup *memcg);
 
-char *memcg_create_cache_name(struct mem_cgroup *memcg,
-			      struct kmem_cache *root_cache);
 int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
 			     struct kmem_cache *root_cache);
 void memcg_free_cache_params(struct kmem_cache *s);
diff --git a/include/linux/slab.h b/include/linux/slab.h
index ecbec9ccb80d..86e5b26fbdab 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -117,7 +117,8 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			void (*)(void *));
 #ifdef CONFIG_MEMCG_KMEM
 struct kmem_cache *kmem_cache_create_memcg(struct mem_cgroup *,
-					   struct kmem_cache *);
+					   struct kmem_cache *,
+					   const char *);
 #endif
 void kmem_cache_destroy(struct kmem_cache *);
 int kmem_cache_shrink(struct kmem_cache *);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 971d7b643f6e..7df7f599e3df 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3095,29 +3095,6 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 	return 0;
 }
 
-char *memcg_create_cache_name(struct mem_cgroup *memcg,
-			      struct kmem_cache *root_cache)
-{
-	static char *buf;
-
-	/*
-	 * We need a mutex here to protect the shared buffer. Since this is
-	 * expected to be called only on cache creation, we can employ the
-	 * slab_mutex for that purpose.
-	 */
-	lockdep_assert_held(&slab_mutex);
-
-	if (!buf) {
-		buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
-		if (!buf)
-			return NULL;
-	}
-
-	cgroup_name(memcg->css.cgroup, buf, NAME_MAX + 1);
-	return kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
-			 memcg_cache_id(memcg), buf);
-}
-
 int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
 			     struct kmem_cache *root_cache)
 {
@@ -3158,6 +3135,7 @@ void memcg_free_cache_params(struct kmem_cache *s)
 static void memcg_kmem_create_cache(struct mem_cgroup *memcg,
 				    struct kmem_cache *root_cache)
 {
+	static char *memcg_name_buf;	/* protected by memcg_slab_mutex */
 	struct kmem_cache *cachep;
 	int id;
 
@@ -3173,7 +3151,14 @@ static void memcg_kmem_create_cache(struct mem_cgroup *memcg,
 	if (cache_from_memcg_idx(root_cache, id))
 		return;
 
-	cachep = kmem_cache_create_memcg(memcg, root_cache);
+	if (!memcg_name_buf) {
+		memcg_name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
+		if (!memcg_name_buf)
+			return;
+	}
+
+	cgroup_name(memcg->css.cgroup, memcg_name_buf, NAME_MAX + 1);
+	cachep = kmem_cache_create_memcg(memcg, root_cache, memcg_name_buf);
 	/*
 	 * If we could not create a memcg cache, do not complain, because
 	 * that's not critical at all as we can always proceed with the root
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 7e348cff814d..32175617cb75 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -264,13 +264,15 @@ EXPORT_SYMBOL(kmem_cache_create);
  * kmem_cache_create_memcg - Create a cache for a memory cgroup.
  * @memcg: The memory cgroup the new cache is for.
  * @root_cache: The parent of the new cache.
+ * @memcg_name: The name of the memory cgroup (used for naming the new cache).
  *
  * This function attempts to create a kmem cache that will serve allocation
  * requests going from @memcg to @root_cache. The new cache inherits properties
  * from its parent.
  */
 struct kmem_cache *kmem_cache_create_memcg(struct mem_cgroup *memcg,
-					   struct kmem_cache *root_cache)
+					   struct kmem_cache *root_cache,
+					   const char *memcg_name)
 {
 	struct kmem_cache *s = NULL;
 	char *cache_name;
@@ -280,7 +282,8 @@ struct kmem_cache *kmem_cache_create_memcg(struct mem_cgroup *memcg,
 
 	mutex_lock(&slab_mutex);
 
-	cache_name = memcg_create_cache_name(memcg, root_cache);
+	cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
+			       memcg_cache_id(memcg), memcg_name);
 	if (!cache_name)
 		goto out_unlock;
 
-- 
cgit 


From 68711a746345c44ae00c64d8dbac6a9ce13ac54a Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 4 Jun 2014 16:08:25 -0700
Subject: mm, migration: add destination page freeing callback

Memory migration uses a callback defined by the caller to determine how to
allocate destination pages.  When migration fails for a source page,
however, it frees the destination page back to the system.

This patch adds a memory migration callback defined by the caller to
determine how to free destination pages.  If a caller, such as memory
compaction, builds its own freelist for migration targets, this can reuse
already freed memory instead of scanning additional memory.

If the caller provides a function to handle freeing of destination pages,
it is called when page migration fails.  If the caller passes NULL then
freeing back to the system will be handled as usual.  This patch
introduces no functional change.

Signed-off-by: David Rientjes <rientjes@google.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 11 ++++++----
 mm/compaction.c         |  2 +-
 mm/memory-failure.c     |  4 ++--
 mm/memory_hotplug.c     |  2 +-
 mm/mempolicy.c          |  4 ++--
 mm/migrate.c            | 55 +++++++++++++++++++++++++++++++++++--------------
 mm/page_alloc.c         |  2 +-
 7 files changed, 53 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 84a31ad0b791..a2901c414664 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -5,7 +5,9 @@
 #include <linux/mempolicy.h>
 #include <linux/migrate_mode.h>
 
-typedef struct page *new_page_t(struct page *, unsigned long private, int **);
+typedef struct page *new_page_t(struct page *page, unsigned long private,
+				int **reason);
+typedef void free_page_t(struct page *page, unsigned long private);
 
 /*
  * Return values from addresss_space_operations.migratepage():
@@ -38,7 +40,7 @@ enum migrate_reason {
 extern void putback_movable_pages(struct list_head *l);
 extern int migrate_page(struct address_space *,
 			struct page *, struct page *, enum migrate_mode);
-extern int migrate_pages(struct list_head *l, new_page_t x,
+extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
 		unsigned long private, enum migrate_mode mode, int reason);
 
 extern int migrate_prep(void);
@@ -56,8 +58,9 @@ extern int migrate_page_move_mapping(struct address_space *mapping,
 #else
 
 static inline void putback_movable_pages(struct list_head *l) {}
-static inline int migrate_pages(struct list_head *l, new_page_t x,
-		unsigned long private, enum migrate_mode mode, int reason)
+static inline int migrate_pages(struct list_head *l, new_page_t new,
+		free_page_t free, unsigned long private, enum migrate_mode mode,
+		int reason)
 	{ return -ENOSYS; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
diff --git a/mm/compaction.c b/mm/compaction.c
index 6010aabde28c..f74a362d2e28 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1016,7 +1016,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		}
 
 		nr_migrate = cc->nr_migratepages;
-		err = migrate_pages(&cc->migratepages, compaction_alloc,
+		err = migrate_pages(&cc->migratepages, compaction_alloc, NULL,
 				(unsigned long)cc,
 				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
 				MR_COMPACTION);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index d50f17fb9be2..3cd1b652821c 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1503,7 +1503,7 @@ static int soft_offline_huge_page(struct page *page, int flags)
 
 	/* Keep page count to indicate a given hugepage is isolated. */
 	list_move(&hpage->lru, &pagelist);
-	ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
+	ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
 				MIGRATE_SYNC, MR_MEMORY_FAILURE);
 	if (ret) {
 		pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
@@ -1584,7 +1584,7 @@ static int __soft_offline_page(struct page *page, int flags)
 		inc_zone_page_state(page, NR_ISOLATED_ANON +
 					page_is_file_cache(page));
 		list_add(&page->lru, &pagelist);
-		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
+		ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
 					MIGRATE_SYNC, MR_MEMORY_FAILURE);
 		if (ret) {
 			if (!list_empty(&pagelist)) {
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index cbb7ca0ac44b..469bbf505f85 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1394,7 +1394,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
 		 * alloc_migrate_target should be improooooved!!
 		 * migrate_pages returns # of failed pages.
 		 */
-		ret = migrate_pages(&source, alloc_migrate_target, 0,
+		ret = migrate_pages(&source, alloc_migrate_target, NULL, 0,
 					MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
 		if (ret)
 			putback_movable_pages(&source);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7f7864b95e8e..16bc9fa42998 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1028,7 +1028,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
 			flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
 	if (!list_empty(&pagelist)) {
-		err = migrate_pages(&pagelist, new_node_page, dest,
+		err = migrate_pages(&pagelist, new_node_page, NULL, dest,
 					MIGRATE_SYNC, MR_SYSCALL);
 		if (err)
 			putback_movable_pages(&pagelist);
@@ -1277,7 +1277,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 		if (!list_empty(&pagelist)) {
 			WARN_ON_ONCE(flags & MPOL_MF_LAZY);
 			nr_failed = migrate_pages(&pagelist, new_vma_page,
-					(unsigned long)vma,
+					NULL, (unsigned long)vma,
 					MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
 			if (nr_failed)
 				putback_movable_pages(&pagelist);
diff --git a/mm/migrate.c b/mm/migrate.c
index 6247be7fa30e..2a459675eeab 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -938,8 +938,9 @@ out:
  * Obtain the lock on page, remove all ptes and migrate the page
  * to the newly allocated page in newpage.
  */
-static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force, enum migrate_mode mode)
+static int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page,
+			unsigned long private, struct page *page, int force,
+			enum migrate_mode mode)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -983,11 +984,17 @@ out:
 				page_is_file_cache(page));
 		putback_lru_page(page);
 	}
+
 	/*
-	 * Move the new page to the LRU. If migration was not successful
-	 * then this will free the page.
+	 * If migration was not successful and there's a freeing callback, use
+	 * it.  Otherwise, putback_lru_page() will drop the reference grabbed
+	 * during isolation.
 	 */
-	putback_lru_page(newpage);
+	if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
+		put_new_page(newpage, private);
+	else
+		putback_lru_page(newpage);
+
 	if (result) {
 		if (rc)
 			*result = rc;
@@ -1016,8 +1023,9 @@ out:
  * will wait in the page fault for migration to complete.
  */
 static int unmap_and_move_huge_page(new_page_t get_new_page,
-				unsigned long private, struct page *hpage,
-				int force, enum migrate_mode mode)
+				free_page_t put_new_page, unsigned long private,
+				struct page *hpage, int force,
+				enum migrate_mode mode)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -1056,20 +1064,30 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	if (!page_mapped(hpage))
 		rc = move_to_new_page(new_hpage, hpage, 1, mode);
 
-	if (rc)
+	if (rc != MIGRATEPAGE_SUCCESS)
 		remove_migration_ptes(hpage, hpage);
 
 	if (anon_vma)
 		put_anon_vma(anon_vma);
 
-	if (!rc)
+	if (rc == MIGRATEPAGE_SUCCESS)
 		hugetlb_cgroup_migrate(hpage, new_hpage);
 
 	unlock_page(hpage);
 out:
 	if (rc != -EAGAIN)
 		putback_active_hugepage(hpage);
-	put_page(new_hpage);
+
+	/*
+	 * If migration was not successful and there's a freeing callback, use
+	 * it.  Otherwise, put_page() will drop the reference grabbed during
+	 * isolation.
+	 */
+	if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
+		put_new_page(new_hpage, private);
+	else
+		put_page(new_hpage);
+
 	if (result) {
 		if (rc)
 			*result = rc;
@@ -1086,6 +1104,8 @@ out:
  * @from:		The list of pages to be migrated.
  * @get_new_page:	The function used to allocate free pages to be used
  *			as the target of the page migration.
+ * @put_new_page:	The function used to free target pages if migration
+ *			fails, or NULL if no special handling is necessary.
  * @private:		Private data to be passed on to get_new_page()
  * @mode:		The migration mode that specifies the constraints for
  *			page migration, if any.
@@ -1099,7 +1119,8 @@ out:
  * Returns the number of pages that were not migrated, or an error code.
  */
 int migrate_pages(struct list_head *from, new_page_t get_new_page,
-		unsigned long private, enum migrate_mode mode, int reason)
+		free_page_t put_new_page, unsigned long private,
+		enum migrate_mode mode, int reason)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -1121,10 +1142,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 
 			if (PageHuge(page))
 				rc = unmap_and_move_huge_page(get_new_page,
-						private, page, pass > 2, mode);
+						put_new_page, private, page,
+						pass > 2, mode);
 			else
-				rc = unmap_and_move(get_new_page, private,
-						page, pass > 2, mode);
+				rc = unmap_and_move(get_new_page, put_new_page,
+						private, page, pass > 2, mode);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -1273,7 +1295,7 @@ set_status:
 
 	err = 0;
 	if (!list_empty(&pagelist)) {
-		err = migrate_pages(&pagelist, new_page_node,
+		err = migrate_pages(&pagelist, new_page_node, NULL,
 				(unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
 		if (err)
 			putback_movable_pages(&pagelist);
@@ -1729,7 +1751,8 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
 
 	list_add(&page->lru, &migratepages);
 	nr_remaining = migrate_pages(&migratepages, alloc_misplaced_dst_page,
-				     node, MIGRATE_ASYNC, MR_NUMA_MISPLACED);
+				     NULL, node, MIGRATE_ASYNC,
+				     MR_NUMA_MISPLACED);
 	if (nr_remaining) {
 		if (!list_empty(&migratepages)) {
 			list_del(&page->lru);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 132c337dbe55..027d0294413a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6218,7 +6218,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 		cc->nr_migratepages -= nr_reclaimed;
 
 		ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
-				    0, MIGRATE_SYNC, MR_CMA);
+				    NULL, 0, MIGRATE_SYNC, MR_CMA);
 	}
 	if (ret < 0) {
 		putback_movable_pages(&cc->migratepages);
-- 
cgit 


From 35979ef3393110ff3c12c6b94552208d3bdf1a36 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 4 Jun 2014 16:08:27 -0700
Subject: mm, compaction: add per-zone migration pfn cache for async compaction

Each zone has a cached migration scanner pfn for memory compaction so that
subsequent calls to memory compaction can start where the previous call
left off.

Currently, the compaction migration scanner only updates the per-zone
cached pfn when pageblocks were not skipped for async compaction.  This
creates a dependency on calling sync compaction to prevent subsequent
calls to async compaction from scanning an enormous amount of non-MOVABLE
pageblocks each time it is called.  On large machines, this could be
potentially very expensive.

This patch adds a per-zone cached migration scanner pfn only for async
compaction.  It is updated every time a pageblock has been scanned in its
entirety and when no pages from it were successfully isolated.  The cached
migration scanner pfn for sync compaction is updated only when called for
sync compaction.

Signed-off-by: David Rientjes <rientjes@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  5 ++--
 mm/compaction.c        | 66 ++++++++++++++++++++++++++++++--------------------
 2 files changed, 43 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ae693e1ad0f9..10a96ee68311 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -360,9 +360,10 @@ struct zone {
 	/* Set to true when the PG_migrate_skip bits should be cleared */
 	bool			compact_blockskip_flush;
 
-	/* pfns where compaction scanners should start */
+	/* pfn where compaction free scanner should start */
 	unsigned long		compact_cached_free_pfn;
-	unsigned long		compact_cached_migrate_pfn;
+	/* pfn where async and sync compaction migration scanner should start */
+	unsigned long		compact_cached_migrate_pfn[2];
 #endif
 #ifdef CONFIG_MEMORY_HOTPLUG
 	/* see spanned/present_pages for more description */
diff --git a/mm/compaction.c b/mm/compaction.c
index d0c7c994e11b..70c0f8cda33f 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -89,7 +89,8 @@ static void __reset_isolation_suitable(struct zone *zone)
 	unsigned long end_pfn = zone_end_pfn(zone);
 	unsigned long pfn;
 
-	zone->compact_cached_migrate_pfn = start_pfn;
+	zone->compact_cached_migrate_pfn[0] = start_pfn;
+	zone->compact_cached_migrate_pfn[1] = start_pfn;
 	zone->compact_cached_free_pfn = end_pfn;
 	zone->compact_blockskip_flush = false;
 
@@ -131,9 +132,10 @@ void reset_isolation_suitable(pg_data_t *pgdat)
  */
 static void update_pageblock_skip(struct compact_control *cc,
 			struct page *page, unsigned long nr_isolated,
-			bool migrate_scanner)
+			bool set_unsuitable, bool migrate_scanner)
 {
 	struct zone *zone = cc->zone;
+	unsigned long pfn;
 
 	if (cc->ignore_skip_hint)
 		return;
@@ -141,20 +143,31 @@ static void update_pageblock_skip(struct compact_control *cc,
 	if (!page)
 		return;
 
-	if (!nr_isolated) {
-		unsigned long pfn = page_to_pfn(page);
+	if (nr_isolated)
+		return;
+
+	/*
+	 * Only skip pageblocks when all forms of compaction will be known to
+	 * fail in the near future.
+	 */
+	if (set_unsuitable)
 		set_pageblock_skip(page);
 
-		/* Update where compaction should restart */
-		if (migrate_scanner) {
-			if (!cc->finished_update_migrate &&
-			    pfn > zone->compact_cached_migrate_pfn)
-				zone->compact_cached_migrate_pfn = pfn;
-		} else {
-			if (!cc->finished_update_free &&
-			    pfn < zone->compact_cached_free_pfn)
-				zone->compact_cached_free_pfn = pfn;
-		}
+	pfn = page_to_pfn(page);
+
+	/* Update where async and sync compaction should restart */
+	if (migrate_scanner) {
+		if (cc->finished_update_migrate)
+			return;
+		if (pfn > zone->compact_cached_migrate_pfn[0])
+			zone->compact_cached_migrate_pfn[0] = pfn;
+		if (cc->sync && pfn > zone->compact_cached_migrate_pfn[1])
+			zone->compact_cached_migrate_pfn[1] = pfn;
+	} else {
+		if (cc->finished_update_free)
+			return;
+		if (pfn < zone->compact_cached_free_pfn)
+			zone->compact_cached_free_pfn = pfn;
 	}
 }
 #else
@@ -166,7 +179,7 @@ static inline bool isolation_suitable(struct compact_control *cc,
 
 static void update_pageblock_skip(struct compact_control *cc,
 			struct page *page, unsigned long nr_isolated,
-			bool migrate_scanner)
+			bool set_unsuitable, bool migrate_scanner)
 {
 }
 #endif /* CONFIG_COMPACTION */
@@ -323,7 +336,8 @@ isolate_fail:
 
 	/* Update the pageblock-skip if the whole pageblock was scanned */
 	if (blockpfn == end_pfn)
-		update_pageblock_skip(cc, valid_page, total_isolated, false);
+		update_pageblock_skip(cc, valid_page, total_isolated, true,
+				      false);
 
 	count_compact_events(COMPACTFREE_SCANNED, nr_scanned);
 	if (total_isolated)
@@ -458,7 +472,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	unsigned long flags;
 	bool locked = false;
 	struct page *page = NULL, *valid_page = NULL;
-	bool skipped_async_unsuitable = false;
+	bool set_unsuitable = true;
 	const isolate_mode_t mode = (!cc->sync ? ISOLATE_ASYNC_MIGRATE : 0) |
 				    (unevictable ? ISOLATE_UNEVICTABLE : 0);
 
@@ -535,8 +549,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 			 */
 			mt = get_pageblock_migratetype(page);
 			if (!cc->sync && !migrate_async_suitable(mt)) {
-				cc->finished_update_migrate = true;
-				skipped_async_unsuitable = true;
+				set_unsuitable = false;
 				goto next_pageblock;
 			}
 		}
@@ -640,11 +653,10 @@ next_pageblock:
 	/*
 	 * Update the pageblock-skip information and cached scanner pfn,
 	 * if the whole pageblock was scanned without isolating any page.
-	 * This is not done when pageblock was skipped due to being unsuitable
-	 * for async compaction, so that eventual sync compaction can try.
 	 */
-	if (low_pfn == end_pfn && !skipped_async_unsuitable)
-		update_pageblock_skip(cc, valid_page, nr_isolated, true);
+	if (low_pfn == end_pfn)
+		update_pageblock_skip(cc, valid_page, nr_isolated,
+				      set_unsuitable, true);
 
 	trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
 
@@ -868,7 +880,8 @@ static int compact_finished(struct zone *zone,
 	/* Compaction run completes if the migrate and free scanner meet */
 	if (cc->free_pfn <= cc->migrate_pfn) {
 		/* Let the next compaction start anew. */
-		zone->compact_cached_migrate_pfn = zone->zone_start_pfn;
+		zone->compact_cached_migrate_pfn[0] = zone->zone_start_pfn;
+		zone->compact_cached_migrate_pfn[1] = zone->zone_start_pfn;
 		zone->compact_cached_free_pfn = zone_end_pfn(zone);
 
 		/*
@@ -993,7 +1006,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 	 * information on where the scanners should start but check that it
 	 * is initialised by ensuring the values are within zone boundaries.
 	 */
-	cc->migrate_pfn = zone->compact_cached_migrate_pfn;
+	cc->migrate_pfn = zone->compact_cached_migrate_pfn[cc->sync];
 	cc->free_pfn = zone->compact_cached_free_pfn;
 	if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) {
 		cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1);
@@ -1001,7 +1014,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 	}
 	if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) {
 		cc->migrate_pfn = start_pfn;
-		zone->compact_cached_migrate_pfn = cc->migrate_pfn;
+		zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
+		zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
 	}
 
 	trace_mm_compaction_begin(start_pfn, cc->migrate_pfn, cc->free_pfn, end_pfn);
-- 
cgit 


From e0b9daeb453e602a95ea43853dc12d385558ce1f Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Wed, 4 Jun 2014 16:08:28 -0700
Subject: mm, compaction: embed migration mode in compact_control

We're going to want to manipulate the migration mode for compaction in the
page allocator, and currently compact_control's sync field is only a bool.

Currently, we only do MIGRATE_ASYNC or MIGRATE_SYNC_LIGHT compaction
depending on the value of this bool.  Convert the bool to enum
migrate_mode and pass the migration mode in directly.  Later, we'll want
to avoid MIGRATE_SYNC_LIGHT for thp allocations in the pagefault path to
avoid unnecessary latency.

This also alters compaction triggered from sysfs, either for the entire
system or for a node, to force MIGRATE_SYNC.

[akpm@linux-foundation.org: fix build]
[iamjoonsoo.kim@lge.com: use MIGRATE_SYNC in alloc_contig_range()]
Signed-off-by: David Rientjes <rientjes@google.com>
Suggested-by: Mel Gorman <mgorman@suse.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h |  4 ++--
 mm/compaction.c            | 36 +++++++++++++++++++-----------------
 mm/internal.h              |  2 +-
 mm/page_alloc.c            | 39 +++++++++++++++++----------------------
 4 files changed, 39 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 7e1c76e3cd68..01e3132820da 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *mask,
-			bool sync, bool *contended);
+			enum migrate_mode mode, bool *contended);
 extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern unsigned long compaction_suitable(struct zone *zone, int order);
@@ -91,7 +91,7 @@ static inline bool compaction_restarting(struct zone *zone, int order)
 #else
 static inline unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended)
+			enum migrate_mode mode, bool *contended)
 {
 	return COMPACT_CONTINUE;
 }
diff --git a/mm/compaction.c b/mm/compaction.c
index 70c0f8cda33f..217a6ad9a20e 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -161,7 +161,8 @@ static void update_pageblock_skip(struct compact_control *cc,
 			return;
 		if (pfn > zone->compact_cached_migrate_pfn[0])
 			zone->compact_cached_migrate_pfn[0] = pfn;
-		if (cc->sync && pfn > zone->compact_cached_migrate_pfn[1])
+		if (cc->mode != MIGRATE_ASYNC &&
+		    pfn > zone->compact_cached_migrate_pfn[1])
 			zone->compact_cached_migrate_pfn[1] = pfn;
 	} else {
 		if (cc->finished_update_free)
@@ -208,7 +209,7 @@ static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
 		}
 
 		/* async aborts if taking too long or contended */
-		if (!cc->sync) {
+		if (cc->mode == MIGRATE_ASYNC) {
 			cc->contended = true;
 			return false;
 		}
@@ -473,7 +474,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	bool locked = false;
 	struct page *page = NULL, *valid_page = NULL;
 	bool set_unsuitable = true;
-	const isolate_mode_t mode = (!cc->sync ? ISOLATE_ASYNC_MIGRATE : 0) |
+	const isolate_mode_t mode = (cc->mode == MIGRATE_ASYNC ?
+					ISOLATE_ASYNC_MIGRATE : 0) |
 				    (unevictable ? ISOLATE_UNEVICTABLE : 0);
 
 	/*
@@ -483,7 +485,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	 */
 	while (unlikely(too_many_isolated(zone))) {
 		/* async migration should just abort */
-		if (!cc->sync)
+		if (cc->mode == MIGRATE_ASYNC)
 			return 0;
 
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -548,7 +550,8 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 			 * the minimum amount of work satisfies the allocation
 			 */
 			mt = get_pageblock_migratetype(page);
-			if (!cc->sync && !migrate_async_suitable(mt)) {
+			if (cc->mode == MIGRATE_ASYNC &&
+			    !migrate_async_suitable(mt)) {
 				set_unsuitable = false;
 				goto next_pageblock;
 			}
@@ -981,6 +984,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 	int ret;
 	unsigned long start_pfn = zone->zone_start_pfn;
 	unsigned long end_pfn = zone_end_pfn(zone);
+	const bool sync = cc->mode != MIGRATE_ASYNC;
 
 	ret = compaction_suitable(zone, cc->order);
 	switch (ret) {
@@ -1006,7 +1010,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 	 * information on where the scanners should start but check that it
 	 * is initialised by ensuring the values are within zone boundaries.
 	 */
-	cc->migrate_pfn = zone->compact_cached_migrate_pfn[cc->sync];
+	cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
 	cc->free_pfn = zone->compact_cached_free_pfn;
 	if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) {
 		cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1);
@@ -1040,8 +1044,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 
 		nr_migrate = cc->nr_migratepages;
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
-				compaction_free, (unsigned long)cc,
-				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
+				compaction_free, (unsigned long)cc, cc->mode,
 				MR_COMPACTION);
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
@@ -1074,9 +1077,8 @@ out:
 	return ret;
 }
 
-static unsigned long compact_zone_order(struct zone *zone,
-				 int order, gfp_t gfp_mask,
-				 bool sync, bool *contended)
+static unsigned long compact_zone_order(struct zone *zone, int order,
+		gfp_t gfp_mask, enum migrate_mode mode, bool *contended)
 {
 	unsigned long ret;
 	struct compact_control cc = {
@@ -1085,7 +1087,7 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.order = order,
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
-		.sync = sync,
+		.mode = mode,
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -1107,7 +1109,7 @@ int sysctl_extfrag_threshold = 500;
  * @order: The order of the current allocation
  * @gfp_mask: The GFP mask of the current allocation
  * @nodemask: The allowed nodes to allocate from
- * @sync: Whether migration is synchronous or not
+ * @mode: The migration mode for async, sync light, or sync migration
  * @contended: Return value that is true if compaction was aborted due to lock contention
  * @page: Optionally capture a free page of the requested order during compaction
  *
@@ -1115,7 +1117,7 @@ int sysctl_extfrag_threshold = 500;
  */
 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 			int order, gfp_t gfp_mask, nodemask_t *nodemask,
-			bool sync, bool *contended)
+			enum migrate_mode mode, bool *contended)
 {
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 	int may_enter_fs = gfp_mask & __GFP_FS;
@@ -1140,7 +1142,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 								nodemask) {
 		int status;
 
-		status = compact_zone_order(zone, order, gfp_mask, sync,
+		status = compact_zone_order(zone, order, gfp_mask, mode,
 						contended);
 		rc = max(status, rc);
 
@@ -1190,7 +1192,7 @@ void compact_pgdat(pg_data_t *pgdat, int order)
 {
 	struct compact_control cc = {
 		.order = order,
-		.sync = false,
+		.mode = MIGRATE_ASYNC,
 	};
 
 	if (!order)
@@ -1203,7 +1205,7 @@ static void compact_node(int nid)
 {
 	struct compact_control cc = {
 		.order = -1,
-		.sync = true,
+		.mode = MIGRATE_SYNC,
 		.ignore_skip_hint = true,
 	};
 
diff --git a/mm/internal.h b/mm/internal.h
index 6ee580d69ddd..a25424a24e0c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -134,7 +134,7 @@ struct compact_control {
 	unsigned long nr_migratepages;	/* Number of pages to migrate */
 	unsigned long free_pfn;		/* isolate_freepages search base */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
-	bool sync;			/* Synchronous migration */
+	enum migrate_mode mode;		/* Async or sync migration mode */
 	bool ignore_skip_hint;		/* Scan blocks even if marked skip */
 	bool finished_update_free;	/* True when the zone cached pfns are
 					 * no longer being updated
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 027d0294413a..afb29da0576c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2217,7 +2217,7 @@ static struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int migratetype, bool sync_migration,
+	int migratetype, enum migrate_mode mode,
 	bool *contended_compaction, bool *deferred_compaction,
 	unsigned long *did_some_progress)
 {
@@ -2231,7 +2231,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 
 	current->flags |= PF_MEMALLOC;
 	*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
-						nodemask, sync_migration,
+						nodemask, mode,
 						contended_compaction);
 	current->flags &= ~PF_MEMALLOC;
 
@@ -2264,7 +2264,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		 * As async compaction considers a subset of pageblocks, only
 		 * defer if the failure was a sync compaction failure.
 		 */
-		if (sync_migration)
+		if (mode != MIGRATE_ASYNC)
 			defer_compaction(preferred_zone, order);
 
 		cond_resched();
@@ -2277,9 +2277,8 @@ static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 	struct zonelist *zonelist, enum zone_type high_zoneidx,
 	nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
-	int migratetype, bool sync_migration,
-	bool *contended_compaction, bool *deferred_compaction,
-	unsigned long *did_some_progress)
+	int migratetype, enum migrate_mode mode, bool *contended_compaction,
+	bool *deferred_compaction, unsigned long *did_some_progress)
 {
 	return NULL;
 }
@@ -2474,7 +2473,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	int alloc_flags;
 	unsigned long pages_reclaimed = 0;
 	unsigned long did_some_progress;
-	bool sync_migration = false;
+	enum migrate_mode migration_mode = MIGRATE_ASYNC;
 	bool deferred_compaction = false;
 	bool contended_compaction = false;
 
@@ -2568,17 +2567,15 @@ rebalance:
 	 * Try direct compaction. The first pass is asynchronous. Subsequent
 	 * attempts after direct reclaim are synchronous
 	 */
-	page = __alloc_pages_direct_compact(gfp_mask, order,
-					zonelist, high_zoneidx,
-					nodemask,
-					alloc_flags, preferred_zone,
-					migratetype, sync_migration,
-					&contended_compaction,
+	page = __alloc_pages_direct_compact(gfp_mask, order, zonelist,
+					high_zoneidx, nodemask, alloc_flags,
+					preferred_zone, migratetype,
+					migration_mode, &contended_compaction,
 					&deferred_compaction,
 					&did_some_progress);
 	if (page)
 		goto got_pg;
-	sync_migration = true;
+	migration_mode = MIGRATE_SYNC_LIGHT;
 
 	/*
 	 * If compaction is deferred for high-order allocations, it is because
@@ -2653,12 +2650,10 @@ rebalance:
 		 * direct reclaim and reclaim/compaction depends on compaction
 		 * being called after reclaim so call directly if necessary
 		 */
-		page = __alloc_pages_direct_compact(gfp_mask, order,
-					zonelist, high_zoneidx,
-					nodemask,
-					alloc_flags, preferred_zone,
-					migratetype, sync_migration,
-					&contended_compaction,
+		page = __alloc_pages_direct_compact(gfp_mask, order, zonelist,
+					high_zoneidx, nodemask, alloc_flags,
+					preferred_zone, migratetype,
+					migration_mode, &contended_compaction,
 					&deferred_compaction,
 					&did_some_progress);
 		if (page)
@@ -6218,7 +6213,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 		cc->nr_migratepages -= nr_reclaimed;
 
 		ret = migrate_pages(&cc->migratepages, alloc_migrate_target,
-				    NULL, 0, MIGRATE_SYNC, MR_CMA);
+				    NULL, 0, cc->mode, MR_CMA);
 	}
 	if (ret < 0) {
 		putback_movable_pages(&cc->migratepages);
@@ -6257,7 +6252,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 		.nr_migratepages = 0,
 		.order = -1,
 		.zone = page_zone(pfn_to_page(start)),
-		.sync = true,
+		.mode = MIGRATE_SYNC,
 		.ignore_skip_hint = true,
 	};
 	INIT_LIST_HEAD(&cc.migratepages);
-- 
cgit 


From adfab836f4908deb049a5128082719e689eed964 Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Wed, 4 Jun 2014 16:09:53 -0700
Subject: swap: change swap_info singly-linked list to list_head

The logic controlling the singly-linked list of swap_info_struct entries
for all active, i.e.  swapon'ed, swap targets is rather complex, because:

 - it stores the entries in priority order
 - there is a pointer to the highest priority entry
 - there is a pointer to the highest priority not-full entry
 - there is a highest_priority_index variable set outside the swap_lock
 - swap entries of equal priority should be used equally

This complexity leads to bugs such as: https://lkml.org/lkml/2014/2/13/181
where different priority swap targets are incorrectly used equally.

That bug probably could be solved with the existing singly-linked lists,
but I think it would only add more complexity to the already difficult to
understand get_swap_page() swap_list iteration logic.

The first patch changes from a singly-linked list to a doubly-linked list
using list_heads; the highest_priority_index and related code are removed
and get_swap_page() starts each iteration at the highest priority
swap_info entry, even if it's full.  While this does introduce unnecessary
list iteration (i.e.  Schlemiel the painter's algorithm) in the case where
one or more of the highest priority entries are full, the iteration and
manipulation code is much simpler and behaves correctly re: the above bug;
and the fourth patch removes the unnecessary iteration.

The second patch adds some minor plist helper functions; nothing new
really, just functions to match existing regular list functions.  These
are used by the next two patches.

The third patch adds plist_requeue(), which is used by get_swap_page() in
the next patch - it performs the requeueing of same-priority entries
(which moves the entry to the end of its priority in the plist), so that
all equal-priority swap_info_structs get used equally.

The fourth patch converts the main list into a plist, and adds a new plist
that contains only swap_info entries that are both active and not full.
As Mel suggested, using plists allows removing all the ordering code from
swap - plists handle ordering automatically.  The list naming is also
clarified now that there are two lists, with the original list changed
from swap_list_head to swap_active_head and the new list named
swap_avail_head.  A new spinlock is also added for the new list, so
swap_info entries can be added or removed from the new list immediately as
they become full or not full.

This patch (of 4):

Replace the singly-linked list tracking active, i.e.  swapon'ed,
swap_info_struct entries with a doubly-linked list using struct
list_heads.  Simplify the logic iterating and manipulating the list of
entries, especially get_swap_page(), by using standard list_head
functions, and removing the highest priority iteration logic.

The change fixes the bug:
https://lkml.org/lkml/2014/2/13/181
in which different priority swap entries after the highest priority entry
are incorrectly used equally in pairs.  The swap behavior is now as
advertised, i.e. different priority swap entries are used in order, and
equal priority swap targets are used concurrently.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Cc: Weijie Yang <weijieut@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h     |   7 +-
 include/linux/swapfile.h |   2 +-
 mm/frontswap.c           |  13 ++--
 mm/swapfile.c            | 171 ++++++++++++++++++++---------------------------
 4 files changed, 78 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5a14b928164e..8bb85d6d65f0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -214,8 +214,8 @@ struct percpu_cluster {
 struct swap_info_struct {
 	unsigned long	flags;		/* SWP_USED etc: see above */
 	signed short	prio;		/* swap priority of this type */
+	struct list_head list;		/* entry in swap list */
 	signed char	type;		/* strange name for an index */
-	signed char	next;		/* next type on the swap list */
 	unsigned int	max;		/* extent of the swap_map */
 	unsigned char *swap_map;	/* vmalloc'ed array of usage counts */
 	struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
@@ -255,11 +255,6 @@ struct swap_info_struct {
 	struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */
 };
 
-struct swap_list_t {
-	int head;	/* head of priority-ordered swapfile list */
-	int next;	/* swapfile to be used next */
-};
-
 /* linux/mm/workingset.c */
 void *workingset_eviction(struct address_space *mapping, struct page *page);
 bool workingset_refault(void *shadow);
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index e282624e8c10..2eab382d593d 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -6,7 +6,7 @@
  * want to expose them to the dozens of source files that include swap.h
  */
 extern spinlock_t swap_lock;
-extern struct swap_list_t swap_list;
+extern struct list_head swap_list_head;
 extern struct swap_info_struct *swap_info[];
 extern int try_to_unuse(unsigned int, bool, unsigned long);
 
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 1b24bdcb3197..fae11602e8a9 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -327,15 +327,12 @@ EXPORT_SYMBOL(__frontswap_invalidate_area);
 
 static unsigned long __frontswap_curr_pages(void)
 {
-	int type;
 	unsigned long totalpages = 0;
 	struct swap_info_struct *si = NULL;
 
 	assert_spin_locked(&swap_lock);
-	for (type = swap_list.head; type >= 0; type = si->next) {
-		si = swap_info[type];
+	list_for_each_entry(si, &swap_list_head, list)
 		totalpages += atomic_read(&si->frontswap_pages);
-	}
 	return totalpages;
 }
 
@@ -347,11 +344,9 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
 	int si_frontswap_pages;
 	unsigned long total_pages_to_unuse = total;
 	unsigned long pages = 0, pages_to_unuse = 0;
-	int type;
 
 	assert_spin_locked(&swap_lock);
-	for (type = swap_list.head; type >= 0; type = si->next) {
-		si = swap_info[type];
+	list_for_each_entry(si, &swap_list_head, list) {
 		si_frontswap_pages = atomic_read(&si->frontswap_pages);
 		if (total_pages_to_unuse < si_frontswap_pages) {
 			pages = pages_to_unuse = total_pages_to_unuse;
@@ -366,7 +361,7 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
 		}
 		vm_unacct_memory(pages);
 		*unused = pages_to_unuse;
-		*swapid = type;
+		*swapid = si->type;
 		ret = 0;
 		break;
 	}
@@ -413,7 +408,7 @@ void frontswap_shrink(unsigned long target_pages)
 	/*
 	 * we don't want to hold swap_lock while doing a very
 	 * lengthy try_to_unuse, but swap_list may change
-	 * so restart scan from swap_list.head each time
+	 * so restart scan from swap_list_head each time
 	 */
 	spin_lock(&swap_lock);
 	ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4a7f7e6992b6..6c95a8c63b1a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -51,14 +51,17 @@ atomic_long_t nr_swap_pages;
 /* protected with swap_lock. reading in vm_swap_full() doesn't need lock */
 long total_swap_pages;
 static int least_priority;
-static atomic_t highest_priority_index = ATOMIC_INIT(-1);
 
 static const char Bad_file[] = "Bad swap file entry ";
 static const char Unused_file[] = "Unused swap file entry ";
 static const char Bad_offset[] = "Bad swap offset entry ";
 static const char Unused_offset[] = "Unused swap offset entry ";
 
-struct swap_list_t swap_list = {-1, -1};
+/*
+ * all active swap_info_structs
+ * protected with swap_lock, and ordered by priority.
+ */
+LIST_HEAD(swap_list_head);
 
 struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
@@ -640,66 +643,54 @@ no_page:
 
 swp_entry_t get_swap_page(void)
 {
-	struct swap_info_struct *si;
+	struct swap_info_struct *si, *next;
 	pgoff_t offset;
-	int type, next;
-	int wrapped = 0;
-	int hp_index;
+	struct list_head *tmp;
 
 	spin_lock(&swap_lock);
 	if (atomic_long_read(&nr_swap_pages) <= 0)
 		goto noswap;
 	atomic_long_dec(&nr_swap_pages);
 
-	for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) {
-		hp_index = atomic_xchg(&highest_priority_index, -1);
-		/*
-		 * highest_priority_index records current highest priority swap
-		 * type which just frees swap entries. If its priority is
-		 * higher than that of swap_list.next swap type, we use it.  It
-		 * isn't protected by swap_lock, so it can be an invalid value
-		 * if the corresponding swap type is swapoff. We double check
-		 * the flags here. It's even possible the swap type is swapoff
-		 * and swapon again and its priority is changed. In such rare
-		 * case, low prority swap type might be used, but eventually
-		 * high priority swap will be used after several rounds of
-		 * swap.
-		 */
-		if (hp_index != -1 && hp_index != type &&
-		    swap_info[type]->prio < swap_info[hp_index]->prio &&
-		    (swap_info[hp_index]->flags & SWP_WRITEOK)) {
-			type = hp_index;
-			swap_list.next = type;
-		}
-
-		si = swap_info[type];
-		next = si->next;
-		if (next < 0 ||
-		    (!wrapped && si->prio != swap_info[next]->prio)) {
-			next = swap_list.head;
-			wrapped++;
-		}
-
+	list_for_each(tmp, &swap_list_head) {
+		si = list_entry(tmp, typeof(*si), list);
 		spin_lock(&si->lock);
-		if (!si->highest_bit) {
-			spin_unlock(&si->lock);
-			continue;
-		}
-		if (!(si->flags & SWP_WRITEOK)) {
+		if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
 			spin_unlock(&si->lock);
 			continue;
 		}
 
-		swap_list.next = next;
+		/*
+		 * rotate the current swap_info that we're going to use
+		 * to after any other swap_info that have the same prio,
+		 * so that all equal-priority swap_info get used equally
+		 */
+		next = si;
+		list_for_each_entry_continue(next, &swap_list_head, list) {
+			if (si->prio != next->prio)
+				break;
+			list_rotate_left(&si->list);
+			next = si;
+		}
 
 		spin_unlock(&swap_lock);
 		/* This is called for allocating swap entry for cache */
 		offset = scan_swap_map(si, SWAP_HAS_CACHE);
 		spin_unlock(&si->lock);
 		if (offset)
-			return swp_entry(type, offset);
+			return swp_entry(si->type, offset);
 		spin_lock(&swap_lock);
-		next = swap_list.next;
+		/*
+		 * if we got here, it's likely that si was almost full before,
+		 * and since scan_swap_map() can drop the si->lock, multiple
+		 * callers probably all tried to get a page from the same si
+		 * and it filled up before we could get one.  So we need to
+		 * try again.  Since we dropped the swap_lock, there may now
+		 * be non-full higher priority swap_infos, and this si may have
+		 * even been removed from the list (although very unlikely).
+		 * Let's start over.
+		 */
+		tmp = &swap_list_head;
 	}
 
 	atomic_long_inc(&nr_swap_pages);
@@ -766,27 +757,6 @@ out:
 	return NULL;
 }
 
-/*
- * This swap type frees swap entry, check if it is the highest priority swap
- * type which just frees swap entry. get_swap_page() uses
- * highest_priority_index to search highest priority swap type. The
- * swap_info_struct.lock can't protect us if there are multiple swap types
- * active, so we use atomic_cmpxchg.
- */
-static void set_highest_priority_index(int type)
-{
-	int old_hp_index, new_hp_index;
-
-	do {
-		old_hp_index = atomic_read(&highest_priority_index);
-		if (old_hp_index != -1 &&
-			swap_info[old_hp_index]->prio >= swap_info[type]->prio)
-			break;
-		new_hp_index = type;
-	} while (atomic_cmpxchg(&highest_priority_index,
-		old_hp_index, new_hp_index) != old_hp_index);
-}
-
 static unsigned char swap_entry_free(struct swap_info_struct *p,
 				     swp_entry_t entry, unsigned char usage)
 {
@@ -830,7 +800,6 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 			p->lowest_bit = offset;
 		if (offset > p->highest_bit)
 			p->highest_bit = offset;
-		set_highest_priority_index(p->type);
 		atomic_long_inc(&nr_swap_pages);
 		p->inuse_pages--;
 		frontswap_invalidate_page(p->type, offset);
@@ -1765,7 +1734,7 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
 				unsigned char *swap_map,
 				struct swap_cluster_info *cluster_info)
 {
-	int i, prev;
+	struct swap_info_struct *si;
 
 	if (prio >= 0)
 		p->prio = prio;
@@ -1777,18 +1746,28 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
 	atomic_long_add(p->pages, &nr_swap_pages);
 	total_swap_pages += p->pages;
 
-	/* insert swap space into swap_list: */
-	prev = -1;
-	for (i = swap_list.head; i >= 0; i = swap_info[i]->next) {
-		if (p->prio >= swap_info[i]->prio)
-			break;
-		prev = i;
+	assert_spin_locked(&swap_lock);
+	BUG_ON(!list_empty(&p->list));
+	/*
+	 * insert into swap list; the list is in priority order,
+	 * so that get_swap_page() can get a page from the highest
+	 * priority swap_info_struct with available page(s), and
+	 * swapoff can adjust the auto-assigned (i.e. negative) prio
+	 * values for any lower-priority swap_info_structs when
+	 * removing a negative-prio swap_info_struct
+	 */
+	list_for_each_entry(si, &swap_list_head, list) {
+		if (p->prio >= si->prio) {
+			list_add_tail(&p->list, &si->list);
+			return;
+		}
 	}
-	p->next = i;
-	if (prev < 0)
-		swap_list.head = swap_list.next = p->type;
-	else
-		swap_info[prev]->next = p->type;
+	/*
+	 * this covers two cases:
+	 * 1) p->prio is less than all existing prio
+	 * 2) the swap list is empty
+	 */
+	list_add_tail(&p->list, &swap_list_head);
 }
 
 static void enable_swap_info(struct swap_info_struct *p, int prio,
@@ -1823,8 +1802,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	struct address_space *mapping;
 	struct inode *inode;
 	struct filename *pathname;
-	int i, type, prev;
-	int err;
+	int err, found = 0;
 	unsigned int old_block_size;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -1842,17 +1820,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		goto out;
 
 	mapping = victim->f_mapping;
-	prev = -1;
 	spin_lock(&swap_lock);
-	for (type = swap_list.head; type >= 0; type = swap_info[type]->next) {
-		p = swap_info[type];
+	list_for_each_entry(p, &swap_list_head, list) {
 		if (p->flags & SWP_WRITEOK) {
-			if (p->swap_file->f_mapping == mapping)
+			if (p->swap_file->f_mapping == mapping) {
+				found = 1;
 				break;
+			}
 		}
-		prev = type;
 	}
-	if (type < 0) {
+	if (!found) {
 		err = -EINVAL;
 		spin_unlock(&swap_lock);
 		goto out_dput;
@@ -1864,20 +1841,16 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		spin_unlock(&swap_lock);
 		goto out_dput;
 	}
-	if (prev < 0)
-		swap_list.head = p->next;
-	else
-		swap_info[prev]->next = p->next;
-	if (type == swap_list.next) {
-		/* just pick something that's safe... */
-		swap_list.next = swap_list.head;
-	}
 	spin_lock(&p->lock);
 	if (p->prio < 0) {
-		for (i = p->next; i >= 0; i = swap_info[i]->next)
-			swap_info[i]->prio = p->prio--;
+		struct swap_info_struct *si = p;
+
+		list_for_each_entry_continue(si, &swap_list_head, list) {
+			si->prio++;
+		}
 		least_priority++;
 	}
+	list_del_init(&p->list);
 	atomic_long_sub(p->pages, &nr_swap_pages);
 	total_swap_pages -= p->pages;
 	p->flags &= ~SWP_WRITEOK;
@@ -1885,7 +1858,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	spin_unlock(&swap_lock);
 
 	set_current_oom_origin();
-	err = try_to_unuse(type, false, 0); /* force all pages to be unused */
+	err = try_to_unuse(p->type, false, 0); /* force unuse all pages */
 	clear_current_oom_origin();
 
 	if (err) {
@@ -1926,7 +1899,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	frontswap_map = frontswap_map_get(p);
 	spin_unlock(&p->lock);
 	spin_unlock(&swap_lock);
-	frontswap_invalidate_area(type);
+	frontswap_invalidate_area(p->type);
 	frontswap_map_set(p, NULL);
 	mutex_unlock(&swapon_mutex);
 	free_percpu(p->percpu_cluster);
@@ -1935,7 +1908,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	vfree(cluster_info);
 	vfree(frontswap_map);
 	/* Destroy swap account information */
-	swap_cgroup_swapoff(type);
+	swap_cgroup_swapoff(p->type);
 
 	inode = mapping->host;
 	if (S_ISBLK(inode->i_mode)) {
@@ -2142,8 +2115,8 @@ static struct swap_info_struct *alloc_swap_info(void)
 		 */
 	}
 	INIT_LIST_HEAD(&p->first_swap_extent.list);
+	INIT_LIST_HEAD(&p->list);
 	p->flags = SWP_USED;
-	p->next = -1;
 	spin_unlock(&swap_lock);
 	spin_lock_init(&p->lock);
 
-- 
cgit 


From fd16618e12a05df79a3439d72d5ffdac5d34f3da Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Wed, 4 Jun 2014 16:09:55 -0700
Subject: lib/plist: add helper functions

Add PLIST_HEAD() to plist.h, equivalent to LIST_HEAD() from list.h, to
define and initialize a struct plist_head.

Add plist_for_each_continue() and plist_for_each_entry_continue(),
equivalent to list_for_each_continue() and list_for_each_entry_continue(),
to iterate over a plist continuing after the current position.

Add plist_prev() and plist_next(), equivalent to (struct list_head*)->prev
and ->next, implemented by list_prev_entry() and list_next_entry(), to
access the prev/next struct plist_node entry.  These are needed because
unlike struct list_head, direct access of the prev/next struct plist_node
isn't possible; the list must be navigated via the contained struct
list_head.  e.g.  instead of accessing the prev by list_prev_entry(node,
node_list) it can be accessed by plist_prev(node).

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Cc: Weijie Yang <weijieut@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/plist.h | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/plist.h b/include/linux/plist.h
index aa0fb390bd29..c81549119bd4 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -97,6 +97,13 @@ struct plist_node {
 	.node_list = LIST_HEAD_INIT((head).node_list)	\
 }
 
+/**
+ * PLIST_HEAD - declare and init plist_head
+ * @head:	name for struct plist_head variable
+ */
+#define PLIST_HEAD(head) \
+	struct plist_head head = PLIST_HEAD_INIT(head)
+
 /**
  * PLIST_NODE_INIT - static struct plist_node initializer
  * @node:	struct plist_node variable name
@@ -142,6 +149,16 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
 #define plist_for_each(pos, head)	\
 	 list_for_each_entry(pos, &(head)->node_list, node_list)
 
+/**
+ * plist_for_each_continue - continue iteration over the plist
+ * @pos:	the type * to use as a loop cursor
+ * @head:	the head for your list
+ *
+ * Continue to iterate over plist, continuing after the current position.
+ */
+#define plist_for_each_continue(pos, head)	\
+	 list_for_each_entry_continue(pos, &(head)->node_list, node_list)
+
 /**
  * plist_for_each_safe - iterate safely over a plist of given type
  * @pos:	the type * to use as a loop counter
@@ -162,6 +179,18 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
 #define plist_for_each_entry(pos, head, mem)	\
 	 list_for_each_entry(pos, &(head)->node_list, mem.node_list)
 
+/**
+ * plist_for_each_entry_continue - continue iteration over list of given type
+ * @pos:	the type * to use as a loop cursor
+ * @head:	the head for your list
+ * @m:		the name of the list_struct within the struct
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define plist_for_each_entry_continue(pos, head, m)	\
+	list_for_each_entry_continue(pos, &(head)->node_list, m.node_list)
+
 /**
  * plist_for_each_entry_safe - iterate safely over list of given type
  * @pos:	the type * to use as a loop counter
@@ -228,6 +257,20 @@ static inline int plist_node_empty(const struct plist_node *node)
 	container_of(plist_last(head), type, member)
 #endif
 
+/**
+ * plist_next - get the next entry in list
+ * @pos:	the type * to cursor
+ */
+#define plist_next(pos) \
+	list_next_entry(pos, node_list)
+
+/**
+ * plist_prev - get the prev entry in list
+ * @pos:	the type * to cursor
+ */
+#define plist_prev(pos) \
+	list_prev_entry(pos, node_list)
+
 /**
  * plist_first - return the first node (and thus, highest priority)
  * @head:	the &struct plist_head pointer
-- 
cgit 


From a75f232ce0fe38bd01301899ecd97ffd0254316a Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Wed, 4 Jun 2014 16:09:57 -0700
Subject: lib/plist: add plist_requeue

Add plist_requeue(), which moves the specified plist_node after all other
same-priority plist_nodes in the list.  This is essentially an optimized
plist_del() followed by plist_add().

This is needed by swap, which (with the next patch in this set) uses a
plist of available swap devices.  When a swap device (either a swap
partition or swap file) is added to the system with swapon(), the device
is added to a plist, ordered by the swap device's priority.  When swap
needs to allocate a page from one of the swap devices, it takes the page
from the first swap device on the plist, which is the highest priority
swap device.  The swap device is left in the plist until all its pages are
used, and then removed from the plist when it becomes full.

However, as described in man 2 swapon, swap must allocate pages from swap
devices with the same priority in round-robin order; to do this, on each
swap page allocation, swap uses a page from the first swap device in the
plist, and then calls plist_requeue() to move that swap device entry to
after any other same-priority swap devices.  The next swap page allocation
will again use a page from the first swap device in the plist and requeue
it, and so on, resulting in round-robin usage of equal-priority swap
devices.

Also add plist_test_requeue() test function, for use by plist_test() to
test plist_requeue() function.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Cc: Weijie Yang <weijieut@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/plist.h |  2 ++
 lib/plist.c           | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/plist.h b/include/linux/plist.h
index c81549119bd4..8b6c970cff6c 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -141,6 +141,8 @@ static inline void plist_node_init(struct plist_node *node, int prio)
 extern void plist_add(struct plist_node *node, struct plist_head *head);
 extern void plist_del(struct plist_node *node, struct plist_head *head);
 
+extern void plist_requeue(struct plist_node *node, struct plist_head *head);
+
 /**
  * plist_for_each - iterate over the plist
  * @pos:	the type * to use as a loop counter
diff --git a/lib/plist.c b/lib/plist.c
index 1ebc95f7a46f..0f2084d30798 100644
--- a/lib/plist.c
+++ b/lib/plist.c
@@ -134,6 +134,46 @@ void plist_del(struct plist_node *node, struct plist_head *head)
 	plist_check_head(head);
 }
 
+/**
+ * plist_requeue - Requeue @node at end of same-prio entries.
+ *
+ * This is essentially an optimized plist_del() followed by
+ * plist_add().  It moves an entry already in the plist to
+ * after any other same-priority entries.
+ *
+ * @node:	&struct plist_node pointer - entry to be moved
+ * @head:	&struct plist_head pointer - list head
+ */
+void plist_requeue(struct plist_node *node, struct plist_head *head)
+{
+	struct plist_node *iter;
+	struct list_head *node_next = &head->node_list;
+
+	plist_check_head(head);
+	BUG_ON(plist_head_empty(head));
+	BUG_ON(plist_node_empty(node));
+
+	if (node == plist_last(head))
+		return;
+
+	iter = plist_next(node);
+
+	if (node->prio != iter->prio)
+		return;
+
+	plist_del(node, head);
+
+	plist_for_each_continue(iter, head) {
+		if (node->prio != iter->prio) {
+			node_next = &iter->node_list;
+			break;
+		}
+	}
+	list_add_tail(&node->node_list, node_next);
+
+	plist_check_head(head);
+}
+
 #ifdef CONFIG_DEBUG_PI_LIST
 #include <linux/sched.h>
 #include <linux/module.h>
@@ -170,6 +210,14 @@ static void __init plist_test_check(int nr_expect)
 	BUG_ON(prio_pos->prio_list.next != &first->prio_list);
 }
 
+static void __init plist_test_requeue(struct plist_node *node)
+{
+	plist_requeue(node, &test_head);
+
+	if (node != plist_last(&test_head))
+		BUG_ON(node->prio == plist_next(node)->prio);
+}
+
 static int  __init plist_test(void)
 {
 	int nr_expect = 0, i, loop;
@@ -193,6 +241,10 @@ static int  __init plist_test(void)
 			nr_expect--;
 		}
 		plist_test_check(nr_expect);
+		if (!plist_node_empty(test_node + i)) {
+			plist_test_requeue(test_node + i);
+			plist_test_check(nr_expect);
+		}
 	}
 
 	for (i = 0; i < ARRAY_SIZE(test_node); i++) {
-- 
cgit 


From 18ab4d4ced0817421e6db6940374cc39d28d65da Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Wed, 4 Jun 2014 16:09:59 -0700
Subject: swap: change swap_list_head to plist, add swap_avail_head

Originally get_swap_page() started iterating through the singly-linked
list of swap_info_structs using swap_list.next or highest_priority_index,
which both were intended to point to the highest priority active swap
target that was not full.  The first patch in this series changed the
singly-linked list to a doubly-linked list, and removed the logic to start
at the highest priority non-full entry; it starts scanning at the highest
priority entry each time, even if the entry is full.

Replace the manually ordered swap_list_head with a plist, swap_active_head.
Add a new plist, swap_avail_head.  The original swap_active_head plist
contains all active swap_info_structs, as before, while the new
swap_avail_head plist contains only swap_info_structs that are active and
available, i.e. not full.  Add a new spinlock, swap_avail_lock, to protect
the swap_avail_head list.

Mel Gorman suggested using plists since they internally handle ordering
the list entries based on priority, which is exactly what swap was doing
manually.  All the ordering code is now removed, and swap_info_struct
entries are simply added to their corresponding plist and automatically
ordered correctly.

Using a new plist for available swap_info_structs simplifies and
optimizes get_swap_page(), which no longer has to iterate over full
swap_info_structs.  Using a new spinlock for swap_avail_head plist
allows each swap_info_struct to add or remove themselves from the
plist when they become full or not-full; previously they could not
do so because the swap_info_struct->lock is held when they change
from full<->not-full, and the swap_lock protecting the main
swap_active_head must be ordered before any swap_info_struct->lock.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Shaohua Li <shli@fusionio.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Cc: Weijie Yang <weijieut@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h     |   3 +-
 include/linux/swapfile.h |   2 +-
 mm/frontswap.c           |   6 +-
 mm/swapfile.c            | 145 +++++++++++++++++++++++++++++------------------
 4 files changed, 97 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8bb85d6d65f0..9155bcdcce12 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -214,7 +214,8 @@ struct percpu_cluster {
 struct swap_info_struct {
 	unsigned long	flags;		/* SWP_USED etc: see above */
 	signed short	prio;		/* swap priority of this type */
-	struct list_head list;		/* entry in swap list */
+	struct plist_node list;		/* entry in swap_active_head */
+	struct plist_node avail_list;	/* entry in swap_avail_head */
 	signed char	type;		/* strange name for an index */
 	unsigned int	max;		/* extent of the swap_map */
 	unsigned char *swap_map;	/* vmalloc'ed array of usage counts */
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
index 2eab382d593d..388293a91e8c 100644
--- a/include/linux/swapfile.h
+++ b/include/linux/swapfile.h
@@ -6,7 +6,7 @@
  * want to expose them to the dozens of source files that include swap.h
  */
 extern spinlock_t swap_lock;
-extern struct list_head swap_list_head;
+extern struct plist_head swap_active_head;
 extern struct swap_info_struct *swap_info[];
 extern int try_to_unuse(unsigned int, bool, unsigned long);
 
diff --git a/mm/frontswap.c b/mm/frontswap.c
index fae11602e8a9..c30eec536f03 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -331,7 +331,7 @@ static unsigned long __frontswap_curr_pages(void)
 	struct swap_info_struct *si = NULL;
 
 	assert_spin_locked(&swap_lock);
-	list_for_each_entry(si, &swap_list_head, list)
+	plist_for_each_entry(si, &swap_active_head, list)
 		totalpages += atomic_read(&si->frontswap_pages);
 	return totalpages;
 }
@@ -346,7 +346,7 @@ static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused,
 	unsigned long pages = 0, pages_to_unuse = 0;
 
 	assert_spin_locked(&swap_lock);
-	list_for_each_entry(si, &swap_list_head, list) {
+	plist_for_each_entry(si, &swap_active_head, list) {
 		si_frontswap_pages = atomic_read(&si->frontswap_pages);
 		if (total_pages_to_unuse < si_frontswap_pages) {
 			pages = pages_to_unuse = total_pages_to_unuse;
@@ -408,7 +408,7 @@ void frontswap_shrink(unsigned long target_pages)
 	/*
 	 * we don't want to hold swap_lock while doing a very
 	 * lengthy try_to_unuse, but swap_list may change
-	 * so restart scan from swap_list_head each time
+	 * so restart scan from swap_active_head each time
 	 */
 	spin_lock(&swap_lock);
 	ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 6c95a8c63b1a..beeeef8a1b2d 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -61,7 +61,22 @@ static const char Unused_offset[] = "Unused swap offset entry ";
  * all active swap_info_structs
  * protected with swap_lock, and ordered by priority.
  */
-LIST_HEAD(swap_list_head);
+PLIST_HEAD(swap_active_head);
+
+/*
+ * all available (active, not full) swap_info_structs
+ * protected with swap_avail_lock, ordered by priority.
+ * This is used by get_swap_page() instead of swap_active_head
+ * because swap_active_head includes all swap_info_structs,
+ * but get_swap_page() doesn't need to look at full ones.
+ * This uses its own lock instead of swap_lock because when a
+ * swap_info_struct changes between not-full/full, it needs to
+ * add/remove itself to/from this list, but the swap_info_struct->lock
+ * is held and the locking order requires swap_lock to be taken
+ * before any swap_info_struct->lock.
+ */
+static PLIST_HEAD(swap_avail_head);
+static DEFINE_SPINLOCK(swap_avail_lock);
 
 struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
@@ -594,6 +609,9 @@ checks:
 	if (si->inuse_pages == si->pages) {
 		si->lowest_bit = si->max;
 		si->highest_bit = 0;
+		spin_lock(&swap_avail_lock);
+		plist_del(&si->avail_list, &swap_avail_head);
+		spin_unlock(&swap_avail_lock);
 	}
 	si->swap_map[offset] = usage;
 	inc_cluster_info_page(si, si->cluster_info, offset);
@@ -645,57 +663,63 @@ swp_entry_t get_swap_page(void)
 {
 	struct swap_info_struct *si, *next;
 	pgoff_t offset;
-	struct list_head *tmp;
 
-	spin_lock(&swap_lock);
 	if (atomic_long_read(&nr_swap_pages) <= 0)
 		goto noswap;
 	atomic_long_dec(&nr_swap_pages);
 
-	list_for_each(tmp, &swap_list_head) {
-		si = list_entry(tmp, typeof(*si), list);
+	spin_lock(&swap_avail_lock);
+
+start_over:
+	plist_for_each_entry_safe(si, next, &swap_avail_head, avail_list) {
+		/* requeue si to after same-priority siblings */
+		plist_requeue(&si->avail_list, &swap_avail_head);
+		spin_unlock(&swap_avail_lock);
 		spin_lock(&si->lock);
 		if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
+			spin_lock(&swap_avail_lock);
+			if (plist_node_empty(&si->avail_list)) {
+				spin_unlock(&si->lock);
+				goto nextsi;
+			}
+			WARN(!si->highest_bit,
+			     "swap_info %d in list but !highest_bit\n",
+			     si->type);
+			WARN(!(si->flags & SWP_WRITEOK),
+			     "swap_info %d in list but !SWP_WRITEOK\n",
+			     si->type);
+			plist_del(&si->avail_list, &swap_avail_head);
 			spin_unlock(&si->lock);
-			continue;
+			goto nextsi;
 		}
 
-		/*
-		 * rotate the current swap_info that we're going to use
-		 * to after any other swap_info that have the same prio,
-		 * so that all equal-priority swap_info get used equally
-		 */
-		next = si;
-		list_for_each_entry_continue(next, &swap_list_head, list) {
-			if (si->prio != next->prio)
-				break;
-			list_rotate_left(&si->list);
-			next = si;
-		}
-
-		spin_unlock(&swap_lock);
 		/* This is called for allocating swap entry for cache */
 		offset = scan_swap_map(si, SWAP_HAS_CACHE);
 		spin_unlock(&si->lock);
 		if (offset)
 			return swp_entry(si->type, offset);
-		spin_lock(&swap_lock);
+		pr_debug("scan_swap_map of si %d failed to find offset\n",
+		       si->type);
+		spin_lock(&swap_avail_lock);
+nextsi:
 		/*
 		 * if we got here, it's likely that si was almost full before,
 		 * and since scan_swap_map() can drop the si->lock, multiple
 		 * callers probably all tried to get a page from the same si
-		 * and it filled up before we could get one.  So we need to
-		 * try again.  Since we dropped the swap_lock, there may now
-		 * be non-full higher priority swap_infos, and this si may have
-		 * even been removed from the list (although very unlikely).
-		 * Let's start over.
+		 * and it filled up before we could get one; or, the si filled
+		 * up between us dropping swap_avail_lock and taking si->lock.
+		 * Since we dropped the swap_avail_lock, the swap_avail_head
+		 * list may have been modified; so if next is still in the
+		 * swap_avail_head list then try it, otherwise start over.
 		 */
-		tmp = &swap_list_head;
+		if (plist_node_empty(&next->avail_list))
+			goto start_over;
 	}
 
+	spin_unlock(&swap_avail_lock);
+
 	atomic_long_inc(&nr_swap_pages);
 noswap:
-	spin_unlock(&swap_lock);
 	return (swp_entry_t) {0};
 }
 
@@ -798,8 +822,18 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 		dec_cluster_info_page(p, p->cluster_info, offset);
 		if (offset < p->lowest_bit)
 			p->lowest_bit = offset;
-		if (offset > p->highest_bit)
+		if (offset > p->highest_bit) {
+			bool was_full = !p->highest_bit;
 			p->highest_bit = offset;
+			if (was_full && (p->flags & SWP_WRITEOK)) {
+				spin_lock(&swap_avail_lock);
+				WARN_ON(!plist_node_empty(&p->avail_list));
+				if (plist_node_empty(&p->avail_list))
+					plist_add(&p->avail_list,
+						  &swap_avail_head);
+				spin_unlock(&swap_avail_lock);
+			}
+		}
 		atomic_long_inc(&nr_swap_pages);
 		p->inuse_pages--;
 		frontswap_invalidate_page(p->type, offset);
@@ -1734,12 +1768,16 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
 				unsigned char *swap_map,
 				struct swap_cluster_info *cluster_info)
 {
-	struct swap_info_struct *si;
-
 	if (prio >= 0)
 		p->prio = prio;
 	else
 		p->prio = --least_priority;
+	/*
+	 * the plist prio is negated because plist ordering is
+	 * low-to-high, while swap ordering is high-to-low
+	 */
+	p->list.prio = -p->prio;
+	p->avail_list.prio = -p->prio;
 	p->swap_map = swap_map;
 	p->cluster_info = cluster_info;
 	p->flags |= SWP_WRITEOK;
@@ -1747,27 +1785,20 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
 	total_swap_pages += p->pages;
 
 	assert_spin_locked(&swap_lock);
-	BUG_ON(!list_empty(&p->list));
-	/*
-	 * insert into swap list; the list is in priority order,
-	 * so that get_swap_page() can get a page from the highest
-	 * priority swap_info_struct with available page(s), and
-	 * swapoff can adjust the auto-assigned (i.e. negative) prio
-	 * values for any lower-priority swap_info_structs when
-	 * removing a negative-prio swap_info_struct
-	 */
-	list_for_each_entry(si, &swap_list_head, list) {
-		if (p->prio >= si->prio) {
-			list_add_tail(&p->list, &si->list);
-			return;
-		}
-	}
 	/*
-	 * this covers two cases:
-	 * 1) p->prio is less than all existing prio
-	 * 2) the swap list is empty
+	 * both lists are plists, and thus priority ordered.
+	 * swap_active_head needs to be priority ordered for swapoff(),
+	 * which on removal of any swap_info_struct with an auto-assigned
+	 * (i.e. negative) priority increments the auto-assigned priority
+	 * of any lower-priority swap_info_structs.
+	 * swap_avail_head needs to be priority ordered for get_swap_page(),
+	 * which allocates swap pages from the highest available priority
+	 * swap_info_struct.
 	 */
-	list_add_tail(&p->list, &swap_list_head);
+	plist_add(&p->list, &swap_active_head);
+	spin_lock(&swap_avail_lock);
+	plist_add(&p->avail_list, &swap_avail_head);
+	spin_unlock(&swap_avail_lock);
 }
 
 static void enable_swap_info(struct swap_info_struct *p, int prio,
@@ -1821,7 +1852,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 
 	mapping = victim->f_mapping;
 	spin_lock(&swap_lock);
-	list_for_each_entry(p, &swap_list_head, list) {
+	plist_for_each_entry(p, &swap_active_head, list) {
 		if (p->flags & SWP_WRITEOK) {
 			if (p->swap_file->f_mapping == mapping) {
 				found = 1;
@@ -1841,16 +1872,21 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		spin_unlock(&swap_lock);
 		goto out_dput;
 	}
+	spin_lock(&swap_avail_lock);
+	plist_del(&p->avail_list, &swap_avail_head);
+	spin_unlock(&swap_avail_lock);
 	spin_lock(&p->lock);
 	if (p->prio < 0) {
 		struct swap_info_struct *si = p;
 
-		list_for_each_entry_continue(si, &swap_list_head, list) {
+		plist_for_each_entry_continue(si, &swap_active_head, list) {
 			si->prio++;
+			si->list.prio--;
+			si->avail_list.prio--;
 		}
 		least_priority++;
 	}
-	list_del_init(&p->list);
+	plist_del(&p->list, &swap_active_head);
 	atomic_long_sub(p->pages, &nr_swap_pages);
 	total_swap_pages -= p->pages;
 	p->flags &= ~SWP_WRITEOK;
@@ -2115,7 +2151,8 @@ static struct swap_info_struct *alloc_swap_info(void)
 		 */
 	}
 	INIT_LIST_HEAD(&p->first_swap_extent.list);
-	INIT_LIST_HEAD(&p->list);
+	plist_node_init(&p->list, 0);
+	plist_node_init(&p->avail_list, 0);
 	p->flags = SWP_USED;
 	spin_unlock(&swap_lock);
 	spin_lock_init(&p->lock);
-- 
cgit 


From 776ed0f0377914d1e65fed903c052e9eef3f4cc3 Mon Sep 17 00:00:00 2001
From: Vladimir Davydov <vdavydov@parallels.com>
Date: Wed, 4 Jun 2014 16:10:02 -0700
Subject: memcg: cleanup kmem cache creation/destruction functions naming

Current names are rather inconsistent. Let's try to improve them.

Brief change log:

** old name **                          ** new name **

kmem_cache_create_memcg                 memcg_create_kmem_cache
memcg_kmem_create_cache                 memcg_register_cache
memcg_kmem_destroy_cache                memcg_unregister_cache

kmem_cache_destroy_memcg_children       memcg_cleanup_cache_params
mem_cgroup_destroy_all_caches           memcg_unregister_all_caches

create_work                             memcg_register_cache_work
memcg_create_cache_work_func            memcg_register_cache_func
memcg_create_cache_enqueue              memcg_schedule_register_cache

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  2 +-
 include/linux/slab.h       |  2 +-
 mm/memcontrol.c            | 60 ++++++++++++++++++++++------------------------
 mm/slab_common.c           | 12 +++++-----
 4 files changed, 36 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index dfc2929a3877..eb65d29516ca 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -505,7 +505,7 @@ __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order);
 void __memcg_uncharge_slab(struct kmem_cache *cachep, int order);
 
-int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
+int __memcg_cleanup_cache_params(struct kmem_cache *s);
 
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 86e5b26fbdab..1d9abb7d22a0 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -116,7 +116,7 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t,
 			unsigned long,
 			void (*)(void *));
 #ifdef CONFIG_MEMCG_KMEM
-struct kmem_cache *kmem_cache_create_memcg(struct mem_cgroup *,
+struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *,
 					   struct kmem_cache *,
 					   const char *);
 #endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5e2bfcc96da9..d176edb1d5e8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3132,8 +3132,8 @@ void memcg_free_cache_params(struct kmem_cache *s)
 	kfree(s->memcg_params);
 }
 
-static void memcg_kmem_create_cache(struct mem_cgroup *memcg,
-				    struct kmem_cache *root_cache)
+static void memcg_register_cache(struct mem_cgroup *memcg,
+				 struct kmem_cache *root_cache)
 {
 	static char memcg_name_buf[NAME_MAX + 1]; /* protected by
 						     memcg_slab_mutex */
@@ -3153,7 +3153,7 @@ static void memcg_kmem_create_cache(struct mem_cgroup *memcg,
 		return;
 
 	cgroup_name(memcg->css.cgroup, memcg_name_buf, NAME_MAX + 1);
-	cachep = kmem_cache_create_memcg(memcg, root_cache, memcg_name_buf);
+	cachep = memcg_create_kmem_cache(memcg, root_cache, memcg_name_buf);
 	/*
 	 * If we could not create a memcg cache, do not complain, because
 	 * that's not critical at all as we can always proceed with the root
@@ -3175,7 +3175,7 @@ static void memcg_kmem_create_cache(struct mem_cgroup *memcg,
 	root_cache->memcg_params->memcg_caches[id] = cachep;
 }
 
-static void memcg_kmem_destroy_cache(struct kmem_cache *cachep)
+static void memcg_unregister_cache(struct kmem_cache *cachep)
 {
 	struct kmem_cache *root_cache;
 	struct mem_cgroup *memcg;
@@ -3228,7 +3228,7 @@ static inline void memcg_resume_kmem_account(void)
 	current->memcg_kmem_skip_account--;
 }
 
-int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+int __memcg_cleanup_cache_params(struct kmem_cache *s)
 {
 	struct kmem_cache *c;
 	int i, failed = 0;
@@ -3239,7 +3239,7 @@ int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 		if (!c)
 			continue;
 
-		memcg_kmem_destroy_cache(c);
+		memcg_unregister_cache(c);
 
 		if (cache_from_memcg_idx(s, i))
 			failed++;
@@ -3248,7 +3248,7 @@ int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 	return failed;
 }
 
-static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+static void memcg_unregister_all_caches(struct mem_cgroup *memcg)
 {
 	struct kmem_cache *cachep;
 	struct memcg_cache_params *params, *tmp;
@@ -3261,25 +3261,26 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
 		cachep = memcg_params_to_cache(params);
 		kmem_cache_shrink(cachep);
 		if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
-			memcg_kmem_destroy_cache(cachep);
+			memcg_unregister_cache(cachep);
 	}
 	mutex_unlock(&memcg_slab_mutex);
 }
 
-struct create_work {
+struct memcg_register_cache_work {
 	struct mem_cgroup *memcg;
 	struct kmem_cache *cachep;
 	struct work_struct work;
 };
 
-static void memcg_create_cache_work_func(struct work_struct *w)
+static void memcg_register_cache_func(struct work_struct *w)
 {
-	struct create_work *cw = container_of(w, struct create_work, work);
+	struct memcg_register_cache_work *cw =
+		container_of(w, struct memcg_register_cache_work, work);
 	struct mem_cgroup *memcg = cw->memcg;
 	struct kmem_cache *cachep = cw->cachep;
 
 	mutex_lock(&memcg_slab_mutex);
-	memcg_kmem_create_cache(memcg, cachep);
+	memcg_register_cache(memcg, cachep);
 	mutex_unlock(&memcg_slab_mutex);
 
 	css_put(&memcg->css);
@@ -3289,12 +3290,12 @@ static void memcg_create_cache_work_func(struct work_struct *w)
 /*
  * Enqueue the creation of a per-memcg kmem_cache.
  */
-static void __memcg_create_cache_enqueue(struct mem_cgroup *memcg,
-					 struct kmem_cache *cachep)
+static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
+					    struct kmem_cache *cachep)
 {
-	struct create_work *cw;
+	struct memcg_register_cache_work *cw;
 
-	cw = kmalloc(sizeof(struct create_work), GFP_NOWAIT);
+	cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
 	if (cw == NULL) {
 		css_put(&memcg->css);
 		return;
@@ -3303,17 +3304,17 @@ static void __memcg_create_cache_enqueue(struct mem_cgroup *memcg,
 	cw->memcg = memcg;
 	cw->cachep = cachep;
 
-	INIT_WORK(&cw->work, memcg_create_cache_work_func);
+	INIT_WORK(&cw->work, memcg_register_cache_func);
 	schedule_work(&cw->work);
 }
 
-static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
-				       struct kmem_cache *cachep)
+static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
+					  struct kmem_cache *cachep)
 {
 	/*
 	 * We need to stop accounting when we kmalloc, because if the
 	 * corresponding kmalloc cache is not yet created, the first allocation
-	 * in __memcg_create_cache_enqueue will recurse.
+	 * in __memcg_schedule_register_cache will recurse.
 	 *
 	 * However, it is better to enclose the whole function. Depending on
 	 * the debugging options enabled, INIT_WORK(), for instance, can
@@ -3322,7 +3323,7 @@ static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
 	 * the safest choice is to do it like this, wrapping the whole function.
 	 */
 	memcg_stop_kmem_account();
-	__memcg_create_cache_enqueue(memcg, cachep);
+	__memcg_schedule_register_cache(memcg, cachep);
 	memcg_resume_kmem_account();
 }
 
@@ -3393,16 +3394,11 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	 *
 	 * However, there are some clashes that can arrive from locking.
 	 * For instance, because we acquire the slab_mutex while doing
-	 * kmem_cache_dup, this means no further allocation could happen
-	 * with the slab_mutex held.
-	 *
-	 * Also, because cache creation issue get_online_cpus(), this
-	 * creates a lock chain: memcg_slab_mutex -> cpu_hotplug_mutex,
-	 * that ends up reversed during cpu hotplug. (cpuset allocates
-	 * a bunch of GFP_KERNEL memory during cpuup). Due to all that,
-	 * better to defer everything.
+	 * memcg_create_kmem_cache, this means no further allocation
+	 * could happen with the slab_mutex held. So it's better to
+	 * defer everything.
 	 */
-	memcg_create_cache_enqueue(memcg, cachep);
+	memcg_schedule_register_cache(memcg, cachep);
 	return cachep;
 out:
 	rcu_read_unlock();
@@ -3526,7 +3522,7 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
 	memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
 }
 #else
-static inline void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
 {
 }
 #endif /* CONFIG_MEMCG_KMEM */
@@ -6372,7 +6368,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 	css_for_each_descendant_post(iter, css)
 		mem_cgroup_reparent_charges(mem_cgroup_from_css(iter));
 
-	mem_cgroup_destroy_all_caches(memcg);
+	memcg_unregister_all_caches(memcg);
 	vmpressure_cleanup(&memcg->vmpressure);
 }
 
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 32175617cb75..48fafb61f35e 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -261,7 +261,7 @@ EXPORT_SYMBOL(kmem_cache_create);
 
 #ifdef CONFIG_MEMCG_KMEM
 /*
- * kmem_cache_create_memcg - Create a cache for a memory cgroup.
+ * memcg_create_kmem_cache - Create a cache for a memory cgroup.
  * @memcg: The memory cgroup the new cache is for.
  * @root_cache: The parent of the new cache.
  * @memcg_name: The name of the memory cgroup (used for naming the new cache).
@@ -270,7 +270,7 @@ EXPORT_SYMBOL(kmem_cache_create);
  * requests going from @memcg to @root_cache. The new cache inherits properties
  * from its parent.
  */
-struct kmem_cache *kmem_cache_create_memcg(struct mem_cgroup *memcg,
+struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 					   struct kmem_cache *root_cache,
 					   const char *memcg_name)
 {
@@ -305,7 +305,7 @@ out_unlock:
 	return s;
 }
 
-static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+static int memcg_cleanup_cache_params(struct kmem_cache *s)
 {
 	int rc;
 
@@ -314,13 +314,13 @@ static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 		return 0;
 
 	mutex_unlock(&slab_mutex);
-	rc = __kmem_cache_destroy_memcg_children(s);
+	rc = __memcg_cleanup_cache_params(s);
 	mutex_lock(&slab_mutex);
 
 	return rc;
 }
 #else
-static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
+static int memcg_cleanup_cache_params(struct kmem_cache *s)
 {
 	return 0;
 }
@@ -343,7 +343,7 @@ void kmem_cache_destroy(struct kmem_cache *s)
 	if (s->refcount)
 		goto out_unlock;
 
-	if (kmem_cache_destroy_memcg_children(s) != 0)
+	if (memcg_cleanup_cache_params(s) != 0)
 		goto out_unlock;
 
 	list_del(&s->list);
-- 
cgit 


From ea5e9539abf1258f23e725cb9cb25aa74efa29eb Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 4 Jun 2014 16:10:07 -0700
Subject: include/linux/jump_label.h: expose the reference count

This patch exposes the jump_label reference count in preparation for the
next patch.  cpusets cares about both the jump_label being enabled and how
many users of the cpusets there currently are.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/jump_label.h | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 5c1dfb2a9e73..784304b222b3 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -69,6 +69,10 @@ struct static_key {
 
 # include <asm/jump_label.h>
 # define HAVE_JUMP_LABEL
+#else
+struct static_key {
+	atomic_t enabled;
+};
 #endif	/* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
 
 enum jump_label_type {
@@ -79,6 +83,12 @@ enum jump_label_type {
 struct module;
 
 #include <linux/atomic.h>
+
+static inline int static_key_count(struct static_key *key)
+{
+	return atomic_read(&key->enabled);
+}
+
 #ifdef HAVE_JUMP_LABEL
 
 #define JUMP_LABEL_TYPE_FALSE_BRANCH	0UL
@@ -134,10 +144,6 @@ extern void jump_label_apply_nops(struct module *mod);
 
 #else  /* !HAVE_JUMP_LABEL */
 
-struct static_key {
-	atomic_t enabled;
-};
-
 static __always_inline void jump_label_init(void)
 {
 	static_key_initialized = true;
@@ -145,14 +151,14 @@ static __always_inline void jump_label_init(void)
 
 static __always_inline bool static_key_false(struct static_key *key)
 {
-	if (unlikely(atomic_read(&key->enabled) > 0))
+	if (unlikely(static_key_count(key) > 0))
 		return true;
 	return false;
 }
 
 static __always_inline bool static_key_true(struct static_key *key)
 {
-	if (likely(atomic_read(&key->enabled) > 0))
+	if (likely(static_key_count(key) > 0))
 		return true;
 	return false;
 }
@@ -194,7 +200,7 @@ static inline int jump_label_apply_nops(struct module *mod)
 
 static inline bool static_key_enabled(struct static_key *key)
 {
-	return (atomic_read(&key->enabled) > 0);
+	return static_key_count(key) > 0;
 }
 
 #endif	/* _LINUX_JUMP_LABEL_H */
-- 
cgit 


From 664eeddeef6539247691197c1ac124d4aa872ab6 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 4 Jun 2014 16:10:08 -0700
Subject: mm: page_alloc: use jump labels to avoid checking number_of_cpusets

If cpusets are not in use then we still check a global variable on every
page allocation.  Use jump labels to avoid the overhead.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpuset.h | 29 ++++++++++++++++++++++++++---
 kernel/cpuset.c        | 14 ++++----------
 mm/page_alloc.c        |  3 ++-
 3 files changed, 32 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index b19d3dc2e651..ade2390ffe92 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -12,10 +12,31 @@
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/mm.h>
+#include <linux/jump_label.h>
 
 #ifdef CONFIG_CPUSETS
 
-extern int number_of_cpusets;	/* How many cpusets are defined in system? */
+extern struct static_key cpusets_enabled_key;
+static inline bool cpusets_enabled(void)
+{
+	return static_key_false(&cpusets_enabled_key);
+}
+
+static inline int nr_cpusets(void)
+{
+	/* jump label reference count + the top-level cpuset */
+	return static_key_count(&cpusets_enabled_key) + 1;
+}
+
+static inline void cpuset_inc(void)
+{
+	static_key_slow_inc(&cpusets_enabled_key);
+}
+
+static inline void cpuset_dec(void)
+{
+	static_key_slow_dec(&cpusets_enabled_key);
+}
 
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
@@ -32,13 +53,13 @@ extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask);
 
 static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
 {
-	return number_of_cpusets <= 1 ||
+	return nr_cpusets() <= 1 ||
 		__cpuset_node_allowed_softwall(node, gfp_mask);
 }
 
 static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
 {
-	return number_of_cpusets <= 1 ||
+	return nr_cpusets() <= 1 ||
 		__cpuset_node_allowed_hardwall(node, gfp_mask);
 }
 
@@ -124,6 +145,8 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 
 #else /* !CONFIG_CPUSETS */
 
+static inline bool cpusets_enabled(void) { return false; }
+
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 3d54c418bd06..130017843899 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -61,12 +61,7 @@
 #include <linux/cgroup.h>
 #include <linux/wait.h>
 
-/*
- * Tracks how many cpusets are currently defined in system.
- * When there is only one cpuset (the root cpuset) we can
- * short circuit some hooks.
- */
-int number_of_cpusets __read_mostly;
+struct static_key cpusets_enabled_key __read_mostly = STATIC_KEY_INIT_FALSE;
 
 /* See "Frequency meter" comments, below. */
 
@@ -611,7 +606,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
 		goto done;
 	}
 
-	csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
+	csa = kmalloc(nr_cpusets() * sizeof(cp), GFP_KERNEL);
 	if (!csa)
 		goto done;
 	csn = 0;
@@ -1888,7 +1883,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 	if (is_spread_slab(parent))
 		set_bit(CS_SPREAD_SLAB, &cs->flags);
 
-	number_of_cpusets++;
+	cpuset_inc();
 
 	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
 		goto out_unlock;
@@ -1939,7 +1934,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
 	if (is_sched_load_balance(cs))
 		update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
 
-	number_of_cpusets--;
+	cpuset_dec();
 	clear_bit(CS_ONLINE, &cs->flags);
 
 	mutex_unlock(&cpuset_mutex);
@@ -1992,7 +1987,6 @@ int __init cpuset_init(void)
 	if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
 		BUG();
 
-	number_of_cpusets = 1;
 	return 0;
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b4381eaee715..a2955e101715 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1921,7 +1921,8 @@ zonelist_scan:
 		if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
 			!zlc_zone_worth_trying(zonelist, z, allowednodes))
 				continue;
-		if ((alloc_flags & ALLOC_CPUSET) &&
+		if (cpusets_enabled() &&
+			(alloc_flags & ALLOC_CPUSET) &&
 			!cpuset_zone_allowed_softwall(zone, gfp_mask))
 				continue;
 		BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
-- 
cgit 


From e58469bafd0524e848c3733bc3918d854595e20f Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 4 Jun 2014 16:10:16 -0700
Subject: mm: page_alloc: use word-based accesses for get/set pageblock bitmaps

The test_bit operations in get/set pageblock flags are expensive.  This
patch reads the bitmap on a word basis and uses shifts and masks to isolate
the bits of interest.  Similarly masks are used to set a local copy of the
bitmap and then use cmpxchg to update the bitmap if there have been no
other changes made in parallel.

In a test running dd onto tmpfs the overhead of the pageblock-related
functions went from 1.27% in profiles to 0.5%.

In addition to the performance benefits, this patch closes races that are
possible between:

a) get_ and set_pageblock_migratetype(), where get_pageblock_migratetype()
   reads part of the bits before and other part of the bits after
   set_pageblock_migratetype() has updated them.

b) set_pageblock_migratetype() and set_pageblock_skip(), where the non-atomic
   read-modify-update set bit operation in set_pageblock_skip() will cause
   lost updates to some bits changed in the set_pageblock_migratetype().

Joonsoo Kim first reported the case a) via code inspection.  Vlastimil
Babka's testing with a debug patch showed that either a) or b) occurs
roughly once per mmtests' stress-highalloc benchmark (although not
necessarily in the same pageblock).  Furthermore during development of
unrelated compaction patches, it was observed that with frequent calls to
{start,undo}_isolate_page_range() the race occurs several thousand
times and has resulted in NULL pointer dereferences in move_freepages()
and free_one_page() in places where free_list[migratetype] is
manipulated by e.g.  list_move().  Further debugging confirmed that
migratetype had invalid value of 6, causing out of bounds access to the
free_list array.

That confirmed that the race exists, although it may be extremely rare,
and is currently only fatal where page isolation is performed due to
memory hot remove.  Races on pageblocks being updated by
set_pageblock_migratetype(), where both old and new migratetype are
lower than MIGRATE_RESERVE, currently cannot result in an invalid value
being observed, although theoretically they may still lead to
unexpected creation or destruction of MIGRATE_RESERVE pageblocks.
Furthermore, things could get suddenly worse when memory isolation is
used more, or when new migratetypes are added.

After this patch, the race has no longer been observed in testing.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Reported-and-tested-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h          |  6 ++++-
 include/linux/pageblock-flags.h | 37 ++++++++++++++++++++++++-----
 mm/page_alloc.c                 | 52 +++++++++++++++++++++++++----------------
 3 files changed, 68 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 10a96ee68311..8ef1e3f71e0f 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -75,9 +75,13 @@ enum {
 
 extern int page_group_by_mobility_disabled;
 
+#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)
+#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)
+
 static inline int get_pageblock_migratetype(struct page *page)
 {
-	return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
+	BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2);
+	return get_pageblock_flags_mask(page, PB_migrate_end, MIGRATETYPE_MASK);
 }
 
 struct free_area {
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 2ee8cd2466b5..c08730c10c7a 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -30,9 +30,12 @@ enum pageblock_bits {
 	PB_migrate,
 	PB_migrate_end = PB_migrate + 3 - 1,
 			/* 3 bits required for migrate types */
-#ifdef CONFIG_COMPACTION
 	PB_migrate_skip,/* If set the block is skipped by compaction */
-#endif /* CONFIG_COMPACTION */
+
+	/*
+	 * Assume the bits will always align on a word. If this assumption
+	 * changes then get/set pageblock needs updating.
+	 */
 	NR_PAGEBLOCK_BITS
 };
 
@@ -62,11 +65,33 @@ extern int pageblock_order;
 /* Forward declaration */
 struct page;
 
+unsigned long get_pageblock_flags_mask(struct page *page,
+				unsigned long end_bitidx,
+				unsigned long mask);
+void set_pageblock_flags_mask(struct page *page,
+				unsigned long flags,
+				unsigned long end_bitidx,
+				unsigned long mask);
+
 /* Declarations for getting and setting flags. See mm/page_alloc.c */
-unsigned long get_pageblock_flags_group(struct page *page,
-					int start_bitidx, int end_bitidx);
-void set_pageblock_flags_group(struct page *page, unsigned long flags,
-					int start_bitidx, int end_bitidx);
+static inline unsigned long get_pageblock_flags_group(struct page *page,
+					int start_bitidx, int end_bitidx)
+{
+	unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1;
+	unsigned long mask = (1 << nr_flag_bits) - 1;
+
+	return get_pageblock_flags_mask(page, end_bitidx, mask);
+}
+
+static inline void set_pageblock_flags_group(struct page *page,
+					unsigned long flags,
+					int start_bitidx, int end_bitidx)
+{
+	unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1;
+	unsigned long mask = (1 << nr_flag_bits) - 1;
+
+	set_pageblock_flags_mask(page, flags, end_bitidx, mask);
+}
 
 #ifdef CONFIG_COMPACTION
 #define get_pageblock_skip(page) \
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 485932c577e7..6e937809c87a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6028,53 +6028,65 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
  * @end_bitidx: The last bit of interest
  * returns pageblock_bits flags
  */
-unsigned long get_pageblock_flags_group(struct page *page,
-					int start_bitidx, int end_bitidx)
+unsigned long get_pageblock_flags_mask(struct page *page,
+					unsigned long end_bitidx,
+					unsigned long mask)
 {
 	struct zone *zone;
 	unsigned long *bitmap;
-	unsigned long pfn, bitidx;
-	unsigned long flags = 0;
-	unsigned long value = 1;
+	unsigned long pfn, bitidx, word_bitidx;
+	unsigned long word;
 
 	zone = page_zone(page);
 	pfn = page_to_pfn(page);
 	bitmap = get_pageblock_bitmap(zone, pfn);
 	bitidx = pfn_to_bitidx(zone, pfn);
+	word_bitidx = bitidx / BITS_PER_LONG;
+	bitidx &= (BITS_PER_LONG-1);
 
-	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
-		if (test_bit(bitidx + start_bitidx, bitmap))
-			flags |= value;
-
-	return flags;
+	word = bitmap[word_bitidx];
+	bitidx += end_bitidx;
+	return (word >> (BITS_PER_LONG - bitidx - 1)) & mask;
 }
 
 /**
- * set_pageblock_flags_group - Set the requested group of flags for a pageblock_nr_pages block of pages
+ * set_pageblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
  * @page: The page within the block of interest
  * @start_bitidx: The first bit of interest
  * @end_bitidx: The last bit of interest
  * @flags: The flags to set
  */
-void set_pageblock_flags_group(struct page *page, unsigned long flags,
-					int start_bitidx, int end_bitidx)
+void set_pageblock_flags_mask(struct page *page, unsigned long flags,
+					unsigned long end_bitidx,
+					unsigned long mask)
 {
 	struct zone *zone;
 	unsigned long *bitmap;
-	unsigned long pfn, bitidx;
-	unsigned long value = 1;
+	unsigned long pfn, bitidx, word_bitidx;
+	unsigned long old_word, word;
+
+	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
 
 	zone = page_zone(page);
 	pfn = page_to_pfn(page);
 	bitmap = get_pageblock_bitmap(zone, pfn);
 	bitidx = pfn_to_bitidx(zone, pfn);
+	word_bitidx = bitidx / BITS_PER_LONG;
+	bitidx &= (BITS_PER_LONG-1);
+
 	VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page);
 
-	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
-		if (flags & value)
-			__set_bit(bitidx + start_bitidx, bitmap);
-		else
-			__clear_bit(bitidx + start_bitidx, bitmap);
+	bitidx += end_bitidx;
+	mask <<= (BITS_PER_LONG - bitidx - 1);
+	flags <<= (BITS_PER_LONG - bitidx - 1);
+
+	word = ACCESS_ONCE(bitmap[word_bitidx]);
+	for (;;) {
+		old_word = cmpxchg(&bitmap[word_bitidx], word, (word & ~mask) | flags);
+		if (word == old_word)
+			break;
+		word = old_word;
+	}
 }
 
 /*
-- 
cgit 


From dc4b0caff24d9b2918e9f27bc65499ee63187eba Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 4 Jun 2014 16:10:17 -0700
Subject: mm: page_alloc: reduce number of times page_to_pfn is called

In the free path we calculate page_to_pfn multiple times. Reduce that.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h          |  9 +++++++--
 include/linux/pageblock-flags.h | 33 +++++++++++++--------------------
 mm/page_alloc.c                 | 34 +++++++++++++++++++---------------
 3 files changed, 39 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8ef1e3f71e0f..472426ac96ae 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -78,10 +78,15 @@ extern int page_group_by_mobility_disabled;
 #define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1)
 #define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1)
 
-static inline int get_pageblock_migratetype(struct page *page)
+#define get_pageblock_migratetype(page)					\
+	get_pfnblock_flags_mask(page, page_to_pfn(page),		\
+			PB_migrate_end, MIGRATETYPE_MASK)
+
+static inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn)
 {
 	BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2);
-	return get_pageblock_flags_mask(page, PB_migrate_end, MIGRATETYPE_MASK);
+	return get_pfnblock_flags_mask(page, pfn, PB_migrate_end,
+					MIGRATETYPE_MASK);
 }
 
 struct free_area {
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index c08730c10c7a..2baeee12f48e 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -65,33 +65,26 @@ extern int pageblock_order;
 /* Forward declaration */
 struct page;
 
-unsigned long get_pageblock_flags_mask(struct page *page,
+unsigned long get_pfnblock_flags_mask(struct page *page,
+				unsigned long pfn,
 				unsigned long end_bitidx,
 				unsigned long mask);
-void set_pageblock_flags_mask(struct page *page,
+
+void set_pfnblock_flags_mask(struct page *page,
 				unsigned long flags,
+				unsigned long pfn,
 				unsigned long end_bitidx,
 				unsigned long mask);
 
 /* Declarations for getting and setting flags. See mm/page_alloc.c */
-static inline unsigned long get_pageblock_flags_group(struct page *page,
-					int start_bitidx, int end_bitidx)
-{
-	unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1;
-	unsigned long mask = (1 << nr_flag_bits) - 1;
-
-	return get_pageblock_flags_mask(page, end_bitidx, mask);
-}
-
-static inline void set_pageblock_flags_group(struct page *page,
-					unsigned long flags,
-					int start_bitidx, int end_bitidx)
-{
-	unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1;
-	unsigned long mask = (1 << nr_flag_bits) - 1;
-
-	set_pageblock_flags_mask(page, flags, end_bitidx, mask);
-}
+#define get_pageblock_flags_group(page, start_bitidx, end_bitidx) \
+	get_pfnblock_flags_mask(page, page_to_pfn(page),		\
+			end_bitidx,					\
+			(1 << (end_bitidx - start_bitidx + 1)) - 1)
+#define set_pageblock_flags_group(page, flags, start_bitidx, end_bitidx) \
+	set_pfnblock_flags_mask(page, flags, page_to_pfn(page),		\
+			end_bitidx,					\
+			(1 << (end_bitidx - start_bitidx + 1)) - 1)
 
 #ifdef CONFIG_COMPACTION
 #define get_pageblock_skip(page) \
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e937809c87a..6cadc8678e28 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -560,6 +560,7 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
  */
 
 static inline void __free_one_page(struct page *page,
+		unsigned long pfn,
 		struct zone *zone, unsigned int order,
 		int migratetype)
 {
@@ -576,7 +577,7 @@ static inline void __free_one_page(struct page *page,
 
 	VM_BUG_ON(migratetype == -1);
 
-	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
+	page_idx = pfn & ((1 << MAX_ORDER) - 1);
 
 	VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
 	VM_BUG_ON_PAGE(bad_range(zone, page), page);
@@ -711,7 +712,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 			list_del(&page->lru);
 			mt = get_freepage_migratetype(page);
 			/* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
-			__free_one_page(page, zone, 0, mt);
+			__free_one_page(page, page_to_pfn(page), zone, 0, mt);
 			trace_mm_page_pcpu_drain(page, 0, mt);
 			if (likely(!is_migrate_isolate_page(page))) {
 				__mod_zone_page_state(zone, NR_FREE_PAGES, 1);
@@ -723,13 +724,15 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 	spin_unlock(&zone->lock);
 }
 
-static void free_one_page(struct zone *zone, struct page *page, int order,
+static void free_one_page(struct zone *zone,
+				struct page *page, unsigned long pfn,
+				int order,
 				int migratetype)
 {
 	spin_lock(&zone->lock);
 	zone->pages_scanned = 0;
 
-	__free_one_page(page, zone, order, migratetype);
+	__free_one_page(page, pfn, zone, order, migratetype);
 	if (unlikely(!is_migrate_isolate(migratetype)))
 		__mod_zone_freepage_state(zone, 1 << order, migratetype);
 	spin_unlock(&zone->lock);
@@ -766,15 +769,16 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 {
 	unsigned long flags;
 	int migratetype;
+	unsigned long pfn = page_to_pfn(page);
 
 	if (!free_pages_prepare(page, order))
 		return;
 
 	local_irq_save(flags);
 	__count_vm_events(PGFREE, 1 << order);
-	migratetype = get_pageblock_migratetype(page);
+	migratetype = get_pfnblock_migratetype(page, pfn);
 	set_freepage_migratetype(page, migratetype);
-	free_one_page(page_zone(page), page, order, migratetype);
+	free_one_page(page_zone(page), page, pfn, order, migratetype);
 	local_irq_restore(flags);
 }
 
@@ -1380,12 +1384,13 @@ void free_hot_cold_page(struct page *page, int cold)
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
+	unsigned long pfn = page_to_pfn(page);
 	int migratetype;
 
 	if (!free_pages_prepare(page, 0))
 		return;
 
-	migratetype = get_pageblock_migratetype(page);
+	migratetype = get_pfnblock_migratetype(page, pfn);
 	set_freepage_migratetype(page, migratetype);
 	local_irq_save(flags);
 	__count_vm_event(PGFREE);
@@ -1399,7 +1404,7 @@ void free_hot_cold_page(struct page *page, int cold)
 	 */
 	if (migratetype >= MIGRATE_PCPTYPES) {
 		if (unlikely(is_migrate_isolate(migratetype))) {
-			free_one_page(zone, page, 0, migratetype);
+			free_one_page(zone, page, pfn, 0, migratetype);
 			goto out;
 		}
 		migratetype = MIGRATE_MOVABLE;
@@ -6028,17 +6033,16 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
  * @end_bitidx: The last bit of interest
  * returns pageblock_bits flags
  */
-unsigned long get_pageblock_flags_mask(struct page *page,
+unsigned long get_pfnblock_flags_mask(struct page *page, unsigned long pfn,
 					unsigned long end_bitidx,
 					unsigned long mask)
 {
 	struct zone *zone;
 	unsigned long *bitmap;
-	unsigned long pfn, bitidx, word_bitidx;
+	unsigned long bitidx, word_bitidx;
 	unsigned long word;
 
 	zone = page_zone(page);
-	pfn = page_to_pfn(page);
 	bitmap = get_pageblock_bitmap(zone, pfn);
 	bitidx = pfn_to_bitidx(zone, pfn);
 	word_bitidx = bitidx / BITS_PER_LONG;
@@ -6050,25 +6054,25 @@ unsigned long get_pageblock_flags_mask(struct page *page,
 }
 
 /**
- * set_pageblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
+ * set_pfnblock_flags_mask - Set the requested group of flags for a pageblock_nr_pages block of pages
  * @page: The page within the block of interest
  * @start_bitidx: The first bit of interest
  * @end_bitidx: The last bit of interest
  * @flags: The flags to set
  */
-void set_pageblock_flags_mask(struct page *page, unsigned long flags,
+void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
+					unsigned long pfn,
 					unsigned long end_bitidx,
 					unsigned long mask)
 {
 	struct zone *zone;
 	unsigned long *bitmap;
-	unsigned long pfn, bitidx, word_bitidx;
+	unsigned long bitidx, word_bitidx;
 	unsigned long old_word, word;
 
 	BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
 
 	zone = page_zone(page);
-	pfn = page_to_pfn(page);
 	bitmap = get_pageblock_bitmap(zone, pfn);
 	bitidx = pfn_to_bitidx(zone, pfn);
 	word_bitidx = bitidx / BITS_PER_LONG;
-- 
cgit 


From 7aeb09f9104b760fc53c98cb7d20d06640baf9e6 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 4 Jun 2014 16:10:21 -0700
Subject: mm: page_alloc: use unsigned int for order in more places

X86 prefers the use of unsigned types for iterators and there is a
tendency to mix whether a signed or unsigned type is used for page order.
This converts a number of sites in mm/page_alloc.c to use unsigned int for
order where possible.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  8 ++++----
 mm/page_alloc.c        | 43 +++++++++++++++++++++++--------------------
 2 files changed, 27 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 472426ac96ae..6cbd1b6c3d20 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -817,10 +817,10 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat)
 extern struct mutex zonelists_mutex;
 void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);
 void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);
-bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-		int classzone_idx, int alloc_flags);
-bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
-		int classzone_idx, int alloc_flags);
+bool zone_watermark_ok(struct zone *z, unsigned int order,
+		unsigned long mark, int classzone_idx, int alloc_flags);
+bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
+		unsigned long mark, int classzone_idx, int alloc_flags);
 enum memmap_context {
 	MEMMAP_EARLY,
 	MEMMAP_HOTPLUG,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ce4d3716214c..37ef1b87f1f3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -409,7 +409,8 @@ static int destroy_compound_page(struct page *page, unsigned long order)
 	return bad;
 }
 
-static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+static inline void prep_zero_page(struct page *page, unsigned int order,
+							gfp_t gfp_flags)
 {
 	int i;
 
@@ -453,7 +454,7 @@ static inline void set_page_guard_flag(struct page *page) { }
 static inline void clear_page_guard_flag(struct page *page) { }
 #endif
 
-static inline void set_page_order(struct page *page, int order)
+static inline void set_page_order(struct page *page, unsigned int order)
 {
 	set_page_private(page, order);
 	__SetPageBuddy(page);
@@ -504,7 +505,7 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
  * For recording page's order, we use page_private(page).
  */
 static inline int page_is_buddy(struct page *page, struct page *buddy,
-								int order)
+							unsigned int order)
 {
 	if (!pfn_valid_within(page_to_pfn(buddy)))
 		return 0;
@@ -726,7 +727,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 
 static void free_one_page(struct zone *zone,
 				struct page *page, unsigned long pfn,
-				int order,
+				unsigned int order,
 				int migratetype)
 {
 	spin_lock(&zone->lock);
@@ -897,7 +898,7 @@ static inline int check_new_page(struct page *page)
 	return 0;
 }
 
-static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
+static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags)
 {
 	int i;
 
@@ -1108,16 +1109,17 @@ static int try_to_steal_freepages(struct zone *zone, struct page *page,
 
 /* Remove an element from the buddy allocator from the fallback list */
 static inline struct page *
-__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
+__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 {
 	struct free_area *area;
-	int current_order;
+	unsigned int current_order;
 	struct page *page;
 	int migratetype, new_type, i;
 
 	/* Find the largest possible block of pages in the other list */
-	for (current_order = MAX_ORDER-1; current_order >= order;
-						--current_order) {
+	for (current_order = MAX_ORDER-1;
+				current_order >= order && current_order <= MAX_ORDER-1;
+				--current_order) {
 		for (i = 0;; i++) {
 			migratetype = fallbacks[start_migratetype][i];
 
@@ -1345,7 +1347,7 @@ void mark_free_pages(struct zone *zone)
 {
 	unsigned long pfn, max_zone_pfn;
 	unsigned long flags;
-	int order, t;
+	unsigned int order, t;
 	struct list_head *curr;
 
 	if (zone_is_empty(zone))
@@ -1541,8 +1543,8 @@ int split_free_page(struct page *page)
  */
 static inline
 struct page *buffered_rmqueue(struct zone *preferred_zone,
-			struct zone *zone, int order, gfp_t gfp_flags,
-			int migratetype)
+			struct zone *zone, unsigned int order,
+			gfp_t gfp_flags, int migratetype)
 {
 	unsigned long flags;
 	struct page *page;
@@ -1691,8 +1693,9 @@ static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
  * Return true if free pages are above 'mark'. This takes into account the order
  * of the allocation.
  */
-static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-		      int classzone_idx, int alloc_flags, long free_pages)
+static bool __zone_watermark_ok(struct zone *z, unsigned int order,
+			unsigned long mark, int classzone_idx, int alloc_flags,
+			long free_pages)
 {
 	/* free_pages my go negative - that's OK */
 	long min = mark;
@@ -1726,15 +1729,15 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 	return true;
 }
 
-bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
 		      int classzone_idx, int alloc_flags)
 {
 	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
 					zone_page_state(z, NR_FREE_PAGES));
 }
 
-bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
-		      int classzone_idx, int alloc_flags)
+bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
+			unsigned long mark, int classzone_idx, int alloc_flags)
 {
 	long free_pages = zone_page_state(z, NR_FREE_PAGES);
 
@@ -4121,7 +4124,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 
 static void __meminit zone_init_free_lists(struct zone *zone)
 {
-	int order, t;
+	unsigned int order, t;
 	for_each_migratetype_order(order, t) {
 		INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
 		zone->free_area[order].nr_free = 0;
@@ -6444,7 +6447,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 {
 	struct page *page;
 	struct zone *zone;
-	int order, i;
+	unsigned int order, i;
 	unsigned long pfn;
 	unsigned long flags;
 	/* find the first valid pfn */
@@ -6496,7 +6499,7 @@ bool is_free_buddy_page(struct page *page)
 	struct zone *zone = page_zone(page);
 	unsigned long pfn = page_to_pfn(page);
 	unsigned long flags;
-	int order;
+	unsigned int order;
 
 	spin_lock_irqsave(&zone->lock, flags);
 	for (order = 0; order < MAX_ORDER; order++) {
-- 
cgit 


From b745bc85f21ea707e4ea1a91948055fa3e72c77b Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 4 Jun 2014 16:10:22 -0700
Subject: mm: page_alloc: convert hot/cold parameter and immediate callers to
 bool

cold is a bool, make it one.  Make the likely case the "if" part of the
block instead of the else as according to the optimisation manual this is
preferred.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/tile/mm/homecache.c |  2 +-
 fs/fuse/dev.c            |  2 +-
 include/linux/gfp.h      |  4 ++--
 include/linux/pagemap.h  |  2 +-
 include/linux/swap.h     |  2 +-
 mm/page_alloc.c          | 20 ++++++++++----------
 mm/swap.c                |  4 ++--
 mm/swap_state.c          |  2 +-
 mm/vmscan.c              |  6 +++---
 9 files changed, 22 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c
index 004ba568d93f..33294fdc402e 100644
--- a/arch/tile/mm/homecache.c
+++ b/arch/tile/mm/homecache.c
@@ -417,7 +417,7 @@ void __homecache_free_pages(struct page *page, unsigned int order)
 	if (put_page_testzero(page)) {
 		homecache_change_page_home(page, order, PAGE_HOME_HASH);
 		if (order == 0) {
-			free_hot_cold_page(page, 0);
+			free_hot_cold_page(page, false);
 		} else {
 			init_page_count(page);
 			__free_pages(page, order);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index aac71ce373e4..098f97bdcf1b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1614,7 +1614,7 @@ out_finish:
 
 static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
 {
-	release_pages(req->pages, req->num_pages, 0);
+	release_pages(req->pages, req->num_pages, false);
 }
 
 static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index d382db71e300..454c99fdb79d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -371,8 +371,8 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
 
 extern void __free_pages(struct page *page, unsigned int order);
 extern void free_pages(unsigned long addr, unsigned int order);
-extern void free_hot_cold_page(struct page *page, int cold);
-extern void free_hot_cold_page_list(struct list_head *list, int cold);
+extern void free_hot_cold_page(struct page *page, bool cold);
+extern void free_hot_cold_page_list(struct list_head *list, bool cold);
 
 extern void __free_kmem_pages(struct page *page, unsigned int order);
 extern void free_kmem_pages(unsigned long addr, unsigned int order);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 718214c5584e..c16fb6d06e36 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -110,7 +110,7 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 
 #define page_cache_get(page)		get_page(page)
 #define page_cache_release(page)	put_page(page)
-void release_pages(struct page **pages, int nr, int cold);
+void release_pages(struct page **pages, int nr, bool cold);
 
 /*
  * speculatively take a reference to a page.
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 9155bcdcce12..97cf16164c46 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -477,7 +477,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 #define free_page_and_swap_cache(page) \
 	page_cache_release(page)
 #define free_pages_and_swap_cache(pages, nr) \
-	release_pages((pages), (nr), 0);
+	release_pages((pages), (nr), false);
 
 static inline void show_swap_cache_info(void)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 37ef1b87f1f3..09345ab7fb63 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1199,7 +1199,7 @@ retry_reserve:
  */
 static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			unsigned long count, struct list_head *list,
-			int migratetype, int cold)
+			int migratetype, bool cold)
 {
 	int i;
 
@@ -1218,7 +1218,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 		 * merge IO requests if the physical pages are ordered
 		 * properly.
 		 */
-		if (likely(cold == 0))
+		if (likely(!cold))
 			list_add(&page->lru, list);
 		else
 			list_add_tail(&page->lru, list);
@@ -1379,9 +1379,9 @@ void mark_free_pages(struct zone *zone)
 
 /*
  * Free a 0-order page
- * cold == 1 ? free a cold page : free a hot page
+ * cold == true ? free a cold page : free a hot page
  */
-void free_hot_cold_page(struct page *page, int cold)
+void free_hot_cold_page(struct page *page, bool cold)
 {
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
@@ -1413,10 +1413,10 @@ void free_hot_cold_page(struct page *page, int cold)
 	}
 
 	pcp = &this_cpu_ptr(zone->pageset)->pcp;
-	if (cold)
-		list_add_tail(&page->lru, &pcp->lists[migratetype]);
-	else
+	if (!cold)
 		list_add(&page->lru, &pcp->lists[migratetype]);
+	else
+		list_add_tail(&page->lru, &pcp->lists[migratetype]);
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
 		unsigned long batch = ACCESS_ONCE(pcp->batch);
@@ -1431,7 +1431,7 @@ out:
 /*
  * Free a list of 0-order pages
  */
-void free_hot_cold_page_list(struct list_head *list, int cold)
+void free_hot_cold_page_list(struct list_head *list, bool cold)
 {
 	struct page *page, *next;
 
@@ -1548,7 +1548,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
 {
 	unsigned long flags;
 	struct page *page;
-	int cold = !!(gfp_flags & __GFP_COLD);
+	bool cold = ((gfp_flags & __GFP_COLD) != 0);
 
 again:
 	if (likely(order == 0)) {
@@ -2823,7 +2823,7 @@ void __free_pages(struct page *page, unsigned int order)
 {
 	if (put_page_testzero(page)) {
 		if (order == 0)
-			free_hot_cold_page(page, 0);
+			free_hot_cold_page(page, false);
 		else
 			__free_pages_ok(page, order);
 	}
diff --git a/mm/swap.c b/mm/swap.c
index c8d6df556ce6..11ebb9714f49 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -67,7 +67,7 @@ static void __page_cache_release(struct page *page)
 static void __put_single_page(struct page *page)
 {
 	__page_cache_release(page);
-	free_hot_cold_page(page, 0);
+	free_hot_cold_page(page, false);
 }
 
 static void __put_compound_page(struct page *page)
@@ -860,7 +860,7 @@ void lru_add_drain_all(void)
  * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
  * will free it.
  */
-void release_pages(struct page **pages, int nr, int cold)
+void release_pages(struct page **pages, int nr, bool cold)
 {
 	int i;
 	LIST_HEAD(pages_to_free);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e76ace30d436..2972eee184a4 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -270,7 +270,7 @@ void free_pages_and_swap_cache(struct page **pages, int nr)
 
 		for (i = 0; i < todo; i++)
 			free_swap_cache(pagep[i]);
-		release_pages(pagep, todo, 0);
+		release_pages(pagep, todo, false);
 		pagep += todo;
 		nr -= todo;
 	}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9253e188000f..494cd632178c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1121,7 +1121,7 @@ keep:
 		VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
 	}
 
-	free_hot_cold_page_list(&free_pages, 1);
+	free_hot_cold_page_list(&free_pages, true);
 
 	list_splice(&ret_pages, page_list);
 	count_vm_events(PGACTIVATE, pgactivate);
@@ -1532,7 +1532,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	free_hot_cold_page_list(&page_list, 1);
+	free_hot_cold_page_list(&page_list, true);
 
 	/*
 	 * If reclaim is isolating dirty pages under writeback, it implies
@@ -1755,7 +1755,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
 
-	free_hot_cold_page_list(&l_hold, 1);
+	free_hot_cold_page_list(&l_hold, true);
 }
 
 #ifdef CONFIG_SWAP
-- 
cgit 


From 07a427884348d38a6fd56fa4d78249c407196650 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 4 Jun 2014 16:10:24 -0700
Subject: mm: shmem: avoid atomic operation during shmem_getpage_gfp

shmem_getpage_gfp uses an atomic operation to set the SwapBacked field
before it's even added to the LRU or visible.  This is unnecessary as what
could it possibly race against?  Use an unlocked variant.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 1 +
 mm/shmem.c                 | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d1fe1a761047..4d4b39ab2341 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -208,6 +208,7 @@ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
 PAGEFLAG(SavePinned, savepinned);			/* Xen */
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
+	__SETPAGEFLAG(SwapBacked, swapbacked)
 
 __PAGEFLAG(SlobFree, slob_free)
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 9f70e02111c6..f47fb38c4889 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1132,7 +1132,7 @@ repeat:
 			goto decused;
 		}
 
-		SetPageSwapBacked(page);
+		__SetPageSwapBacked(page);
 		__set_page_locked(page);
 		error = mem_cgroup_charge_file(page, current->mm,
 						gfp & GFP_RECLAIM_MASK);
-- 
cgit 


From 2457aec63745e235bcafb7ef312b182d8682f0fc Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 4 Jun 2014 16:10:31 -0700
Subject: mm: non-atomically mark page accessed during page cache allocation
 where possible

aops->write_begin may allocate a new page and make it visible only to have
mark_page_accessed called almost immediately after.  Once the page is
visible the atomic operations are necessary which is noticeable overhead
when writing to an in-memory filesystem like tmpfs but should also be
noticeable with fast storage.  The objective of the patch is to initialise
the accessed information with non-atomic operations before the page is
visible.

The bulk of filesystems directly or indirectly use
grab_cache_page_write_begin or find_or_create_page for the initial
allocation of a page cache page.  This patch adds an init_page_accessed()
helper which behaves like the first call to mark_page_accessed() but may be
called before the page is visible and can be done non-atomically.

The primary APIs of concern in this case are the following and are used
by most filesystems.

	find_get_page
	find_lock_page
	find_or_create_page
	grab_cache_page_nowait
	grab_cache_page_write_begin

All of them are very similar in detail so the patch creates a core helper
pagecache_get_page() which takes a flags parameter that affects its
behavior such as whether the page should be marked accessed or not.  The
old API is preserved but is basically a thin wrapper around this core
function.

Each of the filesystems is then updated to avoid calling
mark_page_accessed when it is known that the VM interfaces have already
done the job.  There is a slight snag in that the timing of the
mark_page_accessed() has now changed so in rare cases it's possible a page
gets to the end of the LRU as PageReferenced whereas previously it might
have been repromoted.  This is expected to be rare but it's worth the
filesystem people thinking about it in case they see a problem with the
timing change.  It is also the case that some filesystems may be marking
pages accessed that previously did not but it makes sense that filesystems
have consistent behaviour in this regard.

The test case used to evaluate this is a simple dd of a large file done
multiple times with the file deleted on each iteration.  The size of the
file is 1/10th physical memory to avoid dirty page balancing.  In the
async case it will be possible that the workload completes without even
hitting the disk and will have variable results but highlight the impact
of mark_page_accessed for async IO.  The sync results are expected to be
more stable.  The exception is tmpfs where the normal case is for the "IO"
to not hit the disk.

The test machine was single socket and UMA to avoid any scheduling or NUMA
artifacts.  Throughput and wall times are presented for sync IO, only wall
times are shown for async as the granularity reported by dd and the
variability is unsuitable for comparison.  As async results were variable
due to writeback timings, I'm only reporting the maximum figures.  The sync
results were stable enough to make the mean and stddev uninteresting.

The performance results are reported based on a run with no profiling.
Profile data is based on a separate run with oprofile running.

async dd
                                    3.15.0-rc3            3.15.0-rc3
                                       vanilla           accessed-v2
ext3    Max      elapsed     13.9900 (  0.00%)     11.5900 ( 17.16%)
tmpfs	Max      elapsed      0.5100 (  0.00%)      0.4900 (  3.92%)
btrfs   Max      elapsed     12.8100 (  0.00%)     12.7800 (  0.23%)
ext4	Max      elapsed     18.6000 (  0.00%)     13.3400 ( 28.28%)
xfs	Max      elapsed     12.5600 (  0.00%)      2.0900 ( 83.36%)

The XFS figure is a bit strange as it managed to avoid a worst case by
sheer luck but the average figures looked reasonable.

        samples percentage
ext3       86107    0.9783  vmlinux-3.15.0-rc4-vanilla        mark_page_accessed
ext3       23833    0.2710  vmlinux-3.15.0-rc4-accessed-v3r25 mark_page_accessed
ext3        5036    0.0573  vmlinux-3.15.0-rc4-accessed-v3r25 init_page_accessed
ext4       64566    0.8961  vmlinux-3.15.0-rc4-vanilla        mark_page_accessed
ext4        5322    0.0713  vmlinux-3.15.0-rc4-accessed-v3r25 mark_page_accessed
ext4        2869    0.0384  vmlinux-3.15.0-rc4-accessed-v3r25 init_page_accessed
xfs        62126    1.7675  vmlinux-3.15.0-rc4-vanilla        mark_page_accessed
xfs         1904    0.0554  vmlinux-3.15.0-rc4-accessed-v3r25 init_page_accessed
xfs          103    0.0030  vmlinux-3.15.0-rc4-accessed-v3r25 mark_page_accessed
btrfs      10655    0.1338  vmlinux-3.15.0-rc4-vanilla        mark_page_accessed
btrfs       2020    0.0273  vmlinux-3.15.0-rc4-accessed-v3r25 init_page_accessed
btrfs        587    0.0079  vmlinux-3.15.0-rc4-accessed-v3r25 mark_page_accessed
tmpfs      59562    3.2628  vmlinux-3.15.0-rc4-vanilla        mark_page_accessed
tmpfs       1210    0.0696  vmlinux-3.15.0-rc4-accessed-v3r25 init_page_accessed
tmpfs         94    0.0054  vmlinux-3.15.0-rc4-accessed-v3r25 mark_page_accessed

[akpm@linux-foundation.org: don't run init_page_accessed() against an uninitialised pointer]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Tested-by: Prabhakar Lad <prabhakar.csengg@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/btrfs/extent_io.c       |  11 +--
 fs/btrfs/file.c            |   5 +-
 fs/buffer.c                |   7 +-
 fs/ext4/mballoc.c          |  14 ++--
 fs/f2fs/checkpoint.c       |   3 -
 fs/f2fs/node.c             |   2 -
 fs/fuse/file.c             |   2 -
 fs/gfs2/aops.c             |   1 -
 fs/gfs2/meta_io.c          |   4 +-
 fs/ntfs/attrib.c           |   1 -
 fs/ntfs/file.c             |   1 -
 include/linux/page-flags.h |   1 +
 include/linux/pagemap.h    | 107 ++++++++++++++++++++++--
 include/linux/swap.h       |   1 +
 mm/filemap.c               | 202 +++++++++++++++++----------------------------
 mm/shmem.c                 |   6 +-
 mm/swap.c                  |  11 +++
 17 files changed, 217 insertions(+), 162 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index f29a54e454d4..4cd0ac983f91 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4510,7 +4510,8 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
 	spin_unlock(&eb->refs_lock);
 }
 
-static void mark_extent_buffer_accessed(struct extent_buffer *eb)
+static void mark_extent_buffer_accessed(struct extent_buffer *eb,
+		struct page *accessed)
 {
 	unsigned long num_pages, i;
 
@@ -4519,7 +4520,8 @@ static void mark_extent_buffer_accessed(struct extent_buffer *eb)
 	num_pages = num_extent_pages(eb->start, eb->len);
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = extent_buffer_page(eb, i);
-		mark_page_accessed(p);
+		if (p != accessed)
+			mark_page_accessed(p);
 	}
 }
 
@@ -4533,7 +4535,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 			       start >> PAGE_CACHE_SHIFT);
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
-		mark_extent_buffer_accessed(eb);
+		mark_extent_buffer_accessed(eb, NULL);
 		return eb;
 	}
 	rcu_read_unlock();
@@ -4581,7 +4583,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 				spin_unlock(&mapping->private_lock);
 				unlock_page(p);
 				page_cache_release(p);
-				mark_extent_buffer_accessed(exists);
+				mark_extent_buffer_accessed(exists, p);
 				goto free_eb;
 			}
 
@@ -4596,7 +4598,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		attach_extent_buffer_page(eb, p);
 		spin_unlock(&mapping->private_lock);
 		WARN_ON(PageDirty(p));
-		mark_page_accessed(p);
 		eb->pages[i] = p;
 		if (!PageUptodate(p))
 			uptodate = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ae6af072b635..74272a3f9d9b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -470,11 +470,12 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
 	for (i = 0; i < num_pages; i++) {
 		/* page checked is some magic around finding pages that
 		 * have been modified without going through btrfs_set_page_dirty
-		 * clear it here
+		 * clear it here. There should be no need to mark the pages
+		 * accessed as prepare_pages should have marked them accessed
+		 * in prepare_pages via find_or_create_page()
 		 */
 		ClearPageChecked(pages[i]);
 		unlock_page(pages[i]);
-		mark_page_accessed(pages[i]);
 		page_cache_release(pages[i]);
 	}
 }
diff --git a/fs/buffer.c b/fs/buffer.c
index 0d3e8d5a2299..eba6e4f621ce 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -227,7 +227,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
 	int all_mapped = 1;
 
 	index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
-	page = find_get_page(bd_mapping, index);
+	page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
 	if (!page)
 		goto out;
 
@@ -1366,12 +1366,13 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
 	struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
 
 	if (bh == NULL) {
+		/* __find_get_block_slow will mark the page accessed */
 		bh = __find_get_block_slow(bdev, block);
 		if (bh)
 			bh_lru_install(bh);
-	}
-	if (bh)
+	} else
 		touch_buffer(bh);
+
 	return bh;
 }
 EXPORT_SYMBOL(__find_get_block);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c8238a26818c..afe8a133e3d1 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1044,6 +1044,8 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 	 * allocating. If we are looking at the buddy cache we would
 	 * have taken a reference using ext4_mb_load_buddy and that
 	 * would have pinned buddy page to page cache.
+	 * The call to ext4_mb_get_buddy_page_lock will mark the
+	 * page accessed.
 	 */
 	ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
 	if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
@@ -1062,7 +1064,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 		ret = -EIO;
 		goto err;
 	}
-	mark_page_accessed(page);
 
 	if (e4b.bd_buddy_page == NULL) {
 		/*
@@ -1082,7 +1083,6 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 		ret = -EIO;
 		goto err;
 	}
-	mark_page_accessed(page);
 err:
 	ext4_mb_put_buddy_page_lock(&e4b);
 	return ret;
@@ -1141,7 +1141,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 
 	/* we could use find_or_create_page(), but it locks page
 	 * what we'd like to avoid in fast path ... */
-	page = find_get_page(inode->i_mapping, pnum);
+	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
 	if (page == NULL || !PageUptodate(page)) {
 		if (page)
 			/*
@@ -1176,15 +1176,16 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 		ret = -EIO;
 		goto err;
 	}
+
+	/* Pages marked accessed already */
 	e4b->bd_bitmap_page = page;
 	e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
-	mark_page_accessed(page);
 
 	block++;
 	pnum = block / blocks_per_page;
 	poff = block % blocks_per_page;
 
-	page = find_get_page(inode->i_mapping, pnum);
+	page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
 	if (page == NULL || !PageUptodate(page)) {
 		if (page)
 			page_cache_release(page);
@@ -1209,9 +1210,10 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 		ret = -EIO;
 		goto err;
 	}
+
+	/* Pages marked accessed already */
 	e4b->bd_buddy_page = page;
 	e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
-	mark_page_accessed(page);
 
 	BUG_ON(e4b->bd_bitmap_page == NULL);
 	BUG_ON(e4b->bd_buddy_page == NULL);
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 4aa521aa9bc3..c405b8f17054 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -69,7 +69,6 @@ repeat:
 		goto repeat;
 	}
 out:
-	mark_page_accessed(page);
 	return page;
 }
 
@@ -137,13 +136,11 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
 		if (!page)
 			continue;
 		if (PageUptodate(page)) {
-			mark_page_accessed(page);
 			f2fs_put_page(page, 1);
 			continue;
 		}
 
 		f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
-		mark_page_accessed(page);
 		f2fs_put_page(page, 0);
 	}
 out:
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index a161e955c4c8..57caa6eaf47b 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -967,7 +967,6 @@ repeat:
 		goto repeat;
 	}
 got_it:
-	mark_page_accessed(page);
 	return page;
 }
 
@@ -1022,7 +1021,6 @@ page_hit:
 		f2fs_put_page(page, 1);
 		return ERR_PTR(-EIO);
 	}
-	mark_page_accessed(page);
 	return page;
 }
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f680d2c44e97..903cbc9cd6bd 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1089,8 +1089,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
 		tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
 		flush_dcache_page(page);
 
-		mark_page_accessed(page);
-
 		if (!tmp) {
 			unlock_page(page);
 			page_cache_release(page);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 5a49b037da81..492123cda64a 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -577,7 +577,6 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
 		p = kmap_atomic(page);
 		memcpy(buf + copied, p + offset, amt);
 		kunmap_atomic(p);
-		mark_page_accessed(page);
 		page_cache_release(page);
 		copied += amt;
 		index++;
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 2cf09b63a6b4..b984a6e190bc 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -136,7 +136,8 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
 			yield();
 		}
 	} else {
-		page = find_lock_page(mapping, index);
+		page = find_get_page_flags(mapping, index,
+						FGP_LOCK|FGP_ACCESSED);
 		if (!page)
 			return NULL;
 	}
@@ -153,7 +154,6 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
 		map_bh(bh, sdp->sd_vfs, blkno);
 
 	unlock_page(page);
-	mark_page_accessed(page);
 	page_cache_release(page);
 
 	return bh;
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index a27e3fecefaf..250ed5b20c8f 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1748,7 +1748,6 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
 	if (page) {
 		set_page_dirty(page);
 		unlock_page(page);
-		mark_page_accessed(page);
 		page_cache_release(page);
 	}
 	ntfs_debug("Done.");
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index db9bd8a31725..86ddab916b66 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2060,7 +2060,6 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
 		}
 		do {
 			unlock_page(pages[--do_pages]);
-			mark_page_accessed(pages[do_pages]);
 			page_cache_release(pages[do_pages]);
 		} while (do_pages);
 		if (unlikely(status))
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4d4b39ab2341..2093eb72785e 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -198,6 +198,7 @@ struct page;	/* forward declaration */
 TESTPAGEFLAG(Locked, locked)
 PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error)
 PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
+	__SETPAGEFLAG(Referenced, referenced)
 PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
 PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
 PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c16fb6d06e36..0a97b583ee8d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -259,12 +259,109 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
 pgoff_t page_cache_prev_hole(struct address_space *mapping,
 			     pgoff_t index, unsigned long max_scan);
 
+#define FGP_ACCESSED		0x00000001
+#define FGP_LOCK		0x00000002
+#define FGP_CREAT		0x00000004
+#define FGP_WRITE		0x00000008
+#define FGP_NOFS		0x00000010
+#define FGP_NOWAIT		0x00000020
+
+struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
+		int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask);
+
+/**
+ * find_get_page - find and get a page reference
+ * @mapping: the address_space to search
+ * @offset: the page index
+ *
+ * Looks up the page cache slot at @mapping & @offset.  If there is a
+ * page cache page, it is returned with an increased refcount.
+ *
+ * Otherwise, %NULL is returned.
+ */
+static inline struct page *find_get_page(struct address_space *mapping,
+					pgoff_t offset)
+{
+	return pagecache_get_page(mapping, offset, 0, 0, 0);
+}
+
+static inline struct page *find_get_page_flags(struct address_space *mapping,
+					pgoff_t offset, int fgp_flags)
+{
+	return pagecache_get_page(mapping, offset, fgp_flags, 0, 0);
+}
+
+/**
+ * find_lock_page - locate, pin and lock a pagecache page
+ * pagecache_get_page - find and get a page reference
+ * @mapping: the address_space to search
+ * @offset: the page index
+ *
+ * Looks up the page cache slot at @mapping & @offset.  If there is a
+ * page cache page, it is returned locked and with an increased
+ * refcount.
+ *
+ * Otherwise, %NULL is returned.
+ *
+ * find_lock_page() may sleep.
+ */
+static inline struct page *find_lock_page(struct address_space *mapping,
+					pgoff_t offset)
+{
+	return pagecache_get_page(mapping, offset, FGP_LOCK, 0, 0);
+}
+
+/**
+ * find_or_create_page - locate or add a pagecache page
+ * @mapping: the page's address_space
+ * @index: the page's index into the mapping
+ * @gfp_mask: page allocation mode
+ *
+ * Looks up the page cache slot at @mapping & @offset.  If there is a
+ * page cache page, it is returned locked and with an increased
+ * refcount.
+ *
+ * If the page is not present, a new page is allocated using @gfp_mask
+ * and added to the page cache and the VM's LRU list.  The page is
+ * returned locked and with an increased refcount.
+ *
+ * On memory exhaustion, %NULL is returned.
+ *
+ * find_or_create_page() may sleep, even if @gfp_flags specifies an
+ * atomic allocation!
+ */
+static inline struct page *find_or_create_page(struct address_space *mapping,
+					pgoff_t offset, gfp_t gfp_mask)
+{
+	return pagecache_get_page(mapping, offset,
+					FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
+					gfp_mask, gfp_mask & GFP_RECLAIM_MASK);
+}
+
+/**
+ * grab_cache_page_nowait - returns locked page at given index in given cache
+ * @mapping: target address_space
+ * @index: the page index
+ *
+ * Same as grab_cache_page(), but do not wait if the page is unavailable.
+ * This is intended for speculative data generators, where the data can
+ * be regenerated if the page couldn't be grabbed.  This routine should
+ * be safe to call while holding the lock for another page.
+ *
+ * Clear __GFP_FS when allocating the page to avoid recursion into the fs
+ * and deadlock against the caller's locked page.
+ */
+static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
+				pgoff_t index)
+{
+	return pagecache_get_page(mapping, index,
+			FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
+			mapping_gfp_mask(mapping),
+			GFP_NOFS);
+}
+
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);
-struct page *find_get_page(struct address_space *mapping, pgoff_t offset);
 struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset);
-struct page *find_lock_page(struct address_space *mapping, pgoff_t offset);
-struct page *find_or_create_page(struct address_space *mapping, pgoff_t index,
-				 gfp_t gfp_mask);
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
 			  unsigned int nr_entries, struct page **entries,
 			  pgoff_t *indices);
@@ -287,8 +384,6 @@ static inline struct page *grab_cache_page(struct address_space *mapping,
 	return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
 }
 
-extern struct page * grab_cache_page_nowait(struct address_space *mapping,
-				pgoff_t index);
 extern struct page * read_cache_page(struct address_space *mapping,
 				pgoff_t index, filler_t *filler, void *data);
 extern struct page * read_cache_page_gfp(struct address_space *mapping,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 97cf16164c46..4348d95e571f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -311,6 +311,7 @@ extern void lru_add_page_tail(struct page *page, struct page *page_tail,
 			 struct lruvec *lruvec, struct list_head *head);
 extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
+extern void init_page_accessed(struct page *page);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_all(void);
diff --git a/mm/filemap.c b/mm/filemap.c
index 47d235b357a7..0fcd792103f3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -981,26 +981,6 @@ out:
 }
 EXPORT_SYMBOL(find_get_entry);
 
-/**
- * find_get_page - find and get a page reference
- * @mapping: the address_space to search
- * @offset: the page index
- *
- * Looks up the page cache slot at @mapping & @offset.  If there is a
- * page cache page, it is returned with an increased refcount.
- *
- * Otherwise, %NULL is returned.
- */
-struct page *find_get_page(struct address_space *mapping, pgoff_t offset)
-{
-	struct page *page = find_get_entry(mapping, offset);
-
-	if (radix_tree_exceptional_entry(page))
-		page = NULL;
-	return page;
-}
-EXPORT_SYMBOL(find_get_page);
-
 /**
  * find_lock_entry - locate, pin and lock a page cache entry
  * @mapping: the address_space to search
@@ -1038,66 +1018,84 @@ repeat:
 EXPORT_SYMBOL(find_lock_entry);
 
 /**
- * find_lock_page - locate, pin and lock a pagecache page
+ * pagecache_get_page - find and get a page reference
  * @mapping: the address_space to search
  * @offset: the page index
+ * @fgp_flags: PCG flags
+ * @gfp_mask: gfp mask to use if a page is to be allocated
  *
- * Looks up the page cache slot at @mapping & @offset.  If there is a
- * page cache page, it is returned locked and with an increased
- * refcount.
- *
- * Otherwise, %NULL is returned.
- *
- * find_lock_page() may sleep.
- */
-struct page *find_lock_page(struct address_space *mapping, pgoff_t offset)
-{
-	struct page *page = find_lock_entry(mapping, offset);
-
-	if (radix_tree_exceptional_entry(page))
-		page = NULL;
-	return page;
-}
-EXPORT_SYMBOL(find_lock_page);
-
-/**
- * find_or_create_page - locate or add a pagecache page
- * @mapping: the page's address_space
- * @index: the page's index into the mapping
- * @gfp_mask: page allocation mode
+ * Looks up the page cache slot at @mapping & @offset.
  *
- * Looks up the page cache slot at @mapping & @offset.  If there is a
- * page cache page, it is returned locked and with an increased
- * refcount.
+ * PCG flags modify how the page is returned
  *
- * If the page is not present, a new page is allocated using @gfp_mask
- * and added to the page cache and the VM's LRU list.  The page is
- * returned locked and with an increased refcount.
+ * FGP_ACCESSED: the page will be marked accessed
+ * FGP_LOCK: Page is return locked
+ * FGP_CREAT: If page is not present then a new page is allocated using
+ *		@gfp_mask and added to the page cache and the VM's LRU
+ *		list. The page is returned locked and with an increased
+ *		refcount. Otherwise, %NULL is returned.
  *
- * On memory exhaustion, %NULL is returned.
+ * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
+ * if the GFP flags specified for FGP_CREAT are atomic.
  *
- * find_or_create_page() may sleep, even if @gfp_flags specifies an
- * atomic allocation!
+ * If there is a page cache page, it is returned with an increased refcount.
  */
-struct page *find_or_create_page(struct address_space *mapping,
-		pgoff_t index, gfp_t gfp_mask)
+struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
+	int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask)
 {
 	struct page *page;
-	int err;
+
 repeat:
-	page = find_lock_page(mapping, index);
-	if (!page) {
-		page = __page_cache_alloc(gfp_mask);
+	page = find_get_entry(mapping, offset);
+	if (radix_tree_exceptional_entry(page))
+		page = NULL;
+	if (!page)
+		goto no_page;
+
+	if (fgp_flags & FGP_LOCK) {
+		if (fgp_flags & FGP_NOWAIT) {
+			if (!trylock_page(page)) {
+				page_cache_release(page);
+				return NULL;
+			}
+		} else {
+			lock_page(page);
+		}
+
+		/* Has the page been truncated? */
+		if (unlikely(page->mapping != mapping)) {
+			unlock_page(page);
+			page_cache_release(page);
+			goto repeat;
+		}
+		VM_BUG_ON_PAGE(page->index != offset, page);
+	}
+
+	if (page && (fgp_flags & FGP_ACCESSED))
+		mark_page_accessed(page);
+
+no_page:
+	if (!page && (fgp_flags & FGP_CREAT)) {
+		int err;
+		if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping))
+			cache_gfp_mask |= __GFP_WRITE;
+		if (fgp_flags & FGP_NOFS) {
+			cache_gfp_mask &= ~__GFP_FS;
+			radix_gfp_mask &= ~__GFP_FS;
+		}
+
+		page = __page_cache_alloc(cache_gfp_mask);
 		if (!page)
 			return NULL;
-		/*
-		 * We want a regular kernel memory (not highmem or DMA etc)
-		 * allocation for the radix tree nodes, but we need to honour
-		 * the context-specific requirements the caller has asked for.
-		 * GFP_RECLAIM_MASK collects those requirements.
-		 */
-		err = add_to_page_cache_lru(page, mapping, index,
-			(gfp_mask & GFP_RECLAIM_MASK));
+
+		if (WARN_ON_ONCE(!(fgp_flags & FGP_LOCK)))
+			fgp_flags |= FGP_LOCK;
+
+		/* Init accessed so avoit atomic mark_page_accessed later */
+		if (fgp_flags & FGP_ACCESSED)
+			init_page_accessed(page);
+
+		err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask);
 		if (unlikely(err)) {
 			page_cache_release(page);
 			page = NULL;
@@ -1105,9 +1103,10 @@ repeat:
 				goto repeat;
 		}
 	}
+
 	return page;
 }
-EXPORT_SYMBOL(find_or_create_page);
+EXPORT_SYMBOL(pagecache_get_page);
 
 /**
  * find_get_entries - gang pagecache lookup
@@ -1404,39 +1403,6 @@ repeat:
 }
 EXPORT_SYMBOL(find_get_pages_tag);
 
-/**
- * grab_cache_page_nowait - returns locked page at given index in given cache
- * @mapping: target address_space
- * @index: the page index
- *
- * Same as grab_cache_page(), but do not wait if the page is unavailable.
- * This is intended for speculative data generators, where the data can
- * be regenerated if the page couldn't be grabbed.  This routine should
- * be safe to call while holding the lock for another page.
- *
- * Clear __GFP_FS when allocating the page to avoid recursion into the fs
- * and deadlock against the caller's locked page.
- */
-struct page *
-grab_cache_page_nowait(struct address_space *mapping, pgoff_t index)
-{
-	struct page *page = find_get_page(mapping, index);
-
-	if (page) {
-		if (trylock_page(page))
-			return page;
-		page_cache_release(page);
-		return NULL;
-	}
-	page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS);
-	if (page && add_to_page_cache_lru(page, mapping, index, GFP_NOFS)) {
-		page_cache_release(page);
-		page = NULL;
-	}
-	return page;
-}
-EXPORT_SYMBOL(grab_cache_page_nowait);
-
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
  * a _large_ part of the i/o request. Imagine the worst scenario:
@@ -2406,7 +2372,6 @@ int pagecache_write_end(struct file *file, struct address_space *mapping,
 {
 	const struct address_space_operations *aops = mapping->a_ops;
 
-	mark_page_accessed(page);
 	return aops->write_end(file, mapping, pos, len, copied, page, fsdata);
 }
 EXPORT_SYMBOL(pagecache_write_end);
@@ -2488,34 +2453,18 @@ EXPORT_SYMBOL(generic_file_direct_write);
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
 					pgoff_t index, unsigned flags)
 {
-	int status;
-	gfp_t gfp_mask;
 	struct page *page;
-	gfp_t gfp_notmask = 0;
+	int fgp_flags = FGP_LOCK|FGP_ACCESSED|FGP_WRITE|FGP_CREAT;
 
-	gfp_mask = mapping_gfp_mask(mapping);
-	if (mapping_cap_account_dirty(mapping))
-		gfp_mask |= __GFP_WRITE;
 	if (flags & AOP_FLAG_NOFS)
-		gfp_notmask = __GFP_FS;
-repeat:
-	page = find_lock_page(mapping, index);
+		fgp_flags |= FGP_NOFS;
+
+	page = pagecache_get_page(mapping, index, fgp_flags,
+			mapping_gfp_mask(mapping),
+			GFP_KERNEL);
 	if (page)
-		goto found;
+		wait_for_stable_page(page);
 
-	page = __page_cache_alloc(gfp_mask & ~gfp_notmask);
-	if (!page)
-		return NULL;
-	status = add_to_page_cache_lru(page, mapping, index,
-						GFP_KERNEL & ~gfp_notmask);
-	if (unlikely(status)) {
-		page_cache_release(page);
-		if (status == -EEXIST)
-			goto repeat;
-		return NULL;
-	}
-found:
-	wait_for_stable_page(page);
 	return page;
 }
 EXPORT_SYMBOL(grab_cache_page_write_begin);
@@ -2564,7 +2513,7 @@ again:
 
 		status = a_ops->write_begin(file, mapping, pos, bytes, flags,
 						&page, &fsdata);
-		if (unlikely(status))
+		if (unlikely(status < 0))
 			break;
 
 		if (mapping_writably_mapped(mapping))
@@ -2573,7 +2522,6 @@ again:
 		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 		flush_dcache_page(page);
 
-		mark_page_accessed(page);
 		status = a_ops->write_end(file, mapping, pos, bytes, copied,
 						page, fsdata);
 		if (unlikely(status < 0))
diff --git a/mm/shmem.c b/mm/shmem.c
index f47fb38c4889..5402481c28d1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1372,9 +1372,13 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned flags,
 			struct page **pagep, void **fsdata)
 {
+	int ret;
 	struct inode *inode = mapping->host;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
+	ret = shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
+	if (ret == 0 && *pagep)
+		init_page_accessed(*pagep);
+	return ret;
 }
 
 static int
diff --git a/mm/swap.c b/mm/swap.c
index 1fb25f8bb155..9e8e3472248b 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -614,6 +614,17 @@ void mark_page_accessed(struct page *page)
 }
 EXPORT_SYMBOL(mark_page_accessed);
 
+/*
+ * Used to mark_page_accessed(page) that is not visible yet and when it is
+ * still safe to use non-atomic ops
+ */
+void init_page_accessed(struct page *page)
+{
+	if (!PageReferenced(page))
+		__SetPageReferenced(page);
+}
+EXPORT_SYMBOL(init_page_accessed);
+
 static void __lru_cache_add(struct page *page)
 {
 	struct pagevec *pvec = &get_cpu_var(lru_add_pvec);
-- 
cgit 


From b7596fb43aa786fb3ee5015a73034fbb9e80feaa Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 4 Jun 2014 16:10:37 -0700
Subject: include/linux/gfp.h: exclude duplicate header

mmdebug.h is included twice.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 454c99fdb79d..6eb1fb37de9a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -6,7 +6,6 @@
 #include <linux/stddef.h>
 #include <linux/linkage.h>
 #include <linux/topology.h>
-#include <linux/mmdebug.h>
 
 struct vm_area_struct;
 
-- 
cgit 


From 4be89a34609659042ef0bf883ad76388fb5251bb Mon Sep 17 00:00:00 2001
From: Jianyu Zhan <nasa4836@gmail.com>
Date: Wed, 4 Jun 2014 16:10:38 -0700
Subject: mm/vmscan.c: use DIV_ROUND_UP for calculation of zone's balance_gap
 and correct comments.

Currently, we use (zone->managed_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1)
/ KSWAPD_ZONE_BALANCE_GAP_RATIO to avoid a zero gap value.  It's better to
use DIV_ROUND_UP macro for neater code and clear meaning.

Besides, the gap value is calculated against the per-zone "managed pages",
not "present pages".  This patch also corrects the comment and does some
rephrasing.

Signed-off-by: Jianyu Zhan <nasa4836@gmail.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Rafael Aquini <aquini@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h |  8 ++++----
 mm/vmscan.c          | 10 ++++------
 2 files changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4348d95e571f..4bdbee80eede 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -166,10 +166,10 @@ enum {
 #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
 
 /*
- * Ratio between the present memory in the zone and the "gap" that
- * we're allowing kswapd to shrink in addition to the per-zone high
- * wmark, even for zones that already have the high wmark satisfied,
- * in order to provide better per-zone lru behavior. We are ok to
+ * Ratio between zone->managed_pages and the "gap" that above the per-zone
+ * "high_wmark". While balancing nodes, We allow kswapd to shrink zones that
+ * do not meet the (high_wmark + gap) watermark, even which already met the
+ * high_wmark, in order to provide better per-zone lru behavior. We are ok to
  * spend not more than 1% of the memory for this zone balancing "gap".
  */
 #define KSWAPD_ZONE_BALANCE_GAP_RATIO 100
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 494cd632178c..cc29fca8d989 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2295,9 +2295,8 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 	 * there is a buffer of free pages available to give compaction
 	 * a reasonable chance of completing and allocating the page
 	 */
-	balance_gap = min(low_wmark_pages(zone),
-		(zone->managed_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
-			KSWAPD_ZONE_BALANCE_GAP_RATIO);
+	balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
+			zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
 	watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
 	watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
 
@@ -2949,9 +2948,8 @@ static bool kswapd_shrink_zone(struct zone *zone,
 	 * high wmark plus a "gap" where the gap is either the low
 	 * watermark or 1% of the zone, whichever is smaller.
 	 */
-	balance_gap = min(low_wmark_pages(zone),
-		(zone->managed_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
-		KSWAPD_ZONE_BALANCE_GAP_RATIO);
+	balance_gap = min(low_wmark_pages(zone), DIV_ROUND_UP(
+			zone->managed_pages, KSWAPD_ZONE_BALANCE_GAP_RATIO));
 
 	/*
 	 * If there is no low memory pressure or the zone is balanced then no
-- 
cgit 


From daa5ba768b9e15da8867824d2f1e8d455f1acac2 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <koct9i@gmail.com>
Date: Wed, 4 Jun 2014 16:10:52 -0700
Subject: mm/rmap.c: cleanup ttu_flags

Transform action part of ttu_flags into individual bits.  These flags
aren't part of any user-space visible API or even trace events.

Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rmap.h |  7 +++----
 mm/rmap.c            | 10 +++++-----
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 9be55c7617da..be574506e6a9 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -72,10 +72,9 @@ struct anon_vma_chain {
 };
 
 enum ttu_flags {
-	TTU_UNMAP = 0,			/* unmap mode */
-	TTU_MIGRATION = 1,		/* migration mode */
-	TTU_MUNLOCK = 2,		/* munlock mode */
-	TTU_ACTION_MASK = 0xff,
+	TTU_UNMAP = 1,			/* unmap mode */
+	TTU_MIGRATION = 2,		/* migration mode */
+	TTU_MUNLOCK = 4,		/* munlock mode */
 
 	TTU_IGNORE_MLOCK = (1 << 8),	/* ignore mlock */
 	TTU_IGNORE_ACCESS = (1 << 9),	/* don't age */
diff --git a/mm/rmap.c b/mm/rmap.c
index ab74290d185d..ea8e20d75b29 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1162,7 +1162,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		if (vma->vm_flags & VM_LOCKED)
 			goto out_mlock;
 
-		if (TTU_ACTION(flags) == TTU_MUNLOCK)
+		if (flags & TTU_MUNLOCK)
 			goto out_unmap;
 	}
 	if (!(flags & TTU_IGNORE_ACCESS)) {
@@ -1230,7 +1230,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			 * pte. do_swap_page() will wait until the migration
 			 * pte is removed and then restart fault handling.
 			 */
-			BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
+			BUG_ON(!(flags & TTU_MIGRATION));
 			entry = make_migration_entry(page, pte_write(pteval));
 		}
 		swp_pte = swp_entry_to_pte(entry);
@@ -1239,7 +1239,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		set_pte_at(mm, address, pte, swp_pte);
 		BUG_ON(pte_file(*pte));
 	} else if (IS_ENABLED(CONFIG_MIGRATION) &&
-		   (TTU_ACTION(flags) == TTU_MIGRATION)) {
+		   (flags & TTU_MIGRATION)) {
 		/* Establish migration entry for a file page */
 		swp_entry_t entry;
 		entry = make_migration_entry(page, pte_write(pteval));
@@ -1252,7 +1252,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 
 out_unmap:
 	pte_unmap_unlock(pte, ptl);
-	if (ret != SWAP_FAIL && TTU_ACTION(flags) != TTU_MUNLOCK)
+	if (ret != SWAP_FAIL && !(flags & TTU_MUNLOCK))
 		mmu_notifier_invalidate_page(mm, address);
 out:
 	return ret;
@@ -1539,7 +1539,7 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
 	 * locking requirements of exec(), migration skips
 	 * temporary VMAs until after exec() completes.
 	 */
-	if (flags & TTU_MIGRATION && !PageKsm(page) && PageAnon(page))
+	if ((flags & TTU_MIGRATION) && !PageKsm(page) && PageAnon(page))
 		rwc.invalid_vma = invalid_migration_vma;
 
 	ret = rmap_walk(page, &rwc);
-- 
cgit 


From 100873d7a777b67ad35197c5a998b5e778f8bf3f Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Wed, 4 Jun 2014 16:10:56 -0700
Subject: hugetlb: rename hugepage_migration_support() to ..._supported()

We already have a function named hugepages_supported(), and the similar
name hugepage_migration_support() is a bit uncomfortable, so let's rename
it hugepage_migration_supported().

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 4 ++--
 mm/hugetlb.c            | 2 +-
 mm/migrate.c            | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 35786ee36f06..255cd5cc0754 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -397,7 +397,7 @@ static inline pgoff_t basepage_index(struct page *page)
 
 extern void dissolve_free_huge_pages(unsigned long start_pfn,
 				     unsigned long end_pfn);
-static inline int hugepage_migration_support(struct hstate *h)
+static inline int hugepage_migration_supported(struct hstate *h)
 {
 #ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
 	return huge_page_shift(h) == PMD_SHIFT;
@@ -453,7 +453,7 @@ static inline pgoff_t basepage_index(struct page *page)
 	return page->index;
 }
 #define dissolve_free_huge_pages(s, e)	do {} while (0)
-#define hugepage_migration_support(h)	0
+#define hugepage_migration_supported(h)	0
 
 static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
 					   struct mm_struct *mm, pte_t *pte)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 244194217e39..226910cb7c9b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -544,7 +544,7 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
 /* Movability of hugepages depends on migration support. */
 static inline gfp_t htlb_alloc_mask(struct hstate *h)
 {
-	if (hugepages_treat_as_movable || hugepage_migration_support(h))
+	if (hugepages_treat_as_movable || hugepage_migration_supported(h))
 		return GFP_HIGHUSER_MOVABLE;
 	else
 		return GFP_HIGHUSER;
diff --git a/mm/migrate.c b/mm/migrate.c
index 2a459675eeab..63f0cd559999 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1039,7 +1039,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	 * tables or check whether the hugepage is pmd-based or not before
 	 * kicking migration.
 	 */
-	if (!hugepage_migration_support(page_hstate(hpage))) {
+	if (!hugepage_migration_supported(page_hstate(hpage))) {
 		putback_active_hugepage(hpage);
 		return -ENOSYS;
 	}
-- 
cgit 


From 50417c55562c03e6746b13aee650c2bbb048fea3 Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Wed, 4 Jun 2014 16:11:07 -0700
Subject: mm/zbud.c: make size unsigned like unique callsite

zbud_alloc is only called by zswap_frontswap_store with unsigned int len.
Change the function parameter to unsigned int and update the <= 0 check.

Signed-off-by: Fabian Frederick <fabf@skynet.be>
Acked-by: Seth Jennings <sjennings@variantweb.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/zbud.h | 2 +-
 mm/zbud.c            | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/zbud.h b/include/linux/zbud.h
index 2571a5cfa5fc..13af0d450bf6 100644
--- a/include/linux/zbud.h
+++ b/include/linux/zbud.h
@@ -11,7 +11,7 @@ struct zbud_ops {
 
 struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
 void zbud_destroy_pool(struct zbud_pool *pool);
-int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
 	unsigned long *handle);
 void zbud_free(struct zbud_pool *pool, unsigned long handle);
 int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
diff --git a/mm/zbud.c b/mm/zbud.c
index 9451361e6aa7..01df13a7e2e1 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -247,7 +247,7 @@ void zbud_destroy_pool(struct zbud_pool *pool)
  * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
  * a new page.
  */
-int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
+int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp,
 			unsigned long *handle)
 {
 	int chunks, i, freechunks;
@@ -255,7 +255,7 @@ int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
 	enum buddy bud;
 	struct page *page;
 
-	if (size <= 0 || gfp & __GFP_HIGHMEM)
+	if (!size || (gfp & __GFP_HIGHMEM))
 		return -EINVAL;
 	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
 		return -ENOSPC;
-- 
cgit 


From 2c0d259e0e580dd95dd5d2d5aa4926169228d4a0 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Wed, 4 Jun 2014 16:11:16 -0700
Subject: compiler.h: avoid sparse errors in __compiletime_error_fallback()

Usually, BUG_ON and friends aren't even evaluated in sparse, but recently
compiletime_assert_atomic_type() was added, and that now results in a
sparse warning every time it is used.

The reason turns out to be the temporary variable: once it is used, sparse
no longer considers the value to be a constant, which results in a warning
and an error.  The error is the more annoying part of this as it suppresses
any further warnings in the same file, hiding other problems.

Unfortunately the condition cannot be simply expanded out to avoid the
temporary variable since it breaks compiletime_assert on old versions of
GCC such as GCC 4.2.4 which the latest metag compiler is based on.

Therefore #ifndef __CHECKER__ out the __compiletime_error_fallback which
uses the potentially negative size array to trigger a conditional compiler
error, so that sparse doesn't see it.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Johannes Berg <johannes.berg@intel.com>
Cc: Daniel Santos <daniel.santos@pobox.com>
Cc: Luciano Coelho <luciano.coelho@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index ee7239ea1583..64fdfe1cfcf0 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -323,9 +323,18 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 #endif
 #ifndef __compiletime_error
 # define __compiletime_error(message)
-# define __compiletime_error_fallback(condition) \
+/*
+ * Sparse complains of variable sized arrays due to the temporary variable in
+ * __compiletime_assert. Unfortunately we can't just expand it out to make
+ * sparse see a constant array size without breaking compiletime_assert on old
+ * versions of GCC (e.g. 4.2.4), so hide the array from sparse altogether.
+ */
+# ifndef __CHECKER__
+#  define __compiletime_error_fallback(condition) \
 	do { ((void)sizeof(char[1 - 2 * condition])); } while (0)
-#else
+# endif
+#endif
+#ifndef __compiletime_error_fallback
 # define __compiletime_error_fallback(condition) do { } while (0)
 #endif
 
-- 
cgit 


From b300a4ea665f7fa44f015616ac1874deca891c5e Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill@shutemov.name>
Date: Wed, 4 Jun 2014 16:11:27 -0700
Subject: kernel/user.c: drop unused field 'files' from user_struct

Nobody seems to have used it for a long time. Let's drop it.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 1 -
 kernel/user.c         | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2f2dd7d932a2..611676fd4c2c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -745,7 +745,6 @@ static inline int signal_group_exit(const struct signal_struct *sig)
 struct user_struct {
 	atomic_t __count;	/* reference count */
 	atomic_t processes;	/* How many processes does this user have? */
-	atomic_t files;		/* How many open files does this user have? */
 	atomic_t sigpending;	/* How many pending signals does this user have? */
 #ifdef CONFIG_INOTIFY_USER
 	atomic_t inotify_watches; /* How many inotify watches does this user have? */
diff --git a/kernel/user.c b/kernel/user.c
index 294fc6a94168..4efa39350e44 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -87,7 +87,6 @@ static DEFINE_SPINLOCK(uidhash_lock);
 struct user_struct root_user = {
 	.__count	= ATOMIC_INIT(1),
 	.processes	= ATOMIC_INIT(1),
-	.files		= ATOMIC_INIT(0),
 	.sigpending	= ATOMIC_INIT(0),
 	.locked_shm     = 0,
 	.uid		= GLOBAL_ROOT_UID,
-- 
cgit 


From aac74dc495456412c4130a1167ce4beb6c1f0b38 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 4 Jun 2014 16:11:40 -0700
Subject: printk: rename printk_sched to printk_deferred

After learning we'll need some sort of deferred printk functionality in
the timekeeping core, Peter suggested we rename the printk_sched function
so it can be reused by the subsystems that need it.

This only changes the function name. No logic changes.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jiri Bohac <jbohac@suse.cz>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/printk.h  | 6 +++---
 kernel/printk/printk.c  | 2 +-
 kernel/sched/core.c     | 2 +-
 kernel/sched/deadline.c | 2 +-
 kernel/sched/rt.c       | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 8752f7595b27..7847301e2837 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -128,9 +128,9 @@ asmlinkage __printf(1, 2) __cold
 int printk(const char *fmt, ...);
 
 /*
- * Special printk facility for scheduler use only, _DO_NOT_USE_ !
+ * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ !
  */
-__printf(1, 2) __cold int printk_sched(const char *fmt, ...);
+__printf(1, 2) __cold int printk_deferred(const char *fmt, ...);
 
 /*
  * Please don't use printk_ratelimit(), because it shares ratelimiting state
@@ -165,7 +165,7 @@ int printk(const char *s, ...)
 	return 0;
 }
 static inline __printf(1, 2) __cold
-int printk_sched(const char *s, ...)
+int printk_deferred(const char *s, ...)
 {
 	return 0;
 }
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index dc2b8bd9bc1e..35d9db251903 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2585,7 +2585,7 @@ void wake_up_klogd(void)
 	preempt_enable();
 }
 
-int printk_sched(const char *fmt, ...)
+int printk_deferred(const char *fmt, ...)
 {
 	va_list args;
 	int r;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 913c6d6cc2c1..caf03e89a068 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1367,7 +1367,7 @@ out:
 		 * leave kernel.
 		 */
 		if (p->mm && printk_ratelimit()) {
-			printk_sched("process %d (%s) no longer affine to cpu%d\n",
+			printk_deferred("process %d (%s) no longer affine to cpu%d\n",
 					task_pid_nr(p), p->comm, cpu);
 		}
 	}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index f9ca7d19781a..d17e1c48a79d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -352,7 +352,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
 
 		if (!lag_once) {
 			lag_once = true;
-			printk_sched("sched: DL replenish lagged to much\n");
+			printk_deferred("sched: DL replenish lagged to much\n");
 		}
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 		dl_se->runtime = pi_se->dl_runtime;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 0ebfd7a29472..5d7667b37c21 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -896,7 +896,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 
 			if (!once) {
 				once = true;
-				printk_sched("sched: RT throttling activated\n");
+				printk_deferred("sched: RT throttling activated\n");
 			}
 		} else {
 			/*
-- 
cgit 


From c224815dac9c739b79050d3cc67443ff500bc478 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 4 Jun 2014 16:11:41 -0700
Subject: printk: Add printk_deferred_once

Two of the three printk_deferred uses are really printk_once style
uses, so add a printk_deferred_once macro to simplify those call
sites.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Jiri Bohac <jbohac@suse.cz>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/printk.h  | 11 +++++++++++
 kernel/sched/deadline.c |  7 +------
 kernel/sched/rt.c       |  8 +-------
 3 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/printk.h b/include/linux/printk.h
index 7847301e2837..f086d6c99dbc 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -266,9 +266,20 @@ extern asmlinkage void dump_stack(void) __cold;
 		printk(fmt, ##__VA_ARGS__);			\
 	}							\
 })
+#define printk_deferred_once(fmt, ...)				\
+({								\
+	static bool __print_once __read_mostly;			\
+								\
+	if (!__print_once) {					\
+		__print_once = true;				\
+		printk_deferred(fmt, ##__VA_ARGS__);		\
+	}							\
+})
 #else
 #define printk_once(fmt, ...)					\
 	no_printk(fmt, ##__VA_ARGS__)
+#define printk_deferred_once(fmt, ...)				\
+	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 #define pr_emerg_once(fmt, ...)					\
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index d17e1c48a79d..e1574fca03b5 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -348,12 +348,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
 	 * entity.
 	 */
 	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
-		static bool lag_once = false;
-
-		if (!lag_once) {
-			lag_once = true;
-			printk_deferred("sched: DL replenish lagged to much\n");
-		}
+		printk_deferred_once("sched: DL replenish lagged to much\n");
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
 		dl_se->runtime = pi_se->dl_runtime;
 	}
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 5d7667b37c21..b3512f1afce9 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -890,14 +890,8 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 		 * but accrue some time due to boosting.
 		 */
 		if (likely(rt_b->rt_runtime)) {
-			static bool once = false;
-
 			rt_rq->rt_throttled = 1;
-
-			if (!once) {
-				once = true;
-				printk_deferred("sched: RT throttling activated\n");
-			}
+			printk_deferred_once("sched: RT throttling activated\n");
 		} else {
 			/*
 			 * In case we did anyway, make it go away,
-- 
cgit 


From 6e099f557d9c6797c3ee3ee7b5c8cebe543ec1cc Mon Sep 17 00:00:00 2001
From: Dan Streetman <ddstreet@ieee.org>
Date: Wed, 4 Jun 2014 16:11:44 -0700
Subject: Documentation: expand/clarify debug documentation

The pr_debug() and related debug print macros all differ from the normal
pr_XXX() macros, in that the normal ones print unconditionally, while
the debug macros are compiled out unless DEBUG is defined or
CONFIG_DYNAMIC_DEBUG is set.  This isn't obvious, and the only way to
find this out is either to review the actual printk.h code or to read
CodingStyle, and the message there doesn't highlight the fact.

Change Documentation/CodingStyle to clearly indicate that pr_debug() and
related debug printing macros behave differently than all other pr_XXX()
macros, and attempt to clarify when and where the different debug
printing methods might be used.

Add short comment to printk.h above the pr_XXX() macros indicating that
while these macros print unconditionally, pr_debug() does not.

Signed-off-by: Dan Streetman <ddstreet@ieee.org>
Cc: Joe Perches <joe@perches.com>
Cc: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/CodingStyle | 22 +++++++++++++++-------
 include/linux/printk.h    |  6 ++++++
 2 files changed, 21 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
index 7fe0546c504a..6b6bef31e956 100644
--- a/Documentation/CodingStyle
+++ b/Documentation/CodingStyle
@@ -660,15 +660,23 @@ There are a number of driver model diagnostic macros in <linux/device.h>
 which you should use to make sure messages are matched to the right device
 and driver, and are tagged with the right level:  dev_err(), dev_warn(),
 dev_info(), and so forth.  For messages that aren't associated with a
-particular device, <linux/printk.h> defines pr_debug() and pr_info().
+particular device, <linux/printk.h> defines pr_notice(), pr_info(),
+pr_warn(), pr_err(), etc.
 
 Coming up with good debugging messages can be quite a challenge; and once
-you have them, they can be a huge help for remote troubleshooting.  Such
-messages should be compiled out when the DEBUG symbol is not defined (that
-is, by default they are not included).  When you use dev_dbg() or pr_debug(),
-that's automatic.  Many subsystems have Kconfig options to turn on -DDEBUG.
-A related convention uses VERBOSE_DEBUG to add dev_vdbg() messages to the
-ones already enabled by DEBUG.
+you have them, they can be a huge help for remote troubleshooting.  However
+debug message printing is handled differently than printing other non-debug
+messages.  While the other pr_XXX() functions print unconditionally,
+pr_debug() does not; it is compiled out by default, unless either DEBUG is
+defined or CONFIG_DYNAMIC_DEBUG is set.  That is true for dev_dbg() also,
+and a related convention uses VERBOSE_DEBUG to add dev_vdbg() messages to
+the ones already enabled by DEBUG.
+
+Many subsystems have Kconfig debug options to turn on -DDEBUG in the
+corresponding Makefile; in other cases specific files #define DEBUG.  And
+when a debug message should be unconditionally printed, such as if it is
+already inside a debug-related #ifdef secton, printk(KERN_DEBUG ...) can be
+used.
 
 
 		Chapter 14: Allocating memory
diff --git a/include/linux/printk.h b/include/linux/printk.h
index f086d6c99dbc..37f3a6589c1c 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -210,6 +210,12 @@ extern asmlinkage void dump_stack(void) __cold;
 #define pr_fmt(fmt) fmt
 #endif
 
+/*
+ * These can be used to print at the various log levels.
+ * All of these will print unconditionally, although note that pr_debug()
+ * and other debug macros are compiled out unless either DEBUG is defined
+ * or CONFIG_DYNAMIC_DEBUG is set.
+ */
 #define pr_emerg(fmt, ...) \
 	printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__)
 #define pr_alert(fmt, ...) \
-- 
cgit 


From a8fe19ebfbfd90ec17c02284717238b02efb9580 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 4 Jun 2014 16:11:46 -0700
Subject: kernel/printk: use symbolic defines for console loglevels

... instead of naked numbers.

Stuff in sysrq.c used to set it to 8 which is supposed to mean above
default level so set it to DEBUG instead as we're terminating/killing all
tasks and we want to be verbose there.

Also, correct the check in x86_64_start_kernel which should be >= as
we're clearly issuing the string there for all debug levels, not only
the magical 10.

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Joe Perches <joe@perches.com>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/head64.c      |  2 +-
 arch/x86/platform/uv/uv_nmi.c |  2 +-
 drivers/nubus/nubus.c         | 18 +++++++++---------
 drivers/tty/sysrq.c           |  8 ++++----
 include/linux/printk.h        | 15 +++++++++++++--
 init/main.c                   |  4 ++--
 kernel/debug/kdb/kdb_bt.c     |  2 +-
 kernel/debug/kdb/kdb_io.c     |  2 +-
 kernel/debug/kdb/kdb_main.c   |  2 +-
 kernel/printk/printk.c        | 13 +++----------
 10 files changed, 36 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 068054f4bf20..eda1a865641e 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -172,7 +172,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	 */
 	load_ucode_bsp();
 
-	if (console_loglevel == 10)
+	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
 		early_printk("Kernel alive\n");
 
 	clear_page(init_level4_pgt);
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c
index be27da60dc8f..c89c93320c12 100644
--- a/arch/x86/platform/uv/uv_nmi.c
+++ b/arch/x86/platform/uv/uv_nmi.c
@@ -85,7 +85,7 @@ static cpumask_var_t uv_nmi_cpu_mask;
  * Default is all stack dumps go to the console and buffer.
  * Lower level to send to log buffer only.
  */
-static int uv_nmi_loglevel = 7;
+static int uv_nmi_loglevel = CONSOLE_LOGLEVEL_DEFAULT;
 module_param_named(dump_loglevel, uv_nmi_loglevel, int, 0644);
 
 /*
diff --git a/drivers/nubus/nubus.c b/drivers/nubus/nubus.c
index 43926cd25ae8..5066a7ef7b6c 100644
--- a/drivers/nubus/nubus.c
+++ b/drivers/nubus/nubus.c
@@ -473,7 +473,7 @@ static struct nubus_dev* __init
 	if (slot == 0 && (unsigned long)dir.base % 2)
 		dir.base += 1;
 	
-	if (console_loglevel >= 10)
+	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
 		printk(KERN_DEBUG "nubus_get_functional_resource: parent is 0x%p, dir is 0x%p\n",
 		       parent->base, dir.base);
 
@@ -568,7 +568,7 @@ static int __init nubus_get_vidnames(struct nubus_board* board,
 
 	printk(KERN_INFO "    video modes supported:\n");
 	nubus_get_subdir(parent, &dir);
-	if (console_loglevel >= 10)
+	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
 		printk(KERN_DEBUG "nubus_get_vidnames: parent is 0x%p, dir is 0x%p\n",
 		       parent->base, dir.base);
 
@@ -629,7 +629,7 @@ static int __init nubus_get_vendorinfo(struct nubus_board* board,
 
 	printk(KERN_INFO "    vendor info:\n");
 	nubus_get_subdir(parent, &dir);
-	if (console_loglevel >= 10)
+	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
 		printk(KERN_DEBUG "nubus_get_vendorinfo: parent is 0x%p, dir is 0x%p\n",
 		       parent->base, dir.base);
 
@@ -654,7 +654,7 @@ static int __init nubus_get_board_resource(struct nubus_board* board, int slot,
 	struct nubus_dirent ent;
 	
 	nubus_get_subdir(parent, &dir);
-	if (console_loglevel >= 10)
+	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
 		printk(KERN_DEBUG "nubus_get_board_resource: parent is 0x%p, dir is 0x%p\n",
 		       parent->base, dir.base);
 
@@ -753,19 +753,19 @@ static void __init nubus_find_rom_dir(struct nubus_board* board)
 	if (nubus_readdir(&dir, &ent) == -1)
 		goto badrom;
 
-	if (console_loglevel >= 10)
+	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
 		printk(KERN_INFO "nubus_get_rom_dir: entry %02x %06x\n", ent.type, ent.data);
 	/* This one takes us to where we want to go. */
 	if (nubus_readdir(&dir, &ent) == -1) 
 		goto badrom;
-	if (console_loglevel >= 10)
+	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
 		printk(KERN_DEBUG "nubus_get_rom_dir: entry %02x %06x\n", ent.type, ent.data);
 	nubus_get_subdir(&ent, &dir);
 
 	/* Resource ID 01, also an "Unknown Macintosh" */
 	if (nubus_readdir(&dir, &ent) == -1) 
 		goto badrom;
-	if (console_loglevel >= 10)
+	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
 		printk(KERN_DEBUG "nubus_get_rom_dir: entry %02x %06x\n", ent.type, ent.data);
 
 	/* FIXME: the first one is *not* always the right one.  We
@@ -780,7 +780,7 @@ static void __init nubus_find_rom_dir(struct nubus_board* board)
 	   path to that address... */
 	if (nubus_readdir(&dir, &ent) == -1)
 		goto badrom;
-	if (console_loglevel >= 10)
+	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
 		printk(KERN_DEBUG "nubus_get_rom_dir: entry %02x %06x\n", ent.type, ent.data);
 	
 	/* Bwahahahaha... */
@@ -816,7 +816,7 @@ static struct nubus_board* __init nubus_add_board(int slot, int bytelanes)
 	board->fblock = rp;
 
 	/* Dump the format block for debugging purposes */
-	if (console_loglevel >= 10) {
+	if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG) {
 		int i;
 		printk(KERN_DEBUG "Slot %X, format block at 0x%p\n",
 		       slot, rp);
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index ce396ecdf412..b767a64e49d9 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -88,7 +88,7 @@ static void sysrq_handle_loglevel(int key)
 	int i;
 
 	i = key - '0';
-	console_loglevel = 7;
+	console_loglevel = CONSOLE_LOGLEVEL_DEFAULT;
 	printk("Loglevel set to %d\n", i);
 	console_loglevel = i;
 }
@@ -343,7 +343,7 @@ static void send_sig_all(int sig)
 static void sysrq_handle_term(int key)
 {
 	send_sig_all(SIGTERM);
-	console_loglevel = 8;
+	console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
 }
 static struct sysrq_key_op sysrq_term_op = {
 	.handler	= sysrq_handle_term,
@@ -387,7 +387,7 @@ static struct sysrq_key_op sysrq_thaw_op = {
 static void sysrq_handle_kill(int key)
 {
 	send_sig_all(SIGKILL);
-	console_loglevel = 8;
+	console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
 }
 static struct sysrq_key_op sysrq_kill_op = {
 	.handler	= sysrq_handle_kill,
@@ -520,7 +520,7 @@ void __handle_sysrq(int key, bool check_mask)
 	 * routing in the consumers of /proc/kmsg.
 	 */
 	orig_log_level = console_loglevel;
-	console_loglevel = 7;
+	console_loglevel = CONSOLE_LOGLEVEL_DEFAULT;
 	printk(KERN_INFO "SysRq : ");
 
         op_p = __sysrq_get_key_op(key);
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 37f3a6589c1c..319ff7e53efb 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -30,6 +30,17 @@ static inline const char *printk_skip_level(const char *buffer)
 	return buffer;
 }
 
+/* printk's without a loglevel use this.. */
+#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
+
+/* We show everything that is MORE important than this.. */
+#define CONSOLE_LOGLEVEL_SILENT  0 /* Mum's the word */
+#define CONSOLE_LOGLEVEL_MIN	 1 /* Minimum loglevel we let people use */
+#define CONSOLE_LOGLEVEL_QUIET	 4 /* Shhh ..., when booted with "quiet" */
+#define CONSOLE_LOGLEVEL_DEFAULT 7 /* anything MORE serious than KERN_DEBUG */
+#define CONSOLE_LOGLEVEL_DEBUG	10 /* issue debug messages */
+#define CONSOLE_LOGLEVEL_MOTORMOUTH 15	/* You can't shut this one up */
+
 extern int console_printk[];
 
 #define console_loglevel (console_printk[0])
@@ -39,13 +50,13 @@ extern int console_printk[];
 
 static inline void console_silent(void)
 {
-	console_loglevel = 0;
+	console_loglevel = CONSOLE_LOGLEVEL_SILENT;
 }
 
 static inline void console_verbose(void)
 {
 	if (console_loglevel)
-		console_loglevel = 15;
+		console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
 }
 
 struct va_format {
diff --git a/init/main.c b/init/main.c
index e08c0b2065a1..04fab8d74c89 100644
--- a/init/main.c
+++ b/init/main.c
@@ -203,13 +203,13 @@ EXPORT_SYMBOL(loops_per_jiffy);
 
 static int __init debug_kernel(char *str)
 {
-	console_loglevel = 10;
+	console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
 	return 0;
 }
 
 static int __init quiet_kernel(char *str)
 {
-	console_loglevel = 4;
+	console_loglevel = CONSOLE_LOGLEVEL_QUIET;
 	return 0;
 }
 
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index b03e0e814e43..fe15fff5df53 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -21,7 +21,7 @@
 static void kdb_show_stack(struct task_struct *p, void *addr)
 {
 	int old_lvl = console_loglevel;
-	console_loglevel = 15;
+	console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
 	kdb_trap_printk++;
 	kdb_set_current_task(p);
 	if (addr) {
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 14ff4849262c..7c70812caea5 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -710,7 +710,7 @@ kdb_printit:
 	}
 	if (logging) {
 		saved_loglevel = console_loglevel;
-		console_loglevel = 0;
+		console_loglevel = CONSOLE_LOGLEVEL_SILENT;
 		printk(KERN_INFO "%s", kdb_buffer);
 	}
 
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 0b097c8a1e50..2f7c760305ca 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1091,7 +1091,7 @@ static int kdb_reboot(int argc, const char **argv)
 static void kdb_dumpregs(struct pt_regs *regs)
 {
 	int old_lvl = console_loglevel;
-	console_loglevel = 15;
+	console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
 	kdb_trap_printk++;
 	show_regs(regs);
 	kdb_trap_printk--;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 923c5d4e4202..ea2d5f6962ed 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -54,18 +54,11 @@
 #include "console_cmdline.h"
 #include "braille.h"
 
-/* printk's without a loglevel use this.. */
-#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
-
-/* We show everything that is MORE important than this.. */
-#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
-#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
-
 int console_printk[4] = {
-	DEFAULT_CONSOLE_LOGLEVEL,	/* console_loglevel */
+	CONSOLE_LOGLEVEL_DEFAULT,	/* console_loglevel */
 	DEFAULT_MESSAGE_LOGLEVEL,	/* default_message_loglevel */
-	MINIMUM_CONSOLE_LOGLEVEL,	/* minimum_console_loglevel */
-	DEFAULT_CONSOLE_LOGLEVEL,	/* default_console_loglevel */
+	CONSOLE_LOGLEVEL_MIN,		/* minimum_console_loglevel */
+	CONSOLE_LOGLEVEL_DEFAULT,	/* default_console_loglevel */
 };
 
 /* Deferred messaged from sched code are marked by this special level */
-- 
cgit 


From 34a1b7236ad6113883f6c448d1da854cad60265e Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Wed, 4 Jun 2014 16:12:19 -0700
Subject: kthreads: kill CLONE_KERNEL, change kernel_thread(kernel_init) to
 avoid CLONE_SIGHAND

1. Remove CLONE_KERNEL, it has no users and it is dangerous.

   The (old) comment says "List of flags we want to share for kernel
   threads" but this is not true, we do not want to share ->sighand by
   default. This flag can only be used if the caller is sure that both
   parent/child will never play with signals (say, allow_signal/etc).

2. Change rest_init() to clone kernel_init() without CLONE_SIGHAND.

   In this case CLONE_SIGHAND does not really hurt, and it looks like
   optimization because copy_sighand() can avoid kmem_cache_alloc().

   But in fact this only adds the minor pessimization. kernel_init()
   is going to exec the init process, and de_thread() will need to
   unshare ->sighand and do kmem_cache_alloc(sighand_cachep) anyway,
   but it needs to do more work and take tasklist_lock and siglock.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h | 6 ------
 init/main.c           | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 611676fd4c2c..8fcd0e6098d9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -136,12 +136,6 @@ struct filename;
 #define VMACACHE_SIZE (1U << VMACACHE_BITS)
 #define VMACACHE_MASK (VMACACHE_SIZE - 1)
 
-/*
- * List of flags we want to share for kernel threads,
- * if only because they are not used by them anyway.
- */
-#define CLONE_KERNEL	(CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
-
 /*
  * These are the constant used to fake the fixed-point load-average
  * counting. Some notes:
diff --git a/init/main.c b/init/main.c
index 8ac3833f2bdf..4de815c0309a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -380,7 +380,7 @@ static noinline void __init_refok rest_init(void)
 	 * the init task will end up wanting to create kthreads, which, if
 	 * we schedule it before we create kthreadd, will OOPS.
 	 */
-	kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
+	kernel_thread(kernel_init, NULL, CLONE_FS);
 	numa_default_policy();
 	pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
 	rcu_read_lock();
-- 
cgit 


From 647f010bff6795b3e85c2b5a7768c0594a049ab0 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 4 Jun 2014 16:12:20 -0700
Subject: init/main.c: remove an ifdef

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/proc_fs.h | 4 ++++
 init/main.c             | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 608e60a74c3c..9d117f61d976 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -44,6 +44,10 @@ extern int remove_proc_subtree(const char *, struct proc_dir_entry *);
 
 #else /* CONFIG_PROC_FS */
 
+static inline void proc_root_init(void)
+{
+}
+
 static inline void proc_flush_task(struct task_struct *task)
 {
 }
diff --git a/init/main.c b/init/main.c
index 4de815c0309a..17d47bcdf573 100644
--- a/init/main.c
+++ b/init/main.c
@@ -629,9 +629,7 @@ asmlinkage __visible void __init start_kernel(void)
 	signals_init();
 	/* rootfs populating might need page-writeback */
 	page_writeback_init();
-#ifdef CONFIG_PROC_FS
 	proc_root_init();
-#endif
 	cgroup_init();
 	cpuset_init();
 	taskstats_init_early();
-- 
cgit 


From 7e2b10c1e52ca37fb522be49f4be367f9311d0cd Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 4 Jun 2014 17:20:02 -0700
Subject: net: Support for multiple checksums with gso

When creating a GSO packet segment we may need to set more than
one checksum in the packet (for instance a TCP checksum and
UDP checksum for VXLAN encapsulation). To be efficient, we want
to do checksum calculation for any part of the packet at most once.

This patch adds csum_start offset to skb_gso_cb. This tracks the
starting offset for skb->csum which is initially set in skb_segment.
When a protocol needs to compute a transport checksum it calls
gso_make_checksum which computes the checksum value from the start
of transport header to csum_start and then adds in skb->csum to get
the full checksum. skb->csum and csum_start are then updated to reflect
the checksum of the resultant packet starting from the transport header.

This patch also adds a flag to skbuff, encap_hdr_csum, which is set
in *gso_segment functions to indicate that a tunnel protocol needs
checksum calculation.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h    | 26 +++++++++++++++++++++++++-
 net/core/skbuff.c         |  8 +++++++-
 net/ipv4/ip_tunnel_core.c |  8 ++++++++
 3 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 7a9beeb1c458..d8d397acb52c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -567,7 +567,8 @@ struct sk_buff {
 	 * headers if needed
 	 */
 	__u8			encapsulation:1;
-	/* 6/8 bit hole (depending on ndisc_nodetype presence) */
+	__u8			encap_hdr_csum:1;
+	/* 5/7 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
@@ -2988,6 +2989,7 @@ static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
 struct skb_gso_cb {
 	int	mac_offset;
 	int	encap_level;
+	__u16	csum_start;
 };
 #define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
 
@@ -3012,6 +3014,28 @@ static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra)
 	return 0;
 }
 
+/* Compute the checksum for a gso segment. First compute the checksum value
+ * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and
+ * then add in skb->csum (checksum from csum_start to end of packet).
+ * skb->csum and csum_start are then updated to reflect the checksum of the
+ * resultant packet starting from the transport header-- the resultant checksum
+ * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo
+ * header.
+ */
+static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res)
+{
+	int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) -
+	    skb_transport_offset(skb);
+	__u16 csum;
+
+	csum = csum_fold(csum_partial(skb_transport_header(skb),
+				      plen, skb->csum));
+	skb->csum = res;
+	SKB_GSO_CB(skb)->csum_start -= plen;
+
+	return csum;
+}
+
 static inline bool skb_is_gso(const struct sk_buff *skb)
 {
 	return skb_shinfo(skb)->gso_size;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3f6c7e8be8a4..05f4bef2ce12 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2885,7 +2885,9 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 	if (unlikely(!proto))
 		return ERR_PTR(-EINVAL);
 
-	csum = !!can_checksum_protocol(features, proto);
+	csum = !head_skb->encap_hdr_csum &&
+	    !!can_checksum_protocol(features, proto);
+
 	__skb_push(head_skb, doffset);
 	headroom = skb_headroom(head_skb);
 	pos = skb_headlen(head_skb);
@@ -2983,6 +2985,8 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
 			nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
 							    skb_put(nskb, len),
 							    len, 0);
+			SKB_GSO_CB(nskb)->csum_start =
+			    skb_headroom(nskb) + offset;
 			continue;
 		}
 
@@ -3052,6 +3056,8 @@ perform_csum_check:
 			nskb->csum = skb_checksum(nskb, doffset,
 						  nskb->len - doffset, 0);
 			nskb->ip_summed = CHECKSUM_NONE;
+			SKB_GSO_CB(nskb)->csum_start =
+			    skb_headroom(nskb) + doffset;
 		}
 	} while ((offset += len) < head_skb->len);
 
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 847e69cbff7e..f4c987bb7e94 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -135,6 +135,14 @@ struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb,
 		return skb;
 	}
 
+	/* If packet is not gso and we are resolving any partial checksum,
+	 * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
+	 * on the outer header without confusing devices that implement
+	 * NETIF_F_IP_CSUM with encapsulation.
+	 */
+	if (csum_help)
+		skb->encapsulation = 0;
+
 	if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
 		err = skb_checksum_help(skb);
 		if (unlikely(err))
-- 
cgit 


From 0f4f4ffa7b7c3d29d0537a126145c9f8d8ed5dbc Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 4 Jun 2014 17:20:16 -0700
Subject: net: Add GSO support for UDP tunnels with checksum

Added a new netif feature for GSO_UDP_TUNNEL_CSUM. This indicates
that a device is capable of computing the UDP checksum in the
encapsulating header of a UDP tunnel.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h |  1 +
 include/linux/skbuff.h          |  2 ++
 net/ipv4/af_inet.c              |  1 +
 net/ipv4/tcp_offload.c          |  1 +
 net/ipv4/udp.c                  | 40 +++++++++++++++++++---------------------
 net/ipv4/udp_offload.c          |  4 +++-
 net/ipv6/ip6_offload.c          |  1 +
 net/ipv6/udp_offload.c          |  4 +++-
 8 files changed, 31 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index c26d0ec2ef3a..f1338e0f9866 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -45,6 +45,7 @@ enum {
 	NETIF_F_GSO_IPIP_BIT,		/* ... IPIP tunnel with TSO */
 	NETIF_F_GSO_SIT_BIT,		/* ... SIT tunnel with TSO */
 	NETIF_F_GSO_UDP_TUNNEL_BIT,	/* ... UDP TUNNEL with TSO */
+	NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */
 	NETIF_F_GSO_MPLS_BIT,		/* ... MPLS segmentation */
 	/**/NETIF_F_GSO_LAST =		/* last bit, see GSO_MASK */
 		NETIF_F_GSO_MPLS_BIT,
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d8d397acb52c..5a6d10a538f5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -345,6 +345,8 @@ enum {
 	SKB_GSO_UDP_TUNNEL = 1 << 9,
 
 	SKB_GSO_MPLS = 1 << 10,
+
+	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
 };
 
 #if BITS_PER_LONG > 32
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0e9bb08a91e4..0070ab87109b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1258,6 +1258,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_SIT |
 		       SKB_GSO_TCPV6 |
 		       SKB_GSO_UDP_TUNNEL |
+		       SKB_GSO_UDP_TUNNEL_CSUM |
 		       SKB_GSO_MPLS |
 		       0)))
 		goto out;
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index d8de7b9e0720..c02f2d2e7bab 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -61,6 +61,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 			       SKB_GSO_SIT |
 			       SKB_GSO_MPLS |
 			       SKB_GSO_UDP_TUNNEL |
+			       SKB_GSO_UDP_TUNNEL_CSUM |
 			       0) ||
 			     !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
 			goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index a84f6762ea9e..8d8c33d84c9a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2528,7 +2528,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 	int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
 	__be16 protocol = skb->protocol;
 	netdev_features_t enc_features;
-	int outer_hlen;
+	int udp_offset, outer_hlen;
+	unsigned int oldlen;
+	bool need_csum;
+
+	oldlen = (u16)~skb->len;
 
 	if (unlikely(!pskb_may_pull(skb, tnl_hlen)))
 		goto out;
@@ -2540,6 +2544,10 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 	skb->mac_len = skb_inner_network_offset(skb);
 	skb->protocol = htons(ETH_P_TEB);
 
+	need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
+	if (need_csum)
+		skb->encap_hdr_csum = 1;
+
 	/* segment inner packet. */
 	enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
 	segs = skb_mac_gso_segment(skb, enc_features);
@@ -2550,10 +2558,11 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 	}
 
 	outer_hlen = skb_tnl_header_len(skb);
+	udp_offset = outer_hlen - tnl_hlen;
 	skb = segs;
 	do {
 		struct udphdr *uh;
-		int udp_offset = outer_hlen - tnl_hlen;
+		int len;
 
 		skb_reset_inner_headers(skb);
 		skb->encapsulation = 1;
@@ -2564,31 +2573,20 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
 		skb_reset_mac_header(skb);
 		skb_set_network_header(skb, mac_len);
 		skb_set_transport_header(skb, udp_offset);
+		len = skb->len - udp_offset;
 		uh = udp_hdr(skb);
-		uh->len = htons(skb->len - udp_offset);
-
-		/* csum segment if tunnel sets skb with csum. */
-		if (protocol == htons(ETH_P_IP) && unlikely(uh->check)) {
-			struct iphdr *iph = ip_hdr(skb);
+		uh->len = htons(len);
 
-			uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-						       skb->len - udp_offset,
-						       IPPROTO_UDP, 0);
-			uh->check = csum_fold(skb_checksum(skb, udp_offset,
-							   skb->len - udp_offset, 0));
-			if (uh->check == 0)
-				uh->check = CSUM_MANGLED_0;
+		if (need_csum) {
+			__be32 delta = htonl(oldlen + len);
 
-		} else if (protocol == htons(ETH_P_IPV6)) {
-			struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-			u32 len = skb->len - udp_offset;
+			uh->check = ~csum_fold((__force __wsum)
+					       ((__force u32)uh->check +
+						(__force u32)delta));
+			uh->check = gso_make_checksum(skb, ~uh->check);
 
-			uh->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
-						     len, IPPROTO_UDP, 0);
-			uh->check = csum_fold(skb_checksum(skb, udp_offset, len, 0));
 			if (uh->check == 0)
 				uh->check = CSUM_MANGLED_0;
-			skb->ip_summed = CHECKSUM_NONE;
 		}
 
 		skb->protocol = protocol;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 88b4023ecfcf..5c23f4765af9 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -56,7 +56,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 	__wsum csum;
 
 	if (skb->encapsulation &&
-	    skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) {
+	    (skb_shinfo(skb)->gso_type &
+	     (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
 		segs = skb_udp_tunnel_segment(skb, features);
 		goto out;
 	}
@@ -71,6 +72,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 
 		if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
 				      SKB_GSO_UDP_TUNNEL |
+				      SKB_GSO_UDP_TUNNEL_CSUM |
 				      SKB_GSO_IPIP |
 				      SKB_GSO_GRE | SKB_GSO_MPLS) ||
 			     !(type & (SKB_GSO_UDP))))
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index b2f091566f88..d54c5744e3db 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -100,6 +100,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_IPIP |
 		       SKB_GSO_SIT |
 		       SKB_GSO_UDP_TUNNEL |
+		       SKB_GSO_UDP_TUNNEL_CSUM |
 		       SKB_GSO_MPLS |
 		       SKB_GSO_TCPV6 |
 		       0)))
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index b261ee8b83fc..79da8b305ced 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -63,6 +63,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		if (unlikely(type & ~(SKB_GSO_UDP |
 				      SKB_GSO_DODGY |
 				      SKB_GSO_UDP_TUNNEL |
+				      SKB_GSO_UDP_TUNNEL_CSUM |
 				      SKB_GSO_GRE |
 				      SKB_GSO_IPIP |
 				      SKB_GSO_SIT |
@@ -76,7 +77,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 		goto out;
 	}
 
-	if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL)
+	if (skb->encapsulation && skb_shinfo(skb)->gso_type &
+	    (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
 		segs = skb_udp_tunnel_segment(skb, features);
 	else {
 		/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
-- 
cgit 


From 4749c09c37030ccdc44aecebe0f71b02a377fc14 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Wed, 4 Jun 2014 17:20:23 -0700
Subject: gre: Call gso_make_checksum

Call gso_make_checksum. This should have the benefit of using a
checksum that may have been previously computed for the packet.

This also adds NETIF_F_GSO_GRE_CSUM to differentiate devices that
offload GRE GSO with and without the GRE checksum offloaded.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h |  2 ++
 include/linux/skbuff.h          |  2 ++
 include/net/gre.h               |  5 +++--
 net/ipv4/af_inet.c              |  1 +
 net/ipv4/gre_demux.c            |  3 ++-
 net/ipv4/gre_offload.c          | 10 ++++++++--
 net/ipv4/tcp_offload.c          |  1 +
 net/ipv4/udp_offload.c          |  3 ++-
 net/ipv6/ip6_offload.c          |  1 +
 net/ipv6/udp_offload.c          |  1 +
 net/mpls/mpls_gso.c             |  1 +
 11 files changed, 24 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index f1338e0f9866..e5a589435e2b 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -42,6 +42,7 @@ enum {
 	NETIF_F_TSO6_BIT,		/* ... TCPv6 segmentation */
 	NETIF_F_FSO_BIT,		/* ... FCoE segmentation */
 	NETIF_F_GSO_GRE_BIT,		/* ... GRE with TSO */
+	NETIF_F_GSO_GRE_CSUM_BIT,	/* ... GRE with csum with TSO */
 	NETIF_F_GSO_IPIP_BIT,		/* ... IPIP tunnel with TSO */
 	NETIF_F_GSO_SIT_BIT,		/* ... SIT tunnel with TSO */
 	NETIF_F_GSO_UDP_TUNNEL_BIT,	/* ... UDP TUNNEL with TSO */
@@ -112,6 +113,7 @@ enum {
 #define NETIF_F_RXFCS		__NETIF_F(RXFCS)
 #define NETIF_F_RXALL		__NETIF_F(RXALL)
 #define NETIF_F_GSO_GRE		__NETIF_F(GSO_GRE)
+#define NETIF_F_GSO_GRE_CSUM	__NETIF_F(GSO_GRE_CSUM)
 #define NETIF_F_GSO_IPIP	__NETIF_F(GSO_IPIP)
 #define NETIF_F_GSO_SIT		__NETIF_F(GSO_SIT)
 #define NETIF_F_GSO_UDP_TUNNEL	__NETIF_F(GSO_UDP_TUNNEL)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5a6d10a538f5..c705808bef9c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -347,6 +347,8 @@ enum {
 	SKB_GSO_MPLS = 1 << 10,
 
 	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
+
+	SKB_GSO_GRE_CSUM = 1 << 12,
 };
 
 #if BITS_PER_LONG > 32
diff --git a/include/net/gre.h b/include/net/gre.h
index 70046a0b0b89..b53182018743 100644
--- a/include/net/gre.h
+++ b/include/net/gre.h
@@ -37,9 +37,10 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 		      int hdr_len);
 
 static inline struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
-						  bool gre_csum)
+						  bool csum)
 {
-	return iptunnel_handle_offloads(skb, gre_csum, SKB_GSO_GRE);
+	return iptunnel_handle_offloads(skb, csum,
+					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 }
 
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 0070ab87109b..d5e6836cf772 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1254,6 +1254,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
+		       SKB_GSO_GRE_CSUM |
 		       SKB_GSO_IPIP |
 		       SKB_GSO_SIT |
 		       SKB_GSO_TCPV6 |
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index fbfd829f4049..4e9619bca732 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -84,7 +84,8 @@ void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
 			ptr--;
 		}
 		if (tpi->flags&TUNNEL_CSUM &&
-		    !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) {
+		    !(skb_shinfo(skb)->gso_type &
+		      (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
 			*ptr = 0;
 			*(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
 								 skb->len, 0));
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index f1d32280cb54..24deb3928b9e 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -42,6 +42,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_DODGY |
 				  SKB_GSO_TCP_ECN |
 				  SKB_GSO_GRE |
+				  SKB_GSO_GRE_CSUM |
 				  SKB_GSO_IPIP)))
 		goto out;
 
@@ -55,6 +56,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 		goto out;
 
 	csum = !!(greh->flags & GRE_CSUM);
+	if (csum)
+		skb->encap_hdr_csum = 1;
 
 	if (unlikely(!pskb_may_pull(skb, ghl)))
 		goto out;
@@ -94,10 +97,13 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
 				}
 			}
 
-			greh = (struct gre_base_hdr *)(skb->data);
+			skb_reset_transport_header(skb);
+
+			greh = (struct gre_base_hdr *)
+			    skb_transport_header(skb);
 			pcsum = (__be32 *)(greh + 1);
 			*pcsum = 0;
-			*(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0));
+			*(__sum16 *)pcsum = gso_make_checksum(skb, 0);
 		}
 		__skb_push(skb, tnl_hlen - ghl);
 
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index c02f2d2e7bab..4e86c59ec7f7 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -57,6 +57,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
 			       SKB_GSO_TCP_ECN |
 			       SKB_GSO_TCPV6 |
 			       SKB_GSO_GRE |
+			       SKB_GSO_GRE_CSUM |
 			       SKB_GSO_IPIP |
 			       SKB_GSO_SIT |
 			       SKB_GSO_MPLS |
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 5c23f4765af9..7b1840110173 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -74,7 +74,8 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
 				      SKB_GSO_UDP_TUNNEL |
 				      SKB_GSO_UDP_TUNNEL_CSUM |
 				      SKB_GSO_IPIP |
-				      SKB_GSO_GRE | SKB_GSO_MPLS) ||
+				      SKB_GSO_GRE | SKB_GSO_GRE_CSUM |
+				      SKB_GSO_MPLS) ||
 			     !(type & (SKB_GSO_UDP))))
 			goto out;
 
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d54c5744e3db..65eda2a8af48 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -97,6 +97,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
 		       SKB_GSO_DODGY |
 		       SKB_GSO_TCP_ECN |
 		       SKB_GSO_GRE |
+		       SKB_GSO_GRE_CSUM |
 		       SKB_GSO_IPIP |
 		       SKB_GSO_SIT |
 		       SKB_GSO_UDP_TUNNEL |
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 79da8b305ced..0ae3d98f83e0 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -65,6 +65,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
 				      SKB_GSO_UDP_TUNNEL |
 				      SKB_GSO_UDP_TUNNEL_CSUM |
 				      SKB_GSO_GRE |
+				      SKB_GSO_GRE_CSUM |
 				      SKB_GSO_IPIP |
 				      SKB_GSO_SIT |
 				      SKB_GSO_MPLS) ||
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 851cd880b0c0..6b38d083e1c9 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -33,6 +33,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
 				  SKB_GSO_DODGY |
 				  SKB_GSO_TCP_ECN |
 				  SKB_GSO_GRE |
+				  SKB_GSO_GRE_CSUM |
 				  SKB_GSO_IPIP |
 				  SKB_GSO_MPLS)))
 		goto out;
-- 
cgit 


From ca05e3a7551b71891d42d637d3a1e443c6bbd781 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Sun, 1 Jun 2014 23:47:24 +0200
Subject: isdn/capi: move capi_info2str to capidrv.c

capi_info2str() is apparently meant to be of general utility. It is
actually only used in capidrv.c. So move it from capiutil.c to
capidrv.c and (obviously) stop exporting it.

And, since we're touching this, merge the two versions of this
function.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Tilman Schmidt <tilman@imap.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/isdn/capi/capidrv.c   | 195 ++++++++++++++++++++++++++++++++++++++++
 drivers/isdn/capi/capiutil.c  | 200 ------------------------------------------
 include/linux/isdn/capiutil.h |   5 --
 3 files changed, 195 insertions(+), 205 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c
index cc9f1927a322..70e67f6a22ff 100644
--- a/drivers/isdn/capi/capidrv.c
+++ b/drivers/isdn/capi/capidrv.c
@@ -763,6 +763,201 @@ static inline int new_bchan(capidrv_contr *card)
 }
 
 /* ------------------------------------------------------------------- */
+static char *capi_info2str(u16 reason)
+{
+#ifndef CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON
+	return "..";
+#else
+	switch (reason) {
+
+/*-- informative values (corresponding message was processed) -----*/
+	case 0x0001:
+		return "NCPI not supported by current protocol, NCPI ignored";
+	case 0x0002:
+		return "Flags not supported by current protocol, flags ignored";
+	case 0x0003:
+		return "Alert already sent by another application";
+
+/*-- error information concerning CAPI_REGISTER -----*/
+	case 0x1001:
+		return "Too many applications";
+	case 0x1002:
+		return "Logical block size too small, must be at least 128 Bytes";
+	case 0x1003:
+		return "Buffer exceeds 64 kByte";
+	case 0x1004:
+		return "Message buffer size too small, must be at least 1024 Bytes";
+	case 0x1005:
+		return "Max. number of logical connections not supported";
+	case 0x1006:
+		return "Reserved";
+	case 0x1007:
+		return "The message could not be accepted because of an internal busy condition";
+	case 0x1008:
+		return "OS resource error (no memory ?)";
+	case 0x1009:
+		return "CAPI not installed";
+	case 0x100A:
+		return "Controller does not support external equipment";
+	case 0x100B:
+		return "Controller does only support external equipment";
+
+/*-- error information concerning message exchange functions -----*/
+	case 0x1101:
+		return "Illegal application number";
+	case 0x1102:
+		return "Illegal command or subcommand or message length less than 12 bytes";
+	case 0x1103:
+		return "The message could not be accepted because of a queue full condition !! The error code does not imply that CAPI cannot receive messages directed to another controller, PLCI or NCCI";
+	case 0x1104:
+		return "Queue is empty";
+	case 0x1105:
+		return "Queue overflow, a message was lost !! This indicates a configuration error. The only recovery from this error is to perform a CAPI_RELEASE";
+	case 0x1106:
+		return "Unknown notification parameter";
+	case 0x1107:
+		return "The Message could not be accepted because of an internal busy condition";
+	case 0x1108:
+		return "OS Resource error (no memory ?)";
+	case 0x1109:
+		return "CAPI not installed";
+	case 0x110A:
+		return "Controller does not support external equipment";
+	case 0x110B:
+		return "Controller does only support external equipment";
+
+/*-- error information concerning resource / coding problems -----*/
+	case 0x2001:
+		return "Message not supported in current state";
+	case 0x2002:
+		return "Illegal Controller / PLCI / NCCI";
+	case 0x2003:
+		return "Out of PLCI";
+	case 0x2004:
+		return "Out of NCCI";
+	case 0x2005:
+		return "Out of LISTEN";
+	case 0x2006:
+		return "Out of FAX resources (protocol T.30)";
+	case 0x2007:
+		return "Illegal message parameter coding";
+
+/*-- error information concerning requested services  -----*/
+	case 0x3001:
+		return "B1 protocol not supported";
+	case 0x3002:
+		return "B2 protocol not supported";
+	case 0x3003:
+		return "B3 protocol not supported";
+	case 0x3004:
+		return "B1 protocol parameter not supported";
+	case 0x3005:
+		return "B2 protocol parameter not supported";
+	case 0x3006:
+		return "B3 protocol parameter not supported";
+	case 0x3007:
+		return "B protocol combination not supported";
+	case 0x3008:
+		return "NCPI not supported";
+	case 0x3009:
+		return "CIP Value unknown";
+	case 0x300A:
+		return "Flags not supported (reserved bits)";
+	case 0x300B:
+		return "Facility not supported";
+	case 0x300C:
+		return "Data length not supported by current protocol";
+	case 0x300D:
+		return "Reset procedure not supported by current protocol";
+
+/*-- informations about the clearing of a physical connection -----*/
+	case 0x3301:
+		return "Protocol error layer 1 (broken line or B-channel removed by signalling protocol)";
+	case 0x3302:
+		return "Protocol error layer 2";
+	case 0x3303:
+		return "Protocol error layer 3";
+	case 0x3304:
+		return "Another application got that call";
+/*-- T.30 specific reasons -----*/
+	case 0x3311:
+		return "Connecting not successful (remote station is no FAX G3 machine)";
+	case 0x3312:
+		return "Connecting not successful (training error)";
+	case 0x3313:
+		return "Disconnected before transfer (remote station does not support transfer mode, e.g. resolution)";
+	case 0x3314:
+		return "Disconnected during transfer (remote abort)";
+	case 0x3315:
+		return "Disconnected during transfer (remote procedure error, e.g. unsuccessful repetition of T.30 commands)";
+	case 0x3316:
+		return "Disconnected during transfer (local tx data underrun)";
+	case 0x3317:
+		return "Disconnected during transfer (local rx data overflow)";
+	case 0x3318:
+		return "Disconnected during transfer (local abort)";
+	case 0x3319:
+		return "Illegal parameter coding (e.g. SFF coding error)";
+
+/*-- disconnect causes from the network according to ETS 300 102-1/Q.931 -----*/
+	case 0x3481: return "Unallocated (unassigned) number";
+	case 0x3482: return "No route to specified transit network";
+	case 0x3483: return "No route to destination";
+	case 0x3486: return "Channel unacceptable";
+	case 0x3487:
+		return "Call awarded and being delivered in an established channel";
+	case 0x3490: return "Normal call clearing";
+	case 0x3491: return "User busy";
+	case 0x3492: return "No user responding";
+	case 0x3493: return "No answer from user (user alerted)";
+	case 0x3495: return "Call rejected";
+	case 0x3496: return "Number changed";
+	case 0x349A: return "Non-selected user clearing";
+	case 0x349B: return "Destination out of order";
+	case 0x349C: return "Invalid number format";
+	case 0x349D: return "Facility rejected";
+	case 0x349E: return "Response to STATUS ENQUIRY";
+	case 0x349F: return "Normal, unspecified";
+	case 0x34A2: return "No circuit / channel available";
+	case 0x34A6: return "Network out of order";
+	case 0x34A9: return "Temporary failure";
+	case 0x34AA: return "Switching equipment congestion";
+	case 0x34AB: return "Access information discarded";
+	case 0x34AC: return "Requested circuit / channel not available";
+	case 0x34AF: return "Resources unavailable, unspecified";
+	case 0x34B1: return "Quality of service unavailable";
+	case 0x34B2: return "Requested facility not subscribed";
+	case 0x34B9: return "Bearer capability not authorized";
+	case 0x34BA: return "Bearer capability not presently available";
+	case 0x34BF: return "Service or option not available, unspecified";
+	case 0x34C1: return "Bearer capability not implemented";
+	case 0x34C2: return "Channel type not implemented";
+	case 0x34C5: return "Requested facility not implemented";
+	case 0x34C6: return "Only restricted digital information bearer capability is available";
+	case 0x34CF: return "Service or option not implemented, unspecified";
+	case 0x34D1: return "Invalid call reference value";
+	case 0x34D2: return "Identified channel does not exist";
+	case 0x34D3: return "A suspended call exists, but this call identity does not";
+	case 0x34D4: return "Call identity in use";
+	case 0x34D5: return "No call suspended";
+	case 0x34D6: return "Call having the requested call identity has been cleared";
+	case 0x34D8: return "Incompatible destination";
+	case 0x34DB: return "Invalid transit network selection";
+	case 0x34DF: return "Invalid message, unspecified";
+	case 0x34E0: return "Mandatory information element is missing";
+	case 0x34E1: return "Message type non-existent or not implemented";
+	case 0x34E2: return "Message not compatible with call state or message type non-existent or not implemented";
+	case 0x34E3: return "Information element non-existent or not implemented";
+	case 0x34E4: return "Invalid information element contents";
+	case 0x34E5: return "Message not compatible with call state";
+	case 0x34E6: return "Recovery on timer expiry";
+	case 0x34EF: return "Protocol error, unspecified";
+	case 0x34FF: return "Interworking, unspecified";
+
+	default: return "No additional information";
+	}
+#endif
+}
 
 static void handle_controller(_cmsg *cmsg)
 {
diff --git a/drivers/isdn/capi/capiutil.c b/drivers/isdn/capi/capiutil.c
index d26f17033b68..6e797e502cfa 100644
--- a/drivers/isdn/capi/capiutil.c
+++ b/drivers/isdn/capi/capiutil.c
@@ -22,205 +22,6 @@
 
 /* from CAPI2.0 DDK AVM Berlin GmbH */
 
-#ifndef CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON
-char *capi_info2str(u16 reason)
-{
-	return "..";
-}
-#else
-char *capi_info2str(u16 reason)
-{
-	switch (reason) {
-
-/*-- informative values (corresponding message was processed) -----*/
-	case 0x0001:
-		return "NCPI not supported by current protocol, NCPI ignored";
-	case 0x0002:
-		return "Flags not supported by current protocol, flags ignored";
-	case 0x0003:
-		return "Alert already sent by another application";
-
-/*-- error information concerning CAPI_REGISTER -----*/
-	case 0x1001:
-		return "Too many applications";
-	case 0x1002:
-		return "Logical block size too small, must be at least 128 Bytes";
-	case 0x1003:
-		return "Buffer exceeds 64 kByte";
-	case 0x1004:
-		return "Message buffer size too small, must be at least 1024 Bytes";
-	case 0x1005:
-		return "Max. number of logical connections not supported";
-	case 0x1006:
-		return "Reserved";
-	case 0x1007:
-		return "The message could not be accepted because of an internal busy condition";
-	case 0x1008:
-		return "OS resource error (no memory ?)";
-	case 0x1009:
-		return "CAPI not installed";
-	case 0x100A:
-		return "Controller does not support external equipment";
-	case 0x100B:
-		return "Controller does only support external equipment";
-
-/*-- error information concerning message exchange functions -----*/
-	case 0x1101:
-		return "Illegal application number";
-	case 0x1102:
-		return "Illegal command or subcommand or message length less than 12 bytes";
-	case 0x1103:
-		return "The message could not be accepted because of a queue full condition !! The error code does not imply that CAPI cannot receive messages directed to another controller, PLCI or NCCI";
-	case 0x1104:
-		return "Queue is empty";
-	case 0x1105:
-		return "Queue overflow, a message was lost !! This indicates a configuration error. The only recovery from this error is to perform a CAPI_RELEASE";
-	case 0x1106:
-		return "Unknown notification parameter";
-	case 0x1107:
-		return "The Message could not be accepted because of an internal busy condition";
-	case 0x1108:
-		return "OS Resource error (no memory ?)";
-	case 0x1109:
-		return "CAPI not installed";
-	case 0x110A:
-		return "Controller does not support external equipment";
-	case 0x110B:
-		return "Controller does only support external equipment";
-
-/*-- error information concerning resource / coding problems -----*/
-	case 0x2001:
-		return "Message not supported in current state";
-	case 0x2002:
-		return "Illegal Controller / PLCI / NCCI";
-	case 0x2003:
-		return "Out of PLCI";
-	case 0x2004:
-		return "Out of NCCI";
-	case 0x2005:
-		return "Out of LISTEN";
-	case 0x2006:
-		return "Out of FAX resources (protocol T.30)";
-	case 0x2007:
-		return "Illegal message parameter coding";
-
-/*-- error information concerning requested services  -----*/
-	case 0x3001:
-		return "B1 protocol not supported";
-	case 0x3002:
-		return "B2 protocol not supported";
-	case 0x3003:
-		return "B3 protocol not supported";
-	case 0x3004:
-		return "B1 protocol parameter not supported";
-	case 0x3005:
-		return "B2 protocol parameter not supported";
-	case 0x3006:
-		return "B3 protocol parameter not supported";
-	case 0x3007:
-		return "B protocol combination not supported";
-	case 0x3008:
-		return "NCPI not supported";
-	case 0x3009:
-		return "CIP Value unknown";
-	case 0x300A:
-		return "Flags not supported (reserved bits)";
-	case 0x300B:
-		return "Facility not supported";
-	case 0x300C:
-		return "Data length not supported by current protocol";
-	case 0x300D:
-		return "Reset procedure not supported by current protocol";
-
-/*-- informations about the clearing of a physical connection -----*/
-	case 0x3301:
-		return "Protocol error layer 1 (broken line or B-channel removed by signalling protocol)";
-	case 0x3302:
-		return "Protocol error layer 2";
-	case 0x3303:
-		return "Protocol error layer 3";
-	case 0x3304:
-		return "Another application got that call";
-/*-- T.30 specific reasons -----*/
-	case 0x3311:
-		return "Connecting not successful (remote station is no FAX G3 machine)";
-	case 0x3312:
-		return "Connecting not successful (training error)";
-	case 0x3313:
-		return "Disconnected before transfer (remote station does not support transfer mode, e.g. resolution)";
-	case 0x3314:
-		return "Disconnected during transfer (remote abort)";
-	case 0x3315:
-		return "Disconnected during transfer (remote procedure error, e.g. unsuccessful repetition of T.30 commands)";
-	case 0x3316:
-		return "Disconnected during transfer (local tx data underrun)";
-	case 0x3317:
-		return "Disconnected during transfer (local rx data overflow)";
-	case 0x3318:
-		return "Disconnected during transfer (local abort)";
-	case 0x3319:
-		return "Illegal parameter coding (e.g. SFF coding error)";
-
-/*-- disconnect causes from the network according to ETS 300 102-1/Q.931 -----*/
-	case 0x3481: return "Unallocated (unassigned) number";
-	case 0x3482: return "No route to specified transit network";
-	case 0x3483: return "No route to destination";
-	case 0x3486: return "Channel unacceptable";
-	case 0x3487:
-		return "Call awarded and being delivered in an established channel";
-	case 0x3490: return "Normal call clearing";
-	case 0x3491: return "User busy";
-	case 0x3492: return "No user responding";
-	case 0x3493: return "No answer from user (user alerted)";
-	case 0x3495: return "Call rejected";
-	case 0x3496: return "Number changed";
-	case 0x349A: return "Non-selected user clearing";
-	case 0x349B: return "Destination out of order";
-	case 0x349C: return "Invalid number format";
-	case 0x349D: return "Facility rejected";
-	case 0x349E: return "Response to STATUS ENQUIRY";
-	case 0x349F: return "Normal, unspecified";
-	case 0x34A2: return "No circuit / channel available";
-	case 0x34A6: return "Network out of order";
-	case 0x34A9: return "Temporary failure";
-	case 0x34AA: return "Switching equipment congestion";
-	case 0x34AB: return "Access information discarded";
-	case 0x34AC: return "Requested circuit / channel not available";
-	case 0x34AF: return "Resources unavailable, unspecified";
-	case 0x34B1: return "Quality of service unavailable";
-	case 0x34B2: return "Requested facility not subscribed";
-	case 0x34B9: return "Bearer capability not authorized";
-	case 0x34BA: return "Bearer capability not presently available";
-	case 0x34BF: return "Service or option not available, unspecified";
-	case 0x34C1: return "Bearer capability not implemented";
-	case 0x34C2: return "Channel type not implemented";
-	case 0x34C5: return "Requested facility not implemented";
-	case 0x34C6: return "Only restricted digital information bearer capability is available";
-	case 0x34CF: return "Service or option not implemented, unspecified";
-	case 0x34D1: return "Invalid call reference value";
-	case 0x34D2: return "Identified channel does not exist";
-	case 0x34D3: return "A suspended call exists, but this call identity does not";
-	case 0x34D4: return "Call identity in use";
-	case 0x34D5: return "No call suspended";
-	case 0x34D6: return "Call having the requested call identity has been cleared";
-	case 0x34D8: return "Incompatible destination";
-	case 0x34DB: return "Invalid transit network selection";
-	case 0x34DF: return "Invalid message, unspecified";
-	case 0x34E0: return "Mandatory information element is missing";
-	case 0x34E1: return "Message type non-existent or not implemented";
-	case 0x34E2: return "Message not compatible with call state or message type non-existent or not implemented";
-	case 0x34E3: return "Information element non-existent or not implemented";
-	case 0x34E4: return "Invalid information element contents";
-	case 0x34E5: return "Message not compatible with call state";
-	case 0x34E6: return "Recovery on timer expiry";
-	case 0x34EF: return "Protocol error, unspecified";
-	case 0x34FF: return "Interworking, unspecified";
-
-	default: return "No additional information";
-	}
-}
-#endif
-
 typedef struct {
 	int typ;
 	size_t off;
@@ -1073,4 +874,3 @@ EXPORT_SYMBOL(capi_cmsg_header);
 EXPORT_SYMBOL(capi_cmd2str);
 EXPORT_SYMBOL(capi_cmsg2str);
 EXPORT_SYMBOL(capi_message2str);
-EXPORT_SYMBOL(capi_info2str);
diff --git a/include/linux/isdn/capiutil.h b/include/linux/isdn/capiutil.h
index 5a52f2c94f3f..44bd6046e6e2 100644
--- a/include/linux/isdn/capiutil.h
+++ b/include/linux/isdn/capiutil.h
@@ -164,11 +164,6 @@ unsigned capi_cmsg_header(_cmsg * cmsg, __u16 _ApplId,
 			  __u8 _Command, __u8 _Subcommand,
 			  __u16 _Messagenumber, __u32 _Controller);
 
-/*
- * capi_info2str generated a readable string for Capi2.0 reasons.
- */
-char *capi_info2str(__u16 reason);
-
 /*-----------------------------------------------------------------------*/
 
 /*
-- 
cgit 


From 4fc828e24cd9c385d3a44e1b499ec7fc70239d8a Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <davidlohr@hp.com>
Date: Fri, 2 May 2014 11:24:15 -0700
Subject: locking/rwsem: Support optimistic spinning

We have reached the point where our mutexes are quite fine tuned
for a number of situations. This includes the use of heuristics
and optimistic spinning, based on MCS locking techniques.

Exclusive ownership of read-write semaphores is, conceptually,
just about the same as mutexes, making them close cousins. To
this end we need to make them both perform similarly, and
right now, rwsems are simply not up to it. This was discovered
by both reverting commit 4fc3f1d6 (mm/rmap, migration: Make
rmap_walk_anon() and try_to_unmap_anon() more scalable) and
similarly, converting some other mutexes (ie: i_mmap_mutex) to
rwsems. This creates a situation where users have to choose
between a rwsem and mutex taking into account this important
performance difference. Specifically, the biggest difference between
both locks is that when we fail to acquire a mutex in the fastpath,
optimistic spinning comes into play and we can avoid a large
amount of unnecessary sleeping and overhead of moving tasks in
and out of the wait queue. Rwsems do not have such logic.

This patch, based on the work from Tim Chen and me, adds support
for write-side optimistic spinning when the lock is contended.
It also includes support for the recently added cancelable MCS
locking for adaptive spinning. Note that this is only applicable
to the xadd method, and the spinlock rwsem variant remains intact.

Allowing optimistic spinning before putting the writer on the wait
queue reduces wait queue contention and provides a greater chance
for the rwsem to get acquired. With these changes, rwsem is on par
with mutex. The performance benefits can be seen on a number of
workloads. For instance, on an 8 socket, 80 core 64bit Westmere box,
aim7 shows the following improvements in throughput:

 +--------------+---------------------+-----------------+
 |   Workload   | throughput-increase | number of users |
 +--------------+---------------------+-----------------+
 | alltests     | 20%                 | >1000           |
 | custom       | 27%, 60%            | 10-100, >1000   |
 | high_systime | 36%, 30%            | >100, >1000     |
 | shared       | 58%, 29%            | 10-100, >1000   |
 +--------------+---------------------+-----------------+

There was also improvement on smaller systems, such as a quad-core
x86-64 laptop running a 30Gb PostgreSQL (pgbench) workload for up
to +60% in throughput for over 50 clients. Additionally, benefits
were also noticed in exim (mail server) workloads. Furthermore, no
performance regressions have been seen at all.

Based-on-work-from: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
[peterz: rej fixup due to comment patches, sched/rt.h header]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alex Shi <alex.shi@linaro.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: "Paul E.McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Aswin Chandramouleeswaran <aswin@hp.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "Scott J Norton" <scott.norton@hp.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Chris Mason <clm@fb.com>
Cc: Josef Bacik <jbacik@fusionio.com>
Link: http://lkml.kernel.org/r/1399055055.6275.15.camel@buesod1.americas.hpqcorp.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/rwsem.h       |  25 ++++-
 kernel/locking/rwsem-xadd.c | 225 ++++++++++++++++++++++++++++++++++++++------
 kernel/locking/rwsem.c      |  31 +++++-
 3 files changed, 248 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 03f3b05e8ec1..3e108f154cb6 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -16,6 +16,7 @@
 
 #include <linux/atomic.h>
 
+struct optimistic_spin_queue;
 struct rw_semaphore;
 
 #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
@@ -23,9 +24,17 @@ struct rw_semaphore;
 #else
 /* All arch specific implementations share the same struct */
 struct rw_semaphore {
-	long			count;
-	raw_spinlock_t		wait_lock;
-	struct list_head	wait_list;
+	long count;
+	raw_spinlock_t wait_lock;
+	struct list_head wait_list;
+#ifdef CONFIG_SMP
+	/*
+	 * Write owner. Used as a speculative check to see
+	 * if the owner is running on the cpu.
+	 */
+	struct task_struct *owner;
+	struct optimistic_spin_queue *osq; /* spinner MCS lock */
+#endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map	dep_map;
 #endif
@@ -55,11 +64,21 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 # define __RWSEM_DEP_MAP_INIT(lockname)
 #endif
 
+#ifdef CONFIG_SMP
+#define __RWSEM_INITIALIZER(name)			\
+	{ RWSEM_UNLOCKED_VALUE,				\
+	  __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),	\
+	  LIST_HEAD_INIT((name).wait_list),		\
+	  NULL, /* owner */				\
+	  NULL /* mcs lock */                           \
+	  __RWSEM_DEP_MAP_INIT(name) }
+#else
 #define __RWSEM_INITIALIZER(name)			\
 	{ RWSEM_UNLOCKED_VALUE,				\
 	  __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),	\
 	  LIST_HEAD_INIT((name).wait_list)		\
 	  __RWSEM_DEP_MAP_INIT(name) }
+#endif
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index b4219ff87b8c..4a75278142cd 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -5,11 +5,17 @@
  *
  * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
  * and Michel Lespinasse <walken@google.com>
+ *
+ * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
+ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
  */
 #include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/export.h>
+#include <linux/sched/rt.h>
+
+#include "mcs_spinlock.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -76,6 +82,10 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	raw_spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
+#ifdef CONFIG_SMP
+	sem->owner = NULL;
+	sem->osq = NULL;
+#endif
 }
 
 EXPORT_SYMBOL(__init_rwsem);
@@ -190,7 +200,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
 }
 
 /*
- * wait for the read lock to be granted
+ * Wait for the read lock to be granted
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
@@ -237,64 +247,221 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	return sem;
 }
 
+static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
+{
+	if (!(count & RWSEM_ACTIVE_MASK)) {
+		/* try acquiring the write lock */
+		if (sem->count == RWSEM_WAITING_BIAS &&
+		    cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
+			    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
+			if (!list_is_singular(&sem->wait_list))
+				rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+			return true;
+		}
+	}
+	return false;
+}
+
+#ifdef CONFIG_SMP
 /*
- * wait until we successfully acquire the write lock
+ * Try to acquire write lock before the writer has been put on wait queue.
+ */
+static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
+{
+	long old, count = ACCESS_ONCE(sem->count);
+
+	while (true) {
+		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
+			return false;
+
+		old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
+		if (old == count)
+			return true;
+
+		count = old;
+	}
+}
+
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
+{
+	struct task_struct *owner;
+	bool on_cpu = true;
+
+	if (need_resched())
+		return 0;
+
+	rcu_read_lock();
+	owner = ACCESS_ONCE(sem->owner);
+	if (owner)
+		on_cpu = owner->on_cpu;
+	rcu_read_unlock();
+
+	/*
+	 * If sem->owner is not set, the rwsem owner may have
+	 * just acquired it and not set the owner yet or the rwsem
+	 * has been released.
+	 */
+	return on_cpu;
+}
+
+static inline bool owner_running(struct rw_semaphore *sem,
+				 struct task_struct *owner)
+{
+	if (sem->owner != owner)
+		return false;
+
+	/*
+	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
+	 * sem->owner still matches owner, if that fails, owner might
+	 * point to free()d memory, if it still matches, the rcu_read_lock()
+	 * ensures the memory stays valid.
+	 */
+	barrier();
+
+	return owner->on_cpu;
+}
+
+static noinline
+bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
+{
+	rcu_read_lock();
+	while (owner_running(sem, owner)) {
+		if (need_resched())
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+	rcu_read_unlock();
+
+	/*
+	 * We break out the loop above on need_resched() or when the
+	 * owner changed, which is a sign for heavy contention. Return
+	 * success only when sem->owner is NULL.
+	 */
+	return sem->owner == NULL;
+}
+
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+	struct task_struct *owner;
+	bool taken = false;
+
+	preempt_disable();
+
+	/* sem->wait_lock should not be held when doing optimistic spinning */
+	if (!rwsem_can_spin_on_owner(sem))
+		goto done;
+
+	if (!osq_lock(&sem->osq))
+		goto done;
+
+	while (true) {
+		owner = ACCESS_ONCE(sem->owner);
+		if (owner && !rwsem_spin_on_owner(sem, owner))
+			break;
+
+		/* wait_lock will be acquired if write_lock is obtained */
+		if (rwsem_try_write_lock_unqueued(sem)) {
+			taken = true;
+			break;
+		}
+
+		/*
+		 * When there's no owner, we might have preempted between the
+		 * owner acquiring the lock and setting the owner field. If
+		 * we're an RT task that will live-lock because we won't let
+		 * the owner complete.
+		 */
+		if (!owner && (need_resched() || rt_task(current)))
+			break;
+
+		/*
+		 * The cpu_relax() call is a compiler barrier which forces
+		 * everything in this loop to be re-loaded. We don't need
+		 * memory barriers as we'll eventually observe the right
+		 * values at the cost of a few extra spins.
+		 */
+		arch_mutex_cpu_relax();
+	}
+	osq_unlock(&sem->osq);
+done:
+	preempt_enable();
+	return taken;
+}
+
+#else
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+	return false;
+}
+#endif
+
+/*
+ * Wait until we successfully acquire the write lock
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 {
-	long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
+	long count;
+	bool waiting = true; /* any queued threads before us */
 	struct rwsem_waiter waiter;
-	struct task_struct *tsk = current;
 
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
+	/* undo write bias from down_write operation, stop active locking */
+	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
+
+	/* do optimistic spinning and steal lock if possible */
+	if (rwsem_optimistic_spin(sem))
+		return sem;
+
+	/*
+	 * Optimistic spinning failed, proceed to the slowpath
+	 * and block until we can acquire the sem.
+	 */
+	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_WRITE;
 
 	raw_spin_lock_irq(&sem->wait_lock);
+
+	/* account for this before adding a new element to the list */
 	if (list_empty(&sem->wait_list))
-		adjustment += RWSEM_WAITING_BIAS;
+		waiting = false;
+
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 	/* we're now waiting on the lock, but no longer actively locking */
-	count = rwsem_atomic_update(adjustment, sem);
+	if (waiting) {
+		count = ACCESS_ONCE(sem->count);
 
-	/* If there were already threads queued before us and there are no
-	 * active writers, the lock must be read owned; so we try to wake
-	 * any read locks that were queued ahead of us. */
-	if (count > RWSEM_WAITING_BIAS &&
-	    adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+		/*
+		 * If there were already threads queued before us and there are no
+		 * active writers, the lock must be read owned; so we try to wake
+		 * any read locks that were queued ahead of us.
+		 */
+		if (count > RWSEM_WAITING_BIAS)
+			sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+
+	} else
+		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
 
 	/* wait until we successfully acquire the lock */
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+	set_current_state(TASK_UNINTERRUPTIBLE);
 	while (true) {
-		if (!(count & RWSEM_ACTIVE_MASK)) {
-			/* Try acquiring the write lock. */
-			count = RWSEM_ACTIVE_WRITE_BIAS;
-			if (!list_is_singular(&sem->wait_list))
-				count += RWSEM_WAITING_BIAS;
-
-			if (sem->count == RWSEM_WAITING_BIAS &&
-			    cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
-							RWSEM_WAITING_BIAS)
-				break;
-		}
-
+		if (rwsem_try_write_lock(count, sem))
+			break;
 		raw_spin_unlock_irq(&sem->wait_lock);
 
 		/* Block until there are no active lockers. */
 		do {
 			schedule();
-			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+			set_current_state(TASK_UNINTERRUPTIBLE);
 		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
 
 		raw_spin_lock_irq(&sem->wait_lock);
 	}
+	__set_current_state(TASK_RUNNING);
 
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
-	tsk->state = TASK_RUNNING;
 
 	return sem;
 }
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index cfff1435bdfb..42f806de49d4 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -12,6 +12,27 @@
 
 #include <linux/atomic.h>
 
+#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+	sem->owner = current;
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+	sem->owner = NULL;
+}
+
+#else
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+}
+#endif
+
 /*
  * lock for reading
  */
@@ -48,6 +69,7 @@ void __sched down_write(struct rw_semaphore *sem)
 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(down_write);
@@ -59,8 +81,11 @@ int down_write_trylock(struct rw_semaphore *sem)
 {
 	int ret = __down_write_trylock(sem);
 
-	if (ret == 1)
+	if (ret == 1) {
 		rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
+		rwsem_set_owner(sem);
+	}
+
 	return ret;
 }
 
@@ -85,6 +110,7 @@ void up_write(struct rw_semaphore *sem)
 {
 	rwsem_release(&sem->dep_map, 1, _RET_IP_);
 
+	rwsem_clear_owner(sem);
 	__up_write(sem);
 }
 
@@ -99,6 +125,7 @@ void downgrade_write(struct rw_semaphore *sem)
 	 * lockdep: a downgraded write will live on as a write
 	 * dependency.
 	 */
+	rwsem_clear_owner(sem);
 	__downgrade_write(sem);
 }
 
@@ -122,6 +149,7 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
 	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(_down_write_nest_lock);
@@ -141,6 +169,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
 	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
 
 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
+	rwsem_set_owner(sem);
 }
 
 EXPORT_SYMBOL(down_write_nested);
-- 
cgit 


From dbb5eafa23fdb7a00d64b04e23a3efc59d6474cb Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <davidlohr@hp.com>
Date: Mon, 19 May 2014 17:27:57 -0700
Subject: locking/rwsem: Fix warnings for CONFIG_RWSEM_GENERIC_SPINLOCK

Optimistic spinning is only used by the xadd variant
of rw-semaphores. Make sure that we use the old version
of the __RWSEM_INITIALIZER macro for systems that rely
on the spinlock one, otherwise warnings can be triggered,
such as the following reported on an arm box:

  ipc/ipcns_notifier.c:22:8: warning: excess elements in struct initializer [enabled by default]
  ipc/ipcns_notifier.c:22:8: warning: (near initialization for 'ipcns_chain.rwsem') [enabled by default]
  ipc/ipcns_notifier.c:22:8: warning: excess elements in struct initializer [enabled by default]
  ipc/ipcns_notifier.c:22:8: warning: (near initialization for 'ipcns_chain.rwsem') [enabled by default]

Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: Alex Shi <alex.shi@linaro.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Chris Mason <clm@fb.com>
Cc: Josef Bacik <jbacik@fusionio.com>
Link: http://lkml.kernel.org/r/1400545677.6399.10.camel@buesod1.americas.hpqcorp.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/rwsem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 3e108f154cb6..8d79708146aa 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -64,7 +64,7 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 # define __RWSEM_DEP_MAP_INIT(lockname)
 #endif
 
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && !defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
 #define __RWSEM_INITIALIZER(name)			\
 	{ RWSEM_UNLOCKED_VALUE,				\
 	  __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),	\
-- 
cgit 


From fa93384f40deeb294fd29f2fdcadbd0ebc2dedf1 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 23 May 2014 13:20:42 +0300
Subject: sched: Fix signedness bug in yield_to()

yield_to() is supposed to return -ESRCH if there is no task to
yield to, but because the type is bool that is the same as returning
true.

The only place I see which cares is kvm_vcpu_on_spin().

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Raghavendra <raghavendra.kt@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: kvm@vger.kernel.org
Link: http://lkml.kernel.org/r/20140523102042.GA7267@mwanda
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kvm_host.h | 2 +-
 include/linux/sched.h    | 2 +-
 kernel/sched/core.c      | 2 +-
 virt/kvm/kvm_main.c      | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7d21cf9f4380..3c4bcf146159 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -584,7 +584,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
-bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
+int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0f91d00efd87..6790c3b42072 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2180,7 +2180,7 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif
 
-extern bool yield_to(struct task_struct *p, bool preempt);
+extern int yield_to(struct task_struct *p, bool preempt);
 extern void set_user_nice(struct task_struct *p, long nice);
 extern int task_prio(const struct task_struct *p);
 /**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 321d800e4baa..afcc84234a3e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4195,7 +4195,7 @@ EXPORT_SYMBOL(yield);
  *	false (0) if we failed to boost the target.
  *	-ESRCH if there's no task to yield to.
  */
-bool __sched yield_to(struct task_struct *p, bool preempt)
+int __sched yield_to(struct task_struct *p, bool preempt)
 {
 	struct task_struct *curr = current;
 	struct rq *rq, *p_rq;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 56baae8c2f56..86d1c457458d 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1708,11 +1708,11 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
 #endif /* !CONFIG_S390 */
 
-bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
+int kvm_vcpu_yield_to(struct kvm_vcpu *target)
 {
 	struct pid *pid;
 	struct task_struct *task = NULL;
-	bool ret = false;
+	int ret = 0;
 
 	rcu_read_lock();
 	pid = rcu_dereference(target->pid);
-- 
cgit 


From 63b2ca30bdb3dbf60bc7ac5f46713c0d32308261 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Mon, 26 May 2014 18:19:37 -0400
Subject: sched: Let 'struct sched_group_power' care about CPU capacity

It is better not to think about compute capacity as being equivalent
to "CPU power".  The upcoming "power aware" scheduler work may create
confusion with the notion of energy consumption if "power" is used too
liberally.

Since struct sched_group_power is really about compute capacity of sched
groups, let's rename it to struct sched_group_capacity. Similarly sgp
becomes sgc. Related variables and functions dealing with groups are also
adjusted accordingly.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: linaro-kernel@lists.linaro.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/n/tip-5yeix833vvgf2uyj5o36hpu9@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h |   2 +-
 kernel/sched/core.c   |  81 +++++++++++++++----------------
 kernel/sched/fair.c   | 131 +++++++++++++++++++++++++-------------------------
 kernel/sched/sched.h  |  16 +++---
 4 files changed, 115 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6790c3b42072..a96f03598c61 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1013,7 +1013,7 @@ typedef const int (*sched_domain_flags_f)(void);
 struct sd_data {
 	struct sched_domain **__percpu sd;
 	struct sched_group **__percpu sg;
-	struct sched_group_power **__percpu sgp;
+	struct sched_group_capacity **__percpu sgc;
 };
 
 struct sched_domain_topology_level {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index afcc84234a3e..2e1fb0902200 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5221,14 +5221,13 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		}
 
 		/*
-		 * Even though we initialize ->power to something semi-sane,
-		 * we leave power_orig unset. This allows us to detect if
+		 * Even though we initialize ->capacity to something semi-sane,
+		 * we leave capacity_orig unset. This allows us to detect if
 		 * domain iteration is still funny without causing /0 traps.
 		 */
-		if (!group->sgp->power_orig) {
+		if (!group->sgc->capacity_orig) {
 			printk(KERN_CONT "\n");
-			printk(KERN_ERR "ERROR: domain->cpu_power not "
-					"set\n");
+			printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
 			break;
 		}
 
@@ -5250,9 +5249,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
 		printk(KERN_CONT " %s", str);
-		if (group->sgp->power != SCHED_POWER_SCALE) {
-			printk(KERN_CONT " (cpu_power = %d)",
-				group->sgp->power);
+		if (group->sgc->capacity != SCHED_POWER_SCALE) {
+			printk(KERN_CONT " (cpu_capacity = %d)",
+				group->sgc->capacity);
 		}
 
 		group = group->next;
@@ -5466,7 +5465,7 @@ static struct root_domain *alloc_rootdomain(void)
 	return rd;
 }
 
-static void free_sched_groups(struct sched_group *sg, int free_sgp)
+static void free_sched_groups(struct sched_group *sg, int free_sgc)
 {
 	struct sched_group *tmp, *first;
 
@@ -5477,8 +5476,8 @@ static void free_sched_groups(struct sched_group *sg, int free_sgp)
 	do {
 		tmp = sg->next;
 
-		if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
-			kfree(sg->sgp);
+		if (free_sgc && atomic_dec_and_test(&sg->sgc->ref))
+			kfree(sg->sgc);
 
 		kfree(sg);
 		sg = tmp;
@@ -5496,7 +5495,7 @@ static void free_sched_domain(struct rcu_head *rcu)
 	if (sd->flags & SD_OVERLAP) {
 		free_sched_groups(sd->groups, 1);
 	} else if (atomic_dec_and_test(&sd->groups->ref)) {
-		kfree(sd->groups->sgp);
+		kfree(sd->groups->sgc);
 		kfree(sd->groups);
 	}
 	kfree(sd);
@@ -5707,17 +5706,17 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 
 		cpumask_or(covered, covered, sg_span);
 
-		sg->sgp = *per_cpu_ptr(sdd->sgp, i);
-		if (atomic_inc_return(&sg->sgp->ref) == 1)
+		sg->sgc = *per_cpu_ptr(sdd->sgc, i);
+		if (atomic_inc_return(&sg->sgc->ref) == 1)
 			build_group_mask(sd, sg);
 
 		/*
-		 * Initialize sgp->power such that even if we mess up the
+		 * Initialize sgc->capacity such that even if we mess up the
 		 * domains and no possible iteration will get us here, we won't
 		 * die on a /0 trap.
 		 */
-		sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span);
-		sg->sgp->power_orig = sg->sgp->power;
+		sg->sgc->capacity = SCHED_POWER_SCALE * cpumask_weight(sg_span);
+		sg->sgc->capacity_orig = sg->sgc->capacity;
 
 		/*
 		 * Make sure the first group of this domain contains the
@@ -5755,8 +5754,8 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 
 	if (sg) {
 		*sg = *per_cpu_ptr(sdd->sg, cpu);
-		(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
-		atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
+		(*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu);
+		atomic_set(&(*sg)->sgc->ref, 1); /* for claim_allocations */
 	}
 
 	return cpu;
@@ -5819,16 +5818,16 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 }
 
 /*
- * Initialize sched groups cpu_power.
+ * Initialize sched groups cpu_capacity.
  *
- * cpu_power indicates the capacity of sched group, which is used while
+ * cpu_capacity indicates the capacity of sched group, which is used while
  * distributing the load between different sched groups in a sched domain.
- * Typically cpu_power for all the groups in a sched domain will be same unless
- * there are asymmetries in the topology. If there are asymmetries, group
- * having more cpu_power will pickup more load compared to the group having
- * less cpu_power.
+ * Typically cpu_capacity for all the groups in a sched domain will be same
+ * unless there are asymmetries in the topology. If there are asymmetries,
+ * group having more cpu_capacity will pickup more load compared to the
+ * group having less cpu_capacity.
  */
-static void init_sched_groups_power(int cpu, struct sched_domain *sd)
+static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 {
 	struct sched_group *sg = sd->groups;
 
@@ -5842,8 +5841,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 	if (cpu != group_balance_cpu(sg))
 		return;
 
-	update_group_power(sd, cpu);
-	atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
+	update_group_capacity(sd, cpu);
+	atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
 }
 
 /*
@@ -5934,8 +5933,8 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
 	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
 		*per_cpu_ptr(sdd->sg, cpu) = NULL;
 
-	if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
-		*per_cpu_ptr(sdd->sgp, cpu) = NULL;
+	if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref))
+		*per_cpu_ptr(sdd->sgc, cpu) = NULL;
 }
 
 #ifdef CONFIG_NUMA
@@ -6337,14 +6336,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 		if (!sdd->sg)
 			return -ENOMEM;
 
-		sdd->sgp = alloc_percpu(struct sched_group_power *);
-		if (!sdd->sgp)
+		sdd->sgc = alloc_percpu(struct sched_group_capacity *);
+		if (!sdd->sgc)
 			return -ENOMEM;
 
 		for_each_cpu(j, cpu_map) {
 			struct sched_domain *sd;
 			struct sched_group *sg;
-			struct sched_group_power *sgp;
+			struct sched_group_capacity *sgc;
 
 		       	sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
@@ -6362,12 +6361,12 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
 			*per_cpu_ptr(sdd->sg, j) = sg;
 
-			sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
+			sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
-			if (!sgp)
+			if (!sgc)
 				return -ENOMEM;
 
-			*per_cpu_ptr(sdd->sgp, j) = sgp;
+			*per_cpu_ptr(sdd->sgc, j) = sgc;
 		}
 	}
 
@@ -6394,15 +6393,15 @@ static void __sdt_free(const struct cpumask *cpu_map)
 
 			if (sdd->sg)
 				kfree(*per_cpu_ptr(sdd->sg, j));
-			if (sdd->sgp)
-				kfree(*per_cpu_ptr(sdd->sgp, j));
+			if (sdd->sgc)
+				kfree(*per_cpu_ptr(sdd->sgc, j));
 		}
 		free_percpu(sdd->sd);
 		sdd->sd = NULL;
 		free_percpu(sdd->sg);
 		sdd->sg = NULL;
-		free_percpu(sdd->sgp);
-		sdd->sgp = NULL;
+		free_percpu(sdd->sgc);
+		sdd->sgc = NULL;
 	}
 }
 
@@ -6479,7 +6478,7 @@ static int build_sched_domains(const struct cpumask *cpu_map,
 
 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 			claim_allocations(i, sd);
-			init_sched_groups_power(i, sd);
+			init_sched_groups_capacity(i, sd);
 		}
 	}
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e401e446e87c..36bd4d23fca8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4369,8 +4369,8 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 			avg_load += load;
 		}
 
-		/* Adjust by relative CPU power of the group */
-		avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
+		/* Adjust by relative CPU capacity of the group */
+		avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgc->capacity;
 
 		if (local_group) {
 			this_load = avg_load;
@@ -5532,7 +5532,7 @@ struct sg_lb_stats {
 	unsigned long group_load; /* Total load over the CPUs of the group */
 	unsigned long sum_weighted_load; /* Weighted load of group's tasks */
 	unsigned long load_per_task;
-	unsigned long group_power;
+	unsigned long group_capacity;
 	unsigned int sum_nr_running; /* Nr tasks running in the group */
 	unsigned int group_capacity_factor;
 	unsigned int idle_cpus;
@@ -5553,7 +5553,7 @@ struct sd_lb_stats {
 	struct sched_group *busiest;	/* Busiest group in this sd */
 	struct sched_group *local;	/* Local group in this sd */
 	unsigned long total_load;	/* Total load of all groups in sd */
-	unsigned long total_pwr;	/* Total power of all groups in sd */
+	unsigned long total_capacity;	/* Total capacity of all groups in sd */
 	unsigned long avg_load;	/* Average load across all groups in sd */
 
 	struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */
@@ -5572,7 +5572,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
 		.busiest = NULL,
 		.local = NULL,
 		.total_load = 0UL,
-		.total_pwr = 0UL,
+		.total_capacity = 0UL,
 		.busiest_stat = {
 			.avg_load = 0UL,
 		},
@@ -5681,7 +5681,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 		power >>= SCHED_POWER_SHIFT;
 	}
 
-	sdg->sgp->power_orig = power;
+	sdg->sgc->capacity_orig = power;
 
 	if (sched_feat(ARCH_POWER))
 		power *= arch_scale_freq_power(sd, cpu);
@@ -5697,26 +5697,26 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 		power = 1;
 
 	cpu_rq(cpu)->cpu_power = power;
-	sdg->sgp->power = power;
+	sdg->sgc->capacity = power;
 }
 
-void update_group_power(struct sched_domain *sd, int cpu)
+void update_group_capacity(struct sched_domain *sd, int cpu)
 {
 	struct sched_domain *child = sd->child;
 	struct sched_group *group, *sdg = sd->groups;
-	unsigned long power, power_orig;
+	unsigned long capacity, capacity_orig;
 	unsigned long interval;
 
 	interval = msecs_to_jiffies(sd->balance_interval);
 	interval = clamp(interval, 1UL, max_load_balance_interval);
-	sdg->sgp->next_update = jiffies + interval;
+	sdg->sgc->next_update = jiffies + interval;
 
 	if (!child) {
 		update_cpu_power(sd, cpu);
 		return;
 	}
 
-	power_orig = power = 0;
+	capacity_orig = capacity = 0;
 
 	if (child->flags & SD_OVERLAP) {
 		/*
@@ -5725,31 +5725,31 @@ void update_group_power(struct sched_domain *sd, int cpu)
 		 */
 
 		for_each_cpu(cpu, sched_group_cpus(sdg)) {
-			struct sched_group_power *sgp;
+			struct sched_group_capacity *sgc;
 			struct rq *rq = cpu_rq(cpu);
 
 			/*
-			 * build_sched_domains() -> init_sched_groups_power()
+			 * build_sched_domains() -> init_sched_groups_capacity()
 			 * gets here before we've attached the domains to the
 			 * runqueues.
 			 *
 			 * Use power_of(), which is set irrespective of domains
 			 * in update_cpu_power().
 			 *
-			 * This avoids power/power_orig from being 0 and
+			 * This avoids capacity/capacity_orig from being 0 and
 			 * causing divide-by-zero issues on boot.
 			 *
-			 * Runtime updates will correct power_orig.
+			 * Runtime updates will correct capacity_orig.
 			 */
 			if (unlikely(!rq->sd)) {
-				power_orig += power_of(cpu);
-				power += power_of(cpu);
+				capacity_orig += power_of(cpu);
+				capacity += power_of(cpu);
 				continue;
 			}
 
-			sgp = rq->sd->groups->sgp;
-			power_orig += sgp->power_orig;
-			power += sgp->power;
+			sgc = rq->sd->groups->sgc;
+			capacity_orig += sgc->capacity_orig;
+			capacity += sgc->capacity;
 		}
 	} else  {
 		/*
@@ -5759,14 +5759,14 @@ void update_group_power(struct sched_domain *sd, int cpu)
 
 		group = child->groups;
 		do {
-			power_orig += group->sgp->power_orig;
-			power += group->sgp->power;
+			capacity_orig += group->sgc->capacity_orig;
+			capacity += group->sgc->capacity;
 			group = group->next;
 		} while (group != child->groups);
 	}
 
-	sdg->sgp->power_orig = power_orig;
-	sdg->sgp->power = power;
+	sdg->sgc->capacity_orig = capacity_orig;
+	sdg->sgc->capacity = capacity;
 }
 
 /*
@@ -5786,9 +5786,9 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 		return 0;
 
 	/*
-	 * If ~90% of the cpu_power is still there, we're good.
+	 * If ~90% of the cpu_capacity is still there, we're good.
 	 */
-	if (group->sgp->power * 32 > group->sgp->power_orig * 29)
+	if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29)
 		return 1;
 
 	return 0;
@@ -5825,7 +5825,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 
 static inline int sg_imbalanced(struct sched_group *group)
 {
-	return group->sgp->imbalance;
+	return group->sgc->imbalance;
 }
 
 /*
@@ -5833,22 +5833,23 @@ static inline int sg_imbalanced(struct sched_group *group)
  *
  * Avoid the issue where N*frac(smt_power) >= 1 creates 'phantom' cores by
  * first dividing out the smt factor and computing the actual number of cores
- * and limit power unit capacity with that.
+ * and limit unit capacity with that.
  */
 static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group)
 {
 	unsigned int capacity_factor, smt, cpus;
-	unsigned int power, power_orig;
+	unsigned int capacity, capacity_orig;
 
-	power = group->sgp->power;
-	power_orig = group->sgp->power_orig;
+	capacity = group->sgc->capacity;
+	capacity_orig = group->sgc->capacity_orig;
 	cpus = group->group_weight;
 
-	/* smt := ceil(cpus / power), assumes: 1 < smt_power < 2 */
-	smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, power_orig);
+	/* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */
+	smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, capacity_orig);
 	capacity_factor = cpus / smt; /* cores */
 
-	capacity_factor = min_t(unsigned, capacity_factor, DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE));
+	capacity_factor = min_t(unsigned,
+		capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_POWER_SCALE));
 	if (!capacity_factor)
 		capacity_factor = fix_small_capacity(env->sd, group);
 
@@ -5892,9 +5893,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 			sgs->idle_cpus++;
 	}
 
-	/* Adjust by relative CPU power of the group */
-	sgs->group_power = group->sgp->power;
-	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_power;
+	/* Adjust by relative CPU capacity of the group */
+	sgs->group_capacity = group->sgc->capacity;
+	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_capacity;
 
 	if (sgs->sum_nr_running)
 		sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
@@ -6009,8 +6010,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 			sgs = &sds->local_stat;
 
 			if (env->idle != CPU_NEWLY_IDLE ||
-			    time_after_eq(jiffies, sg->sgp->next_update))
-				update_group_power(env->sd, env->dst_cpu);
+			    time_after_eq(jiffies, sg->sgc->next_update))
+				update_group_capacity(env->sd, env->dst_cpu);
 		}
 
 		update_sg_lb_stats(env, sg, load_idx, local_group, sgs);
@@ -6040,7 +6041,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 next_group:
 		/* Now, start updating sd_lb_stats */
 		sds->total_load += sgs->group_load;
-		sds->total_pwr += sgs->group_power;
+		sds->total_capacity += sgs->group_capacity;
 
 		sg = sg->next;
 	} while (sg != env->sd->groups);
@@ -6087,7 +6088,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
 		return 0;
 
 	env->imbalance = DIV_ROUND_CLOSEST(
-		sds->busiest_stat.avg_load * sds->busiest_stat.group_power,
+		sds->busiest_stat.avg_load * sds->busiest_stat.group_capacity,
 		SCHED_POWER_SCALE);
 
 	return 1;
@@ -6103,7 +6104,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
 static inline
 void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 {
-	unsigned long tmp, pwr_now = 0, pwr_move = 0;
+	unsigned long tmp, capa_now = 0, capa_move = 0;
 	unsigned int imbn = 2;
 	unsigned long scaled_busy_load_per_task;
 	struct sg_lb_stats *local, *busiest;
@@ -6118,7 +6119,7 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 
 	scaled_busy_load_per_task =
 		(busiest->load_per_task * SCHED_POWER_SCALE) /
-		busiest->group_power;
+		busiest->group_capacity;
 
 	if (busiest->avg_load + scaled_busy_load_per_task >=
 	    local->avg_load + (scaled_busy_load_per_task * imbn)) {
@@ -6132,34 +6133,34 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 	 * moving them.
 	 */
 
-	pwr_now += busiest->group_power *
+	capa_now += busiest->group_capacity *
 			min(busiest->load_per_task, busiest->avg_load);
-	pwr_now += local->group_power *
+	capa_now += local->group_capacity *
 			min(local->load_per_task, local->avg_load);
-	pwr_now /= SCHED_POWER_SCALE;
+	capa_now /= SCHED_POWER_SCALE;
 
 	/* Amount of load we'd subtract */
 	if (busiest->avg_load > scaled_busy_load_per_task) {
-		pwr_move += busiest->group_power *
+		capa_move += busiest->group_capacity *
 			    min(busiest->load_per_task,
 				busiest->avg_load - scaled_busy_load_per_task);
 	}
 
 	/* Amount of load we'd add */
-	if (busiest->avg_load * busiest->group_power <
+	if (busiest->avg_load * busiest->group_capacity <
 	    busiest->load_per_task * SCHED_POWER_SCALE) {
-		tmp = (busiest->avg_load * busiest->group_power) /
-		      local->group_power;
+		tmp = (busiest->avg_load * busiest->group_capacity) /
+		      local->group_capacity;
 	} else {
 		tmp = (busiest->load_per_task * SCHED_POWER_SCALE) /
-		      local->group_power;
+		      local->group_capacity;
 	}
-	pwr_move += local->group_power *
+	capa_move += local->group_capacity *
 		    min(local->load_per_task, local->avg_load + tmp);
-	pwr_move /= SCHED_POWER_SCALE;
+	capa_move /= SCHED_POWER_SCALE;
 
 	/* Move if we gain throughput */
-	if (pwr_move > pwr_now)
+	if (capa_move > capa_now)
 		env->imbalance = busiest->load_per_task;
 }
 
@@ -6207,7 +6208,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 			(busiest->sum_nr_running - busiest->group_capacity_factor);
 
 		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
-		load_above_capacity /= busiest->group_power;
+		load_above_capacity /= busiest->group_capacity;
 	}
 
 	/*
@@ -6222,8 +6223,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 
 	/* How much load to actually move to equalise the imbalance */
 	env->imbalance = min(
-		max_pull * busiest->group_power,
-		(sds->avg_load - local->avg_load) * local->group_power
+		max_pull * busiest->group_capacity,
+		(sds->avg_load - local->avg_load) * local->group_capacity
 	) / SCHED_POWER_SCALE;
 
 	/*
@@ -6278,7 +6279,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 	if (!sds.busiest || busiest->sum_nr_running == 0)
 		goto out_balanced;
 
-	sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr;
+	sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_capacity;
 
 	/*
 	 * If the busiest group is imbalanced the below checks don't
@@ -6611,7 +6612,7 @@ more_balance:
 		 * We failed to reach balance because of affinity.
 		 */
 		if (sd_parent) {
-			int *group_imbalance = &sd_parent->groups->sgp->imbalance;
+			int *group_imbalance = &sd_parent->groups->sgc->imbalance;
 
 			if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0) {
 				*group_imbalance = 1;
@@ -6998,7 +6999,7 @@ static inline void set_cpu_sd_state_busy(void)
 		goto unlock;
 	sd->nohz_idle = 0;
 
-	atomic_inc(&sd->groups->sgp->nr_busy_cpus);
+	atomic_inc(&sd->groups->sgc->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -7015,7 +7016,7 @@ void set_cpu_sd_state_idle(void)
 		goto unlock;
 	sd->nohz_idle = 1;
 
-	atomic_dec(&sd->groups->sgp->nr_busy_cpus);
+	atomic_dec(&sd->groups->sgc->nr_busy_cpus);
 unlock:
 	rcu_read_unlock();
 }
@@ -7219,7 +7220,7 @@ end:
  * of an idle cpu is the system.
  *   - This rq has more than one task.
  *   - At any scheduler domain level, this cpu's scheduler group has multiple
- *     busy cpu's exceeding the group's power.
+ *     busy cpu's exceeding the group's capacity.
  *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
  *     domain span are idle.
  */
@@ -7227,7 +7228,7 @@ static inline int nohz_kick_needed(struct rq *rq)
 {
 	unsigned long now = jiffies;
 	struct sched_domain *sd;
-	struct sched_group_power *sgp;
+	struct sched_group_capacity *sgc;
 	int nr_busy, cpu = rq->cpu;
 
 	if (unlikely(rq->idle_balance))
@@ -7257,8 +7258,8 @@ static inline int nohz_kick_needed(struct rq *rq)
 	sd = rcu_dereference(per_cpu(sd_busy, cpu));
 
 	if (sd) {
-		sgp = sd->groups->sgp;
-		nr_busy = atomic_read(&sgp->nr_busy_cpus);
+		sgc = sd->groups->sgc;
+		nr_busy = atomic_read(&sgc->nr_busy_cpus);
 
 		if (nr_busy > 1)
 			goto need_kick_unlock;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 600e2291a75c..a5b957d53c92 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -728,15 +728,15 @@ DECLARE_PER_CPU(struct sched_domain *, sd_numa);
 DECLARE_PER_CPU(struct sched_domain *, sd_busy);
 DECLARE_PER_CPU(struct sched_domain *, sd_asym);
 
-struct sched_group_power {
+struct sched_group_capacity {
 	atomic_t ref;
 	/*
-	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
-	 * single CPU.
+	 * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
+	 * for a single CPU.
 	 */
-	unsigned int power, power_orig;
+	unsigned int capacity, capacity_orig;
 	unsigned long next_update;
-	int imbalance; /* XXX unrelated to power but shared group state */
+	int imbalance; /* XXX unrelated to capacity but shared group state */
 	/*
 	 * Number of busy cpus in this group.
 	 */
@@ -750,7 +750,7 @@ struct sched_group {
 	atomic_t ref;
 
 	unsigned int group_weight;
-	struct sched_group_power *sgp;
+	struct sched_group_capacity *sgc;
 
 	/*
 	 * The CPUs this group covers.
@@ -773,7 +773,7 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
  */
 static inline struct cpumask *sched_group_mask(struct sched_group *sg)
 {
-	return to_cpumask(sg->sgp->cpumask);
+	return to_cpumask(sg->sgc->cpumask);
 }
 
 /**
@@ -1167,7 +1167,7 @@ extern const struct sched_class idle_sched_class;
 
 #ifdef CONFIG_SMP
 
-extern void update_group_power(struct sched_domain *sd, int cpu);
+extern void update_group_capacity(struct sched_domain *sd, int cpu);
 
 extern void trigger_load_balance(struct rq *rq);
 
-- 
cgit 


From ca8ce3d0b144c318a5a9ce99649053e9029061ea Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Mon, 26 May 2014 18:19:39 -0400
Subject: sched: Final power vs. capacity cleanups

It is better not to think about compute capacity as being equivalent
to "CPU power".  The upcoming "power aware" scheduler work may create
confusion with the notion of energy consumption if "power" is used too
liberally.

This contains the architecture visible changes.  Incidentally, only ARM
takes advantage of the available pow^H^H^Hcapacity scaling hooks and
therefore those changes outside kernel/sched/ are confined to one ARM
specific file.  The default arch_scale_smt_power() hook is not overridden
by anyone.

Replacements are as follows:

	arch_scale_freq_power  --> arch_scale_freq_capacity
	arch_scale_smt_power   --> arch_scale_smt_capacity
	SCHED_POWER_SCALE      --> SCHED_CAPACITY_SCALE
	SCHED_POWER_SHIFT      --> SCHED_CAPACITY_SHIFT

The local usage of "power" in arch/arm/kernel/topology.c is also changed
to "capacity" as appropriate.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: linaro-kernel@lists.linaro.org
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Brown <broonie@linaro.org>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: devicetree@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/n/tip-48zba9qbznvglwelgq2cfygh@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/arm/kernel/topology.c | 54 +++++++++++++++++++++---------------------
 include/linux/sched.h      |  6 ++---
 kernel/sched/core.c        |  6 ++---
 kernel/sched/fair.c        | 59 +++++++++++++++++++++++-----------------------
 4 files changed, 63 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 71e1fec6d31a..d42a7db22236 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -26,30 +26,30 @@
 #include <asm/topology.h>
 
 /*
- * cpu power scale management
+ * cpu capacity scale management
  */
 
 /*
- * cpu power table
+ * cpu capacity table
  * This per cpu data structure describes the relative capacity of each core.
  * On a heteregenous system, cores don't have the same computation capacity
- * and we reflect that difference in the cpu_power field so the scheduler can
- * take this difference into account during load balance. A per cpu structure
- * is preferred because each CPU updates its own cpu_power field during the
- * load balance except for idle cores. One idle core is selected to run the
- * rebalance_domains for all idle cores and the cpu_power can be updated
- * during this sequence.
+ * and we reflect that difference in the cpu_capacity field so the scheduler
+ * can take this difference into account during load balance. A per cpu
+ * structure is preferred because each CPU updates its own cpu_capacity field
+ * during the load balance except for idle cores. One idle core is selected
+ * to run the rebalance_domains for all idle cores and the cpu_capacity can be
+ * updated during this sequence.
  */
 static DEFINE_PER_CPU(unsigned long, cpu_scale);
 
-unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 {
 	return per_cpu(cpu_scale, cpu);
 }
 
-static void set_power_scale(unsigned int cpu, unsigned long power)
+static void set_capacity_scale(unsigned int cpu, unsigned long capacity)
 {
-	per_cpu(cpu_scale, cpu) = power;
+	per_cpu(cpu_scale, cpu) = capacity;
 }
 
 #ifdef CONFIG_OF
@@ -62,11 +62,11 @@ struct cpu_efficiency {
  * Table of relative efficiency of each processors
  * The efficiency value must fit in 20bit and the final
  * cpu_scale value must be in the range
- *   0 < cpu_scale < 3*SCHED_POWER_SCALE/2
+ *   0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2
  * in order to return at most 1 when DIV_ROUND_CLOSEST
  * is used to compute the capacity of a CPU.
  * Processors that are not defined in the table,
- * use the default SCHED_POWER_SCALE value for cpu_scale.
+ * use the default SCHED_CAPACITY_SCALE value for cpu_scale.
  */
 static const struct cpu_efficiency table_efficiency[] = {
 	{"arm,cortex-a15", 3891},
@@ -83,9 +83,9 @@ static unsigned long middle_capacity = 1;
  * Iterate all CPUs' descriptor in DT and compute the efficiency
  * (as per table_efficiency). Also calculate a middle efficiency
  * as close as possible to  (max{eff_i} - min{eff_i}) / 2
- * This is later used to scale the cpu_power field such that an
- * 'average' CPU is of middle power. Also see the comments near
- * table_efficiency[] and update_cpu_power().
+ * This is later used to scale the cpu_capacity field such that an
+ * 'average' CPU is of middle capacity. Also see the comments near
+ * table_efficiency[] and update_cpu_capacity().
  */
 static void __init parse_dt_topology(void)
 {
@@ -141,15 +141,15 @@ static void __init parse_dt_topology(void)
 	 * cpu_scale because all CPUs have the same capacity. Otherwise, we
 	 * compute a middle_capacity factor that will ensure that the capacity
 	 * of an 'average' CPU of the system will be as close as possible to
-	 * SCHED_POWER_SCALE, which is the default value, but with the
+	 * SCHED_CAPACITY_SCALE, which is the default value, but with the
 	 * constraint explained near table_efficiency[].
 	 */
 	if (4*max_capacity < (3*(max_capacity + min_capacity)))
 		middle_capacity = (min_capacity + max_capacity)
-				>> (SCHED_POWER_SHIFT+1);
+				>> (SCHED_CAPACITY_SHIFT+1);
 	else
 		middle_capacity = ((max_capacity / 3)
-				>> (SCHED_POWER_SHIFT-1)) + 1;
+				>> (SCHED_CAPACITY_SHIFT-1)) + 1;
 
 }
 
@@ -158,20 +158,20 @@ static void __init parse_dt_topology(void)
  * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the
  * function returns directly for SMP system.
  */
-static void update_cpu_power(unsigned int cpu)
+static void update_cpu_capacity(unsigned int cpu)
 {
 	if (!cpu_capacity(cpu))
 		return;
 
-	set_power_scale(cpu, cpu_capacity(cpu) / middle_capacity);
+	set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
 
-	printk(KERN_INFO "CPU%u: update cpu_power %lu\n",
-		cpu, arch_scale_freq_power(NULL, cpu));
+	printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n",
+		cpu, arch_scale_freq_capacity(NULL, cpu));
 }
 
 #else
 static inline void parse_dt_topology(void) {}
-static inline void update_cpu_power(unsigned int cpuid) {}
+static inline void update_cpu_capacity(unsigned int cpuid) {}
 #endif
 
  /*
@@ -267,7 +267,7 @@ void store_cpu_topology(unsigned int cpuid)
 
 	update_siblings_masks(cpuid);
 
-	update_cpu_power(cpuid);
+	update_cpu_capacity(cpuid);
 
 	printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n",
 		cpuid, cpu_topology[cpuid].thread_id,
@@ -297,7 +297,7 @@ void __init init_cpu_topology(void)
 {
 	unsigned int cpu;
 
-	/* init core mask and power*/
+	/* init core mask and capacity */
 	for_each_possible_cpu(cpu) {
 		struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]);
 
@@ -307,7 +307,7 @@ void __init init_cpu_topology(void)
 		cpumask_clear(&cpu_topo->core_sibling);
 		cpumask_clear(&cpu_topo->thread_sibling);
 
-		set_power_scale(cpu, SCHED_POWER_SCALE);
+		set_capacity_scale(cpu, SCHED_CAPACITY_SCALE);
 	}
 	smp_wmb();
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a96f03598c61..322110affe63 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -854,10 +854,10 @@ enum cpu_idle_type {
 };
 
 /*
- * Increase resolution of cpu_power calculations
+ * Increase resolution of cpu_capacity calculations
  */
-#define SCHED_POWER_SHIFT	10
-#define SCHED_POWER_SCALE	(1L << SCHED_POWER_SHIFT)
+#define SCHED_CAPACITY_SHIFT	10
+#define SCHED_CAPACITY_SCALE	(1L << SCHED_CAPACITY_SHIFT)
 
 /*
  * sched-domains (multiprocessor balancing) declarations:
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 07bc78a50329..7ba4f5413a10 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5249,7 +5249,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
 
 		printk(KERN_CONT " %s", str);
-		if (group->sgc->capacity != SCHED_POWER_SCALE) {
+		if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
 			printk(KERN_CONT " (cpu_capacity = %d)",
 				group->sgc->capacity);
 		}
@@ -5715,7 +5715,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		 * domains and no possible iteration will get us here, we won't
 		 * die on a /0 trap.
 		 */
-		sg->sgc->capacity = SCHED_POWER_SCALE * cpumask_weight(sg_span);
+		sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
 		sg->sgc->capacity_orig = sg->sgc->capacity;
 
 		/*
@@ -6921,7 +6921,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
 		rq->sd = NULL;
 		rq->rd = NULL;
-		rq->cpu_capacity = SCHED_POWER_SCALE;
+		rq->cpu_capacity = SCHED_CAPACITY_SCALE;
 		rq->post_schedule = 0;
 		rq->active_balance = 0;
 		rq->next_balance = jiffies;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 58684f684fa8..dc7d6527a282 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1062,9 +1062,9 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
 	if (!cpus)
 		return;
 
-	ns->load = (ns->load * SCHED_POWER_SCALE) / ns->compute_capacity;
+	ns->load = (ns->load * SCHED_CAPACITY_SCALE) / ns->compute_capacity;
 	ns->task_capacity =
-		DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_POWER_SCALE);
+		DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE);
 	ns->has_free_capacity = (ns->nr_running < ns->task_capacity);
 }
 
@@ -4370,7 +4370,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
 		}
 
 		/* Adjust by relative CPU capacity of the group */
-		avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgc->capacity;
+		avg_load = (avg_load * SCHED_CAPACITY_SCALE) / group->sgc->capacity;
 
 		if (local_group) {
 			this_load = avg_load;
@@ -5609,10 +5609,10 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
 
 static unsigned long default_scale_capacity(struct sched_domain *sd, int cpu)
 {
-	return SCHED_POWER_SCALE;
+	return SCHED_CAPACITY_SCALE;
 }
 
-unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
 {
 	return default_scale_capacity(sd, cpu);
 }
@@ -5627,7 +5627,7 @@ static unsigned long default_scale_smt_capacity(struct sched_domain *sd, int cpu
 	return smt_gain;
 }
 
-unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
+unsigned long __weak arch_scale_smt_capacity(struct sched_domain *sd, int cpu)
 {
 	return default_scale_smt_capacity(sd, cpu);
 }
@@ -5658,10 +5658,10 @@ static unsigned long scale_rt_capacity(int cpu)
 		available = total - avg;
 	}
 
-	if (unlikely((s64)total < SCHED_POWER_SCALE))
-		total = SCHED_POWER_SCALE;
+	if (unlikely((s64)total < SCHED_CAPACITY_SCALE))
+		total = SCHED_CAPACITY_SCALE;
 
-	total >>= SCHED_POWER_SHIFT;
+	total >>= SCHED_CAPACITY_SHIFT;
 
 	return div_u64(available, total);
 }
@@ -5669,29 +5669,29 @@ static unsigned long scale_rt_capacity(int cpu)
 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 {
 	unsigned long weight = sd->span_weight;
-	unsigned long capacity = SCHED_POWER_SCALE;
+	unsigned long capacity = SCHED_CAPACITY_SCALE;
 	struct sched_group *sdg = sd->groups;
 
 	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
 		if (sched_feat(ARCH_POWER))
-			capacity *= arch_scale_smt_power(sd, cpu);
+			capacity *= arch_scale_smt_capacity(sd, cpu);
 		else
 			capacity *= default_scale_smt_capacity(sd, cpu);
 
-		capacity >>= SCHED_POWER_SHIFT;
+		capacity >>= SCHED_CAPACITY_SHIFT;
 	}
 
 	sdg->sgc->capacity_orig = capacity;
 
 	if (sched_feat(ARCH_POWER))
-		capacity *= arch_scale_freq_power(sd, cpu);
+		capacity *= arch_scale_freq_capacity(sd, cpu);
 	else
 		capacity *= default_scale_capacity(sd, cpu);
 
-	capacity >>= SCHED_POWER_SHIFT;
+	capacity >>= SCHED_CAPACITY_SHIFT;
 
 	capacity *= scale_rt_capacity(cpu);
-	capacity >>= SCHED_POWER_SHIFT;
+	capacity >>= SCHED_CAPACITY_SHIFT;
 
 	if (!capacity)
 		capacity = 1;
@@ -5780,7 +5780,7 @@ static inline int
 fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 {
 	/*
-	 * Only siblings can have significantly less than SCHED_POWER_SCALE
+	 * Only siblings can have significantly less than SCHED_CAPACITY_SCALE
 	 */
 	if (!(sd->flags & SD_SHARE_CPUPOWER))
 		return 0;
@@ -5845,11 +5845,11 @@ static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *gro
 	cpus = group->group_weight;
 
 	/* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */
-	smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, capacity_orig);
+	smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, capacity_orig);
 	capacity_factor = cpus / smt; /* cores */
 
 	capacity_factor = min_t(unsigned,
-		capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_POWER_SCALE));
+		capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE));
 	if (!capacity_factor)
 		capacity_factor = fix_small_capacity(env->sd, group);
 
@@ -5895,7 +5895,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 	/* Adjust by relative CPU capacity of the group */
 	sgs->group_capacity = group->sgc->capacity;
-	sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_capacity;
+	sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
 
 	if (sgs->sum_nr_running)
 		sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
@@ -6089,7 +6089,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
 
 	env->imbalance = DIV_ROUND_CLOSEST(
 		sds->busiest_stat.avg_load * sds->busiest_stat.group_capacity,
-		SCHED_POWER_SCALE);
+		SCHED_CAPACITY_SCALE);
 
 	return 1;
 }
@@ -6118,7 +6118,7 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 		imbn = 1;
 
 	scaled_busy_load_per_task =
-		(busiest->load_per_task * SCHED_POWER_SCALE) /
+		(busiest->load_per_task * SCHED_CAPACITY_SCALE) /
 		busiest->group_capacity;
 
 	if (busiest->avg_load + scaled_busy_load_per_task >=
@@ -6137,7 +6137,7 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 			min(busiest->load_per_task, busiest->avg_load);
 	capa_now += local->group_capacity *
 			min(local->load_per_task, local->avg_load);
-	capa_now /= SCHED_POWER_SCALE;
+	capa_now /= SCHED_CAPACITY_SCALE;
 
 	/* Amount of load we'd subtract */
 	if (busiest->avg_load > scaled_busy_load_per_task) {
@@ -6148,16 +6148,16 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 
 	/* Amount of load we'd add */
 	if (busiest->avg_load * busiest->group_capacity <
-	    busiest->load_per_task * SCHED_POWER_SCALE) {
+	    busiest->load_per_task * SCHED_CAPACITY_SCALE) {
 		tmp = (busiest->avg_load * busiest->group_capacity) /
 		      local->group_capacity;
 	} else {
-		tmp = (busiest->load_per_task * SCHED_POWER_SCALE) /
+		tmp = (busiest->load_per_task * SCHED_CAPACITY_SCALE) /
 		      local->group_capacity;
 	}
 	capa_move += local->group_capacity *
 		    min(local->load_per_task, local->avg_load + tmp);
-	capa_move /= SCHED_POWER_SCALE;
+	capa_move /= SCHED_CAPACITY_SCALE;
 
 	/* Move if we gain throughput */
 	if (capa_move > capa_now)
@@ -6207,7 +6207,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 		load_above_capacity =
 			(busiest->sum_nr_running - busiest->group_capacity_factor);
 
-		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
+		load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_CAPACITY_SCALE);
 		load_above_capacity /= busiest->group_capacity;
 	}
 
@@ -6225,7 +6225,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 	env->imbalance = min(
 		max_pull * busiest->group_capacity,
 		(sds->avg_load - local->avg_load) * local->group_capacity
-	) / SCHED_POWER_SCALE;
+	) / SCHED_CAPACITY_SCALE;
 
 	/*
 	 * if *imbalance is less than the average load per runnable task
@@ -6279,7 +6279,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
 	if (!sds.busiest || busiest->sum_nr_running == 0)
 		goto out_balanced;
 
-	sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_capacity;
+	sds.avg_load = (SCHED_CAPACITY_SCALE * sds.total_load)
+						/ sds.total_capacity;
 
 	/*
 	 * If the busiest group is imbalanced the below checks don't
@@ -6378,7 +6379,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
 			continue;
 
 		capacity = capacity_of(i);
-		capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_POWER_SCALE);
+		capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE);
 		if (!capacity_factor)
 			capacity_factor = fix_small_capacity(env->sd, group);
 
-- 
cgit 


From 5d4dfddd4f02b028d6ddaaa04d75d3b0cad1c9ae Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 27 May 2014 13:50:41 -0400
Subject: sched: Rename capacity related flags

It is better not to think about compute capacity as being equivalent
to "CPU power".  The upcoming "power aware" scheduler work may create
confusion with the notion of energy consumption if "power" is used too
liberally.

Let's rename the following feature flags since they do relate to capacity:

	SD_SHARE_CPUPOWER  -> SD_SHARE_CPUCAPACITY
	ARCH_POWER         -> ARCH_CAPACITY
	NONTASK_POWER      -> NONTASK_CAPACITY

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: linaro-kernel@lists.linaro.org
Cc: Andy Fleming <afleming@freescale.com>
Cc: Anton Blanchard <anton@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Grant Likely <grant.likely@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: devicetree@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/n/tip-e93lpnxb87owfievqatey6b5@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/powerpc/kernel/smp.c |  2 +-
 include/linux/sched.h     |  4 ++--
 kernel/sched/core.c       | 14 +++++++-------
 kernel/sched/fair.c       |  8 ++++----
 kernel/sched/features.h   |  8 ++++----
 5 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 10ffffef0414..c51d16379cba 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -770,7 +770,7 @@ int setup_profiling_timer(unsigned int multiplier)
 /* cpumask of CPUs with asymetric SMT dependancy */
 static const int powerpc_smt_flags(void)
 {
-	int flags = SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
 
 	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
 		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 322110affe63..ce93768a3312 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -869,7 +869,7 @@ enum cpu_idle_type {
 #define SD_BALANCE_FORK		0x0008	/* Balance on fork, clone */
 #define SD_BALANCE_WAKE		0x0010  /* Balance on wakeup */
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
-#define SD_SHARE_CPUPOWER	0x0080	/* Domain members share cpu power */
+#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu capacity */
 #define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
 #define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
@@ -881,7 +881,7 @@ enum cpu_idle_type {
 #ifdef CONFIG_SCHED_SMT
 static inline const int cpu_smt_flags(void)
 {
-	return SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
+	return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
 }
 #endif
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7ba4f5413a10..5976ca579d3e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -872,7 +872,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 	rq->clock_task += delta;
 
 #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
-	if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
+	if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
 		sched_rt_avg_update(rq, irq_delta + steal);
 #endif
 }
@@ -5309,7 +5309,7 @@ static int sd_degenerate(struct sched_domain *sd)
 			 SD_BALANCE_NEWIDLE |
 			 SD_BALANCE_FORK |
 			 SD_BALANCE_EXEC |
-			 SD_SHARE_CPUPOWER |
+			 SD_SHARE_CPUCAPACITY |
 			 SD_SHARE_PKG_RESOURCES |
 			 SD_SHARE_POWERDOMAIN)) {
 		if (sd->groups != sd->groups->next)
@@ -5340,7 +5340,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 				SD_BALANCE_NEWIDLE |
 				SD_BALANCE_FORK |
 				SD_BALANCE_EXEC |
-				SD_SHARE_CPUPOWER |
+				SD_SHARE_CPUCAPACITY |
 				SD_SHARE_PKG_RESOURCES |
 				SD_PREFER_SIBLING |
 				SD_SHARE_POWERDOMAIN);
@@ -5947,7 +5947,7 @@ static int sched_domains_curr_level;
 /*
  * SD_flags allowed in topology descriptions.
  *
- * SD_SHARE_CPUPOWER      - describes SMT topologies
+ * SD_SHARE_CPUCAPACITY      - describes SMT topologies
  * SD_SHARE_PKG_RESOURCES - describes shared caches
  * SD_NUMA                - describes NUMA topologies
  * SD_SHARE_POWERDOMAIN   - describes shared power domain
@@ -5956,7 +5956,7 @@ static int sched_domains_curr_level;
  * SD_ASYM_PACKING        - describes SMT quirks
  */
 #define TOPOLOGY_SD_FLAGS		\
-	(SD_SHARE_CPUPOWER |		\
+	(SD_SHARE_CPUCAPACITY |		\
 	 SD_SHARE_PKG_RESOURCES |	\
 	 SD_NUMA |			\
 	 SD_ASYM_PACKING |		\
@@ -6002,7 +6002,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
 					| 1*SD_BALANCE_FORK
 					| 0*SD_BALANCE_WAKE
 					| 1*SD_WAKE_AFFINE
-					| 0*SD_SHARE_CPUPOWER
+					| 0*SD_SHARE_CPUCAPACITY
 					| 0*SD_SHARE_PKG_RESOURCES
 					| 0*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
@@ -6024,7 +6024,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
 	 * Convert topological properties into behaviour.
 	 */
 
-	if (sd->flags & SD_SHARE_CPUPOWER) {
+	if (sd->flags & SD_SHARE_CPUCAPACITY) {
 		sd->imbalance_pct = 110;
 		sd->smt_gain = 1178; /* ~15% */
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dc7d6527a282..d3c731222199 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5672,8 +5672,8 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 	unsigned long capacity = SCHED_CAPACITY_SCALE;
 	struct sched_group *sdg = sd->groups;
 
-	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
-		if (sched_feat(ARCH_POWER))
+	if ((sd->flags & SD_SHARE_CPUCAPACITY) && weight > 1) {
+		if (sched_feat(ARCH_CAPACITY))
 			capacity *= arch_scale_smt_capacity(sd, cpu);
 		else
 			capacity *= default_scale_smt_capacity(sd, cpu);
@@ -5683,7 +5683,7 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 
 	sdg->sgc->capacity_orig = capacity;
 
-	if (sched_feat(ARCH_POWER))
+	if (sched_feat(ARCH_CAPACITY))
 		capacity *= arch_scale_freq_capacity(sd, cpu);
 	else
 		capacity *= default_scale_capacity(sd, cpu);
@@ -5782,7 +5782,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 	/*
 	 * Only siblings can have significantly less than SCHED_CAPACITY_SCALE
 	 */
-	if (!(sd->flags & SD_SHARE_CPUPOWER))
+	if (!(sd->flags & SD_SHARE_CPUCAPACITY))
 		return 0;
 
 	/*
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 5716929a2e3a..90284d117fe6 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -37,18 +37,18 @@ SCHED_FEAT(CACHE_HOT_BUDDY, true)
 SCHED_FEAT(WAKEUP_PREEMPTION, true)
 
 /*
- * Use arch dependent cpu power functions
+ * Use arch dependent cpu capacity functions
  */
-SCHED_FEAT(ARCH_POWER, true)
+SCHED_FEAT(ARCH_CAPACITY, true)
 
 SCHED_FEAT(HRTICK, false)
 SCHED_FEAT(DOUBLE_TICK, false)
 SCHED_FEAT(LB_BIAS, true)
 
 /*
- * Decrement CPU power based on time not spent running tasks
+ * Decrement CPU capacity based on time not spent running tasks
  */
-SCHED_FEAT(NONTASK_POWER, true)
+SCHED_FEAT(NONTASK_CAPACITY, true)
 
 /*
  * Queue remote wakeups on the target CPU and process them
-- 
cgit 


From 53b25335dd60981ad608da7890420898a34469a6 Mon Sep 17 00:00:00 2001
From: Vince Weaver <vincent.weaver@maine.edu>
Date: Fri, 16 May 2014 17:12:12 -0400
Subject: perf: Disable sampled events if no PMU interrupt

Add common code to generate -ENOTSUPP at event creation time if an
architecture attempts to create a sampled event and
PERF_PMU_NO_INTERRUPT is set.

This adds a new pmu->capabilities flag.  Initially we only support
PERF_PMU_NO_INTERRUPT (to indicate a PMU has no support for generating
hardware interrupts) but there are other capabilities that can be
added later.

Signed-off-by: Vince Weaver <vincent.weaver@maine.edu>
Acked-by: Will Deacon <will.deacon@arm.com>
[peterz: rename to PERF_PMU_CAP_* and moved the pmu::capabilities word into a hole]
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1405161708060.11099@vincent-weaver-1.umelst.maine.edu
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 10 ++++++++++
 kernel/events/core.c       |  7 +++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index af6dcf1d9e47..267c8f37012c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -166,6 +166,11 @@ struct perf_event;
  */
 #define PERF_EVENT_TXN 0x1
 
+/**
+ * pmu::capabilities flags
+ */
+#define PERF_PMU_CAP_NO_INTERRUPT		0x01
+
 /**
  * struct pmu - generic performance monitoring unit
  */
@@ -178,6 +183,11 @@ struct pmu {
 	const char			*name;
 	int				type;
 
+	/*
+	 * various common per-pmu feature flags
+	 */
+	int				capabilities;
+
 	int * __percpu			pmu_disable_count;
 	struct perf_cpu_context * __percpu pmu_cpu_context;
 	int				task_ctx_nr;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a62d142ad498..e9ef0c6646af 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7120,6 +7120,13 @@ SYSCALL_DEFINE5(perf_event_open,
 		}
 	}
 
+	if (is_sampling_event(event)) {
+		if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
+			err = -ENOTSUPP;
+			goto err_alloc;
+		}
+	}
+
 	account_event(event);
 
 	/*
-- 
cgit 


From 762380ad9322951cea4ce9d24864265f9c66a916 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 5 Jun 2014 13:38:39 -0600
Subject: block: add notion of a chunk size for request merging

Some drivers have different limits on what size a request should
optimally be, depending on the offset of the request, similar to
dividing a device into chunks. Add a setting that allows the driver
to inform the block layer of such a chunk size. The block layer will
then prevent merging across the chunks.

This is needed to optimally support NVMe with a non-zero stripe size.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c            |  3 ++-
 block/blk-settings.c   | 18 ++++++++++++++++++
 include/linux/blkdev.h | 22 +++++++++++++++++++++-
 3 files changed, 41 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 96d28eee8a1e..97e832cc9b9c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -849,7 +849,8 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
 		 unsigned int offset)
 {
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
+
+	return __bio_add_page(q, bio, page, len, offset, blk_max_size_offset(q, bio->bi_iter.bi_sector));
 }
 EXPORT_SYMBOL(bio_add_page);
 
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5d21239bc859..a2b9cb195e70 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
 	lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+	lim->chunk_sectors = 0;
 	lim->max_write_same_sectors = 0;
 	lim->max_discard_sectors = 0;
 	lim->discard_granularity = 0;
@@ -276,6 +277,23 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
+/**
+ * blk_queue_chunk_sectors - set size of the chunk for this queue
+ * @q:  the request queue for the device
+ * @chunk_sectors:  chunk sectors in the usual 512b unit
+ *
+ * Description:
+ *    If a driver doesn't want IOs to cross a given chunk size, it can set
+ *    this limit and prevent merging across chunks. Note that the chunk size
+ *    must currently be a power-of-2 in sectors.
+ **/
+void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors)
+{
+	BUG_ON(!is_power_of_2(chunk_sectors));
+	q->limits.chunk_sectors = chunk_sectors;
+}
+EXPORT_SYMBOL(blk_queue_chunk_sectors);
+
 /**
  * blk_queue_max_discard_sectors - set max sectors for a single discard
  * @q:  the request queue for the device
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 3cd426e971db..dc2c703f05fd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -280,6 +280,7 @@ struct queue_limits {
 	unsigned long		seg_boundary_mask;
 
 	unsigned int		max_hw_sectors;
+	unsigned int		chunk_sectors;
 	unsigned int		max_sectors;
 	unsigned int		max_segment_size;
 	unsigned int		physical_block_size;
@@ -910,6 +911,20 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 	return q->limits.max_sectors;
 }
 
+/*
+ * Return maximum size of a request at given offset. Only valid for
+ * file system requests.
+ */
+static inline unsigned int blk_max_size_offset(struct request_queue *q,
+					       sector_t offset)
+{
+	if (!q->limits.chunk_sectors)
+		return q->limits.max_hw_sectors;
+
+	return q->limits.chunk_sectors -
+			(offset & (q->limits.chunk_sectors - 1));
+}
+
 static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -917,7 +932,11 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
 	if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
 		return q->limits.max_hw_sectors;
 
-	return blk_queue_get_max_sectors(q, rq->cmd_flags);
+	if (!q->limits.chunk_sectors)
+		return blk_queue_get_max_sectors(q, rq->cmd_flags);
+
+	return min(blk_max_size_offset(q, blk_rq_pos(rq)),
+			blk_queue_get_max_sectors(q, rq->cmd_flags));
 }
 
 static inline unsigned int blk_rq_count_bios(struct request *rq)
@@ -983,6 +1002,7 @@ extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
 extern void blk_queue_bounce_limit(struct request_queue *, u64);
 extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
 extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
+extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
 extern void blk_queue_max_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_max_discard_sectors(struct request_queue *q,
-- 
cgit 


From 1c03a2d04d7ab6d27c1fef8614f08187d974bd21 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Mon, 2 Jun 2014 22:49:28 +0530
Subject: cpufreq: add support for intermediate (stable) frequencies

Douglas Anderson recently pointed out an interesting problem due to which
udelay() was expiring earlier than it should.

While transitioning between frequencies, a few platforms may temporarily switch to
a stable frequency, waiting for the main PLL to stabilize.

For example: When we transition between very low frequencies on exynos, like
between 200MHz and 300MHz, we may temporarily switch to a PLL running at 800MHz.
No CPUFREQ notification is sent for that. That means there's a period of time
when we're running at 800MHz but loops_per_jiffy is calibrated at between 200MHz
and 300MHz. And so udelay behaves badly.

To get this fixed in a generic way, introduce another set of callbacks
get_intermediate() and target_intermediate(), only for drivers with
target_index() and CPUFREQ_ASYNC_NOTIFICATION unset.

get_intermediate() should return the stable intermediate frequency the platform
wants to switch to, and target_intermediate() should set the CPU to that
frequency before jumping to the frequency corresponding to 'index'. The core
takes care of sending notifications, so the driver doesn't have to handle them
in target_intermediate() or target_index().

NOTE: ->target_index() should restore to policy->restore_freq in case of
failures as core would send notifications for that.

Tested-by: Stephen Warren <swarren@nvidia.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Doug Anderson <dianders@chromium.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/cpu-freq/cpu-drivers.txt | 29 ++++++++++++++-
 drivers/cpufreq/cpufreq.c              | 67 ++++++++++++++++++++++++++++++----
 include/linux/cpufreq.h                | 25 +++++++++++++
 3 files changed, 112 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
index b045fe54986a..14f4e6336d88 100644
--- a/Documentation/cpu-freq/cpu-drivers.txt
+++ b/Documentation/cpu-freq/cpu-drivers.txt
@@ -26,6 +26,7 @@ Contents:
 1.4  target/target_index or setpolicy?
 1.5  target/target_index
 1.6  setpolicy
+1.7  get_intermediate and target_intermediate
 2.   Frequency Table Helpers
 
 
@@ -79,6 +80,10 @@ cpufreq_driver.attr -		A pointer to a NULL-terminated list of
 				"struct freq_attr" which allow to
 				export values to sysfs.
 
+cpufreq_driver.get_intermediate
+and target_intermediate		Used to switch to stable frequency while
+				changing CPU frequency.
+
 
 1.2 Per-CPU Initialization
 --------------------------
@@ -151,7 +156,7 @@ Some cpufreq-capable processors switch the frequency between certain
 limits on their own. These shall use the ->setpolicy call
 
 
-1.4. target/target_index
+1.5. target/target_index
 -------------
 
 The target_index call has two arguments: struct cpufreq_policy *policy,
@@ -160,6 +165,9 @@ and unsigned int index (into the exposed frequency table).
 The CPUfreq driver must set the new frequency when called here. The
 actual frequency must be determined by freq_table[index].frequency.
 
+It should always restore to earlier frequency (i.e. policy->restore_freq) in
+case of errors, even if we switched to intermediate frequency earlier.
+
 Deprecated:
 ----------
 The target call has three arguments: struct cpufreq_policy *policy,
@@ -179,7 +187,7 @@ Here again the frequency table helper might assist you - see section 2
 for details.
 
 
-1.5 setpolicy
+1.6 setpolicy
 ---------------
 
 The setpolicy call only takes a struct cpufreq_policy *policy as
@@ -190,6 +198,23 @@ setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a
 powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check
 the reference implementation in drivers/cpufreq/longrun.c
 
+1.7 get_intermediate and target_intermediate
+--------------------------------------------
+
+Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION unset.
+
+get_intermediate should return a stable intermediate frequency platform wants to
+switch to, and target_intermediate() should set CPU to that frequency, before
+jumping to the frequency corresponding to 'index'. Core will take care of
+sending notifications and driver doesn't have to handle them in
+target_intermediate() or target_index().
+
+Drivers can return '0' from get_intermediate() in case they don't wish to switch
+to intermediate frequency for some target frequency. In that case core will
+directly call ->target_index().
+
+NOTE: ->target_index() should restore to policy->restore_freq in case of
+failures as core would send notifications for that.
 
 
 2. Frequency Table Helpers
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index ae11dd51f81d..aed2b0cb83dc 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1816,20 +1816,55 @@ EXPORT_SYMBOL(cpufreq_unregister_notifier);
  *                              GOVERNORS                            *
  *********************************************************************/
 
+/* Must set freqs->new to intermediate frequency */
+static int __target_intermediate(struct cpufreq_policy *policy,
+				 struct cpufreq_freqs *freqs, int index)
+{
+	int ret;
+
+	freqs->new = cpufreq_driver->get_intermediate(policy, index);
+
+	/* We don't need to switch to intermediate freq */
+	if (!freqs->new)
+		return 0;
+
+	pr_debug("%s: cpu: %d, switching to intermediate freq: oldfreq: %u, intermediate freq: %u\n",
+		 __func__, policy->cpu, freqs->old, freqs->new);
+
+	cpufreq_freq_transition_begin(policy, freqs);
+	ret = cpufreq_driver->target_intermediate(policy, index);
+	cpufreq_freq_transition_end(policy, freqs, ret);
+
+	if (ret)
+		pr_err("%s: Failed to change to intermediate frequency: %d\n",
+		       __func__, ret);
+
+	return ret;
+}
+
 static int __target_index(struct cpufreq_policy *policy,
 			  struct cpufreq_frequency_table *freq_table, int index)
 {
-	struct cpufreq_freqs freqs;
+	struct cpufreq_freqs freqs = {.old = policy->cur, .flags = 0};
+	unsigned int intermediate_freq = 0;
 	int retval = -EINVAL;
 	bool notify;
 
 	notify = !(cpufreq_driver->flags & CPUFREQ_ASYNC_NOTIFICATION);
-
 	if (notify) {
-		freqs.old = policy->cur;
-		freqs.new = freq_table[index].frequency;
-		freqs.flags = 0;
+		/* Handle switching to intermediate frequency */
+		if (cpufreq_driver->get_intermediate) {
+			retval = __target_intermediate(policy, &freqs, index);
+			if (retval)
+				return retval;
+
+			intermediate_freq = freqs.new;
+			/* Set old freq to intermediate */
+			if (intermediate_freq)
+				freqs.old = freqs.new;
+		}
 
+		freqs.new = freq_table[index].frequency;
 		pr_debug("%s: cpu: %d, oldfreq: %u, new freq: %u\n",
 			 __func__, policy->cpu, freqs.old, freqs.new);
 
@@ -1841,9 +1876,23 @@ static int __target_index(struct cpufreq_policy *policy,
 		pr_err("%s: Failed to change cpu frequency: %d\n", __func__,
 		       retval);
 
-	if (notify)
+	if (notify) {
 		cpufreq_freq_transition_end(policy, &freqs, retval);
 
+		/*
+		 * Failed after setting to intermediate freq? Driver should have
+		 * reverted back to initial frequency and so should we. Check
+		 * here for intermediate_freq instead of get_intermediate, in
+		 * case we haven't switched to intermediate freq at all.
+		 */
+		if (unlikely(retval && intermediate_freq)) {
+			freqs.old = intermediate_freq;
+			freqs.new = policy->restore_freq;
+			cpufreq_freq_transition_begin(policy, &freqs);
+			cpufreq_freq_transition_end(policy, &freqs, 0);
+		}
+	}
+
 	return retval;
 }
 
@@ -1875,6 +1924,9 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 	if (target_freq == policy->cur)
 		return 0;
 
+	/* Save last value to restore later on errors */
+	policy->restore_freq = policy->cur;
+
 	if (cpufreq_driver->target)
 		retval = cpufreq_driver->target(policy, target_freq, relation);
 	else if (cpufreq_driver->target_index) {
@@ -2361,7 +2413,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
 	    !(driver_data->setpolicy || driver_data->target_index ||
 		    driver_data->target) ||
 	     (driver_data->setpolicy && (driver_data->target_index ||
-		    driver_data->target)))
+		    driver_data->target)) ||
+	     (!!driver_data->get_intermediate != !!driver_data->target_intermediate))
 		return -EINVAL;
 
 	pr_debug("trying to register driver %s\n", driver_data->name);
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 3f458896d45c..ec4112d257bc 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -75,6 +75,7 @@ struct cpufreq_policy {
 	unsigned int		max;    /* in kHz */
 	unsigned int		cur;    /* in kHz, only needed if cpufreq
 					 * governors are used */
+	unsigned int		restore_freq; /* = policy->cur before transition */
 	unsigned int		suspend_freq; /* freq to set during suspend */
 
 	unsigned int		policy; /* see above */
@@ -221,11 +222,35 @@ struct cpufreq_driver {
 
 	/* define one out of two */
 	int	(*setpolicy)	(struct cpufreq_policy *policy);
+
+	/*
+	 * On failure, should always restore frequency to policy->restore_freq
+	 * (i.e. old freq).
+	 */
 	int	(*target)	(struct cpufreq_policy *policy,	/* Deprecated */
 				 unsigned int target_freq,
 				 unsigned int relation);
 	int	(*target_index)	(struct cpufreq_policy *policy,
 				 unsigned int index);
+	/*
+	 * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION
+	 * unset.
+	 *
+	 * get_intermediate should return a stable intermediate frequency
+	 * platform wants to switch to and target_intermediate() should set CPU
+	 * to that frequency, before jumping to the frequency corresponding
+	 * to 'index'. Core will take care of sending notifications and driver
+	 * doesn't have to handle them in target_intermediate() or
+	 * target_index().
+	 *
+	 * Drivers can return '0' from get_intermediate() in case they don't
+	 * wish to switch to intermediate frequency for some target frequency.
+	 * In that case core will directly call ->target_index().
+	 */
+	unsigned int (*get_intermediate)(struct cpufreq_policy *policy,
+					 unsigned int index);
+	int	(*target_intermediate)(struct cpufreq_policy *policy,
+				       unsigned int index);
 
 	/* should be defined, if possible */
 	unsigned int	(*get)	(unsigned int cpu);
-- 
cgit 


From 46cfd6ea23b0a207c87269d86457727dc4485708 Mon Sep 17 00:00:00 2001
From: Konrad Zapalowicz <bergo.torino@gmail.com>
Date: Thu, 5 Jun 2014 20:27:42 +0200
Subject: net: phy: fix sparse warning in fixed.c

This commit fixes the following sparse warning:

drivers/net/phy/fixed.c:207
    - warning: symbol 'fixed_phy_del' was not declared.
      Should it be static?

by adding the symbol's declaration to the phy_fixed.h API file. It is OK to
do so because the function in question is an exported symbol.

Signed-off-by: Konrad Zapalowicz <bergo.torino@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy_fixed.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index 4f2478b47136..ae612acebb53 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h
@@ -17,6 +17,7 @@ extern int fixed_phy_add(unsigned int irq, int phy_id,
 extern int fixed_phy_register(unsigned int irq,
 			      struct fixed_phy_status *status,
 			      struct device_node *np);
+extern void fixed_phy_del(int phy_addr);
 #else
 static inline int fixed_phy_add(unsigned int irq, int phy_id,
 				struct fixed_phy_status *status)
@@ -29,6 +30,10 @@ static inline int fixed_phy_register(unsigned int irq,
 {
 	return -ENODEV;
 }
+static inline int fixed_phy_del(int phy_addr)
+{
+	return -ENODEV;
+}
 #endif /* CONFIG_FIXED_PHY */
 
 /*
-- 
cgit 


From f98a128a55ff85d0087de89f304f10bd75e792aa Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Thu, 17 Apr 2014 08:55:50 +0800
Subject: ceph: update inode fields according to issued caps

Cap messages and request replies from a non-auth MDS may carry stale
information (the corresponding locks are in LOCK states) even if they
have the newest inode version. So the client should update inode fields
according to issued caps.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
---
 fs/ceph/caps.c               | 58 ++++++++++++++++++++----------------
 fs/ceph/inode.c              | 70 ++++++++++++++++++++++++--------------------
 include/linux/ceph/ceph_fs.h |  2 ++
 3 files changed, 73 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index de39a03f5b71..5f6d24ede794 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2476,7 +2476,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 
 	__check_cap_issue(ci, cap, newcaps);
 
-	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
+	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
 		inode->i_mode = le32_to_cpu(grant->mode);
 		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
 		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
@@ -2485,7 +2486,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 		     from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
+	if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
+	    (issued & CEPH_CAP_LINK_EXCL) == 0) {
 		set_nlink(inode, le32_to_cpu(grant->nlink));
 		if (inode->i_nlink == 0 &&
 		    (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
@@ -2512,31 +2514,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1)
 		queue_revalidate = 1;
 
-	/* size/ctime/mtime/atime? */
-	queue_trunc = ceph_fill_file_size(inode, issued,
-					  le32_to_cpu(grant->truncate_seq),
-					  le64_to_cpu(grant->truncate_size),
-					  size);
-	ceph_decode_timespec(&mtime, &grant->mtime);
-	ceph_decode_timespec(&atime, &grant->atime);
-	ceph_decode_timespec(&ctime, &grant->ctime);
-	ceph_fill_file_time(inode, issued,
-			    le32_to_cpu(grant->time_warp_seq), &ctime, &mtime,
-			    &atime);
-
-
-	/* file layout may have changed */
-	ci->i_layout = grant->layout;
-
-	/* max size increase? */
-	if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
-		dout("max_size %lld -> %llu\n", ci->i_max_size, max_size);
-		ci->i_max_size = max_size;
-		if (max_size >= ci->i_wanted_max_size) {
-			ci->i_wanted_max_size = 0;  /* reset */
-			ci->i_requested_max_size = 0;
+	if (newcaps & CEPH_CAP_ANY_RD) {
+		/* ctime/mtime/atime? */
+		ceph_decode_timespec(&mtime, &grant->mtime);
+		ceph_decode_timespec(&atime, &grant->atime);
+		ceph_decode_timespec(&ctime, &grant->ctime);
+		ceph_fill_file_time(inode, issued,
+				    le32_to_cpu(grant->time_warp_seq),
+				    &ctime, &mtime, &atime);
+	}
+
+	if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
+		/* file layout may have changed */
+		ci->i_layout = grant->layout;
+		/* size/truncate_seq? */
+		queue_trunc = ceph_fill_file_size(inode, issued,
+					le32_to_cpu(grant->truncate_seq),
+					le64_to_cpu(grant->truncate_size),
+					size);
+		/* max size increase? */
+		if (ci->i_auth_cap == cap && max_size != ci->i_max_size) {
+			dout("max_size %lld -> %llu\n",
+			     ci->i_max_size, max_size);
+			ci->i_max_size = max_size;
+			if (max_size >= ci->i_wanted_max_size) {
+				ci->i_wanted_max_size = 0;  /* reset */
+				ci->i_requested_max_size = 0;
+			}
+			wake = 1;
 		}
-		wake = 1;
 	}
 
 	/* check cap bits */
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 233c6f96910a..f9e7399877d6 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -585,14 +585,15 @@ static int fill_inode(struct inode *inode,
 	struct ceph_mds_reply_inode *info = iinfo->in;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int i;
-	int issued = 0, implemented;
+	int issued = 0, implemented, new_issued;
 	struct timespec mtime, atime, ctime;
 	u32 nsplits;
 	struct ceph_inode_frag *frag;
 	struct rb_node *rb_node;
 	struct ceph_buffer *xattr_blob = NULL;
 	int err = 0;
-	int queue_trunc = 0;
+	bool queue_trunc = false;
+	bool new_version = false;
 
 	dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
 	     inode, ceph_vinop(inode), le64_to_cpu(info->version),
@@ -623,19 +624,23 @@ static int fill_inode(struct inode *inode,
 	 *   3    2     skip
 	 *   3    3     update
 	 */
-	if (le64_to_cpu(info->version) > 0 &&
-	    (ci->i_version & ~1) >= le64_to_cpu(info->version))
-		goto no_change;
-	
+	if (ci->i_version == 0 ||
+	    ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+	     le64_to_cpu(info->version) > (ci->i_version & ~1)))
+		new_version = true;
+
 	issued = __ceph_caps_issued(ci, &implemented);
 	issued |= implemented | __ceph_caps_dirty(ci);
+	new_issued = ~issued & le32_to_cpu(info->cap.caps);
 
 	/* update inode */
 	ci->i_version = le64_to_cpu(info->version);
 	inode->i_version++;
 	inode->i_rdev = le32_to_cpu(info->rdev);
+	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
 
-	if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+	if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
+	    (issued & CEPH_CAP_AUTH_EXCL) == 0) {
 		inode->i_mode = le32_to_cpu(info->mode);
 		inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid));
 		inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid));
@@ -644,23 +649,35 @@ static int fill_inode(struct inode *inode,
 		     from_kgid(&init_user_ns, inode->i_gid));
 	}
 
-	if ((issued & CEPH_CAP_LINK_EXCL) == 0)
+	if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
+	    (issued & CEPH_CAP_LINK_EXCL) == 0)
 		set_nlink(inode, le32_to_cpu(info->nlink));
 
-	/* be careful with mtime, atime, size */
-	ceph_decode_timespec(&atime, &info->atime);
-	ceph_decode_timespec(&mtime, &info->mtime);
-	ceph_decode_timespec(&ctime, &info->ctime);
-	queue_trunc = ceph_fill_file_size(inode, issued,
-					  le32_to_cpu(info->truncate_seq),
-					  le64_to_cpu(info->truncate_size),
-					  le64_to_cpu(info->size));
-	ceph_fill_file_time(inode, issued,
-			    le32_to_cpu(info->time_warp_seq),
-			    &ctime, &mtime, &atime);
-
-	ci->i_layout = info->layout;
-	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+	if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
+		/* be careful with mtime, atime, size */
+		ceph_decode_timespec(&atime, &info->atime);
+		ceph_decode_timespec(&mtime, &info->mtime);
+		ceph_decode_timespec(&ctime, &info->ctime);
+		ceph_fill_file_time(inode, issued,
+				le32_to_cpu(info->time_warp_seq),
+				&ctime, &mtime, &atime);
+	}
+
+	if (new_version ||
+	    (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+		ci->i_layout = info->layout;
+		queue_trunc = ceph_fill_file_size(inode, issued,
+					le32_to_cpu(info->truncate_seq),
+					le64_to_cpu(info->truncate_size),
+					le64_to_cpu(info->size));
+		/* only update max_size on auth cap */
+		if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+		    ci->i_max_size != le64_to_cpu(info->max_size)) {
+			dout("max_size %lld -> %llu\n", ci->i_max_size,
+					le64_to_cpu(info->max_size));
+			ci->i_max_size = le64_to_cpu(info->max_size);
+		}
+	}
 
 	/* xattrs */
 	/* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */
@@ -745,15 +762,6 @@ static int fill_inode(struct inode *inode,
 		dout(" marking %p complete (empty)\n", inode);
 		__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
 	}
-no_change:
-	/* only update max_size on auth cap */
-	if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
-	    ci->i_max_size != le64_to_cpu(info->max_size)) {
-		dout("max_size %lld -> %llu\n", ci->i_max_size,
-		     le64_to_cpu(info->max_size));
-		ci->i_max_size = le64_to_cpu(info->max_size);
-	}
-
 	spin_unlock(&ci->i_ceph_lock);
 
 	/* queue truncate if we saw i_size decrease */
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 5f6db18d72e8..3c97d5e9b951 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -625,6 +625,8 @@ int ceph_flags_to_mode(int flags);
 			   CEPH_CAP_LINK_EXCL |		\
 			   CEPH_CAP_XATTR_EXCL |	\
 			   CEPH_CAP_FILE_EXCL)
+#define CEPH_CAP_ANY_FILE_RD (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | \
+			      CEPH_CAP_FILE_SHARED)
 #define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER |	\
 			      CEPH_CAP_FILE_EXCL)
 #define CEPH_CAP_ANY_WR   (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR)
-- 
cgit 


From 513a8243d67f8e8d27f2883bd2f18bc87c7ca376 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Tue, 13 May 2014 11:19:26 +0400
Subject: libceph: mon_get_version request infrastructure

Add support for mon_get_version requests to libceph.  This reuses much
of the ceph_mon_generic_request infrastructure, with one exception.
Older OSDs don't set mon_get_version reply hdr->tid even if the
original request had a non-zero tid, which makes it impossible to
look up ceph_mon_generic_request contexts by tid in get_generic_reply()
for such replies.  As a workaround, we allocate a reply message on the
reply path.  This can probably interfere with revoke, but I don't see
a better way.

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
---
 include/linux/ceph/mon_client.h |   9 ++-
 net/ceph/ceph_common.c          |   2 +
 net/ceph/debugfs.c              |   2 +
 net/ceph/mon_client.c           | 123 ++++++++++++++++++++++++++++++++++++++--
 4 files changed, 128 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index a486f390dfbe..585ef9450e9d 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -40,9 +40,9 @@ struct ceph_mon_request {
 };
 
 /*
- * ceph_mon_generic_request is being used for the statfs and poolop requests
- * which are bening done a bit differently because we need to get data back
- * to the caller
+ * ceph_mon_generic_request is being used for the statfs, poolop and
+ * mon_get_version requests which are being done a bit differently
+ * because we need to get data back to the caller
  */
 struct ceph_mon_generic_request {
 	struct kref kref;
@@ -108,6 +108,9 @@ extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
 extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
 			       struct ceph_statfs *buf);
 
+extern int ceph_monc_do_get_version(struct ceph_mon_client *monc,
+				    const char *what, u64 *newest);
+
 extern int ceph_monc_open_session(struct ceph_mon_client *monc);
 
 extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index 67d7721d237e..1675021d8c12 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -72,6 +72,8 @@ const char *ceph_msg_type_name(int type)
 	case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
 	case CEPH_MSG_STATFS: return "statfs";
 	case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
+	case CEPH_MSG_MON_GET_VERSION: return "mon_get_version";
+	case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply";
 	case CEPH_MSG_MDS_MAP: return "mds_map";
 	case CEPH_MSG_CLIENT_SESSION: return "client_session";
 	case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 8903dcee8d8e..d1a62c69a9f4 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -129,6 +129,8 @@ static int monc_show(struct seq_file *s, void *p)
 			seq_printf(s, "%llu statfs\n", req->tid);
 		else if (op == CEPH_MSG_POOLOP)
 			seq_printf(s, "%llu poolop\n", req->tid);
+		else if (op == CEPH_MSG_MON_GET_VERSION)
+			seq_printf(s, "%llu mon_get_version", req->tid);
 		else
 			seq_printf(s, "%llu unknown\n", req->tid);
 	}
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 2ac9ef35110b..11d8d2f2708a 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -477,14 +477,13 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
 	return m;
 }
 
-static int do_generic_request(struct ceph_mon_client *monc,
-			      struct ceph_mon_generic_request *req)
+static int __do_generic_request(struct ceph_mon_client *monc, u64 tid,
+				struct ceph_mon_generic_request *req)
 {
 	int err;
 
 	/* register request */
-	mutex_lock(&monc->mutex);
-	req->tid = ++monc->last_tid;
+	req->tid = tid != 0 ? tid : ++monc->last_tid;
 	req->request->hdr.tid = cpu_to_le64(req->tid);
 	__insert_generic_request(monc, req);
 	monc->num_generic_requests++;
@@ -496,13 +495,24 @@ static int do_generic_request(struct ceph_mon_client *monc,
 	mutex_lock(&monc->mutex);
 	rb_erase(&req->node, &monc->generic_request_tree);
 	monc->num_generic_requests--;
-	mutex_unlock(&monc->mutex);
 
 	if (!err)
 		err = req->result;
 	return err;
 }
 
+static int do_generic_request(struct ceph_mon_client *monc,
+			      struct ceph_mon_generic_request *req)
+{
+	int err;
+
+	mutex_lock(&monc->mutex);
+	err = __do_generic_request(monc, 0, req);
+	mutex_unlock(&monc->mutex);
+
+	return err;
+}
+
 /*
  * statfs
  */
@@ -579,6 +589,96 @@ out:
 }
 EXPORT_SYMBOL(ceph_monc_do_statfs);
 
+static void handle_get_version_reply(struct ceph_mon_client *monc,
+				     struct ceph_msg *msg)
+{
+	struct ceph_mon_generic_request *req;
+	u64 tid = le64_to_cpu(msg->hdr.tid);
+	void *p = msg->front.iov_base;
+	void *end = p + msg->front_alloc_len;
+	u64 handle;
+
+	dout("%s %p tid %llu\n", __func__, msg, tid);
+
+	ceph_decode_need(&p, end, 2*sizeof(u64), bad);
+	handle = ceph_decode_64(&p);
+	if (tid != 0 && tid != handle)
+		goto bad;
+
+	mutex_lock(&monc->mutex);
+	req = __lookup_generic_req(monc, handle);
+	if (req) {
+		*(u64 *)req->buf = ceph_decode_64(&p);
+		req->result = 0;
+		get_generic_request(req);
+	}
+	mutex_unlock(&monc->mutex);
+	if (req) {
+		complete_all(&req->completion);
+		put_generic_request(req);
+	}
+
+	return;
+bad:
+	pr_err("corrupt mon_get_version reply\n");
+	ceph_msg_dump(msg);
+}
+
+/*
+ * Send MMonGetVersion and wait for the reply.
+ *
+ * @what: one of "mdsmap", "osdmap" or "monmap"
+ */
+int ceph_monc_do_get_version(struct ceph_mon_client *monc, const char *what,
+			     u64 *newest)
+{
+	struct ceph_mon_generic_request *req;
+	void *p, *end;
+	u64 tid;
+	int err;
+
+	req = kzalloc(sizeof(*req), GFP_NOFS);
+	if (!req)
+		return -ENOMEM;
+
+	kref_init(&req->kref);
+	req->buf = newest;
+	req->buf_len = sizeof(*newest);
+	init_completion(&req->completion);
+
+	req->request = ceph_msg_new(CEPH_MSG_MON_GET_VERSION,
+				    sizeof(u64) + sizeof(u32) + strlen(what),
+				    GFP_NOFS, true);
+	if (!req->request) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	req->reply = ceph_msg_new(CEPH_MSG_MON_GET_VERSION_REPLY, 1024,
+				  GFP_NOFS, true);
+	if (!req->reply) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	p = req->request->front.iov_base;
+	end = p + req->request->front_alloc_len;
+
+	/* fill out request */
+	mutex_lock(&monc->mutex);
+	tid = ++monc->last_tid;
+	ceph_encode_64(&p, tid); /* handle */
+	ceph_encode_string(&p, end, what, strlen(what));
+
+	err = __do_generic_request(monc, tid, req);
+
+	mutex_unlock(&monc->mutex);
+out:
+	kref_put(&req->kref, release_generic_request);
+	return err;
+}
+EXPORT_SYMBOL(ceph_monc_do_get_version);
+
 /*
  * pool ops
  */
@@ -981,6 +1081,10 @@ static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
 		handle_statfs_reply(monc, msg);
 		break;
 
+	case CEPH_MSG_MON_GET_VERSION_REPLY:
+		handle_get_version_reply(monc, msg);
+		break;
+
 	case CEPH_MSG_POOLOP_REPLY:
 		handle_poolop_reply(monc, msg);
 		break;
@@ -1029,6 +1133,15 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
 	case CEPH_MSG_AUTH_REPLY:
 		m = ceph_msg_get(monc->m_auth_reply);
 		break;
+	case CEPH_MSG_MON_GET_VERSION_REPLY:
+		if (le64_to_cpu(hdr->tid) != 0)
+			return get_generic_reply(con, hdr, skip);
+
+		/*
+		 * Older OSDs don't set reply tid even if the original
+		 * request had a non-zero tid.  Work around this weirdness
+		 * by falling through to the allocate case.
+		 */
 	case CEPH_MSG_MON_MAP:
 	case CEPH_MSG_MDS_MAP:
 	case CEPH_MSG_OSD_MAP:
-- 
cgit 


From 6044cde6f2a94d88142d4401624152a741866338 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <ilya.dryomov@inktank.com>
Date: Tue, 13 May 2014 11:19:27 +0400
Subject: libceph: add ceph_monc_wait_osdmap()

Add ceph_monc_wait_osdmap(), which will block until the osdmap with the
specified epoch is received or a timeout occurs.

Export both it and ceph_monc_request_next_osdmap(), as they are going to
be needed by rbd.

Signed-off-by: Ilya Dryomov <ilya.dryomov@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
---
 include/linux/ceph/mon_client.h |  2 ++
 net/ceph/mon_client.c           | 27 +++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index 585ef9450e9d..deb47e45ac7c 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -104,6 +104,8 @@ extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);
 extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);
 
 extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);
+extern int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
+				 unsigned long timeout);
 
 extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
 			       struct ceph_statfs *buf);
diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 11d8d2f2708a..067d3af2eaf6 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -296,6 +296,33 @@ void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
 		__send_subscribe(monc);
 	mutex_unlock(&monc->mutex);
 }
+EXPORT_SYMBOL(ceph_monc_request_next_osdmap);
+
+int ceph_monc_wait_osdmap(struct ceph_mon_client *monc, u32 epoch,
+			  unsigned long timeout)
+{
+	unsigned long started = jiffies;
+	int ret;
+
+	mutex_lock(&monc->mutex);
+	while (monc->have_osdmap < epoch) {
+		mutex_unlock(&monc->mutex);
+
+		if (timeout != 0 && time_after_eq(jiffies, started + timeout))
+			return -ETIMEDOUT;
+
+		ret = wait_event_interruptible_timeout(monc->client->auth_wq,
+					 monc->have_osdmap >= epoch, timeout);
+		if (ret < 0)
+			return ret;
+
+		mutex_lock(&monc->mutex);
+	}
+
+	mutex_unlock(&monc->mutex);
+	return 0;
+}
+EXPORT_SYMBOL(ceph_monc_wait_osdmap);
 
 /*
  *
-- 
cgit 


From e041e328c4b41e1db79bfe5ba9992c2ed771ad19 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 21 May 2014 17:32:19 +0200
Subject: perf: Fix perf_event_comm() vs. exec() assumption

perf_event_comm() assumes that set_task_comm() is only called on
exec(), and in particular that it's only called on current.

Neither is true, as Dave reported a WARN triggered by set_task_comm()
being called on !current.

Separate the exec() hook from the comm hook.

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/20140521153219.GH5226@laptop.programming.kicks-ass.net
[ Build fix. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/exec.c                  |  1 +
 include/linux/perf_event.h |  4 +++-
 kernel/events/core.c       | 28 ++++++++++++++++------------
 3 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 238b7aa26f68..a038a41a3677 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1110,6 +1110,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 	else
 		set_dumpable(current->mm, suid_dumpable);
 
+	perf_event_exec();
 	set_task_comm(current, kbasename(bprm->filename));
 
 	/* Set the new mm task size. We have to do that late because it may
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3ef6ea12806a..9b5cd1992a88 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -695,6 +695,7 @@ extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 
+extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
 
@@ -772,7 +773,7 @@ extern void perf_event_enable(struct perf_event *event);
 extern void perf_event_disable(struct perf_event *event);
 extern int __perf_event_disable(void *info);
 extern void perf_event_task_tick(void);
-#else
+#else /* !CONFIG_PERF_EVENTS: */
 static inline void
 perf_event_task_sched_in(struct task_struct *prev,
 			 struct task_struct *task)			{ }
@@ -802,6 +803,7 @@ static inline int perf_unregister_guest_info_callbacks
 (struct perf_guest_info_callbacks *callbacks)				{ return 0; }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
+static inline void perf_event_exec(void)				{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 440eefc67397..647698f91988 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2970,6 +2970,22 @@ out:
 	local_irq_restore(flags);
 }
 
+void perf_event_exec(void)
+{
+	struct perf_event_context *ctx;
+	int ctxn;
+
+	rcu_read_lock();
+	for_each_task_context_nr(ctxn) {
+		ctx = current->perf_event_ctxp[ctxn];
+		if (!ctx)
+			continue;
+
+		perf_event_enable_on_exec(ctx);
+	}
+	rcu_read_unlock();
+}
+
 /*
  * Cross CPU call to read the hardware event
  */
@@ -5057,18 +5073,6 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 void perf_event_comm(struct task_struct *task)
 {
 	struct perf_comm_event comm_event;
-	struct perf_event_context *ctx;
-	int ctxn;
-
-	rcu_read_lock();
-	for_each_task_context_nr(ctxn) {
-		ctx = task->perf_event_ctxp[ctxn];
-		if (!ctx)
-			continue;
-
-		perf_event_enable_on_exec(ctx);
-	}
-	rcu_read_unlock();
 
 	if (!atomic_read(&nr_comm_events))
 		return;
-- 
cgit 


From 82b897782d10fcc4930c9d4a15b175348fdd2871 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Wed, 28 May 2014 11:45:04 +0300
Subject: perf: Differentiate exec() and non-exec() comm events

perf tools like 'perf report' can aggregate samples by comm strings,
which generally works.  However, there are other potential use-cases.
For example, to pair up 'calls' with 'returns' accurately (from branch
events like Intel BTS) it is necessary to identify whether the process
has exec'd.  Although a comm event is generated when an 'exec' happens
it is also generated whenever the comm string is changed on a whim
(e.g. by prctl PR_SET_NAME).  This patch adds a flag to the comm event
to differentiate one case from the other.

In order to determine whether the kernel supports the new flag, a
selection bit named 'comm_exec' is added to struct perf_event_attr.  The
bit does nothing but will cause perf_event_open() to fail if the bit
is set on kernels that do not have it defined.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/537D9EBE.7030806@intel.com
Cc: Paul Mackerras <paulus@samba.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 fs/exec.c                       | 6 +++---
 include/linux/perf_event.h      | 4 ++--
 include/linux/sched.h           | 6 +++++-
 include/uapi/linux/perf_event.h | 9 +++++++--
 kernel/events/core.c            | 4 ++--
 5 files changed, 19 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index a038a41a3677..a3d33fe592d6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1046,13 +1046,13 @@ EXPORT_SYMBOL_GPL(get_task_comm);
  * so that a new one can be started
  */
 
-void set_task_comm(struct task_struct *tsk, const char *buf)
+void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
 {
 	task_lock(tsk);
 	trace_task_rename(tsk, buf);
 	strlcpy(tsk->comm, buf, sizeof(tsk->comm));
 	task_unlock(tsk);
-	perf_event_comm(tsk);
+	perf_event_comm(tsk, exec);
 }
 
 int flush_old_exec(struct linux_binprm * bprm)
@@ -1111,7 +1111,7 @@ void setup_new_exec(struct linux_binprm * bprm)
 		set_dumpable(current->mm, suid_dumpable);
 
 	perf_event_exec();
-	set_task_comm(current, kbasename(bprm->filename));
+	__set_task_comm(current, kbasename(bprm->filename), true);
 
 	/* Set the new mm task size. We have to do that late because it may
 	 * depend on TIF_32BIT which is only updated in flush_thread() on
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b4c1d4685bf0..707617a8c0f6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -707,7 +707,7 @@ extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *
 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 
 extern void perf_event_exec(void);
-extern void perf_event_comm(struct task_struct *tsk);
+extern void perf_event_comm(struct task_struct *tsk, bool exec);
 extern void perf_event_fork(struct task_struct *tsk);
 
 /* Callchains */
@@ -815,7 +815,7 @@ static inline int perf_unregister_guest_info_callbacks
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_exec(void)				{ }
-static inline void perf_event_comm(struct task_struct *tsk)		{ }
+static inline void perf_event_comm(struct task_struct *tsk, bool exec)	{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
 static inline void perf_event_init(void)				{ }
 static inline int  perf_swevent_get_recursion_context(void)		{ return -1; }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 221b2bde3723..ad86e1d7dbc2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2379,7 +2379,11 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i
 struct task_struct *fork_idle(int);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 
-extern void set_task_comm(struct task_struct *tsk, const char *from);
+extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
+static inline void set_task_comm(struct task_struct *tsk, const char *from)
+{
+	__set_task_comm(tsk, from, false);
+}
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index d9cd853818ad..5312fae47218 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -302,8 +302,8 @@ struct perf_event_attr {
 				exclude_callchain_kernel : 1, /* exclude kernel callchains */
 				exclude_callchain_user   : 1, /* exclude user callchains */
 				mmap2          :  1, /* include mmap with inode data     */
-
-				__reserved_1   : 40;
+				comm_exec      :  1, /* flag comm events that are due to an exec */
+				__reserved_1   : 39;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -502,7 +502,12 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
 #define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
 
+/*
+ * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
+ * different events so can reuse the same bit position.
+ */
 #define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
+#define PERF_RECORD_MISC_COMM_EXEC		(1 << 13)
 /*
  * Indicates that the content of PERF_SAMPLE_IP points to
  * the actual instruction that triggered the event. See also
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8fac2056d51e..7da5e561e89a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5090,7 +5090,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 		       NULL);
 }
 
-void perf_event_comm(struct task_struct *task)
+void perf_event_comm(struct task_struct *task, bool exec)
 {
 	struct perf_comm_event comm_event;
 
@@ -5104,7 +5104,7 @@ void perf_event_comm(struct task_struct *task)
 		.event_id  = {
 			.header = {
 				.type = PERF_RECORD_COMM,
-				.misc = 0,
+				.misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
 				/* .size */
 			},
 			/* .pid */
-- 
cgit 


From f27b087b81b70513b8c61ec20596c868f7b93474 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Fri, 6 Jun 2014 07:57:37 -0600
Subject: block: add blk_rq_set_block_pc()

With the optimizations around not clearing the full request at alloc
time, we are leaving some of the needed init for REQ_TYPE_BLOCK_PC
up to the user allocating the request.

Add a blk_rq_set_block_pc() that sets the command type to
REQ_TYPE_BLOCK_PC, and properly initializes the members associated
with this type of request. Update callers to use this function instead
of manipulating rq->cmd_type directly.

Includes fixes from Christoph Hellwig <hch@lst.de> for my half-assed
attempt.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c                            | 18 ++++++++++++++++++
 block/bsg.c                                 |  3 ++-
 block/scsi_ioctl.c                          |  6 +++---
 drivers/block/pktcdvd.c                     |  2 +-
 drivers/cdrom/cdrom.c                       |  2 +-
 drivers/scsi/device_handler/scsi_dh_alua.c  |  2 +-
 drivers/scsi/device_handler/scsi_dh_emc.c   |  2 +-
 drivers/scsi/device_handler/scsi_dh_hp_sw.c |  4 ++--
 drivers/scsi/device_handler/scsi_dh_rdac.c  |  2 +-
 drivers/scsi/osd/osd_initiator.c            |  4 ++--
 drivers/scsi/osst.c                         |  2 +-
 drivers/scsi/scsi_error.c                   |  3 ++-
 drivers/scsi/scsi_lib.c                     |  2 +-
 drivers/scsi/sg.c                           |  3 +--
 drivers/scsi/st.c                           |  2 +-
 drivers/target/target_core_pscsi.c          |  3 ++-
 include/linux/blkdev.h                      |  1 +
 17 files changed, 41 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 40d654861c33..9aca8c71e70b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1218,6 +1218,8 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio,
 	if (unlikely(!rq))
 		return ERR_PTR(-ENOMEM);
 
+	blk_rq_set_block_pc(rq);
+
 	for_each_bio(bio) {
 		struct bio *bounce_bio = bio;
 		int ret;
@@ -1234,6 +1236,22 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio,
 }
 EXPORT_SYMBOL(blk_make_request);
 
+/**
+ * blk_rq_set_block_pc - initialize a request to type BLOCK_PC
+ * @rq:		request to be initialized
+ *
+ */
+void blk_rq_set_block_pc(struct request *rq)
+{
+	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	rq->__data_len = 0;
+	rq->__sector = (sector_t) -1;
+	rq->bio = rq->biotail = NULL;
+	memset(rq->__cmd, 0, sizeof(rq->__cmd));
+	rq->cmd = rq->__cmd;
+}
+EXPORT_SYMBOL(blk_rq_set_block_pc);
+
 /**
  * blk_requeue_request - put a request back on queue
  * @q:		request queue where request should be inserted
diff --git a/block/bsg.c b/block/bsg.c
index e5214c148096..ff46addde5d8 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -196,7 +196,6 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
 	 * fill in request structure
 	 */
 	rq->cmd_len = hdr->request_len;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	rq->timeout = msecs_to_jiffies(hdr->timeout);
 	if (!rq->timeout)
@@ -273,6 +272,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm,
 	rq = blk_get_request(q, rw, GFP_KERNEL);
 	if (!rq)
 		return ERR_PTR(-ENOMEM);
+	blk_rq_set_block_pc(rq);
+
 	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm);
 	if (ret)
 		goto out;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 9c28a5b38042..14695c6221c8 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -229,7 +229,6 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
 	 * fill in request structure
 	 */
 	rq->cmd_len = hdr->cmd_len;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	rq->timeout = msecs_to_jiffies(hdr->timeout);
 	if (!rq->timeout)
@@ -311,6 +310,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 	rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
 	if (!rq)
 		return -ENOMEM;
+	blk_rq_set_block_pc(rq);
 
 	if (blk_fill_sghdr_rq(q, rq, hdr, mode)) {
 		blk_put_request(rq);
@@ -491,7 +491,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
 	memset(sense, 0, sizeof(sense));
 	rq->sense = sense;
 	rq->sense_len = 0;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(rq);
 
 	blk_execute_rq(q, disk, rq, 0);
 
@@ -524,7 +524,7 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 	int err;
 
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(rq);
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
 	rq->cmd[0] = cmd;
 	rq->cmd[4] = data;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index ef166ad2dbad..758ac442c5b5 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -704,6 +704,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 
 	rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
 			     WRITE : READ, __GFP_WAIT);
+	blk_rq_set_block_pc(rq);
 
 	if (cgc->buflen) {
 		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
@@ -716,7 +717,6 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 	memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
 
 	rq->timeout = 60*HZ;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	if (cgc->quiet)
 		rq->cmd_flags |= REQ_QUIET;
 
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 49ac5662585b..0f40c95049c0 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -2184,6 +2184,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 			ret = -ENOMEM;
 			break;
 		}
+		blk_rq_set_block_pc(rq);
 
 		ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 		if (ret) {
@@ -2203,7 +2204,6 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf,
 		rq->cmd[9] = 0xf8;
 
 		rq->cmd_len = 12;
-		rq->cmd_type = REQ_TYPE_BLOCK_PC;
 		rq->timeout = 60 * HZ;
 		bio = rq->bio;
 
diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 5248c888552b..7bcf67eec921 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -120,6 +120,7 @@ static struct request *get_alua_req(struct scsi_device *sdev,
 			    "%s: blk_get_request failed\n", __func__);
 		return NULL;
 	}
+	blk_rq_set_block_pc(rq);
 
 	if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
 		blk_put_request(rq);
@@ -128,7 +129,6 @@ static struct request *get_alua_req(struct scsi_device *sdev,
 		return NULL;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			 REQ_FAILFAST_DRIVER;
 	rq->retries = ALUA_FAILOVER_RETRIES;
diff --git a/drivers/scsi/device_handler/scsi_dh_emc.c b/drivers/scsi/device_handler/scsi_dh_emc.c
index e1c8be06de9d..6f07f7fe3aa1 100644
--- a/drivers/scsi/device_handler/scsi_dh_emc.c
+++ b/drivers/scsi/device_handler/scsi_dh_emc.c
@@ -280,6 +280,7 @@ static struct request *get_req(struct scsi_device *sdev, int cmd,
 		return NULL;
 	}
 
+	blk_rq_set_block_pc(rq);
 	rq->cmd_len = COMMAND_SIZE(cmd);
 	rq->cmd[0] = cmd;
 
@@ -304,7 +305,6 @@ static struct request *get_req(struct scsi_device *sdev, int cmd,
 		break;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			 REQ_FAILFAST_DRIVER;
 	rq->timeout = CLARIION_TIMEOUT;
diff --git a/drivers/scsi/device_handler/scsi_dh_hp_sw.c b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
index 084062bb8ee9..e9d9fea9e272 100644
--- a/drivers/scsi/device_handler/scsi_dh_hp_sw.c
+++ b/drivers/scsi/device_handler/scsi_dh_hp_sw.c
@@ -120,7 +120,7 @@ retry:
 	if (!req)
 		return SCSI_DH_RES_TEMP_UNAVAIL;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			  REQ_FAILFAST_DRIVER;
 	req->cmd_len = COMMAND_SIZE(TEST_UNIT_READY);
@@ -250,7 +250,7 @@ static int hp_sw_start_stop(struct hp_sw_dh_data *h)
 	if (!req)
 		return SCSI_DH_RES_TEMP_UNAVAIL;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			  REQ_FAILFAST_DRIVER;
 	req->cmd_len = COMMAND_SIZE(START_STOP);
diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c
index 4b9cf93f3fb6..826069db9848 100644
--- a/drivers/scsi/device_handler/scsi_dh_rdac.c
+++ b/drivers/scsi/device_handler/scsi_dh_rdac.c
@@ -279,6 +279,7 @@ static struct request *get_rdac_req(struct scsi_device *sdev,
 				"get_rdac_req: blk_get_request failed.\n");
 		return NULL;
 	}
+	blk_rq_set_block_pc(rq);
 
 	if (buflen && blk_rq_map_kern(q, rq, buffer, buflen, GFP_NOIO)) {
 		blk_put_request(rq);
@@ -287,7 +288,6 @@ static struct request *get_rdac_req(struct scsi_device *sdev,
 		return NULL;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->cmd_flags |= REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 			 REQ_FAILFAST_DRIVER;
 	rq->retries = RDAC_RETRIES;
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index bac04c2335aa..5f4cbf0c4759 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -1570,6 +1570,7 @@ static struct request *_make_request(struct request_queue *q, bool has_write,
 		if (unlikely(!req))
 			return ERR_PTR(-ENOMEM);
 
+		blk_rq_set_block_pc(req);
 		return req;
 	}
 }
@@ -1590,7 +1591,6 @@ static int _init_blk_request(struct osd_request *or,
 	}
 
 	or->request = req;
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= REQ_QUIET;
 
 	req->timeout = or->timeout;
@@ -1608,7 +1608,7 @@ static int _init_blk_request(struct osd_request *or,
 				ret = PTR_ERR(req);
 				goto out;
 			}
-			req->cmd_type = REQ_TYPE_BLOCK_PC;
+			blk_rq_set_block_pc(req);
 			or->in.req = or->request->next_rq = req;
 		}
 	} else if (has_in)
diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c
index 21883a2d6324..0727ea7cc387 100644
--- a/drivers/scsi/osst.c
+++ b/drivers/scsi/osst.c
@@ -365,7 +365,7 @@ static int osst_execute(struct osst_request *SRpnt, const unsigned char *cmd,
 	if (!req)
 		return DRIVER_ERROR << 24;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_QUIET;
 
 	SRpnt->bio = NULL;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index f17aa7aa7879..af624619d547 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1951,6 +1951,8 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
 	 */
 	req = blk_get_request(sdev->request_queue, READ, GFP_KERNEL);
 
+	blk_rq_set_block_pc(req);
+
 	req->cmd[0] = ALLOW_MEDIUM_REMOVAL;
 	req->cmd[1] = 0;
 	req->cmd[2] = 0;
@@ -1960,7 +1962,6 @@ static void scsi_eh_lock_door(struct scsi_device *sdev)
 
 	req->cmd_len = COMMAND_SIZE(req->cmd[0]);
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= REQ_QUIET;
 	req->timeout = 10 * HZ;
 	req->retries = 5;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index a0c95cac91f0..c3c1697b143e 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -195,6 +195,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);
 	if (!req)
 		return ret;
+	blk_rq_set_block_pc(req);
 
 	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
 					buffer, bufflen, __GFP_WAIT))
@@ -206,7 +207,6 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	req->sense_len = 0;
 	req->retries = retries;
 	req->timeout = timeout;
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
 
 	/*
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index df5e961484e1..53268aaba559 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1653,10 +1653,9 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd)
 	if (!rq)
 		return -ENOMEM;
 
+	blk_rq_set_block_pc(rq);
 	memcpy(rq->cmd, cmd, hp->cmd_len);
-
 	rq->cmd_len = hp->cmd_len;
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 
 	srp->rq = rq;
 	rq->end_io_data = srp;
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index afc834e172c6..14eb4b256a03 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -484,7 +484,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
 	if (!req)
 		return DRIVER_ERROR << 24;
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
+	blk_rq_set_block_pc(req);
 	req->cmd_flags |= REQ_QUIET;
 
 	mdata->null_mapped = 1;
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 0f199f6a0738..94d00df28f39 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -1055,6 +1055,8 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 			ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 			goto fail;
 		}
+
+		blk_rq_set_block_pc(req);
 	} else {
 		BUG_ON(!cmd->data_length);
 
@@ -1071,7 +1073,6 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 		}
 	}
 
-	req->cmd_type = REQ_TYPE_BLOCK_PC;
 	req->end_io = pscsi_req_done;
 	req->end_io_data = cmd;
 	req->cmd_len = scsi_command_size(pt->pscsi_cdb);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dc2c703f05fd..31e11051f1ba 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -796,6 +796,7 @@ extern void __blk_put_request(struct request_queue *, struct request *);
 extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
 extern struct request *blk_make_request(struct request_queue *, struct bio *,
 					gfp_t);
+extern void blk_rq_set_block_pc(struct request *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
 extern void blk_add_request_payload(struct request *rq, struct page *page,
 		unsigned int len);
-- 
cgit 


From a4391c6465d9c978fd4bded12e34bdde3f5458f0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 5 Jun 2014 15:21:56 -0600
Subject: blk-mq: bump max tag depth to 10K tags

For some scsi-mq cases, the tag map can be huge. So increase the
max number of tags we support.

Additionally, don't fail with EINVAL if a user requests too many
tags. Warn that the tag depth has been adjusted down, and store
the new value inside the tag_set passed in.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c         | 13 ++++++++++++-
 include/linux/blk-mq.h |  2 +-
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4e4cd6208052..a6ee74e27957 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1967,13 +1967,19 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+/*
+ * Alloc a tag set to be associated with one or more request queues.
+ * May fail with EINVAL for various error conditions. May adjust the
+ * requested depth down, if it is too large. In that case, the set
+ * value will be stored in set->queue_depth.
+ */
 int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 {
 	int i;
 
 	if (!set->nr_hw_queues)
 		return -EINVAL;
-	if (!set->queue_depth || set->queue_depth > BLK_MQ_MAX_DEPTH)
+	if (!set->queue_depth)
 		return -EINVAL;
 	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
 		return -EINVAL;
@@ -1981,6 +1987,11 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue)
 		return -EINVAL;
 
+	if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
+		pr_info("blk-mq: reduced tag depth to %u\n",
+			BLK_MQ_MAX_DEPTH);
+		set->queue_depth = BLK_MQ_MAX_DEPTH;
+	}
 
 	set->tags = kmalloc_node(set->nr_hw_queues *
 				 sizeof(struct blk_mq_tags *),
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0feedebfde48..a002cf191427 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -135,7 +135,7 @@ enum {
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
 
-	BLK_MQ_MAX_DEPTH	= 2048,
+	BLK_MQ_MAX_DEPTH	= 10240,
 
 	BLK_MQ_CPU_WORK_BATCH	= 8,
 };
-- 
cgit 


From ce369a545aac3da653dd95d8117093a862bf94d3 Mon Sep 17 00:00:00 2001
From: Andrii Tseglytskyi <andrii.tseglytskyi@ti.com>
Date: Fri, 16 May 2014 05:45:58 -0500
Subject: ARM: OMAP5+: dpll: support Duty Cycle Correction(DCC)

Duty Cycle Correction(DCC) needs to be enabled if the MPU is to run at
frequencies beyond 1.4GHz for OMAP5, DRA75x, DRA72x.

MPU DPLL has a limitation on the maximum frequency it can be locked
at. Duty Cycle Correction circuit is used to recover a correct duty
cycle for achieving higher frequencies (hardware internally switches
output to M3 output(CLKOUTHIF) from M2 output (CLKOUT)).

For further information, See the note on OMAP5432 Technical Reference
Manual(SWPU282U) chapter 3.6.3.3.1 "DPLLs Output Clocks Parameters",
and also the "OMAP543x ES2.0 DM Operating Conditions Addendum v0.5"
chapter 2.1 "Micro Processor Unit (MPU)". Equivalent information is
present in relevant DRA75x, 72x documentation(SPRUHP2E, SPRUHI2P).

Signed-off-by: Andrii Tseglytskyi <andrii.tseglytskyi@ti.com>
Signed-off-by: Taras Kondratiuk <taras@ti.com>
Signed-off-by: J Keerthy <j-keerthy@ti.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
[t-kristo@ti.com: added TRM / DM references for DCC clock rate]
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 arch/arm/mach-omap2/dpll3xxx.c | 9 +++++++++
 include/linux/clk/ti.h         | 4 ++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c
index fcd8036af910..6d7ba37e2257 100644
--- a/arch/arm/mach-omap2/dpll3xxx.c
+++ b/arch/arm/mach-omap2/dpll3xxx.c
@@ -319,6 +319,15 @@ static int omap3_noncore_dpll_program(struct clk_hw_omap *clk, u16 freqsel)
 
 	/* Set DPLL multiplier, divider */
 	v = omap2_clk_readl(clk, dd->mult_div1_reg);
+
+	/* Handle Duty Cycle Correction */
+	if (dd->dcc_mask) {
+		if (dd->last_rounded_rate >= dd->dcc_rate)
+			v |= dd->dcc_mask; /* Enable DCC */
+		else
+			v &= ~dd->dcc_mask; /* Disable DCC */
+	}
+
 	v &= ~(dd->mult_mask | dd->div1_mask);
 	v |= dd->last_rounded_m << __ffs(dd->mult_mask);
 	v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask);
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 4231c41bed51..e8d8a35034a5 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -41,6 +41,8 @@
  * @idlest_reg: register containing the DPLL idle status bitfield
  * @autoidle_mask: mask of the DPLL autoidle mode bitfield in @autoidle_reg
  * @freqsel_mask: mask of the DPLL jitter correction bitfield in @control_reg
+ * @dcc_mask: mask of the DPLL DCC correction bitfield in @mult_div1_reg
+ * @dcc_rate: rate at or above which DCC @dcc_mask must be set
  * @idlest_mask: mask of the DPLL idle status bitfield in @idlest_reg
  * @lpmode_mask: mask of the DPLL low-power mode bitfield in @control_reg
  * @m4xen_mask: mask of the DPLL M4X multiplier bitfield in @control_reg
@@ -86,6 +88,8 @@ struct dpll_data {
 	u32			idlest_mask;
 	u32			dco_mask;
 	u32			sddiv_mask;
+	u32			dcc_mask;
+	unsigned long		dcc_rate;
 	u32			lpmode_mask;
 	u32			m4xen_mask;
 	u8			auto_recal_bit;
-- 
cgit 


From 31632dbdba85aafc8a6772d578c5c14f84a1fe17 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Fri, 6 Jun 2014 14:35:49 -0700
Subject: drivers/rtc/rtc-cmos.c: drivers/char/rtc.c features for DECstation
 support

This brings in drivers/char/rtc.c functionality required for DECstation
and, should the maintainers decide to switch, Alpha systems to use
rtc-cmos.

Specifically these features are made available:

* RTC iomem rather than x86/PCI port I/O mapping, controlled with the
  RTC_IOMAPPED macro as with the original driver.  The DS1287A chip in all
  DECstation systems is mapped in the host bus address space as a
  contiguous block of 64 32-bit words of which the least significant byte
  accesses the RTC chip for both reads and writes.  All the address and
  data window register accesses are made transparently by the chipset glue
  logic so that the device appears directly mapped on the host bus.

* A way to set the size of the address space explicitly with the
  newly-added `address_space' member of the platform part of the RTC
  device structure.  This avoids the unreliable heuristics that do not
  work in a setup where the RTC is not explicitly accessed with the usual
  address and data window register pair.

* The ability to use the RTC periodic interrupt as a system clock
  device, which is implemented by arch/mips/kernel/cevt-ds1287.c for
  DECstation systems and takes the RTC interrupt away from the RTC driver.
   Eventually hooking back to the clock device's interrupt handler should
  be possible for the purpose of the alarm clock and possibly also
  update-in-progress interrupt, but this is not done by this change.

  o To avoid interfering with the clock interrupt all the places where
    the RTC interrupt mask is fiddled with are only executed if an IRQ
    has been assigned to the RTC driver.

  o To avoid changing the clock setup Register A is not fiddled with
    if CMOS_RTC_FLAGS_NOFREQ is set in the newly-added `flags' member of
    the platform part of the RTC device structure.  Originally, in
    drivers/char/rtc.c, this was keyed with the absence of the RTC
    interrupt, just like the interrupt mask, but there only the periodic
    interrupt frequency is set, whereas rtc-cmos also sets the divider
    bits.  Therefore a new flag is introduced so that systems where the
    RTC interrupt is not usable rather than used as a system clock device
    can fully initialise the RTC.

* A small clean-up is made to the IRQ assignment code that makes the IRQ
  number hardcoded to -1 rather than arbitrary -ENXIO (or whatever error
  happens to be returned by platform_get_irq) where no IRQ has been
  assigned to the RTC driver (NO_IRQ might be another candidate, but it
  looks like this macro has inconsistent or missing definitions and
  limited use and might therefore be unsafe).

Verified to work correctly with a DECstation 5000/240 system.

[akpm@linux-foundation.org: fix weird code layout]
Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-cmos.c      | 85 ++++++++++++++++++++++++++++++++-------------
 include/linux/mc146818rtc.h |  4 +++
 2 files changed, 64 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 0963c9309c74..b0e4a3eb33c7 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -647,6 +647,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	int				retval = 0;
 	unsigned char			rtc_control;
 	unsigned			address_space;
+	u32				flags = 0;
 
 	/* there can be only one ... */
 	if (cmos_rtc.dev)
@@ -660,9 +661,12 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	 * REVISIT non-x86 systems may instead use memory space resources
 	 * (needing ioremap etc), not i/o space resources like this ...
 	 */
-	ports = request_region(ports->start,
-			resource_size(ports),
-			driver_name);
+	if (RTC_IOMAPPED)
+		ports = request_region(ports->start, resource_size(ports),
+				       driver_name);
+	else
+		ports = request_mem_region(ports->start, resource_size(ports),
+					   driver_name);
 	if (!ports) {
 		dev_dbg(dev, "i/o registers already in use\n");
 		return -EBUSY;
@@ -699,6 +703,11 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	 * expect CMOS_READ and friends to handle.
 	 */
 	if (info) {
+		if (info->flags)
+			flags = info->flags;
+		if (info->address_space)
+			address_space = info->address_space;
+
 		if (info->rtc_day_alarm && info->rtc_day_alarm < 128)
 			cmos_rtc.day_alrm = info->rtc_day_alarm;
 		if (info->rtc_mon_alarm && info->rtc_mon_alarm < 128)
@@ -726,18 +735,21 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 
 	spin_lock_irq(&rtc_lock);
 
-	/* force periodic irq to CMOS reset default of 1024Hz;
-	 *
-	 * REVISIT it's been reported that at least one x86_64 ALI mobo
-	 * doesn't use 32KHz here ... for portability we might need to
-	 * do something about other clock frequencies.
-	 */
-	cmos_rtc.rtc->irq_freq = 1024;
-	hpet_set_periodic_freq(cmos_rtc.rtc->irq_freq);
-	CMOS_WRITE(RTC_REF_CLCK_32KHZ | 0x06, RTC_FREQ_SELECT);
+	if (!(flags & CMOS_RTC_FLAGS_NOFREQ)) {
+		/* force periodic irq to CMOS reset default of 1024Hz;
+		 *
+		 * REVISIT it's been reported that at least one x86_64 ALI
+		 * mobo doesn't use 32KHz here ... for portability we might
+		 * need to do something about other clock frequencies.
+		 */
+		cmos_rtc.rtc->irq_freq = 1024;
+		hpet_set_periodic_freq(cmos_rtc.rtc->irq_freq);
+		CMOS_WRITE(RTC_REF_CLCK_32KHZ | 0x06, RTC_FREQ_SELECT);
+	}
 
 	/* disable irqs */
-	cmos_irq_disable(&cmos_rtc, RTC_PIE | RTC_AIE | RTC_UIE);
+	if (is_valid_irq(rtc_irq))
+		cmos_irq_disable(&cmos_rtc, RTC_PIE | RTC_AIE | RTC_UIE);
 
 	rtc_control = CMOS_READ(RTC_CONTROL);
 
@@ -802,14 +814,18 @@ cleanup1:
 	cmos_rtc.dev = NULL;
 	rtc_device_unregister(cmos_rtc.rtc);
 cleanup0:
-	release_region(ports->start, resource_size(ports));
+	if (RTC_IOMAPPED)
+		release_region(ports->start, resource_size(ports));
+	else
+		release_mem_region(ports->start, resource_size(ports));
 	return retval;
 }
 
-static void cmos_do_shutdown(void)
+static void cmos_do_shutdown(int rtc_irq)
 {
 	spin_lock_irq(&rtc_lock);
-	cmos_irq_disable(&cmos_rtc, RTC_IRQMASK);
+	if (is_valid_irq(rtc_irq))
+		cmos_irq_disable(&cmos_rtc, RTC_IRQMASK);
 	spin_unlock_irq(&rtc_lock);
 }
 
@@ -818,7 +834,7 @@ static void __exit cmos_do_remove(struct device *dev)
 	struct cmos_rtc	*cmos = dev_get_drvdata(dev);
 	struct resource *ports;
 
-	cmos_do_shutdown();
+	cmos_do_shutdown(cmos->irq);
 
 	sysfs_remove_bin_file(&dev->kobj, &nvram);
 
@@ -831,7 +847,10 @@ static void __exit cmos_do_remove(struct device *dev)
 	cmos->rtc = NULL;
 
 	ports = cmos->iomem;
-	release_region(ports->start, resource_size(ports));
+	if (RTC_IOMAPPED)
+		release_region(ports->start, resource_size(ports));
+	else
+		release_mem_region(ports->start, resource_size(ports));
 	cmos->iomem = NULL;
 
 	cmos->dev = NULL;
@@ -1065,10 +1084,13 @@ static void __exit cmos_pnp_remove(struct pnp_dev *pnp)
 
 static void cmos_pnp_shutdown(struct pnp_dev *pnp)
 {
-	if (system_state == SYSTEM_POWER_OFF && !cmos_poweroff(&pnp->dev))
+	struct device *dev = &pnp->dev;
+	struct cmos_rtc	*cmos = dev_get_drvdata(dev);
+
+	if (system_state == SYSTEM_POWER_OFF && !cmos_poweroff(dev))
 		return;
 
-	cmos_do_shutdown();
+	cmos_do_shutdown(cmos->irq);
 }
 
 static const struct pnp_device_id rtc_ids[] = {
@@ -1143,11 +1165,21 @@ static inline void cmos_of_init(struct platform_device *pdev) {}
 
 static int __init cmos_platform_probe(struct platform_device *pdev)
 {
+	struct resource *resource;
+	int irq;
+
 	cmos_of_init(pdev);
 	cmos_wake_setup(&pdev->dev);
-	return cmos_do_probe(&pdev->dev,
-			platform_get_resource(pdev, IORESOURCE_IO, 0),
-			platform_get_irq(pdev, 0));
+
+	if (RTC_IOMAPPED)
+		resource = platform_get_resource(pdev, IORESOURCE_IO, 0);
+	else
+		resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		irq = -1;
+
+	return cmos_do_probe(&pdev->dev, resource, irq);
 }
 
 static int __exit cmos_platform_remove(struct platform_device *pdev)
@@ -1158,10 +1190,13 @@ static int __exit cmos_platform_remove(struct platform_device *pdev)
 
 static void cmos_platform_shutdown(struct platform_device *pdev)
 {
-	if (system_state == SYSTEM_POWER_OFF && !cmos_poweroff(&pdev->dev))
+	struct device *dev = &pdev->dev;
+	struct cmos_rtc	*cmos = dev_get_drvdata(dev);
+
+	if (system_state == SYSTEM_POWER_OFF && !cmos_poweroff(dev))
 		return;
 
-	cmos_do_shutdown();
+	cmos_do_shutdown(cmos->irq);
 }
 
 /* work with hotplug and coldplug */
diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
index 2f4e957af656..433e0c74d643 100644
--- a/include/linux/mc146818rtc.h
+++ b/include/linux/mc146818rtc.h
@@ -31,6 +31,10 @@ struct cmos_rtc_board_info {
 	void	(*wake_on)(struct device *dev);
 	void	(*wake_off)(struct device *dev);
 
+	u32	flags;
+#define CMOS_RTC_FLAGS_NOFREQ	(1 << 0)
+	int	address_space;
+
 	u8	rtc_day_alarm;		/* zero, or register index */
 	u8	rtc_mon_alarm;		/* zero, or register index */
 	u8	rtc_century;		/* zero, or register index */
-- 
cgit 


From 4e52365f279564cef0ddd41db5237f0471381093 Mon Sep 17 00:00:00 2001
From: Matthew Dempsky <mdempsky@chromium.org>
Date: Fri, 6 Jun 2014 14:36:42 -0700
Subject: ptrace: fix fork event messages across pid namespaces

When tracing a process in another pid namespace, it's important for fork
event messages to contain the child's pid as seen from the tracer's pid
namespace, not the parent's.  Otherwise, the tracer won't be able to
correlate the fork event with later SIGTRAP signals it receives from the
child.

We still risk a race condition if a ptracer from a different pid
namespace attaches after we compute the pid_t value.  However, sending a
bogus fork event message in this unlikely scenario is still a vast
improvement over the status quo where we always send bogus fork event
messages to debuggers in a different pid namespace than the forking
process.

Signed-off-by: Matthew Dempsky <mdempsky@chromium.org>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Julien Tinnes <jln@chromium.org>
Cc: Roland McGrath <mcgrathr@chromium.org>
Cc: Jan Kratochvil <jan.kratochvil@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ptrace.h | 32 ++++++++++++++++++++++++++++++++
 kernel/fork.c          | 10 +++++++---
 2 files changed, 39 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 07d0df6bf768..077904c8b70d 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -5,6 +5,7 @@
 #include <linux/sched.h>		/* For struct task_struct.  */
 #include <linux/err.h>			/* for IS_ERR_VALUE */
 #include <linux/bug.h>			/* For BUG_ON.  */
+#include <linux/pid_namespace.h>	/* For task_active_pid_ns.  */
 #include <uapi/linux/ptrace.h>
 
 /*
@@ -128,6 +129,37 @@ static inline void ptrace_event(int event, unsigned long message)
 	}
 }
 
+/**
+ * ptrace_event_pid - possibly stop for a ptrace event notification
+ * @event:	%PTRACE_EVENT_* value to report
+ * @pid:	process identifier for %PTRACE_GETEVENTMSG to return
+ *
+ * Check whether @event is enabled and, if so, report @event and @pid
+ * to the ptrace parent.  @pid is reported as the pid_t seen from the
+ * ptrace parent's pid namespace.
+ *
+ * Called without locks.
+ */
+static inline void ptrace_event_pid(int event, struct pid *pid)
+{
+	/*
+	 * FIXME: There's a potential race if a ptracer in a different pid
+	 * namespace than parent attaches between computing message below and
+	 * when we acquire tasklist_lock in ptrace_stop().  If this happens,
+	 * the ptracer will get a bogus pid from PTRACE_GETEVENTMSG.
+	 */
+	unsigned long message = 0;
+	struct pid_namespace *ns;
+
+	rcu_read_lock();
+	ns = task_active_pid_ns(rcu_dereference(current->parent));
+	if (ns)
+		message = pid_nr_ns(pid, ns);
+	rcu_read_unlock();
+
+	ptrace_event(event, message);
+}
+
 /**
  * ptrace_init_task - initialize ptrace state for a new child
  * @child:		new child task
diff --git a/kernel/fork.c b/kernel/fork.c
index 0d53eb0dfb6f..d2799d1fc952 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1606,10 +1606,12 @@ long do_fork(unsigned long clone_flags,
 	 */
 	if (!IS_ERR(p)) {
 		struct completion vfork;
+		struct pid *pid;
 
 		trace_sched_process_fork(current, p);
 
-		nr = task_pid_vnr(p);
+		pid = get_task_pid(p, PIDTYPE_PID);
+		nr = pid_vnr(pid);
 
 		if (clone_flags & CLONE_PARENT_SETTID)
 			put_user(nr, parent_tidptr);
@@ -1624,12 +1626,14 @@ long do_fork(unsigned long clone_flags,
 
 		/* forking complete and child started to run, tell ptracer */
 		if (unlikely(trace))
-			ptrace_event(trace, nr);
+			ptrace_event_pid(trace, pid);
 
 		if (clone_flags & CLONE_VFORK) {
 			if (!wait_for_vfork_done(p, &vfork))
-				ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
+				ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
 		}
+
+		put_pid(pid);
 	} else {
 		nr = PTR_ERR(p);
 	}
-- 
cgit 


From 36fac0a214805bd7c8307cad1cde60a7b833266d Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 6 Jun 2014 14:36:45 -0700
Subject: signals: kill sigfindinword()

It has no users and it doesn't look useful.  I do not know why/when it was
introduced, I can't even find any user in the git history.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/include/asm/signal.h | 9 ---------
 arch/x86/include/asm/signal.h  | 6 ------
 include/linux/signal.h         | 5 -----
 3 files changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 214320b50384..8c8ce5e1ee0e 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -60,15 +60,6 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
 	 __const_sigismember(set,sig) :		\
 	 __gen_sigismember(set,sig))
 
-static inline int sigfindinword(unsigned long word)
-{
-	asm ("bfffo %1{#0,#0},%0"
-		: "=d" (word)
-		: "d" (word & -word)
-		: "cc");
-	return word ^ 31;
-}
-
 #endif /* !CONFIG_CPU_HAS_NO_BITFIELDS */
 
 #ifndef __uClinux__
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 35e67a457182..31eab867e6d3 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -92,12 +92,6 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
 	 ? __const_sigismember((set), (sig))	\
 	 : __gen_sigismember((set), (sig)))
 
-static inline int sigfindinword(unsigned long word)
-{
-	asm("bsfl %1,%0" : "=r"(word) : "rm"(word) : "cc");
-	return word;
-}
-
 struct pt_regs;
 
 #else /* __i386__ */
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 2ac423bdb676..ae744c314630 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -63,11 +63,6 @@ static inline int sigismember(sigset_t *set, int _sig)
 		return 1 & (set->sig[sig / _NSIG_BPW] >> (sig % _NSIG_BPW));
 }
 
-static inline int sigfindinword(unsigned long word)
-{
-	return ffz(~word);
-}
-
 #endif /* __HAVE_ARCH_SIG_BITOPS */
 
 static inline int sigisemptyset(sigset_t *set)
-- 
cgit 


From 0341729b4b832e753c5e745c6ba0e797f6198be0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 6 Jun 2014 14:36:53 -0700
Subject: signals: mv {dis,}allow_signal() from sched.h/exit.c to signal.[ch]

Move the declaration/definition of allow_signal/disallow_signal to
signal.h/signal.c.  The new place is more logical and allows the use of
the static helpers in signal.c (see the next changes).

While at it, make them return void and remove the valid_signal() check.
Nobody checks the returned value, and in-kernel users must not pass the
wrong signal number.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h  |  3 ---
 include/linux/signal.h |  2 ++
 kernel/exit.c          | 39 ---------------------------------------
 kernel/signal.c        | 29 +++++++++++++++++++++++++++++
 4 files changed, 31 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8fcd0e6098d9..ea74596014a2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2414,9 +2414,6 @@ extern void flush_itimer_signals(void);
 
 extern void do_group_exit(int);
 
-extern int allow_signal(int);
-extern int disallow_signal(int);
-
 extern int do_execve(struct filename *,
 		     const char __user * const __user *,
 		     const char __user * const __user *);
diff --git a/include/linux/signal.h b/include/linux/signal.h
index ae744c314630..ac83c593f4b9 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -284,6 +284,8 @@ extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
 extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
 extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping);
 extern void exit_signals(struct task_struct *tsk);
+extern void allow_signal(int);
+extern void disallow_signal(int);
 
 /*
  * Eventually that'll replace get_signal_to_deliver(); macro for now,
diff --git a/kernel/exit.c b/kernel/exit.c
index 750c2e594617..e5c4668f1799 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -313,45 +313,6 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
 	}
 }
 
-/*
- * Let kernel threads use this to say that they allow a certain signal.
- * Must not be used if kthread was cloned with CLONE_SIGHAND.
- */
-int allow_signal(int sig)
-{
-	if (!valid_signal(sig) || sig < 1)
-		return -EINVAL;
-
-	spin_lock_irq(&current->sighand->siglock);
-	/* This is only needed for daemonize()'ed kthreads */
-	sigdelset(&current->blocked, sig);
-	/*
-	 * Kernel threads handle their own signals. Let the signal code
-	 * know it'll be handled, so that they don't get converted to
-	 * SIGKILL or just silently dropped.
-	 */
-	current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-	return 0;
-}
-
-EXPORT_SYMBOL(allow_signal);
-
-int disallow_signal(int sig)
-{
-	if (!valid_signal(sig) || sig < 1)
-		return -EINVAL;
-
-	spin_lock_irq(&current->sighand->siglock);
-	current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-	return 0;
-}
-
-EXPORT_SYMBOL(disallow_signal);
-
 #ifdef CONFIG_MEMCG
 /*
  * A task is exiting.   If it owned this mm, find a new owner for the mm.
diff --git a/kernel/signal.c b/kernel/signal.c
index a6d8c3af0ad6..7d6ff8b18509 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3066,6 +3066,35 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo,
 }
 #endif
 
+/*
+ * Let kernel threads use this to say that they allow a certain signal.
+ * Must not be used if kthread was cloned with CLONE_SIGHAND.
+ */
+void allow_signal(int sig)
+{
+	spin_lock_irq(&current->sighand->siglock);
+	/* This is only needed for daemonize()'ed kthreads */
+	sigdelset(&current->blocked, sig);
+	/*
+	 * Kernel threads handle their own signals. Let the signal code
+	 * know it'll be handled, so that they don't get converted to
+	 * SIGKILL or just silently dropped.
+	 */
+	current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+}
+EXPORT_SYMBOL(allow_signal);
+
+void disallow_signal(int sig)
+{
+	spin_lock_irq(&current->sighand->siglock);
+	current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN;
+	recalc_sigpending();
+	spin_unlock_irq(&current->sighand->siglock);
+}
+EXPORT_SYMBOL(disallow_signal);
+
 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 {
 	struct task_struct *p = current, *t;
-- 
cgit 


From b4e74264eb0b03f42097fa70a0766312156244a0 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 6 Jun 2014 14:37:00 -0700
Subject: signals: introduce kernel_sigaction()

Now that allow_signal() is really trivial we can unify it with
disallow_signal().  Add the new helper, kernel_sigaction(), and
reimplement allow_signal/disallow_signal as trivial wrappers.

This saves one EXPORT_SYMBOL() and the new helper can have more users.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: David Woodhouse <dwmw2@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 18 ++++++++++++++++--
 kernel/signal.c        | 36 ++++++++++++------------------------
 2 files changed, 28 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index ac83c593f4b9..c9e65360c49a 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -284,8 +284,22 @@ extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
 extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping);
 extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping);
 extern void exit_signals(struct task_struct *tsk);
-extern void allow_signal(int);
-extern void disallow_signal(int);
+extern void kernel_sigaction(int, __sighandler_t);
+
+static inline void allow_signal(int sig)
+{
+	/*
+	 * Kernel threads handle their own signals. Let the signal code
+	 * know it'll be handled, so that they don't get converted to
+	 * SIGKILL or just silently dropped.
+	 */
+	kernel_sigaction(sig, (__force __sighandler_t)2);
+}
+
+static inline void disallow_signal(int sig)
+{
+	kernel_sigaction(sig, SIG_IGN);
+}
 
 /*
  * Eventually that'll replace get_signal_to_deliver(); macro for now,
diff --git a/kernel/signal.c b/kernel/signal.c
index 3ec405132c79..a4077e90f19f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3067,37 +3067,25 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo,
 #endif
 
 /*
- * Let kernel threads use this to say that they allow a certain signal.
- * Must not be used if kthread was cloned with CLONE_SIGHAND.
+ * For kthreads only, must not be used if cloned with CLONE_SIGHAND
  */
-void allow_signal(int sig)
+void kernel_sigaction(int sig, __sighandler_t action)
 {
-	/*
-	 * Kernel threads handle their own signals. Let the signal code
-	 * know it'll be handled, so that they don't get converted to
-	 * SIGKILL or just silently dropped.
-	 */
 	spin_lock_irq(&current->sighand->siglock);
-	current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
-	spin_unlock_irq(&current->sighand->siglock);
-}
-EXPORT_SYMBOL(allow_signal);
+	current->sighand->action[sig - 1].sa.sa_handler = action;
+	if (action == SIG_IGN) {
+		sigset_t mask;
 
-void disallow_signal(int sig)
-{
-	sigset_t mask;
+		sigemptyset(&mask);
+		sigaddset(&mask, sig);
 
-	sigemptyset(&mask);
-	sigaddset(&mask, sig);
-
-	spin_lock_irq(&current->sighand->siglock);
-	current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN;
-	flush_sigqueue_mask(&mask, &current->signal->shared_pending);
-	flush_sigqueue_mask(&mask, &current->pending);
-	recalc_sigpending();
+		flush_sigqueue_mask(&mask, &current->signal->shared_pending);
+		flush_sigqueue_mask(&mask, &current->pending);
+		recalc_sigpending();
+	}
 	spin_unlock_irq(&current->sighand->siglock);
 }
-EXPORT_SYMBOL(disallow_signal);
+EXPORT_SYMBOL(kernel_sigaction);
 
 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 {
-- 
cgit 


From dcbff5d1effbbd52be1ed9f2efb6c8d0445ad188 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Fri, 6 Jun 2014 14:37:15 -0700
Subject: idr: reorder the fields

idr_layer->layer is always accessed in the read path, so move it to the
front.

idr_layer->bitmap is moved to the bottom, and rcu_head now shares storage
with bitmap because the two are never accessed at the same time.

idr->id_free/id_free_cnt/lock are free-list fields and are moved to the
bottom.  They will be removed in the near future.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 6af3400b9b2f..013fd9bc4cb6 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -29,21 +29,24 @@
 
 struct idr_layer {
 	int			prefix;	/* the ID prefix of this idr_layer */
-	DECLARE_BITMAP(bitmap, IDR_SIZE); /* A zero bit means "space here" */
+	int			layer;	/* distance from leaf */
 	struct idr_layer __rcu	*ary[1<<IDR_BITS];
 	int			count;	/* When zero, we can release it */
-	int			layer;	/* distance from leaf */
-	struct rcu_head		rcu_head;
+	union {
+		/* A zero bit means "space here" */
+		DECLARE_BITMAP(bitmap, IDR_SIZE);
+		struct rcu_head		rcu_head;
+	};
 };
 
 struct idr {
 	struct idr_layer __rcu	*hint;	/* the last layer allocated from */
 	struct idr_layer __rcu	*top;
-	struct idr_layer	*id_free;
 	int			layers;	/* only valid w/o concurrent changes */
-	int			id_free_cnt;
 	int			cur;	/* current pos for cyclic allocation */
 	spinlock_t		lock;
+	int			id_free_cnt;
+	struct idr_layer	*id_free;
 };
 
 #define IDR_INIT(name)							\
-- 
cgit 


From 060028bac94bf60a65415d1d55a359c3a17d5c31 Mon Sep 17 00:00:00 2001
From: Manfred Spraul <manfred@colorfullife.com>
Date: Fri, 6 Jun 2014 14:37:42 -0700
Subject: ipc/shm.c: increase the defaults for SHMALL, SHMMAX

System V shared memory

a) can be abused to trigger out-of-memory conditions and the standard
   measures against out-of-memory do not work:

    - it is not possible to use setrlimit to limit the size of shm segments.

    - segments can exist without association with any processes, thus
      the oom-killer is unable to free that memory.

b) is typically used for shared information - today often multiple GB.
   (e.g. database shared buffers)

The current default is a maximum segment size of 32 MB and a maximum
total size of 8 GB.  This is often too much for a) and not enough for
b), which means that lots of users must change the defaults.

This patch increases the default limits (nearly) to the maximum, which
is perfect for case b).  The defaults are used after boot and as the
initial value for each new namespace.

Admins/distros that need a protection against a) should reduce the
limits and/or enable shm_rmid_forced.

Unix has historically required setting these limits for shared memory,
and Linux inherited such behavior.  The consequence of this is added
complexity for users and administrators.  One very common example is
database setup/installation documents and scripts, where users must
manually calculate the values for these limits.  This also requires
(some) knowledge of how the underlying memory management works, thus
causing, on many occasions, the limits to just be flat out wrong.
Disabling these limits sooner could have saved companies a lot of time,
headaches and money for support.  But it's never too late: simplify
users' lives now.

Further notes:
- The patch only changes the defaults; overrides behave as before:
        # sysctl kernel.shmmax=33554432
  would recreate the previous limit for SHMMAX (for the current namespace).

- Disabling sysv shm allocation is possible with:
        # sysctl kernel.shmall=0
  (not a new feature, also per-namespace)

- The limits are intentionally set to a value slightly less than ULONG_MAX,
  to avoid triggering overflows in user space apps.
  [not unreasonable, see http://marc.info/?l=linux-mm&m=139638334330127]

Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Reported-by: Davidlohr Bueso <davidlohr@hp.com>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/shm.h      | 3 +--
 include/uapi/linux/shm.h | 8 +++-----
 2 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/shm.h b/include/linux/shm.h
index 1e2cd2e6b540..57d77709fbe2 100644
--- a/include/linux/shm.h
+++ b/include/linux/shm.h
@@ -3,9 +3,8 @@
 
 #include <asm/page.h>
 #include <uapi/linux/shm.h>
-
-#define SHMALL (SHMMAX/PAGE_SIZE*(SHMMNI/16)) /* max shm system wide (pages) */
 #include <asm/shmparam.h>
+
 struct shmid_kernel /* private to the kernel */
 {	
 	struct kern_ipc_perm	shm_perm;
diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h
index 78b69413f582..74e786de6f4e 100644
--- a/include/uapi/linux/shm.h
+++ b/include/uapi/linux/shm.h
@@ -9,15 +9,13 @@
 
 /*
  * SHMMAX, SHMMNI and SHMALL are upper limits are defaults which can
- * be increased by sysctl
+ * be modified by sysctl.
  */
 
-#define SHMMAX 0x2000000		 /* max shared seg size (bytes) */
 #define SHMMIN 1			 /* min shared seg size (bytes) */
 #define SHMMNI 4096			 /* max num of segs system wide */
-#ifndef __KERNEL__
-#define SHMALL (SHMMAX/getpagesize()*(SHMMNI/16))
-#endif
+#define SHMMAX (ULONG_MAX - (1L<<24))	 /* max shared seg size (bytes) */
+#define SHMALL (ULONG_MAX - (1L<<24))	 /* max shm system wide (pages) */
 #define SHMSEG SHMMNI			 /* max shared segs per process */
 
 
-- 
cgit 


From d6f50c95e0e44fa722852ae24aa51d4b7f0d56ed Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 6 Jun 2014 14:38:06 -0700
Subject: key: convert use of typedef ctl_table to struct ctl_table

This typedef is unnecessary and should just be removed.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/key.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 80d677483e31..3ae45f09589b 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -332,7 +332,7 @@ do {									\
 } while (0)
 
 #ifdef CONFIG_SYSCTL
-extern ctl_table key_sysctls[];
+extern struct ctl_table key_sysctls[];
 #endif
 /*
  * the userspace interface
-- 
cgit 


From ffe2c748e283c5dc1b9b9ac116299dbfc11a609b Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Fri, 6 Jun 2014 14:38:17 -0700
Subject: mm: introduce kmemleak_update_trace()

The memory allocation stack trace is not always useful for debugging a
memory leak (e.g.  radix_tree_preload).  This function, when called,
updates the stack trace for an already allocated object.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kmemleak.txt |  1 +
 include/linux/kmemleak.h   |  4 ++++
 mm/kmemleak.c              | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
index a7563ec4ea7b..b772418bf064 100644
--- a/Documentation/kmemleak.txt
+++ b/Documentation/kmemleak.txt
@@ -142,6 +142,7 @@ kmemleak_alloc_percpu	 - notify of a percpu memory block allocation
 kmemleak_free		 - notify of a memory block freeing
 kmemleak_free_part	 - notify of a partial memory block freeing
 kmemleak_free_percpu	 - notify of a percpu memory block freeing
+kmemleak_update_trace	 - update object allocation stack trace
 kmemleak_not_leak	 - mark an object as not a leak
 kmemleak_ignore		 - do not scan or report an object as leak
 kmemleak_scan_area	 - add scan areas inside a memory block
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 5bb424659c04..057e95971014 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -30,6 +30,7 @@ extern void kmemleak_alloc_percpu(const void __percpu *ptr, size_t size) __ref;
 extern void kmemleak_free(const void *ptr) __ref;
 extern void kmemleak_free_part(const void *ptr, size_t size) __ref;
 extern void kmemleak_free_percpu(const void __percpu *ptr) __ref;
+extern void kmemleak_update_trace(const void *ptr) __ref;
 extern void kmemleak_not_leak(const void *ptr) __ref;
 extern void kmemleak_ignore(const void *ptr) __ref;
 extern void kmemleak_scan_area(const void *ptr, size_t size, gfp_t gfp) __ref;
@@ -83,6 +84,9 @@ static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags)
 static inline void kmemleak_free_percpu(const void __percpu *ptr)
 {
 }
+static inline void kmemleak_update_trace(const void *ptr)
+{
+}
 static inline void kmemleak_not_leak(const void *ptr)
 {
 }
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 5d4aec44982e..3cda50c1e394 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -989,6 +989,40 @@ void __ref kmemleak_free_percpu(const void __percpu *ptr)
 }
 EXPORT_SYMBOL_GPL(kmemleak_free_percpu);
 
+/**
+ * kmemleak_update_trace - update object allocation stack trace
+ * @ptr:	pointer to beginning of the object
+ *
+ * Override the object allocation stack trace for cases where the actual
+ * allocation place is not always useful.
+ */
+void __ref kmemleak_update_trace(const void *ptr)
+{
+	struct kmemleak_object *object;
+	unsigned long flags;
+
+	pr_debug("%s(0x%p)\n", __func__, ptr);
+
+	if (!kmemleak_enabled || IS_ERR_OR_NULL(ptr))
+		return;
+
+	object = find_and_get_object((unsigned long)ptr, 1);
+	if (!object) {
+#ifdef DEBUG
+		kmemleak_warn("Updating stack trace for unknown object at %p\n",
+			      ptr);
+#endif
+		return;
+	}
+
+	spin_lock_irqsave(&object->lock, flags);
+	object->trace_len = __save_stack_trace(object->trace);
+	spin_unlock_irqrestore(&object->lock, flags);
+
+	put_object(object);
+}
+EXPORT_SYMBOL(kmemleak_update_trace);
+
 /**
  * kmemleak_not_leak - mark an allocated object as false positive
  * @ptr:	pointer to beginning of the object
-- 
cgit 


From ae022622ae9447bd70e59db7c91efa25c99a90d5 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 6 Jun 2014 14:38:31 -0700
Subject: idle: remove cpu_idle() forward declarations

After all architectures were converted to the generic idle framework,
commit d190e8195b90 ("idle: Remove GENERIC_IDLE_LOOP config switch")
removed the last caller of cpu_idle().  The forward declarations in
header files were forgotten.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu.h | 1 -
 include/linux/smp.h | 2 --
 2 files changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 81887120395c..95978ad7fcdd 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -256,7 +256,6 @@ enum cpuhp_state {
 };
 
 void cpu_startup_entry(enum cpuhp_state state);
-void cpu_idle(void);
 
 void cpu_idle_poll_ctrl(bool enable);
 
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 633f5edd7470..34347f26be9b 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -13,8 +13,6 @@
 #include <linux/init.h>
 #include <linux/llist.h>
 
-extern void cpu_idle(void);
-
 typedef void (*smp_call_func_t)(void *info);
 struct call_single_data {
 	struct llist_node llist;
-- 
cgit 


From 999e568354b8c797f344a2154761dd94cc84e4ac Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Tue, 3 Jun 2014 17:33:35 -0400
Subject: nfs4: remove unused CHANGE_SECURITY_LABEL

This constant has the wrong value.  And we don't use it.  And it's been
removed from the 4.2 spec anyway.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfs/nfs4proc.c    | 2 +-
 include/linux/nfs4.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 397be39c6dc8..7f55fed8dc64 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2750,7 +2750,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
 
 #define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
 #define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
-#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL)
 
 static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
 {
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 12c2cb947df5..a1e3064a8d99 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -399,8 +399,6 @@ enum lock_type4 {
 #define FATTR4_WORD2_LAYOUT_BLKSIZE     (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD       (1UL << 4)
 #define FATTR4_WORD2_SECURITY_LABEL     (1UL << 16)
-#define FATTR4_WORD2_CHANGE_SECURITY_LABEL \
-					(1UL << 17)
 
 /* MDS threshold bitmap bits */
 #define THRESHOLD_RD                    (1UL << 0)
-- 
cgit 


From 0bf4828983dff062cd502f27ab8644b32774e72e Mon Sep 17 00:00:00 2001
From: Steve Wise <swise@opengridcomputing.com>
Date: Wed, 28 May 2014 15:12:01 -0500
Subject: svcrdma: refactor marshalling logic

This patch refactors the NFSRDMA server marshalling logic to
remove the intermediary map structures.  It also fixes an existing bug
where the NFSRDMA server was not minding the device fast register page
list length limitations.

Signed-off-by: Tom Tucker <tom@opengridcomputing.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---
 include/linux/sunrpc/svc_rdma.h          |   3 +-
 net/sunrpc/xprtrdma/svc_rdma_recvfrom.c  | 643 +++++++++++++------------------
 net/sunrpc/xprtrdma/svc_rdma_sendto.c    | 230 ++---------
 net/sunrpc/xprtrdma/svc_rdma_transport.c |  62 +--
 4 files changed, 332 insertions(+), 606 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 0b8e3e6bdacf..5cf99a016368 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -115,14 +115,13 @@ struct svc_rdma_fastreg_mr {
 	struct list_head frmr_list;
 };
 struct svc_rdma_req_map {
-	struct svc_rdma_fastreg_mr *frmr;
 	unsigned long count;
 	union {
 		struct kvec sge[RPCSVC_MAXPAGES];
 		struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
+		unsigned long lkey[RPCSVC_MAXPAGES];
 	};
 };
-#define RDMACTXT_F_FAST_UNREG	1
 #define RDMACTXT_F_LAST_CTXT	2
 
 #define	SVCRDMA_DEVCAP_FAST_REG		1	/* fast mr registration */
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 8d904e4eef15..52d9f2ce20b0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -69,7 +70,8 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 
 	/* Set up the XDR head */
 	rqstp->rq_arg.head[0].iov_base = page_address(page);
-	rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length);
+	rqstp->rq_arg.head[0].iov_len =
+		min_t(size_t, byte_count, ctxt->sge[0].length);
 	rqstp->rq_arg.len = byte_count;
 	rqstp->rq_arg.buflen = byte_count;
 
@@ -85,7 +87,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 		page = ctxt->pages[sge_no];
 		put_page(rqstp->rq_pages[sge_no]);
 		rqstp->rq_pages[sge_no] = page;
-		bc -= min(bc, ctxt->sge[sge_no].length);
+		bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
 		rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
 		sge_no++;
 	}
@@ -113,291 +115,265 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
 	rqstp->rq_arg.tail[0].iov_len = 0;
 }
 
-/* Encode a read-chunk-list as an array of IB SGE
- *
- * Assumptions:
- * - chunk[0]->position points to pages[0] at an offset of 0
- * - pages[] is not physically or virtually contiguous and consists of
- *   PAGE_SIZE elements.
- *
- * Output:
- * - sge array pointing into pages[] array.
- * - chunk_sge array specifying sge index and count for each
- *   chunk in the read list
- *
- */
-static int map_read_chunks(struct svcxprt_rdma *xprt,
-			   struct svc_rqst *rqstp,
-			   struct svc_rdma_op_ctxt *head,
-			   struct rpcrdma_msg *rmsgp,
-			   struct svc_rdma_req_map *rpl_map,
-			   struct svc_rdma_req_map *chl_map,
-			   int ch_count,
-			   int byte_count)
+static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
 {
-	int sge_no;
-	int sge_bytes;
-	int page_off;
-	int page_no;
-	int ch_bytes;
-	int ch_no;
-	struct rpcrdma_read_chunk *ch;
+	if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
+	     RDMA_TRANSPORT_IWARP)
+		return 1;
+	else
+		return min_t(int, sge_count, xprt->sc_max_sge);
+}
 
-	sge_no = 0;
-	page_no = 0;
-	page_off = 0;
-	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-	ch_no = 0;
-	ch_bytes = ntohl(ch->rc_target.rs_length);
-	head->arg.head[0] = rqstp->rq_arg.head[0];
-	head->arg.tail[0] = rqstp->rq_arg.tail[0];
-	head->arg.pages = &head->pages[head->count];
-	head->hdr_count = head->count; /* save count of hdr pages */
-	head->arg.page_base = 0;
-	head->arg.page_len = ch_bytes;
-	head->arg.len = rqstp->rq_arg.len + ch_bytes;
-	head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
-	head->count++;
-	chl_map->ch[0].start = 0;
-	while (byte_count) {
-		rpl_map->sge[sge_no].iov_base =
-			page_address(rqstp->rq_arg.pages[page_no]) + page_off;
-		sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
-		rpl_map->sge[sge_no].iov_len = sge_bytes;
-		/*
-		 * Don't bump head->count here because the same page
-		 * may be used by multiple SGE.
-		 */
-		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
-		rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
+typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
+			      struct svc_rqst *rqstp,
+			      struct svc_rdma_op_ctxt *head,
+			      int *page_no,
+			      u32 *page_offset,
+			      u32 rs_handle,
+			      u32 rs_length,
+			      u64 rs_offset,
+			      int last);
+
+/* Issue an RDMA_READ using the local lkey to map the data sink */
+static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
+			       struct svc_rqst *rqstp,
+			       struct svc_rdma_op_ctxt *head,
+			       int *page_no,
+			       u32 *page_offset,
+			       u32 rs_handle,
+			       u32 rs_length,
+			       u64 rs_offset,
+			       int last)
+{
+	struct ib_send_wr read_wr;
+	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
+	int ret, read, pno;
+	u32 pg_off = *page_offset;
+	u32 pg_no = *page_no;
+
+	ctxt->direction = DMA_FROM_DEVICE;
+	ctxt->read_hdr = head;
+	pages_needed =
+		min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed));
+	read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
+
+	for (pno = 0; pno < pages_needed; pno++) {
+		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
+
+		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
+		head->arg.page_len += len;
+		head->arg.len += len;
+		if (!pg_off)
+			head->count++;
+		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
 		rqstp->rq_next_page = rqstp->rq_respages + 1;
+		ctxt->sge[pno].addr =
+			ib_dma_map_page(xprt->sc_cm_id->device,
+					head->arg.pages[pg_no], pg_off,
+					PAGE_SIZE - pg_off,
+					DMA_FROM_DEVICE);
+		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
+					   ctxt->sge[pno].addr);
+		if (ret)
+			goto err;
+		atomic_inc(&xprt->sc_dma_used);
 
-		byte_count -= sge_bytes;
-		ch_bytes -= sge_bytes;
-		sge_no++;
-		/*
-		 * If all bytes for this chunk have been mapped to an
-		 * SGE, move to the next SGE
-		 */
-		if (ch_bytes == 0) {
-			chl_map->ch[ch_no].count =
-				sge_no - chl_map->ch[ch_no].start;
-			ch_no++;
-			ch++;
-			chl_map->ch[ch_no].start = sge_no;
-			ch_bytes = ntohl(ch->rc_target.rs_length);
-			/* If bytes remaining account for next chunk */
-			if (byte_count) {
-				head->arg.page_len += ch_bytes;
-				head->arg.len += ch_bytes;
-				head->arg.buflen += ch_bytes;
-			}
+		/* The lkey here is either a local dma lkey or a dma_mr lkey */
+		ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+		ctxt->sge[pno].length = len;
+		ctxt->count++;
+
+		/* adjust offset and wrap to next page if needed */
+		pg_off += len;
+		if (pg_off == PAGE_SIZE) {
+			pg_off = 0;
+			pg_no++;
 		}
-		/*
-		 * If this SGE consumed all of the page, move to the
-		 * next page
-		 */
-		if ((sge_bytes + page_off) == PAGE_SIZE) {
-			page_no++;
-			page_off = 0;
-			/*
-			 * If there are still bytes left to map, bump
-			 * the page count
-			 */
-			if (byte_count)
-				head->count++;
-		} else
-			page_off += sge_bytes;
+		rs_length -= len;
 	}
-	BUG_ON(byte_count != 0);
-	return sge_no;
+
+	if (last && rs_length == 0)
+		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+	else
+		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+
+	memset(&read_wr, 0, sizeof(read_wr));
+	read_wr.wr_id = (unsigned long)ctxt;
+	read_wr.opcode = IB_WR_RDMA_READ;
+	ctxt->wr_op = read_wr.opcode;
+	read_wr.send_flags = IB_SEND_SIGNALED;
+	read_wr.wr.rdma.rkey = rs_handle;
+	read_wr.wr.rdma.remote_addr = rs_offset;
+	read_wr.sg_list = ctxt->sge;
+	read_wr.num_sge = pages_needed;
+
+	ret = svc_rdma_send(xprt, &read_wr);
+	if (ret) {
+		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
+		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		goto err;
+	}
+
+	/* return current location in page array */
+	*page_no = pg_no;
+	*page_offset = pg_off;
+	ret = read;
+	atomic_inc(&rdma_stat_read);
+	return ret;
+ err:
+	svc_rdma_unmap_dma(ctxt);
+	svc_rdma_put_context(ctxt, 0);
+	return ret;
 }
 
-/* Map a read-chunk-list to an XDR and fast register the page-list.
- *
- * Assumptions:
- * - chunk[0]	position points to pages[0] at an offset of 0
- * - pages[]	will be made physically contiguous by creating a one-off memory
- *		region using the fastreg verb.
- * - byte_count is # of bytes in read-chunk-list
- * - ch_count	is # of chunks in read-chunk-list
- *
- * Output:
- * - sge array pointing into pages[] array.
- * - chunk_sge array specifying sge index and count for each
- *   chunk in the read list
- */
-static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
+/* Issue an RDMA_READ using an FRMR to map the data sink */
+static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 				struct svc_rqst *rqstp,
 				struct svc_rdma_op_ctxt *head,
-				struct rpcrdma_msg *rmsgp,
-				struct svc_rdma_req_map *rpl_map,
-				struct svc_rdma_req_map *chl_map,
-				int ch_count,
-				int byte_count)
+				int *page_no,
+				u32 *page_offset,
+				u32 rs_handle,
+				u32 rs_length,
+				u64 rs_offset,
+				int last)
 {
-	int page_no;
-	int ch_no;
-	u32 offset;
-	struct rpcrdma_read_chunk *ch;
-	struct svc_rdma_fastreg_mr *frmr;
-	int ret = 0;
+	struct ib_send_wr read_wr;
+	struct ib_send_wr inv_wr;
+	struct ib_send_wr fastreg_wr;
+	u8 key;
+	int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+	struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
+	struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
+	int ret, read, pno;
+	u32 pg_off = *page_offset;
+	u32 pg_no = *page_no;
 
-	frmr = svc_rdma_get_frmr(xprt);
 	if (IS_ERR(frmr))
 		return -ENOMEM;
 
-	head->frmr = frmr;
-	head->arg.head[0] = rqstp->rq_arg.head[0];
-	head->arg.tail[0] = rqstp->rq_arg.tail[0];
-	head->arg.pages = &head->pages[head->count];
-	head->hdr_count = head->count; /* save count of hdr pages */
-	head->arg.page_base = 0;
-	head->arg.page_len = byte_count;
-	head->arg.len = rqstp->rq_arg.len + byte_count;
-	head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
+	ctxt->direction = DMA_FROM_DEVICE;
+	ctxt->frmr = frmr;
+	pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
+	read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
 
-	/* Fast register the page list */
-	frmr->kva = page_address(rqstp->rq_arg.pages[0]);
+	frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
 	frmr->direction = DMA_FROM_DEVICE;
 	frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
-	frmr->map_len = byte_count;
-	frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
-	for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
-		frmr->page_list->page_list[page_no] =
+	frmr->map_len = pages_needed << PAGE_SHIFT;
+	frmr->page_list_len = pages_needed;
+
+	for (pno = 0; pno < pages_needed; pno++) {
+		int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
+
+		head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
+		head->arg.page_len += len;
+		head->arg.len += len;
+		if (!pg_off)
+			head->count++;
+		rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
+		rqstp->rq_next_page = rqstp->rq_respages + 1;
+		frmr->page_list->page_list[pno] =
 			ib_dma_map_page(xprt->sc_cm_id->device,
-					rqstp->rq_arg.pages[page_no], 0,
+					head->arg.pages[pg_no], 0,
 					PAGE_SIZE, DMA_FROM_DEVICE);
-		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-					 frmr->page_list->page_list[page_no]))
-			goto fatal_err;
+		ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
+					   frmr->page_list->page_list[pno]);
+		if (ret)
+			goto err;
 		atomic_inc(&xprt->sc_dma_used);
-		head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
-	}
-	head->count += page_no;
-
-	/* rq_respages points one past arg pages */
-	rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
-	rqstp->rq_next_page = rqstp->rq_respages + 1;
 
-	/* Create the reply and chunk maps */
-	offset = 0;
-	ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-	for (ch_no = 0; ch_no < ch_count; ch_no++) {
-		int len = ntohl(ch->rc_target.rs_length);
-		rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
-		rpl_map->sge[ch_no].iov_len = len;
-		chl_map->ch[ch_no].count = 1;
-		chl_map->ch[ch_no].start = ch_no;
-		offset += len;
-		ch++;
+		/* adjust offset and wrap to next page if needed */
+		pg_off += len;
+		if (pg_off == PAGE_SIZE) {
+			pg_off = 0;
+			pg_no++;
+		}
+		rs_length -= len;
 	}
 
-	ret = svc_rdma_fastreg(xprt, frmr);
-	if (ret)
-		goto fatal_err;
-
-	return ch_no;
-
- fatal_err:
-	printk("svcrdma: error fast registering xdr for xprt %p", xprt);
-	svc_rdma_put_frmr(xprt, frmr);
-	return -EIO;
-}
-
-static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
-			     struct svc_rdma_op_ctxt *ctxt,
-			     struct svc_rdma_fastreg_mr *frmr,
-			     struct kvec *vec,
-			     u64 *sgl_offset,
-			     int count)
-{
-	int i;
-	unsigned long off;
+	if (last && rs_length == 0)
+		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+	else
+		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
 
-	ctxt->count = count;
-	ctxt->direction = DMA_FROM_DEVICE;
-	for (i = 0; i < count; i++) {
-		ctxt->sge[i].length = 0; /* in case map fails */
-		if (!frmr) {
-			BUG_ON(!virt_to_page(vec[i].iov_base));
-			off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
-			ctxt->sge[i].addr =
-				ib_dma_map_page(xprt->sc_cm_id->device,
-						virt_to_page(vec[i].iov_base),
-						off,
-						vec[i].iov_len,
-						DMA_FROM_DEVICE);
-			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-						 ctxt->sge[i].addr))
-				return -EINVAL;
-			ctxt->sge[i].lkey = xprt->sc_dma_lkey;
-			atomic_inc(&xprt->sc_dma_used);
-		} else {
-			ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
-			ctxt->sge[i].lkey = frmr->mr->lkey;
-		}
-		ctxt->sge[i].length = vec[i].iov_len;
-		*sgl_offset = *sgl_offset + vec[i].iov_len;
+	/* Bump the key */
+	key = (u8)(frmr->mr->lkey & 0x000000FF);
+	ib_update_fast_reg_key(frmr->mr, ++key);
+
+	ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
+	ctxt->sge[0].lkey = frmr->mr->lkey;
+	ctxt->sge[0].length = read;
+	ctxt->count = 1;
+	ctxt->read_hdr = head;
+
+	/* Prepare FASTREG WR */
+	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+	fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+	fastreg_wr.send_flags = IB_SEND_SIGNALED;
+	fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
+	fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
+	fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
+	fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+	fastreg_wr.wr.fast_reg.length = frmr->map_len;
+	fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
+	fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
+	fastreg_wr.next = &read_wr;
+
+	/* Prepare RDMA_READ */
+	memset(&read_wr, 0, sizeof(read_wr));
+	read_wr.send_flags = IB_SEND_SIGNALED;
+	read_wr.wr.rdma.rkey = rs_handle;
+	read_wr.wr.rdma.remote_addr = rs_offset;
+	read_wr.sg_list = ctxt->sge;
+	read_wr.num_sge = 1;
+	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
+		read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+		read_wr.wr_id = (unsigned long)ctxt;
+		read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
+	} else {
+		read_wr.opcode = IB_WR_RDMA_READ;
+		read_wr.next = &inv_wr;
+		/* Prepare invalidate */
+		memset(&inv_wr, 0, sizeof(inv_wr));
+		inv_wr.wr_id = (unsigned long)ctxt;
+		inv_wr.opcode = IB_WR_LOCAL_INV;
+		inv_wr.send_flags = IB_SEND_SIGNALED;
+		inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
+	}
+	ctxt->wr_op = read_wr.opcode;
+
+	/* Post the chain */
+	ret = svc_rdma_send(xprt, &fastreg_wr);
+	if (ret) {
+		pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
+		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+		goto err;
 	}
-	return 0;
-}
 
-static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
-{
-	if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
-	     RDMA_TRANSPORT_IWARP) &&
-	    sge_count > 1)
-		return 1;
-	else
-		return min_t(int, sge_count, xprt->sc_max_sge);
+	/* return current location in page array */
+	*page_no = pg_no;
+	*page_offset = pg_off;
+	ret = read;
+	atomic_inc(&rdma_stat_read);
+	return ret;
+ err:
+	svc_rdma_unmap_dma(ctxt);
+	svc_rdma_put_context(ctxt, 0);
+	svc_rdma_put_frmr(xprt, frmr);
+	return ret;
 }
 
-/*
- * Use RDMA_READ to read data from the advertised client buffer into the
- * XDR stream starting at rq_arg.head[0].iov_base.
- * Each chunk in the array
- * contains the following fields:
- * discrim      - '1', This isn't used for data placement
- * position     - The xdr stream offset (the same for every chunk)
- * handle       - RMR for client memory region
- * length       - data transfer length
- * offset       - 64 bit tagged offset in remote memory region
- *
- * On our side, we need to read into a pagelist. The first page immediately
- * follows the RPC header.
- *
- * This function returns:
- * 0 - No error and no read-list found.
- *
- * 1 - Successful read-list processing. The data is not yet in
- * the pagelist and therefore the RPC request must be deferred. The
- * I/O completion will enqueue the transport again and
- * svc_rdma_recvfrom will complete the request.
- *
- * <0 - Error processing/posting read-list.
- *
- * NOTE: The ctxt must not be touched after the last WR has been posted
- * because the I/O completion processing may occur on another
- * processor and free / modify the context. Ne touche pas!
- */
-static int rdma_read_xdr(struct svcxprt_rdma *xprt,
-			 struct rpcrdma_msg *rmsgp,
-			 struct svc_rqst *rqstp,
-			 struct svc_rdma_op_ctxt *hdr_ctxt)
+static int rdma_read_chunks(struct svcxprt_rdma *xprt,
+			    struct rpcrdma_msg *rmsgp,
+			    struct svc_rqst *rqstp,
+			    struct svc_rdma_op_ctxt *head)
 {
-	struct ib_send_wr read_wr;
-	struct ib_send_wr inv_wr;
-	int err = 0;
-	int ch_no;
-	int ch_count;
-	int byte_count;
-	int sge_count;
-	u64 sgl_offset;
+	int page_no, ch_count, ret;
 	struct rpcrdma_read_chunk *ch;
-	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct svc_rdma_req_map *rpl_map;
-	struct svc_rdma_req_map *chl_map;
+	u32 page_offset, byte_count;
+	u64 rs_offset;
+	rdma_reader_fn reader;
 
 	/* If no read list is present, return 0 */
 	ch = svc_rdma_get_read_chunk(rmsgp);
@@ -408,122 +384,55 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	if (ch_count > RPCSVC_MAXPAGES)
 		return -EINVAL;
 
-	/* Allocate temporary reply and chunk maps */
-	rpl_map = svc_rdma_get_req_map();
-	chl_map = svc_rdma_get_req_map();
+	/* The request is completed when the RDMA_READs complete. The
+	 * head context keeps all the pages that comprise the
+	 * request.
+	 */
+	head->arg.head[0] = rqstp->rq_arg.head[0];
+	head->arg.tail[0] = rqstp->rq_arg.tail[0];
+	head->arg.pages = &head->pages[head->count];
+	head->hdr_count = head->count;
+	head->arg.page_base = 0;
+	head->arg.page_len = 0;
+	head->arg.len = rqstp->rq_arg.len;
+	head->arg.buflen = rqstp->rq_arg.buflen;
 
-	if (!xprt->sc_frmr_pg_list_len)
-		sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
-					    rpl_map, chl_map, ch_count,
-					    byte_count);
+	/* Use FRMR if supported */
+	if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
+		reader = rdma_read_chunk_frmr;
 	else
-		sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
-						 rpl_map, chl_map, ch_count,
-						 byte_count);
-	if (sge_count < 0) {
-		err = -EIO;
-		goto out;
-	}
-
-	sgl_offset = 0;
-	ch_no = 0;
+		reader = rdma_read_chunk_lcl;
 
+	page_no = 0; page_offset = 0;
 	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
-	     ch->rc_discrim != 0; ch++, ch_no++) {
-		u64 rs_offset;
-next_sge:
-		ctxt = svc_rdma_get_context(xprt);
-		ctxt->direction = DMA_FROM_DEVICE;
-		ctxt->frmr = hdr_ctxt->frmr;
-		ctxt->read_hdr = NULL;
-		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+	     ch->rc_discrim != 0; ch++) {
 
-		/* Prepare READ WR */
-		memset(&read_wr, 0, sizeof read_wr);
-		read_wr.wr_id = (unsigned long)ctxt;
-		read_wr.opcode = IB_WR_RDMA_READ;
-		ctxt->wr_op = read_wr.opcode;
-		read_wr.send_flags = IB_SEND_SIGNALED;
-		read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle);
 		xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
 				 &rs_offset);
-		read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset;
-		read_wr.sg_list = ctxt->sge;
-		read_wr.num_sge =
-			rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
-		err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
-					&rpl_map->sge[chl_map->ch[ch_no].start],
-					&sgl_offset,
-					read_wr.num_sge);
-		if (err) {
-			svc_rdma_unmap_dma(ctxt);
-			svc_rdma_put_context(ctxt, 0);
-			goto out;
-		}
-		if (((ch+1)->rc_discrim == 0) &&
-		    (read_wr.num_sge == chl_map->ch[ch_no].count)) {
-			/*
-			 * Mark the last RDMA_READ with a bit to
-			 * indicate all RPC data has been fetched from
-			 * the client and the RPC needs to be enqueued.
-			 */
-			set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-			if (hdr_ctxt->frmr) {
-				set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
-				/*
-				 * Invalidate the local MR used to map the data
-				 * sink.
-				 */
-				if (xprt->sc_dev_caps &
-				    SVCRDMA_DEVCAP_READ_W_INV) {
-					read_wr.opcode =
-						IB_WR_RDMA_READ_WITH_INV;
-					ctxt->wr_op = read_wr.opcode;
-					read_wr.ex.invalidate_rkey =
-						ctxt->frmr->mr->lkey;
-				} else {
-					/* Prepare INVALIDATE WR */
-					memset(&inv_wr, 0, sizeof inv_wr);
-					inv_wr.opcode = IB_WR_LOCAL_INV;
-					inv_wr.send_flags = IB_SEND_SIGNALED;
-					inv_wr.ex.invalidate_rkey =
-						hdr_ctxt->frmr->mr->lkey;
-					read_wr.next = &inv_wr;
-				}
-			}
-			ctxt->read_hdr = hdr_ctxt;
-		}
-		/* Post the read */
-		err = svc_rdma_send(xprt, &read_wr);
-		if (err) {
-			printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
-			       err);
-			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-			svc_rdma_unmap_dma(ctxt);
-			svc_rdma_put_context(ctxt, 0);
-			goto out;
+		byte_count = ntohl(ch->rc_target.rs_length);
+
+		while (byte_count > 0) {
+			ret = reader(xprt, rqstp, head,
+				     &page_no, &page_offset,
+				     ntohl(ch->rc_target.rs_handle),
+				     byte_count, rs_offset,
+				     ((ch+1)->rc_discrim == 0) /* last */
+				     );
+			if (ret < 0)
+				goto err;
+			byte_count -= ret;
+			rs_offset += ret;
+			head->arg.buflen += ret;
 		}
-		atomic_inc(&rdma_stat_read);
-
-		if (read_wr.num_sge < chl_map->ch[ch_no].count) {
-			chl_map->ch[ch_no].count -= read_wr.num_sge;
-			chl_map->ch[ch_no].start += read_wr.num_sge;
-			goto next_sge;
-		}
-		sgl_offset = 0;
-		err = 1;
 	}
-
- out:
-	svc_rdma_put_req_map(rpl_map);
-	svc_rdma_put_req_map(chl_map);
-
+	ret = 1;
+ err:
 	/* Detach arg pages. svc_recv will replenish them */
-	for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
-		rqstp->rq_pages[ch_no] = NULL;
+	for (page_no = 0;
+	     &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
+		rqstp->rq_pages[page_no] = NULL;
 
-	return err;
+	return ret;
 }
 
 static int rdma_read_complete(struct svc_rqst *rqstp,
@@ -595,13 +504,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 				  struct svc_rdma_op_ctxt,
 				  dto_q);
 		list_del_init(&ctxt->dto_q);
-	}
-	if (ctxt) {
 		spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
 		return rdma_read_complete(rqstp, ctxt);
-	}
-
-	if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
+	} else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
 		ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
 				  struct svc_rdma_op_ctxt,
 				  dto_q);
@@ -621,7 +526,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 		if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
 			goto close_out;
 
-		BUG_ON(ret);
 		goto out;
 	}
 	dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
@@ -644,12 +548,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	}
 
 	/* Read read-list data. */
-	ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
+	ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
 	if (ret > 0) {
 		/* read-list posted, defer until data received from client. */
 		goto defer;
-	}
-	if (ret < 0) {
+	} else if (ret < 0) {
 		/* Post of read-list failed, free context. */
 		svc_rdma_put_context(ctxt, 1);
 		return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 7e024a51617e..49fd21a5c215 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -49,152 +50,6 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
-/* Encode an XDR as an array of IB SGE
- *
- * Assumptions:
- * - head[0] is physically contiguous.
- * - tail[0] is physically contiguous.
- * - pages[] is not physically or virtually contiguous and consists of
- *   PAGE_SIZE elements.
- *
- * Output:
- * SGE[0]              reserved for RCPRDMA header
- * SGE[1]              data from xdr->head[]
- * SGE[2..sge_count-2] data from xdr->pages[]
- * SGE[sge_count-1]    data from xdr->tail.
- *
- * The max SGE we need is the length of the XDR / pagesize + one for
- * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
- * reserves a page for both the request and the reply header, and this
- * array is only concerned with the reply we are assured that we have
- * on extra page for the RPCRMDA header.
- */
-static int fast_reg_xdr(struct svcxprt_rdma *xprt,
-			struct xdr_buf *xdr,
-			struct svc_rdma_req_map *vec)
-{
-	int sge_no;
-	u32 sge_bytes;
-	u32 page_bytes;
-	u32 page_off;
-	int page_no = 0;
-	u8 *frva;
-	struct svc_rdma_fastreg_mr *frmr;
-
-	frmr = svc_rdma_get_frmr(xprt);
-	if (IS_ERR(frmr))
-		return -ENOMEM;
-	vec->frmr = frmr;
-
-	/* Skip the RPCRDMA header */
-	sge_no = 1;
-
-	/* Map the head. */
-	frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
-	vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
-	vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
-	vec->count = 2;
-	sge_no++;
-
-	/* Map the XDR head */
-	frmr->kva = frva;
-	frmr->direction = DMA_TO_DEVICE;
-	frmr->access_flags = 0;
-	frmr->map_len = PAGE_SIZE;
-	frmr->page_list_len = 1;
-	page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
-	frmr->page_list->page_list[page_no] =
-		ib_dma_map_page(xprt->sc_cm_id->device,
-				virt_to_page(xdr->head[0].iov_base),
-				page_off,
-				PAGE_SIZE - page_off,
-				DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-				 frmr->page_list->page_list[page_no]))
-		goto fatal_err;
-	atomic_inc(&xprt->sc_dma_used);
-
-	/* Map the XDR page list */
-	page_off = xdr->page_base;
-	page_bytes = xdr->page_len + page_off;
-	if (!page_bytes)
-		goto encode_tail;
-
-	/* Map the pages */
-	vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
-	vec->sge[sge_no].iov_len = page_bytes;
-	sge_no++;
-	while (page_bytes) {
-		struct page *page;
-
-		page = xdr->pages[page_no++];
-		sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
-		page_bytes -= sge_bytes;
-
-		frmr->page_list->page_list[page_no] =
-			ib_dma_map_page(xprt->sc_cm_id->device,
-					page, page_off,
-					sge_bytes, DMA_TO_DEVICE);
-		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-					 frmr->page_list->page_list[page_no]))
-			goto fatal_err;
-
-		atomic_inc(&xprt->sc_dma_used);
-		page_off = 0; /* reset for next time through loop */
-		frmr->map_len += PAGE_SIZE;
-		frmr->page_list_len++;
-	}
-	vec->count++;
-
- encode_tail:
-	/* Map tail */
-	if (0 == xdr->tail[0].iov_len)
-		goto done;
-
-	vec->count++;
-	vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
-
-	if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
-	    ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
-		/*
-		 * If head and tail use the same page, we don't need
-		 * to map it again.
-		 */
-		vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
-	} else {
-		void *va;
-
-		/* Map another page for the tail */
-		page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
-		va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
-		vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
-
-		frmr->page_list->page_list[page_no] =
-		    ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
-				    page_off,
-				    PAGE_SIZE,
-				    DMA_TO_DEVICE);
-		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-					 frmr->page_list->page_list[page_no]))
-			goto fatal_err;
-		atomic_inc(&xprt->sc_dma_used);
-		frmr->map_len += PAGE_SIZE;
-		frmr->page_list_len++;
-	}
-
- done:
-	if (svc_rdma_fastreg(xprt, frmr))
-		goto fatal_err;
-
-	return 0;
-
- fatal_err:
-	printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
-	vec->frmr = NULL;
-	svc_rdma_put_frmr(xprt, frmr);
-	return -EIO;
-}
-
 static int map_xdr(struct svcxprt_rdma *xprt,
 		   struct xdr_buf *xdr,
 		   struct svc_rdma_req_map *vec)
@@ -208,9 +63,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
 	BUG_ON(xdr->len !=
 	       (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
 
-	if (xprt->sc_frmr_pg_list_len)
-		return fast_reg_xdr(xprt, xdr, vec);
-
 	/* Skip the first sge, this is for the RPCRDMA header */
 	sge_no = 1;
 
@@ -282,8 +134,6 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
 }
 
 /* Assumptions:
- * - We are using FRMR
- *     - or -
  * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
  */
 static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -327,23 +177,16 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 		sge_bytes = min_t(size_t,
 			  bc, vec->sge[xdr_sge_no].iov_len-sge_off);
 		sge[sge_no].length = sge_bytes;
-		if (!vec->frmr) {
-			sge[sge_no].addr =
-				dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
-					    sge_bytes, DMA_TO_DEVICE);
-			xdr_off += sge_bytes;
-			if (ib_dma_mapping_error(xprt->sc_cm_id->device,
-						 sge[sge_no].addr))
-				goto err;
-			atomic_inc(&xprt->sc_dma_used);
-			sge[sge_no].lkey = xprt->sc_dma_lkey;
-		} else {
-			sge[sge_no].addr = (unsigned long)
-				vec->sge[xdr_sge_no].iov_base + sge_off;
-			sge[sge_no].lkey = vec->frmr->mr->lkey;
-		}
+		sge[sge_no].addr =
+			dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
+				    sge_bytes, DMA_TO_DEVICE);
+		xdr_off += sge_bytes;
+		if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+					 sge[sge_no].addr))
+			goto err;
+		atomic_inc(&xprt->sc_dma_used);
+		sge[sge_no].lkey = xprt->sc_dma_lkey;
 		ctxt->count++;
-		ctxt->frmr = vec->frmr;
 		sge_off = 0;
 		sge_no++;
 		xdr_sge_no++;
@@ -369,7 +212,6 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 	return 0;
  err:
 	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_frmr(xprt, vec->frmr);
 	svc_rdma_put_context(ctxt, 0);
 	/* Fatal error, close transport */
 	return -EIO;
@@ -397,10 +239,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[1];
 
-	if (vec->frmr)
-		max_write = vec->frmr->map_len;
-	else
-		max_write = xprt->sc_max_sge * PAGE_SIZE;
+	max_write = xprt->sc_max_sge * PAGE_SIZE;
 
 	/* Write chunks start at the pagelist */
 	for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -472,10 +311,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
 	res_ary = (struct rpcrdma_write_array *)
 		&rdma_resp->rm_body.rm_chunks[2];
 
-	if (vec->frmr)
-		max_write = vec->frmr->map_len;
-	else
-		max_write = xprt->sc_max_sge * PAGE_SIZE;
+	max_write = xprt->sc_max_sge * PAGE_SIZE;
 
 	/* xdr offset starts at RPC message */
 	nchunks = ntohl(arg_ary->wc_nchunks);
@@ -545,7 +381,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		      int byte_count)
 {
 	struct ib_send_wr send_wr;
-	struct ib_send_wr inv_wr;
 	int sge_no;
 	int sge_bytes;
 	int page_no;
@@ -559,7 +394,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		       "svcrdma: could not post a receive buffer, err=%d."
 		       "Closing transport %p.\n", ret, rdma);
 		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
-		svc_rdma_put_frmr(rdma, vec->frmr);
 		svc_rdma_put_context(ctxt, 0);
 		return -ENOTCONN;
 	}
@@ -567,11 +401,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	/* Prepare the context */
 	ctxt->pages[0] = page;
 	ctxt->count = 1;
-	ctxt->frmr = vec->frmr;
-	if (vec->frmr)
-		set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
-	else
-		clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
 
 	/* Prepare the SGE for the RPCRDMA Header */
 	ctxt->sge[0].lkey = rdma->sc_dma_lkey;
@@ -590,21 +419,15 @@ static int send_reply(struct svcxprt_rdma *rdma,
 		int xdr_off = 0;
 		sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
 		byte_count -= sge_bytes;
-		if (!vec->frmr) {
-			ctxt->sge[sge_no].addr =
-				dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
-					    sge_bytes, DMA_TO_DEVICE);
-			xdr_off += sge_bytes;
-			if (ib_dma_mapping_error(rdma->sc_cm_id->device,
-						 ctxt->sge[sge_no].addr))
-				goto err;
-			atomic_inc(&rdma->sc_dma_used);
-			ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
-		} else {
-			ctxt->sge[sge_no].addr = (unsigned long)
-				vec->sge[sge_no].iov_base;
-			ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
-		}
+		ctxt->sge[sge_no].addr =
+			dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
+				    sge_bytes, DMA_TO_DEVICE);
+		xdr_off += sge_bytes;
+		if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+					 ctxt->sge[sge_no].addr))
+			goto err;
+		atomic_inc(&rdma->sc_dma_used);
+		ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
 		ctxt->sge[sge_no].length = sge_bytes;
 	}
 	BUG_ON(byte_count != 0);
@@ -627,6 +450,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
 			ctxt->sge[page_no+1].length = 0;
 	}
 	rqstp->rq_next_page = rqstp->rq_respages + 1;
+
 	BUG_ON(sge_no > rdma->sc_max_sge);
 	memset(&send_wr, 0, sizeof send_wr);
 	ctxt->wr_op = IB_WR_SEND;
@@ -635,15 +459,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	send_wr.num_sge = sge_no;
 	send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags =  IB_SEND_SIGNALED;
-	if (vec->frmr) {
-		/* Prepare INVALIDATE WR */
-		memset(&inv_wr, 0, sizeof inv_wr);
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.send_flags = IB_SEND_SIGNALED;
-		inv_wr.ex.invalidate_rkey =
-			vec->frmr->mr->lkey;
-		send_wr.next = &inv_wr;
-	}
 
 	ret = svc_rdma_send(rdma, &send_wr);
 	if (ret)
@@ -653,7 +468,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
 
  err:
 	svc_rdma_unmap_dma(ctxt);
-	svc_rdma_put_frmr(rdma, vec->frmr);
 	svc_rdma_put_context(ctxt, 1);
 	return -EIO;
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 02db8d9cc994..e7323fbbd348 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -162,7 +163,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
 		schedule_timeout_uninterruptible(msecs_to_jiffies(500));
 	}
 	map->count = 0;
-	map->frmr = NULL;
 	return map;
 }
 
@@ -338,22 +338,21 @@ static void process_context(struct svcxprt_rdma *xprt,
 
 	switch (ctxt->wr_op) {
 	case IB_WR_SEND:
-		if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
-			svc_rdma_put_frmr(xprt, ctxt->frmr);
+		BUG_ON(ctxt->frmr);
 		svc_rdma_put_context(ctxt, 1);
 		break;
 
 	case IB_WR_RDMA_WRITE:
+		BUG_ON(ctxt->frmr);
 		svc_rdma_put_context(ctxt, 0);
 		break;
 
 	case IB_WR_RDMA_READ:
 	case IB_WR_RDMA_READ_WITH_INV:
+		svc_rdma_put_frmr(xprt, ctxt->frmr);
 		if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
 			struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
 			BUG_ON(!read_hdr);
-			if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
-				svc_rdma_put_frmr(xprt, ctxt->frmr);
 			spin_lock_bh(&xprt->sc_rq_dto_lock);
 			set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
 			list_add_tail(&read_hdr->dto_q,
@@ -365,6 +364,7 @@ static void process_context(struct svcxprt_rdma *xprt,
 		break;
 
 	default:
+		BUG_ON(1);
 		printk(KERN_ERR "svcrdma: unexpected completion type, "
 		       "opcode=%d\n",
 		       ctxt->wr_op);
@@ -380,29 +380,42 @@ static void process_context(struct svcxprt_rdma *xprt,
 static void sq_cq_reap(struct svcxprt_rdma *xprt)
 {
 	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct ib_wc wc;
+	struct ib_wc wc_a[6];
+	struct ib_wc *wc;
 	struct ib_cq *cq = xprt->sc_sq_cq;
 	int ret;
 
+	memset(wc_a, 0, sizeof(wc_a));
+
 	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
 		return;
 
 	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
 	atomic_inc(&rdma_stat_sq_poll);
-	while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
-		if (wc.status != IB_WC_SUCCESS)
-			/* Close the transport */
-			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+	while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
+		int i;
 
-		/* Decrement used SQ WR count */
-		atomic_dec(&xprt->sc_sq_count);
-		wake_up(&xprt->sc_send_wait);
+		for (i = 0; i < ret; i++) {
+			wc = &wc_a[i];
+			if (wc->status != IB_WC_SUCCESS) {
+				dprintk("svcrdma: sq wc err status %d\n",
+					wc->status);
 
-		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
-		if (ctxt)
-			process_context(xprt, ctxt);
+				/* Close the transport */
+				set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			}
 
-		svc_xprt_put(&xprt->sc_xprt);
+			/* Decrement used SQ WR count */
+			atomic_dec(&xprt->sc_sq_count);
+			wake_up(&xprt->sc_send_wait);
+
+			ctxt = (struct svc_rdma_op_ctxt *)
+				(unsigned long)wc->wr_id;
+			if (ctxt)
+				process_context(xprt, ctxt);
+
+			svc_xprt_put(&xprt->sc_xprt);
+		}
 	}
 
 	if (ctxt)
@@ -995,7 +1008,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 			need_dma_mr = 0;
 		break;
 	case RDMA_TRANSPORT_IB:
-		if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+		if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
+			need_dma_mr = 1;
+			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+		} else if (!(devattr.device_cap_flags &
+			     IB_DEVICE_LOCAL_DMA_LKEY)) {
 			need_dma_mr = 1;
 			dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
 		} else
@@ -1192,14 +1209,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
 		container_of(xprt, struct svcxprt_rdma, sc_xprt);
 
 	/*
-	 * If there are fewer SQ WR available than required to send a
-	 * simple response, return false.
-	 */
-	if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3))
-		return 0;
-
-	/*
-	 * ...or there are already waiters on the SQ,
+	 * If there are already waiters on the SQ,
 	 * return false.
 	 */
 	if (waitqueue_active(&rdma->sc_send_wait))
-- 
cgit 


From 8b8b36834d0fff67fc8668093f4312dd04dcf21d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 10 Jun 2014 09:46:00 -0400
Subject: ring-buffer: Check if buffer exists before polling

The per_cpu buffers are created one per possible CPU. But this does
not mean that those CPUs are online, or that they even exist.

Since the addition of ring buffer polling, the code assumes that the
caller polls on an existing buffer. But this is not the case if
the user reads trace_pipe for a CPU that does not exist, and this
causes the kernel to crash.

The simple fix is to check the cpu against the buffer's cpumask to
see if the buffer was allocated, and return -ENODEV if it was
not.

More updates were done to pass the -ENODEV back up to userspace.

Link: http://lkml.kernel.org/r/5393DB61.6060707@oracle.com

Reported-by: Sasha Levin <sasha.levin@oracle.com>
Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  2 +-
 kernel/trace/ring_buffer.c  |  5 ++++-
 kernel/trace/trace.c        | 22 ++++++++++++++++------
 3 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index d69cf637a15a..49a4d6f59108 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
-void ring_buffer_wait(struct ring_buffer *buffer, int cpu);
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu);
 int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
 			  struct file *filp, poll_table *poll_table);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c634868c2921..7c56c3d06943 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -543,7 +543,7 @@ static void rb_wake_up_waiters(struct irq_work *work)
  * as data is added to any of the @buffer's cpu buffers. Otherwise
  * it will wait for data to be added to a specific cpu buffer.
  */
-void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
+int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	DEFINE_WAIT(wait);
@@ -557,6 +557,8 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
 	if (cpu == RING_BUFFER_ALL_CPUS)
 		work = &buffer->irq_work;
 	else {
+		if (!cpumask_test_cpu(cpu, buffer->cpumask))
+			return -ENODEV;
 		cpu_buffer = buffer->buffers[cpu];
 		work = &cpu_buffer->irq_work;
 	}
@@ -591,6 +593,7 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
 		schedule();
 
 	finish_wait(&work->waiters, &wait);
+	return 0;
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 16f7038d1f4d..56422f1decba 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1085,13 +1085,13 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 }
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
-static void wait_on_pipe(struct trace_iterator *iter)
+static int wait_on_pipe(struct trace_iterator *iter)
 {
 	/* Iterators are static, they should be filled or empty */
 	if (trace_buffer_iter(iter, iter->cpu_file))
-		return;
+		return 0;
 
-	ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
+	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
 }
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -4378,6 +4378,7 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
 static int tracing_wait_pipe(struct file *filp)
 {
 	struct trace_iterator *iter = filp->private_data;
+	int ret;
 
 	while (trace_empty(iter)) {
 
@@ -4399,10 +4400,13 @@ static int tracing_wait_pipe(struct file *filp)
 
 		mutex_unlock(&iter->mutex);
 
-		wait_on_pipe(iter);
+		ret = wait_on_pipe(iter);
 
 		mutex_lock(&iter->mutex);
 
+		if (ret)
+			return ret;
+
 		if (signal_pending(current))
 			return -EINTR;
 	}
@@ -5327,8 +5331,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
 				goto out_unlock;
 			}
 			mutex_unlock(&trace_types_lock);
-			wait_on_pipe(iter);
+			ret = wait_on_pipe(iter);
 			mutex_lock(&trace_types_lock);
+			if (ret) {
+				size = ret;
+				goto out_unlock;
+			}
 			if (signal_pending(current)) {
 				size = -EINTR;
 				goto out_unlock;
@@ -5538,8 +5546,10 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 			goto out;
 		}
 		mutex_unlock(&trace_types_lock);
-		wait_on_pipe(iter);
+		ret = wait_on_pipe(iter);
 		mutex_lock(&trace_types_lock);
+		if (ret)
+			goto out;
 		if (signal_pending(current)) {
 			ret = -EINTR;
 			goto out;
-- 
cgit 


From 962bd40bc30e412828e091bfda041b7547e779c8 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Tue, 10 Jun 2014 12:24:40 -0400
Subject: locks: add missing memory barrier in break_deleg

break_deleg is subject to the same potential race as break_lease. Add
a memory barrier to prevent it.

Signed-off-by: Jeff Layton <jlayton@primarydata.com>
---
 include/linux/fs.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3f46e499dd0..22ae79650b82 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1914,6 +1914,12 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
 
 static inline int break_deleg(struct inode *inode, unsigned int mode)
 {
+	/*
+	 * Since this check is lockless, we must ensure that any refcounts
+	 * taken are done before checking inode->i_flock. Otherwise, we could
+	 * end up racing with tasks trying to set a new lease on this file.
+	 */
+	smp_mb();
 	if (inode->i_flock)
 		return __break_lease(inode, mode, FL_DELEG);
 	return 0;
-- 
cgit 


From b5097e956a4d2919ee248d6481e4204c5568ed5c Mon Sep 17 00:00:00 2001
From: Fabian Frederick <fabf@skynet.be>
Date: Tue, 10 Jun 2014 20:04:50 +0200
Subject: block: add __init to elv_register

elv_register is only called by elevator init functions:

__init cfq_init
__init deadline_init
__init noop_init

Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Fabian Frederick <fabf@skynet.be>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/elevator.c         | 2 +-
 include/linux/elevator.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/elevator.c b/block/elevator.c
index 1e01b66a0b92..f35edddfe9b5 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -845,7 +845,7 @@ void elv_unregister_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(elv_unregister_queue);
 
-int elv_register(struct elevator_type *e)
+int __init elv_register(struct elevator_type *e)
 {
 	char *def = "";
 
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index df63bd3a8cf1..4ff262e2bf37 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -144,7 +144,7 @@ extern void elv_drain_elevator(struct request_queue *);
  * io scheduler registration
  */
 extern void __init load_default_elevator_module(void);
-extern int elv_register(struct elevator_type *);
+extern int __init elv_register(struct elevator_type *);
 extern void elv_unregister(struct elevator_type *);
 
 /*
-- 
cgit 


From 87a1ef8058d9ab26bc289ea4f27bc3c11ce9acb8 Mon Sep 17 00:00:00 2001
From: David Cohen <david.a.cohen@linux.intel.com>
Date: Tue, 15 Apr 2014 13:06:05 -0700
Subject: watchdog: add Intel MID watchdog driver support

Add initial Intel MID watchdog driver support.

This driver is an initial implementation of generic Intel MID watchdog
driver. Currently it supports Intel Merrifield platform.

Signed-off-by: Eric Ernst <eric.ernst@intel.com>
Signed-off-by: David Cohen <david.a.cohen@linux.intel.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@iguana.be>
---
 drivers/watchdog/Kconfig                    |  13 ++
 drivers/watchdog/Makefile                   |   1 +
 drivers/watchdog/intel-mid_wdt.c            | 184 ++++++++++++++++++++++++++++
 include/linux/platform_data/intel-mid_wdt.h |  22 ++++
 4 files changed, 220 insertions(+)
 create mode 100644 drivers/watchdog/intel-mid_wdt.c
 create mode 100644 include/linux/platform_data/intel-mid_wdt.h

(limited to 'include/linux')

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index cbd5ac7b8832..c845527b503a 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -665,6 +665,19 @@ config INTEL_SCU_WATCHDOG
 
 	  To compile this driver as a module, choose M here.
 
+config INTEL_MID_WATCHDOG
+	tristate "Intel MID Watchdog Timer"
+	depends on X86_INTEL_MID
+	select WATCHDOG_CORE
+	---help---
+	  Watchdog timer driver built into the Intel SCU for Intel MID
+	  Platforms.
+
+	  This driver currently supports only the watchdog evolution
+	  implementation in SCU, available for Merrifield generation.
+
+	  To compile this driver as a module, choose M here.
+
 config ITCO_WDT
 	tristate "Intel TCO Timer/Watchdog"
 	depends on (X86 || IA64) && PCI
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 1384531eaa45..7b8a91ed20e7 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -112,6 +112,7 @@ obj-$(CONFIG_W83977F_WDT) += w83977f_wdt.o
 obj-$(CONFIG_MACHZ_WDT) += machzwd.o
 obj-$(CONFIG_SBC_EPX_C3_WATCHDOG) += sbc_epx_c3.o
 obj-$(CONFIG_INTEL_SCU_WATCHDOG) += intel_scu_watchdog.o
+obj-$(CONFIG_INTEL_MID_WATCHDOG) += intel-mid_wdt.o
 
 # M32R Architecture
 
diff --git a/drivers/watchdog/intel-mid_wdt.c b/drivers/watchdog/intel-mid_wdt.c
new file mode 100644
index 000000000000..ca66e8e74635
--- /dev/null
+++ b/drivers/watchdog/intel-mid_wdt.c
@@ -0,0 +1,184 @@
+/*
+ *      intel-mid_wdt: generic Intel MID SCU watchdog driver
+ *
+ *      Platforms supported so far:
+ *      - Merrifield only
+ *
+ *      Copyright (C) 2014 Intel Corporation. All rights reserved.
+ *      Contact: David Cohen <david.a.cohen@linux.intel.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of version 2 of the GNU General
+ *      Public License as published by the Free Software Foundation.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/nmi.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+#include <linux/platform_data/intel-mid_wdt.h>
+
+#include <asm/intel_scu_ipc.h>
+#include <asm/intel-mid.h>
+
+#define IPC_WATCHDOG 0xf8
+
+#define MID_WDT_PRETIMEOUT		15
+#define MID_WDT_TIMEOUT_MIN		(1 + MID_WDT_PRETIMEOUT)
+#define MID_WDT_TIMEOUT_MAX		170
+#define MID_WDT_DEFAULT_TIMEOUT		90
+
+/* SCU watchdog messages */
+enum {
+	SCU_WATCHDOG_START = 0,
+	SCU_WATCHDOG_STOP,
+	SCU_WATCHDOG_KEEPALIVE,
+};
+
+static inline int wdt_command(int sub, u32 *in, int inlen)
+{
+	return intel_scu_ipc_command(IPC_WATCHDOG, sub, in, inlen, NULL, 0);
+}
+
+static int wdt_start(struct watchdog_device *wd)
+{
+	int ret, in_size;
+	int timeout = wd->timeout;
+	struct ipc_wd_start {
+		u32 pretimeout;
+		u32 timeout;
+	} ipc_wd_start = { timeout - MID_WDT_PRETIMEOUT, timeout };
+
+	/*
+	 * SCU expects the input size for watchdog IPC to
+	 * be based on 4 bytes
+	 */
+	in_size = DIV_ROUND_UP(sizeof(ipc_wd_start), 4);
+
+	ret = wdt_command(SCU_WATCHDOG_START, (u32 *)&ipc_wd_start, in_size);
+	if (ret) {
+		struct device *dev = watchdog_get_drvdata(wd);
+		dev_crit(dev, "error starting watchdog: %d\n", ret);
+	}
+
+	return ret;
+}
+
+static int wdt_ping(struct watchdog_device *wd)
+{
+	int ret;
+
+	ret = wdt_command(SCU_WATCHDOG_KEEPALIVE, NULL, 0);
+	if (ret) {
+		struct device *dev = watchdog_get_drvdata(wd);
+		dev_crit(dev, "Error executing keepalive: 0x%x\n", ret);
+	}
+
+	return ret;
+}
+
+static int wdt_stop(struct watchdog_device *wd)
+{
+	int ret;
+
+	ret = wdt_command(SCU_WATCHDOG_STOP, NULL, 0);
+	if (ret) {
+		struct device *dev = watchdog_get_drvdata(wd);
+		dev_crit(dev, "Error stopping watchdog: 0x%x\n", ret);
+	}
+
+	return ret;
+}
+
+static irqreturn_t mid_wdt_irq(int irq, void *dev_id)
+{
+	panic("Kernel Watchdog");
+
+	/* This code should not be reached */
+	return IRQ_HANDLED;
+}
+
+static const struct watchdog_info mid_wdt_info = {
+	.identity = "Intel MID SCU watchdog",
+	.options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT,
+};
+
+static const struct watchdog_ops mid_wdt_ops = {
+	.owner = THIS_MODULE,
+	.start = wdt_start,
+	.stop = wdt_stop,
+	.ping = wdt_ping,
+};
+
+static int mid_wdt_probe(struct platform_device *pdev)
+{
+	struct watchdog_device *wdt_dev;
+	struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data;
+	int ret;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "missing platform data\n");
+		return -EINVAL;
+	}
+
+	if (pdata->probe) {
+		ret = pdata->probe(pdev);
+		if (ret)
+			return ret;
+	}
+
+	wdt_dev = devm_kzalloc(&pdev->dev, sizeof(*wdt_dev), GFP_KERNEL);
+	if (!wdt_dev)
+		return -ENOMEM;
+
+	wdt_dev->info = &mid_wdt_info;
+	wdt_dev->ops = &mid_wdt_ops;
+	wdt_dev->min_timeout = MID_WDT_TIMEOUT_MIN;
+	wdt_dev->max_timeout = MID_WDT_TIMEOUT_MAX;
+	wdt_dev->timeout = MID_WDT_DEFAULT_TIMEOUT;
+
+	watchdog_set_drvdata(wdt_dev, &pdev->dev);
+	platform_set_drvdata(pdev, wdt_dev);
+
+	ret = devm_request_irq(&pdev->dev, pdata->irq, mid_wdt_irq,
+			       IRQF_SHARED | IRQF_NO_SUSPEND, "watchdog",
+			       wdt_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "error requesting warning irq %d\n",
+			pdata->irq);
+		return ret;
+	}
+
+	ret = watchdog_register_device(wdt_dev);
+	if (ret) {
+		dev_err(&pdev->dev, "error registering watchdog device\n");
+		return ret;
+	}
+
+	dev_info(&pdev->dev, "Intel MID watchdog device probed\n");
+
+	return 0;
+}
+
+static int mid_wdt_remove(struct platform_device *pdev)
+{
+	struct watchdog_device *wd = platform_get_drvdata(pdev);
+	watchdog_unregister_device(wd);
+	return 0;
+}
+
+static struct platform_driver mid_wdt_driver = {
+	.probe		= mid_wdt_probe,
+	.remove		= mid_wdt_remove,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "intel_mid_wdt",
+	},
+};
+
+module_platform_driver(mid_wdt_driver);
+
+MODULE_AUTHOR("David Cohen <david.a.cohen@linux.intel.com>");
+MODULE_DESCRIPTION("Watchdog Driver for Intel MID platform");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/intel-mid_wdt.h b/include/linux/platform_data/intel-mid_wdt.h
new file mode 100644
index 000000000000..b98253466ace
--- /dev/null
+++ b/include/linux/platform_data/intel-mid_wdt.h
@@ -0,0 +1,22 @@
+/*
+ *      intel-mid_wdt: generic Intel MID SCU watchdog driver
+ *
+ *      Copyright (C) 2014 Intel Corporation. All rights reserved.
+ *      Contact: David Cohen <david.a.cohen@linux.intel.com>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of version 2 of the GNU General
+ *      Public License as published by the Free Software Foundation.
+ */
+
+#ifndef __INTEL_MID_WDT_H__
+#define __INTEL_MID_WDT_H__
+
+#include <linux/platform_device.h>
+
+struct intel_mid_wdt_pdata {
+	int irq;
+	int (*probe)(struct platform_device *pdev);
+};
+
+#endif /*__INTEL_MID_WDT_H__*/
-- 
cgit 


From 23adbe12ef7d3d4195e80800ab36b37bee28cd03 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@amacapital.net>
Date: Tue, 10 Jun 2014 12:45:42 -0700
Subject: fs,userns: Change inode_capable to capable_wrt_inode_uidgid

The kernel has no concept of capabilities with respect to inodes; inodes
exist independently of namespaces.  For example, inode_capable(inode,
CAP_LINUX_IMMUTABLE) would be nonsense.

This patch changes inode_capable to check for uid and gid mappings and
renames it to capable_wrt_inode_uidgid, which should make it more
obvious what it does.

Fixes CVE-2014-4014.

Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: stable@vger.kernel.org
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/attr.c                  |  8 ++++----
 fs/inode.c                 | 10 +++++++---
 fs/namei.c                 | 11 ++++++-----
 fs/xfs/xfs_ioctl.c         |  2 +-
 include/linux/capability.h |  2 +-
 kernel/capability.c        | 20 ++++++++------------
 6 files changed, 27 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/attr.c b/fs/attr.c
index 5d4e59d56e85..6530ced19697 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -50,14 +50,14 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
 	if ((ia_valid & ATTR_UID) &&
 	    (!uid_eq(current_fsuid(), inode->i_uid) ||
 	     !uid_eq(attr->ia_uid, inode->i_uid)) &&
-	    !inode_capable(inode, CAP_CHOWN))
+	    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
 		return -EPERM;
 
 	/* Make sure caller can chgrp. */
 	if ((ia_valid & ATTR_GID) &&
 	    (!uid_eq(current_fsuid(), inode->i_uid) ||
 	    (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
-	    !inode_capable(inode, CAP_CHOWN))
+	    !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
 		return -EPERM;
 
 	/* Make sure a caller can chmod. */
@@ -67,7 +67,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
 		/* Also check the setgid bit! */
 		if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
 				inode->i_gid) &&
-		    !inode_capable(inode, CAP_FSETID))
+		    !capable_wrt_inode_uidgid(inode, CAP_FSETID))
 			attr->ia_mode &= ~S_ISGID;
 	}
 
@@ -160,7 +160,7 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
 		umode_t mode = attr->ia_mode;
 
 		if (!in_group_p(inode->i_gid) &&
-		    !inode_capable(inode, CAP_FSETID))
+		    !capable_wrt_inode_uidgid(inode, CAP_FSETID))
 			mode &= ~S_ISGID;
 		inode->i_mode = mode;
 	}
diff --git a/fs/inode.c b/fs/inode.c
index 2feb9b69f1be..6eecb7ff0b9a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1839,14 +1839,18 @@ EXPORT_SYMBOL(inode_init_owner);
  * inode_owner_or_capable - check current task permissions to inode
  * @inode: inode being checked
  *
- * Return true if current either has CAP_FOWNER to the inode, or
- * owns the file.
+ * Return true if current either has CAP_FOWNER in a namespace with the
+ * inode owner uid mapped, or owns the file.
  */
 bool inode_owner_or_capable(const struct inode *inode)
 {
+	struct user_namespace *ns;
+
 	if (uid_eq(current_fsuid(), inode->i_uid))
 		return true;
-	if (inode_capable(inode, CAP_FOWNER))
+
+	ns = current_user_ns();
+	if (ns_capable(ns, CAP_FOWNER) && kuid_has_mapping(ns, inode->i_uid))
 		return true;
 	return false;
 }
diff --git a/fs/namei.c b/fs/namei.c
index 80168273396b..985c6f368485 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -332,10 +332,11 @@ int generic_permission(struct inode *inode, int mask)
 
 	if (S_ISDIR(inode->i_mode)) {
 		/* DACs are overridable for directories */
-		if (inode_capable(inode, CAP_DAC_OVERRIDE))
+		if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
 			return 0;
 		if (!(mask & MAY_WRITE))
-			if (inode_capable(inode, CAP_DAC_READ_SEARCH))
+			if (capable_wrt_inode_uidgid(inode,
+						     CAP_DAC_READ_SEARCH))
 				return 0;
 		return -EACCES;
 	}
@@ -345,7 +346,7 @@ int generic_permission(struct inode *inode, int mask)
 	 * at least one exec bit set.
 	 */
 	if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
-		if (inode_capable(inode, CAP_DAC_OVERRIDE))
+		if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
 			return 0;
 
 	/*
@@ -353,7 +354,7 @@ int generic_permission(struct inode *inode, int mask)
 	 */
 	mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
 	if (mask == MAY_READ)
-		if (inode_capable(inode, CAP_DAC_READ_SEARCH))
+		if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH))
 			return 0;
 
 	return -EACCES;
@@ -2379,7 +2380,7 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
 		return 0;
 	if (uid_eq(dir->i_uid, fsuid))
 		return 0;
-	return !inode_capable(inode, CAP_FOWNER);
+	return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
 }
 
 /*
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0b18776b075e..6152cbe353e8 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1215,7 +1215,7 @@ xfs_ioctl_setattr(
 		 * cleared upon successful return from chown()
 		 */
 		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
-		    !inode_capable(VFS_I(ip), CAP_FSETID))
+		    !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))
 			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
 
 		/*
diff --git a/include/linux/capability.h b/include/linux/capability.h
index a6ee1f9a5018..84b13ad67c1c 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -210,7 +210,7 @@ extern bool has_ns_capability_noaudit(struct task_struct *t,
 				      struct user_namespace *ns, int cap);
 extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
-extern bool inode_capable(const struct inode *inode, int cap);
+extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
 extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
 
 /* audit system wants to get cap info from files as well */
diff --git a/kernel/capability.c b/kernel/capability.c
index 84b2bbf443e7..a5cf13c018ce 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -424,23 +424,19 @@ bool capable(int cap)
 EXPORT_SYMBOL(capable);
 
 /**
- * inode_capable - Check superior capability over inode
+ * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
  * @inode: The inode in question
  * @cap: The capability in question
  *
- * Return true if the current task has the given superior capability
- * targeted at it's own user namespace and that the given inode is owned
- * by the current user namespace or a child namespace.
- *
- * Currently we check to see if an inode is owned by the current
- * user namespace by seeing if the inode's owner maps into the
- * current user namespace.
- *
+ * Return true if the current task has the given capability targeted at
+ * its own user namespace and that the given inode's uid and gid are
+ * mapped into the current user namespace.
  */
-bool inode_capable(const struct inode *inode, int cap)
+bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
 {
 	struct user_namespace *ns = current_user_ns();
 
-	return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid);
+	return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) &&
+		kgid_has_mapping(ns, inode->i_gid);
 }
-EXPORT_SYMBOL(inode_capable);
+EXPORT_SYMBOL(capable_wrt_inode_uidgid);
-- 
cgit 


From 602cb5bbae9868fe48989efa78aca62415309fcf Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Tue, 10 Jun 2014 15:18:40 -0700
Subject: mfd/rtc: sec/s5m: rename SEC* symbols to S5M

Prepare for adding support for S2MPS14 RTC device to the rtc-s5m driver:

1. Rename SEC* symbols to S5M.
2. Add S5M prefix to some of the defines that differ between S5M876X
   and S2MPS14.

This is a rename-only patch; no new code is added.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Sangbeom Kim <sbkim73@samsung.com>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-s5m.c           | 66 +++++++++++++++++------------------
 include/linux/mfd/samsung/rtc.h | 76 ++++++++++++++++++++---------------------
 2 files changed, 71 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index 8ec2d6a1dbe1..b37df8c790f2 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -30,10 +30,10 @@
 
 /*
  * Maximum number of retries for checking changes in UDR field
- * of SEC_RTC_UDR_CON register (to limit possible endless loop).
+ * of S5M_RTC_UDR_CON register (to limit possible endless loop).
  *
  * After writing to RTC registers (setting time or alarm) read the UDR field
- * in SEC_RTC_UDR_CON register. UDR is auto-cleared when data have
+ * in S5M_RTC_UDR_CON register. UDR is auto-cleared when data have
  * been transferred.
  */
 #define UDR_READ_RETRY_CNT	5
@@ -54,7 +54,7 @@ static const struct regmap_config s5m_rtc_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
 
-	.max_register = SEC_RTC_REG_MAX,
+	.max_register = S5M_RTC_REG_MAX,
 };
 
 static const struct regmap_config s2mps14_rtc_regmap_config = {
@@ -119,8 +119,8 @@ static inline int s5m8767_wait_for_udr_update(struct s5m_rtc_info *info)
 	unsigned int data;
 
 	do {
-		ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &data);
-	} while (--retry && (data & RTC_UDR_MASK) && !ret);
+		ret = regmap_read(info->regmap, S5M_RTC_UDR_CON, &data);
+	} while (--retry && (data & S5M_RTC_UDR_MASK) && !ret);
 
 	if (!retry)
 		dev_err(info->dev, "waiting for UDR update, reached max number of retries\n");
@@ -133,16 +133,16 @@ static inline int s5m8767_rtc_set_time_reg(struct s5m_rtc_info *info)
 	int ret;
 	unsigned int data;
 
-	ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &data);
+	ret = regmap_read(info->regmap, S5M_RTC_UDR_CON, &data);
 	if (ret < 0) {
 		dev_err(info->dev, "failed to read update reg(%d)\n", ret);
 		return ret;
 	}
 
-	data |= RTC_TIME_EN_MASK;
-	data |= RTC_UDR_MASK;
+	data |= S5M_RTC_TIME_EN_MASK;
+	data |= S5M_RTC_UDR_MASK;
 
-	ret = regmap_write(info->regmap, SEC_RTC_UDR_CON, data);
+	ret = regmap_write(info->regmap, S5M_RTC_UDR_CON, data);
 	if (ret < 0) {
 		dev_err(info->dev, "failed to write update reg(%d)\n", ret);
 		return ret;
@@ -158,17 +158,17 @@ static inline int s5m8767_rtc_set_alarm_reg(struct s5m_rtc_info *info)
 	int ret;
 	unsigned int data;
 
-	ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &data);
+	ret = regmap_read(info->regmap, S5M_RTC_UDR_CON, &data);
 	if (ret < 0) {
 		dev_err(info->dev, "%s: fail to read update reg(%d)\n",
 			__func__, ret);
 		return ret;
 	}
 
-	data &= ~RTC_TIME_EN_MASK;
-	data |= RTC_UDR_MASK;
+	data &= ~S5M_RTC_TIME_EN_MASK;
+	data |= S5M_RTC_UDR_MASK;
 
-	ret = regmap_write(info->regmap, SEC_RTC_UDR_CON, data);
+	ret = regmap_write(info->regmap, S5M_RTC_UDR_CON, data);
 	if (ret < 0) {
 		dev_err(info->dev, "%s: fail to write update reg(%d)\n",
 			__func__, ret);
@@ -218,7 +218,7 @@ static int s5m_rtc_read_time(struct device *dev, struct rtc_time *tm)
 	u8 data[8];
 	int ret;
 
-	ret = regmap_bulk_read(info->regmap, SEC_RTC_SEC, data, 8);
+	ret = regmap_bulk_read(info->regmap, S5M_RTC_SEC, data, 8);
 	if (ret < 0)
 		return ret;
 
@@ -266,7 +266,7 @@ static int s5m_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		1900 + tm->tm_year, 1 + tm->tm_mon, tm->tm_mday,
 		tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_wday);
 
-	ret = regmap_raw_write(info->regmap, SEC_RTC_SEC, data, 8);
+	ret = regmap_raw_write(info->regmap, S5M_RTC_SEC, data, 8);
 	if (ret < 0)
 		return ret;
 
@@ -282,20 +282,20 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 	unsigned int val;
 	int ret, i;
 
-	ret = regmap_bulk_read(info->regmap, SEC_ALARM0_SEC, data, 8);
+	ret = regmap_bulk_read(info->regmap, S5M_ALARM0_SEC, data, 8);
 	if (ret < 0)
 		return ret;
 
 	switch (info->device_type) {
 	case S5M8763X:
 		s5m8763_data_to_tm(data, &alrm->time);
-		ret = regmap_read(info->regmap, SEC_ALARM0_CONF, &val);
+		ret = regmap_read(info->regmap, S5M_ALARM0_CONF, &val);
 		if (ret < 0)
 			return ret;
 
 		alrm->enabled = !!val;
 
-		ret = regmap_read(info->regmap, SEC_RTC_STATUS, &val);
+		ret = regmap_read(info->regmap, S5M_RTC_STATUS, &val);
 		if (ret < 0)
 			return ret;
 
@@ -318,7 +318,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 		}
 
 		alrm->pending = 0;
-		ret = regmap_read(info->regmap, SEC_RTC_STATUS, &val);
+		ret = regmap_read(info->regmap, S5M_RTC_STATUS, &val);
 		if (ret < 0)
 			return ret;
 		break;
@@ -327,7 +327,7 @@ static int s5m_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 		return -EINVAL;
 	}
 
-	if (val & ALARM0_STATUS)
+	if (val & S5M_ALARM0_STATUS)
 		alrm->pending = 1;
 	else
 		alrm->pending = 0;
@@ -341,7 +341,7 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
 	int ret, i;
 	struct rtc_time tm;
 
-	ret = regmap_bulk_read(info->regmap, SEC_ALARM0_SEC, data, 8);
+	ret = regmap_bulk_read(info->regmap, S5M_ALARM0_SEC, data, 8);
 	if (ret < 0)
 		return ret;
 
@@ -352,14 +352,14 @@ static int s5m_rtc_stop_alarm(struct s5m_rtc_info *info)
 
 	switch (info->device_type) {
 	case S5M8763X:
-		ret = regmap_write(info->regmap, SEC_ALARM0_CONF, 0);
+		ret = regmap_write(info->regmap, S5M_ALARM0_CONF, 0);
 		break;
 
 	case S5M8767X:
 		for (i = 0; i < 7; i++)
 			data[i] &= ~ALARM_ENABLE_MASK;
 
-		ret = regmap_raw_write(info->regmap, SEC_ALARM0_SEC, data, 8);
+		ret = regmap_raw_write(info->regmap, S5M_ALARM0_SEC, data, 8);
 		if (ret < 0)
 			return ret;
 
@@ -381,7 +381,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 	u8 alarm0_conf;
 	struct rtc_time tm;
 
-	ret = regmap_bulk_read(info->regmap, SEC_ALARM0_SEC, data, 8);
+	ret = regmap_bulk_read(info->regmap, S5M_ALARM0_SEC, data, 8);
 	if (ret < 0)
 		return ret;
 
@@ -393,7 +393,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 	switch (info->device_type) {
 	case S5M8763X:
 		alarm0_conf = 0x77;
-		ret = regmap_write(info->regmap, SEC_ALARM0_CONF, alarm0_conf);
+		ret = regmap_write(info->regmap, S5M_ALARM0_CONF, alarm0_conf);
 		break;
 
 	case S5M8767X:
@@ -408,7 +408,7 @@ static int s5m_rtc_start_alarm(struct s5m_rtc_info *info)
 		if (data[RTC_YEAR1] & 0x7f)
 			data[RTC_YEAR1] |= ALARM_ENABLE_MASK;
 
-		ret = regmap_raw_write(info->regmap, SEC_ALARM0_SEC, data, 8);
+		ret = regmap_raw_write(info->regmap, S5M_ALARM0_SEC, data, 8);
 		if (ret < 0)
 			return ret;
 		ret = s5m8767_rtc_set_alarm_reg(info);
@@ -450,7 +450,7 @@ static int s5m_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 	if (ret < 0)
 		return ret;
 
-	ret = regmap_raw_write(info->regmap, SEC_ALARM0_SEC, data, 8);
+	ret = regmap_raw_write(info->regmap, S5M_ALARM0_SEC, data, 8);
 	if (ret < 0)
 		return ret;
 
@@ -495,7 +495,7 @@ static const struct rtc_class_ops s5m_rtc_ops = {
 static void s5m_rtc_enable_wtsr(struct s5m_rtc_info *info, bool enable)
 {
 	int ret;
-	ret = regmap_update_bits(info->regmap, SEC_WTSR_SMPL_CNTL,
+	ret = regmap_update_bits(info->regmap, S5M_WTSR_SMPL_CNTL,
 				 WTSR_ENABLE_MASK,
 				 enable ? WTSR_ENABLE_MASK : 0);
 	if (ret < 0)
@@ -506,7 +506,7 @@ static void s5m_rtc_enable_wtsr(struct s5m_rtc_info *info, bool enable)
 static void s5m_rtc_enable_smpl(struct s5m_rtc_info *info, bool enable)
 {
 	int ret;
-	ret = regmap_update_bits(info->regmap, SEC_WTSR_SMPL_CNTL,
+	ret = regmap_update_bits(info->regmap, S5M_WTSR_SMPL_CNTL,
 				 SMPL_ENABLE_MASK,
 				 enable ? SMPL_ENABLE_MASK : 0);
 	if (ret < 0)
@@ -521,7 +521,7 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
 	int ret;
 	struct rtc_time tm;
 
-	ret = regmap_read(info->regmap, SEC_RTC_UDR_CON, &tp_read);
+	ret = regmap_read(info->regmap, S5M_RTC_UDR_CON, &tp_read);
 	if (ret < 0) {
 		dev_err(info->dev, "%s: fail to read control reg(%d)\n",
 			__func__, ret);
@@ -533,7 +533,7 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
 	data[1] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
 
 	info->rtc_24hr_mode = 1;
-	ret = regmap_raw_write(info->regmap, SEC_ALARM0_CONF, data, 2);
+	ret = regmap_raw_write(info->regmap, S5M_ALARM0_CONF, data, 2);
 	if (ret < 0) {
 		dev_err(info->dev, "%s: fail to write controlm reg(%d)\n",
 			__func__, ret);
@@ -555,7 +555,7 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
 		ret = s5m_rtc_set_time(info->dev, &tm);
 	}
 
-	ret = regmap_update_bits(info->regmap, SEC_RTC_UDR_CON,
+	ret = regmap_update_bits(info->regmap, S5M_RTC_UDR_CON,
 				 RTC_TCON_MASK, tp_read | RTC_TCON_MASK);
 	if (ret < 0)
 		dev_err(info->dev, "%s: fail to update TCON reg(%d)\n",
@@ -676,7 +676,7 @@ static void s5m_rtc_shutdown(struct platform_device *pdev)
 	if (info->wtsr_smpl) {
 		for (i = 0; i < 3; i++) {
 			s5m_rtc_enable_wtsr(info, false);
-			regmap_read(info->regmap, SEC_WTSR_SMPL_CNTL, &val);
+			regmap_read(info->regmap, S5M_WTSR_SMPL_CNTL, &val);
 			pr_debug("%s: WTSR_SMPL reg(0x%02x)\n", __func__, val);
 			if (val & WTSR_ENABLE_MASK)
 				pr_emerg("%s: fail to disable WTSR\n",
diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h
index 3e02b768d537..207fcfbde82e 100644
--- a/include/linux/mfd/samsung/rtc.h
+++ b/include/linux/mfd/samsung/rtc.h
@@ -18,38 +18,38 @@
 #ifndef __LINUX_MFD_SEC_RTC_H
 #define __LINUX_MFD_SEC_RTC_H
 
-enum sec_rtc_reg {
-	SEC_RTC_SEC,
-	SEC_RTC_MIN,
-	SEC_RTC_HOUR,
-	SEC_RTC_WEEKDAY,
-	SEC_RTC_DATE,
-	SEC_RTC_MONTH,
-	SEC_RTC_YEAR1,
-	SEC_RTC_YEAR2,
-	SEC_ALARM0_SEC,
-	SEC_ALARM0_MIN,
-	SEC_ALARM0_HOUR,
-	SEC_ALARM0_WEEKDAY,
-	SEC_ALARM0_DATE,
-	SEC_ALARM0_MONTH,
-	SEC_ALARM0_YEAR1,
-	SEC_ALARM0_YEAR2,
-	SEC_ALARM1_SEC,
-	SEC_ALARM1_MIN,
-	SEC_ALARM1_HOUR,
-	SEC_ALARM1_WEEKDAY,
-	SEC_ALARM1_DATE,
-	SEC_ALARM1_MONTH,
-	SEC_ALARM1_YEAR1,
-	SEC_ALARM1_YEAR2,
-	SEC_ALARM0_CONF,
-	SEC_ALARM1_CONF,
-	SEC_RTC_STATUS,
-	SEC_WTSR_SMPL_CNTL,
-	SEC_RTC_UDR_CON,
+enum s5m_rtc_reg {
+	S5M_RTC_SEC,
+	S5M_RTC_MIN,
+	S5M_RTC_HOUR,
+	S5M_RTC_WEEKDAY,
+	S5M_RTC_DATE,
+	S5M_RTC_MONTH,
+	S5M_RTC_YEAR1,
+	S5M_RTC_YEAR2,
+	S5M_ALARM0_SEC,
+	S5M_ALARM0_MIN,
+	S5M_ALARM0_HOUR,
+	S5M_ALARM0_WEEKDAY,
+	S5M_ALARM0_DATE,
+	S5M_ALARM0_MONTH,
+	S5M_ALARM0_YEAR1,
+	S5M_ALARM0_YEAR2,
+	S5M_ALARM1_SEC,
+	S5M_ALARM1_MIN,
+	S5M_ALARM1_HOUR,
+	S5M_ALARM1_WEEKDAY,
+	S5M_ALARM1_DATE,
+	S5M_ALARM1_MONTH,
+	S5M_ALARM1_YEAR1,
+	S5M_ALARM1_YEAR2,
+	S5M_ALARM0_CONF,
+	S5M_ALARM1_CONF,
+	S5M_RTC_STATUS,
+	S5M_WTSR_SMPL_CNTL,
+	S5M_RTC_UDR_CON,
 
-	SEC_RTC_REG_MAX,
+	S5M_RTC_REG_MAX,
 };
 
 enum s2mps_rtc_reg {
@@ -88,9 +88,9 @@ enum s2mps_rtc_reg {
 #define HOUR_12			(1 << 7)
 #define HOUR_AMPM		(1 << 6)
 #define HOUR_PM			(1 << 5)
-#define ALARM0_STATUS		(1 << 1)
-#define ALARM1_STATUS		(1 << 2)
-#define UPDATE_AD		(1 << 0)
+#define S5M_ALARM0_STATUS	(1 << 1)
+#define S5M_ALARM1_STATUS	(1 << 2)
+#define S5M_UPDATE_AD		(1 << 0)
 
 #define S2MPS_ALARM0_STATUS	(1 << 2)
 #define S2MPS_ALARM1_STATUS	(1 << 1)
@@ -101,16 +101,16 @@ enum s2mps_rtc_reg {
 #define MODEL24_SHIFT		1
 #define MODEL24_MASK		(1 << MODEL24_SHIFT)
 /* RTC Update Register1 */
-#define RTC_UDR_SHIFT		0
-#define RTC_UDR_MASK		(1 << RTC_UDR_SHIFT)
+#define S5M_RTC_UDR_SHIFT	0
+#define S5M_RTC_UDR_MASK	(1 << S5M_RTC_UDR_SHIFT)
 #define S2MPS_RTC_WUDR_SHIFT	4
 #define S2MPS_RTC_WUDR_MASK	(1 << S2MPS_RTC_WUDR_SHIFT)
 #define S2MPS_RTC_RUDR_SHIFT	0
 #define S2MPS_RTC_RUDR_MASK	(1 << S2MPS_RTC_RUDR_SHIFT)
 #define RTC_TCON_SHIFT		1
 #define RTC_TCON_MASK		(1 << RTC_TCON_SHIFT)
-#define RTC_TIME_EN_SHIFT	3
-#define RTC_TIME_EN_MASK	(1 << RTC_TIME_EN_SHIFT)
+#define S5M_RTC_TIME_EN_SHIFT	3
+#define S5M_RTC_TIME_EN_MASK	(1 << S5M_RTC_TIME_EN_SHIFT)
 
 /* RTC Hour register */
 #define HOUR_PM_SHIFT		6
-- 
cgit 


From 0c5f5d9af311013aabc519b68df19533d0d51cda Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Tue, 10 Jun 2014 15:18:43 -0700
Subject: rtc: s5m: use shorter time of register update

Set the time needed for updating alarm and time registers to 0.45 ms.
The default is 7.32 ms which is too long and leads to warnings when
setting alarm or time:

	s5m-rtc: waiting for UDR update, reached max number of retries

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Sangbeom Kim <sbkim73@samsung.com>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rtc/rtc-s5m.c           |  7 +++++++
 include/linux/mfd/samsung/rtc.h | 10 ++++++++++
 2 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/rtc/rtc-s5m.c b/drivers/rtc/rtc-s5m.c
index 22137d4dbadf..3751ef90f93c 100644
--- a/drivers/rtc/rtc-s5m.c
+++ b/drivers/rtc/rtc-s5m.c
@@ -519,6 +519,13 @@ static int s5m8767_rtc_init_reg(struct s5m_rtc_info *info)
 	u8 data[2];
 	int ret;
 
+	/* UDR update time. Default of 7.32 ms is too long. */
+	ret = regmap_update_bits(info->regmap, S5M_RTC_UDR_CON,
+			S5M_RTC_UDR_T_MASK, S5M_RTC_UDR_T_450_US);
+	if (ret < 0)
+		dev_err(info->dev, "%s: fail to change UDR time: %d\n",
+				__func__, ret);
+
 	/* Set RTC control register : Binary mode, 24hour mode */
 	data[0] = (1 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
 	data[1] = (0 << BCD_EN_SHIFT) | (1 << MODEL24_SHIFT);
diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h
index 207fcfbde82e..b6401e7661c7 100644
--- a/include/linux/mfd/samsung/rtc.h
+++ b/include/linux/mfd/samsung/rtc.h
@@ -111,6 +111,16 @@ enum s2mps_rtc_reg {
 #define RTC_TCON_MASK		(1 << RTC_TCON_SHIFT)
 #define S5M_RTC_TIME_EN_SHIFT	3
 #define S5M_RTC_TIME_EN_MASK	(1 << S5M_RTC_TIME_EN_SHIFT)
+/*
+ * UDR_T field in S5M_RTC_UDR_CON register determines the time needed
+ * for updating alarm and time registers. Default is 7.32 ms.
+ */
+#define S5M_RTC_UDR_T_SHIFT	6
+#define S5M_RTC_UDR_T_MASK	(0x3 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_7320_US	(0x0 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_1830_US	(0x1 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_3660_US	(0x2 << S5M_RTC_UDR_T_SHIFT)
+#define S5M_RTC_UDR_T_450_US	(0x3 << S5M_RTC_UDR_T_SHIFT)
 
 /* RTC Hour register */
 #define HOUR_PM_SHIFT		6
-- 
cgit 


From 07f8ac4a1e26e8283542cdaf658a6e2a12fd6980 Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@web.de>
Date: Sat, 7 Jun 2014 18:26:28 +0200
Subject: bridge: add export of multicast database adjacent to net_dev
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With this new, exported function br_multicast_list_adjacent(net_dev) a
list of IPv4/6 addresses is returned. This list contains all multicast
addresses sensed by the bridge multicast snooping feature on all bridge
ports of the bridge interface of net_dev, excluding addresses from the
specified net_device itself.

Adding bridge support to the batman-adv multicast optimization requires
batman-adv knowing about the existence of bridged-in multicast
listeners to be able to reliably serve them with multicast packets.

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 18 +++++++++++++++
 net/bridge/br_multicast.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_private.h   | 12 ----------
 3 files changed, 76 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 1085ffeef956..44d6eb0eb852 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -16,9 +16,27 @@
 #include <linux/netdevice.h>
 #include <uapi/linux/if_bridge.h>
 
+struct br_ip {
+	union {
+		__be32	ip4;
+#if IS_ENABLED(CONFIG_IPV6)
+		struct in6_addr ip6;
+#endif
+	} u;
+	__be16		proto;
+	__u16           vid;
+};
+
+struct br_ip_list {
+	struct list_head list;
+	struct br_ip addr;
+};
+
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
 
 typedef int br_should_route_hook_t(struct sk_buff *skb);
 extern br_should_route_hook_t __rcu *br_should_route_hook;
+int br_multicast_list_adjacent(struct net_device *dev,
+			       struct list_head *br_ip_list);
 
 #endif
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index b3f17c9b4d06..772476b7c4b7 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/export.h>
 #include <linux/if_ether.h>
 #include <linux/igmp.h>
 #include <linux/jhash.h>
@@ -2141,3 +2142,60 @@ unlock:
 
 	return err;
 }
+
+/**
+ * br_multicast_list_adjacent - Returns snooped multicast addresses
+ * @dev:	The bridge port adjacent to which to retrieve addresses
+ * @br_ip_list:	The list to store found, snooped multicast IP addresses in
+ *
+ * Creates a list of IP addresses (struct br_ip_list) sensed by the multicast
+ * snooping feature on all bridge ports of dev's bridge device, excluding
+ * the addresses from dev itself.
+ *
+ * Returns the number of items added to br_ip_list.
+ *
+ * Notes:
+ * - br_ip_list needs to be initialized by caller
+ * - br_ip_list might contain duplicates in the end
+ *   (needs to be taken care of by caller)
+ * - br_ip_list needs to be freed by caller
+ */
+int br_multicast_list_adjacent(struct net_device *dev,
+			       struct list_head *br_ip_list)
+{
+	struct net_bridge *br;
+	struct net_bridge_port *port;
+	struct net_bridge_port_group *group;
+	struct br_ip_list *entry;
+	int count = 0;
+
+	rcu_read_lock();
+	if (!br_ip_list || !br_port_exists(dev))
+		goto unlock;
+
+	port = br_port_get_rcu(dev);
+	if (!port || !port->br)
+		goto unlock;
+
+	br = port->br;
+
+	list_for_each_entry_rcu(port, &br->port_list, list) {
+		if (!port->dev || port->dev == dev)
+			continue;
+
+		hlist_for_each_entry_rcu(group, &port->mglist, mglist) {
+			entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+			if (!entry)
+				goto unlock;
+
+			entry->addr = group->addr;
+			list_add(&entry->list, br_ip_list);
+			count++;
+		}
+	}
+
+unlock:
+	rcu_read_unlock();
+	return count;
+}
+EXPORT_SYMBOL_GPL(br_multicast_list_adjacent);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 97c5e46dde72..50e2ab021484 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -54,18 +54,6 @@ struct mac_addr
 	unsigned char	addr[ETH_ALEN];
 };
 
-struct br_ip
-{
-	union {
-		__be32	ip4;
-#if IS_ENABLED(CONFIG_IPV6)
-		struct in6_addr ip6;
-#endif
-	} u;
-	__be16		proto;
-	__u16		vid;
-};
-
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 /* our own querier */
 struct bridge_mcast_own_query {
-- 
cgit 


From 2cd4143192e8c60f66cb32c3a30c76d0470a372d Mon Sep 17 00:00:00 2001
From: Linus Lüssing <linus.luessing@web.de>
Date: Sat, 7 Jun 2014 18:26:29 +0200
Subject: bridge: memorize and export selected IGMP/MLD querier port
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adding bridge support to the batman-adv multicast optimization requires
batman-adv knowing about the existence of bridged-in IGMP/MLD queriers
to be able to reliably serve any multicast listener behind this same
bridge.

Signed-off-by: Linus Lüssing <linus.luessing@web.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h |  1 +
 net/bridge/br_multicast.c | 72 +++++++++++++++++++++++++++++++++++++++++++----
 net/bridge/br_private.h   |  1 +
 3 files changed, 68 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 44d6eb0eb852..fd22789d7b2e 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -38,5 +38,6 @@ typedef int br_should_route_hook_t(struct sk_buff *skb);
 extern br_should_route_hook_t __rcu *br_should_route_hook;
 int br_multicast_list_adjacent(struct net_device *dev,
 			       struct list_head *br_ip_list);
+bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
 
 #endif
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 772476b7c4b7..cd3cf394c477 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1081,6 +1081,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 #endif
 
 static bool br_ip4_multicast_select_querier(struct net_bridge *br,
+					    struct net_bridge_port *port,
 					    __be32 saddr)
 {
 	if (!timer_pending(&br->ip4_own_query.timer) &&
@@ -1098,11 +1099,15 @@ static bool br_ip4_multicast_select_querier(struct net_bridge *br,
 update:
 	br->ip4_querier.addr.u.ip4 = saddr;
 
+	/* update protected by general multicast_lock by caller */
+	rcu_assign_pointer(br->ip4_querier.port, port);
+
 	return true;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static bool br_ip6_multicast_select_querier(struct net_bridge *br,
+					    struct net_bridge_port *port,
 					    struct in6_addr *saddr)
 {
 	if (!timer_pending(&br->ip6_own_query.timer) &&
@@ -1117,19 +1122,23 @@ static bool br_ip6_multicast_select_querier(struct net_bridge *br,
 update:
 	br->ip6_querier.addr.u.ip6 = *saddr;
 
+	/* update protected by general multicast_lock by caller */
+	rcu_assign_pointer(br->ip6_querier.port, port);
+
 	return true;
 }
 #endif
 
 static bool br_multicast_select_querier(struct net_bridge *br,
+					struct net_bridge_port *port,
 					struct br_ip *saddr)
 {
 	switch (saddr->proto) {
 	case htons(ETH_P_IP):
-		return br_ip4_multicast_select_querier(br, saddr->u.ip4);
+		return br_ip4_multicast_select_querier(br, port, saddr->u.ip4);
 #if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
-		return br_ip6_multicast_select_querier(br, &saddr->u.ip6);
+		return br_ip6_multicast_select_querier(br, port, &saddr->u.ip6);
 #endif
 	}
 
@@ -1201,7 +1210,7 @@ static void br_multicast_query_received(struct net_bridge *br,
 					struct br_ip *saddr,
 					unsigned long max_delay)
 {
-	if (!br_multicast_select_querier(br, saddr))
+	if (!br_multicast_select_querier(br, port, saddr))
 		return;
 
 	br_multicast_update_query_timer(br, query, max_delay);
@@ -1804,12 +1813,14 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
 }
 
 static void br_multicast_query_expired(struct net_bridge *br,
-				       struct bridge_mcast_own_query *query)
+				       struct bridge_mcast_own_query *query,
+				       struct bridge_mcast_querier *querier)
 {
 	spin_lock(&br->multicast_lock);
 	if (query->startup_sent < br->multicast_startup_query_count)
 		query->startup_sent++;
 
+	RCU_INIT_POINTER(querier->port, NULL);
 	br_multicast_send_query(br, NULL, query);
 	spin_unlock(&br->multicast_lock);
 }
@@ -1818,7 +1829,7 @@ static void br_ip4_multicast_query_expired(unsigned long data)
 {
 	struct net_bridge *br = (void *)data;
 
-	br_multicast_query_expired(br, &br->ip4_own_query);
+	br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -1826,7 +1837,7 @@ static void br_ip6_multicast_query_expired(unsigned long data)
 {
 	struct net_bridge *br = (void *)data;
 
-	br_multicast_query_expired(br, &br->ip6_own_query);
+	br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier);
 }
 #endif
 
@@ -1849,8 +1860,10 @@ void br_multicast_init(struct net_bridge *br)
 	br->multicast_membership_interval = 260 * HZ;
 
 	br->ip4_other_query.delay_time = 0;
+	br->ip4_querier.port = NULL;
 #if IS_ENABLED(CONFIG_IPV6)
 	br->ip6_other_query.delay_time = 0;
+	br->ip6_querier.port = NULL;
 #endif
 
 	spin_lock_init(&br->multicast_lock);
@@ -2199,3 +2212,52 @@ unlock:
 	return count;
 }
 EXPORT_SYMBOL_GPL(br_multicast_list_adjacent);
+
+/**
+ * br_multicast_has_querier_adjacent - Checks for a querier behind a bridge port
+ * @dev: The bridge port adjacent to which to check for a querier
+ * @proto: The protocol family to check for: IGMP -> ETH_P_IP, MLD -> ETH_P_IPV6
+ *
+ * Checks whether the given interface has a bridge on top and if so returns
+ * true if a selected querier is behind one of the other ports of this
+ * bridge. Otherwise returns false.
+ */
+bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto)
+{
+	struct net_bridge *br;
+	struct net_bridge_port *port;
+	bool ret = false;
+
+	rcu_read_lock();
+	if (!br_port_exists(dev))
+		goto unlock;
+
+	port = br_port_get_rcu(dev);
+	if (!port || !port->br)
+		goto unlock;
+
+	br = port->br;
+
+	switch (proto) {
+	case ETH_P_IP:
+		if (!timer_pending(&br->ip4_other_query.timer) ||
+		    rcu_dereference(br->ip4_querier.port) == port)
+			goto unlock;
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case ETH_P_IPV6:
+		if (!timer_pending(&br->ip6_other_query.timer) ||
+		    rcu_dereference(br->ip6_querier.port) == port)
+			goto unlock;
+		break;
+#endif
+	default:
+		goto unlock;
+	}
+
+	ret = true;
+unlock:
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(br_multicast_has_querier_adjacent);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 50e2ab021484..8346e9504cdb 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -70,6 +70,7 @@ struct bridge_mcast_other_query {
 /* selected querier */
 struct bridge_mcast_querier {
 	struct br_ip addr;
+	struct net_bridge_port __rcu	*port;
 };
 #endif
 
-- 
cgit 


From e430f34ee5192c84bcabd3c79ab7e2388b5eec74 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 6 Jun 2014 14:46:06 -0700
Subject: net: filter: cleanup A/X name usage

The macro 'A' used in internal BPF interpreter:
 #define A regs[insn->a_reg]
was easily confused with the name of classic BPF register 'A', since
'A' would mean two different things depending on context.

This patch is trying to clean up the naming and clarify its usage in the
following way:

- A and X are names of two classic BPF registers

- BPF_REG_A denotes internal BPF register R0 used to map classic register A
  in internal BPF programs generated from classic

- BPF_REG_X denotes internal BPF register R7 used to map classic register X
  in internal BPF programs generated from classic

- internal BPF instruction format:
struct sock_filter_int {
        __u8    code;           /* opcode */
        __u8    dst_reg:4;      /* dest register */
        __u8    src_reg:4;      /* source register */
        __s16   off;            /* signed offset */
        __s32   imm;            /* signed immediate constant */
};

- BPF_X/BPF_K is 1 bit used to encode source operand of instruction
In classic:
  BPF_X - means use register X as source operand
  BPF_K - means use 32-bit immediate as source operand
In internal:
  BPF_X - means use 'src_reg' register as source operand
  BPF_K - means use 32-bit immediate as source operand

Suggested-by: Chema Gonzalez <chema@google.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Chema Gonzalez <chema@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/filter.txt |   2 +-
 arch/x86/net/bpf_jit_comp.c         | 260 ++++++++++++++++++------------------
 include/linux/filter.h              | 156 ++++++++++++----------
 net/core/filter.c                   | 198 +++++++++++++--------------
 4 files changed, 314 insertions(+), 302 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 58c443926647..9f49b8690500 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -805,7 +805,7 @@ to seccomp_data, for converted BPF filters R1 points to a skb.
 
 A program, that is translated internally consists of the following elements:
 
-  op:16, jt:8, jf:8, k:32    ==>    op:8, a_reg:4, x_reg:4, off:16, imm:32
+  op:16, jt:8, jf:8, k:32    ==>    op:8, dst_reg:4, src_reg:4, off:16, imm:32
 
 So far 87 internal BPF instructions were implemented. 8-bit 'op' opcode field
 has room for new instructions. Some of them may use 16/24/32 byte encoding. New
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 080f3f071bb0..99bef86ed6df 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -64,10 +64,10 @@ static inline bool is_simm32(s64 value)
 	return value == (s64) (s32) value;
 }
 
-/* mov A, X */
-#define EMIT_mov(A, X) \
-	do {if (A != X) \
-		EMIT3(add_2mod(0x48, A, X), 0x89, add_2reg(0xC0, A, X)); \
+/* mov dst, src */
+#define EMIT_mov(DST, SRC) \
+	do {if (DST != SRC) \
+		EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \
 	} while (0)
 
 static int bpf_size_to_x86_bytes(int bpf_size)
@@ -194,16 +194,16 @@ static inline u8 add_2mod(u8 byte, u32 r1, u32 r2)
 	return byte;
 }
 
-/* encode dest register 'a_reg' into x64 opcode 'byte' */
-static inline u8 add_1reg(u8 byte, u32 a_reg)
+/* encode 'dst_reg' register into x64 opcode 'byte' */
+static inline u8 add_1reg(u8 byte, u32 dst_reg)
 {
-	return byte + reg2hex[a_reg];
+	return byte + reg2hex[dst_reg];
 }
 
-/* encode dest 'a_reg' and src 'x_reg' registers into x64 opcode 'byte' */
-static inline u8 add_2reg(u8 byte, u32 a_reg, u32 x_reg)
+/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */
+static inline u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
 {
-	return byte + reg2hex[a_reg] + (reg2hex[x_reg] << 3);
+	return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3);
 }
 
 struct jit_context {
@@ -286,9 +286,9 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 	}
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
-		const s32 K = insn->imm;
-		u32 a_reg = insn->a_reg;
-		u32 x_reg = insn->x_reg;
+		const s32 imm32 = insn->imm;
+		u32 dst_reg = insn->dst_reg;
+		u32 src_reg = insn->src_reg;
 		u8 b1 = 0, b2 = 0, b3 = 0;
 		s64 jmp_offset;
 		u8 jmp_cond;
@@ -315,32 +315,32 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			case BPF_XOR: b2 = 0x31; break;
 			}
 			if (BPF_CLASS(insn->code) == BPF_ALU64)
-				EMIT1(add_2mod(0x48, a_reg, x_reg));
-			else if (is_ereg(a_reg) || is_ereg(x_reg))
-				EMIT1(add_2mod(0x40, a_reg, x_reg));
-			EMIT2(b2, add_2reg(0xC0, a_reg, x_reg));
+				EMIT1(add_2mod(0x48, dst_reg, src_reg));
+			else if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT1(add_2mod(0x40, dst_reg, src_reg));
+			EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
 			break;
 
-			/* mov A, X */
+			/* mov dst, src */
 		case BPF_ALU64 | BPF_MOV | BPF_X:
-			EMIT_mov(a_reg, x_reg);
+			EMIT_mov(dst_reg, src_reg);
 			break;
 
-			/* mov32 A, X */
+			/* mov32 dst, src */
 		case BPF_ALU | BPF_MOV | BPF_X:
-			if (is_ereg(a_reg) || is_ereg(x_reg))
-				EMIT1(add_2mod(0x40, a_reg, x_reg));
-			EMIT2(0x89, add_2reg(0xC0, a_reg, x_reg));
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT1(add_2mod(0x40, dst_reg, src_reg));
+			EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
 			break;
 
-			/* neg A */
+			/* neg dst */
 		case BPF_ALU | BPF_NEG:
 		case BPF_ALU64 | BPF_NEG:
 			if (BPF_CLASS(insn->code) == BPF_ALU64)
-				EMIT1(add_1mod(0x48, a_reg));
-			else if (is_ereg(a_reg))
-				EMIT1(add_1mod(0x40, a_reg));
-			EMIT2(0xF7, add_1reg(0xD8, a_reg));
+				EMIT1(add_1mod(0x48, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
+			EMIT2(0xF7, add_1reg(0xD8, dst_reg));
 			break;
 
 		case BPF_ALU | BPF_ADD | BPF_K:
@@ -354,9 +354,9 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 		case BPF_ALU64 | BPF_OR | BPF_K:
 		case BPF_ALU64 | BPF_XOR | BPF_K:
 			if (BPF_CLASS(insn->code) == BPF_ALU64)
-				EMIT1(add_1mod(0x48, a_reg));
-			else if (is_ereg(a_reg))
-				EMIT1(add_1mod(0x40, a_reg));
+				EMIT1(add_1mod(0x48, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
 
 			switch (BPF_OP(insn->code)) {
 			case BPF_ADD: b3 = 0xC0; break;
@@ -366,10 +366,10 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			case BPF_XOR: b3 = 0xF0; break;
 			}
 
-			if (is_imm8(K))
-				EMIT3(0x83, add_1reg(b3, a_reg), K);
+			if (is_imm8(imm32))
+				EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
 			else
-				EMIT2_off32(0x81, add_1reg(b3, a_reg), K);
+				EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
 			break;
 
 		case BPF_ALU64 | BPF_MOV | BPF_K:
@@ -377,23 +377,23 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			 * use 'mov eax, imm32' (which zero-extends imm32)
 			 * to save 2 bytes
 			 */
-			if (K < 0) {
+			if (imm32 < 0) {
 				/* 'mov rax, imm32' sign extends imm32 */
-				b1 = add_1mod(0x48, a_reg);
+				b1 = add_1mod(0x48, dst_reg);
 				b2 = 0xC7;
 				b3 = 0xC0;
-				EMIT3_off32(b1, b2, add_1reg(b3, a_reg), K);
+				EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
 				break;
 			}
 
 		case BPF_ALU | BPF_MOV | BPF_K:
 			/* mov %eax, imm32 */
-			if (is_ereg(a_reg))
-				EMIT1(add_1mod(0x40, a_reg));
-			EMIT1_off32(add_1reg(0xB8, a_reg), K);
+			if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
+			EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
 			break;
 
-			/* A %= X, A /= X, A %= K, A /= K */
+			/* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
 		case BPF_ALU | BPF_MOD | BPF_X:
 		case BPF_ALU | BPF_DIV | BPF_X:
 		case BPF_ALU | BPF_MOD | BPF_K:
@@ -406,14 +406,14 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			EMIT1(0x52); /* push rdx */
 
 			if (BPF_SRC(insn->code) == BPF_X)
-				/* mov r11, X */
-				EMIT_mov(AUX_REG, x_reg);
+				/* mov r11, src_reg */
+				EMIT_mov(AUX_REG, src_reg);
 			else
-				/* mov r11, K */
-				EMIT3_off32(0x49, 0xC7, 0xC3, K);
+				/* mov r11, imm32 */
+				EMIT3_off32(0x49, 0xC7, 0xC3, imm32);
 
-			/* mov rax, A */
-			EMIT_mov(BPF_REG_0, a_reg);
+			/* mov rax, dst_reg */
+			EMIT_mov(BPF_REG_0, dst_reg);
 
 			/* xor edx, edx
 			 * equivalent to 'xor rdx, rdx', but one byte less
@@ -421,7 +421,7 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			EMIT2(0x31, 0xd2);
 
 			if (BPF_SRC(insn->code) == BPF_X) {
-				/* if (X == 0) return 0 */
+				/* if (src_reg == 0) return 0 */
 
 				/* cmp r11, 0 */
 				EMIT4(0x49, 0x83, 0xFB, 0x00);
@@ -457,8 +457,8 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			EMIT1(0x5A); /* pop rdx */
 			EMIT1(0x58); /* pop rax */
 
-			/* mov A, r11 */
-			EMIT_mov(a_reg, AUX_REG);
+			/* mov dst_reg, r11 */
+			EMIT_mov(dst_reg, AUX_REG);
 			break;
 
 		case BPF_ALU | BPF_MUL | BPF_K:
@@ -468,15 +468,15 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			EMIT1(0x50); /* push rax */
 			EMIT1(0x52); /* push rdx */
 
-			/* mov r11, A */
-			EMIT_mov(AUX_REG, a_reg);
+			/* mov r11, dst_reg */
+			EMIT_mov(AUX_REG, dst_reg);
 
 			if (BPF_SRC(insn->code) == BPF_X)
-				/* mov rax, X */
-				EMIT_mov(BPF_REG_0, x_reg);
+				/* mov rax, src_reg */
+				EMIT_mov(BPF_REG_0, src_reg);
 			else
-				/* mov rax, K */
-				EMIT3_off32(0x48, 0xC7, 0xC0, K);
+				/* mov rax, imm32 */
+				EMIT3_off32(0x48, 0xC7, 0xC0, imm32);
 
 			if (BPF_CLASS(insn->code) == BPF_ALU64)
 				EMIT1(add_1mod(0x48, AUX_REG));
@@ -491,8 +491,8 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 			EMIT1(0x5A); /* pop rdx */
 			EMIT1(0x58); /* pop rax */
 
-			/* mov A, r11 */
-			EMIT_mov(a_reg, AUX_REG);
+			/* mov dst_reg, r11 */
+			EMIT_mov(dst_reg, AUX_REG);
 			break;
 
 			/* shifts */
@@ -503,39 +503,39 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 		case BPF_ALU64 | BPF_RSH | BPF_K:
 		case BPF_ALU64 | BPF_ARSH | BPF_K:
 			if (BPF_CLASS(insn->code) == BPF_ALU64)
-				EMIT1(add_1mod(0x48, a_reg));
-			else if (is_ereg(a_reg))
-				EMIT1(add_1mod(0x40, a_reg));
+				EMIT1(add_1mod(0x48, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_1mod(0x40, dst_reg));
 
 			switch (BPF_OP(insn->code)) {
 			case BPF_LSH: b3 = 0xE0; break;
 			case BPF_RSH: b3 = 0xE8; break;
 			case BPF_ARSH: b3 = 0xF8; break;
 			}
-			EMIT3(0xC1, add_1reg(b3, a_reg), K);
+			EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
 			break;
 
 		case BPF_ALU | BPF_END | BPF_FROM_BE:
-			switch (K) {
+			switch (imm32) {
 			case 16:
 				/* emit 'ror %ax, 8' to swap lower 2 bytes */
 				EMIT1(0x66);
-				if (is_ereg(a_reg))
+				if (is_ereg(dst_reg))
 					EMIT1(0x41);
-				EMIT3(0xC1, add_1reg(0xC8, a_reg), 8);
+				EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);
 				break;
 			case 32:
 				/* emit 'bswap eax' to swap lower 4 bytes */
-				if (is_ereg(a_reg))
+				if (is_ereg(dst_reg))
 					EMIT2(0x41, 0x0F);
 				else
 					EMIT1(0x0F);
-				EMIT1(add_1reg(0xC8, a_reg));
+				EMIT1(add_1reg(0xC8, dst_reg));
 				break;
 			case 64:
 				/* emit 'bswap rax' to swap 8 bytes */
-				EMIT3(add_1mod(0x48, a_reg), 0x0F,
-				      add_1reg(0xC8, a_reg));
+				EMIT3(add_1mod(0x48, dst_reg), 0x0F,
+				      add_1reg(0xC8, dst_reg));
 				break;
 			}
 			break;
@@ -543,117 +543,117 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image,
 		case BPF_ALU | BPF_END | BPF_FROM_LE:
 			break;
 
-			/* ST: *(u8*)(a_reg + off) = imm */
+			/* ST: *(u8*)(dst_reg + off) = imm */
 		case BPF_ST | BPF_MEM | BPF_B:
-			if (is_ereg(a_reg))
+			if (is_ereg(dst_reg))
 				EMIT2(0x41, 0xC6);
 			else
 				EMIT1(0xC6);
 			goto st;
 		case BPF_ST | BPF_MEM | BPF_H:
-			if (is_ereg(a_reg))
+			if (is_ereg(dst_reg))
 				EMIT3(0x66, 0x41, 0xC7);
 			else
 				EMIT2(0x66, 0xC7);
 			goto st;
 		case BPF_ST | BPF_MEM | BPF_W:
-			if (is_ereg(a_reg))
+			if (is_ereg(dst_reg))
 				EMIT2(0x41, 0xC7);
 			else
 				EMIT1(0xC7);
 			goto st;
 		case BPF_ST | BPF_MEM | BPF_DW:
-			EMIT2(add_1mod(0x48, a_reg), 0xC7);
+			EMIT2(add_1mod(0x48, dst_reg), 0xC7);
 
 st:			if (is_imm8(insn->off))
-				EMIT2(add_1reg(0x40, a_reg), insn->off);
+				EMIT2(add_1reg(0x40, dst_reg), insn->off);
 			else
-				EMIT1_off32(add_1reg(0x80, a_reg), insn->off);
+				EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);
 
-			EMIT(K, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
+			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
 			break;
 
-			/* STX: *(u8*)(a_reg + off) = x_reg */
+			/* STX: *(u8*)(dst_reg + off) = src_reg */
 		case BPF_STX | BPF_MEM | BPF_B:
 			/* emit 'mov byte ptr [rax + off], al' */
-			if (is_ereg(a_reg) || is_ereg(x_reg) ||
+			if (is_ereg(dst_reg) || is_ereg(src_reg) ||
 			    /* have to add extra byte for x86 SIL, DIL regs */
-			    x_reg == BPF_REG_1 || x_reg == BPF_REG_2)
-				EMIT2(add_2mod(0x40, a_reg, x_reg), 0x88);
+			    src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
 			else
 				EMIT1(0x88);
 			goto stx;
 		case BPF_STX | BPF_MEM | BPF_H:
-			if (is_ereg(a_reg) || is_ereg(x_reg))
-				EMIT3(0x66, add_2mod(0x40, a_reg, x_reg), 0x89);
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
 			else
 				EMIT2(0x66, 0x89);
 			goto stx;
 		case BPF_STX | BPF_MEM | BPF_W:
-			if (is_ereg(a_reg) || is_ereg(x_reg))
-				EMIT2(add_2mod(0x40, a_reg, x_reg), 0x89);
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
 			else
 				EMIT1(0x89);
 			goto stx;
 		case BPF_STX | BPF_MEM | BPF_DW:
-			EMIT2(add_2mod(0x48, a_reg, x_reg), 0x89);
+			EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
 stx:			if (is_imm8(insn->off))
-				EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off);
+				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
 			else
-				EMIT1_off32(add_2reg(0x80, a_reg, x_reg),
+				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
 					    insn->off);
 			break;
 
-			/* LDX: a_reg = *(u8*)(x_reg + off) */
+			/* LDX: dst_reg = *(u8*)(src_reg + off) */
 		case BPF_LDX | BPF_MEM | BPF_B:
 			/* emit 'movzx rax, byte ptr [rax + off]' */
-			EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB6);
+			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
 			goto ldx;
 		case BPF_LDX | BPF_MEM | BPF_H:
 			/* emit 'movzx rax, word ptr [rax + off]' */
-			EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB7);
+			EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
 			goto ldx;
 		case BPF_LDX | BPF_MEM | BPF_W:
 			/* emit 'mov eax, dword ptr [rax+0x14]' */
-			if (is_ereg(a_reg) || is_ereg(x_reg))
-				EMIT2(add_2mod(0x40, x_reg, a_reg), 0x8B);
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
 			else
 				EMIT1(0x8B);
 			goto ldx;
 		case BPF_LDX | BPF_MEM | BPF_DW:
 			/* emit 'mov rax, qword ptr [rax+0x14]' */
-			EMIT2(add_2mod(0x48, x_reg, a_reg), 0x8B);
+			EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
 ldx:			/* if insn->off == 0 we can save one extra byte, but
 			 * special case of x86 r13 which always needs an offset
 			 * is not worth the hassle
 			 */
 			if (is_imm8(insn->off))
-				EMIT2(add_2reg(0x40, x_reg, a_reg), insn->off);
+				EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
 			else
-				EMIT1_off32(add_2reg(0x80, x_reg, a_reg),
+				EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
 					    insn->off);
 			break;
 
-			/* STX XADD: lock *(u32*)(a_reg + off) += x_reg */
+			/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
 		case BPF_STX | BPF_XADD | BPF_W:
 			/* emit 'lock add dword ptr [rax + off], eax' */
-			if (is_ereg(a_reg) || is_ereg(x_reg))
-				EMIT3(0xF0, add_2mod(0x40, a_reg, x_reg), 0x01);
+			if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
 			else
 				EMIT2(0xF0, 0x01);
 			goto xadd;
 		case BPF_STX | BPF_XADD | BPF_DW:
-			EMIT3(0xF0, add_2mod(0x48, a_reg, x_reg), 0x01);
+			EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
 xadd:			if (is_imm8(insn->off))
-				EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off);
+				EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
 			else
-				EMIT1_off32(add_2reg(0x80, a_reg, x_reg),
+				EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
 					    insn->off);
 			break;
 
 			/* call */
 		case BPF_JMP | BPF_CALL:
-			func = (u8 *) __bpf_call_base + K;
+			func = (u8 *) __bpf_call_base + imm32;
 			jmp_offset = func - (image + addrs[i]);
 			if (ctx->seen_ld_abs) {
 				EMIT2(0x41, 0x52); /* push %r10 */
@@ -663,9 +663,9 @@ xadd:			if (is_imm8(insn->off))
 				 */
 				jmp_offset += 4;
 			}
-			if (!K || !is_simm32(jmp_offset)) {
+			if (!imm32 || !is_simm32(jmp_offset)) {
 				pr_err("unsupported bpf func %d addr %p image %p\n",
-				       K, func, image);
+				       imm32, func, image);
 				return -EINVAL;
 			}
 			EMIT1_off32(0xE8, jmp_offset);
@@ -682,21 +682,21 @@ xadd:			if (is_imm8(insn->off))
 		case BPF_JMP | BPF_JGE | BPF_X:
 		case BPF_JMP | BPF_JSGT | BPF_X:
 		case BPF_JMP | BPF_JSGE | BPF_X:
-			/* cmp a_reg, x_reg */
-			EMIT3(add_2mod(0x48, a_reg, x_reg), 0x39,
-			      add_2reg(0xC0, a_reg, x_reg));
+			/* cmp dst_reg, src_reg */
+			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
+			      add_2reg(0xC0, dst_reg, src_reg));
 			goto emit_cond_jmp;
 
 		case BPF_JMP | BPF_JSET | BPF_X:
-			/* test a_reg, x_reg */
-			EMIT3(add_2mod(0x48, a_reg, x_reg), 0x85,
-			      add_2reg(0xC0, a_reg, x_reg));
+			/* test dst_reg, src_reg */
+			EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85,
+			      add_2reg(0xC0, dst_reg, src_reg));
 			goto emit_cond_jmp;
 
 		case BPF_JMP | BPF_JSET | BPF_K:
-			/* test a_reg, imm32 */
-			EMIT1(add_1mod(0x48, a_reg));
-			EMIT2_off32(0xF7, add_1reg(0xC0, a_reg), K);
+			/* test dst_reg, imm32 */
+			EMIT1(add_1mod(0x48, dst_reg));
+			EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
 			goto emit_cond_jmp;
 
 		case BPF_JMP | BPF_JEQ | BPF_K:
@@ -705,13 +705,13 @@ xadd:			if (is_imm8(insn->off))
 		case BPF_JMP | BPF_JGE | BPF_K:
 		case BPF_JMP | BPF_JSGT | BPF_K:
 		case BPF_JMP | BPF_JSGE | BPF_K:
-			/* cmp a_reg, imm8/32 */
-			EMIT1(add_1mod(0x48, a_reg));
+			/* cmp dst_reg, imm8/32 */
+			EMIT1(add_1mod(0x48, dst_reg));
 
-			if (is_imm8(K))
-				EMIT3(0x83, add_1reg(0xF8, a_reg), K);
+			if (is_imm8(imm32))
+				EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
 			else
-				EMIT2_off32(0x81, add_1reg(0xF8, a_reg), K);
+				EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);
 
 emit_cond_jmp:		/* convert BPF opcode to x86 */
 			switch (BPF_OP(insn->code)) {
@@ -773,27 +773,27 @@ emit_jmp:
 			func = sk_load_word;
 			goto common_load;
 		case BPF_LD | BPF_ABS | BPF_W:
-			func = CHOOSE_LOAD_FUNC(K, sk_load_word);
+			func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
 common_load:		ctx->seen_ld_abs = true;
 			jmp_offset = func - (image + addrs[i]);
 			if (!func || !is_simm32(jmp_offset)) {
 				pr_err("unsupported bpf func %d addr %p image %p\n",
-				       K, func, image);
+				       imm32, func, image);
 				return -EINVAL;
 			}
 			if (BPF_MODE(insn->code) == BPF_ABS) {
 				/* mov %esi, imm32 */
-				EMIT1_off32(0xBE, K);
+				EMIT1_off32(0xBE, imm32);
 			} else {
-				/* mov %rsi, x_reg */
-				EMIT_mov(BPF_REG_2, x_reg);
-				if (K) {
-					if (is_imm8(K))
+				/* mov %rsi, src_reg */
+				EMIT_mov(BPF_REG_2, src_reg);
+				if (imm32) {
+					if (is_imm8(imm32))
 						/* add %esi, imm8 */
-						EMIT3(0x83, 0xC6, K);
+						EMIT3(0x83, 0xC6, imm32);
 					else
 						/* add %esi, imm32 */
-						EMIT2_off32(0x81, 0xC6, K);
+						EMIT2_off32(0x81, 0xC6, imm32);
 				}
 			}
 			/* skb pointer is in R6 (%rbx), it will be copied into
@@ -808,13 +808,13 @@ common_load:		ctx->seen_ld_abs = true;
 			func = sk_load_half;
 			goto common_load;
 		case BPF_LD | BPF_ABS | BPF_H:
-			func = CHOOSE_LOAD_FUNC(K, sk_load_half);
+			func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
 			goto common_load;
 		case BPF_LD | BPF_IND | BPF_B:
 			func = sk_load_byte;
 			goto common_load;
 		case BPF_LD | BPF_ABS | BPF_B:
-			func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
+			func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
 			goto common_load;
 
 		case BPF_JMP | BPF_EXIT:
diff --git a/include/linux/filter.h b/include/linux/filter.h
index f0c2ad43b4af..a7e3c48d73a7 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -78,161 +78,173 @@ enum {
 
 /* Helper macros for filter block array initializers. */
 
-/* ALU ops on registers, bpf_add|sub|...: A += X */
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
 
-#define BPF_ALU64_REG(OP, A, X)					\
+#define BPF_ALU64_REG(OP, DST, SRC)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,	\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
 		.imm   = 0 })
 
-#define BPF_ALU32_REG(OP, A, X)					\
+#define BPF_ALU32_REG(OP, DST, SRC)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU | BPF_OP(OP) | BPF_X,		\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
 		.imm   = 0 })
 
-/* ALU ops on immediates, bpf_add|sub|...: A += IMM */
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
 
-#define BPF_ALU64_IMM(OP, A, IMM)				\
+#define BPF_ALU64_IMM(OP, DST, IMM)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
-		.a_reg = A,					\
-		.x_reg = 0,					\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = IMM })
 
-#define BPF_ALU32_IMM(OP, A, IMM)				\
+#define BPF_ALU32_IMM(OP, DST, IMM)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU | BPF_OP(OP) | BPF_K,		\
-		.a_reg = A,					\
-		.x_reg = 0,					\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = IMM })
 
 /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
 
-#define BPF_ENDIAN(TYPE, A, LEN)				\
+#define BPF_ENDIAN(TYPE, DST, LEN)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU | BPF_END | BPF_SRC(TYPE),	\
-		.a_reg = A,					\
-		.x_reg = 0,					\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = LEN })
 
-/* Short form of mov, A = X */
+/* Short form of mov, dst_reg = src_reg */
 
-#define BPF_MOV64_REG(A, X)					\
+#define BPF_MOV64_REG(DST, SRC)					\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
 		.imm   = 0 })
 
-#define BPF_MOV32_REG(A, X)					\
+#define BPF_MOV32_REG(DST, SRC)					\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
 		.imm   = 0 })
 
-/* Short form of mov, A = IMM */
+/* Short form of mov, dst_reg = imm32 */
 
-#define BPF_MOV64_IMM(A, IMM)					\
+#define BPF_MOV64_IMM(DST, IMM)					\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU64 | BPF_MOV | BPF_K,		\
-		.a_reg = A,					\
-		.x_reg = 0,					\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = IMM })
 
-#define BPF_MOV32_IMM(A, IMM)					\
+#define BPF_MOV32_IMM(DST, IMM)					\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU | BPF_MOV | BPF_K,		\
-		.a_reg = A,					\
-		.x_reg = 0,					\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = IMM })
 
-/* Short form of mov based on type, BPF_X: A = X,  BPF_K: A = IMM */
+/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
 
-#define BPF_MOV64_RAW(TYPE, A, X, IMM)				\
+#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM)			\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE),	\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
 		.imm   = IMM })
 
-#define BPF_MOV32_RAW(TYPE, A, X, IMM)				\
+#define BPF_MOV32_RAW(TYPE, DST, SRC, IMM)			\
 	((struct sock_filter_int) {				\
 		.code  = BPF_ALU | BPF_MOV | BPF_SRC(TYPE),	\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
 		.imm   = IMM })
 
-/* Direct packet access, R0 = *(uint *) (skb->data + OFF) */
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
 
-#define BPF_LD_ABS(SIZE, OFF)					\
+#define BPF_LD_ABS(SIZE, IMM)					\
 	((struct sock_filter_int) {				\
 		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,	\
-		.a_reg = 0,					\
-		.x_reg = 0,					\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
-		.imm   = OFF })
+		.imm   = IMM })
 
-/* Indirect packet access, R0 = *(uint *) (skb->data + X + OFF) */
+/* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */
 
-#define BPF_LD_IND(SIZE, X, OFF)				\
+#define BPF_LD_IND(SIZE, SRC, IMM)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_IND,	\
-		.a_reg = 0,					\
-		.x_reg = X,					\
+		.dst_reg = 0,					\
+		.src_reg = SRC,					\
 		.off   = 0,					\
-		.imm   = OFF })
+		.imm   = IMM })
 
-/* Memory store, A = *(uint *) (X + OFF), and vice versa */
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
 
-#define BPF_LDX_MEM(SIZE, A, X, OFF)				\
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF)			\
 	((struct sock_filter_int) {				\
 		.code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,	\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = OFF,					\
 		.imm   = 0 })
 
-#define BPF_STX_MEM(SIZE, A, X, OFF)				\
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF)			\
 	((struct sock_filter_int) {				\
 		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,	\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = OFF,					\
 		.imm   = 0 })
 
-/* Conditional jumps against registers, if (A 'op' X) goto pc + OFF */
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM)				\
+	((struct sock_filter_int) {				\
+		.code  = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM,	\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
+		.off   = OFF,					\
+		.imm   = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
 
-#define BPF_JMP_REG(OP, A, X, OFF)				\
+#define BPF_JMP_REG(OP, DST, SRC, OFF)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_JMP | BPF_OP(OP) | BPF_X,		\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = OFF,					\
 		.imm   = 0 })
 
-/* Conditional jumps against immediates, if (A 'op' IMM) goto pc + OFF */
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
 
-#define BPF_JMP_IMM(OP, A, IMM, OFF)				\
+#define BPF_JMP_IMM(OP, DST, IMM, OFF)				\
 	((struct sock_filter_int) {				\
 		.code  = BPF_JMP | BPF_OP(OP) | BPF_K,		\
-		.a_reg = A,					\
-		.x_reg = 0,					\
+		.dst_reg = DST,					\
+		.src_reg = 0,					\
 		.off   = OFF,					\
 		.imm   = IMM })
 
@@ -241,18 +253,18 @@ enum {
 #define BPF_EMIT_CALL(FUNC)					\
 	((struct sock_filter_int) {				\
 		.code  = BPF_JMP | BPF_CALL,			\
-		.a_reg = 0,					\
-		.x_reg = 0,					\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = ((FUNC) - __bpf_call_base) })
 
 /* Raw code statement block */
 
-#define BPF_RAW_INSN(CODE, A, X, OFF, IMM)			\
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM)			\
 	((struct sock_filter_int) {				\
 		.code  = CODE,					\
-		.a_reg = A,					\
-		.x_reg = X,					\
+		.dst_reg = DST,					\
+		.src_reg = SRC,					\
 		.off   = OFF,					\
 		.imm   = IMM })
 
@@ -261,8 +273,8 @@ enum {
 #define BPF_EXIT_INSN()						\
 	((struct sock_filter_int) {				\
 		.code  = BPF_JMP | BPF_EXIT,			\
-		.a_reg = 0,					\
-		.x_reg = 0,					\
+		.dst_reg = 0,					\
+		.src_reg = 0,					\
 		.off   = 0,					\
 		.imm   = 0 })
 
@@ -287,8 +299,8 @@ enum {
 
 struct sock_filter_int {
 	__u8	code;		/* opcode */
-	__u8	a_reg:4;	/* dest register */
-	__u8	x_reg:4;	/* source register */
+	__u8	dst_reg:4;	/* dest register */
+	__u8	src_reg:4;	/* source register */
 	__s16	off;		/* signed offset */
 	__s32	imm;		/* signed immediate constant */
 };
diff --git a/net/core/filter.c b/net/core/filter.c
index 6bd2e350e751..b3f21751b238 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -59,12 +59,12 @@
 #define BPF_R10	regs[BPF_REG_10]
 
 /* Named registers */
-#define A	regs[insn->a_reg]
-#define X	regs[insn->x_reg]
+#define DST	regs[insn->dst_reg]
+#define SRC	regs[insn->src_reg]
 #define FP	regs[BPF_REG_FP]
 #define ARG1	regs[BPF_REG_ARG1]
 #define CTX	regs[BPF_REG_CTX]
-#define K	insn->imm
+#define IMM	insn->imm
 
 /* No hurry in this branch
  *
@@ -264,7 +264,7 @@ static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *ins
 	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
 	ARG1 = (u64) (unsigned long) ctx;
 
-	/* Register for user BPF programs need to be reset first. */
+	/* Registers used in classic BPF programs need to be reset first. */
 	regs[BPF_REG_A] = 0;
 	regs[BPF_REG_X] = 0;
 
@@ -274,16 +274,16 @@ select_insn:
 	/* ALU */
 #define ALU(OPCODE, OP)			\
 	ALU64_##OPCODE##_X:		\
-		A = A OP X;		\
+		DST = DST OP SRC;	\
 		CONT;			\
 	ALU_##OPCODE##_X:		\
-		A = (u32) A OP (u32) X;	\
+		DST = (u32) DST OP (u32) SRC;	\
 		CONT;			\
 	ALU64_##OPCODE##_K:		\
-		A = A OP K;		\
+		DST = DST OP IMM;		\
 		CONT;			\
 	ALU_##OPCODE##_K:		\
-		A = (u32) A OP (u32) K;	\
+		DST = (u32) DST OP (u32) IMM;	\
 		CONT;
 
 	ALU(ADD,  +)
@@ -296,92 +296,92 @@ select_insn:
 	ALU(MUL,  *)
 #undef ALU
 	ALU_NEG:
-		A = (u32) -A;
+		DST = (u32) -DST;
 		CONT;
 	ALU64_NEG:
-		A = -A;
+		DST = -DST;
 		CONT;
 	ALU_MOV_X:
-		A = (u32) X;
+		DST = (u32) SRC;
 		CONT;
 	ALU_MOV_K:
-		A = (u32) K;
+		DST = (u32) IMM;
 		CONT;
 	ALU64_MOV_X:
-		A = X;
+		DST = SRC;
 		CONT;
 	ALU64_MOV_K:
-		A = K;
+		DST = IMM;
 		CONT;
 	ALU64_ARSH_X:
-		(*(s64 *) &A) >>= X;
+		(*(s64 *) &DST) >>= SRC;
 		CONT;
 	ALU64_ARSH_K:
-		(*(s64 *) &A) >>= K;
+		(*(s64 *) &DST) >>= IMM;
 		CONT;
 	ALU64_MOD_X:
-		if (unlikely(X == 0))
+		if (unlikely(SRC == 0))
 			return 0;
-		tmp = A;
-		A = do_div(tmp, X);
+		tmp = DST;
+		DST = do_div(tmp, SRC);
 		CONT;
 	ALU_MOD_X:
-		if (unlikely(X == 0))
+		if (unlikely(SRC == 0))
 			return 0;
-		tmp = (u32) A;
-		A = do_div(tmp, (u32) X);
+		tmp = (u32) DST;
+		DST = do_div(tmp, (u32) SRC);
 		CONT;
 	ALU64_MOD_K:
-		tmp = A;
-		A = do_div(tmp, K);
+		tmp = DST;
+		DST = do_div(tmp, IMM);
 		CONT;
 	ALU_MOD_K:
-		tmp = (u32) A;
-		A = do_div(tmp, (u32) K);
+		tmp = (u32) DST;
+		DST = do_div(tmp, (u32) IMM);
 		CONT;
 	ALU64_DIV_X:
-		if (unlikely(X == 0))
+		if (unlikely(SRC == 0))
 			return 0;
-		do_div(A, X);
+		do_div(DST, SRC);
 		CONT;
 	ALU_DIV_X:
-		if (unlikely(X == 0))
+		if (unlikely(SRC == 0))
 			return 0;
-		tmp = (u32) A;
-		do_div(tmp, (u32) X);
-		A = (u32) tmp;
+		tmp = (u32) DST;
+		do_div(tmp, (u32) SRC);
+		DST = (u32) tmp;
 		CONT;
 	ALU64_DIV_K:
-		do_div(A, K);
+		do_div(DST, IMM);
 		CONT;
 	ALU_DIV_K:
-		tmp = (u32) A;
-		do_div(tmp, (u32) K);
-		A = (u32) tmp;
+		tmp = (u32) DST;
+		do_div(tmp, (u32) IMM);
+		DST = (u32) tmp;
 		CONT;
 	ALU_END_TO_BE:
-		switch (K) {
+		switch (IMM) {
 		case 16:
-			A = (__force u16) cpu_to_be16(A);
+			DST = (__force u16) cpu_to_be16(DST);
 			break;
 		case 32:
-			A = (__force u32) cpu_to_be32(A);
+			DST = (__force u32) cpu_to_be32(DST);
 			break;
 		case 64:
-			A = (__force u64) cpu_to_be64(A);
+			DST = (__force u64) cpu_to_be64(DST);
 			break;
 		}
 		CONT;
 	ALU_END_TO_LE:
-		switch (K) {
+		switch (IMM) {
 		case 16:
-			A = (__force u16) cpu_to_le16(A);
+			DST = (__force u16) cpu_to_le16(DST);
 			break;
 		case 32:
-			A = (__force u32) cpu_to_le32(A);
+			DST = (__force u32) cpu_to_le32(DST);
 			break;
 		case 64:
-			A = (__force u64) cpu_to_le64(A);
+			DST = (__force u64) cpu_to_le64(DST);
 			break;
 		}
 		CONT;
@@ -401,85 +401,85 @@ select_insn:
 		insn += insn->off;
 		CONT;
 	JMP_JEQ_X:
-		if (A == X) {
+		if (DST == SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JEQ_K:
-		if (A == K) {
+		if (DST == IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JNE_X:
-		if (A != X) {
+		if (DST != SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JNE_K:
-		if (A != K) {
+		if (DST != IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JGT_X:
-		if (A > X) {
+		if (DST > SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JGT_K:
-		if (A > K) {
+		if (DST > IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JGE_X:
-		if (A >= X) {
+		if (DST >= SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JGE_K:
-		if (A >= K) {
+		if (DST >= IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JSGT_X:
-		if (((s64) A) > ((s64) X)) {
+		if (((s64) DST) > ((s64) SRC)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JSGT_K:
-		if (((s64) A) > ((s64) K)) {
+		if (((s64) DST) > ((s64) IMM)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JSGE_X:
-		if (((s64) A) >= ((s64) X)) {
+		if (((s64) DST) >= ((s64) SRC)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JSGE_K:
-		if (((s64) A) >= ((s64) K)) {
+		if (((s64) DST) >= ((s64) IMM)) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JSET_X:
-		if (A & X) {
+		if (DST & SRC) {
 			insn += insn->off;
 			CONT_JMP;
 		}
 		CONT;
 	JMP_JSET_K:
-		if (A & K) {
+		if (DST & IMM) {
 			insn += insn->off;
 			CONT_JMP;
 		}
@@ -488,15 +488,15 @@ select_insn:
 		return BPF_R0;
 
 	/* STX and ST and LDX*/
-#define LDST(SIZEOP, SIZE)					\
-	STX_MEM_##SIZEOP:					\
-		*(SIZE *)(unsigned long) (A + insn->off) = X;	\
-		CONT;						\
-	ST_MEM_##SIZEOP:					\
-		*(SIZE *)(unsigned long) (A + insn->off) = K;	\
-		CONT;						\
-	LDX_MEM_##SIZEOP:					\
-		A = *(SIZE *)(unsigned long) (X + insn->off);	\
+#define LDST(SIZEOP, SIZE)						\
+	STX_MEM_##SIZEOP:						\
+		*(SIZE *)(unsigned long) (DST + insn->off) = SRC;	\
+		CONT;							\
+	ST_MEM_##SIZEOP:						\
+		*(SIZE *)(unsigned long) (DST + insn->off) = IMM;	\
+		CONT;							\
+	LDX_MEM_##SIZEOP:						\
+		DST = *(SIZE *)(unsigned long) (SRC + insn->off);	\
 		CONT;
 
 	LDST(B,   u8)
@@ -504,16 +504,16 @@ select_insn:
 	LDST(W,  u32)
 	LDST(DW, u64)
 #undef LDST
-	STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */
-		atomic_add((u32) X, (atomic_t *)(unsigned long)
-			   (A + insn->off));
+	STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
+		atomic_add((u32) SRC, (atomic_t *)(unsigned long)
+			   (DST + insn->off));
 		CONT;
-	STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */
-		atomic64_add((u64) X, (atomic64_t *)(unsigned long)
-			     (A + insn->off));
+	STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
+		atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
+			     (DST + insn->off));
 		CONT;
-	LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + K)) */
-		off = K;
+	LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
+		off = IMM;
 load_word:
 		/* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are
 		 * only appearing in the programs where ctx ==
@@ -527,51 +527,51 @@ load_word:
 		 * BPF_R6-BPF_R9, and store return value into BPF_R0.
 		 *
 		 * Implicit input:
-		 *   ctx
+		 *   ctx == skb == BPF_R6 == CTX
 		 *
 		 * Explicit input:
-		 *   X == any register
-		 *   K == 32-bit immediate
+		 *   SRC == any register
+		 *   IMM == 32-bit immediate
 		 *
 		 * Output:
 		 *   BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
 		 */
 
-		ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp);
+		ptr = load_pointer((struct sk_buff *) CTX, off, 4, &tmp);
 		if (likely(ptr != NULL)) {
 			BPF_R0 = get_unaligned_be32(ptr);
 			CONT;
 		}
 
 		return 0;
-	LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */
-		off = K;
+	LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
+		off = IMM;
 load_half:
-		ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp);
+		ptr = load_pointer((struct sk_buff *) CTX, off, 2, &tmp);
 		if (likely(ptr != NULL)) {
 			BPF_R0 = get_unaligned_be16(ptr);
 			CONT;
 		}
 
 		return 0;
-	LD_ABS_B: /* BPF_R0 = *(u8 *) (ctx + K) */
-		off = K;
+	LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
+		off = IMM;
 load_byte:
-		ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp);
+		ptr = load_pointer((struct sk_buff *) CTX, off, 1, &tmp);
 		if (likely(ptr != NULL)) {
 			BPF_R0 = *(u8 *)ptr;
 			CONT;
 		}
 
 		return 0;
-	LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */
-		off = K + X;
+	LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
+		off = IMM + SRC;
 		goto load_word;
-	LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + X + K)) */
-		off = K + X;
+	LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
+		off = IMM + SRC;
 		goto load_half;
-	LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + X + K) */
-		off = K + X;
+	LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
+		off = IMM + SRC;
 		goto load_byte;
 
 	default_label:
@@ -675,7 +675,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_PROTOCOL:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
 
-		/* A = *(u16 *) (ctx + offsetof(protocol)) */
+		/* A = *(u16 *) (CTX + offsetof(protocol)) */
 		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
 				      offsetof(struct sk_buff, protocol));
 		/* A = ntohs(A) [emitting a nop or swap16] */
@@ -741,7 +741,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
 		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
 
-		/* A = *(u16 *) (ctx + offsetof(vlan_tci)) */
+		/* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
 		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
 				      offsetof(struct sk_buff, vlan_tci));
 		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
@@ -760,13 +760,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
 	case SKF_AD_OFF + SKF_AD_CPU:
 	case SKF_AD_OFF + SKF_AD_RANDOM:
-		/* arg1 = ctx */
+		/* arg1 = CTX */
 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
 		/* arg2 = A */
 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
 		/* arg3 = X */
 		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
-		/* Emit call(ctx, arg2=A, arg3=X) */
+		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
 		switch (fp->k) {
 		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
 			*insn = BPF_EMIT_CALL(__skb_get_pay_offset);
@@ -941,12 +941,12 @@ do_pass:
 				 */
 				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
 
-				insn->a_reg = BPF_REG_A;
-				insn->x_reg = BPF_REG_TMP;
+				insn->dst_reg = BPF_REG_A;
+				insn->src_reg = BPF_REG_TMP;
 				bpf_src = BPF_X;
 			} else {
-				insn->a_reg = BPF_REG_A;
-				insn->x_reg = BPF_REG_X;
+				insn->dst_reg = BPF_REG_A;
+				insn->src_reg = BPF_REG_X;
 				insn->imm = fp->k;
 				bpf_src = BPF_SRC(fp->code);
 			}
-- 
cgit 


From 67cb9366ff5f99868100198efba5ca88aaa6ad25 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Wed, 11 Jun 2014 18:19:28 +0200
Subject: ktime: add ktime_after and ktime_before helper

Add two minimal helper functions analogous to time_before() and
time_after() that will later on both be needed by SCTP code.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ktime.h    | 24 ++++++++++++++++++++++++
 net/sctp/sm_make_chunk.c |  2 +-
 2 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 31c0cd1c941a..de9e46e6bcc9 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -304,6 +304,30 @@ static inline int ktime_compare(const ktime_t cmp1, const ktime_t cmp2)
 	return 0;
 }
 
+/**
+ * ktime_after - Check whether one ktime_t value is later than another.
+ * @cmp1:	the time being tested
+ * @cmp2:	the time to compare against
+ *
+ * Return: true if cmp1 happened after cmp2, false otherwise.
+ */
+static inline bool ktime_after(const ktime_t cmp1, const ktime_t cmp2)
+{
+	return ktime_compare(cmp1, cmp2) > 0;
+}
+
+/**
+ * ktime_before - Check whether one ktime_t value is earlier than another.
+ * @cmp1:	the time being tested
+ * @cmp2:	the time to compare against
+ *
+ * Return: true if cmp1 happened before cmp2, false otherwise.
+ */
+static inline bool ktime_before(const ktime_t cmp1, const ktime_t cmp2)
+{
+	return ktime_compare(cmp1, cmp2) < 0;
+}
+
 static inline s64 ktime_to_us(const ktime_t kt)
 {
 	struct timeval tv = ktime_to_timeval(kt);
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index fee5552ddf92..ae0e616a7ca5 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1782,7 +1782,7 @@ no_hmac:
 	else
 		kt = ktime_get();
 
-	if (!asoc && ktime_compare(bear_cookie->expiration, kt) < 0) {
+	if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
 		/*
 		 * Section 3.3.10.3 Stale Cookie Error (3)
 		 *
-- 
cgit 


From 2940474af79744411da0cb63b041ad52c57bc443 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 11 Jun 2014 13:49:23 +0200
Subject: block: remove elv_abort_queue and blk_abort_flushes

elv_abort_queue has no callers, and blk_abort_flushes is only called by
elv_abort_queue.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-flush.c        | 38 --------------------------------------
 block/blk.h              |  1 -
 block/elevator.c         | 20 --------------------
 include/linux/elevator.h |  1 -
 4 files changed, 60 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 8ffee4b5f93d..3cb5e9e7108a 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -421,44 +421,6 @@ void blk_insert_flush(struct request *rq)
 	blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
 }
 
-/**
- * blk_abort_flushes - @q is being aborted, abort flush requests
- * @q: request_queue being aborted
- *
- * To be called from elv_abort_queue().  @q is being aborted.  Prepare all
- * FLUSH/FUA requests for abortion.
- *
- * CONTEXT:
- * spin_lock_irq(q->queue_lock)
- */
-void blk_abort_flushes(struct request_queue *q)
-{
-	struct request *rq, *n;
-	int i;
-
-	/*
-	 * Requests in flight for data are already owned by the dispatch
-	 * queue or the device driver.  Just restore for normal completion.
-	 */
-	list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) {
-		list_del_init(&rq->flush.list);
-		blk_flush_restore_request(rq);
-	}
-
-	/*
-	 * We need to give away requests on flush queues.  Restore for
-	 * normal completion and put them on the dispatch queue.
-	 */
-	for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) {
-		list_for_each_entry_safe(rq, n, &q->flush_queue[i],
-					 flush.list) {
-			list_del_init(&rq->flush.list);
-			blk_flush_restore_request(rq);
-			list_add_tail(&rq->queuelist, &q->queue_head);
-		}
-	}
-}
-
 /**
  * blkdev_issue_flush - queue a flush
  * @bdev:	blockdev to issue flush for
diff --git a/block/blk.h b/block/blk.h
index 45385e9abf6f..6748c4f8d7a1 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -84,7 +84,6 @@ static inline void blk_clear_rq_complete(struct request *rq)
 #define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED)
 
 void blk_insert_flush(struct request *rq);
-void blk_abort_flushes(struct request_queue *q);
 
 static inline struct request *__elv_next_request(struct request_queue *q)
 {
diff --git a/block/elevator.c b/block/elevator.c
index f35edddfe9b5..34bded18910e 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -729,26 +729,6 @@ int elv_may_queue(struct request_queue *q, int rw)
 	return ELV_MQUEUE_MAY;
 }
 
-void elv_abort_queue(struct request_queue *q)
-{
-	struct request *rq;
-
-	blk_abort_flushes(q);
-
-	while (!list_empty(&q->queue_head)) {
-		rq = list_entry_rq(q->queue_head.next);
-		rq->cmd_flags |= REQ_QUIET;
-		trace_block_rq_abort(q, rq);
-		/*
-		 * Mark this request as started so we don't trigger
-		 * any debug logic in the end I/O path.
-		 */
-		blk_start_request(rq);
-		__blk_end_request_all(rq, -EIO);
-	}
-}
-EXPORT_SYMBOL(elv_abort_queue);
-
 void elv_completed_request(struct request_queue *q, struct request *rq)
 {
 	struct elevator_queue *e = q->elevator;
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 4ff262e2bf37..e2a6bd7fb133 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -133,7 +133,6 @@ extern struct request *elv_latter_request(struct request_queue *, struct request
 extern int elv_register_queue(struct request_queue *q);
 extern void elv_unregister_queue(struct request_queue *q);
 extern int elv_may_queue(struct request_queue *, int);
-extern void elv_abort_queue(struct request_queue *);
 extern void elv_completed_request(struct request_queue *, struct request *);
 extern int elv_set_request(struct request_queue *q, struct request *rq,
 			   struct bio *bio, gfp_t gfp_mask);
-- 
cgit 


From da91309e0a7e8966d916a74cce42ed170fde06bf Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.com>
Date: Mon, 9 Jun 2014 10:24:38 +0300
Subject: cpumask: Utility function to set n'th cpu - local cpu first

This function sets the n'th cpu, counting the local cpus first.
For example, on a 16-core server where the even-numbered cpus are local,
it gives the following results:
cpumask_set_cpu_local_first(0, numa, cpumask) => cpu 0 is set
cpumask_set_cpu_local_first(1, numa, cpumask) => cpu 2 is set
...
cpumask_set_cpu_local_first(7, numa, cpumask) => cpu 14 is set
cpumask_set_cpu_local_first(8, numa, cpumask) => cpu 1 is set
cpumask_set_cpu_local_first(9, numa, cpumask) => cpu 3 is set
...
cpumask_set_cpu_local_first(15, numa, cpumask) => cpu 15 is set

Currently this function will be used by multi queue networking devices to
calculate the irq affinity mask, such that as many local cpu's as
possible will be utilized to handle the mq device irq's.

Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cpumask.h |  8 +++++++
 lib/cpumask.c           | 63 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index d08e4d2a9b92..d5ef249735d2 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -142,6 +142,13 @@ static inline unsigned int cpumask_any_but(const struct cpumask *mask,
 	return 1;
 }
 
+static inline int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+{
+	set_bit(0, cpumask_bits(dstp));
+
+	return 0;
+}
+
 #define for_each_cpu(cpu, mask)			\
 	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_not(cpu, mask)		\
@@ -192,6 +199,7 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
 
 int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *);
 int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
+int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp);
 
 /**
  * for_each_cpu - iterate over every cpu in a mask
diff --git a/lib/cpumask.c b/lib/cpumask.c
index b810b753c607..c101230658eb 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -164,3 +164,66 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 	memblock_free_early(__pa(mask), cpumask_size());
 }
 #endif
+
+/**
+ * cpumask_set_cpu_local_first - set i'th cpu with local numa cpu's first
+ *
+ * @i: index number
+ * @numa_node: local numa_node
+ * @dstp: cpumask with the relevant cpu bit set according to the policy
+ *
+ * This function sets the cpumask according to a numa aware policy.
+ * cpumask could be used as an affinity hint for the IRQ related to a
+ * queue. When the policy is to spread queues across cores - local cores
+ * first.
+ *
+ * Returns 0 on success, -ENOMEM for no memory, and -EAGAIN when failed to set
+ * the cpu bit and need to re-call the function.
+ */
+int cpumask_set_cpu_local_first(int i, int numa_node, cpumask_t *dstp)
+{
+	cpumask_var_t mask;
+	int cpu;
+	int ret = 0;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	i %= num_online_cpus();
+
+	if (!cpumask_of_node(numa_node)) {
+		/* Use all online cpu's for non numa aware system */
+		cpumask_copy(mask, cpu_online_mask);
+	} else {
+		int n;
+
+		cpumask_and(mask,
+			    cpumask_of_node(numa_node), cpu_online_mask);
+
+		n = cpumask_weight(mask);
+		if (i >= n) {
+			i -= n;
+
+			/* If index > number of local cpu's, mask out local
+			 * cpu's
+			 */
+			cpumask_andnot(mask, cpu_online_mask, mask);
+		}
+	}
+
+	for_each_cpu(cpu, mask) {
+		if (--i < 0)
+			goto out;
+	}
+
+	ret = -EAGAIN;
+
+out:
+	free_cpumask_var(mask);
+
+	if (!ret)
+		cpumask_set_cpu(cpu, dstp);
+
+	return ret;
+}
+EXPORT_SYMBOL(cpumask_set_cpu_local_first);
-- 
cgit 


From bad93e9d4eeb0d2d6b79204d6cedc7f2e7b256f1 Mon Sep 17 00:00:00 2001
From: Octavian Purdila <octavian.purdila@intel.com>
Date: Thu, 12 Jun 2014 01:36:26 +0300
Subject: net: add __pskb_copy_fclone and pskb_copy_for_clone

There are several instances where a pskb_copy or __pskb_copy is
immediately followed by an skb_clone.

Add a couple of new functions to allow the copy skb to be allocated
from the fclone cache and thus speed up subsequent skb_clone calls.

Cc: Alexander Smirnov <alex.bluesman.smirnov@gmail.com>
Cc: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Cc: Marek Lindner <mareklindner@neomailbox.ch>
Cc: Simon Wunderlich <sw@simonwunderlich.de>
Cc: Antonio Quartulli <antonio@meshcoding.com>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Cc: Arvid Brodin <arvid.brodin@alten.se>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Cc: Lauro Ramos Venancio <lauro.venancio@openbossa.org>
Cc: Aloisio Almeida Jr <aloisio.almeida@openbossa.org>
Cc: Samuel Ortiz <sameo@linux.intel.com>
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Allan Stephens <allan.stephens@windriver.com>
Cc: Andrew Hendry <andrew.hendry@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Reviewed-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Signed-off-by: Octavian Purdila <octavian.purdila@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h                 | 16 +++++++++++++++-
 net/batman-adv/distributed-arp-table.c |  2 +-
 net/batman-adv/network-coding.c        |  2 +-
 net/bluetooth/hci_sock.c               |  6 +++---
 net/core/skbuff.c                      | 14 +++++++++-----
 net/nfc/llcp_core.c                    |  4 ++--
 net/nfc/rawsock.c                      |  4 ++--
 net/tipc/bcast.c                       |  2 +-
 8 files changed, 34 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c705808bef9c..1f50bfe2243d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -744,7 +744,13 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
 int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
 struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
-struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask);
+struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
+				   gfp_t gfp_mask, bool fclone);
+static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom,
+					  gfp_t gfp_mask)
+{
+	return __pskb_copy_fclone(skb, headroom, gfp_mask, false);
+}
 
 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
 struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
@@ -2238,6 +2244,14 @@ static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
 	return __pskb_copy(skb, skb_headroom(skb), gfp_mask);
 }
 
+
+static inline struct sk_buff *pskb_copy_for_clone(struct sk_buff *skb,
+						  gfp_t gfp_mask)
+{
+	return __pskb_copy_fclone(skb, skb_headroom(skb), gfp_mask, true);
+}
+
+
 /**
  *	skb_clone_writable - is the header of a clone writable
  *	@skb: buffer to check
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index dcd99b2bea3c..f2c066b21716 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -594,7 +594,7 @@ static bool batadv_dat_send_data(struct batadv_priv *bat_priv,
 		if (!neigh_node)
 			goto free_orig;
 
-		tmp_skb = pskb_copy(skb, GFP_ATOMIC);
+		tmp_skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
 		if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, tmp_skb,
 							   cand[i].orig_node,
 							   packet_subtype)) {
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 40a2fc4bcf4c..8d04d174669e 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -1344,7 +1344,7 @@ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv,
 	struct ethhdr *ethhdr;
 
 	/* Copy skb header to change the mac header */
-	skb = pskb_copy(skb, GFP_ATOMIC);
+	skb = pskb_copy_for_clone(skb, GFP_ATOMIC);
 	if (!skb)
 		return;
 
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f608bffdb8b9..80d25c150a65 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -143,7 +143,7 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb)
 
 		if (!skb_copy) {
 			/* Create a private copy with headroom */
-			skb_copy = __pskb_copy(skb, 1, GFP_ATOMIC);
+			skb_copy = __pskb_copy_fclone(skb, 1, GFP_ATOMIC, true);
 			if (!skb_copy)
 				continue;
 
@@ -247,8 +247,8 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
 			struct hci_mon_hdr *hdr;
 
 			/* Create a private copy with headroom */
-			skb_copy = __pskb_copy(skb, HCI_MON_HDR_SIZE,
-					       GFP_ATOMIC);
+			skb_copy = __pskb_copy_fclone(skb, HCI_MON_HDR_SIZE,
+						      GFP_ATOMIC, true);
 			if (!skb_copy)
 				continue;
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 05f4bef2ce12..b9e85e6cb26a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -951,10 +951,13 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 EXPORT_SYMBOL(skb_copy);
 
 /**
- *	__pskb_copy	-	create copy of an sk_buff with private head.
+ *	__pskb_copy_fclone	-  create copy of an sk_buff with private head.
  *	@skb: buffer to copy
  *	@headroom: headroom of new skb
  *	@gfp_mask: allocation priority
+ *	@fclone: if true allocate the copy of the skb from the fclone
+ *	cache instead of the head cache; it is recommended to set this
+ *	to true for the cases where the copy will likely be cloned
  *
  *	Make a copy of both an &sk_buff and part of its data, located
  *	in header. Fragmented data remain shared. This is used when
@@ -964,11 +967,12 @@ EXPORT_SYMBOL(skb_copy);
  *	The returned buffer has a reference count of 1.
  */
 
-struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
+struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
+				   gfp_t gfp_mask, bool fclone)
 {
 	unsigned int size = skb_headlen(skb) + headroom;
-	struct sk_buff *n = __alloc_skb(size, gfp_mask,
-					skb_alloc_rx_flag(skb), NUMA_NO_NODE);
+	int flags = skb_alloc_rx_flag(skb) | (fclone ? SKB_ALLOC_FCLONE : 0);
+	struct sk_buff *n = __alloc_skb(size, gfp_mask, flags, NUMA_NO_NODE);
 
 	if (!n)
 		goto out;
@@ -1008,7 +1012,7 @@ struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 out:
 	return n;
 }
-EXPORT_SYMBOL(__pskb_copy);
+EXPORT_SYMBOL(__pskb_copy_fclone);
 
 /**
  *	pskb_expand_head - reallocate header of &sk_buff
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index f6278da68763..51e788797317 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -680,8 +680,8 @@ void nfc_llcp_send_to_raw_sock(struct nfc_llcp_local *local,
 			continue;
 
 		if (skb_copy == NULL) {
-			skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE,
-					       GFP_ATOMIC);
+			skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE,
+						      GFP_ATOMIC, true);
 
 			if (skb_copy == NULL)
 				continue;
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 55eefee311eb..11c3544ea546 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -378,8 +378,8 @@ void nfc_send_to_raw_sock(struct nfc_dev *dev, struct sk_buff *skb,
 
 	sk_for_each(sk, &raw_sk_list.head) {
 		if (!skb_copy) {
-			skb_copy = __pskb_copy(skb, NFC_RAW_HEADER_SIZE,
-				     GFP_ATOMIC);
+			skb_copy = __pskb_copy_fclone(skb, NFC_RAW_HEADER_SIZE,
+						      GFP_ATOMIC, true);
 			if (!skb_copy)
 				continue;
 
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 671f9817b4f4..26631679a1fa 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -653,7 +653,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, struct tipc_bearer *unused1,
 			tipc_bearer_send(b->identity, buf, &b->bcast_addr);
 		} else {
 			/* Avoid concurrent buffer access */
-			tbuf = pskb_copy(buf, GFP_ATOMIC);
+			tbuf = pskb_copy_for_clone(buf, GFP_ATOMIC);
 			if (!tbuf)
 				break;
 			tipc_bearer_send(b->identity, tbuf, &b->bcast_addr);
-- 
cgit 


From 5d0c2b95bc57cf8fdc0e7b3e9d7e751eb65ad052 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 10 Jun 2014 18:54:13 -0700
Subject: net: Preserve CHECKSUM_COMPLETE at validation

Currently when the first checksum in a packet is validated using
CHECKSUM_COMPLETE, ip_summed is overwritten to be CHECKSUM_UNNECESSARY
so that any subsequent checksums in the packet are not correctly
validated.

This patch adds csum_valid flag in sk_buff and uses that to indicate
validated checksum instead of setting CHECKSUM_UNNECESSARY. The bit
is set accordingly in the skb_checksum_validate_* functions. The flag
is checked in skb_checksum_complete, so that validation is communicated
between checksum_init and checksum_complete sequence in TCP and UDP.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1f50bfe2243d..72a53805858a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -572,7 +572,8 @@ struct sk_buff {
 	 */
 	__u8			encapsulation:1;
 	__u8			encap_hdr_csum:1;
-	/* 5/7 bit hole (depending on ndisc_nodetype presence) */
+	__u8			csum_valid:1;
+	/* 4/6 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
@@ -2735,7 +2736,7 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb);
 
 static inline int skb_csum_unnecessary(const struct sk_buff *skb)
 {
-	return skb->ip_summed & CHECKSUM_UNNECESSARY;
+	return ((skb->ip_summed & CHECKSUM_UNNECESSARY) || skb->csum_valid);
 }
 
 /**
@@ -2769,10 +2770,8 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
 						  bool zero_okay,
 						  __sum16 check)
 {
-	if (skb_csum_unnecessary(skb)) {
-		return false;
-	} else if (zero_okay && !check) {
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	if (skb_csum_unnecessary(skb) || (zero_okay && !check)) {
+		skb->csum_valid = 1;
 		return false;
 	}
 
@@ -2799,15 +2798,20 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
 {
 	if (skb->ip_summed == CHECKSUM_COMPLETE) {
 		if (!csum_fold(csum_add(psum, skb->csum))) {
-			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			skb->csum_valid = 1;
 			return 0;
 		}
 	}
 
 	skb->csum = psum;
 
-	if (complete || skb->len <= CHECKSUM_BREAK)
-		return __skb_checksum_complete(skb);
+	if (complete || skb->len <= CHECKSUM_BREAK) {
+		__sum16 csum;
+
+		csum = __skb_checksum_complete(skb);
+		skb->csum_valid = !csum;
+		return csum;
+	}
 
 	return 0;
 }
@@ -2831,6 +2835,7 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
 				zero_okay, check, compute_pseudo)	\
 ({									\
 	__sum16 __ret = 0;						\
+	skb->csum_valid = 0;						\
 	if (__skb_checksum_validate_needed(skb, zero_okay, check))	\
 		__ret = __skb_checksum_validate_complete(skb,		\
 				complete, compute_pseudo(skb, proto));	\
-- 
cgit 


From 7e3cead5172927732f51fde77fef6f521e22f209 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 10 Jun 2014 18:54:19 -0700
Subject: net: Save software checksum complete

In skb_checksum_complete, if we need to compute the checksum for the
packet (via skb_checksum) save the result as CHECKSUM_COMPLETE.
Subsequent checksum verification can use this.

Also, added csum_complete_sw flag to distinguish between software and
hardware generated checksum complete, we should always be able to trust
the software computation.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  3 ++-
 net/core/datagram.c    | 14 +++++++++-----
 net/ipv4/gre_offload.c |  6 ++++--
 net/sunrpc/socklib.c   |  3 ++-
 4 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 72a53805858a..5b5cd3189c98 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -573,7 +573,8 @@ struct sk_buff {
 	__u8			encapsulation:1;
 	__u8			encap_hdr_csum:1;
 	__u8			csum_valid:1;
-	/* 4/6 bit hole (depending on ndisc_nodetype presence) */
+	__u8			csum_complete_sw:1;
+	/* 3/5 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
diff --git a/net/core/datagram.c b/net/core/datagram.c
index a16ed7bbe376..6b1c04ca1d50 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -739,11 +739,15 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
 	__sum16 sum;
 
 	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
-	if (likely(!sum)) {
-		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
-			netdev_rx_csum_fault(skb->dev);
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-	}
+	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !sum &&
+	    !skb->csum_complete_sw)
+		netdev_rx_csum_fault(skb->dev);
+
+	/* Save checksum complete for later use */
+	skb->csum = sum;
+	skb->ip_summed = CHECKSUM_COMPLETE;
+	skb->csum_complete_sw = 1;
+
 	return sum;
 }
 EXPORT_SYMBOL(__skb_checksum_complete_head);
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 24deb3928b9e..eb92deb12666 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -131,10 +131,12 @@ static __sum16 gro_skb_checksum(struct sk_buff *skb)
 		csum_partial(skb->data, skb_gro_offset(skb), 0));
 	sum = csum_fold(NAPI_GRO_CB(skb)->csum);
 	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) {
-		if (unlikely(!sum))
+		if (unlikely(!sum) && !skb->csum_complete_sw)
 			netdev_rx_csum_fault(skb->dev);
-	} else
+	} else {
 		skb->ip_summed = CHECKSUM_COMPLETE;
+		skb->csum_complete_sw = 1;
+	}
 
 	return sum;
 }
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 0a648c502fc3..2df87f78e518 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -173,7 +173,8 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
 		return -1;
 	if (csum_fold(desc.csum))
 		return -1;
-	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
+	if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+	    !skb->csum_complete_sw)
 		netdev_rx_csum_fault(skb->dev);
 	return 0;
 no_checksum:
-- 
cgit 


From 8d0207652cbe27d1f962050737848e5ad4671958 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Apr 2014 04:27:08 -0400
Subject: ->splice_write() via ->write_iter()

iter_file_splice_write() - a ->splice_write() instance that gathers the
pipe buffers, builds a bio_vec-based iov_iter covering those and feeds
it to ->write_iter().  A bunch of simple cases converted to that...

[AV: fixed the braino spotted by Cyrill]

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/block_dev.c        |   2 +-
 fs/exofs/file.c       |   2 +-
 fs/ext2/file.c        |   2 +-
 fs/ext3/file.c        |   2 +-
 fs/ext4/file.c        |   2 +-
 fs/f2fs/file.c        |   2 +-
 fs/gfs2/file.c        |   4 +-
 fs/jfs/file.c         |   2 +-
 fs/ramfs/file-mmu.c   |   2 +-
 fs/ramfs/file-nommu.c |   2 +-
 fs/reiserfs/file.c    |   2 +-
 fs/splice.c           | 140 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ubifs/file.c       |   2 +-
 fs/xfs/xfs_file.c     |  43 +---------------
 fs/xfs/xfs_trace.h    |   1 -
 include/linux/fs.h    |   2 +
 16 files changed, 156 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4e36b8ea8aa4..e68e150b1b16 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1583,7 +1583,7 @@ const struct file_operations def_blk_fops = {
 	.compat_ioctl	= compat_blkdev_ioctl,
 #endif
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
 
 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 5b7f6be5a2d5..71bf8e4fb5d4 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -77,7 +77,7 @@ const struct file_operations exofs_file_operations = {
 	.fsync		= exofs_file_fsync,
 	.flush		= exofs_flush,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
 
 const struct inode_operations exofs_file_inode_operations = {
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 970c6aca15cc..7c87b22a7228 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -75,7 +75,7 @@ const struct file_operations ext2_file_operations = {
 	.release	= ext2_release_file,
 	.fsync		= ext2_fsync,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
 
 #ifdef CONFIG_EXT2_FS_XIP
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index c833b1226d4d..a062fa1e1b11 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -63,7 +63,7 @@ const struct file_operations ext3_file_operations = {
 	.release	= ext3_release_file,
 	.fsync		= ext3_sync_file,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
 
 const struct inode_operations ext3_file_inode_operations = {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 48383a5f37a1..708aad768199 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -599,7 +599,7 @@ const struct file_operations ext4_file_operations = {
 	.release	= ext4_release_file,
 	.fsync		= ext4_sync_file,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.fallocate	= ext4_fallocate,
 };
 
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 22f4900dd8eb..e4ba4b93f96a 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -692,5 +692,5 @@ const struct file_operations f2fs_file_operations = {
 	.compat_ioctl	= f2fs_compat_ioctl,
 #endif
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 };
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index ca932cd358d3..01b4c5b1bff8 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1068,7 +1068,7 @@ const struct file_operations gfs2_file_fops = {
 	.lock		= gfs2_lock,
 	.flock		= gfs2_flock,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.setlease	= gfs2_setlease,
 	.fallocate	= gfs2_fallocate,
 };
@@ -1098,7 +1098,7 @@ const struct file_operations gfs2_file_fops_nolock = {
 	.release	= gfs2_release,
 	.fsync		= gfs2_fsync,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.setlease	= generic_setlease,
 	.fallocate	= gfs2_fallocate,
 };
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index cc744ecaf51f..33aa0cc1f8b8 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -157,7 +157,7 @@ const struct file_operations jfs_file_operations = {
 	.write_iter	= generic_file_write_iter,
 	.mmap		= generic_file_mmap,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.fsync		= jfs_fsync,
 	.release	= jfs_release,
 	.unlocked_ioctl = jfs_ioctl,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 6ea0b9718a9d..4f56de822d2f 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -38,7 +38,7 @@ const struct file_operations ramfs_file_operations = {
 	.mmap		= generic_file_mmap,
 	.fsync		= noop_fsync,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.llseek		= generic_file_llseek,
 };
 
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 9ed420f8f3ca..dda012ad4208 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -43,7 +43,7 @@ const struct file_operations ramfs_file_operations = {
 	.write_iter		= generic_file_write_iter,
 	.fsync			= noop_fsync,
 	.splice_read		= generic_file_splice_read,
-	.splice_write		= generic_file_splice_write,
+	.splice_write		= iter_file_splice_write,
 	.llseek			= generic_file_llseek,
 };
 
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 7c8ecd6468db..f070cc827456 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -248,7 +248,7 @@ const struct file_operations reiserfs_file_operations = {
 	.read_iter = generic_file_read_iter,
 	.write_iter = generic_file_write_iter,
 	.splice_read = generic_file_splice_read,
-	.splice_write = generic_file_splice_write,
+	.splice_write = iter_file_splice_write,
 	.llseek = generic_file_llseek,
 };
 
diff --git a/fs/splice.c b/fs/splice.c
index f99e420744c7..f195a9b89fb2 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -32,6 +32,7 @@
 #include <linux/gfp.h>
 #include <linux/socket.h>
 #include <linux/compat.h>
+#include <linux/aio.h>
 #include "internal.h"
 
 /*
@@ -1052,6 +1053,145 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 
 EXPORT_SYMBOL(generic_file_splice_write);
 
+/**
+ * iter_file_splice_write - splice data from a pipe to a file
+ * @pipe:	pipe info
+ * @out:	file to write to
+ * @ppos:	position in @out
+ * @len:	number of bytes to splice
+ * @flags:	splice modifier flags
+ *
+ * Description:
+ *    Will either move or copy pages (determined by @flags options) from
+ *    the given pipe inode to the given file.
+ *    This one is ->write_iter-based.
+ *
+ */
+ssize_t
+iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+			  loff_t *ppos, size_t len, unsigned int flags)
+{
+	struct splice_desc sd = {
+		.total_len = len,
+		.flags = flags,
+		.pos = *ppos,
+		.u.file = out,
+	};
+	int nbufs = pipe->buffers;
+	struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec),
+					GFP_KERNEL);
+	ssize_t ret;
+
+	if (unlikely(!array))
+		return -ENOMEM;
+
+	pipe_lock(pipe);
+
+	splice_from_pipe_begin(&sd);
+	while (sd.total_len) {
+		struct iov_iter from;
+		struct kiocb kiocb;
+		size_t left;
+		int n, idx;
+
+		ret = splice_from_pipe_next(pipe, &sd);
+		if (ret <= 0)
+			break;
+
+		if (unlikely(nbufs < pipe->buffers)) {
+			kfree(array);
+			nbufs = pipe->buffers;
+			array = kcalloc(nbufs, sizeof(struct bio_vec),
+					GFP_KERNEL);
+			if (!array) {
+				ret = -ENOMEM;
+				break;
+			}
+		}
+
+		/* build the vector */
+		left = sd.total_len;
+		for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) {
+			struct pipe_buffer *buf = pipe->bufs + idx;
+			size_t this_len = buf->len;
+
+			if (this_len > left)
+				this_len = left;
+
+			if (idx == pipe->buffers - 1)
+				idx = -1;
+
+			ret = buf->ops->confirm(pipe, buf);
+			if (unlikely(ret)) {
+				if (ret == -ENODATA)
+					ret = 0;
+				goto done;
+			}
+
+			array[n].bv_page = buf->page;
+			array[n].bv_len = this_len;
+			array[n].bv_offset = buf->offset;
+			left -= this_len;
+		}
+
+		/* ... iov_iter */
+		from.type = ITER_BVEC | WRITE;
+		from.bvec = array;
+		from.nr_segs = n;
+		from.count = sd.total_len - left;
+		from.iov_offset = 0;
+
+		/* ... and iocb */
+		init_sync_kiocb(&kiocb, out);
+		kiocb.ki_pos = sd.pos;
+		kiocb.ki_nbytes = sd.total_len - left;
+
+		/* now, send it */
+		ret = out->f_op->write_iter(&kiocb, &from);
+		if (-EIOCBQUEUED == ret)
+			ret = wait_on_sync_kiocb(&kiocb);
+
+		if (ret <= 0)
+			break;
+
+		sd.num_spliced += ret;
+		sd.total_len -= ret;
+		*ppos = sd.pos = kiocb.ki_pos;
+
+		/* dismiss the fully eaten buffers, adjust the partial one */
+		while (ret) {
+			struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
+			if (ret >= buf->len) {
+				const struct pipe_buf_operations *ops = buf->ops;
+				ret -= buf->len;
+				buf->len = 0;
+				buf->ops = NULL;
+				ops->release(pipe, buf);
+				pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
+				pipe->nrbufs--;
+				if (pipe->files)
+					sd.need_wakeup = true;
+			} else {
+				buf->offset += ret;
+				buf->len -= ret;
+				ret = 0;
+			}
+		}
+	}
+done:
+	kfree(array);
+	splice_from_pipe_end(pipe, &sd);
+
+	pipe_unlock(pipe);
+
+	if (sd.num_spliced)
+		ret = sd.num_spliced;
+
+	return ret;
+}
+
+EXPORT_SYMBOL(iter_file_splice_write);
+
 static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 			  struct splice_desc *sd)
 {
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 6bc4e8efbccf..0888502a6041 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1585,7 +1585,7 @@ const struct file_operations ubifs_file_operations = {
 	.fsync          = ubifs_fsync,
 	.unlocked_ioctl = ubifs_ioctl,
 	.splice_read	= generic_file_splice_read,
-	.splice_write	= generic_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl   = ubifs_compat_ioctl,
 #endif
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5446e86d3485..b1c489c1fb2e 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -342,47 +342,6 @@ xfs_file_splice_read(
 	return ret;
 }
 
-/*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * couuld cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
-	struct pipe_inode_info	*pipe,
-	struct file		*outfilp,
-	loff_t			*ppos,
-	size_t			count,
-	unsigned int		flags)
-{
-	struct inode		*inode = outfilp->f_mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
-	int			ioflags = 0;
-	ssize_t			ret;
-
-	XFS_STATS_INC(xs_write_calls);
-
-	if (outfilp->f_mode & FMODE_NOCMTIME)
-		ioflags |= IO_INVIS;
-
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		return -EIO;
-
-	xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-	trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
-	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
-	if (ret > 0)
-		XFS_STATS_ADD(xs_write_bytes, ret);
-
-	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-	return ret;
-}
-
 /*
  * This routine is called to handle zeroing any space in the last block of the
  * file that is beyond the EOF.  We do this since the size is being increased
@@ -1442,7 +1401,7 @@ const struct file_operations xfs_file_operations = {
 	.read_iter	= xfs_file_read_iter,
 	.write_iter	= xfs_file_write_iter,
 	.splice_read	= xfs_file_splice_read,
-	.splice_write	= xfs_file_splice_write,
+	.splice_write	= iter_file_splice_write,
 	.unlocked_ioctl	= xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= xfs_file_compat_ioctl,
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 65d8c793a25c..53182f97cf01 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1060,7 +1060,6 @@ DEFINE_RW_EVENT(xfs_file_read);
 DEFINE_RW_EVENT(xfs_file_buffered_write);
 DEFINE_RW_EVENT(xfs_file_direct_write);
 DEFINE_RW_EVENT(xfs_file_splice_read);
-DEFINE_RW_EVENT(xfs_file_splice_write);
 
 DECLARE_EVENT_CLASS(xfs_page_class,
 	TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a6448849dbce..8bd8ed357c7b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2434,6 +2434,8 @@ extern ssize_t default_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
+extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
+		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
 		struct file *out, loff_t *, size_t len, unsigned int flags);
 
-- 
cgit 


From 96f9bc8fbc2440d90e15f02398e1de43f674b433 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Apr 2014 04:35:49 -0400
Subject: fs/splice.c: remove unneeded exports

ocfs2 was using a bunch of splice.c guts...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/splice.c            | 15 +++++----------
 include/linux/splice.h | 10 ----------
 2 files changed, 5 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/splice.c b/fs/splice.c
index f195a9b89fb2..ab84051758a7 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -738,7 +738,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
  * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
  * a new page in the output file page cache and fill/dirty that.
  */
-int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 		 struct splice_desc *sd)
 {
 	struct file *file = sd->u.file;
@@ -773,7 +773,6 @@ int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
 out:
 	return ret;
 }
-EXPORT_SYMBOL(pipe_to_file);
 
 static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
 {
@@ -803,7 +802,7 @@ static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
  *    locking is required around copying the pipe buffers to the
  *    destination.
  */
-int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
+static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
 			  splice_actor *actor)
 {
 	int ret;
@@ -850,7 +849,6 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
 
 	return 1;
 }
-EXPORT_SYMBOL(splice_from_pipe_feed);
 
 /**
  * splice_from_pipe_next - wait for some data to splice from
@@ -862,7 +860,7 @@ EXPORT_SYMBOL(splice_from_pipe_feed);
  *    value (one) if pipe buffers are available.  It will return zero
  *    or -errno if no more data needs to be spliced.
  */
-int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
+static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
 {
 	while (!pipe->nrbufs) {
 		if (!pipe->writers)
@@ -887,7 +885,6 @@ int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)
 
 	return 1;
 }
-EXPORT_SYMBOL(splice_from_pipe_next);
 
 /**
  * splice_from_pipe_begin - start splicing from pipe
@@ -898,12 +895,11 @@ EXPORT_SYMBOL(splice_from_pipe_next);
  *    splice_from_pipe_next() and splice_from_pipe_feed() to
  *    initialize the necessary fields of @sd.
  */
-void splice_from_pipe_begin(struct splice_desc *sd)
+static void splice_from_pipe_begin(struct splice_desc *sd)
 {
 	sd->num_spliced = 0;
 	sd->need_wakeup = false;
 }
-EXPORT_SYMBOL(splice_from_pipe_begin);
 
 /**
  * splice_from_pipe_end - finish splicing from pipe
@@ -915,12 +911,11 @@ EXPORT_SYMBOL(splice_from_pipe_begin);
  *    be called after a loop containing splice_from_pipe_next() and
  *    splice_from_pipe_feed().
  */
-void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
+static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)
 {
 	if (sd->need_wakeup)
 		wakeup_pipe_writers(pipe);
 }
-EXPORT_SYMBOL(splice_from_pipe_end);
 
 /**
  * __splice_from_pipe - splice data from a pipe to given actor
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 0e43906d2fda..da2751d3b93d 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -70,16 +70,6 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
 				splice_actor *);
 extern ssize_t __splice_from_pipe(struct pipe_inode_info *,
 				  struct splice_desc *, splice_actor *);
-extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *,
-				 splice_actor *);
-extern int splice_from_pipe_next(struct pipe_inode_info *,
-				 struct splice_desc *);
-extern void splice_from_pipe_begin(struct splice_desc *);
-extern void splice_from_pipe_end(struct pipe_inode_info *,
-				 struct splice_desc *);
-extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *,
-			struct splice_desc *);
-
 extern ssize_t splice_to_pipe(struct pipe_inode_info *,
 			      struct splice_pipe_desc *);
 extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
-- 
cgit 


From 5f073850602084fbcbb987948ff3e70ae273f7d2 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 5 Apr 2014 13:31:50 -0400
Subject: kill generic_file_splice_write()

no callers left

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/splice.c        | 124 -----------------------------------------------------
 include/linux/fs.h |   2 -
 2 files changed, 126 deletions(-)

(limited to 'include/linux')

diff --git a/fs/splice.c b/fs/splice.c
index ab84051758a7..8e7eef755a9b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -718,62 +718,6 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 				    sd->len, &pos, more);
 }
 
-/*
- * This is a little more tricky than the file -> pipe splicing. There are
- * basically three cases:
- *
- *	- Destination page already exists in the address space and there
- *	  are users of it. For that case we have no other option that
- *	  copying the data. Tough luck.
- *	- Destination page already exists in the address space, but there
- *	  are no users of it. Make sure it's uptodate, then drop it. Fall
- *	  through to last case.
- *	- Destination page does not exist, we can add the pipe page to
- *	  the page cache and avoid the copy.
- *
- * If asked to move pages to the output file (SPLICE_F_MOVE is set in
- * sd->flags), we attempt to migrate pages from the pipe to the output
- * file address space page cache. This is possible if no one else has
- * the pipe page referenced outside of the pipe and page cache. If
- * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
- * a new page in the output file page cache and fill/dirty that.
- */
-static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
-		 struct splice_desc *sd)
-{
-	struct file *file = sd->u.file;
-	struct address_space *mapping = file->f_mapping;
-	unsigned int offset, this_len;
-	struct page *page;
-	void *fsdata;
-	int ret;
-
-	offset = sd->pos & ~PAGE_CACHE_MASK;
-
-	this_len = sd->len;
-	if (this_len + offset > PAGE_CACHE_SIZE)
-		this_len = PAGE_CACHE_SIZE - offset;
-
-	ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
-				AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
-	if (unlikely(ret))
-		goto out;
-
-	if (buf->page != page) {
-		char *src = kmap_atomic(buf->page);
-		char *dst = kmap_atomic(page);
-
-		memcpy(dst + offset, src + buf->offset, this_len);
-		flush_dcache_page(page);
-		kunmap_atomic(dst);
-		kunmap_atomic(src);
-	}
-	ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
-				page, fsdata);
-out:
-	return ret;
-}
-
 static void wakeup_pipe_writers(struct pipe_inode_info *pipe)
 {
 	smp_mb();
@@ -980,74 +924,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	return ret;
 }
 
-/**
- * generic_file_splice_write - splice data from a pipe to a file
- * @pipe:	pipe info
- * @out:	file to write to
- * @ppos:	position in @out
- * @len:	number of bytes to splice
- * @flags:	splice modifier flags
- *
- * Description:
- *    Will either move or copy pages (determined by @flags options) from
- *    the given pipe inode to the given file.
- *
- */
-ssize_t
-generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
-			  loff_t *ppos, size_t len, unsigned int flags)
-{
-	struct address_space *mapping = out->f_mapping;
-	struct inode *inode = mapping->host;
-	struct splice_desc sd = {
-		.total_len = len,
-		.flags = flags,
-		.pos = *ppos,
-		.u.file = out,
-	};
-	ssize_t ret;
-
-	pipe_lock(pipe);
-
-	splice_from_pipe_begin(&sd);
-	do {
-		ret = splice_from_pipe_next(pipe, &sd);
-		if (ret <= 0)
-			break;
-
-		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
-		ret = file_remove_suid(out);
-		if (!ret) {
-			ret = file_update_time(out);
-			if (!ret)
-				ret = splice_from_pipe_feed(pipe, &sd,
-							    pipe_to_file);
-		}
-		mutex_unlock(&inode->i_mutex);
-	} while (ret > 0);
-	splice_from_pipe_end(pipe, &sd);
-
-	pipe_unlock(pipe);
-
-	if (sd.num_spliced)
-		ret = sd.num_spliced;
-
-	if (ret > 0) {
-		int err;
-
-		err = generic_write_sync(out, *ppos, ret);
-		if (err)
-			ret = err;
-		else
-			*ppos += ret;
-		balance_dirty_pages_ratelimited(mapping);
-	}
-
-	return ret;
-}
-
-EXPORT_SYMBOL(generic_file_splice_write);
-
 /**
  * iter_file_splice_write - splice data from a pipe to a file
  * @pipe:	pipe info
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8bd8ed357c7b..4e92d551518d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2432,8 +2432,6 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
 extern ssize_t default_file_splice_read(struct file *, loff_t *,
 		struct pipe_inode_info *, size_t, unsigned int);
-extern ssize_t generic_file_splice_write(struct pipe_inode_info *,
-		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
 		struct file *, loff_t *, size_t, unsigned int);
 extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
-- 
cgit 


From 1a539d372edd9832444e7a3daa710c444c014dc9 Mon Sep 17 00:00:00 2001
From: Tomas Pop <Tomas.Pop@sensirion.com>
Date: Thu, 5 Jun 2014 15:24:19 -0700
Subject: hwmon: add support for Sensirion SHTC1 sensor

Add support for Sensirion SHTC1 and compatible temperature and humidity
sensors.

Signed-off-by: Tomas Pop <tomas.pop@sensirion.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 Documentation/hwmon/shtc1           |  43 ++++++
 drivers/hwmon/Kconfig               |  10 ++
 drivers/hwmon/Makefile              |   1 +
 drivers/hwmon/shtc1.c               | 251 ++++++++++++++++++++++++++++++++++++
 include/linux/platform_data/shtc1.h |  23 ++++
 5 files changed, 328 insertions(+)
 create mode 100644 Documentation/hwmon/shtc1
 create mode 100644 drivers/hwmon/shtc1.c
 create mode 100644 include/linux/platform_data/shtc1.h

(limited to 'include/linux')

diff --git a/Documentation/hwmon/shtc1 b/Documentation/hwmon/shtc1
new file mode 100644
index 000000000000..6b1e05458f0f
--- /dev/null
+++ b/Documentation/hwmon/shtc1
@@ -0,0 +1,43 @@
+Kernel driver shtc1
+===================
+
+Supported chips:
+  * Sensirion SHTC1
+    Prefix: 'shtc1'
+    Addresses scanned: none
+    Datasheet: http://www.sensirion.com/file/datasheet_shtc1
+
+  * Sensirion SHTW1
+    Prefix: 'shtw1'
+    Addresses scanned: none
+    Datasheet: Not publicly available
+
+Author:
+  Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+
+Description
+-----------
+
+This driver implements support for the Sensirion SHTC1 chip, a humidity and
+temperature sensor. Temperature is measured in degrees Celsius; relative
+humidity is expressed as a percentage. The driver can also be used for the
+SHTW1 chip, which has the same electrical interface.
+
+The device communicates with the I2C protocol. All sensors are set to I2C
+address 0x70. See Documentation/i2c/instantiating-devices for methods to
+instantiate the device.
+
+There are two options configurable by means of shtc1_platform_data:
+1. blocking (pull the I2C clock line down while performing the measurement) or
+   non-blocking mode. Blocking mode will guarantee the fastest result but
+   the I2C bus will be busy during that time. By default, non-blocking mode
+   is used. Make sure clock-stretching works properly on your device if you
+   want to use blocking mode.
+2. high or low accuracy. High accuracy is used by default and using it is
+   strongly recommended.
+
+sysfs-Interface
+---------------
+
+temp1_input - temperature input
+humidity1_input - humidity input
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 00343166feb1..08531a128f53 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -1124,6 +1124,16 @@ config SENSORS_SHT21
 	  This driver can also be built as a module.  If so, the module
 	  will be called sht21.
 
+config SENSORS_SHTC1
+	tristate "Sensirion humidity and temperature sensors. SHTC1 and compat."
+	depends on I2C
+	help
+	  If you say yes here you get support for the Sensirion SHTC1 and SHTW1
+	  humidity and temperature sensors.
+
+	  This driver can also be built as a module.  If so, the module
+	  will be called shtc1.
+
 config SENSORS_S3C
 	tristate "Samsung built-in ADC"
 	depends on S3C_ADC
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 11798ad7e801..3dc0f02f71d2 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -126,6 +126,7 @@ obj-$(CONFIG_SENSORS_SCH5627)	+= sch5627.o
 obj-$(CONFIG_SENSORS_SCH5636)	+= sch5636.o
 obj-$(CONFIG_SENSORS_SHT15)	+= sht15.o
 obj-$(CONFIG_SENSORS_SHT21)	+= sht21.o
+obj-$(CONFIG_SENSORS_SHTC1)	+= shtc1.o
 obj-$(CONFIG_SENSORS_SIS5595)	+= sis5595.o
 obj-$(CONFIG_SENSORS_SMM665)	+= smm665.o
 obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o
diff --git a/drivers/hwmon/shtc1.c b/drivers/hwmon/shtc1.c
new file mode 100644
index 000000000000..decd7df995ab
--- /dev/null
+++ b/drivers/hwmon/shtc1.c
@@ -0,0 +1,251 @@
+/* Sensirion SHTC1 humidity and temperature sensor driver
+ *
+ * Copyright (C) 2014 Sensirion AG, Switzerland
+ * Author: Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/i2c.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+#include <linux/platform_data/shtc1.h>
+
+/* commands (high precision mode) */
+static const unsigned char shtc1_cmd_measure_blocking_hpm[]    = { 0x7C, 0xA2 };
+static const unsigned char shtc1_cmd_measure_nonblocking_hpm[] = { 0x78, 0x66 };
+
+/* commands (low precision mode) */
+static const unsigned char shtc1_cmd_measure_blocking_lpm[]    = { 0x64, 0x58 };
+static const unsigned char shtc1_cmd_measure_nonblocking_lpm[] = { 0x60, 0x9c };
+
+/* command for reading the ID register */
+static const unsigned char shtc1_cmd_read_id_reg[]	       = { 0xef, 0xc8 };
+
+/* constants for reading the ID register */
+#define SHTC1_ID	  0x07
+#define SHTC1_ID_REG_MASK 0x1f
+
+/* delays for non-blocking i2c commands, both in us */
+#define SHTC1_NONBLOCKING_WAIT_TIME_HPM  14400
+#define SHTC1_NONBLOCKING_WAIT_TIME_LPM   1000
+
+#define SHTC1_CMD_LENGTH      2
+#define SHTC1_RESPONSE_LENGTH 6
+
+struct shtc1_data {
+	struct i2c_client *client;
+	struct mutex update_lock;
+	bool valid;
+	unsigned long last_updated; /* in jiffies */
+
+	const unsigned char *command;
+	unsigned int nonblocking_wait_time; /* in us */
+
+	struct shtc1_platform_data setup;
+
+	int temperature; /* 1000 * temperature in dgr C */
+	int humidity; /* 1000 * relative humidity in %RH */
+};
+
+static int shtc1_update_values(struct i2c_client *client,
+			       struct shtc1_data *data,
+			       char *buf, int bufsize)
+{
+	int ret = i2c_master_send(client, data->command, SHTC1_CMD_LENGTH);
+	if (ret != SHTC1_CMD_LENGTH) {
+		dev_err(&client->dev, "failed to send command: %d\n", ret);
+		return ret < 0 ? ret : -EIO;
+	}
+
+	/*
+	 * In blocking mode (clock stretching mode) the I2C bus
+	 * is blocked for other traffic, thus the call to i2c_master_recv()
+	 * will wait until the data is ready. For non blocking mode, we
+	 * have to wait ourselves.
+	 */
+	if (!data->setup.blocking_io)
+		usleep_range(data->nonblocking_wait_time,
+			     data->nonblocking_wait_time + 1000);
+
+	ret = i2c_master_recv(client, buf, bufsize);
+	if (ret != bufsize) {
+		dev_err(&client->dev, "failed to read values: %d\n", ret);
+		return ret < 0 ? ret : -EIO;
+	}
+
+	return 0;
+}
+
+/* sysfs attributes */
+static struct shtc1_data *shtc1_update_client(struct device *dev)
+{
+	struct shtc1_data *data = dev_get_drvdata(dev);
+	struct i2c_client *client = data->client;
+	unsigned char buf[SHTC1_RESPONSE_LENGTH];
+	int val;
+	int ret = 0;
+
+	mutex_lock(&data->update_lock);
+
+	if (time_after(jiffies, data->last_updated + HZ / 10) || !data->valid) {
+		ret = shtc1_update_values(client, data, buf, sizeof(buf));
+		if (ret)
+			goto out;
+
+		/*
+		 * From datasheet:
+		 * T = -45 + 175 * ST / 2^16
+		 * RH = 100 * SRH / 2^16
+		 *
+		 * Adapted for integer fixed point (3 digit) arithmetic.
+		 */
+		val = be16_to_cpup((__be16 *)buf);
+		data->temperature = ((21875 * val) >> 13) - 45000;
+		val = be16_to_cpup((__be16 *)(buf + 3));
+		data->humidity = ((12500 * val) >> 13);
+
+		data->last_updated = jiffies;
+		data->valid = true;
+	}
+
+out:
+	mutex_unlock(&data->update_lock);
+
+	return ret == 0 ? data : ERR_PTR(ret);
+}
+
+static ssize_t temp1_input_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct shtc1_data *data = shtc1_update_client(dev);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	return sprintf(buf, "%d\n", data->temperature);
+}
+
+static ssize_t humidity1_input_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct shtc1_data *data = shtc1_update_client(dev);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	return sprintf(buf, "%d\n", data->humidity);
+}
+
+static DEVICE_ATTR_RO(temp1_input);
+static DEVICE_ATTR_RO(humidity1_input);
+
+static struct attribute *shtc1_attrs[] = {
+	&dev_attr_temp1_input.attr,
+	&dev_attr_humidity1_input.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(shtc1);
+
+static void shtc1_select_command(struct shtc1_data *data)
+{
+	if (data->setup.high_precision) {
+		data->command = data->setup.blocking_io ?
+				shtc1_cmd_measure_blocking_hpm :
+				shtc1_cmd_measure_nonblocking_hpm;
+		data->nonblocking_wait_time = SHTC1_NONBLOCKING_WAIT_TIME_HPM;
+
+	} else {
+		data->command = data->setup.blocking_io ?
+				shtc1_cmd_measure_blocking_lpm :
+				shtc1_cmd_measure_nonblocking_lpm;
+		data->nonblocking_wait_time = SHTC1_NONBLOCKING_WAIT_TIME_LPM;
+	}
+}
+
+static int shtc1_probe(struct i2c_client *client,
+		       const struct i2c_device_id *id)
+{
+	int ret;
+	char id_reg[2];
+	struct shtc1_data *data;
+	struct device *hwmon_dev;
+	struct i2c_adapter *adap = client->adapter;
+	struct device *dev = &client->dev;
+
+	if (!i2c_check_functionality(adap, I2C_FUNC_I2C)) {
+		dev_err(dev, "plain i2c transactions not supported\n");
+		return -ENODEV;
+	}
+
+	ret = i2c_master_send(client, shtc1_cmd_read_id_reg, SHTC1_CMD_LENGTH);
+	if (ret != SHTC1_CMD_LENGTH) {
+		dev_err(dev, "could not send read_id_reg command: %d\n", ret);
+		return ret < 0 ? ret : -ENODEV;
+	}
+	ret = i2c_master_recv(client, id_reg, sizeof(id_reg));
+	if (ret != sizeof(id_reg)) {
+		dev_err(dev, "could not read ID register: %d\n", ret);
+		return -ENODEV;
+	}
+	if ((id_reg[1] & SHTC1_ID_REG_MASK) != SHTC1_ID) {
+		dev_err(dev, "ID register doesn't match\n");
+		return -ENODEV;
+	}
+
+	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->setup.blocking_io = false;
+	data->setup.high_precision = true;
+	data->client = client;
+
+	if (client->dev.platform_data)
+		data->setup = *(struct shtc1_platform_data *)dev->platform_data;
+	shtc1_select_command(data);
+	mutex_init(&data->update_lock);
+
+	hwmon_dev = devm_hwmon_device_register_with_groups(dev,
+							   client->name,
+							   data,
+							   shtc1_groups);
+	if (IS_ERR(hwmon_dev))
+		dev_dbg(dev, "unable to register hwmon device\n");
+
+	return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+/* device ID table */
+static const struct i2c_device_id shtc1_id[] = {
+	{ "shtc1", 0 },
+	{ "shtw1", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, shtc1_id);
+
+static struct i2c_driver shtc1_i2c_driver = {
+	.driver.name  = "shtc1",
+	.probe        = shtc1_probe,
+	.id_table     = shtc1_id,
+};
+
+module_i2c_driver(shtc1_i2c_driver);
+
+MODULE_AUTHOR("Johannes Winkelmann <johannes.winkelmann@sensirion.com>");
+MODULE_DESCRIPTION("Sensirion SHTC1 humidity and temperature sensor driver");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/platform_data/shtc1.h b/include/linux/platform_data/shtc1.h
new file mode 100644
index 000000000000..7b8c353f7dc8
--- /dev/null
+++ b/include/linux/platform_data/shtc1.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (C) 2014 Sensirion AG, Switzerland
+ * Author: Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __SHTC1_H_
+#define __SHTC1_H_
+
+struct shtc1_platform_data {
+	bool blocking_io;
+	bool high_precision;
+};
+#endif /* __SHTC1_H_ */
-- 
cgit 


From f3db22feb5de6b98b7bae924c2d4b6c8d65bedae Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Wed, 11 Jun 2014 11:51:35 -0600
Subject: NVMe: Fix hot cpu notification dead lock

There is a potential deadlock if a CPU event occurs during nvme probe,
since each device registered for hot-CPU notification. This fixes the race
by having the module register for notification once, outside of probe,
rather than having each device register.

The actual work is done in a scheduled work queue instead of in the
notifier since assigning IO queues has the potential to block if the
driver creates additional queues.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 drivers/block/nvme-core.c | 35 +++++++++++++++++++++++++----------
 include/linux/nvme.h      |  2 +-
 2 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 2af079e571fc..e0ac1210fe31 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -73,6 +73,7 @@ static LIST_HEAD(dev_list);
 static struct task_struct *nvme_thread;
 static struct workqueue_struct *nvme_workq;
 static wait_queue_head_t nvme_kthread_wait;
+static struct notifier_block nvme_nb;
 
 static void nvme_reset_failed_dev(struct work_struct *ws);
 
@@ -2115,14 +2116,25 @@ static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
 	return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);
 }
 
+static void nvme_cpu_workfn(struct work_struct *work)
+{
+	struct nvme_dev *dev = container_of(work, struct nvme_dev, cpu_work);
+	if (dev->initialized)
+		nvme_assign_io_queues(dev);
+}
+
 static int nvme_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
-	struct nvme_dev *dev = container_of(self, struct nvme_dev, nb);
+	struct nvme_dev *dev;
+
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_DEAD:
-		nvme_assign_io_queues(dev);
+		spin_lock(&dev_list_lock);
+		list_for_each_entry(dev, &dev_list, node)
+			schedule_work(&dev->cpu_work);
+		spin_unlock(&dev_list_lock);
 		break;
 	}
 	return NOTIFY_OK;
@@ -2191,11 +2203,6 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	nvme_free_queues(dev, nr_io_queues + 1);
 	nvme_assign_io_queues(dev);
 
-	dev->nb.notifier_call = &nvme_cpu_notify;
-	result = register_hotcpu_notifier(&dev->nb);
-	if (result)
-		goto free_queues;
-
 	return 0;
 
  free_queues:
@@ -2495,8 +2502,6 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 	int i;
 
 	dev->initialized = 0;
-	unregister_hotcpu_notifier(&dev->nb);
-
 	nvme_dev_list_remove(dev);
 
 	if (!dev->bar || (dev->bar && readl(&dev->bar->csts) == -1)) {
@@ -2767,6 +2772,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	INIT_LIST_HEAD(&dev->namespaces);
 	dev->reset_workfn = nvme_reset_failed_dev;
 	INIT_WORK(&dev->reset_work, nvme_reset_workfn);
+	INIT_WORK(&dev->cpu_work, nvme_cpu_workfn);
 	dev->pci_dev = pdev;
 	pci_set_drvdata(pdev, dev);
 	result = nvme_set_instance(dev);
@@ -2836,6 +2842,7 @@ static void nvme_remove(struct pci_dev *pdev)
 
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->reset_work);
+	flush_work(&dev->cpu_work);
 	misc_deregister(&dev->miscdev);
 	nvme_dev_remove(dev);
 	nvme_dev_shutdown(dev);
@@ -2923,11 +2930,18 @@ static int __init nvme_init(void)
 	else if (result > 0)
 		nvme_major = result;
 
-	result = pci_register_driver(&nvme_driver);
+	nvme_nb.notifier_call = &nvme_cpu_notify;
+	result = register_hotcpu_notifier(&nvme_nb);
 	if (result)
 		goto unregister_blkdev;
+
+	result = pci_register_driver(&nvme_driver);
+	if (result)
+		goto unregister_hotcpu;
 	return 0;
 
+ unregister_hotcpu:
+	unregister_hotcpu_notifier(&nvme_nb);
  unregister_blkdev:
 	unregister_blkdev(nvme_major, "nvme");
  kill_workq:
@@ -2938,6 +2952,7 @@ static int __init nvme_init(void)
 static void __exit nvme_exit(void)
 {
 	pci_unregister_driver(&nvme_driver);
+	unregister_hotcpu_notifier(&nvme_nb);
 	unregister_blkdev(nvme_major, "nvme");
 	destroy_workqueue(nvme_workq);
 	BUG_ON(nvme_thread && !IS_ERR(nvme_thread));
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 8541dd920bb7..2bf403195c09 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -90,7 +90,7 @@ struct nvme_dev {
 	struct miscdevice miscdev;
 	work_func_t reset_workfn;
 	struct work_struct reset_work;
-	struct notifier_block nb;
+	struct work_struct cpu_work;
 	char name[12];
 	char serial[20];
 	char model[40];
-- 
cgit 


From 4b28252cada3d0521ab59751f4240ecdfb9bba18 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sat, 14 Jun 2014 23:23:52 -0700
Subject: net: Fix GSO constants to match NETIF flags

Joseph Gasparakis reported that VXLAN GSO offload stopped working with
i40e device after recent UDP changes. The problem is that the
SKB_GSO_* bits are out of sync with the corresponding NETIF flags. This
patch fixes that. Also, we add BUILD_BUG_ONs in net_gso_ok for several
GSO constants that were missing to avoid the problem in the future.

Reported-by: Joseph Gasparakis <joseph.gasparakis@intel.com>
Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h |  1 +
 include/linux/netdevice.h       |  7 +++++++
 include/linux/skbuff.h          | 11 ++++++-----
 3 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index e5a589435e2b..d99800cbdcf3 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -117,6 +117,7 @@ enum {
 #define NETIF_F_GSO_IPIP	__NETIF_F(GSO_IPIP)
 #define NETIF_F_GSO_SIT		__NETIF_F(GSO_SIT)
 #define NETIF_F_GSO_UDP_TUNNEL	__NETIF_F(GSO_UDP_TUNNEL)
+#define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM)
 #define NETIF_F_GSO_MPLS	__NETIF_F(GSO_MPLS)
 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
 #define NETIF_F_HW_VLAN_STAG_RX	__NETIF_F(HW_VLAN_STAG_RX)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index abe3de1db932..66f9a04ec270 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3305,6 +3305,13 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
 	BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_TCPV6   != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT));
 	BUILD_BUG_ON(SKB_GSO_FCOE    != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_GRE     != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_GRE_CSUM != (NETIF_F_GSO_GRE_CSUM >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_IPIP    != (NETIF_F_GSO_IPIP >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_SIT     != (NETIF_F_GSO_SIT >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_MPLS    != (NETIF_F_GSO_MPLS >> NETIF_F_GSO_SHIFT));
 
 	return (features & feature) == feature;
 }
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5b5cd3189c98..e13ed90be7c2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -338,17 +338,18 @@ enum {
 
 	SKB_GSO_GRE = 1 << 6,
 
-	SKB_GSO_IPIP = 1 << 7,
+	SKB_GSO_GRE_CSUM = 1 << 7,
 
-	SKB_GSO_SIT = 1 << 8,
+	SKB_GSO_IPIP = 1 << 8,
 
-	SKB_GSO_UDP_TUNNEL = 1 << 9,
+	SKB_GSO_SIT = 1 << 9,
 
-	SKB_GSO_MPLS = 1 << 10,
+	SKB_GSO_UDP_TUNNEL = 1 << 10,
 
 	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
 
-	SKB_GSO_GRE_CSUM = 1 << 12,
+	SKB_GSO_MPLS = 1 << 12,
+
 };
 
 #if BITS_PER_LONG > 32
-- 
cgit 


From e5eb4e30a51236079fb22bb9f75fcd31915b03c6 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Sat, 14 Jun 2014 23:24:28 -0700
Subject: net: add skb_pop_rcv_encapsulation

This function is used by UDP encapsulation protocols in RX when
crossing encapsulation boundary. If ip_summed is set to
CHECKSUM_UNNECESSARY and encapsulation is not set, change to
CHECKSUM_NONE since the checksum has not been validated within the
encapsulation. Clears csum_valid by the same rationale.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e13ed90be7c2..ec89301ada41 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1854,6 +1854,18 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len)
 	return pskb_may_pull(skb, skb_network_offset(skb) + len);
 }
 
+static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb)
+{
+	/* Only continue with checksum unnecessary if device indicated
+	 * it is valid across encapsulation (skb->encapsulation was set).
+	 */
+	if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation)
+		skb->ip_summed = CHECKSUM_NONE;
+
+	skb->encapsulation = 0;
+	skb->csum_valid = 0;
+}
+
 /*
  * CPUs often take a performance hit when accessing unaligned memory
  * locations. The actual performance hit varies, it can be small if the
-- 
cgit 


From a6e15a39048ec3229b9a53425f4384f55f6cc1b3 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 13 Jun 2014 13:30:35 -0700
Subject: PM / hibernate: introduce "nohibernate" boot parameter

To support using kernel features that are not compatible with hibernation,
this creates the "nohibernate" kernel boot parameter to disable both
hibernation and resume. This allows hibernation support to be a boot-time
choice instead of only a compile-time choice.

Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/kernel-parameters.txt |  3 +++
 include/linux/suspend.h             |  2 ++
 kernel/power/hibernate.c            | 31 ++++++++++++++++++++++++++++++-
 kernel/power/main.c                 |  6 ++----
 kernel/power/user.c                 |  3 +++
 5 files changed, 40 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 6eaa9cdb7094..f8f0466b8b1d 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2184,6 +2184,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			in certain environments such as networked servers or
 			real-time systems.
 
+	nohibernate	[HIBERNATION] Disable hibernation and resume.
+
 	nohz=		[KNL] Boottime enable/disable dynamic ticks
 			Valid arguments: on, off
 			Default: on
@@ -2980,6 +2982,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 		noresume	Don't check if there's a hibernation image
 				present during boot.
 		nocompress	Don't compress/decompress hibernation images.
+		no		Disable hibernation and resume.
 
 	retain_initrd	[RAM] Keep initrd memory after extraction
 
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index f76994b9396c..519064e0c943 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -327,6 +327,7 @@ extern unsigned long get_safe_page(gfp_t gfp_mask);
 extern void hibernation_set_ops(const struct platform_hibernation_ops *ops);
 extern int hibernate(void);
 extern bool system_entering_hibernation(void);
+extern bool hibernation_available(void);
 asmlinkage int swsusp_save(void);
 extern struct pbe *restore_pblist;
 #else /* CONFIG_HIBERNATION */
@@ -339,6 +340,7 @@ static inline void swsusp_unset_page_free(struct page *p) {}
 static inline void hibernation_set_ops(const struct platform_hibernation_ops *ops) {}
 static inline int hibernate(void) { return -ENOSYS; }
 static inline bool system_entering_hibernation(void) { return false; }
+static inline bool hibernation_available(void) { return false; }
 #endif /* CONFIG_HIBERNATION */
 
 /* Hibernation and suspend events */
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 49e0a20fd010..258f492f0347 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -35,6 +35,7 @@
 
 static int nocompress;
 static int noresume;
+static int nohibernate;
 static int resume_wait;
 static unsigned int resume_delay;
 static char resume_file[256] = CONFIG_PM_STD_PARTITION;
@@ -62,6 +63,11 @@ bool freezer_test_done;
 
 static const struct platform_hibernation_ops *hibernation_ops;
 
+bool hibernation_available(void)
+{
+	return (nohibernate == 0);
+}
+
 /**
  * hibernation_set_ops - Set the global hibernate operations.
  * @ops: Hibernation operations to use in subsequent hibernation transitions.
@@ -642,6 +648,11 @@ int hibernate(void)
 {
 	int error;
 
+	if (!hibernation_available()) {
+		pr_debug("PM: Hibernation not available.\n");
+		return -EPERM;
+	}
+
 	lock_system_sleep();
 	/* The snapshot device should not be opened while we're running */
 	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
@@ -734,7 +745,7 @@ static int software_resume(void)
 	/*
 	 * If the user said "noresume".. bail out early.
 	 */
-	if (noresume)
+	if (noresume || !hibernation_available())
 		return 0;
 
 	/*
@@ -900,6 +911,9 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
 	int i;
 	char *start = buf;
 
+	if (!hibernation_available())
+		return sprintf(buf, "[disabled]\n");
+
 	for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
 		if (!hibernation_modes[i])
 			continue;
@@ -934,6 +948,9 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
 	char *p;
 	int mode = HIBERNATION_INVALID;
 
+	if (!hibernation_available())
+		return -EPERM;
+
 	p = memchr(buf, '\n', n);
 	len = p ? p - buf : n;
 
@@ -1101,6 +1118,10 @@ static int __init hibernate_setup(char *str)
 		noresume = 1;
 	else if (!strncmp(str, "nocompress", 10))
 		nocompress = 1;
+	else if (!strncmp(str, "no", 2)) {
+		noresume = 1;
+		nohibernate = 1;
+	}
 	return 1;
 }
 
@@ -1125,9 +1146,17 @@ static int __init resumedelay_setup(char *str)
 	return 1;
 }
 
+static int __init nohibernate_setup(char *str)
+{
+	noresume = 1;
+	nohibernate = 1;
+	return 1;
+}
+
 __setup("noresume", noresume_setup);
 __setup("resume_offset=", resume_offset_setup);
 __setup("resume=", resume_setup);
 __setup("hibernate=", hibernate_setup);
 __setup("resumewait", resumewait_setup);
 __setup("resumedelay=", resumedelay_setup);
+__setup("nohibernate", nohibernate_setup);
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 573410d6647e..8e90f330f139 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -300,13 +300,11 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
 			s += sprintf(s,"%s ", pm_states[i].label);
 
 #endif
-#ifdef CONFIG_HIBERNATION
-	s += sprintf(s, "%s\n", "disk");
-#else
+	if (hibernation_available())
+		s += sprintf(s, "disk ");
 	if (s != buf)
 		/* convert the last space to a newline */
 		*(s-1) = '\n';
-#endif
 	return (s - buf);
 }
 
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 98d357584cd6..526e8911460a 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -49,6 +49,9 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 	struct snapshot_data *data;
 	int error;
 
+	if (!hibernation_available())
+		return -EPERM;
+
 	lock_system_sleep();
 
 	if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
-- 
cgit 


From 736ed4de766d4f0e8e6142dd4f9d73ef61835ed9 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 17 Jun 2014 22:09:29 -0700
Subject: block: blk_max_size_offset() should check ->max_sectors

Commit 762380ad9322 inadvertently changed a check for max_sectors
to max_hw_sectors. Revert that part, so we still compare against
max_sectors.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/blkdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 31e11051f1ba..713f8b62b435 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -920,7 +920,7 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q,
 					       sector_t offset)
 {
 	if (!q->limits.chunk_sectors)
-		return q->limits.max_hw_sectors;
+		return q->limits.max_sectors;
 
 	return q->limits.chunk_sectors -
 			(offset & (q->limits.chunk_sectors - 1));
-- 
cgit 


From 8537b12034cf1fd3fab3da2c859d71f76846fae9 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@redhat.com>
Date: Tue, 17 Jun 2014 22:12:35 -0700
Subject: blk-mq: bitmap tag: fix races on shared ::wake_index fields

Fix racy updates of shared blk_mq_bitmap_tags::wake_index
and blk_mq_hw_ctx::wake_index fields.

Cc: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Alexander Gordeev <agordeev@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq-tag.c     | 32 +++++++++++++++++++++-----------
 block/blk-mq-tag.h     |  2 +-
 include/linux/blk-mq.h |  2 +-
 3 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 1aab39f71d95..6deb13055490 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -43,9 +43,16 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
 	return bt_has_free_tags(&tags->bitmap_tags);
 }
 
-static inline void bt_index_inc(unsigned int *index)
+static inline int bt_index_inc(int index)
 {
-	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
+	return (index + 1) & (BT_WAIT_QUEUES - 1);
+}
+
+static inline void bt_index_atomic_inc(atomic_t *index)
+{
+	int old = atomic_read(index);
+	int new = bt_index_inc(old);
+	atomic_cmpxchg(index, old, new);
 }
 
 /*
@@ -69,14 +76,14 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
 	int i, wake_index;
 
 	bt = &tags->bitmap_tags;
-	wake_index = bt->wake_index;
+	wake_index = atomic_read(&bt->wake_index);
 	for (i = 0; i < BT_WAIT_QUEUES; i++) {
 		struct bt_wait_state *bs = &bt->bs[wake_index];
 
 		if (waitqueue_active(&bs->wait))
 			wake_up(&bs->wait);
 
-		bt_index_inc(&wake_index);
+		wake_index = bt_index_inc(wake_index);
 	}
 }
 
@@ -212,12 +219,14 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
 					 struct blk_mq_hw_ctx *hctx)
 {
 	struct bt_wait_state *bs;
+	int wait_index;
 
 	if (!hctx)
 		return &bt->bs[0];
 
-	bs = &bt->bs[hctx->wait_index];
-	bt_index_inc(&hctx->wait_index);
+	wait_index = atomic_read(&hctx->wait_index);
+	bs = &bt->bs[wait_index];
+	bt_index_atomic_inc(&hctx->wait_index);
 	return bs;
 }
 
@@ -313,18 +322,19 @@ static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
 {
 	int i, wake_index;
 
-	wake_index = bt->wake_index;
+	wake_index = atomic_read(&bt->wake_index);
 	for (i = 0; i < BT_WAIT_QUEUES; i++) {
 		struct bt_wait_state *bs = &bt->bs[wake_index];
 
 		if (waitqueue_active(&bs->wait)) {
-			if (wake_index != bt->wake_index)
-				bt->wake_index = wake_index;
+			int o = atomic_read(&bt->wake_index);
+			if (wake_index != o)
+				atomic_cmpxchg(&bt->wake_index, o, wake_index);
 
 			return bs;
 		}
 
-		bt_index_inc(&wake_index);
+		wake_index = bt_index_inc(wake_index);
 	}
 
 	return NULL;
@@ -344,7 +354,7 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
 	bs = bt_wake_ptr(bt);
 	if (bs && atomic_dec_and_test(&bs->wait_cnt)) {
 		atomic_set(&bs->wait_cnt, bt->wake_cnt);
-		bt_index_inc(&bt->wake_index);
+		bt_index_atomic_inc(&bt->wake_index);
 		wake_up(&bs->wait);
 	}
 }
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 98696a65d4d4..6206ed17ef76 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -24,7 +24,7 @@ struct blk_mq_bitmap_tags {
 	unsigned int map_nr;
 	struct blk_align_bitmap *map;
 
-	unsigned int wake_index;
+	atomic_t wake_index;
 	struct bt_wait_state *bs;
 };
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a002cf191427..eb726b9c5762 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -42,7 +42,7 @@ struct blk_mq_hw_ctx {
 	unsigned int		nr_ctx;
 	struct blk_mq_ctx	**ctxs;
 
-	unsigned int		wait_index;
+	atomic_t		wait_index;
 
 	struct blk_mq_tags	*tags;
 
-- 
cgit 


From 2b8f2a28eac1d35a432705d269f02bdaeba9be8f Mon Sep 17 00:00:00 2001
From: Daniel Mack <zonque@gmail.com>
Date: Wed, 18 Jun 2014 11:01:41 +0200
Subject: net: phylib: add link_change_notify callback to phy device

Add a notify callback to inform phy drivers when the core is about to
do its link adjustment. No change for drivers that do not implement
this callback.

Signed-off-by: Daniel Mack <zonque@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 3 +++
 include/linux/phy.h   | 9 +++++++++
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 3bc079a67a3d..f7c61812ea4a 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -720,6 +720,9 @@ void phy_state_machine(struct work_struct *work)
 
 	mutex_lock(&phydev->lock);
 
+	if (phydev->drv->link_change_notify)
+		phydev->drv->link_change_notify(phydev);
+
 	switch (phydev->state) {
 	case PHY_DOWN:
 	case PHY_STARTING:
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 864ddafad8cc..68041446c450 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -536,6 +536,15 @@ struct phy_driver {
 	/* See set_wol, but for checking whether Wake on LAN is enabled. */
 	void (*get_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol);
 
+	/*
+	 * Called to inform a PHY device driver when the core is about to
+	 * change the link state. This callback is supposed to be used as
+	 * fixup hook for drivers that need to take action when the link
+	 * state changes. Drivers are by no means allowed to mess with the
+	 * PHY device structure in their implementations.
+	 */
+	void (*link_change_notify)(struct phy_device *dev);
+
 	struct device_driver driver;
 };
 #define to_phy_driver(d) container_of(d, struct phy_driver, driver)
-- 
cgit 


From e567bf7112518824830978d644dfb5a991e67d54 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Sun, 22 Jun 2014 16:32:48 -0600
Subject: Revert "block: add __init to elv_register"

This reverts commit b5097e956a4d2919ee248d6481e4204c5568ed5c.

The original commit is buggy: we do use the registration functions
at runtime, for instance when loading IO schedulers through sysfs.

Reported-by: Damien Wyart <damien.wyart@gmail.com>
---
 block/elevator.c         | 2 +-
 include/linux/elevator.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/elevator.c b/block/elevator.c
index 34bded18910e..24c28b659bb3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -825,7 +825,7 @@ void elv_unregister_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(elv_unregister_queue);
 
-int __init elv_register(struct elevator_type *e)
+int elv_register(struct elevator_type *e)
 {
 	char *def = "";
 
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index e2a6bd7fb133..45a91474487d 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -143,7 +143,7 @@ extern void elv_drain_elevator(struct request_queue *);
  * io scheduler registration
  */
 extern void __init load_default_elevator_module(void);
-extern int __init elv_register(struct elevator_type *);
+extern int elv_register(struct elevator_type *);
 extern void elv_unregister(struct elevator_type *);
 
 /*
-- 
cgit 


From b3acc56bfe1287c6b666e80edc70b89eea2a1a80 Mon Sep 17 00:00:00 2001
From: Petr Tesarik <ptesarik@suse.cz>
Date: Mon, 23 Jun 2014 13:22:03 -0700
Subject: kexec: save PG_head_mask in VMCOREINFO

To allow filtering of huge pages, makedumpfile must be able to identify
them in the dump.  This can be done by checking the appropriate page
flag, so communicate its value to makedumpfile through the VMCOREINFO
interface.

There's only one small catch.  Depending on how many page flags are
available on a given architecture, this bit can be called PG_head or
PG_compound.

I sent a similar patch back in 2012, but Eric Biederman did not like
using an #ifdef.  So, this time I'm adding a common symbol
(PG_head_mask) instead.

See https://lkml.org/lkml/2012/11/28/91 for the previous version.

Signed-off-by: Petr Tesarik <ptesarik@suse.cz>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Shaohua Li <shli@kernel.org>
Cc: Alexey Kardashevskiy <aik@ozlabs.ru>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 3 +++
 kernel/kexec.c             | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3c545b48aeab..8304959ad336 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -360,6 +360,9 @@ static inline void ClearPageCompound(struct page *page)
 	ClearPageHead(page);
 }
 #endif
+
+#define PG_head_mask ((1L << PG_head))
+
 #else
 /*
  * Reduce page flag use as much as possible by overlapping
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 6748688813d0..369f41a94124 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1617,6 +1617,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 #ifdef CONFIG_MEMORY_FAILURE
 	VMCOREINFO_NUMBER(PG_hwpoison);
 #endif
+	VMCOREINFO_NUMBER(PG_head_mask);
 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
 
 	arch_crash_save_vmcoreinfo();
-- 
cgit 


From f3aca3d09525f87731ba6b892c9b010570bc54b4 Mon Sep 17 00:00:00 2001
From: Aaron Tomlin <atomlin@redhat.com>
Date: Mon, 23 Jun 2014 13:22:05 -0700
Subject: nmi: provide the option to issue an NMI back trace to every cpu but
 current

Sometimes it is preferred not to use the trigger_all_cpu_backtrace()
routine when one wants to avoid capturing a back trace for current, for
instance if one has already been captured recently.

This patch provides a new routine namely
trigger_allbutself_cpu_backtrace() which offers the flexibility to issue
an NMI to every cpu but current and capture a back trace accordingly.

Patch x86 and sparc to support new routine.

[dzickus@redhat.com: add stub in #else clause]
[dzickus@redhat.com: don't print message in single processor case, wrap with get/put_cpu based on Oleg's suggestion]
[sfr@canb.auug.org.au: undo C99ism]
Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Acked-by: David S. Miller <davem@davemloft.net>
Cc: Mateusz Guzik <mguzik@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sparc/include/asm/irq_64.h |  2 +-
 arch/sparc/kernel/process_64.c  | 18 ++++++++++++------
 arch/x86/include/asm/irq.h      |  2 +-
 arch/x86/kernel/apic/hw_nmi.c   | 18 ++++++++++++++----
 include/linux/nmi.h             | 11 ++++++++++-
 5 files changed, 38 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
index 375cffcf7dbd..91d219381306 100644
--- a/arch/sparc/include/asm/irq_64.h
+++ b/arch/sparc/include/asm/irq_64.h
@@ -89,7 +89,7 @@ static inline unsigned long get_softint(void)
 	return retval;
 }
 
-void arch_trigger_all_cpu_backtrace(void);
+void arch_trigger_all_cpu_backtrace(bool);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
 
 extern void *hardirq_stack[NR_CPUS];
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index b2988f25e230..027e09986194 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -239,7 +239,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp)
 	}
 }
 
-void arch_trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(bool include_self)
 {
 	struct thread_info *tp = current_thread_info();
 	struct pt_regs *regs = get_irq_regs();
@@ -251,16 +251,22 @@ void arch_trigger_all_cpu_backtrace(void)
 
 	spin_lock_irqsave(&global_cpu_snapshot_lock, flags);
 
-	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
-
 	this_cpu = raw_smp_processor_id();
 
-	__global_reg_self(tp, regs, this_cpu);
+	memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
+
+	if (include_self)
+		__global_reg_self(tp, regs, this_cpu);
 
 	smp_fetch_global_regs();
 
 	for_each_online_cpu(cpu) {
-		struct global_reg_snapshot *gp = &global_cpu_snapshot[cpu].reg;
+		struct global_reg_snapshot *gp;
+
+		if (!include_self && cpu == this_cpu)
+			continue;
+
+		gp = &global_cpu_snapshot[cpu].reg;
 
 		__global_reg_poll(gp);
 
@@ -292,7 +298,7 @@ void arch_trigger_all_cpu_backtrace(void)
 
 static void sysrq_handle_globreg(int key)
 {
-	arch_trigger_all_cpu_backtrace();
+	arch_trigger_all_cpu_backtrace(true);
 }
 
 static struct sysrq_key_op sparc_globalreg_op = {
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index cb6cfcd034cf..a80cbb88ea91 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -43,7 +43,7 @@ extern int vector_used_by_percpu_irq(unsigned int vector);
 extern void init_ISA_irqs(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
-void arch_trigger_all_cpu_backtrace(void);
+void arch_trigger_all_cpu_backtrace(bool);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
 #endif
 
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index c3fcb5de5083..6a1e71bde323 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -33,31 +33,41 @@ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 /* "in progress" flag of arch_trigger_all_cpu_backtrace */
 static unsigned long backtrace_flag;
 
-void arch_trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(bool include_self)
 {
 	int i;
+	int cpu = get_cpu();
 
-	if (test_and_set_bit(0, &backtrace_flag))
+	if (test_and_set_bit(0, &backtrace_flag)) {
 		/*
 		 * If there is already a trigger_all_cpu_backtrace() in progress
 		 * (backtrace_flag == 1), don't output double cpu dump infos.
 		 */
+		put_cpu();
 		return;
+	}
 
 	cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
+	if (!include_self)
+		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
 
-	printk(KERN_INFO "sending NMI to all CPUs:\n");
-	apic->send_IPI_all(NMI_VECTOR);
+	if (!cpumask_empty(to_cpumask(backtrace_mask))) {
+		pr_info("sending NMI to %s CPUs:\n",
+			(include_self ? "all" : "other"));
+		apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR);
+	}
 
 	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
 	for (i = 0; i < 10 * 1000; i++) {
 		if (cpumask_empty(to_cpumask(backtrace_mask)))
 			break;
 		mdelay(1);
+		touch_softlockup_watchdog();
 	}
 
 	clear_bit(0, &backtrace_flag);
 	smp_mb__after_atomic();
+	put_cpu();
 }
 
 static int
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 6a45fb583ff1..a17ab6398d7c 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -32,15 +32,24 @@ static inline void touch_nmi_watchdog(void)
 #ifdef arch_trigger_all_cpu_backtrace
 static inline bool trigger_all_cpu_backtrace(void)
 {
-	arch_trigger_all_cpu_backtrace();
+	arch_trigger_all_cpu_backtrace(true);
 
 	return true;
 }
+static inline bool trigger_allbutself_cpu_backtrace(void)
+{
+	arch_trigger_all_cpu_backtrace(false);
+	return true;
+}
 #else
 static inline bool trigger_all_cpu_backtrace(void)
 {
 	return false;
 }
+static inline bool trigger_allbutself_cpu_backtrace(void)
+{
+	return false;
+}
 #endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
-- 
cgit 


From ed235875e2ca983197831337a986f0517074e1a0 Mon Sep 17 00:00:00 2001
From: Aaron Tomlin <atomlin@redhat.com>
Date: Mon, 23 Jun 2014 13:22:05 -0700
Subject: kernel/watchdog.c: print traces for all cpus on lockup detection

A 'softlockup' is defined as a bug that causes the kernel to loop in
kernel mode for more than a predefined period of time, without giving
other tasks a chance to run.

Currently, upon detection of this condition by the per-cpu watchdog
task, debug information (including a stack trace) is sent to the system
log.

On some occasions, we have observed that the "victim" rather than the
actual "culprit" (i.e.  the owner/holder of the contended resource) is
reported to the user.  Often this information has proven to be
insufficient to assist debugging efforts.

To avoid loss of useful debug information, for architectures which
support NMI, this patch makes it possible to improve soft lockup
reporting.  This is accomplished by issuing an NMI to each cpu to obtain
a stack trace.

If NMI is not supported we just revert back to the old method.  A sysctl
and a boot-time parameter are available to toggle this feature.

[dzickus@redhat.com: add CONFIG_SMP in certain areas]
[akpm@linux-foundation.org: additional CONFIG_SMP=n optimisations]
[mq@suse.cz: fix warning]
Signed-off-by: Aaron Tomlin <atomlin@redhat.com>
Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Mateusz Guzik <mguzik@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Jan Moskyto Matejka <mq@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/kernel-parameters.txt |  5 +++++
 Documentation/sysctl/kernel.txt     | 17 ++++++++++++++++
 include/linux/nmi.h                 |  1 +
 kernel/sysctl.c                     | 11 +++++++++++
 kernel/watchdog.c                   | 39 +++++++++++++++++++++++++++++++++++++
 5 files changed, 73 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 884904975d0b..c1b9aa8c5a52 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3130,6 +3130,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			[KNL] Should the soft-lockup detector generate panics.
 			Format: <integer>
 
+	softlockup_all_cpu_backtrace=
+			[KNL] Should the soft-lockup detector generate
+			backtraces on all cpus.
+			Format: <integer>
+
 	sonypi.*=	[HW] Sony Programmable I/O Control Device driver
 			See Documentation/laptops/sonypi.txt
 
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 708bb7f1b7e0..c14374e71775 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -75,6 +75,7 @@ show up in /proc/sys/kernel:
 - shmall
 - shmmax                      [ sysv ipc ]
 - shmmni
+- softlockup_all_cpu_backtrace
 - stop-a                      [ SPARC only ]
 - sysrq                       ==> Documentation/sysrq.txt
 - sysctl_writes_strict
@@ -783,6 +784,22 @@ via the /proc/sys interface:
 
 ==============================================================
 
+softlockup_all_cpu_backtrace:
+
+This value controls the soft lockup detector thread's behavior
+when a soft lockup condition is detected as to whether or not
+to gather further debug information. If enabled, each cpu will
+be issued an NMI and instructed to capture stack trace.
+
+This feature is only applicable for architectures which support
+NMI.
+
+0: do nothing. This is the default behavior.
+
+1: on detection capture more debug information.
+
+==============================================================
+
 tainted:
 
 Non-zero if the kernel has been tainted.  Numeric values, which
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index a17ab6398d7c..447775ee2c4b 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -57,6 +57,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *);
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
 extern int watchdog_user_enabled;
 extern int watchdog_thresh;
+extern int sysctl_softlockup_all_cpu_backtrace;
 struct ctl_table;
 extern int proc_dowatchdog(struct ctl_table *, int ,
 			   void __user *, size_t *, loff_t *);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 075d1903138f..75b22e22a72c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -860,6 +860,17 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#ifdef CONFIG_SMP
+	{
+		.procname	= "softlockup_all_cpu_backtrace",
+		.data		= &sysctl_softlockup_all_cpu_backtrace,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
+#endif /* CONFIG_SMP */
 	{
 		.procname       = "nmi_watchdog",
 		.data           = &watchdog_user_enabled,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 30e482240dae..c3319bd1b040 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -31,6 +31,12 @@
 
 int watchdog_user_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
+#ifdef CONFIG_SMP
+int __read_mostly sysctl_softlockup_all_cpu_backtrace;
+#else
+#define sysctl_softlockup_all_cpu_backtrace 0
+#endif
+
 static int __read_mostly watchdog_running;
 static u64 __read_mostly sample_period;
 
@@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
+static unsigned long soft_lockup_nmi_warn;
 
 /* boot commands */
 /*
@@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str)
 }
 __setup("nosoftlockup", nosoftlockup_setup);
 /*  */
+#ifdef CONFIG_SMP
+static int __init softlockup_all_cpu_backtrace_setup(char *str)
+{
+	sysctl_softlockup_all_cpu_backtrace =
+		!!simple_strtol(str, NULL, 0);
+	return 1;
+}
+__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#endif
 
 /*
  * Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -271,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
+	int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
 
 	/* kick the hardlockup detector */
 	watchdog_interrupt_count();
@@ -317,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		if (__this_cpu_read(soft_watchdog_warn) == true)
 			return HRTIMER_RESTART;
 
+		if (softlockup_all_cpu_backtrace) {
+			/* Prevent multiple soft-lockup reports if one cpu is already
+			 * engaged in dumping cpu back traces
+			 */
+			if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
+				/* Someone else will report us. Let's give up */
+				__this_cpu_write(soft_watchdog_warn, true);
+				return HRTIMER_RESTART;
+			}
+		}
+
 		printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
 			smp_processor_id(), duration,
 			current->comm, task_pid_nr(current));
@@ -327,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 		else
 			dump_stack();
 
+		if (softlockup_all_cpu_backtrace) {
+			/* Avoid generating two back traces for current
+			 * given that one is already made above
+			 */
+			trigger_allbutself_cpu_backtrace();
+
+			clear_bit(0, &soft_lockup_nmi_warn);
+			/* Barrier to sync with other cpus */
+			smp_mb__after_atomic();
+		}
+
 		if (softlockup_panic)
 			panic("softlockup: hung tasks");
 		__this_cpu_write(soft_watchdog_warn, true);
-- 
cgit 


From 3a4b0eda8e4b27e6aca86f9f4d327c1070815e30 Mon Sep 17 00:00:00 2001
From: Gu Zheng <guz.fnst@cn.fujitsu.com>
Date: Tue, 24 Jun 2014 18:10:26 +0800
Subject: bio: remove unused macro bip_vec_idx()

Macro bip_vec_idx() was originally used by bio integrity, but is no longer
used. So remove it.

Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 include/linux/bio.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5a645769f020..f91decbca96b 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -644,10 +644,6 @@ struct biovec_slab {
 
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 
-
-
-#define bip_vec_idx(bip, idx)	(&(bip->bip_vec[(idx)]))
-
 #define bip_for_each_vec(bvl, bip, iter)				\
 	for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter)
 
-- 
cgit 


From 66cb45aa41315d1d9972cada354fbdf7870d7714 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Tue, 24 Jun 2014 16:22:24 -0600
Subject: block: add support for limiting gaps in SG lists

Another restriction inherited for NVMe - those devices don't support
SG lists that have "gaps" in them. Gaps refer to cases where the
previous SG entry doesn't end on a page boundary. For NVMe, all SG
entries must start at offset 0 (except the first) and end on a page
boundary (except the last).

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/bio.c            |  8 ++++++++
 block/blk-merge.c      | 10 ++++++++++
 include/linux/bio.h    |  9 +++++++++
 include/linux/blkdev.h |  1 +
 4 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 8c2e55e39a1b..0ec61c9e536c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -746,6 +746,14 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 
 			goto done;
 		}
+
+		/*
+		 * If the queue doesn't support SG gaps and adding this
+		 * offset would create a gap, disallow it.
+		 */
+		if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS) &&
+		    bvec_gap_to_prev(prev, offset))
+			return 0;
 	}
 
 	if (bio->bi_vcnt >= bio->bi_max_vecs)
diff --git a/block/blk-merge.c b/block/blk-merge.c
index b3bf0df0f4c2..54535831f1e1 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -568,6 +568,8 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
 
 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 {
+	struct request_queue *q = rq->q;
+
 	if (!rq_mergeable(rq) || !bio_mergeable(bio))
 		return false;
 
@@ -591,6 +593,14 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 	    !blk_write_same_mergeable(rq->bio, bio))
 		return false;
 
+	if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) {
+		struct bio_vec *bprev;
+
+		bprev = &rq->biotail->bi_io_vec[bio->bi_vcnt - 1];
+		if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset))
+			return false;
+	}
+
 	return true;
 }
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index f91decbca96b..d2633ee099d9 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -186,6 +186,15 @@ static inline void *bio_data(struct bio *bio)
 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
 	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q)))
 
+/*
+ * Check if adding a bio_vec after bprv with offset would create a gap in
+ * the SG list. Most drivers don't care about this, but some do.
+ */
+static inline bool bvec_gap_to_prev(struct bio_vec *bprv, unsigned int offset)
+{
+	return offset || ((bprv->bv_offset + bprv->bv_len) & (PAGE_SIZE - 1));
+}
+
 #define bio_io_error(bio) bio_endio((bio), -EIO)
 
 /*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 713f8b62b435..8699bcf5f099 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -512,6 +512,7 @@ struct request_queue {
 #define QUEUE_FLAG_DEAD        19	/* queue tear-down finished */
 #define QUEUE_FLAG_INIT_DONE   20	/* queue is initialized */
 #define QUEUE_FLAG_NO_SG_MERGE 21	/* don't attempt to merge SG segments*/
+#define QUEUE_FLAG_SG_GAPS     22	/* queue doesn't support SG gaps */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
-- 
cgit 


From 0b86dbf675e0170a191a9ca18e5e99fd39a678c0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 23 Jun 2014 08:44:40 +0100
Subject: Fix 32-bit regression in block device read(2)

blkdev_read_iter() wants to cap the iov_iter by the amount of data
remaining to the end of device.  That's what iov_iter_truncate() is for
(trim iter->count if it's above the given limit).  So far, so good, but
the argument of iov_iter_truncate() is size_t, so on 32bit boxen (in
case of a large device) we end up with that upper limit truncated down
to 32 bits *before* comparing it with iter->count.

Easily fixed by making iov_iter_truncate() take 64bit argument - it does
the right thing after such change (we only reach the assignment in there
when the current value of iter->count is greater than the limit, i.e.
for anything that would get truncated we don't reach the assignment at
all) and that argument is not the new value of iter->count - it's an
upper limit for such.

The overhead of passing u64 is not an issue - the thing is inlined, so
callers passing size_t won't pay any penalty.

Reported-and-tested-by: Theodore Tso <tytso@mit.edu>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Tested-by: Alan Cox <gnomes@lxorguk.ukuu.org.uk>
Tested-by: Bruno Wolff III <bruno@wolff.to>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/uio.h | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index e2231e47cec1..d54985e0705e 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -94,8 +94,20 @@ static inline size_t iov_iter_count(struct iov_iter *i)
 	return i->count;
 }
 
-static inline void iov_iter_truncate(struct iov_iter *i, size_t count)
+/*
+ * Cap the iov_iter by given limit; note that the second argument is
+ * *not* the new size - it's upper limit for such.  Passing it a value
+ * greater than the amount of data in iov_iter is fine - it'll just do
+ * nothing in that case.
+ */
+static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
 {
+	/*
+	 * count doesn't have to fit in size_t - comparison extends both
+	 * operands to u64 here and any value that would be truncated by
+	 * conversion in assignment is by definition greater than all
+	 * values of size_t, including old i->count.
+	 */
 	if (i->count > count)
 		i->count = count;
 }
-- 
cgit 


From ac5ccdba3a1659b3517e7e99ef7d35a6a2d77cf4 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 19 Jun 2014 21:22:56 +0300
Subject: iovec: move memcpy_from/toiovecend to lib/iovec.c

ERROR: "memcpy_fromiovecend" [drivers/vhost/vhost_scsi.ko] undefined!

commit 9f977ef7b671f6169eca78bf40f230fe84b7c7e5
    vhost-scsi: Include prot_bytes into expected data transfer length
in target-pending makes drivers/vhost/scsi.c call memcpy_fromiovecend().
This function is not available when CONFIG_NET is not enabled.

socket.h already includes uio.h, so no callers need updating.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 include/linux/socket.h |  4 ----
 include/linux/uio.h    |  5 ++++-
 lib/iovec.c            | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/core/iovec.c       | 55 --------------------------------------------------
 4 files changed, 59 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 8e98297f1388..ec538fc287a6 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -305,8 +305,6 @@ struct ucred {
 /* IPX options */
 #define IPX_TYPE	1
 
-extern int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
-			       int offset, int len);
 extern int csum_partial_copy_fromiovecend(unsigned char *kdata, 
 					  struct iovec *iov, 
 					  int offset, 
@@ -315,8 +313,6 @@ extern unsigned long iov_pages(const struct iovec *iov, int offset,
 			       unsigned long nr_segs);
 
 extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *address, int mode);
-extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata,
-			     int offset, int len);
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
 
diff --git a/include/linux/uio.h b/include/linux/uio.h
index e2231e47cec1..04c8c4bb4927 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -111,6 +111,9 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
 
 int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len);
 int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len);
-
+int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
+			int offset, int len);
+int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata,
+		      int offset, int len);
 
 #endif
diff --git a/lib/iovec.c b/lib/iovec.c
index 454baa88bf27..7a7c2da4cddf 100644
--- a/lib/iovec.c
+++ b/lib/iovec.c
@@ -51,3 +51,58 @@ int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len)
 	return 0;
 }
 EXPORT_SYMBOL(memcpy_toiovec);
+
+/*
+ *	Copy kernel to iovec. Returns -EFAULT on error.
+ */
+
+int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
+		      int offset, int len)
+{
+	int copy;
+	for (; len > 0; ++iov) {
+		/* Skip over the finished iovecs */
+		if (unlikely(offset >= iov->iov_len)) {
+			offset -= iov->iov_len;
+			continue;
+		}
+		copy = min_t(unsigned int, iov->iov_len - offset, len);
+		if (copy_to_user(iov->iov_base + offset, kdata, copy))
+			return -EFAULT;
+		offset = 0;
+		kdata += copy;
+		len -= copy;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(memcpy_toiovecend);
+
+/*
+ *	Copy iovec to kernel. Returns -EFAULT on error.
+ */
+
+int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
+			int offset, int len)
+{
+	/* Skip over the finished iovecs */
+	while (offset >= iov->iov_len) {
+		offset -= iov->iov_len;
+		iov++;
+	}
+
+	while (len > 0) {
+		u8 __user *base = iov->iov_base + offset;
+		int copy = min_t(unsigned int, len, iov->iov_len - offset);
+
+		offset = 0;
+		if (copy_from_user(kdata, base, copy))
+			return -EFAULT;
+		len -= copy;
+		kdata += copy;
+		iov++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(memcpy_fromiovecend);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index b61869429f4c..827dd6beb49c 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -74,61 +74,6 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
 	return err;
 }
 
-/*
- *	Copy kernel to iovec. Returns -EFAULT on error.
- */
-
-int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata,
-		      int offset, int len)
-{
-	int copy;
-	for (; len > 0; ++iov) {
-		/* Skip over the finished iovecs */
-		if (unlikely(offset >= iov->iov_len)) {
-			offset -= iov->iov_len;
-			continue;
-		}
-		copy = min_t(unsigned int, iov->iov_len - offset, len);
-		if (copy_to_user(iov->iov_base + offset, kdata, copy))
-			return -EFAULT;
-		offset = 0;
-		kdata += copy;
-		len -= copy;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(memcpy_toiovecend);
-
-/*
- *	Copy iovec to kernel. Returns -EFAULT on error.
- */
-
-int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov,
-			int offset, int len)
-{
-	/* Skip over the finished iovecs */
-	while (offset >= iov->iov_len) {
-		offset -= iov->iov_len;
-		iov++;
-	}
-
-	while (len > 0) {
-		u8 __user *base = iov->iov_base + offset;
-		int copy = min_t(unsigned int, len, iov->iov_len - offset);
-
-		offset = 0;
-		if (copy_from_user(kdata, base, copy))
-			return -EFAULT;
-		len -= copy;
-		kdata += copy;
-		iov++;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(memcpy_fromiovecend);
-
 /*
  *	And now for the all-in-one: copy and checksum from a user iovec
  *	directly to a datagram
-- 
cgit 


From 4e26445faad366d67d7723622bf6a60a6f0f5993 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizefan@huawei.com>
Date: Mon, 30 Jun 2014 11:50:28 +0800
Subject: kernfs: introduce kernfs_pin_sb()

kernfs_pin_sb() tries to get a refcnt of the superblock.

This will be used by cgroupfs.

v2:
- make kernfs_pin_sb() return the superblock.
- drop kernfs_drop_sb().

tj: Updated the comment a bit.

[ This is a prerequisite for a bugfix. ]
Cc: <stable@vger.kernel.org> # 3.15
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Li Zefan <lizefan@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 fs/kernfs/mount.c      | 30 ++++++++++++++++++++++++++++++
 include/linux/kernfs.h |  1 +
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d171b98a6cdd..f973ae9b05f1 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -211,6 +211,36 @@ void kernfs_kill_sb(struct super_block *sb)
 	kernfs_put(root_kn);
 }
 
+/**
+ * kernfs_pin_sb: try to pin the superblock associated with a kernfs_root
+ * @root: the kernfs_root in question
+ * @ns: the namespace tag
+ *
+ * Pin the superblock so the superblock won't be destroyed in subsequent
+ * operations.  This can be used to block ->kill_sb() which may be useful
+ * for kernfs users which dynamically manage superblocks.
+ *
+ * Returns NULL if there's no superblock associated with this kernfs_root, or
+ * ERR_PTR(-EINVAL) if the superblock is being freed.
+ */
+struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns)
+{
+	struct kernfs_super_info *info;
+	struct super_block *sb = NULL;
+
+	mutex_lock(&kernfs_mutex);
+	list_for_each_entry(info, &root->supers, node) {
+		if (info->ns == ns) {
+			sb = info->sb;
+			if (!atomic_inc_not_zero(&info->sb->s_active))
+				sb = ERR_PTR(-EINVAL);
+			break;
+		}
+	}
+	mutex_unlock(&kernfs_mutex);
+	return sb;
+}
+
 void __init kernfs_init(void)
 {
 	kernfs_node_cache = kmem_cache_create("kernfs_node_cache",
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 17aa1cce6f8e..20f493564917 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -304,6 +304,7 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
 			       struct kernfs_root *root, unsigned long magic,
 			       bool *new_sb_created, const void *ns);
 void kernfs_kill_sb(struct super_block *sb);
+struct super_block *kernfs_pin_sb(struct kernfs_root *root, const void *ns);
 
 void kernfs_init(void);
 
-- 
cgit 


From b14bf2d0c0358140041d1c1805a674376964d0e0 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 30 Jun 2014 11:04:21 -0400
Subject: usb-storage/SCSI: Add broken_fua blacklist flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some buggy JMicron USB-ATA bridges don't know how to translate the FUA
bit in READs or WRITEs.  This patch adds an entry in unusual_devs.h
and a blacklist flag to tell the sd driver not to use FUA.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Reported-by: Michael Büsch <m@bues.ch>
Tested-by: Michael Büsch <m@bues.ch>
Acked-by: James Bottomley <James.Bottomley@HansenPartnership.com>
CC: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
CC: <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/sd.c                  | 5 ++++-
 drivers/usb/storage/scsiglue.c     | 4 ++++
 drivers/usb/storage/unusual_devs.h | 7 +++++++
 include/linux/usb_usual.h          | 4 +++-
 include/scsi/scsi_device.h         | 1 +
 5 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index e9689d57ccb6..6825eda1114a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2441,7 +2441,10 @@ sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer)
 		}
 
 		sdkp->DPOFUA = (data.device_specific & 0x10) != 0;
-		if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw) {
+		if (sdp->broken_fua) {
+			sd_first_printk(KERN_NOTICE, sdkp, "Disabling FUA\n");
+			sdkp->DPOFUA = 0;
+		} else if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw) {
 			sd_first_printk(KERN_NOTICE, sdkp,
 				  "Uses READ/WRITE(6), disabling FUA\n");
 			sdkp->DPOFUA = 0;
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 9d38ddc8da49..866b5df36ed1 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -256,6 +256,10 @@ static int slave_configure(struct scsi_device *sdev)
 		if (us->fflags & US_FL_WRITE_CACHE)
 			sdev->wce_default_on = 1;
 
+		/* A few buggy USB-ATA bridges don't understand FUA */
+		if (us->fflags & US_FL_BROKEN_FUA)
+			sdev->broken_fua = 1;
+
 	} else {
 
 		/* Non-disk-type devices don't need to blacklist any pages
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 174a447868cd..80a5b366255f 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1936,6 +1936,13 @@ UNUSUAL_DEV(  0x14cd, 0x6600, 0x0201, 0x0201,
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_IGNORE_RESIDUE ),
 
+/* Reported by Michael Büsch <m@bues.ch> */
+UNUSUAL_DEV(  0x152d, 0x0567, 0x0114, 0x0114,
+		"JMicron",
+		"USB to ATA/ATAPI Bridge",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_BROKEN_FUA ),
+
 /* Reported by Alexandre Oliva <oliva@lsd.ic.unicamp.br>
  * JMicron responds to USN and several other SCSI ioctls with a
  * residue that causes subsequent I/O requests to fail.  */
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index 1a64b26046ed..9b7de1b46437 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -70,7 +70,9 @@
 	US_FLAG(NEEDS_CAP16,	0x00400000)			\
 		/* cannot handle READ_CAPACITY_10 */		\
 	US_FLAG(IGNORE_UAS,	0x00800000)			\
-		/* Device advertises UAS but it is broken */
+		/* Device advertises UAS but it is broken */	\
+	US_FLAG(BROKEN_FUA,	0x01000000)			\
+		/* Cannot handle FUA in WRITE or READ CDBs */	\
 
 #define US_FLAG(name, value)	US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 5853c913d2b0..27ab31017f09 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -173,6 +173,7 @@ struct scsi_device {
 	unsigned is_visible:1;	/* is the device visible in sysfs */
 	unsigned wce_default_on:1;	/* Cache is ON by default */
 	unsigned no_dif:1;	/* T10 PI (DIF) should be disabled */
+	unsigned broken_fua:1;		/* Don't set FUA bit */
 
 	atomic_t disk_events_disable_depth; /* disable depth for disk events */
 
-- 
cgit 


From 330d282216d6e4d845a21b72572dc4df4122e8fa Mon Sep 17 00:00:00 2001
From: Zhengyu He <hzy@google.com>
Date: Tue, 1 Jul 2014 12:11:47 -0700
Subject: core: fix typo in percpu read_mostly section

This fixes a typo that named the read_mostly section of percpu as
readmostly. It works fine with SMP because the linker script specifies
.data..percpu..readmostly. However, UP kernel builds don't have percpu
sections defined and the non-percpu version of the section is called
.data..read_mostly, so .data..readmostly will float around and may break
things unexpectedly.

Looking at the original change that introduced data..percpu..readmostly
(commit c957ef2c59e952803766ddc22e89981ab534606f), it looks like this
was the original intention.

Tested: Built UP kernel and confirmed the sections got merged.

- Before the patch:
$ objdump -h vmlinux.o  | grep '\.data\.\.read.*mostly'
38 .data..read_mostly 00004418  0000000000000000  0000000000000000  00431ac0  2**6
50 .data..readmostly 00000014  0000000000000000  0000000000000000  00444000  2**3

- After the patch:
$ objdump -h vmlinux.o  | grep '\.data\.\.read.*mostly'
38 .data..read_mostly 00004438  0000000000000000  0000000000000000  00431ac0  2**6

Signed-off-by: Zhengyu He <hzy@google.com>
Signed-off-by: Filipe Brandenburger <filbranden@google.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/asm-generic/vmlinux.lds.h | 2 +-
 include/linux/percpu-defs.h       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 471ba48c7ae4..c1c0b0cf39b4 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -693,7 +693,7 @@
 	. = ALIGN(PAGE_SIZE);						\
 	*(.data..percpu..page_aligned)					\
 	. = ALIGN(cacheline);						\
-	*(.data..percpu..readmostly)					\
+	*(.data..percpu..read_mostly)					\
 	. = ALIGN(cacheline);						\
 	*(.data..percpu)						\
 	*(.data..percpu..shared_aligned)				\
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index a5fc7d01aad6..dec01d6c3f80 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -146,10 +146,10 @@
  * Declaration/definition used for per-CPU variables that must be read mostly.
  */
 #define DECLARE_PER_CPU_READ_MOSTLY(type, name)			\
-	DECLARE_PER_CPU_SECTION(type, name, "..readmostly")
+	DECLARE_PER_CPU_SECTION(type, name, "..read_mostly")
 
 #define DEFINE_PER_CPU_READ_MOSTLY(type, name)				\
-	DEFINE_PER_CPU_SECTION(type, name, "..readmostly")
+	DEFINE_PER_CPU_SECTION(type, name, "..read_mostly")
 
 /*
  * Intermodule exports for per-CPU variables.  sparse forgets about
-- 
cgit 


From ecca47ce8294843045e7465d76fee84dbf07a004 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 1 Jul 2014 16:41:03 -0400
Subject: kernfs: kernfs_notify() must be useable from non-sleepable contexts

d911d9874801 ("kernfs: make kernfs_notify() trigger inotify events
too") added fsnotify triggering to kernfs_notify() which requires a
sleepable context.  There are already existing users of
kernfs_notify() which invoke it from an atomic context and in general
it's silly to require a sleepable context for triggering a
notification.

The following is an invalid context bug triggered by md invoking
sysfs_notify() from IO completion path.

 BUG: sleeping function called from invalid context at kernel/locking/mutex.c:586
 in_atomic(): 1, irqs_disabled(): 1, pid: 0, name: swapper/1
 2 locks held by swapper/1/0:
  #0:  (&(&vblk->vq_lock)->rlock){-.-...}, at: [<ffffffffa0039042>] virtblk_done+0x42/0xe0 [virtio_blk]
  #1:  (&(&bitmap->counts.lock)->rlock){-.....}, at: [<ffffffff81633718>] bitmap_endwrite+0x68/0x240
 irq event stamp: 33518
 hardirqs last  enabled at (33515): [<ffffffff8102544f>] default_idle+0x1f/0x230
 hardirqs last disabled at (33516): [<ffffffff818122ed>] common_interrupt+0x6d/0x72
 softirqs last  enabled at (33518): [<ffffffff810a1272>] _local_bh_enable+0x22/0x50
 softirqs last disabled at (33517): [<ffffffff810a29e0>] irq_enter+0x60/0x80
 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 3.16.0-0.rc2.git2.1.fc21.x86_64 #1
 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
  0000000000000000 f90db13964f4ee05 ffff88007d403b80 ffffffff81807b4c
  0000000000000000 ffff88007d403ba8 ffffffff810d4f14 0000000000000000
  0000000000441800 ffff880078fa1780 ffff88007d403c38 ffffffff8180caf2
 Call Trace:
  <IRQ>  [<ffffffff81807b4c>] dump_stack+0x4d/0x66
  [<ffffffff810d4f14>] __might_sleep+0x184/0x240
  [<ffffffff8180caf2>] mutex_lock_nested+0x42/0x440
  [<ffffffff812d76a0>] kernfs_notify+0x90/0x150
  [<ffffffff8163377c>] bitmap_endwrite+0xcc/0x240
  [<ffffffffa00de863>] close_write+0x93/0xb0 [raid1]
  [<ffffffffa00df029>] r1_bio_write_done+0x29/0x50 [raid1]
  [<ffffffffa00e0474>] raid1_end_write_request+0xe4/0x260 [raid1]
  [<ffffffff813acb8b>] bio_endio+0x6b/0xa0
  [<ffffffff813b46c4>] blk_update_request+0x94/0x420
  [<ffffffff813bf0ea>] blk_mq_end_io+0x1a/0x70
  [<ffffffffa00392c2>] virtblk_request_done+0x32/0x80 [virtio_blk]
  [<ffffffff813c0648>] __blk_mq_complete_request+0x88/0x120
  [<ffffffff813c070a>] blk_mq_complete_request+0x2a/0x30
  [<ffffffffa0039066>] virtblk_done+0x66/0xe0 [virtio_blk]
  [<ffffffffa002535a>] vring_interrupt+0x3a/0xa0 [virtio_ring]
  [<ffffffff81116177>] handle_irq_event_percpu+0x77/0x340
  [<ffffffff8111647d>] handle_irq_event+0x3d/0x60
  [<ffffffff81119436>] handle_edge_irq+0x66/0x130
  [<ffffffff8101c3e4>] handle_irq+0x84/0x150
  [<ffffffff818146ad>] do_IRQ+0x4d/0xe0
  [<ffffffff818122f2>] common_interrupt+0x72/0x72
  <EOI>  [<ffffffff8105f706>] ? native_safe_halt+0x6/0x10
  [<ffffffff81025454>] default_idle+0x24/0x230
  [<ffffffff81025f9f>] arch_cpu_idle+0xf/0x20
  [<ffffffff810f5adc>] cpu_startup_entry+0x37c/0x7b0
  [<ffffffff8104df1b>] start_secondary+0x25b/0x300

This patch fixes it by punting the notification delivery through a
work item.  This ends up adding an extra pointer to kernfs_elem_attr
enlarging kernfs_node by a pointer, which is not ideal but not a very
big deal either.  If this turns out to be an actual issue, we can move
kernfs_elem_attr->size to kernfs_node->iattr later.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Josh Boyer <jwboyer@fedoraproject.org>
Cc: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/kernfs/file.c       | 69 ++++++++++++++++++++++++++++++++++++++++----------
 include/linux/kernfs.h |  1 +
 2 files changed, 56 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e3d37f607f97..d895b4b7b661 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -39,6 +39,19 @@ struct kernfs_open_node {
 	struct list_head	files; /* goes through kernfs_open_file.list */
 };
 
+/*
+ * kernfs_notify() may be called from any context and bounces notifications
+ * through a work item.  To minimize space overhead in kernfs_node, the
+ * pending queue is implemented as a singly linked list of kernfs_nodes.
+ * The list is terminated with the self pointer so that whether a
+ * kernfs_node is on the list or not can be determined by testing the next
+ * pointer for NULL.
+ */
+#define KERNFS_NOTIFY_EOL			((void *)&kernfs_notify_list)
+
+static DEFINE_SPINLOCK(kernfs_notify_lock);
+static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
+
 static struct kernfs_open_file *kernfs_of(struct file *file)
 {
 	return ((struct seq_file *)file->private_data)->private;
@@ -783,24 +796,25 @@ static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait)
 	return DEFAULT_POLLMASK|POLLERR|POLLPRI;
 }
 
-/**
- * kernfs_notify - notify a kernfs file
- * @kn: file to notify
- *
- * Notify @kn such that poll(2) on @kn wakes up.
- */
-void kernfs_notify(struct kernfs_node *kn)
+static void kernfs_notify_workfn(struct work_struct *work)
 {
-	struct kernfs_root *root = kernfs_root(kn);
+	struct kernfs_node *kn;
 	struct kernfs_open_node *on;
 	struct kernfs_super_info *info;
-	unsigned long flags;
-
-	if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
+repeat:
+	/* pop one off the notify_list */
+	spin_lock_irq(&kernfs_notify_lock);
+	kn = kernfs_notify_list;
+	if (kn == KERNFS_NOTIFY_EOL) {
+		spin_unlock_irq(&kernfs_notify_lock);
 		return;
+	}
+	kernfs_notify_list = kn->attr.notify_next;
+	kn->attr.notify_next = NULL;
+	spin_unlock_irq(&kernfs_notify_lock);
 
 	/* kick poll */
-	spin_lock_irqsave(&kernfs_open_node_lock, flags);
+	spin_lock_irq(&kernfs_open_node_lock);
 
 	on = kn->attr.open;
 	if (on) {
@@ -808,12 +822,12 @@ void kernfs_notify(struct kernfs_node *kn)
 		wake_up_interruptible(&on->poll);
 	}
 
-	spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
+	spin_unlock_irq(&kernfs_open_node_lock);
 
 	/* kick fsnotify */
 	mutex_lock(&kernfs_mutex);
 
-	list_for_each_entry(info, &root->supers, node) {
+	list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
 		struct inode *inode;
 		struct dentry *dentry;
 
@@ -833,6 +847,33 @@ void kernfs_notify(struct kernfs_node *kn)
 	}
 
 	mutex_unlock(&kernfs_mutex);
+	kernfs_put(kn);
+	goto repeat;
+}
+
+/**
+ * kernfs_notify - notify a kernfs file
+ * @kn: file to notify
+ *
+ * Notify @kn such that poll(2) on @kn wakes up.  May be called from any
+ * context.
+ */
+void kernfs_notify(struct kernfs_node *kn)
+{
+	static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
+	unsigned long flags;
+
+	if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
+		return;
+
+	spin_lock_irqsave(&kernfs_notify_lock, flags);
+	if (!kn->attr.notify_next) {
+		kernfs_get(kn);
+		kn->attr.notify_next = kernfs_notify_list;
+		kernfs_notify_list = kn;
+		schedule_work(&kernfs_notify_work);
+	}
+	spin_unlock_irqrestore(&kernfs_notify_lock, flags);
 }
 EXPORT_SYMBOL_GPL(kernfs_notify);
 
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 17aa1cce6f8e..145375ea0bd9 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -91,6 +91,7 @@ struct kernfs_elem_attr {
 	const struct kernfs_ops	*ops;
 	struct kernfs_open_node	*open;
 	loff_t			size;
+	struct kernfs_node	*notify_next;	/* for kernfs_notify() */
 };
 
 /*
-- 
cgit 


From b9cd18de4db3c9ffa7e17b0dc0ca99ed5aa4d43a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 3 Jul 2014 15:43:15 -0400
Subject: ptrace,x86: force IRET path after a ptrace_stop()

The 'sysret' fastpath does not correctly restore even all regular
registers, much less any segment registers or reflags values.  That is
very much part of why it's faster than 'iret'.

Normally that isn't a problem, because the normal ptrace() interface
catches the process using the signal handler infrastructure, which
always returns with an iret.

However, some paths can get caught using ptrace_event() instead of the
signal path, and for those we need to make sure that we aren't going to
return to user space using 'sysret'.  Otherwise the modifications that
may have been done to the register set by the tracer wouldn't
necessarily take effect.

Fix it by forcing IRET path by setting TIF_NOTIFY_RESUME from
arch_ptrace_stop_needed() which is invoked from ptrace_stop().

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/ptrace.h | 16 ++++++++++++++++
 include/linux/ptrace.h        |  3 +++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 14fd6fd75a19..6205f0c434db 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -231,6 +231,22 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
 
 #define ARCH_HAS_USER_SINGLE_STEP_INFO
 
+/*
+ * When hitting ptrace_stop(), we cannot return using SYSRET because
+ * that does not restore the full CPU state, only a minimal set.  The
+ * ptracer can change arbitrary register values, which is usually okay
+ * because the usual ptrace stops run off the signal delivery path which
+ * forces IRET; however, ptrace_event() stops happen in arbitrary places
+ * in the kernel and don't force IRET path.
+ *
+ * So force IRET path after a ptrace stop.
+ */
+#define arch_ptrace_stop_needed(code, info)				\
+({									\
+	set_thread_flag(TIF_NOTIFY_RESUME);				\
+	false;								\
+})
+
 struct user_desc;
 extern int do_get_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info);
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 077904c8b70d..cc79eff4a1ad 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -334,6 +334,9 @@ static inline void user_single_step_siginfo(struct task_struct *tsk,
  * calling arch_ptrace_stop() when it would be superfluous.  For example,
  * if the thread has not been back to user mode since the last stop, the
  * thread state might indicate that nothing needs to be done.
+ *
+ * This is guaranteed to be invoked once before a task stops for ptrace and
+ * may include arch-specific operations necessary prior to a ptrace stop.
  */
 #define arch_ptrace_stop_needed(code, info)	(0)
 #endif
-- 
cgit