From 01df530c2791610727e345b3dd97ef75943c7320 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Wed, 6 Feb 2013 12:40:28 -0300
Subject: [media] bttv: convert to the control framework

Note that the private chroma agc control has been replaced with the standard
CHROMA_AGC control.
Also fixes a mute/automute problem where closing the file handle would force
mute on. That's not what you want since that would make the mute state out of
sync with the mute control. Instead check against the user count.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/uapi/linux/v4l2-controls.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi/linux')
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index dcd63745e83a..1d00ca9f0ba3 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -146,6 +146,11 @@ enum v4l2_colorfx {
  * of controls. We reserve 16 controls for this driver. */
 #define V4L2_CID_USER_MEYE_BASE			(V4L2_CID_USER_BASE + 0x1000)
 
+/* The base for the bttv driver controls.
+ * We reserve 32 controls for this driver. */
+#define V4L2_CID_USER_BTTV_BASE			(V4L2_CID_USER_BASE + 0x1010)
+
+
 /* MPEG-class control IDs */
 
 #define V4L2_CID_MPEG_BASE 			(V4L2_CTRL_CLASS_MPEG | 0x900)
-- 
cgit 


From 53bf0f446bc387eabdd535dca080789cc74607f4 Mon Sep 17 00:00:00 2001
From: Kamil Debski <k.debski@samsung.com>
Date: Fri, 25 Jan 2013 06:29:56 -0300
Subject: [media] v4l: Define video buffer flag for the COPY timestamp type

Define video buffer flag for the COPY timestamp. In this case the timestamp
value is copied from the OUTPUT to the corresponding CAPTURE buffer.

Signed-off-by: Kamil Debski <k.debski@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Reviewed-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/v4l/io.xml | 6 ++++++
 include/uapi/linux/videodev2.h         | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/io.xml b/Documentation/DocBook/media/v4l/io.xml
index e6c58559ca6b..2c4c068dde83 100644
--- a/Documentation/DocBook/media/v4l/io.xml
+++ b/Documentation/DocBook/media/v4l/io.xml
@@ -1145,6 +1145,12 @@ in which case caches have not been used.</entry>
 	    same clock outside V4L2, use
 	    <function>clock_gettime(2)</function> .</entry>
 	  </row>
+	  <row>
+	    <entry><constant>V4L2_BUF_FLAG_TIMESTAMP_COPY</constant></entry>
+	    <entry>0x4000</entry>
+	    <entry>The CAPTURE buffer timestamp has been taken from the
+	    corresponding OUTPUT buffer. This flag applies only to mem2mem devices.</entry>
+	  </row>
 	</tbody>
       </tgroup>
     </table>
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 234d1d870914..b5f5cddcf1c3 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -705,6 +705,7 @@ struct v4l2_buffer {
 #define V4L2_BUF_FLAG_TIMESTAMP_MASK		0xe000
 #define V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN		0x0000
 #define V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC	0x2000
+#define V4L2_BUF_FLAG_TIMESTAMP_COPY		0x4000
 
 /**
  * struct v4l2_exportbuffer - export of video buffer as DMABUF file descriptor
-- 
cgit 


From 192f1e78cb9cbc1a2cee866f5e03a52857e648b6 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Fri, 15 Feb 2013 05:51:21 -0300
Subject: [media] s2255: convert to the control framework

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/usb/s2255/s2255drv.c | 172 ++++++++++++-------------------------
 include/uapi/linux/v4l2-controls.h |   4 +
 2 files changed, 59 insertions(+), 117 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/usb/s2255/s2255drv.c b/drivers/media/usb/s2255/s2255drv.c
index 498c57ea5d32..2dcb29b647f0 100644
--- a/drivers/media/usb/s2255/s2255drv.c
+++ b/drivers/media/usb/s2255/s2255drv.c
@@ -47,6 +47,7 @@
 #include <media/v4l2-common.h>
 #include <media/v4l2-device.h>
 #include <media/v4l2-ioctl.h>
+#include <media/v4l2-ctrls.h>
 #include <linux/vmalloc.h>
 #include <linux/usb.h>
 
@@ -217,6 +218,7 @@ struct s2255_dev;
 
 struct s2255_channel {
 	struct video_device	vdev;
+	struct v4l2_ctrl_handler hdl;
 	int			resources;
 	struct s2255_dmaqueue	vidq;
 	struct s2255_bufferi	buffer;
@@ -336,7 +338,7 @@ struct s2255_fh {
  */
 #define S2255_V4L2_YC_ON  1
 #define S2255_V4L2_YC_OFF 0
-#define V4L2_CID_PRIVATE_COLORFILTER (V4L2_CID_PRIVATE_BASE + 0)
+#define V4L2_CID_S2255_COLORFILTER (V4L2_CID_USER_S2255_BASE + 0)
 
 /* frame prefix size (sent once every frame) */
 #define PREFIX_SIZE		512
@@ -810,28 +812,6 @@ static void res_free(struct s2255_fh *fh)
 	dprintk(1, "res: put\n");
 }
 
-static int vidioc_querymenu(struct file *file, void *priv,
-			    struct v4l2_querymenu *qmenu)
-{
-	static const char *colorfilter[] = {
-		"Off",
-		"On",
-		NULL
-	};
-	if (qmenu->id == V4L2_CID_PRIVATE_COLORFILTER) {
-		int i;
-		const char **menu_items = colorfilter;
-		for (i = 0; i < qmenu->index && menu_items[i]; i++)
-			; /* do nothing (from v4l2-common.c) */
-		if (menu_items[i] == NULL || menu_items[i][0] == '\0')
-			return -EINVAL;
-		strlcpy(qmenu->name, menu_items[qmenu->index],
-			sizeof(qmenu->name));
-		return 0;
-	}
-	return v4l2_ctrl_query_menu(qmenu, NULL, NULL);
-}
-
 static int vidioc_querycap(struct file *file, void *priv,
 			   struct v4l2_capability *cap)
 {
@@ -1427,109 +1407,32 @@ static int vidioc_s_input(struct file *file, void *priv, unsigned int i)
 	return 0;
 }
 
-/* --- controls ---------------------------------------------- */
-static int vidioc_queryctrl(struct file *file, void *priv,
-			    struct v4l2_queryctrl *qc)
+static int s2255_s_ctrl(struct v4l2_ctrl *ctrl)
 {
-	struct s2255_fh *fh = priv;
-	struct s2255_channel *channel = fh->channel;
-	struct s2255_dev *dev = fh->dev;
-	switch (qc->id) {
-	case V4L2_CID_BRIGHTNESS:
-		v4l2_ctrl_query_fill(qc, -127, 127, 1, DEF_BRIGHT);
-		break;
-	case V4L2_CID_CONTRAST:
-		v4l2_ctrl_query_fill(qc, 0, 255, 1, DEF_CONTRAST);
-		break;
-	case V4L2_CID_SATURATION:
-		v4l2_ctrl_query_fill(qc, 0, 255, 1, DEF_SATURATION);
-		break;
-	case V4L2_CID_HUE:
-		v4l2_ctrl_query_fill(qc, 0, 255, 1, DEF_HUE);
-		break;
-	case V4L2_CID_PRIVATE_COLORFILTER:
-		if (dev->dsp_fw_ver < S2255_MIN_DSP_COLORFILTER)
-			return -EINVAL;
-		if ((dev->pid == 0x2257) && (channel->idx > 1))
-			return -EINVAL;
-		strlcpy(qc->name, "Color Filter", sizeof(qc->name));
-		qc->type = V4L2_CTRL_TYPE_MENU;
-		qc->minimum = 0;
-		qc->maximum = 1;
-		qc->step = 1;
-		qc->default_value = 1;
-		qc->flags = 0;
-		break;
-	default:
-		return -EINVAL;
-	}
-	dprintk(4, "%s, id %d\n", __func__, qc->id);
-	return 0;
-}
-
-static int vidioc_g_ctrl(struct file *file, void *priv,
-			 struct v4l2_control *ctrl)
-{
-	struct s2255_fh *fh = priv;
-	struct s2255_dev *dev = fh->dev;
-	struct s2255_channel *channel = fh->channel;
-	switch (ctrl->id) {
-	case V4L2_CID_BRIGHTNESS:
-		ctrl->value = channel->mode.bright;
-		break;
-	case V4L2_CID_CONTRAST:
-		ctrl->value = channel->mode.contrast;
-		break;
-	case V4L2_CID_SATURATION:
-		ctrl->value = channel->mode.saturation;
-		break;
-	case V4L2_CID_HUE:
-		ctrl->value = channel->mode.hue;
-		break;
-	case V4L2_CID_PRIVATE_COLORFILTER:
-		if (dev->dsp_fw_ver < S2255_MIN_DSP_COLORFILTER)
-			return -EINVAL;
-		if ((dev->pid == 0x2257) && (channel->idx > 1))
-			return -EINVAL;
-		ctrl->value = !((channel->mode.color & MASK_INPUT_TYPE) >> 16);
-		break;
-	default:
-		return -EINVAL;
-	}
-	dprintk(4, "%s, id %d val %d\n", __func__, ctrl->id, ctrl->value);
-	return 0;
-}
-
-static int vidioc_s_ctrl(struct file *file, void *priv,
-			 struct v4l2_control *ctrl)
-{
-	struct s2255_fh *fh = priv;
-	struct s2255_channel *channel = fh->channel;
-	struct s2255_dev *dev = to_s2255_dev(channel->vdev.v4l2_dev);
+	struct s2255_channel *channel =
+		container_of(ctrl->handler, struct s2255_channel, hdl);
 	struct s2255_mode mode;
+
 	mode = channel->mode;
 	dprintk(4, "%s\n", __func__);
+
 	/* update the mode to the corresponding value */
 	switch (ctrl->id) {
 	case V4L2_CID_BRIGHTNESS:
-		mode.bright = ctrl->value;
+		mode.bright = ctrl->val;
 		break;
 	case V4L2_CID_CONTRAST:
-		mode.contrast = ctrl->value;
+		mode.contrast = ctrl->val;
 		break;
 	case V4L2_CID_HUE:
-		mode.hue = ctrl->value;
+		mode.hue = ctrl->val;
 		break;
 	case V4L2_CID_SATURATION:
-		mode.saturation = ctrl->value;
+		mode.saturation = ctrl->val;
 		break;
-	case V4L2_CID_PRIVATE_COLORFILTER:
-		if (dev->dsp_fw_ver < S2255_MIN_DSP_COLORFILTER)
-			return -EINVAL;
-		if ((dev->pid == 0x2257) && (channel->idx > 1))
-			return -EINVAL;
+	case V4L2_CID_S2255_COLORFILTER:
 		mode.color &= ~MASK_INPUT_TYPE;
-		mode.color |= ((ctrl->value ? 0 : 1) << 16);
+		mode.color |= !ctrl->val << 16;
 		break;
 	default:
 		return -EINVAL;
@@ -1539,7 +1442,7 @@ static int vidioc_s_ctrl(struct file *file, void *priv,
 	   some V4L programs restart stream unnecessarily
 	   after a s_crtl.
 	*/
-	s2255_set_mode(fh->channel, &mode);
+	s2255_set_mode(channel, &mode);
 	return 0;
 }
 
@@ -1886,7 +1789,6 @@ static const struct v4l2_file_operations s2255_fops_v4l = {
 };
 
 static const struct v4l2_ioctl_ops s2255_ioctl_ops = {
-	.vidioc_querymenu = vidioc_querymenu,
 	.vidioc_querycap = vidioc_querycap,
 	.vidioc_enum_fmt_vid_cap = vidioc_enum_fmt_vid_cap,
 	.vidioc_g_fmt_vid_cap = vidioc_g_fmt_vid_cap,
@@ -1900,9 +1802,6 @@ static const struct v4l2_ioctl_ops s2255_ioctl_ops = {
 	.vidioc_enum_input = vidioc_enum_input,
 	.vidioc_g_input = vidioc_g_input,
 	.vidioc_s_input = vidioc_s_input,
-	.vidioc_queryctrl = vidioc_queryctrl,
-	.vidioc_g_ctrl = vidioc_g_ctrl,
-	.vidioc_s_ctrl = vidioc_s_ctrl,
 	.vidioc_streamon = vidioc_streamon,
 	.vidioc_streamoff = vidioc_streamoff,
 	.vidioc_s_jpegcomp = vidioc_s_jpegcomp,
@@ -1915,8 +1814,13 @@ static const struct v4l2_ioctl_ops s2255_ioctl_ops = {
 static void s2255_video_device_release(struct video_device *vdev)
 {
 	struct s2255_dev *dev = to_s2255_dev(vdev->v4l2_dev);
-	dprintk(4, "%s, chnls: %d \n", __func__,
+	struct s2255_channel *channel =
+		container_of(vdev, struct s2255_channel, vdev);
+
+	v4l2_ctrl_handler_free(&channel->hdl);
+	dprintk(4, "%s, chnls: %d\n", __func__,
 		atomic_read(&dev->num_channels));
+
 	if (atomic_dec_and_test(&dev->num_channels))
 		s2255_destroy(dev);
 	return;
@@ -1931,6 +1835,20 @@ static struct video_device template = {
 	.current_norm = V4L2_STD_NTSC_M,
 };
 
+static const struct v4l2_ctrl_ops s2255_ctrl_ops = {
+	.s_ctrl = s2255_s_ctrl,
+};
+
+static const struct v4l2_ctrl_config color_filter_ctrl = {
+	.ops = &s2255_ctrl_ops,
+	.name = "Color Filter",
+	.id = V4L2_CID_S2255_COLORFILTER,
+	.type = V4L2_CTRL_TYPE_BOOLEAN,
+	.max = 1,
+	.step = 1,
+	.def = 1,
+};
+
 static int s2255_probe_v4l(struct s2255_dev *dev)
 {
 	int ret;
@@ -1945,9 +1863,29 @@ static int s2255_probe_v4l(struct s2255_dev *dev)
 	for (i = 0; i < MAX_CHANNELS; i++) {
 		channel = &dev->channel[i];
 		INIT_LIST_HEAD(&channel->vidq.active);
+
+		v4l2_ctrl_handler_init(&channel->hdl, 5);
+		v4l2_ctrl_new_std(&channel->hdl, &s2255_ctrl_ops,
+				V4L2_CID_BRIGHTNESS, -127, 127, 1, DEF_BRIGHT);
+		v4l2_ctrl_new_std(&channel->hdl, &s2255_ctrl_ops,
+				V4L2_CID_CONTRAST, 0, 255, 1, DEF_CONTRAST);
+		v4l2_ctrl_new_std(&channel->hdl, &s2255_ctrl_ops,
+				V4L2_CID_SATURATION, 0, 255, 1, DEF_SATURATION);
+		v4l2_ctrl_new_std(&channel->hdl, &s2255_ctrl_ops,
+				V4L2_CID_HUE, 0, 255, 1, DEF_HUE);
+		if (dev->dsp_fw_ver >= S2255_MIN_DSP_COLORFILTER &&
+		    (dev->pid != 0x2257 || channel->idx <= 1))
+			v4l2_ctrl_new_custom(&channel->hdl, &color_filter_ctrl, NULL);
+		if (channel->hdl.error) {
+			ret = channel->hdl.error;
+			v4l2_ctrl_handler_free(&channel->hdl);
+			dev_err(&dev->udev->dev, "couldn't register control\n");
+			break;
+		}
 		channel->vidq.dev = dev;
 		/* register 4 video devices */
 		channel->vdev = template;
+		channel->vdev.ctrl_handler = &channel->hdl;
 		channel->vdev.lock = &dev->lock;
 		channel->vdev.v4l2_dev = &dev->v4l2_dev;
 		video_set_drvdata(&channel->vdev, channel);
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 1d00ca9f0ba3..7eab0b91827b 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -151,6 +151,10 @@ enum v4l2_colorfx {
 #define V4L2_CID_USER_BTTV_BASE			(V4L2_CID_USER_BASE + 0x1010)
 
 
+/* The base for the s2255 driver controls.
+ * We reserve 8 controls for this driver. */
+#define V4L2_CID_USER_S2255_BASE		(V4L2_CID_USER_BASE + 0x1010)
+
 /* MPEG-class control IDs */
 
 #define V4L2_CID_MPEG_BASE 			(V4L2_CTRL_CLASS_MPEG | 0x900)
-- 
cgit 


From 2b83451b45d720ca38c03878ce42ff9139cad9e3 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Thu, 28 Feb 2013 12:33:20 +0100
Subject: KVM: ioeventfd for virtio-ccw devices.

Enhance KVM_IOEVENTFD with a new flag that allows to attach to virtio-ccw
devices on s390 via the KVM_VIRTIO_CCW_NOTIFY_BUS.

Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 Documentation/virtual/kvm/api.txt |  8 ++++++++
 include/uapi/linux/kvm.h          |  3 +++
 virt/kvm/eventfd.c                | 17 +++++++++++++----
 3 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 119358dfb742..c16b442556e8 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1486,15 +1486,23 @@ struct kvm_ioeventfd {
 	__u8  pad[36];
 };
 
+For the special case of virtio-ccw devices on s390, the ioevent is matched
+to a subchannel/virtqueue tuple instead.
+
 The following flags are defined:
 
 #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
 #define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
 #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
+#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
+	(1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify)
 
 If datamatch flag is set, the event will be signaled only if the written value
 to the registered address is equal to datamatch in struct kvm_ioeventfd.
 
+For virtio-ccw devices, addr contains the subchannel id and datamatch the
+virtqueue index.
+
 
 4.60 KVM_DIRTY_TLB
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 3c56ba3d80c1..74d0ff3dfd66 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -449,12 +449,15 @@ enum {
 	kvm_ioeventfd_flag_nr_datamatch,
 	kvm_ioeventfd_flag_nr_pio,
 	kvm_ioeventfd_flag_nr_deassign,
+	kvm_ioeventfd_flag_nr_virtio_ccw_notify,
 	kvm_ioeventfd_flag_nr_max,
 };
 
 #define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
 #define KVM_IOEVENTFD_FLAG_PIO       (1 << kvm_ioeventfd_flag_nr_pio)
 #define KVM_IOEVENTFD_FLAG_DEASSIGN  (1 << kvm_ioeventfd_flag_nr_deassign)
+#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
+	(1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify)
 
 #define KVM_IOEVENTFD_VALID_FLAG_MASK  ((1 << kvm_ioeventfd_flag_nr_max) - 1)
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 0b6fe69bb03d..020522ed9094 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -674,15 +674,24 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
 	return false;
 }
 
+static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
+{
+	if (flags & KVM_IOEVENTFD_FLAG_PIO)
+		return KVM_PIO_BUS;
+	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
+		return KVM_VIRTIO_CCW_NOTIFY_BUS;
+	return KVM_MMIO_BUS;
+}
+
 static int
 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
-	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
+	enum kvm_bus              bus_idx;
 	struct _ioeventfd        *p;
 	struct eventfd_ctx       *eventfd;
 	int                       ret;
 
+	bus_idx = ioeventfd_bus_from_flags(args->flags);
 	/* must be natural-word sized */
 	switch (args->len) {
 	case 1:
@@ -757,12 +766,12 @@ fail:
 static int
 kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
-	int                       pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
-	enum kvm_bus              bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
+	enum kvm_bus              bus_idx;
 	struct _ioeventfd        *p, *tmp;
 	struct eventfd_ctx       *eventfd;
 	int                       ret = -ENOENT;
 
+	bus_idx = ioeventfd_bus_from_flags(args->flags);
 	eventfd = eventfd_ctx_fdget(args->fd);
 	if (IS_ERR(eventfd))
 		return PTR_ERR(eventfd);
-- 
cgit 


From a947b0a93efa9a25c012aa88848f4cf8d9b41280 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Fri, 22 Feb 2013 10:54:54 +0100
Subject: xfrm: allow to avoid copying DSCP during encapsulation

By default, DSCP is copying during encapsulation.
Copying the DSCP in IPsec tunneling may be a bit dangerous because packets with
different DSCP may get reordered relative to each other in the network and then
dropped by the remote IPsec GW if the reordering becomes too big compared to the
replay window.

It is possible to avoid this copy with netfilter rules, but it's very convenient
to be able to configure it for each SA directly.

This patch adds a toogle for this purpose. By default, it's not set to maintain
backward compatibility.

Field flags in struct xfrm_usersa_info is full, hence I add a new attribute.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h           |  1 +
 include/uapi/linux/xfrm.h    |  3 +++
 net/ipv4/ipcomp.c            |  1 +
 net/ipv4/xfrm4_mode_tunnel.c |  8 ++++++--
 net/ipv6/xfrm6_mode_tunnel.c |  7 +++++--
 net/xfrm/xfrm_state.c        |  1 +
 net/xfrm/xfrm_user.c         | 13 +++++++++++++
 7 files changed, 30 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 24c8886fd969..ae16531d0d35 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -162,6 +162,7 @@ struct xfrm_state {
 		xfrm_address_t	saddr;
 		int		header_len;
 		int		trailer_len;
+		u32		extra_flags;
 	} props;
 
 	struct xfrm_lifetime_cfg lft;
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 28e493b5b94c..a8cd6a4a2970 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -297,6 +297,7 @@ enum xfrm_attr_type_t {
 	XFRMA_MARK,		/* struct xfrm_mark */
 	XFRMA_TFCPAD,		/* __u32 */
 	XFRMA_REPLAY_ESN_VAL,	/* struct xfrm_replay_esn */
+	XFRMA_SA_EXTRA_FLAGS,	/* __u32 */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
@@ -367,6 +368,8 @@ struct xfrm_usersa_info {
 #define XFRM_STATE_ESN		128
 };
 
+#define XFRM_SA_XFLAG_DONT_ENCAP_DSCP	1
+
 struct xfrm_usersa_id {
 	xfrm_address_t			daddr;
 	__be32				spi;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index f01d1b1aff7f..59cb8c769056 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -75,6 +75,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
 	t->props.mode = x->props.mode;
 	t->props.saddr.a4 = x->props.saddr.a4;
 	t->props.flags = x->props.flags;
+	t->props.extra_flags = x->props.extra_flags;
 	memcpy(&t->mark, &x->mark, sizeof(t->mark));
 
 	if (xfrm_init_state(t))
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index fe5189e2e114..eb1dd4d643f2 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -103,8 +103,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);
 
-	/* DS disclosed */
-	top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
+	/* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
+	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+		top_iph->tos = 0;
+	else
+		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
+	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
 					    XFRM_MODE_SKB_CB(skb)->tos);
 
 	flags = x->props.flags;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 9bf6a74a71d2..4770d515c2c8 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -49,8 +49,11 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 	       sizeof(top_iph->flow_lbl));
 	top_iph->nexthdr = xfrm_af2proto(skb_dst(skb)->ops->family);
 
-	dsfield = XFRM_MODE_SKB_CB(skb)->tos;
-	dsfield = INET_ECN_encapsulate(dsfield, dsfield);
+	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
+		dsfield = 0;
+	else
+		dsfield = XFRM_MODE_SKB_CB(skb)->tos;
+	dsfield = INET_ECN_encapsulate(dsfield, XFRM_MODE_SKB_CB(skb)->tos);
 	if (x->props.flags & XFRM_STATE_NOECN)
 		dsfield &= ~INET_ECN_MASK;
 	ipv6_change_dsfield(top_iph, 0, dsfield);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 2c341bdaf47c..78f66fa92449 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1187,6 +1187,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
 		goto error;
 
 	x->props.flags = orig->props.flags;
+	x->props.extra_flags = orig->props.extra_flags;
 
 	x->curlft.add_time = orig->curlft.add_time;
 	x->km.state = orig->km.state;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index fbd9e6cd0fd7..204cba192af8 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -515,6 +515,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
 
 	copy_from_user_state(x, p);
 
+	if (attrs[XFRMA_SA_EXTRA_FLAGS])
+		x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
+
 	if ((err = attach_aead(&x->aead, &x->props.ealgo,
 			       attrs[XFRMA_ALG_AEAD])))
 		goto error;
@@ -779,6 +782,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 
 	copy_to_user_state(x, p);
 
+	if (x->props.extra_flags) {
+		ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS,
+				  x->props.extra_flags);
+		if (ret)
+			goto out;
+	}
+
 	if (x->coaddr) {
 		ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr);
 		if (ret)
@@ -2302,6 +2312,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
 	[XFRMA_MARK]		= { .len = sizeof(struct xfrm_mark) },
 	[XFRMA_TFCPAD]		= { .type = NLA_U32 },
 	[XFRMA_REPLAY_ESN_VAL]	= { .len = sizeof(struct xfrm_replay_state_esn) },
+	[XFRMA_SA_EXTRA_FLAGS]	= { .type = NLA_U32 },
 };
 
 static struct xfrm_link {
@@ -2495,6 +2506,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x)
 				    x->security->ctx_len);
 	if (x->coaddr)
 		l += nla_total_size(sizeof(*x->coaddr));
+	if (x->props.extra_flags)
+		l += nla_total_size(sizeof(x->props.extra_flags));
 
 	/* Must count x->lastused as it may become non-zero behind our back. */
 	l += nla_total_size(sizeof(u64));
-- 
cgit 


From f8bacc210408f7a2a182f184a9fa1475b8a67440 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 14 Feb 2013 23:27:01 +0100
Subject: cfg80211: clean up mesh plink station change API

Make the ability to leave the plink_state unchanged not use a
magic -1 variable that isn't in the enum, but an explicit change
flag; reject invalid plink states or actions and move the needed
constants for plink actions to the right header file. Also
reject plink_state changes for non-mesh interfaces.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 15 ++-------------
 include/uapi/linux/nl80211.h | 20 +++++++++++++++++++-
 net/mac80211/cfg.c           | 14 +++++++++++---
 net/wireless/nl80211.c       | 29 ++++++++++++++++++++++-------
 4 files changed, 54 insertions(+), 24 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d581c6de5d64..9b54574c3df9 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -610,23 +610,11 @@ struct cfg80211_ap_settings {
 	bool radar_required;
 };
 
-/**
- * enum plink_action - actions to perform in mesh peers
- *
- * @PLINK_ACTION_INVALID: action 0 is reserved
- * @PLINK_ACTION_OPEN: start mesh peer link establishment
- * @PLINK_ACTION_BLOCK: block traffic from this mesh peer
- */
-enum plink_actions {
-	PLINK_ACTION_INVALID,
-	PLINK_ACTION_OPEN,
-	PLINK_ACTION_BLOCK,
-};
-
 /**
  * enum station_parameters_apply_mask - station parameter values to apply
  * @STATION_PARAM_APPLY_UAPSD: apply new uAPSD parameters (uapsd_queues, max_sp)
  * @STATION_PARAM_APPLY_CAPABILITY: apply new capability
+ * @STATION_PARAM_APPLY_PLINK_STATE: apply new plink state
  *
  * Not all station parameters have in-band "no change" signalling,
  * for those that don't these flags will are used.
@@ -634,6 +622,7 @@ enum plink_actions {
 enum station_parameters_apply_mask {
 	STATION_PARAM_APPLY_UAPSD = BIT(0),
 	STATION_PARAM_APPLY_CAPABILITY = BIT(1),
+	STATION_PARAM_APPLY_PLINK_STATE = BIT(2),
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c46bb016f4e4..7dcc69f73d2c 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -884,7 +884,8 @@ enum nl80211_commands {
  *	consisting of a nested array.
  *
  * @NL80211_ATTR_MESH_ID: mesh id (1-32 bytes).
- * @NL80211_ATTR_STA_PLINK_ACTION: action to perform on the mesh peer link.
+ * @NL80211_ATTR_STA_PLINK_ACTION: action to perform on the mesh peer link
+ *	(see &enum nl80211_plink_action).
  * @NL80211_ATTR_MPATH_NEXT_HOP: MAC address of the next hop for a mesh path.
  * @NL80211_ATTR_MPATH_INFO: information about a mesh_path, part of mesh path
  * 	info given for %NL80211_CMD_GET_MPATH, nested attribute described at
@@ -3307,6 +3308,23 @@ enum nl80211_plink_state {
 	MAX_NL80211_PLINK_STATES = NUM_NL80211_PLINK_STATES - 1
 };
 
+/**
+ * enum nl80211_plink_action - actions to perform in mesh peers
+ *
+ * @NL80211_PLINK_ACTION_NO_ACTION: perform no action
+ * @NL80211_PLINK_ACTION_OPEN: start mesh peer link establishment
+ * @NL80211_PLINK_ACTION_BLOCK: block traffic from this mesh peer
+ * @NUM_NL80211_PLINK_ACTIONS: number of possible actions
+ */
+enum plink_actions {
+	NL80211_PLINK_ACTION_NO_ACTION,
+	NL80211_PLINK_ACTION_OPEN,
+	NL80211_PLINK_ACTION_BLOCK,
+
+	NUM_NL80211_PLINK_ACTIONS,
+};
+
+
 #define NL80211_KCK_LEN			16
 #define NL80211_KEK_LEN			16
 #define NL80211_REPLAY_CTR_LEN		8
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index fb306814576a..ca28405d5f65 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1261,7 +1261,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	if (ieee80211_vif_is_mesh(&sdata->vif)) {
 #ifdef CONFIG_MAC80211_MESH
 		u32 changed = 0;
-		if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) {
+		if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED &&
+		    (params->sta_modify_mask &
+					STATION_PARAM_APPLY_PLINK_STATE)) {
 			switch (params->plink_state) {
 			case NL80211_PLINK_ESTAB:
 				if (sta->plink_state != NL80211_PLINK_ESTAB)
@@ -1292,12 +1294,18 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 				/*  nothing  */
 				break;
 			}
+		} else if (params->sta_modify_mask &
+					STATION_PARAM_APPLY_PLINK_STATE) {
+			return -EINVAL;
 		} else {
 			switch (params->plink_action) {
-			case PLINK_ACTION_OPEN:
+			case NL80211_PLINK_ACTION_NO_ACTION:
+				/* nothing */
+				break;
+			case NL80211_PLINK_ACTION_OPEN:
 				changed |= mesh_plink_open(sta);
 				break;
-			case PLINK_ACTION_BLOCK:
+			case NL80211_PLINK_ACTION_BLOCK:
 				changed |= mesh_plink_block(sta);
 				break;
 			}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d44ab216c0ec..9e7ece0e5e5e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3412,7 +3412,6 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 	memset(&params, 0, sizeof(params));
 
 	params.listen_interval = -1;
-	params.plink_state = -1;
 
 	if (info->attrs[NL80211_ATTR_STA_AID])
 		return -EINVAL;
@@ -3451,13 +3450,20 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
 		return -EINVAL;
 
-	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
+	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {
 		params.plink_action =
-		    nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
+			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
+		if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS)
+			return -EINVAL;
+	}
 
-	if (info->attrs[NL80211_ATTR_STA_PLINK_STATE])
+	if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) {
 		params.plink_state =
-		    nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
+			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
+		if (params.plink_state >= NUM_NL80211_PLINK_STATES)
+			return -EINVAL;
+		params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE;
+	}
 
 	if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]) {
 		enum nl80211_mesh_power_mode pm = nla_get_u32(
@@ -3479,6 +3485,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 			return -EINVAL;
 		if (params.local_pm)
 			return -EINVAL;
+		if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
+			return -EINVAL;
 
 		/* TDLS can't be set, ... */
 		if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
@@ -3542,6 +3550,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 			return -EINVAL;
 		if (params.local_pm)
 			return -EINVAL;
+		if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
+			return -EINVAL;
 		/* reject any changes other than AUTHORIZED or WME (for TDLS) */
 		if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
 					      BIT(NL80211_STA_FLAG_WME)))
@@ -3553,6 +3563,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 			return -EINVAL;
 		if (params.local_pm)
 			return -EINVAL;
+		if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
+			return -EINVAL;
 		if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
 		    info->attrs[NL80211_ATTR_VHT_CAPABILITY])
 			return -EINVAL;
@@ -3652,9 +3664,12 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		params.vht_capa =
 			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
 
-	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
+	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) {
 		params.plink_action =
-		    nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
+			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
+		if (params.plink_action >= NUM_NL80211_PLINK_ACTIONS)
+			return -EINVAL;
+	}
 
 	if (!rdev->ops->add_station)
 		return -EOPNOTSUPP;
-- 
cgit 


From 77ee7c891a04c3d254711ddf1bde5d7381339fb3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 15 Feb 2013 00:48:33 +0100
Subject: cfg80211: comprehensively check station changes

The station change API isn't being checked properly before
drivers are called, and as a result it is difficult to see
what should be allowed and what not.

In order to comprehensively check the API parameters parse
everything first, and then have the driver call a function
(cfg80211_check_station_change()) with the additionally
information about the kind of station that is being changed;
this allows the function to make better decisions than the
old code could.

While at it, also add a few checks, particularly in mesh
and clarify the TDLS station lifetime in documentation.

To be able to reduce a few checks, ignore any flag set bits
when the mask isn't set, they shouldn't be applied then.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath6kl/cfg80211.c |   8 +-
 include/net/cfg80211.h                     |  48 ++++-
 include/uapi/linux/nl80211.h               |  16 +-
 net/mac80211/cfg.c                         | 125 ++++++++-----
 net/wireless/nl80211.c                     | 288 +++++++++++++++++------------
 5 files changed, 311 insertions(+), 174 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 752ffc4f4166..28c413f861a2 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -2990,13 +2990,15 @@ static int ath6kl_change_station(struct wiphy *wiphy, struct net_device *dev,
 {
 	struct ath6kl *ar = ath6kl_priv(dev);
 	struct ath6kl_vif *vif = netdev_priv(dev);
+	int err;
 
 	if (vif->nw_type != AP_NETWORK)
 		return -EOPNOTSUPP;
 
-	/* Use this only for authorizing/unauthorizing a station */
-	if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)))
-		return -EOPNOTSUPP;
+	err = cfg80211_check_station_change(wiphy, params,
+					    CFG80211_STA_AP_MLME_CLIENT);
+	if (err)
+		return err;
 
 	if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED))
 		return ath6kl_wmi_ap_set_mlme(ar->wmi, vif->fw_vif_idx,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7ca321d2b599..ed2b08da3b93 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -677,6 +677,49 @@ struct station_parameters {
 	u8 ext_capab_len;
 };
 
+/**
+ * enum cfg80211_station_type - the type of station being modified
+ * @CFG80211_STA_AP_CLIENT: client of an AP interface
+ * @CFG80211_STA_AP_MLME_CLIENT: client of an AP interface that has
+ *	the AP MLME in the device
+ * @CFG80211_STA_AP_STA: AP station on managed interface
+ * @CFG80211_STA_IBSS: IBSS station
+ * @CFG80211_STA_TDLS_PEER_SETUP: TDLS peer on managed interface (dummy entry
+ *	while TDLS setup is in progress, it moves out of this state when
+ *	being marked authorized; use this only if TDLS with external setup is
+ *	supported/used)
+ * @CFG80211_STA_TDLS_PEER_ACTIVE: TDLS peer on managed interface (active
+ *	entry that is operating, has been marked authorized by userspace)
+ * @CFG80211_STA_MESH_PEER_NONSEC: peer on mesh interface (non-secured)
+ * @CFG80211_STA_MESH_PEER_SECURE: peer on mesh interface (secured)
+ */
+enum cfg80211_station_type {
+	CFG80211_STA_AP_CLIENT,
+	CFG80211_STA_AP_MLME_CLIENT,
+	CFG80211_STA_AP_STA,
+	CFG80211_STA_IBSS,
+	CFG80211_STA_TDLS_PEER_SETUP,
+	CFG80211_STA_TDLS_PEER_ACTIVE,
+	CFG80211_STA_MESH_PEER_NONSEC,
+	CFG80211_STA_MESH_PEER_SECURE,
+};
+
+/**
+ * cfg80211_check_station_change - validate parameter changes
+ * @wiphy: the wiphy this operates on
+ * @params: the new parameters for a station
+ * @statype: the type of station being modified
+ *
+ * Utility function for the @change_station driver method. Call this function
+ * with the appropriate station type looking up the station (and checking that
+ * it exists). It will verify whether the station change is acceptable, and if
+ * not will return an error code. Note that it may modify the parameters for
+ * backward compatibility reasons, so don't use them before calling this.
+ */
+int cfg80211_check_station_change(struct wiphy *wiphy,
+				  struct station_parameters *params,
+				  enum cfg80211_station_type statype);
+
 /**
  * enum station_info_flags - station information flags
  *
@@ -1770,9 +1813,8 @@ struct cfg80211_gtk_rekey_data {
  * @change_station: Modify a given station. Note that flags changes are not much
  *	validated in cfg80211, in particular the auth/assoc/authorized flags
  *	might come to the driver in invalid combinations -- make sure to check
- *	them, also against the existing state! Also, supported_rates changes are
- *	not checked in station mode -- drivers need to reject (or ignore) them
- *	for anything but TDLS peers.
+ *	them, also against the existing state! Drivers must call
+ *	cfg80211_check_station_change() to validate the information.
  * @get_station: get station information for the station identified by @mac
  * @dump_station: dump station callback -- resume dump at index @idx
  *
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 7dcc69f73d2c..523ed3d65b41 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -36,7 +36,21 @@
  * The station is still assumed to belong to the AP interface it was added
  * to.
  *
- * TODO: need more info?
+ * Station handling varies per interface type and depending on the driver's
+ * capabilities.
+ *
+ * For drivers supporting TDLS with external setup (WIPHY_FLAG_SUPPORTS_TDLS
+ * and WIPHY_FLAG_TDLS_EXTERNAL_SETUP), the station lifetime is as follows:
+ *  - a setup station entry is added, not yet authorized, without any rate
+ *    or capability information, this just exists to avoid race conditions
+ *  - when the TDLS setup is done, a single NL80211_CMD_SET_STATION is valid
+ *    to add rate and capability information to the station and at the same
+ *    time mark it authorized.
+ *  - %NL80211_TDLS_ENABLE_LINK is then used
+ *  - after this, the only valid operation is to remove it by tearing down
+ *    the TDLS link (%NL80211_TDLS_DISABLE_LINK)
+ *
+ * TODO: need more info for other interface types
  */
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ca28405d5f65..c115f82c037c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1177,6 +1177,18 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 			mask |= BIT(NL80211_STA_FLAG_ASSOCIATED);
 		if (set & BIT(NL80211_STA_FLAG_AUTHENTICATED))
 			set |= BIT(NL80211_STA_FLAG_ASSOCIATED);
+	} else if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+		/*
+		 * TDLS -- everything follows authorized, but
+		 * only becoming authorized is possible, not
+		 * going back
+		 */
+		if (set & BIT(NL80211_STA_FLAG_AUTHORIZED)) {
+			set |= BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+			       BIT(NL80211_STA_FLAG_ASSOCIATED);
+			mask |= BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+				BIT(NL80211_STA_FLAG_ASSOCIATED);
+		}
 	}
 
 	ret = sta_apply_auth_flags(local, sta, mask, set);
@@ -1261,9 +1273,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	if (ieee80211_vif_is_mesh(&sdata->vif)) {
 #ifdef CONFIG_MAC80211_MESH
 		u32 changed = 0;
-		if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED &&
-		    (params->sta_modify_mask &
-					STATION_PARAM_APPLY_PLINK_STATE)) {
+
+		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) {
 			switch (params->plink_state) {
 			case NL80211_PLINK_ESTAB:
 				if (sta->plink_state != NL80211_PLINK_ESTAB)
@@ -1294,21 +1305,18 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 				/*  nothing  */
 				break;
 			}
-		} else if (params->sta_modify_mask &
-					STATION_PARAM_APPLY_PLINK_STATE) {
-			return -EINVAL;
-		} else {
-			switch (params->plink_action) {
-			case NL80211_PLINK_ACTION_NO_ACTION:
-				/* nothing */
-				break;
-			case NL80211_PLINK_ACTION_OPEN:
-				changed |= mesh_plink_open(sta);
-				break;
-			case NL80211_PLINK_ACTION_BLOCK:
-				changed |= mesh_plink_block(sta);
-				break;
-			}
+		}
+
+		switch (params->plink_action) {
+		case NL80211_PLINK_ACTION_NO_ACTION:
+			/* nothing */
+			break;
+		case NL80211_PLINK_ACTION_OPEN:
+			changed |= mesh_plink_open(sta);
+			break;
+		case NL80211_PLINK_ACTION_BLOCK:
+			changed |= mesh_plink_block(sta);
+			break;
 		}
 
 		if (params->local_pm)
@@ -1354,8 +1362,10 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 	 * defaults -- if userspace wants something else we'll
 	 * change it accordingly in sta_apply_parameters()
 	 */
-	sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
-	sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
+	if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) {
+		sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
+		sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
+	}
 
 	err = sta_apply_parameters(local, sta, params);
 	if (err) {
@@ -1364,8 +1374,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
 	}
 
 	/*
-	 * for TDLS, rate control should be initialized only when supported
-	 * rates are known.
+	 * for TDLS, rate control should be initialized only when
+	 * rates are known and station is marked authorized
 	 */
 	if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER))
 		rate_control_rate_init(sta);
@@ -1402,50 +1412,67 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int ieee80211_change_station(struct wiphy *wiphy,
-				    struct net_device *dev,
-				    u8 *mac,
+				    struct net_device *dev, u8 *mac,
 				    struct station_parameters *params)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct sta_info *sta;
 	struct ieee80211_sub_if_data *vlansdata;
+	enum cfg80211_station_type statype;
 	int err;
 
 	mutex_lock(&local->sta_mtx);
 
 	sta = sta_info_get_bss(sdata, mac);
 	if (!sta) {
-		mutex_unlock(&local->sta_mtx);
-		return -ENOENT;
+		err = -ENOENT;
+		goto out_err;
 	}
 
-	/* in station mode, some updates are only valid with TDLS */
-	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
-	    (params->supported_rates || params->ht_capa || params->vht_capa ||
-	     params->sta_modify_mask ||
-	     (params->sta_flags_mask & BIT(NL80211_STA_FLAG_WME))) &&
-	    !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
-		mutex_unlock(&local->sta_mtx);
-		return -EINVAL;
+	switch (sdata->vif.type) {
+	case NL80211_IFTYPE_MESH_POINT:
+		if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED)
+			statype = CFG80211_STA_MESH_PEER_SECURE;
+		else
+			statype = CFG80211_STA_MESH_PEER_NONSEC;
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		statype = CFG80211_STA_IBSS;
+		break;
+	case NL80211_IFTYPE_STATION:
+		if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+			statype = CFG80211_STA_AP_STA;
+			break;
+		}
+		if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+			statype = CFG80211_STA_TDLS_PEER_ACTIVE;
+		else
+			statype = CFG80211_STA_TDLS_PEER_SETUP;
+		break;
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_AP_VLAN:
+		statype = CFG80211_STA_AP_CLIENT;
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		goto out_err;
 	}
 
+	err = cfg80211_check_station_change(wiphy, params, statype);
+	if (err)
+		goto out_err;
+
 	if (params->vlan && params->vlan != sta->sdata->dev) {
 		bool prev_4addr = false;
 		bool new_4addr = false;
 
 		vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
 
-		if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
-		    vlansdata->vif.type != NL80211_IFTYPE_AP) {
-			mutex_unlock(&local->sta_mtx);
-			return -EINVAL;
-		}
-
 		if (params->vlan->ieee80211_ptr->use_4addr) {
 			if (vlansdata->u.vlan.sta) {
-				mutex_unlock(&local->sta_mtx);
-				return -EBUSY;
+				err = -EBUSY;
+				goto out_err;
 			}
 
 			rcu_assign_pointer(vlansdata->u.vlan.sta, sta);
@@ -1472,12 +1499,12 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 	}
 
 	err = sta_apply_parameters(local, sta, params);
-	if (err) {
-		mutex_unlock(&local->sta_mtx);
-		return err;
-	}
+	if (err)
+		goto out_err;
 
-	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && params->supported_rates)
+	/* When peer becomes authorized, init rate control as well */
+	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
+	    test_sta_flag(sta, WLAN_STA_AUTHORIZED))
 		rate_control_rate_init(sta);
 
 	mutex_unlock(&local->sta_mtx);
@@ -1487,7 +1514,11 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 		ieee80211_recalc_ps(local, -1);
 		ieee80211_recalc_ps_vif(sdata);
 	}
+
 	return 0;
+out_err:
+	mutex_unlock(&local->sta_mtx);
+	return err;
 }
 
 #ifdef CONFIG_MAC80211_MESH
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9e7c10420da8..83151a50e5ad 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2967,6 +2967,7 @@ static int parse_station_flags(struct genl_info *info,
 		sta_flags = nla_data(nla);
 		params->sta_flags_mask = sta_flags->mask;
 		params->sta_flags_set = sta_flags->set;
+		params->sta_flags_set &= params->sta_flags_mask;
 		if ((params->sta_flags_mask |
 		     params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID))
 			return -EINVAL;
@@ -3320,6 +3321,136 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
 	return genlmsg_reply(msg, info);
 }
 
+int cfg80211_check_station_change(struct wiphy *wiphy,
+				  struct station_parameters *params,
+				  enum cfg80211_station_type statype)
+{
+	if (params->listen_interval != -1)
+		return -EINVAL;
+	if (params->aid)
+		return -EINVAL;
+
+	/* When you run into this, adjust the code below for the new flag */
+	BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
+
+	switch (statype) {
+	case CFG80211_STA_MESH_PEER_NONSEC:
+	case CFG80211_STA_MESH_PEER_SECURE:
+		/*
+		 * No ignoring the TDLS flag here -- the userspace mesh
+		 * code doesn't have the bug of including TDLS in the
+		 * mask everywhere.
+		 */
+		if (params->sta_flags_mask &
+				~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+				  BIT(NL80211_STA_FLAG_MFP) |
+				  BIT(NL80211_STA_FLAG_AUTHORIZED)))
+			return -EINVAL;
+		break;
+	case CFG80211_STA_TDLS_PEER_SETUP:
+	case CFG80211_STA_TDLS_PEER_ACTIVE:
+		if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
+			return -EINVAL;
+		/* ignore since it can't change */
+		params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
+		break;
+	default:
+		/* disallow mesh-specific things */
+		if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION)
+			return -EINVAL;
+		if (params->local_pm)
+			return -EINVAL;
+		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
+			return -EINVAL;
+	}
+
+	if (statype != CFG80211_STA_TDLS_PEER_SETUP &&
+	    statype != CFG80211_STA_TDLS_PEER_ACTIVE) {
+		/* TDLS can't be set, ... */
+		if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
+			return -EINVAL;
+		/*
+		 * ... but don't bother the driver with it. This works around
+		 * a hostapd/wpa_supplicant issue -- it always includes the
+		 * TLDS_PEER flag in the mask even for AP mode.
+		 */
+		params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
+	}
+
+	if (statype != CFG80211_STA_TDLS_PEER_SETUP) {
+		/* reject other things that can't change */
+		if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD)
+			return -EINVAL;
+		if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY)
+			return -EINVAL;
+		if (params->supported_rates)
+			return -EINVAL;
+		if (params->ext_capab || params->ht_capa || params->vht_capa)
+			return -EINVAL;
+	}
+
+	if (statype != CFG80211_STA_AP_CLIENT) {
+		if (params->vlan)
+			return -EINVAL;
+	}
+
+	switch (statype) {
+	case CFG80211_STA_AP_MLME_CLIENT:
+		/* Use this only for authorizing/unauthorizing a station */
+		if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED)))
+			return -EOPNOTSUPP;
+		break;
+	case CFG80211_STA_AP_CLIENT:
+		/* accept only the listed bits */
+		if (params->sta_flags_mask &
+				~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
+				  BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+				  BIT(NL80211_STA_FLAG_ASSOCIATED) |
+				  BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
+				  BIT(NL80211_STA_FLAG_WME) |
+				  BIT(NL80211_STA_FLAG_MFP)))
+			return -EINVAL;
+
+		/* but authenticated/associated only if driver handles it */
+		if (!(wiphy->features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) &&
+		    params->sta_flags_mask &
+				(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+				 BIT(NL80211_STA_FLAG_ASSOCIATED)))
+			return -EINVAL;
+		break;
+	case CFG80211_STA_IBSS:
+	case CFG80211_STA_AP_STA:
+		/* reject any changes other than AUTHORIZED */
+		if (params->sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED))
+			return -EINVAL;
+		break;
+	case CFG80211_STA_TDLS_PEER_SETUP:
+		/* reject any changes other than AUTHORIZED or WME */
+		if (params->sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
+					       BIT(NL80211_STA_FLAG_WME)))
+			return -EINVAL;
+		/* force (at least) rates when authorizing */
+		if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED) &&
+		    !params->supported_rates)
+			return -EINVAL;
+		break;
+	case CFG80211_STA_TDLS_PEER_ACTIVE:
+		/* reject any changes */
+		return -EINVAL;
+	case CFG80211_STA_MESH_PEER_NONSEC:
+		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
+			return -EINVAL;
+		break;
+	case CFG80211_STA_MESH_PEER_SECURE:
+		if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION)
+			return -EINVAL;
+		break;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(cfg80211_check_station_change);
+
 /*
  * Get vlan interface making sure it is running and on the right wiphy.
  */
@@ -3342,6 +3473,13 @@ static struct net_device *get_vlan(struct genl_info *info,
 		goto error;
 	}
 
+	if (v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
+	    v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
+	    v->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
+		ret = -EINVAL;
+		goto error;
+	}
+
 	if (!netif_running(v)) {
 		ret = -ENETDOWN;
 		goto error;
@@ -3410,15 +3548,18 @@ static int nl80211_set_station_tdls(struct genl_info *info,
 static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
-	int err;
 	struct net_device *dev = info->user_ptr[1];
 	struct station_parameters params;
-	u8 *mac_addr = NULL;
+	u8 *mac_addr;
+	int err;
 
 	memset(&params, 0, sizeof(params));
 
 	params.listen_interval = -1;
 
+	if (!rdev->ops->change_station)
+		return -EOPNOTSUPP;
+
 	if (info->attrs[NL80211_ATTR_STA_AID])
 		return -EINVAL;
 
@@ -3450,9 +3591,6 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
 		return -EINVAL;
 
-	if (!rdev->ops->change_station)
-		return -EOPNOTSUPP;
-
 	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
 		return -EINVAL;
 
@@ -3482,133 +3620,33 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 		params.local_pm = pm;
 	}
 
+	/* Include parameters for TDLS peer (will check later) */
+	err = nl80211_set_station_tdls(info, &params);
+	if (err)
+		return err;
+
+	params.vlan = get_vlan(info, rdev);
+	if (IS_ERR(params.vlan))
+		return PTR_ERR(params.vlan);
+
 	switch (dev->ieee80211_ptr->iftype) {
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_P2P_GO:
-		/* disallow mesh-specific things */
-		if (params.plink_action)
-			return -EINVAL;
-		if (params.local_pm)
-			return -EINVAL;
-		if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
-			return -EINVAL;
-
-		/* TDLS can't be set, ... */
-		if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
-			return -EINVAL;
-		/*
-		 * ... but don't bother the driver with it. This works around
-		 * a hostapd/wpa_supplicant issue -- it always includes the
-		 * TLDS_PEER flag in the mask even for AP mode.
-		 */
-		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
-
-		/* accept only the listed bits */
-		if (params.sta_flags_mask &
-				~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
-				  BIT(NL80211_STA_FLAG_AUTHENTICATED) |
-				  BIT(NL80211_STA_FLAG_ASSOCIATED) |
-				  BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
-				  BIT(NL80211_STA_FLAG_WME) |
-				  BIT(NL80211_STA_FLAG_MFP)))
-			return -EINVAL;
-
-		/* but authenticated/associated only if driver handles it */
-		if (!(rdev->wiphy.features &
-				NL80211_FEATURE_FULL_AP_CLIENT_STATE) &&
-		    params.sta_flags_mask &
-				(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
-				 BIT(NL80211_STA_FLAG_ASSOCIATED)))
-			return -EINVAL;
-
-		/* reject other things that can't change */
-		if (params.supported_rates)
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_STA_CAPABILITY])
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY])
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
-		    info->attrs[NL80211_ATTR_VHT_CAPABILITY])
-			return -EINVAL;
-
-		/* must be last in here for error handling */
-		params.vlan = get_vlan(info, rdev);
-		if (IS_ERR(params.vlan))
-			return PTR_ERR(params.vlan);
-		break;
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_STATION:
-		/*
-		 * Don't allow userspace to change the TDLS_PEER flag,
-		 * but silently ignore attempts to change it since we
-		 * don't have state here to verify that it doesn't try
-		 * to change the flag.
-		 */
-		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
-		/* Include parameters for TDLS peer (driver will check) */
-		err = nl80211_set_station_tdls(info, &params);
-		if (err)
-			return err;
-		/* disallow things sta doesn't support */
-		if (params.plink_action)
-			return -EINVAL;
-		if (params.local_pm)
-			return -EINVAL;
-		if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
-			return -EINVAL;
-		/* reject any changes other than AUTHORIZED or WME (for TDLS) */
-		if (params.sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
-					      BIT(NL80211_STA_FLAG_WME)))
-			return -EINVAL;
-		break;
 	case NL80211_IFTYPE_ADHOC:
-		/* disallow things sta doesn't support */
-		if (params.plink_action)
-			return -EINVAL;
-		if (params.local_pm)
-			return -EINVAL;
-		if (params.sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
-		    info->attrs[NL80211_ATTR_VHT_CAPABILITY])
-			return -EINVAL;
-		/* reject any changes other than AUTHORIZED */
-		if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED))
-			return -EINVAL;
-		break;
 	case NL80211_IFTYPE_MESH_POINT:
-		/* disallow things mesh doesn't support */
-		if (params.vlan)
-			return -EINVAL;
-		if (params.supported_rates)
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_STA_CAPABILITY])
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY])
-			return -EINVAL;
-		if (info->attrs[NL80211_ATTR_HT_CAPABILITY] ||
-		    info->attrs[NL80211_ATTR_VHT_CAPABILITY])
-			return -EINVAL;
-		/*
-		 * No special handling for TDLS here -- the userspace
-		 * mesh code doesn't have this bug.
-		 */
-		if (params.sta_flags_mask &
-				~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
-				  BIT(NL80211_STA_FLAG_MFP) |
-				  BIT(NL80211_STA_FLAG_AUTHORIZED)))
-			return -EINVAL;
 		break;
 	default:
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto out_put_vlan;
 	}
 
-	/* be aware of params.vlan when changing code here */
-
+	/* driver will call cfg80211_check_station_change() */
 	err = rdev_change_station(rdev, dev, mac_addr, &params);
 
+ out_put_vlan:
 	if (params.vlan)
 		dev_put(params.vlan);
 
@@ -3687,6 +3725,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
 		return -EINVAL;
 
+	/* When you run into this, adjust the code below for the new flag */
+	BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
+
 	switch (dev->ieee80211_ptr->iftype) {
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_AP_VLAN:
@@ -3730,8 +3771,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		/* ignore uAPSD data */
 		params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;
 
-		/* associated is disallowed */
-		if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED))
+		/* these are disallowed */
+		if (params.sta_flags_mask &
+				(BIT(NL80211_STA_FLAG_ASSOCIATED) |
+				 BIT(NL80211_STA_FLAG_AUTHENTICATED)))
 			return -EINVAL;
 		/* Only TDLS peers can be added */
 		if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
@@ -3742,6 +3785,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		/* ... with external setup is supported */
 		if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP))
 			return -EOPNOTSUPP;
+		/*
+		 * Older wpa_supplicant versions always mark the TDLS peer
+		 * as authorized, but it shouldn't yet be.
+		 */
+		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED);
 		break;
 	default:
 		return -EOPNOTSUPP;
-- 
cgit 


From 3713b4e364effef4b170c97d54528b1cdb16aa6b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 14 Feb 2013 16:19:38 +0100
Subject: nl80211: allow splitting wiphy information in dumps

The per-wiphy information is getting large, to the point
where with more than the typical number of channels it's
too large and overflows, and userspace can't get any of
the information at all.

To address this (in a way that doesn't require making all
messages bigger) allow userspace to specify that it can
deal with wiphy information split across multiple parts
of the dump, and if it can split up the data. This also
splits up each channel separately so an arbitrary number
of channels can be supported.

Additionally, since GET_WIPHY has the same problem, add
support for filtering the wiphy dump and get information
for a single wiphy only, this allows userspace apps to
use dump in this case to retrieve all data from a single
device.

As userspace needs to know if all this this is supported,
add a global nl80211 feature set and include a bit for
this behaviour in it.

Cc: Dennis H Jensen <dennis.h.jensen@siemens.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  28 ++
 net/wireless/nl80211.c       | 933 ++++++++++++++++++++++++++-----------------
 2 files changed, 599 insertions(+), 362 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 523ed3d65b41..9844c10a2999 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -625,6 +625,10 @@
  *	%NL80211_ATTR_RADAR_EVENT is used to inform about the type of the
  *	event.
  *
+ * @NL80211_CMD_GET_PROTOCOL_FEATURES: Get global nl80211 protocol features,
+ *	i.e. features for the nl80211 protocol rather than device features.
+ *	Returns the features in the %NL80211_ATTR_PROTOCOL_FEATURES bitmap.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -779,6 +783,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_RADAR_DETECT,
 
+	NL80211_CMD_GET_PROTOCOL_FEATURES,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -1383,6 +1389,13 @@ enum nl80211_commands {
  *	advertised to the driver, e.g., to enable TDLS off channel operations
  *	and PU-APSD.
  *
+ * @NL80211_ATTR_PROTOCOL_FEATURES: global nl80211 feature flags, see
+ *	&enum nl80211_protocol_features, the attribute is a u32.
+ *
+ * @NL80211_ATTR_SPLIT_WIPHY_DUMP: flag attribute, userspace supports
+ *	receiving the data for a single wiphy split across multiple
+ *	messages, given with wiphy dump message
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1669,6 +1682,9 @@ enum nl80211_attrs {
 	NL80211_ATTR_STA_CAPABILITY,
 	NL80211_ATTR_STA_EXT_CAPABILITY,
 
+	NL80211_ATTR_PROTOCOL_FEATURES,
+	NL80211_ATTR_SPLIT_WIPHY_DUMP,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -3619,4 +3635,16 @@ enum nl80211_dfs_state {
 	NL80211_DFS_AVAILABLE,
 };
 
+/**
+ * enum enum nl80211_protocol_features - nl80211 protocol features
+ * @NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP: nl80211 supports splitting
+ *	wiphy dumps (if requested by the application with the attribute
+ *	%NL80211_ATTR_SPLIT_WIPHY_DUMP. Also supported is filtering the
+ *	wiphy dump by %NL80211_ATTR_WIPHY, %NL80211_ATTR_IFINDEX or
+ *	%NL80211_ATTR_WDEV.
+ */
+enum nl80211_protocol_features {
+	NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP =	1 << 0,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 83151a50e5ad..f187a920ec71 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -370,6 +370,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED },
 	[NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 },
 	[NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, },
+	[NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, },
 };
 
 /* policy for the key attributes */
@@ -892,412 +893,545 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags,
-			      struct cfg80211_registered_device *dev)
+#ifdef CONFIG_PM
+static int nl80211_send_wowlan(struct sk_buff *msg,
+			       struct cfg80211_registered_device *dev)
 {
-	void *hdr;
-	struct nlattr *nl_bands, *nl_band;
-	struct nlattr *nl_freqs, *nl_freq;
-	struct nlattr *nl_rates, *nl_rate;
-	struct nlattr *nl_cmds;
-	enum ieee80211_band band;
-	struct ieee80211_channel *chan;
-	struct ieee80211_rate *rate;
-	int i;
-	const struct ieee80211_txrx_stypes *mgmt_stypes =
-				dev->wiphy.mgmt_stypes;
+	struct nlattr *nl_wowlan;
 
-	hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
-	if (!hdr)
-		return -1;
+	if (!dev->wiphy.wowlan.flags && !dev->wiphy.wowlan.n_patterns)
+		return 0;
 
-	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) ||
-	    nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)) ||
-	    nla_put_u32(msg, NL80211_ATTR_GENERATION,
-			cfg80211_rdev_list_generation) ||
-	    nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
-		       dev->wiphy.retry_short) ||
-	    nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG,
-		       dev->wiphy.retry_long) ||
-	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
-			dev->wiphy.frag_threshold) ||
-	    nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD,
-			dev->wiphy.rts_threshold) ||
-	    nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
-		       dev->wiphy.coverage_class) ||
-	    nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
-		       dev->wiphy.max_scan_ssids) ||
-	    nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS,
-		       dev->wiphy.max_sched_scan_ssids) ||
-	    nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
-			dev->wiphy.max_scan_ie_len) ||
-	    nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
-			dev->wiphy.max_sched_scan_ie_len) ||
-	    nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS,
-		       dev->wiphy.max_match_sets))
-		goto nla_put_failure;
+	nl_wowlan = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
+	if (!nl_wowlan)
+		return -ENOBUFS;
 
-	if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) &&
-	    nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN))
-		goto nla_put_failure;
-	if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
-	    nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH))
-		goto nla_put_failure;
-	if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
-	    nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD))
-		goto nla_put_failure;
-	if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) &&
-	    nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT))
-		goto nla_put_failure;
-	if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
-	    nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT))
-		goto nla_put_failure;
-	if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
-	    nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP))
-		goto nla_put_failure;
+	if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) ||
+	    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
+	     nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)))
+		return -ENOBUFS;
 
-	if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES,
-		    sizeof(u32) * dev->wiphy.n_cipher_suites,
-		    dev->wiphy.cipher_suites))
-		goto nla_put_failure;
+	if (dev->wiphy.wowlan.n_patterns) {
+		struct nl80211_wowlan_pattern_support pat = {
+			.max_patterns = dev->wiphy.wowlan.n_patterns,
+			.min_pattern_len = dev->wiphy.wowlan.pattern_min_len,
+			.max_pattern_len = dev->wiphy.wowlan.pattern_max_len,
+			.max_pkt_offset = dev->wiphy.wowlan.max_pkt_offset,
+		};
 
-	if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
-		       dev->wiphy.max_num_pmkids))
-		goto nla_put_failure;
+		if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
+			    sizeof(pat), &pat))
+			return -ENOBUFS;
+	}
 
-	if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
-	    nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE))
-		goto nla_put_failure;
+	nla_nest_end(msg, nl_wowlan);
 
-	if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
-			dev->wiphy.available_antennas_tx) ||
-	    nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
-			dev->wiphy.available_antennas_rx))
-		goto nla_put_failure;
+	return 0;
+}
+#endif
 
-	if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) &&
-	    nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD,
-			dev->wiphy.probe_resp_offload))
-		goto nla_put_failure;
+static int nl80211_send_band_rateinfo(struct sk_buff *msg,
+				      struct ieee80211_supported_band *sband)
+{
+	struct nlattr *nl_rates, *nl_rate;
+	struct ieee80211_rate *rate;
+	int i;
 
-	if ((dev->wiphy.available_antennas_tx ||
-	     dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) {
-		u32 tx_ant = 0, rx_ant = 0;
-		int res;
-		res = rdev_get_antenna(dev, &tx_ant, &rx_ant);
-		if (!res) {
-			if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX,
-					tx_ant) ||
-			    nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX,
-					rx_ant))
-				goto nla_put_failure;
-		}
-	}
+	/* add HT info */
+	if (sband->ht_cap.ht_supported &&
+	    (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET,
+		     sizeof(sband->ht_cap.mcs),
+		     &sband->ht_cap.mcs) ||
+	     nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA,
+			 sband->ht_cap.cap) ||
+	     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR,
+			sband->ht_cap.ampdu_factor) ||
+	     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY,
+			sband->ht_cap.ampdu_density)))
+		return -ENOBUFS;
 
-	if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
-				dev->wiphy.interface_modes))
-		goto nla_put_failure;
+	/* add VHT info */
+	if (sband->vht_cap.vht_supported &&
+	    (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET,
+		     sizeof(sband->vht_cap.vht_mcs),
+		     &sband->vht_cap.vht_mcs) ||
+	     nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA,
+			 sband->vht_cap.cap)))
+		return -ENOBUFS;
 
-	nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS);
-	if (!nl_bands)
-		goto nla_put_failure;
+	/* add bitrates */
+	nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES);
+	if (!nl_rates)
+		return -ENOBUFS;
 
-	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
-		if (!dev->wiphy.bands[band])
-			continue;
+	for (i = 0; i < sband->n_bitrates; i++) {
+		nl_rate = nla_nest_start(msg, i);
+		if (!nl_rate)
+			return -ENOBUFS;
 
-		nl_band = nla_nest_start(msg, band);
-		if (!nl_band)
-			goto nla_put_failure;
+		rate = &sband->bitrates[i];
+		if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE,
+				rate->bitrate))
+			return -ENOBUFS;
+		if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
+		    nla_put_flag(msg,
+				 NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE))
+			return -ENOBUFS;
 
-		/* add HT info */
-		if (dev->wiphy.bands[band]->ht_cap.ht_supported &&
-		    (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET,
-			     sizeof(dev->wiphy.bands[band]->ht_cap.mcs),
-			     &dev->wiphy.bands[band]->ht_cap.mcs) ||
-		     nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA,
-				 dev->wiphy.bands[band]->ht_cap.cap) ||
-		     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR,
-				dev->wiphy.bands[band]->ht_cap.ampdu_factor) ||
-		     nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY,
-				dev->wiphy.bands[band]->ht_cap.ampdu_density)))
-			goto nla_put_failure;
+		nla_nest_end(msg, nl_rate);
+	}
 
-		/* add VHT info */
-		if (dev->wiphy.bands[band]->vht_cap.vht_supported &&
-		    (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET,
-			     sizeof(dev->wiphy.bands[band]->vht_cap.vht_mcs),
-			     &dev->wiphy.bands[band]->vht_cap.vht_mcs) ||
-		     nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA,
-				 dev->wiphy.bands[band]->vht_cap.cap)))
-			goto nla_put_failure;
+	nla_nest_end(msg, nl_rates);
 
-		/* add frequencies */
-		nl_freqs = nla_nest_start(msg, NL80211_BAND_ATTR_FREQS);
-		if (!nl_freqs)
-			goto nla_put_failure;
+	return 0;
+}
 
-		for (i = 0; i < dev->wiphy.bands[band]->n_channels; i++) {
-			nl_freq = nla_nest_start(msg, i);
-			if (!nl_freq)
-				goto nla_put_failure;
+static int
+nl80211_send_mgmt_stypes(struct sk_buff *msg,
+			 const struct ieee80211_txrx_stypes *mgmt_stypes)
+{
+	u16 stypes;
+	struct nlattr *nl_ftypes, *nl_ifs;
+	enum nl80211_iftype ift;
+	int i;
 
-			chan = &dev->wiphy.bands[band]->channels[i];
+	if (!mgmt_stypes)
+		return 0;
 
-			if (nl80211_msg_put_channel(msg, chan))
-				goto nla_put_failure;
+	nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES);
+	if (!nl_ifs)
+		return -ENOBUFS;
 
-			nla_nest_end(msg, nl_freq);
+	for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
+		nl_ftypes = nla_nest_start(msg, ift);
+		if (!nl_ftypes)
+			return -ENOBUFS;
+		i = 0;
+		stypes = mgmt_stypes[ift].tx;
+		while (stypes) {
+			if ((stypes & 1) &&
+			    nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
+					(i << 4) | IEEE80211_FTYPE_MGMT))
+				return -ENOBUFS;
+			stypes >>= 1;
+			i++;
 		}
+		nla_nest_end(msg, nl_ftypes);
+	}
 
-		nla_nest_end(msg, nl_freqs);
+	nla_nest_end(msg, nl_ifs);
 
-		/* add bitrates */
-		nl_rates = nla_nest_start(msg, NL80211_BAND_ATTR_RATES);
-		if (!nl_rates)
-			goto nla_put_failure;
+	nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES);
+	if (!nl_ifs)
+		return -ENOBUFS;
 
-		for (i = 0; i < dev->wiphy.bands[band]->n_bitrates; i++) {
-			nl_rate = nla_nest_start(msg, i);
-			if (!nl_rate)
-				goto nla_put_failure;
+	for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
+		nl_ftypes = nla_nest_start(msg, ift);
+		if (!nl_ftypes)
+			return -ENOBUFS;
+		i = 0;
+		stypes = mgmt_stypes[ift].rx;
+		while (stypes) {
+			if ((stypes & 1) &&
+			    nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
+					(i << 4) | IEEE80211_FTYPE_MGMT))
+				return -ENOBUFS;
+			stypes >>= 1;
+			i++;
+		}
+		nla_nest_end(msg, nl_ftypes);
+	}
+	nla_nest_end(msg, nl_ifs);
 
-			rate = &dev->wiphy.bands[band]->bitrates[i];
-			if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE,
-					rate->bitrate))
-				goto nla_put_failure;
-			if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) &&
-			    nla_put_flag(msg,
-					 NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE))
-				goto nla_put_failure;
+	return 0;
+}
 
-			nla_nest_end(msg, nl_rate);
-		}
+static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
+			      struct sk_buff *msg, u32 portid, u32 seq,
+			      int flags, bool split, long *split_start,
+			      long *band_start, long *chan_start)
+{
+	void *hdr;
+	struct nlattr *nl_bands, *nl_band;
+	struct nlattr *nl_freqs, *nl_freq;
+	struct nlattr *nl_cmds;
+	enum ieee80211_band band;
+	struct ieee80211_channel *chan;
+	int i;
+	const struct ieee80211_txrx_stypes *mgmt_stypes =
+				dev->wiphy.mgmt_stypes;
+	long start = 0, start_chan = 0, start_band = 0;
 
-		nla_nest_end(msg, nl_rates);
+	hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY);
+	if (!hdr)
+		return -ENOBUFS;
 
-		nla_nest_end(msg, nl_band);
+	/* allow always using the variables */
+	if (!split) {
+		split_start = &start;
+		band_start = &start_band;
+		chan_start = &start_chan;
 	}
-	nla_nest_end(msg, nl_bands);
 
-	nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS);
-	if (!nl_cmds)
-		goto nla_put_failure;
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx) ||
+	    nla_put_string(msg, NL80211_ATTR_WIPHY_NAME,
+			   wiphy_name(&dev->wiphy)) ||
+	    nla_put_u32(msg, NL80211_ATTR_GENERATION,
+			cfg80211_rdev_list_generation))
+		goto nla_put_failure;
+
+	switch (*split_start) {
+	case 0:
+		if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT,
+			       dev->wiphy.retry_short) ||
+		    nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG,
+			       dev->wiphy.retry_long) ||
+		    nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
+				dev->wiphy.frag_threshold) ||
+		    nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD,
+				dev->wiphy.rts_threshold) ||
+		    nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS,
+			       dev->wiphy.coverage_class) ||
+		    nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
+			       dev->wiphy.max_scan_ssids) ||
+		    nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS,
+			       dev->wiphy.max_sched_scan_ssids) ||
+		    nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
+				dev->wiphy.max_scan_ie_len) ||
+		    nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN,
+				dev->wiphy.max_sched_scan_ie_len) ||
+		    nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS,
+			       dev->wiphy.max_match_sets))
+			goto nla_put_failure;
 
-	i = 0;
-#define CMD(op, n)						\
-	 do {							\
-		if (dev->ops->op) {				\
-			i++;					\
-			if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
-				goto nla_put_failure;		\
-		}						\
-	} while (0)
-
-	CMD(add_virtual_intf, NEW_INTERFACE);
-	CMD(change_virtual_intf, SET_INTERFACE);
-	CMD(add_key, NEW_KEY);
-	CMD(start_ap, START_AP);
-	CMD(add_station, NEW_STATION);
-	CMD(add_mpath, NEW_MPATH);
-	CMD(update_mesh_config, SET_MESH_CONFIG);
-	CMD(change_bss, SET_BSS);
-	CMD(auth, AUTHENTICATE);
-	CMD(assoc, ASSOCIATE);
-	CMD(deauth, DEAUTHENTICATE);
-	CMD(disassoc, DISASSOCIATE);
-	CMD(join_ibss, JOIN_IBSS);
-	CMD(join_mesh, JOIN_MESH);
-	CMD(set_pmksa, SET_PMKSA);
-	CMD(del_pmksa, DEL_PMKSA);
-	CMD(flush_pmksa, FLUSH_PMKSA);
-	if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
-		CMD(remain_on_channel, REMAIN_ON_CHANNEL);
-	CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
-	CMD(mgmt_tx, FRAME);
-	CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
-	if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
-		i++;
-		if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
+		if ((dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) &&
+		    nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN))
 			goto nla_put_failure;
-	}
-	if (dev->ops->set_monitor_channel || dev->ops->start_ap ||
-	    dev->ops->join_mesh) {
-		i++;
-		if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
+		if ((dev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) &&
+		    nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH))
 			goto nla_put_failure;
-	}
-	CMD(set_wds_peer, SET_WDS_PEER);
-	if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
-		CMD(tdls_mgmt, TDLS_MGMT);
-		CMD(tdls_oper, TDLS_OPER);
-	}
-	if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
-		CMD(sched_scan_start, START_SCHED_SCAN);
-	CMD(probe_client, PROBE_CLIENT);
-	CMD(set_noack_map, SET_NOACK_MAP);
-	if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
-		i++;
-		if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
+		if ((dev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
+		    nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD))
+			goto nla_put_failure;
+		if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) &&
+		    nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT))
+			goto nla_put_failure;
+		if ((dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
+		    nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT))
+			goto nla_put_failure;
+		if ((dev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) &&
+		    nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP))
 			goto nla_put_failure;
-	}
-	CMD(start_p2p_device, START_P2P_DEVICE);
-	CMD(set_mcast_rate, SET_MCAST_RATE);
 
-#ifdef CONFIG_NL80211_TESTMODE
-	CMD(testmode_cmd, TESTMODE);
-#endif
+		(*split_start)++;
+		if (split)
+			break;
+	case 1:
+		if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES,
+			    sizeof(u32) * dev->wiphy.n_cipher_suites,
+			    dev->wiphy.cipher_suites))
+			goto nla_put_failure;
 
-#undef CMD
+		if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
+			       dev->wiphy.max_num_pmkids))
+			goto nla_put_failure;
 
-	if (dev->ops->connect || dev->ops->auth) {
-		i++;
-		if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
+		if ((dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
+		    nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE))
 			goto nla_put_failure;
-	}
 
-	if (dev->ops->disconnect || dev->ops->deauth) {
-		i++;
-		if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
+		if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX,
+				dev->wiphy.available_antennas_tx) ||
+		    nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
+				dev->wiphy.available_antennas_rx))
 			goto nla_put_failure;
-	}
 
-	nla_nest_end(msg, nl_cmds);
+		if ((dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) &&
+		    nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD,
+				dev->wiphy.probe_resp_offload))
+			goto nla_put_failure;
 
-	if (dev->ops->remain_on_channel &&
-	    (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) &&
-	    nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
-			dev->wiphy.max_remain_on_channel_duration))
-		goto nla_put_failure;
+		if ((dev->wiphy.available_antennas_tx ||
+		     dev->wiphy.available_antennas_rx) &&
+		    dev->ops->get_antenna) {
+			u32 tx_ant = 0, rx_ant = 0;
+			int res;
+			res = rdev_get_antenna(dev, &tx_ant, &rx_ant);
+			if (!res) {
+				if (nla_put_u32(msg,
+						NL80211_ATTR_WIPHY_ANTENNA_TX,
+						tx_ant) ||
+				    nla_put_u32(msg,
+						NL80211_ATTR_WIPHY_ANTENNA_RX,
+						rx_ant))
+					goto nla_put_failure;
+			}
+		}
 
-	if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) &&
-	    nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK))
-		goto nla_put_failure;
+		(*split_start)++;
+		if (split)
+			break;
+	case 2:
+		if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES,
+					dev->wiphy.interface_modes))
+				goto nla_put_failure;
+		(*split_start)++;
+		if (split)
+			break;
+	case 3:
+		nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS);
+		if (!nl_bands)
+			goto nla_put_failure;
 
-	if (mgmt_stypes) {
-		u16 stypes;
-		struct nlattr *nl_ftypes, *nl_ifs;
-		enum nl80211_iftype ift;
+		for (band = *band_start; band < IEEE80211_NUM_BANDS; band++) {
+			struct ieee80211_supported_band *sband;
 
-		nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES);
-		if (!nl_ifs)
-			goto nla_put_failure;
+			sband = dev->wiphy.bands[band];
 
-		for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
-			nl_ftypes = nla_nest_start(msg, ift);
-			if (!nl_ftypes)
+			if (!sband)
+				continue;
+
+			nl_band = nla_nest_start(msg, band);
+			if (!nl_band)
 				goto nla_put_failure;
-			i = 0;
-			stypes = mgmt_stypes[ift].tx;
-			while (stypes) {
-				if ((stypes & 1) &&
-				    nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
-						(i << 4) | IEEE80211_FTYPE_MGMT))
+
+			switch (*chan_start) {
+			case 0:
+				if (nl80211_send_band_rateinfo(msg, sband))
 					goto nla_put_failure;
-				stypes >>= 1;
-				i++;
+				(*chan_start)++;
+				if (split)
+					break;
+			default:
+				/* add frequencies */
+				nl_freqs = nla_nest_start(
+					msg, NL80211_BAND_ATTR_FREQS);
+				if (!nl_freqs)
+					goto nla_put_failure;
+
+				for (i = *chan_start - 1;
+				     i < sband->n_channels;
+				     i++) {
+					nl_freq = nla_nest_start(msg, i);
+					if (!nl_freq)
+						goto nla_put_failure;
+
+					chan = &sband->channels[i];
+
+					if (nl80211_msg_put_channel(msg, chan))
+						goto nla_put_failure;
+
+					nla_nest_end(msg, nl_freq);
+					if (split)
+						break;
+				}
+				if (i < sband->n_channels)
+					*chan_start = i + 2;
+				else
+					*chan_start = 0;
+				nla_nest_end(msg, nl_freqs);
+			}
+
+			nla_nest_end(msg, nl_band);
+
+			if (split) {
+				/* start again here */
+				if (*chan_start)
+					band--;
+				break;
 			}
-			nla_nest_end(msg, nl_ftypes);
 		}
+		nla_nest_end(msg, nl_bands);
 
-		nla_nest_end(msg, nl_ifs);
+		if (band < IEEE80211_NUM_BANDS)
+			*band_start = band + 1;
+		else
+			*band_start = 0;
 
-		nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES);
-		if (!nl_ifs)
+		/* if bands & channels are done, continue outside */
+		if (*band_start == 0 && *chan_start == 0)
+			(*split_start)++;
+		if (split)
+			break;
+	case 4:
+		nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS);
+		if (!nl_cmds)
 			goto nla_put_failure;
 
-		for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
-			nl_ftypes = nla_nest_start(msg, ift);
-			if (!nl_ftypes)
+		i = 0;
+#define CMD(op, n)							\
+		 do {							\
+			if (dev->ops->op) {				\
+				i++;					\
+				if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \
+					goto nla_put_failure;		\
+			}						\
+		} while (0)
+
+		CMD(add_virtual_intf, NEW_INTERFACE);
+		CMD(change_virtual_intf, SET_INTERFACE);
+		CMD(add_key, NEW_KEY);
+		CMD(start_ap, START_AP);
+		CMD(add_station, NEW_STATION);
+		CMD(add_mpath, NEW_MPATH);
+		CMD(update_mesh_config, SET_MESH_CONFIG);
+		CMD(change_bss, SET_BSS);
+		CMD(auth, AUTHENTICATE);
+		CMD(assoc, ASSOCIATE);
+		CMD(deauth, DEAUTHENTICATE);
+		CMD(disassoc, DISASSOCIATE);
+		CMD(join_ibss, JOIN_IBSS);
+		CMD(join_mesh, JOIN_MESH);
+		CMD(set_pmksa, SET_PMKSA);
+		CMD(del_pmksa, DEL_PMKSA);
+		CMD(flush_pmksa, FLUSH_PMKSA);
+		if (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)
+			CMD(remain_on_channel, REMAIN_ON_CHANNEL);
+		CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
+		CMD(mgmt_tx, FRAME);
+		CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL);
+		if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
+			i++;
+			if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS))
 				goto nla_put_failure;
-			i = 0;
-			stypes = mgmt_stypes[ift].rx;
-			while (stypes) {
-				if ((stypes & 1) &&
-				    nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE,
-						(i << 4) | IEEE80211_FTYPE_MGMT))
-					goto nla_put_failure;
-				stypes >>= 1;
-				i++;
-			}
-			nla_nest_end(msg, nl_ftypes);
 		}
-		nla_nest_end(msg, nl_ifs);
-	}
+		if (dev->ops->set_monitor_channel || dev->ops->start_ap ||
+		    dev->ops->join_mesh) {
+			i++;
+			if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL))
+				goto nla_put_failure;
+		}
+		CMD(set_wds_peer, SET_WDS_PEER);
+		if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) {
+			CMD(tdls_mgmt, TDLS_MGMT);
+			CMD(tdls_oper, TDLS_OPER);
+		}
+		if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
+			CMD(sched_scan_start, START_SCHED_SCAN);
+		CMD(probe_client, PROBE_CLIENT);
+		CMD(set_noack_map, SET_NOACK_MAP);
+		if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
+			i++;
+			if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS))
+				goto nla_put_failure;
+		}
+		CMD(start_p2p_device, START_P2P_DEVICE);
+		CMD(set_mcast_rate, SET_MCAST_RATE);
 
-#ifdef CONFIG_PM
-	if (dev->wiphy.wowlan.flags || dev->wiphy.wowlan.n_patterns) {
-		struct nlattr *nl_wowlan;
+#ifdef CONFIG_NL80211_TESTMODE
+		CMD(testmode_cmd, TESTMODE);
+#endif
 
-		nl_wowlan = nla_nest_start(msg,
-				NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED);
-		if (!nl_wowlan)
-			goto nla_put_failure;
+#undef CMD
 
-		if (((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_ANY) &&
-		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) ||
-		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_DISCONNECT) &&
-		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) ||
-		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_MAGIC_PKT) &&
-		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) ||
-		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) &&
-		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) ||
-		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) &&
-		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) ||
-		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) &&
-		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) ||
-		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) &&
-		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) ||
-		    ((dev->wiphy.wowlan.flags & WIPHY_WOWLAN_RFKILL_RELEASE) &&
-		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE)))
-		    goto nla_put_failure;
-		if (dev->wiphy.wowlan.n_patterns) {
-			struct nl80211_wowlan_pattern_support pat = {
-				.max_patterns = dev->wiphy.wowlan.n_patterns,
-				.min_pattern_len =
-					dev->wiphy.wowlan.pattern_min_len,
-				.max_pattern_len =
-					dev->wiphy.wowlan.pattern_max_len,
-				.max_pkt_offset =
-					dev->wiphy.wowlan.max_pkt_offset,
-			};
-			if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
-				    sizeof(pat), &pat))
+		if (dev->ops->connect || dev->ops->auth) {
+			i++;
+			if (nla_put_u32(msg, i, NL80211_CMD_CONNECT))
 				goto nla_put_failure;
 		}
 
-		nla_nest_end(msg, nl_wowlan);
-	}
+		if (dev->ops->disconnect || dev->ops->deauth) {
+			i++;
+			if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT))
+				goto nla_put_failure;
+		}
+
+		nla_nest_end(msg, nl_cmds);
+		(*split_start)++;
+		if (split)
+			break;
+	case 5:
+		if (dev->ops->remain_on_channel &&
+		    (dev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) &&
+		    nla_put_u32(msg,
+				NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION,
+				dev->wiphy.max_remain_on_channel_duration))
+			goto nla_put_failure;
+
+		if ((dev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) &&
+		    nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK))
+			goto nla_put_failure;
+
+		if (nl80211_send_mgmt_stypes(msg, mgmt_stypes))
+			goto nla_put_failure;
+		(*split_start)++;
+		if (split)
+			break;
+	case 6:
+#ifdef CONFIG_PM
+		if (nl80211_send_wowlan(msg, dev))
+			goto nla_put_failure;
+		(*split_start)++;
+		if (split)
+			break;
+#else
+		(*split_start)++;
 #endif
+	case 7:
+		if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES,
+					dev->wiphy.software_iftypes))
+			goto nla_put_failure;
 
-	if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES,
-				dev->wiphy.software_iftypes))
-		goto nla_put_failure;
+		if (nl80211_put_iface_combinations(&dev->wiphy, msg))
+			goto nla_put_failure;
 
-	if (nl80211_put_iface_combinations(&dev->wiphy, msg))
-		goto nla_put_failure;
+		(*split_start)++;
+		if (split)
+			break;
+	case 8:
+		if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) &&
+		    nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME,
+				dev->wiphy.ap_sme_capa))
+			goto nla_put_failure;
 
-	if ((dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) &&
-	    nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME,
-			dev->wiphy.ap_sme_capa))
-		goto nla_put_failure;
+		if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS,
+				dev->wiphy.features))
+			goto nla_put_failure;
 
-	if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS,
-			dev->wiphy.features))
-		goto nla_put_failure;
+		if (dev->wiphy.ht_capa_mod_mask &&
+		    nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK,
+			    sizeof(*dev->wiphy.ht_capa_mod_mask),
+			    dev->wiphy.ht_capa_mod_mask))
+			goto nla_put_failure;
 
-	if (dev->wiphy.ht_capa_mod_mask &&
-	    nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK,
-		    sizeof(*dev->wiphy.ht_capa_mod_mask),
-		    dev->wiphy.ht_capa_mod_mask))
-		goto nla_put_failure;
+		if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME &&
+		    dev->wiphy.max_acl_mac_addrs &&
+		    nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX,
+				dev->wiphy.max_acl_mac_addrs))
+			goto nla_put_failure;
 
-	if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME &&
-	    dev->wiphy.max_acl_mac_addrs &&
-	    nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX,
-			dev->wiphy.max_acl_mac_addrs))
-		goto nla_put_failure;
+		/*
+		 * Any information below this point is only available to
+		 * applications that can deal with it being split. This
+		 * helps ensure that newly added capabilities don't break
+		 * older tools by overrunning their buffers.
+		 *
+		 * We still increment split_start so that in the split
+		 * case we'll continue with more data in the next round,
+		 * but break unconditionally so unsplit data stops here.
+		 */
+		(*split_start)++;
+		break;
+	case 9:
+		/* placeholder */
 
+		/* done */
+		*split_start = 0;
+		break;
+	}
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
@@ -1310,39 +1444,80 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
 	int idx = 0, ret;
 	int start = cb->args[0];
 	struct cfg80211_registered_device *dev;
+	s64 filter_wiphy = -1;
+	bool split = false;
+	struct nlattr **tb = nl80211_fam.attrbuf;
+	int res;
 
 	mutex_lock(&cfg80211_mutex);
+	res = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
+			  tb, nl80211_fam.maxattr, nl80211_policy);
+	if (res == 0) {
+		split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP];
+		if (tb[NL80211_ATTR_WIPHY])
+			filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]);
+		if (tb[NL80211_ATTR_WDEV])
+			filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32;
+		if (tb[NL80211_ATTR_IFINDEX]) {
+			struct net_device *netdev;
+			int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]);
+
+			netdev = dev_get_by_index(sock_net(skb->sk), ifidx);
+			if (!netdev) {
+				mutex_unlock(&cfg80211_mutex);
+				return -ENODEV;
+			}
+			if (netdev->ieee80211_ptr) {
+				dev = wiphy_to_dev(
+					netdev->ieee80211_ptr->wiphy);
+				filter_wiphy = dev->wiphy_idx;
+			}
+			dev_put(netdev);
+		}
+	}
+
 	list_for_each_entry(dev, &cfg80211_rdev_list, list) {
 		if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk)))
 			continue;
 		if (++idx <= start)
 			continue;
-		ret = nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid,
-					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					 dev);
-		if (ret < 0) {
-			/*
-			 * If sending the wiphy data didn't fit (ENOBUFS or
-			 * EMSGSIZE returned), this SKB is still empty (so
-			 * it's not too big because another wiphy dataset is
-			 * already in the skb) and we've not tried to adjust
-			 * the dump allocation yet ... then adjust the alloc
-			 * size to be bigger, and return 1 but with the empty
-			 * skb. This results in an empty message being RX'ed
-			 * in userspace, but that is ignored.
-			 *
-			 * We can then retry with the larger buffer.
-			 */
-			if ((ret == -ENOBUFS || ret == -EMSGSIZE) &&
-			    !skb->len &&
-			    cb->min_dump_alloc < 4096) {
-				cb->min_dump_alloc = 4096;
-				mutex_unlock(&cfg80211_mutex);
-				return 1;
+		if (filter_wiphy != -1 && dev->wiphy_idx != filter_wiphy)
+			continue;
+		/* attempt to fit multiple wiphy data chunks into the skb */
+		do {
+			ret = nl80211_send_wiphy(dev, skb,
+						 NETLINK_CB(cb->skb).portid,
+						 cb->nlh->nlmsg_seq,
+						 NLM_F_MULTI,
+						 split, &cb->args[1],
+						 &cb->args[2],
+						 &cb->args[3]);
+			if (ret < 0) {
+				/*
+				 * If sending the wiphy data didn't fit (ENOBUFS
+				 * or EMSGSIZE returned), this SKB is still
+				 * empty (so it's not too big because another
+				 * wiphy dataset is already in the skb) and
+				 * we've not tried to adjust the dump allocation
+				 * yet ... then adjust the alloc size to be
+				 * bigger, and return 1 but with the empty skb.
+				 * This results in an empty message being RX'ed
+				 * in userspace, but that is ignored.
+				 *
+				 * We can then retry with the larger buffer.
+				 */
+				if ((ret == -ENOBUFS || ret == -EMSGSIZE) &&
+				    !skb->len &&
+				    cb->min_dump_alloc < 4096) {
+					cb->min_dump_alloc = 4096;
+					mutex_unlock(&cfg80211_mutex);
+					return 1;
+				}
+				idx--;
+				break;
 			}
-			idx--;
-			break;
-		}
+		} while (cb->args[1] > 0);
+		break;
 	}
 	mutex_unlock(&cfg80211_mutex);
 
@@ -1360,7 +1535,8 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
 	if (!msg)
 		return -ENOMEM;
 
-	if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) {
+	if (nl80211_send_wiphy(dev, msg, info->snd_portid, info->snd_seq, 0,
+			       false, NULL, NULL, NULL) < 0) {
 		nlmsg_free(msg);
 		return -ENOBUFS;
 	}
@@ -7821,6 +7997,33 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info)
 	return 0;
 }
 
+static int nl80211_get_protocol_features(struct sk_buff *skb,
+					 struct genl_info *info)
+{
+	void *hdr;
+	struct sk_buff *msg;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
+			     NL80211_CMD_GET_PROTOCOL_FEATURES);
+	if (!hdr)
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NL80211_ATTR_PROTOCOL_FEATURES,
+			NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+	return genlmsg_reply(msg, info);
+
+ nla_put_failure:
+	kfree_skb(msg);
+	return -ENOBUFS;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -8497,6 +8700,11 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_GET_PROTOCOL_FEATURES,
+		.doit = nl80211_get_protocol_features,
+		.policy = nl80211_policy,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -8524,7 +8732,8 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
 	if (!msg)
 		return;
 
-	if (nl80211_send_wiphy(msg, 0, 0, 0, rdev) < 0) {
+	if (nl80211_send_wiphy(rdev, msg, 0, 0, 0,
+			       false, NULL, NULL, NULL) < 0) {
 		nlmsg_free(msg);
 		return;
 	}
-- 
cgit 


From ee2aca343c9aa64d277a75a5df043299dc84cfd9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 21 Feb 2013 17:36:01 +0100
Subject: cfg80211: add ability to override VHT capabilities

For testing it's sometimes useful to be able to
override certain VHT capability advertisement,
add the ability to do that in cfg80211.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 12 +++++++++++
 include/uapi/linux/nl80211.h |  3 +++
 net/wireless/core.h          | 10 ++++++++--
 net/wireless/mlme.c          | 35 ++++++++++++++++++++++++++++++---
 net/wireless/nl80211.c       | 47 ++++++++++++++++++++++++++++++++++++++++++--
 net/wireless/sme.c           |  4 +++-
 6 files changed, 103 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ed2b08da3b93..73a523901c73 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1430,9 +1430,11 @@ struct cfg80211_auth_request {
  * enum cfg80211_assoc_req_flags - Over-ride default behaviour in association.
  *
  * @ASSOC_REQ_DISABLE_HT:  Disable HT (802.11n)
+ * @ASSOC_REQ_DISABLE_VHT:  Disable VHT
  */
 enum cfg80211_assoc_req_flags {
 	ASSOC_REQ_DISABLE_HT		= BIT(0),
+	ASSOC_REQ_DISABLE_VHT		= BIT(1),
 };
 
 /**
@@ -1454,6 +1456,8 @@ enum cfg80211_assoc_req_flags {
  * @ht_capa:  HT Capabilities over-rides.  Values set in ht_capa_mask
  *   will be used in ht_capa.  Un-supported values will be ignored.
  * @ht_capa_mask:  The bits of ht_capa which are to be used.
+ * @vht_capa: VHT capability override
+ * @vht_capa_mask: VHT capability mask indicating which fields to use
  */
 struct cfg80211_assoc_request {
 	struct cfg80211_bss *bss;
@@ -1464,6 +1468,7 @@ struct cfg80211_assoc_request {
 	u32 flags;
 	struct ieee80211_ht_cap ht_capa;
 	struct ieee80211_ht_cap ht_capa_mask;
+	struct ieee80211_vht_cap vht_capa, vht_capa_mask;
 };
 
 /**
@@ -1574,6 +1579,8 @@ struct cfg80211_ibss_params {
  * @ht_capa:  HT Capabilities over-rides.  Values set in ht_capa_mask
  *   will be used in ht_capa.  Un-supported values will be ignored.
  * @ht_capa_mask:  The bits of ht_capa which are to be used.
+ * @vht_capa:  VHT Capability overrides
+ * @vht_capa_mask: The bits of vht_capa which are to be used.
  */
 struct cfg80211_connect_params {
 	struct ieee80211_channel *channel;
@@ -1592,6 +1599,8 @@ struct cfg80211_connect_params {
 	int bg_scan_period;
 	struct ieee80211_ht_cap ht_capa;
 	struct ieee80211_ht_cap ht_capa_mask;
+	struct ieee80211_vht_cap vht_capa;
+	struct ieee80211_vht_cap vht_capa_mask;
 };
 
 /**
@@ -2516,6 +2525,8 @@ struct wiphy_wowlan_support {
  * @ap_sme_capa: AP SME capabilities, flags from &enum nl80211_ap_sme_features.
  * @ht_capa_mod_mask:  Specify what ht_cap values can be over-ridden.
  *	If null, then none can be over-ridden.
+ * @vht_capa_mod_mask:  Specify what VHT capabilities can be over-ridden.
+ *	If null, then none can be over-ridden.
  *
  * @max_acl_mac_addrs: Maximum number of MAC addresses that the device
  *	supports for ACL.
@@ -2624,6 +2635,7 @@ struct wiphy {
 	struct dentry *debugfsdir;
 
 	const struct ieee80211_ht_cap *ht_capa_mod_mask;
+	const struct ieee80211_vht_cap *vht_capa_mod_mask;
 
 #ifdef CONFIG_NET_NS
 	/* the network namespace this phy lives in currently */
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 9844c10a2999..2c3e88360037 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1685,6 +1685,9 @@ enum nl80211_attrs {
 	NL80211_ATTR_PROTOCOL_FEATURES,
 	NL80211_ATTR_SPLIT_WIPHY_DUMP,
 
+	NL80211_ATTR_DISABLE_VHT,
+	NL80211_ATTR_VHT_CAPABILITY_MASK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 3aec0e429d8a..c2f94f22bb22 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -335,7 +335,9 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			  const u8 *ie, int ie_len, bool use_mfp,
 			  struct cfg80211_crypto_settings *crypt,
 			  u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
-			  struct ieee80211_ht_cap *ht_capa_mask);
+			  struct ieee80211_ht_cap *ht_capa_mask,
+			  struct ieee80211_vht_cap *vht_capa,
+			  struct ieee80211_vht_cap *vht_capa_mask);
 int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			struct net_device *dev, struct ieee80211_channel *chan,
 			const u8 *bssid, const u8 *prev_bssid,
@@ -343,7 +345,9 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			const u8 *ie, int ie_len, bool use_mfp,
 			struct cfg80211_crypto_settings *crypt,
 			u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
-			struct ieee80211_ht_cap *ht_capa_mask);
+			struct ieee80211_ht_cap *ht_capa_mask,
+			struct ieee80211_vht_cap *vht_capa,
+			struct ieee80211_vht_cap *vht_capa_mask);
 int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 			   struct net_device *dev, const u8 *bssid,
 			   const u8 *ie, int ie_len, u16 reason,
@@ -375,6 +379,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  bool no_cck, bool dont_wait_for_ack, u64 *cookie);
 void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
 			       const struct ieee80211_ht_cap *ht_capa_mask);
+void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
+				const struct ieee80211_vht_cap *vht_capa_mask);
 
 /* SME */
 int __cfg80211_connect(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 5a97ce6d283b..c82adfee7697 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -343,6 +343,23 @@ void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
 		p1[i] &= p2[i];
 }
 
+/*  Do a logical ht_capa &= ht_capa_mask.  */
+void cfg80211_oper_and_vht_capa(struct ieee80211_vht_cap *vht_capa,
+				const struct ieee80211_vht_cap *vht_capa_mask)
+{
+	int i;
+	u8 *p1, *p2;
+	if (!vht_capa_mask) {
+		memset(vht_capa, 0, sizeof(*vht_capa));
+		return;
+	}
+
+	p1 = (u8*)(vht_capa);
+	p2 = (u8*)(vht_capa_mask);
+	for (i = 0; i < sizeof(*vht_capa); i++)
+		p1[i] &= p2[i];
+}
+
 int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev,
 			  struct ieee80211_channel *chan,
@@ -351,7 +368,9 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			  const u8 *ie, int ie_len, bool use_mfp,
 			  struct cfg80211_crypto_settings *crypt,
 			  u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
-			  struct ieee80211_ht_cap *ht_capa_mask)
+			  struct ieee80211_ht_cap *ht_capa_mask,
+			  struct ieee80211_vht_cap *vht_capa,
+			  struct ieee80211_vht_cap *vht_capa_mask)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_assoc_request req;
@@ -388,6 +407,13 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 		       sizeof(req.ht_capa_mask));
 	cfg80211_oper_and_ht_capa(&req.ht_capa_mask,
 				  rdev->wiphy.ht_capa_mod_mask);
+	if (vht_capa)
+		memcpy(&req.vht_capa, vht_capa, sizeof(req.vht_capa));
+	if (vht_capa_mask)
+		memcpy(&req.vht_capa_mask, vht_capa_mask,
+		       sizeof(req.vht_capa_mask));
+	cfg80211_oper_and_vht_capa(&req.vht_capa_mask,
+				   rdev->wiphy.vht_capa_mod_mask);
 
 	req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
 				   WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
@@ -422,7 +448,9 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			const u8 *ie, int ie_len, bool use_mfp,
 			struct cfg80211_crypto_settings *crypt,
 			u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
-			struct ieee80211_ht_cap *ht_capa_mask)
+			struct ieee80211_ht_cap *ht_capa_mask,
+			struct ieee80211_vht_cap *vht_capa,
+			struct ieee80211_vht_cap *vht_capa_mask)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
@@ -431,7 +459,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 	wdev_lock(wdev);
 	err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
 				    ssid, ssid_len, ie, ie_len, use_mfp, crypt,
-				    assoc_flags, ht_capa, ht_capa_mask);
+				    assoc_flags, ht_capa, ht_capa_mask,
+				    vht_capa, vht_capa_mask);
 	wdev_unlock(wdev);
 	mutex_unlock(&rdev->devlist_mtx);
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0e5176784b42..6a5893f5e481 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -371,6 +371,10 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 },
 	[NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, },
 	[NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, },
+	[NL80211_ATTR_DISABLE_VHT] = { .type = NLA_FLAG },
+	[NL80211_ATTR_VHT_CAPABILITY_MASK] = {
+		.len = NL80211_VHT_CAPABILITY_LEN,
+	},
 };
 
 /* policy for the key attributes */
@@ -1522,6 +1526,12 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
 			     dev->wiphy.extended_capabilities_mask)))
 			goto nla_put_failure;
 
+		if (dev->wiphy.vht_capa_mod_mask &&
+		    nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK,
+			    sizeof(*dev->wiphy.vht_capa_mod_mask),
+			    dev->wiphy.vht_capa_mod_mask))
+			goto nla_put_failure;
+
 		/* done */
 		*split_start = 0;
 		break;
@@ -5982,6 +5992,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 	u32 flags = 0;
 	struct ieee80211_ht_cap *ht_capa = NULL;
 	struct ieee80211_ht_cap *ht_capa_mask = NULL;
+	struct ieee80211_vht_cap *vht_capa = NULL;
+	struct ieee80211_vht_cap *vht_capa_mask = NULL;
 
 	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
@@ -6038,12 +6050,25 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 		ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
 	}
 
+	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
+		flags |= ASSOC_REQ_DISABLE_VHT;
+
+	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
+		vht_capa_mask =
+			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]);
+
+	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
+		if (!vht_capa_mask)
+			return -EINVAL;
+		vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
+	}
+
 	err = nl80211_crypto_settings(rdev, info, &crypto, 1);
 	if (!err)
 		err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
 					  ssid, ssid_len, ie, ie_len, use_mfp,
-					  &crypto, flags, ht_capa,
-					  ht_capa_mask);
+					  &crypto, flags, ht_capa, ht_capa_mask,
+					  vht_capa, vht_capa_mask);
 
 	return err;
 }
@@ -6623,6 +6648,24 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 		       sizeof(connect.ht_capa));
 	}
 
+	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
+		connect.flags |= ASSOC_REQ_DISABLE_VHT;
+
+	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
+		memcpy(&connect.vht_capa_mask,
+		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
+		       sizeof(connect.vht_capa_mask));
+
+	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
+		if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) {
+			kfree(connkeys);
+			return -EINVAL;
+		}
+		memcpy(&connect.vht_capa,
+		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]),
+		       sizeof(connect.vht_capa));
+	}
+
 	err = cfg80211_connect(rdev, dev, &connect, connkeys);
 	if (err)
 		kfree(connkeys);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index f432bd3755b1..7da118c034f0 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -195,7 +195,9 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 					    params->mfp != NL80211_MFP_NO,
 					    &params->crypto,
 					    params->flags, &params->ht_capa,
-					    &params->ht_capa_mask);
+					    &params->ht_capa_mask,
+					    &params->vht_capa,
+					    &params->vht_capa_mask);
 		if (err)
 			__cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
 					       NULL, 0,
-- 
cgit 


From 355199e02b831fd4f652c34d6c7673d973da1369 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@qca.qualcomm.com>
Date: Wed, 27 Feb 2013 17:14:27 +0200
Subject: cfg80211: Extend support for IEEE 802.11r Fast BSS Transition

Add NL80211_CMD_UPDATE_FT_IES to support update of FT IEs to the WLAN
driver and NL80211_CMD_FT_EVENT to send FT events from the WLAN driver.
This will carry the target AP's MAC address along with the relevant
Information Elements. This event is used to report received FT IEs
(MDIE, FTIE, RSN IE, TIE, RICIE). These changes allow FT to be supported
with drivers that use an internal SME instead of user space option (like
FT implementation in wpa_supplicant with mac80211-based drivers).

Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 41 ++++++++++++++++++++++++
 include/uapi/linux/nl80211.h | 19 +++++++++++
 net/wireless/nl80211.c       | 76 ++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/rdev-ops.h      | 13 ++++++++
 net/wireless/trace.h         | 46 +++++++++++++++++++++++++++
 5 files changed, 195 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 73a523901c73..dfef0d5b5d3d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1762,6 +1762,21 @@ struct cfg80211_gtk_rekey_data {
 	u8 replay_ctr[NL80211_REPLAY_CTR_LEN];
 };
 
+/**
+ * struct cfg80211_update_ft_ies_params - FT IE Information
+ *
+ * This structure provides information needed to update the fast transition IE
+ *
+ * @md: The Mobility Domain ID, 2 Octet value
+ * @ie: Fast Transition IEs
+ * @ie_len: Length of ft_ie in octets
+ */
+struct cfg80211_update_ft_ies_params {
+	u16 md;
+	const u8 *ie;
+	size_t ie_len;
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -2208,6 +2223,8 @@ struct cfg80211_ops {
 	int	(*start_radar_detection)(struct wiphy *wiphy,
 					 struct net_device *dev,
 					 struct cfg80211_chan_def *chandef);
+	int	(*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev,
+				 struct cfg80211_update_ft_ies_params *ftie);
 };
 
 /*
@@ -4044,6 +4061,30 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate);
  */
 void cfg80211_unregister_wdev(struct wireless_dev *wdev);
 
+/**
+ * struct cfg80211_ft_event - FT Information Elements
+ * @ies: FT IEs
+ * @ies_len: length of the FT IE in bytes
+ * @target_ap: target AP's MAC address
+ * @ric_ies: RIC IE
+ * @ric_ies_len: length of the RIC IE in bytes
+ */
+struct cfg80211_ft_event_params {
+	const u8 *ies;
+	size_t ies_len;
+	const u8 *target_ap;
+	const u8 *ric_ies;
+	size_t ric_ies_len;
+};
+
+/**
+ * cfg80211_ft_event - notify userspace about FT IE and RIC IE
+ * @netdev: network device
+ * @ft_event: IE information
+ */
+void cfg80211_ft_event(struct net_device *netdev,
+		       struct cfg80211_ft_event_params *ft_event);
+
 /**
  * cfg80211_get_p2p_attr - find and copy a P2P attribute from IE buffer
  * @ies: the input IE buffer
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 2c3e88360037..2d0cff57ff89 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -629,6 +629,14 @@
  *	i.e. features for the nl80211 protocol rather than device features.
  *	Returns the features in the %NL80211_ATTR_PROTOCOL_FEATURES bitmap.
  *
+ * @NL80211_CMD_UPDATE_FT_IES: Pass down the most up-to-date Fast Transition
+ *	Information Element to the WLAN driver
+ *
+ * @NL80211_CMD_FT_EVENT: Send a Fast transition event from the WLAN driver
+ *	to the supplicant. This will carry the target AP's MAC address along
+ *	with the relevant Information Elements. This event is used to report
+ *	received FT IEs (MDIE, FTIE, RSN IE, TIE, RICIE).
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -785,6 +793,9 @@ enum nl80211_commands {
 
 	NL80211_CMD_GET_PROTOCOL_FEATURES,
 
+	NL80211_CMD_UPDATE_FT_IES,
+	NL80211_CMD_FT_EVENT,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -1396,6 +1407,11 @@ enum nl80211_commands {
  *	receiving the data for a single wiphy split across multiple
  *	messages, given with wiphy dump message
  *
+ * @NL80211_ATTR_MDID: Mobility Domain Identifier
+ *
+ * @NL80211_ATTR_IE_RIC: Resource Information Container Information
+ *	Element
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1688,6 +1704,9 @@ enum nl80211_attrs {
 	NL80211_ATTR_DISABLE_VHT,
 	NL80211_ATTR_VHT_CAPABILITY_MASK,
 
+	NL80211_ATTR_MDID,
+	NL80211_ATTR_IE_RIC,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a8bd453d22b9..08de0c6035f1 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -375,6 +375,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_VHT_CAPABILITY_MASK] = {
 		.len = NL80211_VHT_CAPABILITY_LEN,
 	},
+	[NL80211_ATTR_MDID] = { .type = NLA_U16 },
+	[NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY,
+				  .len = IEEE80211_MAX_DATA_LEN },
 };
 
 /* policy for the key attributes */
@@ -8160,6 +8163,27 @@ static int nl80211_get_protocol_features(struct sk_buff *skb,
 	return -ENOBUFS;
 }
 
+static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct cfg80211_update_ft_ies_params ft_params;
+	struct net_device *dev = info->user_ptr[1];
+
+	if (!rdev->ops->update_ft_ies)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_MDID] ||
+	    !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
+		return -EINVAL;
+
+	memset(&ft_params, 0, sizeof(ft_params));
+	ft_params.md = nla_get_u16(info->attrs[NL80211_ATTR_MDID]);
+	ft_params.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
+	ft_params.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
+
+	return rdev_update_ft_ies(rdev, dev, &ft_params);
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -8841,6 +8865,14 @@ static struct genl_ops nl80211_ops[] = {
 		.doit = nl80211_get_protocol_features,
 		.policy = nl80211_policy,
 	},
+	{
+		.cmd = NL80211_CMD_UPDATE_FT_IES,
+		.doit = nl80211_update_ft_ies,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -10542,6 +10574,50 @@ static struct notifier_block nl80211_netlink_notifier = {
 	.notifier_call = nl80211_netlink_notify,
 };
 
+void cfg80211_ft_event(struct net_device *netdev,
+		       struct cfg80211_ft_event_params *ft_event)
+{
+	struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	trace_cfg80211_ft_event(wiphy, netdev, ft_event);
+
+	if (!ft_event->target_ap)
+		return;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+	nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap);
+	if (ft_event->ies)
+		nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies);
+	if (ft_event->ric_ies)
+		nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len,
+			ft_event->ric_ies);
+
+	err = genlmsg_end(msg, hdr);
+	if (err < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+				nl80211_mlme_mcgrp.id, GFP_KERNEL);
+}
+EXPORT_SYMBOL(cfg80211_ft_event);
+
 /* initialisation/exit functions */
 
 int nl80211_init(void)
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 422d38291d66..8c8b26f574e8 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -887,4 +887,17 @@ static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
 	trace_rdev_return_int(&rdev->wiphy, ret);
 	return ret;
 }
+
+static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev,
+				     struct net_device *dev,
+				     struct cfg80211_update_ft_ies_params *ftie)
+{
+	int ret;
+
+	trace_rdev_update_ft_ies(&rdev->wiphy, dev, ftie);
+	ret = rdev->ops->update_ft_ies(&rdev->wiphy, dev, ftie);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
 #endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index b7a531380e19..ccadef2106ac 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1785,6 +1785,26 @@ TRACE_EVENT(rdev_set_mac_acl,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->acl_policy)
 );
 
+TRACE_EVENT(rdev_update_ft_ies,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_update_ft_ies_params *ftie),
+	TP_ARGS(wiphy, netdev, ftie),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		__field(u16, md)
+		__dynamic_array(u8, ie, ftie->ie_len)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		__entry->md = ftie->md;
+		memcpy(__get_dynamic_array(ie), ftie->ie, ftie->ie_len);
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", md: 0x%x",
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md)
+);
+
 /*************************************************************
  *	     cfg80211 exported functions traces		     *
  *************************************************************/
@@ -2413,6 +2433,32 @@ TRACE_EVENT(cfg80211_report_wowlan_wakeup,
 	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT, WIPHY_PR_ARG, WDEV_PR_ARG)
 );
 
+TRACE_EVENT(cfg80211_ft_event,
+	TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+		 struct cfg80211_ft_event_params *ft_event),
+	TP_ARGS(wiphy, netdev, ft_event),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		NETDEV_ENTRY
+		__dynamic_array(u8, ies, ft_event->ies_len)
+		MAC_ENTRY(target_ap)
+		__dynamic_array(u8, ric_ies, ft_event->ric_ies_len)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		NETDEV_ASSIGN;
+		if (ft_event->ies)
+			memcpy(__get_dynamic_array(ies), ft_event->ies,
+			       ft_event->ies_len);
+		MAC_ASSIGN(target_ap, ft_event->target_ap);
+		if (ft_event->ric_ies)
+			memcpy(__get_dynamic_array(ric_ies), ft_event->ric_ies,
+			       ft_event->ric_ies_len);
+	),
+	TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: " MAC_PR_FMT,
+		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap))
+);
+
 #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
-- 
cgit 


From bb2798d45fc0575f5d08c0bb7baf4d5d5e8cc0c3 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@cozybit.com>
Date: Mon, 4 Mar 2013 13:06:10 -0800
Subject: nl80211: explicit userspace MPM

Secure mesh had the implicit requirement that the Mesh
Peering Management entity be in userspace.  However
userspace might want to implement an open MPM as well, so
specify a mesh setup parameter to indicate this.

Signed-off-by: Thomas Pedersen <thomas@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  2 ++
 include/uapi/linux/nl80211.h | 25 ++++++++++++++++++-------
 net/wireless/mesh.c          |  1 +
 net/wireless/nl80211.c       |  8 ++++++++
 4 files changed, 29 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index dfef0d5b5d3d..69b2b2631b9a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1151,6 +1151,7 @@ struct mesh_config {
  * @ie_len: length of vendor information elements
  * @is_authenticated: this mesh requires authentication
  * @is_secure: this mesh uses security
+ * @user_mpm: userspace handles all MPM functions
  * @dtim_period: DTIM period to use
  * @beacon_interval: beacon interval to use
  * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a]
@@ -1168,6 +1169,7 @@ struct mesh_setup {
 	u8 ie_len;
 	bool is_authenticated;
 	bool is_secure;
+	bool user_mpm;
 	u8 dtim_period;
 	u16 beacon_interval;
 	int mcast_rate[IEEE80211_NUM_BANDS];
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 2d0cff57ff89..8134c6a96f57 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -513,9 +513,11 @@
  * @NL80211_CMD_NEW_PEER_CANDIDATE: Notification on the reception of a
  *      beacon or probe response from a compatible mesh peer.  This is only
  *      sent while no station information (sta_info) exists for the new peer
- *      candidate and when @NL80211_MESH_SETUP_USERSPACE_AUTH is set.  On
- *      reception of this notification, userspace may decide to create a new
- *      station (@NL80211_CMD_NEW_STATION).  To stop this notification from
+ *      candidate and when @NL80211_MESH_SETUP_USERSPACE_AUTH,
+ *      @NL80211_MESH_SETUP_USERSPACE_AMPE, or
+ *      @NL80211_MESH_SETUP_USERSPACE_MPM is set.  On reception of this
+ *      notification, userspace may decide to create a new station
+ *      (@NL80211_CMD_NEW_STATION).  To stop this notification from
  *      reoccurring, the userspace authentication daemon may want to create the
  *      new station with the AUTHENTICATED flag unset and maybe change it later
  *      depending on the authentication result.
@@ -1199,10 +1201,10 @@ enum nl80211_commands {
  * @NL80211_ATTR_SUPPORT_MESH_AUTH: Currently, this means the underlying driver
  *	allows auth frames in a mesh to be passed to userspace for processing via
  *	the @NL80211_MESH_SETUP_USERSPACE_AUTH flag.
- * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link as
- *	defined in &enum nl80211_plink_state. Used when userspace is
- *	driving the peer link management state machine.
- *	@NL80211_MESH_SETUP_USERSPACE_AMPE must be enabled.
+ * @NL80211_ATTR_STA_PLINK_STATE: The state of a mesh peer link as defined in
+ *	&enum nl80211_plink_state. Used when userspace is driving the peer link
+ *	management state machine.  @NL80211_MESH_SETUP_USERSPACE_AMPE or
+ *	@NL80211_MESH_SETUP_USERSPACE_MPM must be enabled.
  *
  * @NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED: indicates, as part of the wiphy
  *	capabilities, the supported WoWLAN triggers
@@ -2612,6 +2614,9 @@ enum nl80211_meshconf_params {
  *	vendor specific synchronization method or disable it to use the default
  *	neighbor offset synchronization
  *
+ * @NL80211_MESH_SETUP_USERSPACE_MPM: Enable this option if userspace will
+ *	implement an MPM which handles peer allocation and state.
+ *
  * @NL80211_MESH_SETUP_ATTR_MAX: highest possible mesh setup attribute number
  *
  * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use
@@ -2624,6 +2629,7 @@ enum nl80211_mesh_setup_params {
 	NL80211_MESH_SETUP_USERSPACE_AUTH,
 	NL80211_MESH_SETUP_USERSPACE_AMPE,
 	NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC,
+	NL80211_MESH_SETUP_USERSPACE_MPM,
 
 	/* keep last */
 	__NL80211_MESH_SETUP_ATTR_AFTER_LAST,
@@ -3526,6 +3532,10 @@ enum nl80211_ap_sme_features {
  *	stations the authenticated/associated bits have to be set in the mask.
  * @NL80211_FEATURE_ADVERTISE_CHAN_LIMITS: cfg80211 advertises channel limits
  *	(HT40, VHT 80/160 MHz) if this flag is set
+ * @NL80211_FEATURE_USERSPACE_MPM: This driver supports a userspace Mesh
+ *	Peering Management entity which may be implemented by registering for
+ *	beacons or NL80211_CMD_NEW_PEER_CANDIDATE events. The mesh beacon is
+ *	still generated by the driver.
  */
 enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS			= 1 << 0,
@@ -3544,6 +3554,7 @@ enum nl80211_feature_flags {
 	/* bit 13 is reserved */
 	NL80211_FEATURE_ADVERTISE_CHAN_LIMITS		= 1 << 14,
 	NL80211_FEATURE_FULL_AP_CLIENT_STATE		= 1 << 15,
+	NL80211_FEATURE_USERSPACE_MPM			= 1 << 16,
 };
 
 /**
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index 9688b249a805..0bb93f3061a4 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -85,6 +85,7 @@ const struct mesh_setup default_mesh_setup = {
 	.ie = NULL,
 	.ie_len = 0,
 	.is_secure = false,
+	.user_mpm = false,
 	.beacon_interval = MESH_DEFAULT_BEACON_INTERVAL,
 	.dtim_period = MESH_DEFAULT_DTIM_PERIOD,
 };
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7469020175d5..bdf39836d9d8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4618,6 +4618,7 @@ static const struct nla_policy
 	[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 },
 	[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 },
 	[NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG },
+	[NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG },
 	[NL80211_MESH_SETUP_IE] = { .type = NLA_BINARY,
 				    .len = IEEE80211_MAX_DATA_LEN },
 	[NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG },
@@ -4756,6 +4757,7 @@ do {									    \
 static int nl80211_parse_mesh_setup(struct genl_info *info,
 				     struct mesh_setup *setup)
 {
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1];
 
 	if (!info->attrs[NL80211_ATTR_MESH_SETUP])
@@ -4792,8 +4794,14 @@ static int nl80211_parse_mesh_setup(struct genl_info *info,
 		setup->ie = nla_data(ieattr);
 		setup->ie_len = nla_len(ieattr);
 	}
+	if (tb[NL80211_MESH_SETUP_USERSPACE_MPM] &&
+	    !(rdev->wiphy.features & NL80211_FEATURE_USERSPACE_MPM))
+		return -EINVAL;
+	setup->user_mpm = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]);
 	setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]);
 	setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]);
+	if (setup->is_secure)
+		setup->user_mpm = true;
 
 	return 0;
 }
-- 
cgit 


From d37bb18ae3a3fa7ef239aad533742a8b07eae15f Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@cozybit.com>
Date: Mon, 4 Mar 2013 13:06:13 -0800
Subject: nl80211: user_mpm overrides auto_open_plinks

If the user requested a userspace MPM, automatically
disable auto_open_plinks to fully disable the kernel MPM.

Signed-off-by: Thomas Pedersen <thomas@cozybit.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 6 ++++--
 net/wireless/nl80211.c       | 3 +++
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 8134c6a96f57..79da8710448e 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2467,8 +2467,10 @@ enum nl80211_mesh_power_mode {
  * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh
  *	point.
  *
- * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically
- *	open peer links when we detect compatible mesh peers.
+ * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically open
+ *	peer links when we detect compatible mesh peers. Disabled if
+ *	@NL80211_MESH_SETUP_USERSPACE_MPM or @NL80211_MESH_SETUP_USERSPACE_AMPE are
+ *	set.
  *
  * @NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES: the number of action frames
  *	containing a PREQ that an MP can send to a particular destination (path
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 946b2e7acdf2..f924d45af1b8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7449,6 +7449,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 			return err;
 	}
 
+	if (setup.user_mpm)
+		cfg.auto_open_plinks = false;
+
 	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
 		err = nl80211_parse_chandef(rdev, info, &setup.chandef);
 		if (err)
-- 
cgit 


From 6906f4ed6f85b2d72fd944e15da6a905fdde8b40 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 6 Mar 2013 06:49:21 +0000
Subject: htb: add HTB_DIRECT_QLEN attribute

HTB uses an internal pfifo queue, which limit is not reported
to userland tools (tc), and value inherited from device tx_queue_len
at setup time.

Introduce TCA_HTB_DIRECT_QLEN attribute to allow finer control.

Remove two obsolete pr_err() calls as well.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_sched.h |  1 +
 net/sched/sch_htb.c            | 31 ++++++++++++++++---------------
 2 files changed, 17 insertions(+), 15 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 32aef0a439ef..dbd71b0c7d8c 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -348,6 +348,7 @@ enum {
 	TCA_HTB_INIT,
 	TCA_HTB_CTAB,
 	TCA_HTB_RTAB,
+	TCA_HTB_DIRECT_QLEN,
 	__TCA_HTB_MAX,
 };
 
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 571f1d211f4d..79b1876b6cd2 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -981,6 +981,7 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
 	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
 	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
 	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
+	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
 };
 
 static void htb_work_func(struct work_struct *work)
@@ -994,7 +995,7 @@ static void htb_work_func(struct work_struct *work)
 static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct htb_sched *q = qdisc_priv(sch);
-	struct nlattr *tb[TCA_HTB_INIT + 1];
+	struct nlattr *tb[TCA_HTB_MAX + 1];
 	struct tc_htb_glob *gopt;
 	int err;
 	int i;
@@ -1002,20 +1003,16 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 	if (!opt)
 		return -EINVAL;
 
-	err = nla_parse_nested(tb, TCA_HTB_INIT, opt, htb_policy);
+	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_HTB_INIT] == NULL) {
-		pr_err("HTB: hey probably you have bad tc tool ?\n");
+	if (!tb[TCA_HTB_INIT])
 		return -EINVAL;
-	}
+
 	gopt = nla_data(tb[TCA_HTB_INIT]);
-	if (gopt->version != HTB_VER >> 16) {
-		pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
-		       HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
+	if (gopt->version != HTB_VER >> 16)
 		return -EINVAL;
-	}
 
 	err = qdisc_class_hash_init(&q->clhash);
 	if (err < 0)
@@ -1027,10 +1024,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 	INIT_WORK(&q->work, htb_work_func);
 	skb_queue_head_init(&q->direct_queue);
 
-	q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
-	if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
-		q->direct_qlen = 2;
-
+	if (tb[TCA_HTB_DIRECT_QLEN])
+		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
+	else {
+		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
+		if (q->direct_qlen < 2)	/* some devices have zero tx_queue_len */
+			q->direct_qlen = 2;
+	}
 	if ((q->rate2quantum = gopt->rate2quantum) < 1)
 		q->rate2quantum = 1;
 	q->defcls = gopt->defcls;
@@ -1056,7 +1056,8 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	nest = nla_nest_start(skb, TCA_OPTIONS);
 	if (nest == NULL)
 		goto nla_put_failure;
-	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt))
+	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
+	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
 		goto nla_put_failure;
 	nla_nest_end(skb, nest);
 
@@ -1311,7 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
 	struct nlattr *opt = tca[TCA_OPTIONS];
-	struct nlattr *tb[__TCA_HTB_MAX];
+	struct nlattr *tb[TCA_HTB_MAX + 1];
 	struct tc_htb_opt *hopt;
 
 	/* extract all subattrs from opt attr */
-- 
cgit 


From c031e234ee304b507b79f76a7677ea0a7a8890e8 Mon Sep 17 00:00:00 2001
From: Andy King <acking@vmware.com>
Date: Thu, 7 Mar 2013 05:26:13 +0000
Subject: VSOCK: Split vm_sockets.h into kernel/uapi

Split the vSockets header into kernel and UAPI parts.  The former gets the bits
that used to be in __KERNEL__ guards, while the latter gets everything that is
user-visible.  Tested by compiling vsock (+transport) and a simple user-mode
vSockets application.

Reported-by: David Howells <dhowells@redhat.com>
Acked-by: Dmitry Torokhov <dtor@vmware.com>
Signed-off-by: Andy King <acking@vmware.com>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/vm_sockets.h      | 23 +++++++++++++++++++++++
 include/uapi/linux/vm_sockets.h | 23 ++++++++---------------
 2 files changed, 31 insertions(+), 15 deletions(-)
 create mode 100644 include/linux/vm_sockets.h

(limited to 'include/uapi/linux')

diff --git a/include/linux/vm_sockets.h b/include/linux/vm_sockets.h
new file mode 100644
index 000000000000..0805eecba8f7
--- /dev/null
+++ b/include/linux/vm_sockets.h
@@ -0,0 +1,23 @@
+/*
+ * VMware vSockets Driver
+ *
+ * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _VM_SOCKETS_H
+#define _VM_SOCKETS_H
+
+#include <uapi/linux/vm_sockets.h>
+
+int vm_sockets_get_local_cid(void);
+
+#endif /* _VM_SOCKETS_H */
diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
index df91301847ec..b4ed5d895699 100644
--- a/include/uapi/linux/vm_sockets.h
+++ b/include/uapi/linux/vm_sockets.h
@@ -13,12 +13,10 @@
  * more details.
  */
 
-#ifndef _VM_SOCKETS_H_
-#define _VM_SOCKETS_H_
+#ifndef _UAPI_VM_SOCKETS_H
+#define _UAPI_VM_SOCKETS_H
 
-#if !defined(__KERNEL__)
-#include <sys/socket.h>
-#endif
+#include <linux/socket.h>
 
 /* Option name for STREAM socket buffer size.  Use as the option name in
  * setsockopt(3) or getsockopt(3) to set or get an unsigned long long that
@@ -137,14 +135,13 @@
 #define VM_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF))
 
 /* Address structure for vSockets.   The address family should be set to
- * whatever vmci_sock_get_af_value_fd() returns.  The structure members should
- * all align on their natural boundaries without resorting to compiler packing
- * directives.  The total size of this structure should be exactly the same as
- * that of struct sockaddr.
+ * AF_VSOCK.  The structure members should all align on their natural
+ * boundaries without resorting to compiler packing directives.  The total size
+ * of this structure should be exactly the same as that of struct sockaddr.
  */
 
 struct sockaddr_vm {
-	sa_family_t svm_family;
+	__kernel_sa_family_t svm_family;
 	unsigned short svm_reserved1;
 	unsigned int svm_port;
 	unsigned int svm_cid;
@@ -156,8 +153,4 @@ struct sockaddr_vm {
 
 #define IOCTL_VM_SOCKETS_GET_LOCAL_CID		_IO(7, 0xb9)
 
-#if defined(__KERNEL__)
-int vm_sockets_get_local_cid(void);
-#endif
-
-#endif
+#endif /* _UAPI_VM_SOCKETS_H */
-- 
cgit 


From 26fd76cab2e61cedc5c25f7151fb31b57ddc53c7 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Fri, 22 Feb 2013 10:53:25 +0100
Subject: NFC: llcp: Implement socket options

Some LLCP services (e.g. the validation ones) require some control over
the LLCP link parameters like the receive window (RW) or the MIU extension
(MIUX). This can only be done through socket options.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/linux/socket.h   |   1 +
 include/uapi/linux/nfc.h |   4 ++
 net/nfc/llcp/llcp.h      |   3 ++
 net/nfc/llcp/sock.c      | 119 ++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 125 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 2b9f74b0ffea..428c37a1f95c 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -298,6 +298,7 @@ struct ucred {
 #define SOL_IUCV	277
 #define SOL_CAIF	278
 #define SOL_ALG		279
+#define SOL_NFC		280
 
 /* IPX options */
 #define IPX_TYPE	1
diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index 7969f46f1bb3..855630fe731d 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -220,4 +220,8 @@ struct sockaddr_nfc_llcp {
 #define NFC_LLCP_DIRECTION_RX		0x00
 #define NFC_LLCP_DIRECTION_TX		0x01
 
+/* socket option names */
+#define NFC_LLCP_RW   0
+#define NFC_LLCP_MIUX 1
+
 #endif /*__LINUX_NFC_H */
diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h
index 32cec81939e6..5f117adac2e5 100644
--- a/net/nfc/llcp/llcp.h
+++ b/net/nfc/llcp/llcp.h
@@ -104,6 +104,9 @@ struct nfc_llcp_sock {
 	u8 dsap;
 	char *service_name;
 	size_t service_name_len;
+	u8 rw;
+	u16 miux;
+
 
 	/* Remote link parameters */
 	u8 remote_rw;
diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c
index cc564992ba95..9357a756f7a9 100644
--- a/net/nfc/llcp/sock.c
+++ b/net/nfc/llcp/sock.c
@@ -223,6 +223,121 @@ error:
 	return ret;
 }
 
+static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	u32 opt;
+	int err = 0;
+
+	pr_debug("%p optname %d\n", sk, optname);
+
+	if (level != SOL_NFC)
+		return -ENOPROTOOPT;
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case NFC_LLCP_RW:
+		if (sk->sk_state == LLCP_CONNECTED ||
+		    sk->sk_state == LLCP_BOUND ||
+		    sk->sk_state == LLCP_LISTEN) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (get_user(opt, (u32 __user *) optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (opt > LLCP_MAX_RW) {
+			err = -EINVAL;
+			break;
+		}
+
+		llcp_sock->rw = (u8) opt;
+
+		break;
+
+	case NFC_LLCP_MIUX:
+		if (sk->sk_state == LLCP_CONNECTED ||
+		    sk->sk_state == LLCP_BOUND ||
+		    sk->sk_state == LLCP_LISTEN) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (get_user(opt, (u32 __user *) optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		if (opt > LLCP_MAX_MIUX) {
+			err = -EINVAL;
+			break;
+		}
+
+		llcp_sock->miux = (u16) opt;
+
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+
+	return err;
+}
+
+static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	int len, err = 0;
+
+	pr_debug("%p optname %d\n", sk, optname);
+
+	if (level != SOL_NFC)
+		return -ENOPROTOOPT;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	len = min_t(u32, len, sizeof(u32));
+
+	lock_sock(sk);
+
+	switch (optname) {
+	case NFC_LLCP_RW:
+		if (put_user(llcp_sock->rw, (u32 __user *) optval))
+			err = -EFAULT;
+
+		break;
+
+	case NFC_LLCP_MIUX:
+		if (put_user(llcp_sock->miux, (u32 __user *) optval))
+			err = -EFAULT;
+
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	release_sock(sk);
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	return err;
+}
+
 void nfc_llcp_accept_unlink(struct sock *sk)
 {
 	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
@@ -735,8 +850,8 @@ static const struct proto_ops llcp_sock_ops = {
 	.ioctl          = sock_no_ioctl,
 	.listen         = llcp_sock_listen,
 	.shutdown       = sock_no_shutdown,
-	.setsockopt     = sock_no_setsockopt,
-	.getsockopt     = sock_no_getsockopt,
+	.setsockopt     = nfc_llcp_setsockopt,
+	.getsockopt     = nfc_llcp_getsockopt,
 	.sendmsg        = llcp_sock_sendmsg,
 	.recvmsg        = llcp_sock_recvmsg,
 	.mmap           = sock_no_mmap,
-- 
cgit 


From d9b8d8e19b073096d3609bbd60f82148d128b555 Mon Sep 17 00:00:00 2001
From: Thierry Escande <thierry.escande@linux.intel.com>
Date: Fri, 15 Feb 2013 10:43:06 +0100
Subject: NFC: llcp: Service Name Lookup netlink interface

This adds a netlink interface for service name lookup support.
Multiple URIs can be passed nested into the NFC_ATTR_LLC_SDP attribute
using the NFC_CMD_LLC_SDREQ netlink command.
When the SNL reply is received, a NFC_EVENT_LLC_SDRES event is sent to
the user space. URI and SAP tuples are passed back, nested into
NFC_ATTR_LLC_SDP attribute.

Signed-off-by: Thierry Escande <thierry.escande@linux.intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/uapi/linux/nfc.h |  12 ++++
 net/nfc/llcp/commands.c  |  78 ++++++++++++++++++++++
 net/nfc/llcp/llcp.c      |  32 +++++++++
 net/nfc/llcp/llcp.h      |   9 +++
 net/nfc/netlink.c        | 169 +++++++++++++++++++++++++++++++++++++++++++++++
 net/nfc/nfc.h            |  14 ++++
 6 files changed, 314 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index 855630fe731d..7440bc81a04b 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -90,6 +90,8 @@ enum nfc_commands {
 	NFC_CMD_LLC_SET_PARAMS,
 	NFC_CMD_ENABLE_SE,
 	NFC_CMD_DISABLE_SE,
+	NFC_CMD_LLC_SDREQ,
+	NFC_EVENT_LLC_SDRES,
 /* private: internal use only */
 	__NFC_CMD_AFTER_LAST
 };
@@ -140,11 +142,21 @@ enum nfc_attrs {
 	NFC_ATTR_LLC_PARAM_RW,
 	NFC_ATTR_LLC_PARAM_MIUX,
 	NFC_ATTR_SE,
+	NFC_ATTR_LLC_SDP,
 /* private: internal use only */
 	__NFC_ATTR_AFTER_LAST
 };
 #define NFC_ATTR_MAX (__NFC_ATTR_AFTER_LAST - 1)
 
+enum nfc_sdp_attr {
+	NFC_SDP_ATTR_UNSPEC,
+	NFC_SDP_ATTR_URI,
+	NFC_SDP_ATTR_SAP,
+/* private: internal use only */
+	__NFC_SDP_ATTR_AFTER_LAST
+};
+#define NFC_SDP_ATTR_MAX (__NFC_SDP_ATTR_AFTER_LAST - 1)
+
 #define NFC_DEVICE_NAME_MAXSIZE 8
 #define NFC_NFCID1_MAXSIZE 10
 #define NFC_SENSB_RES_MAXSIZE 12
diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c
index 59f7ffca783e..c943edb07b72 100644
--- a/net/nfc/llcp/commands.c
+++ b/net/nfc/llcp/commands.c
@@ -144,12 +144,59 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap)
 	return sdres;
 }
 
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
+						  size_t uri_len)
+{
+	struct nfc_llcp_sdp_tlv *sdreq;
+
+	pr_debug("uri: %s, len: %zu\n", uri, uri_len);
+
+	sdreq = kzalloc(sizeof(struct nfc_llcp_sdp_tlv), GFP_KERNEL);
+	if (sdreq == NULL)
+		return NULL;
+
+	sdreq->tlv_len = uri_len + 3;
+
+	if (uri[uri_len - 1] == 0)
+		sdreq->tlv_len--;
+
+	sdreq->tlv = kzalloc(sdreq->tlv_len + 1, GFP_KERNEL);
+	if (sdreq->tlv == NULL) {
+		kfree(sdreq);
+		return NULL;
+	}
+
+	sdreq->tlv[0] = LLCP_TLV_SDREQ;
+	sdreq->tlv[1] = sdreq->tlv_len - 2;
+	sdreq->tlv[2] = tid;
+
+	sdreq->tid = tid;
+	sdreq->uri = sdreq->tlv + 3;
+	memcpy(sdreq->uri, uri, uri_len);
+
+	INIT_HLIST_NODE(&sdreq->node);
+
+	return sdreq;
+}
+
 void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp)
 {
 	kfree(sdp->tlv);
 	kfree(sdp);
 }
 
+void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head)
+{
+	struct nfc_llcp_sdp_tlv *sdp;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(sdp, n, head, node) {
+		hlist_del(&sdp->node);
+
+		nfc_llcp_free_sdp_tlv(sdp);
+	}
+}
+
 int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
 			  u8 *tlv_array, u16 tlv_array_len)
 {
@@ -511,6 +558,37 @@ int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local,
 	return 0;
 }
 
+int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local,
+			    struct hlist_head *tlv_list, size_t tlvs_len)
+{
+	struct nfc_llcp_sdp_tlv *sdreq;
+	struct hlist_node *n;
+	struct sk_buff *skb;
+
+	skb = nfc_llcp_allocate_snl(local, tlvs_len);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	mutex_lock(&local->sdreq_lock);
+
+	hlist_for_each_entry_safe(sdreq, n, tlv_list, node) {
+		pr_debug("tid %d for %s\n", sdreq->tid, sdreq->uri);
+
+		memcpy(skb_put(skb, sdreq->tlv_len), sdreq->tlv,
+		       sdreq->tlv_len);
+
+		hlist_del(&sdreq->node);
+
+		hlist_add_head(&sdreq->node, &local->pending_sdreqs);
+	}
+
+	mutex_unlock(&local->sdreq_lock);
+
+	skb_queue_tail(&local->tx_queue, skb);
+
+	return 0;
+}
+
 int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason)
 {
 	struct sk_buff *skb;
diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c
index 30b61c1aa984..99e911060422 100644
--- a/net/nfc/llcp/llcp.c
+++ b/net/nfc/llcp/llcp.c
@@ -156,6 +156,7 @@ static void local_release(struct kref *ref)
 	cancel_work_sync(&local->rx_work);
 	cancel_work_sync(&local->timeout_work);
 	kfree_skb(local->rx_pending);
+	nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs);
 	kfree(local);
 }
 
@@ -1147,6 +1148,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
 	struct nfc_llcp_sdp_tlv *sdp;
 	HLIST_HEAD(llc_sdres_list);
 	size_t sdres_tlvs_len;
+	HLIST_HEAD(nl_sdres_list);
 
 	dsap = nfc_llcp_dsap(skb);
 	ssap = nfc_llcp_ssap(skb);
@@ -1229,6 +1231,30 @@ add_snl:
 			hlist_add_head(&sdp->node, &llc_sdres_list);
 			break;
 
+		case LLCP_TLV_SDRES:
+			mutex_lock(&local->sdreq_lock);
+
+			pr_debug("LLCP_TLV_SDRES: searching tid %d\n", tlv[2]);
+
+			hlist_for_each_entry(sdp, &local->pending_sdreqs, node) {
+				if (sdp->tid != tlv[2])
+					continue;
+
+				sdp->sap = tlv[3];
+
+				pr_debug("Found: uri=%s, sap=%d\n",
+					 sdp->uri, sdp->sap);
+
+				hlist_del(&sdp->node);
+
+				hlist_add_head(&sdp->node, &nl_sdres_list);
+
+				break;
+			}
+
+			mutex_unlock(&local->sdreq_lock);
+			break;
+
 		default:
 			pr_err("Invalid SNL tlv value 0x%x\n", type);
 			break;
@@ -1239,6 +1265,9 @@ add_snl:
 	}
 
 exit:
+	if (!hlist_empty(&nl_sdres_list))
+		nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list);
+
 	if (!hlist_empty(&llc_sdres_list))
 		nfc_llcp_send_snl_sdres(local, &llc_sdres_list, sdres_tlvs_len);
 }
@@ -1426,6 +1455,9 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
 	local->remote_miu = LLCP_DEFAULT_MIU;
 	local->remote_lto = LLCP_DEFAULT_LTO;
 
+	mutex_init(&local->sdreq_lock);
+	INIT_HLIST_HEAD(&local->pending_sdreqs);
+
 	list_add(&local->list, &llcp_devices);
 
 	return 0;
diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h
index 465f5953e2d1..ca8c6d94ab85 100644
--- a/net/nfc/llcp/llcp.h
+++ b/net/nfc/llcp/llcp.h
@@ -97,6 +97,10 @@ struct nfc_llcp_local {
 	u8  remote_opt;
 	u16 remote_wks;
 
+	struct mutex sdreq_lock;
+	struct hlist_head pending_sdreqs;
+	u8 sdreq_next_tid;
+
 	/* sockets array */
 	struct llcp_sock_list sockets;
 	struct llcp_sock_list connecting_sockets;
@@ -230,7 +234,10 @@ int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
 void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
 u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length);
 struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap);
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
+						  size_t uri_len);
 void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
+void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head);
 void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
 int nfc_llcp_disconnect(struct nfc_llcp_sock *sock);
 int nfc_llcp_send_symm(struct nfc_dev *dev);
@@ -238,6 +245,8 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock);
 int nfc_llcp_send_cc(struct nfc_llcp_sock *sock);
 int nfc_llcp_send_snl_sdres(struct nfc_llcp_local *local,
 			    struct hlist_head *tlv_list, size_t tlvs_len);
+int nfc_llcp_send_snl_sdreq(struct nfc_llcp_local *local,
+			    struct hlist_head *tlv_list, size_t tlvs_len);
 int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason);
 int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock);
 int nfc_llcp_send_i_frame(struct nfc_llcp_sock *sock,
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 63975c039b85..73fd51098f4d 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -56,6 +56,12 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
 	[NFC_ATTR_LLC_PARAM_LTO] = { .type = NLA_U8 },
 	[NFC_ATTR_LLC_PARAM_RW] = { .type = NLA_U8 },
 	[NFC_ATTR_LLC_PARAM_MIUX] = { .type = NLA_U16 },
+	[NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy nfc_sdp_genl_policy[NFC_SDP_ATTR_MAX + 1] = {
+	[NFC_SDP_ATTR_URI] = { .type = NLA_STRING },
+	[NFC_SDP_ATTR_SAP] = { .type = NLA_U8 },
 };
 
 static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
@@ -351,6 +357,74 @@ free_msg:
 	return -EMSGSIZE;
 }
 
+int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list)
+{
+	struct sk_buff *msg;
+	struct nlattr *sdp_attr, *uri_attr;
+	struct nfc_llcp_sdp_tlv *sdres;
+	struct hlist_node *n;
+	void *hdr;
+	int rc = -EMSGSIZE;
+	int i;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
+			  NFC_EVENT_LLC_SDRES);
+	if (!hdr)
+		goto free_msg;
+
+	if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx))
+		goto nla_put_failure;
+
+	sdp_attr = nla_nest_start(msg, NFC_ATTR_LLC_SDP);
+	if (sdp_attr == NULL) {
+		rc = -ENOMEM;
+		goto nla_put_failure;
+	}
+
+	i = 1;
+	hlist_for_each_entry_safe(sdres, n, sdres_list, node) {
+		pr_debug("uri: %s, sap: %d\n", sdres->uri, sdres->sap);
+
+		uri_attr = nla_nest_start(msg, i++);
+		if (uri_attr == NULL) {
+			rc = -ENOMEM;
+			goto nla_put_failure;
+		}
+
+		if (nla_put_u8(msg, NFC_SDP_ATTR_SAP, sdres->sap))
+			goto nla_put_failure;
+
+		if (nla_put_string(msg, NFC_SDP_ATTR_URI, sdres->uri))
+			goto nla_put_failure;
+
+		nla_nest_end(msg, uri_attr);
+
+		hlist_del(&sdres->node);
+
+		nfc_llcp_free_sdp_tlv(sdres);
+	}
+
+	nla_nest_end(msg, sdp_attr);
+
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+
+free_msg:
+	nlmsg_free(msg);
+
+	nfc_llcp_free_sdp_tlv_list(sdres_list);
+
+	return rc;
+}
+
 static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev,
 				u32 portid, u32 seq,
 				struct netlink_callback *cb,
@@ -862,6 +936,96 @@ exit:
 	return rc;
 }
 
+static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nfc_dev *dev;
+	struct nfc_llcp_local *local;
+	struct nlattr *attr, *sdp_attrs[NFC_SDP_ATTR_MAX+1];
+	u32 idx;
+	u8 tid;
+	char *uri;
+	int rc = 0, rem;
+	size_t uri_len, tlvs_len;
+	struct hlist_head sdreq_list;
+	struct nfc_llcp_sdp_tlv *sdreq;
+
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+	    !info->attrs[NFC_ATTR_LLC_SDP])
+		return -EINVAL;
+
+	idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+
+	dev = nfc_get_device(idx);
+	if (!dev) {
+		rc = -ENODEV;
+		goto exit;
+	}
+
+	device_lock(&dev->dev);
+
+	if (dev->dep_link_up == false) {
+		rc = -ENOLINK;
+		goto exit;
+	}
+
+	local = nfc_llcp_find_local(dev);
+	if (!local) {
+		nfc_put_device(dev);
+		rc = -ENODEV;
+		goto exit;
+	}
+
+	INIT_HLIST_HEAD(&sdreq_list);
+
+	tlvs_len = 0;
+
+	nla_for_each_nested(attr, info->attrs[NFC_ATTR_LLC_SDP], rem) {
+		rc = nla_parse_nested(sdp_attrs, NFC_SDP_ATTR_MAX, attr,
+				      nfc_sdp_genl_policy);
+
+		if (rc != 0) {
+			rc = -EINVAL;
+			goto exit;
+		}
+
+		if (!sdp_attrs[NFC_SDP_ATTR_URI])
+			continue;
+
+		uri_len = nla_len(sdp_attrs[NFC_SDP_ATTR_URI]);
+		if (uri_len == 0)
+			continue;
+
+		uri = nla_data(sdp_attrs[NFC_SDP_ATTR_URI]);
+		if (uri == NULL || *uri == 0)
+			continue;
+
+		tid = local->sdreq_next_tid++;
+
+		sdreq = nfc_llcp_build_sdreq_tlv(tid, uri, uri_len);
+		if (sdreq == NULL) {
+			rc = -ENOMEM;
+			goto exit;
+		}
+
+		tlvs_len += sdreq->tlv_len;
+
+		hlist_add_head(&sdreq->node, &sdreq_list);
+	}
+
+	if (hlist_empty(&sdreq_list)) {
+		rc = -EINVAL;
+		goto exit;
+	}
+
+	rc = nfc_llcp_send_snl_sdreq(local, &sdreq_list, tlvs_len);
+exit:
+	device_unlock(&dev->dev);
+
+	nfc_put_device(dev);
+
+	return rc;
+}
+
 static struct genl_ops nfc_genl_ops[] = {
 	{
 		.cmd = NFC_CMD_GET_DEVICE,
@@ -916,6 +1080,11 @@ static struct genl_ops nfc_genl_ops[] = {
 		.doit = nfc_genl_llc_set_params,
 		.policy = nfc_genl_policy,
 	},
+	{
+		.cmd = NFC_CMD_LLC_SDREQ,
+		.doit = nfc_genl_llc_sdreq,
+		.policy = nfc_genl_policy,
+	},
 };
 
 
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 87d914d2876a..94bfe19ba678 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -46,6 +46,8 @@ struct nfc_rawsock {
 #define to_rawsock_sk(_tx_work) \
 	((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work))
 
+struct nfc_llcp_sdp_tlv;
+
 #ifdef CONFIG_NFC_LLCP
 
 void nfc_llcp_mac_is_down(struct nfc_dev *dev);
@@ -59,6 +61,8 @@ int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb);
 struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
 int __init nfc_llcp_init(void);
 void nfc_llcp_exit(void);
+void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
+void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head);
 
 #else
 
@@ -112,6 +116,14 @@ static inline void nfc_llcp_exit(void)
 {
 }
 
+static inline void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp)
+{
+}
+
+static inline void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head)
+{
+}
+
 #endif
 
 int __init rawsock_init(void);
@@ -144,6 +156,8 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev);
 int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol);
 int nfc_genl_tm_deactivated(struct nfc_dev *dev);
 
+int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list);
+
 struct nfc_dev *nfc_get_device(unsigned int idx);
 
 static inline void nfc_put_device(struct nfc_dev *dev)
-- 
cgit 


From dad9f8972e04cd081a028d8fb1249d746d97fc03 Mon Sep 17 00:00:00 2001
From: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com>
Date: Mon, 11 Mar 2013 09:31:22 -0600
Subject: VFIO-AER: Vfio-pci driver changes for supporting AER

- New VFIO_SET_IRQ ioctl option to pass the eventfd that is signaled when
  an error occurs in the vfio_pci_device

- Register pci_error_handler for the vfio_pci driver

- When the device encounters an error, the error handler registered by
  the vfio_pci driver gets invoked by the AER infrastructure

- In the error handler, signal the eventfd registered for the device.

- This results in the qemu eventfd handler getting invoked and
  appropriate action taken for the guest.

Signed-off-by: Vijay Mohan Pandarathil <vijaymohan.pandarathil@hp.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/vfio_pci.c         | 44 ++++++++++++++++++++++++-
 drivers/vfio/pci/vfio_pci_intrs.c   | 64 +++++++++++++++++++++++++++++++++++++
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 include/uapi/linux/vfio.h           |  1 +
 4 files changed, 109 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 8189cb6a86af..acfcb1ae77bc 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -201,7 +201,9 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
 
 			return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
 		}
-	}
+	} else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX)
+		if (pci_is_pcie(vdev->pdev))
+			return 1;
 
 	return 0;
 }
@@ -317,6 +319,17 @@ static long vfio_pci_ioctl(void *device_data,
 		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
 			return -EINVAL;
 
+		switch (info.index) {
+		case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
+			break;
+		case VFIO_PCI_ERR_IRQ_INDEX:
+			if (pci_is_pcie(vdev->pdev))
+				break;
+		/* pass thru to return error */
+		default:
+			return -EINVAL;
+		}
+
 		info.flags = VFIO_IRQ_INFO_EVENTFD;
 
 		info.count = vfio_pci_get_irq_count(vdev, info.index);
@@ -551,11 +564,40 @@ static void vfio_pci_remove(struct pci_dev *pdev)
 	kfree(vdev);
 }
 
+static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
+						  pci_channel_state_t state)
+{
+	struct vfio_pci_device *vdev;
+	struct vfio_device *device;
+
+	device = vfio_device_get_from_dev(&pdev->dev);
+	if (device == NULL)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	vdev = vfio_device_data(device);
+	if (vdev == NULL) {
+		vfio_device_put(device);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	if (vdev->err_trigger)
+		eventfd_signal(vdev->err_trigger, 1);
+
+	vfio_device_put(device);
+
+	return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+static struct pci_error_handlers vfio_err_handlers = {
+	.error_detected = vfio_pci_aer_err_detected,
+};
+
 static struct pci_driver vfio_pci_driver = {
 	.name		= "vfio-pci",
 	.id_table	= NULL, /* only dynamic ids */
 	.probe		= vfio_pci_probe,
 	.remove		= vfio_pci_remove,
+	.err_handler	= &vfio_err_handlers,
 };
 
 static void __exit vfio_pci_cleanup(void)
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 3639371fa697..b84bf2210a91 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -745,6 +745,63 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
 	return 0;
 }
 
+static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
+				    unsigned index, unsigned start,
+				    unsigned count, uint32_t flags, void *data)
+{
+	int32_t fd = *(int32_t *)data;
+	struct pci_dev *pdev = vdev->pdev;
+
+	if ((index != VFIO_PCI_ERR_IRQ_INDEX) ||
+	    !(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
+		return -EINVAL;
+
+	/*
+	 * device_lock synchronizes setting and checking of
+	 * err_trigger. The vfio_pci_aer_err_detected() is also
+	 * called with device_lock held.
+	 */
+
+	/* DATA_NONE/DATA_BOOL enables loopback testing */
+
+	if (flags & VFIO_IRQ_SET_DATA_NONE) {
+		device_lock(&pdev->dev);
+		if (vdev->err_trigger)
+			eventfd_signal(vdev->err_trigger, 1);
+		device_unlock(&pdev->dev);
+		return 0;
+	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+		uint8_t trigger = *(uint8_t *)data;
+		device_lock(&pdev->dev);
+		if (trigger && vdev->err_trigger)
+			eventfd_signal(vdev->err_trigger, 1);
+		device_unlock(&pdev->dev);
+		return 0;
+	}
+
+	/* Handle SET_DATA_EVENTFD */
+
+	if (fd == -1) {
+		device_lock(&pdev->dev);
+		if (vdev->err_trigger)
+			eventfd_ctx_put(vdev->err_trigger);
+		vdev->err_trigger = NULL;
+		device_unlock(&pdev->dev);
+		return 0;
+	} else if (fd >= 0) {
+		struct eventfd_ctx *efdctx;
+		efdctx = eventfd_ctx_fdget(fd);
+		if (IS_ERR(efdctx))
+			return PTR_ERR(efdctx);
+		device_lock(&pdev->dev);
+		if (vdev->err_trigger)
+			eventfd_ctx_put(vdev->err_trigger);
+		vdev->err_trigger = efdctx;
+		device_unlock(&pdev->dev);
+		return 0;
+	} else
+		return -EINVAL;
+}
 int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
 			    unsigned index, unsigned start, unsigned count,
 			    void *data)
@@ -779,6 +836,13 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
 			break;
 		}
 		break;
+	case VFIO_PCI_ERR_IRQ_INDEX:
+		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
+		case VFIO_IRQ_SET_ACTION_TRIGGER:
+			if (pci_is_pcie(vdev->pdev))
+				func = vfio_pci_set_err_trigger;
+			break;
+		}
 	}
 
 	if (!func)
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index d7e55d03f49e..9c6d5d0f3b02 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -56,6 +56,7 @@ struct vfio_pci_device {
 	bool			has_vga;
 	struct pci_saved_state	*pci_saved_state;
 	atomic_t		refcnt;
+	struct eventfd_ctx	*err_trigger;
 };
 
 #define is_intx(vdev) (vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX)
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 4f41f309911e..284ff2436829 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -319,6 +319,7 @@ enum {
 	VFIO_PCI_INTX_IRQ_INDEX,
 	VFIO_PCI_MSI_IRQ_INDEX,
 	VFIO_PCI_MSIX_IRQ_INDEX,
+	VFIO_PCI_ERR_IRQ_INDEX,
 	VFIO_PCI_NUM_IRQS
 };
 
-- 
cgit 


From 6ba8a3b19e764b6a65e4030ab0999be50c291e6c Mon Sep 17 00:00:00 2001
From: Nandita Dukkipati <nanditad@google.com>
Date: Mon, 11 Mar 2013 10:00:43 +0000
Subject: tcp: Tail loss probe (TLP)

This patch series implement the Tail loss probe (TLP) algorithm described
in http://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01. The
first patch implements the basic algorithm.

TLP's goal is to reduce tail latency of short transactions. It achieves
this by converting retransmission timeouts (RTOs) occuring due
to tail losses (losses at end of transactions) into fast recovery.
TLP transmits one packet in two round-trips when a connection is in
Open state and isn't receiving any ACKs. The transmitted packet, aka
loss probe, can be either new or a retransmission. When there is tail
loss, the ACK from a loss probe triggers FACK/early-retransmit based
fast recovery, thus avoiding a costly RTO. In the absence of loss,
there is no change in the connection state.

PTO stands for probe timeout. It is a timer event indicating
that an ACK is overdue and triggers a loss probe packet. The PTO value
is set to max(2*SRTT, 10ms) and is adjusted to account for delayed
ACK timer when there is only one oustanding packet.

TLP Algorithm

On transmission of new data in Open state:
  -> packets_out > 1: schedule PTO in max(2*SRTT, 10ms).
  -> packets_out == 1: schedule PTO in max(2*RTT, 1.5*RTT + 200ms)
  -> PTO = min(PTO, RTO)

Conditions for scheduling PTO:
  -> Connection is in Open state.
  -> Connection is either cwnd limited or no new data to send.
  -> Number of probes per tail loss episode is limited to one.
  -> Connection is SACK enabled.

When PTO fires:
  new_segment_exists:
    -> transmit new segment.
    -> packets_out++. cwnd remains same.

  no_new_packet:
    -> retransmit the last segment.
       Its ACK triggers FACK or early retransmit based recovery.

ACK path:
  -> rearm RTO at start of ACK processing.
  -> reschedule PTO if need be.

In addition, the patch includes a small variation to the Early Retransmit
(ER) algorithm, such that ER and TLP together can in principle recover any
N-degree of tail loss through fast recovery. TLP is controlled by the same
sysctl as ER, tcp_early_retrans sysctl.
tcp_early_retrans==0; disables TLP and ER.
		 ==1; enables RFC5827 ER.
		 ==2; delayed ER.
		 ==3; TLP and delayed ER. [DEFAULT]
		 ==4; TLP only.

The TLP patch series have been extensively tested on Google Web servers.
It is most effective for short Web trasactions, where it reduced RTOs by 15%
and improved HTTP response time (average by 6%, 99th percentile by 10%).
The transmitted probes account for <0.5% of the overall transmissions.

Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |   8 ++-
 include/linux/tcp.h                    |   1 -
 include/net/inet_connection_sock.h     |   5 +-
 include/net/tcp.h                      |   6 +-
 include/uapi/linux/snmp.h              |   1 +
 net/ipv4/inet_diag.c                   |   4 +-
 net/ipv4/proc.c                        |   1 +
 net/ipv4/sysctl_net_ipv4.c             |   4 +-
 net/ipv4/tcp_input.c                   |  24 ++++---
 net/ipv4/tcp_ipv4.c                    |   4 +-
 net/ipv4/tcp_output.c                  | 128 +++++++++++++++++++++++++++++++--
 net/ipv4/tcp_timer.c                   |  13 ++--
 12 files changed, 171 insertions(+), 28 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index dc2dc87d2557..1cae6c383e1b 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -190,7 +190,9 @@ tcp_early_retrans - INTEGER
 	Enable Early Retransmit (ER), per RFC 5827. ER lowers the threshold
 	for triggering fast retransmit when the amount of outstanding data is
 	small and when no previously unsent data can be transmitted (such
-	that limited transmit could be used).
+	that limited transmit could be used). Also controls the use of
+	Tail loss probe (TLP) that converts RTOs occuring due to tail
+	losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01).
 	Possible values:
 		0 disables ER
 		1 enables ER
@@ -198,7 +200,9 @@ tcp_early_retrans - INTEGER
 		  by a fourth of RTT. This mitigates connection falsely
 		  recovers when network has a small degree of reordering
 		  (less than 3 packets).
-	Default: 2
+		3 enables delayed ER and TLP.
+		4 enables TLP only.
+	Default: 3
 
 tcp_ecn - INTEGER
 	Control use of Explicit Congestion Notification (ECN) by TCP.
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 515c3746b675..01860d74555c 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -201,7 +201,6 @@ struct tcp_sock {
 		unused      : 1;
 	u8	repair_queue;
 	u8	do_early_retrans:1,/* Enable RFC5827 early-retransmit  */
-		early_retrans_delayed:1, /* Delayed ER timer installed */
 		syn_data:1,	/* SYN includes data */
 		syn_fastopen:1,	/* SYN includes Fast Open option */
 		syn_data_acked:1;/* data in SYN is acked by SYN-ACK */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 183292722f6e..de2c78529afa 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -133,6 +133,8 @@ struct inet_connection_sock {
 #define ICSK_TIME_RETRANS	1	/* Retransmit timer */
 #define ICSK_TIME_DACK		2	/* Delayed ack timer */
 #define ICSK_TIME_PROBE0	3	/* Zero window probe timer */
+#define ICSK_TIME_EARLY_RETRANS 4	/* Early retransmit timer */
+#define ICSK_TIME_LOSS_PROBE	5	/* Tail loss probe timer */
 
 static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
 {
@@ -222,7 +224,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
 		when = max_when;
 	}
 
-	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
+	if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 ||
+	    what == ICSK_TIME_EARLY_RETRANS || what ==  ICSK_TIME_LOSS_PROBE) {
 		icsk->icsk_pending = what;
 		icsk->icsk_timeout = jiffies + when;
 		sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a2baa5e4ba31..ab9f947b118b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -543,6 +543,8 @@ extern bool tcp_syn_flood_action(struct sock *sk,
 extern void tcp_push_one(struct sock *, unsigned int mss_now);
 extern void tcp_send_ack(struct sock *sk);
 extern void tcp_send_delayed_ack(struct sock *sk);
+extern void tcp_send_loss_probe(struct sock *sk);
+extern bool tcp_schedule_loss_probe(struct sock *sk);
 
 /* tcp_input.c */
 extern void tcp_cwnd_application_limited(struct sock *sk);
@@ -873,8 +875,8 @@ static inline void tcp_enable_fack(struct tcp_sock *tp)
 static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
 {
 	tp->do_early_retrans = sysctl_tcp_early_retrans &&
-		!sysctl_tcp_thin_dupack && sysctl_tcp_reordering == 3;
-	tp->early_retrans_delayed = 0;
+		sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack &&
+		sysctl_tcp_reordering == 3;
 }
 
 static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index b49eab89c9fd..290bed6b085f 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -202,6 +202,7 @@ enum
 	LINUX_MIB_TCPFORWARDRETRANS,		/* TCPForwardRetrans */
 	LINUX_MIB_TCPSLOWSTARTRETRANS,		/* TCPSlowStartRetrans */
 	LINUX_MIB_TCPTIMEOUTS,			/* TCPTimeouts */
+	LINUX_MIB_TCPLOSSPROBES,		/* TCPLossProbes */
 	LINUX_MIB_TCPRENORECOVERYFAIL,		/* TCPRenoRecoveryFail */
 	LINUX_MIB_TCPSACKRECOVERYFAIL,		/* TCPSackRecoveryFail */
 	LINUX_MIB_TCPSCHEDULERFAILED,		/* TCPSchedulerFailed */
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7afa2c3c788f..8620408af574 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -158,7 +158,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 
 #define EXPIRES_IN_MS(tmo)  DIV_ROUND_UP((tmo - jiffies) * 1000, HZ)
 
-	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		r->idiag_timer = 1;
 		r->idiag_retrans = icsk->icsk_retransmits;
 		r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 32030a24e776..4c35911d935f 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -224,6 +224,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS),
 	SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS),
 	SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS),
+	SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES),
 	SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
 	SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
 	SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED),
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 960fd29d9b8e..cca4550f4082 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -28,7 +28,7 @@
 
 static int zero;
 static int one = 1;
-static int two = 2;
+static int four = 4;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -760,7 +760,7 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= &zero,
-		.extra2		= &two,
+		.extra2		= &four,
 	},
 	{
 		.procname	= "udp_mem",
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0d9bdacce99f..b794f89ac1f2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -98,7 +98,7 @@ int sysctl_tcp_frto_response __read_mostly;
 int sysctl_tcp_thin_dupack __read_mostly;
 
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
-int sysctl_tcp_early_retrans __read_mostly = 2;
+int sysctl_tcp_early_retrans __read_mostly = 3;
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
@@ -2150,15 +2150,16 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
 	 * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
 	 * available, or RTO is scheduled to fire first.
 	 */
-	if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt)
+	if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
+	    (flag & FLAG_ECE) || !tp->srtt)
 		return false;
 
 	delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
 	if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
 		return false;
 
-	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX);
-	tp->early_retrans_delayed = 1;
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
+				  TCP_RTO_MAX);
 	return true;
 }
 
@@ -2321,7 +2322,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
 	 * interval if appropriate.
 	 */
 	if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
-	    (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) &&
+	    (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
 	    !tcp_may_send_now(sk))
 		return !tcp_pause_early_retransmit(sk, flag);
 
@@ -3081,6 +3082,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
  */
 void tcp_rearm_rto(struct sock *sk)
 {
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* If the retrans timer is currently being used by Fast Open
@@ -3094,12 +3096,13 @@ void tcp_rearm_rto(struct sock *sk)
 	} else {
 		u32 rto = inet_csk(sk)->icsk_rto;
 		/* Offset the time elapsed after installing regular RTO */
-		if (tp->early_retrans_delayed) {
+		if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+		    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 			struct sk_buff *skb = tcp_write_queue_head(sk);
 			const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
 			s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
 			/* delta may not be positive if the socket is locked
-			 * when the delayed ER timer fires and is rescheduled.
+			 * when the retrans timer fires and is rescheduled.
 			 */
 			if (delta > 0)
 				rto = delta;
@@ -3107,7 +3110,6 @@ void tcp_rearm_rto(struct sock *sk)
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
 					  TCP_RTO_MAX);
 	}
-	tp->early_retrans_delayed = 0;
 }
 
 /* This function is called when the delayed ER timer fires. TCP enters
@@ -3601,7 +3603,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 	if (after(ack, tp->snd_nxt))
 		goto invalid_ack;
 
-	if (tp->early_retrans_delayed)
+	if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
 
 	if (after(ack, prior_snd_una))
@@ -3678,6 +3681,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		if (dst)
 			dst_confirm(dst);
 	}
+
+	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
+		tcp_schedule_loss_probe(sk);
 	return 1;
 
 no_queue:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8cdee120a50c..b7ab868c8284 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2703,7 +2703,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
 	__u16 srcp = ntohs(inet->inet_sport);
 	int rx_queue;
 
-	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		timer_active	= 1;
 		timer_expires	= icsk->icsk_timeout;
 	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e2b4461074da..beb63dbc85f5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -74,6 +74,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 /* Account for new data that has been sent to the network. */
 static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned int prior_packets = tp->packets_out;
 
@@ -85,7 +86,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 		tp->frto_counter = 3;
 
 	tp->packets_out += tcp_skb_pcount(skb);
-	if (!prior_packets || tp->early_retrans_delayed)
+	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
 }
 
@@ -1959,6 +1961,9 @@ static int tcp_mtu_probe(struct sock *sk)
  * snd_up-64k-mss .. snd_up cannot be large. However, taking into
  * account rare use of URG, this is not a big flaw.
  *
+ * Send at most one packet when push_one > 0. Temporarily ignore
+ * cwnd limit to force at most one packet out when push_one == 2.
+
  * Returns true, if no segments are in flight and we have queued segments,
  * but cannot send anything now because of SWS or another problem.
  */
@@ -1994,8 +1999,13 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			goto repair; /* Skip network transmission */
 
 		cwnd_quota = tcp_cwnd_test(tp, skb);
-		if (!cwnd_quota)
-			break;
+		if (!cwnd_quota) {
+			if (push_one == 2)
+				/* Force out a loss probe pkt. */
+				cwnd_quota = 1;
+			else
+				break;
+		}
 
 		if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
 			break;
@@ -2049,10 +2059,120 @@ repair:
 	if (likely(sent_pkts)) {
 		if (tcp_in_cwnd_reduction(sk))
 			tp->prr_out += sent_pkts;
+
+		/* Send one loss probe per tail loss episode. */
+		if (push_one != 2)
+			tcp_schedule_loss_probe(sk);
 		tcp_cwnd_validate(sk);
 		return false;
 	}
-	return !tp->packets_out && tcp_send_head(sk);
+	return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
+}
+
+bool tcp_schedule_loss_probe(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 timeout, tlp_time_stamp, rto_time_stamp;
+	u32 rtt = tp->srtt >> 3;
+
+	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
+		return false;
+	/* No consecutive loss probes. */
+	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
+		tcp_rearm_rto(sk);
+		return false;
+	}
+	/* Don't do any loss probe on a Fast Open connection before 3WHS
+	 * finishes.
+	 */
+	if (sk->sk_state == TCP_SYN_RECV)
+		return false;
+
+	/* TLP is only scheduled when next timer event is RTO. */
+	if (icsk->icsk_pending != ICSK_TIME_RETRANS)
+		return false;
+
+	/* Schedule a loss probe in 2*RTT for SACK capable connections
+	 * in Open state, that are either limited by cwnd or application.
+	 */
+	if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
+	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
+		return false;
+
+	if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
+	     tcp_send_head(sk))
+		return false;
+
+	/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+	 * for delayed ack when there's one outstanding packet.
+	 */
+	timeout = rtt << 1;
+	if (tp->packets_out == 1)
+		timeout = max_t(u32, timeout,
+				(rtt + (rtt >> 1) + TCP_DELACK_MAX));
+	timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+
+	/* If RTO is shorter, just schedule TLP in its place. */
+	tlp_time_stamp = tcp_time_stamp + timeout;
+	rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
+	if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
+		s32 delta = rto_time_stamp - tcp_time_stamp;
+		if (delta > 0)
+			timeout = delta;
+	}
+
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
+				  TCP_RTO_MAX);
+	return true;
+}
+
+/* When probe timeout (PTO) fires, send a new segment if one exists, else
+ * retransmit the last segment.
+ */
+void tcp_send_loss_probe(struct sock *sk)
+{
+	struct sk_buff *skb;
+	int pcount;
+	int mss = tcp_current_mss(sk);
+	int err = -1;
+
+	if (tcp_send_head(sk) != NULL) {
+		err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+		goto rearm_timer;
+	}
+
+	/* Retransmit last segment. */
+	skb = tcp_write_queue_tail(sk);
+	if (WARN_ON(!skb))
+		goto rearm_timer;
+
+	pcount = tcp_skb_pcount(skb);
+	if (WARN_ON(!pcount))
+		goto rearm_timer;
+
+	if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
+		if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss)))
+			goto rearm_timer;
+		skb = tcp_write_queue_tail(sk);
+	}
+
+	if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
+		goto rearm_timer;
+
+	/* Probe with zero data doesn't trigger fast recovery. */
+	if (skb->len > 0)
+		err = __tcp_retransmit_skb(sk, skb);
+
+rearm_timer:
+	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+				  inet_csk(sk)->icsk_rto,
+				  TCP_RTO_MAX);
+
+	if (likely(!err))
+		NET_INC_STATS_BH(sock_net(sk),
+				 LINUX_MIB_TCPLOSSPROBES);
+	return;
 }
 
 /* Push out any pending frames which were held back due to
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b78aac30c498..ecd61d54147f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -342,10 +342,6 @@ void tcp_retransmit_timer(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 
-	if (tp->early_retrans_delayed) {
-		tcp_resume_early_retransmit(sk);
-		return;
-	}
 	if (tp->fastopen_rsk) {
 		WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
 			     sk->sk_state != TCP_FIN_WAIT1);
@@ -495,13 +491,20 @@ void tcp_write_timer_handler(struct sock *sk)
 	}
 
 	event = icsk->icsk_pending;
-	icsk->icsk_pending = 0;
 
 	switch (event) {
+	case ICSK_TIME_EARLY_RETRANS:
+		tcp_resume_early_retransmit(sk);
+		break;
+	case ICSK_TIME_LOSS_PROBE:
+		tcp_send_loss_probe(sk);
+		break;
 	case ICSK_TIME_RETRANS:
+		icsk->icsk_pending = 0;
 		tcp_retransmit_timer(sk);
 		break;
 	case ICSK_TIME_PROBE0:
+		icsk->icsk_pending = 0;
 		tcp_probe_timer(sk);
 		break;
 	}
-- 
cgit 


From 9b717a8d245075ffb8e95a2dfb4ee97ce4747457 Mon Sep 17 00:00:00 2001
From: Nandita Dukkipati <nanditad@google.com>
Date: Mon, 11 Mar 2013 10:00:44 +0000
Subject: tcp: TLP loss detection.

This is the second of the TLP patch series; it augments the basic TLP
algorithm with a loss detection scheme.

This patch implements a mechanism for loss detection when a Tail
loss probe retransmission plugs a hole thereby masking packet loss
from the sender. The loss detection algorithm relies on counting
TLP dupacks as outlined in Sec. 3 of:
http://tools.ietf.org/html/draft-dukkipati-tcpm-tcp-loss-probe-01

The basic idea is: Sender keeps track of TLP "episode" upon
retransmission of a TLP packet. An episode ends when the sender receives
an ACK above the SND.NXT (tracked by tlp_high_seq) at the time of the
episode. We want to make sure that before the episode ends the sender
receives a "TLP dupack", indicating that the TLP retransmission was
unnecessary, so there was no loss/hole that needed plugging. If the
sender gets no TLP dupack before the end of the episode, then it reduces
ssthresh and the congestion window, because the TLP packet arriving at
the receiver probably plugged a hole.

Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h       |  1 +
 include/uapi/linux/snmp.h |  1 +
 net/ipv4/proc.c           |  1 +
 net/ipv4/tcp_input.c      | 39 +++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_minisocks.c  |  1 +
 net/ipv4/tcp_output.c     |  9 +++++++++
 net/ipv4/tcp_timer.c      |  2 ++
 7 files changed, 54 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 01860d74555c..763c108ee03d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -204,6 +204,7 @@ struct tcp_sock {
 		syn_data:1,	/* SYN includes data */
 		syn_fastopen:1,	/* SYN includes Fast Open option */
 		syn_data_acked:1;/* data in SYN is acked by SYN-ACK */
+	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 /* RTT measurement */
 	u32	srtt;		/* smoothed round trip time << 3	*/
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 290bed6b085f..e00013a1debc 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -203,6 +203,7 @@ enum
 	LINUX_MIB_TCPSLOWSTARTRETRANS,		/* TCPSlowStartRetrans */
 	LINUX_MIB_TCPTIMEOUTS,			/* TCPTimeouts */
 	LINUX_MIB_TCPLOSSPROBES,		/* TCPLossProbes */
+	LINUX_MIB_TCPLOSSPROBERECOVERY,		/* TCPLossProbeRecovery */
 	LINUX_MIB_TCPRENORECOVERYFAIL,		/* TCPRenoRecoveryFail */
 	LINUX_MIB_TCPSACKRECOVERYFAIL,		/* TCPSackRecoveryFail */
 	LINUX_MIB_TCPSCHEDULERFAILED,		/* TCPSchedulerFailed */
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4c35911d935f..b6f2ea174898 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -225,6 +225,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS),
 	SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS),
 	SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES),
+	SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY),
 	SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL),
 	SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL),
 	SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b794f89ac1f2..836d74dd0187 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2682,6 +2682,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	tp->high_seq = tp->snd_nxt;
+	tp->tlp_high_seq = 0;
 	tp->snd_cwnd_cnt = 0;
 	tp->prior_cwnd = tp->snd_cwnd;
 	tp->prr_delivered = 0;
@@ -3569,6 +3570,38 @@ static void tcp_send_challenge_ack(struct sock *sk)
 	}
 }
 
+/* This routine deals with acks during a TLP episode.
+ * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
+ */
+static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	bool is_tlp_dupack = (ack == tp->tlp_high_seq) &&
+			     !(flag & (FLAG_SND_UNA_ADVANCED |
+				       FLAG_NOT_DUP | FLAG_DATA_SACKED));
+
+	/* Mark the end of TLP episode on receiving TLP dupack or when
+	 * ack is after tlp_high_seq.
+	 */
+	if (is_tlp_dupack) {
+		tp->tlp_high_seq = 0;
+		return;
+	}
+
+	if (after(ack, tp->tlp_high_seq)) {
+		tp->tlp_high_seq = 0;
+		/* Don't reduce cwnd if DSACK arrives for TLP retrans. */
+		if (!(flag & FLAG_DSACKING_ACK)) {
+			tcp_init_cwnd_reduction(sk, true);
+			tcp_set_ca_state(sk, TCP_CA_CWR);
+			tcp_end_cwnd_reduction(sk);
+			tcp_set_ca_state(sk, TCP_CA_Open);
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPLOSSPROBERECOVERY);
+		}
+	}
+}
+
 /* This routine deals with incoming acks, but not outgoing ones. */
 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 {
@@ -3676,6 +3709,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 			tcp_cong_avoid(sk, ack, prior_in_flight);
 	}
 
+	if (tp->tlp_high_seq)
+		tcp_process_tlp_ack(sk, ack, flag);
+
 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
 		struct dst_entry *dst = __sk_dst_get(sk);
 		if (dst)
@@ -3697,6 +3733,9 @@ no_queue:
 	 */
 	if (tcp_send_head(sk))
 		tcp_ack_probe(sk);
+
+	if (tp->tlp_high_seq)
+		tcp_process_tlp_ack(sk, ack, flag);
 	return 1;
 
 invalid_ack:
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b83a49cc3816..4bdb09fca401 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -440,6 +440,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->fackets_out = 0;
 		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
 		tcp_enable_early_retrans(newtp);
+		newtp->tlp_high_seq = 0;
 
 		/* So many TCP implementations out there (incorrectly) count the
 		 * initial SYN frame in their delayed-ACK and congestion control
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index beb63dbc85f5..8e7742f0b5d2 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2132,6 +2132,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
  */
 void tcp_send_loss_probe(struct sock *sk)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
 	int pcount;
 	int mss = tcp_current_mss(sk);
@@ -2142,6 +2143,10 @@ void tcp_send_loss_probe(struct sock *sk)
 		goto rearm_timer;
 	}
 
+	/* At most one outstanding TLP retransmission. */
+	if (tp->tlp_high_seq)
+		goto rearm_timer;
+
 	/* Retransmit last segment. */
 	skb = tcp_write_queue_tail(sk);
 	if (WARN_ON(!skb))
@@ -2164,6 +2169,10 @@ void tcp_send_loss_probe(struct sock *sk)
 	if (skb->len > 0)
 		err = __tcp_retransmit_skb(sk, skb);
 
+	/* Record snd_nxt for loss detection. */
+	if (likely(!err))
+		tp->tlp_high_seq = tp->snd_nxt;
+
 rearm_timer:
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 				  inet_csk(sk)->icsk_rto,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ecd61d54147f..eeccf795e917 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -356,6 +356,8 @@ void tcp_retransmit_timer(struct sock *sk)
 
 	WARN_ON(tcp_write_queue_empty(sk));
 
+	tp->tlp_high_seq = 0;
+
 	if (!tp->snd_wnd && !sock_flag(sk, SOCK_DEAD) &&
 	    !((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
 		/* Receiver dastardly shrinks window. Our retransmits
-- 
cgit 


From 5a1bbf21325bd4f2641f6141fb8c47f6095578dd Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 2 Feb 2013 11:53:47 -0800
Subject: Input: add new keycodes for passenger control units

Entertainment systems used in aircraft need additional keycodes for their
Passenger Control Units, so let's add them.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/uapi/linux/input.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index 558828590a69..6e4e3c6b3961 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -702,6 +702,11 @@ struct input_keymap_entry {
 #define KEY_CAMERA_LEFT		0x219
 #define KEY_CAMERA_RIGHT	0x21a
 
+#define KEY_ATTENDANT_ON	0x21b
+#define KEY_ATTENDANT_OFF	0x21c
+#define KEY_ATTENDANT_TOGGLE	0x21d	/* Attendant call on or off */
+#define KEY_LIGHTS_TOGGLE	0x21e	/* Reading light on or off */
+
 #define BTN_TRIGGER_HAPPY		0x2c0
 #define BTN_TRIGGER_HAPPY1		0x2c0
 #define BTN_TRIGGER_HAPPY2		0x2c1
-- 
cgit 


From 96dd86fa588169b745a71aedf2070e80f4943623 Mon Sep 17 00:00:00 2001
From: "K. Y. Srinivasan" <kys@microsoft.com>
Date: Fri, 15 Mar 2013 12:30:06 -0700
Subject: Drivers: hv: Add a new driver to support host initiated backup

This driver supports host initiated backup of the guest. On Windows guests,
the host can generate application consistent backups using the Windows VSS
framework. On Linux, we ensure that the backup will be file system consistent.
This driver allows the host to initiate a  "Freeze" operation on all the mounted
file systems in the guest. Once the mounted file systems in the guest are frozen,
the host snapshots the guest's file systems. Once this is done, the guest's file
systems are "thawed".

This driver has a user-level component (daemon) that invokes the appropriate
operation on all the mounted file systems in response to the requests from
the host. The duration for which the guest is frozen is very short - a few seconds.
During this interval, the diff disk is comitted.

In this version of the patch I have addressed the feedback from Olaf Herring.
Also, some of the connector related issues have been fixed.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/Makefile            |   2 +-
 drivers/hv/hv_snapshot.c       | 287 +++++++++++++++++++++++++++++++++++++++++
 drivers/hv/hv_util.c           |  10 ++
 include/linux/hyperv.h         |  69 ++++++++++
 include/uapi/linux/connector.h |   5 +-
 tools/hv/hv_vss_daemon.c       | 220 +++++++++++++++++++++++++++++++
 6 files changed, 591 insertions(+), 2 deletions(-)
 create mode 100644 drivers/hv/hv_snapshot.c
 create mode 100644 tools/hv/hv_vss_daemon.c

(limited to 'include/uapi/linux')

diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index e6abfa02d8b7..0a74b5661186 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -5,4 +5,4 @@ obj-$(CONFIG_HYPERV_BALLOON)	+= hv_balloon.o
 hv_vmbus-y := vmbus_drv.o \
 		 hv.o connection.o channel.o \
 		 channel_mgmt.o ring_buffer.o
-hv_utils-y := hv_util.o hv_kvp.o
+hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o
diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
new file mode 100644
index 000000000000..8ad5653ce447
--- /dev/null
+++ b/drivers/hv/hv_snapshot.c
@@ -0,0 +1,287 @@
+/*
+ * An implementation of host initiated guest snapshot.
+ *
+ *
+ * Copyright (C) 2013, Microsoft, Inc.
+ * Author : K. Y. Srinivasan <kys@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/net.h>
+#include <linux/nls.h>
+#include <linux/connector.h>
+#include <linux/workqueue.h>
+#include <linux/hyperv.h>
+
+
+
+/*
+ * Global state maintained for transaction that is being processed.
+ * Note that only one transaction can be active at any point in time.
+ *
+ * This state is set when we receive a request from the host; we
+ * cleanup this state when the transaction is completed - when we respond
+ * to the host with the key value.
+ */
+
+static struct {
+	bool active; /* transaction status - active or not */
+	int recv_len; /* number of bytes received. */
+	struct vmbus_channel *recv_channel; /* chn we got the request */
+	u64 recv_req_id; /* request ID. */
+	struct hv_vss_msg  *msg; /* current message */
+} vss_transaction;
+
+
+static void vss_respond_to_host(int error);
+
+static struct cb_id vss_id = { CN_VSS_IDX, CN_VSS_VAL };
+static const char vss_name[] = "vss_kernel_module";
+static __u8 *recv_buffer;
+
+static void vss_send_op(struct work_struct *dummy);
+static DECLARE_WORK(vss_send_op_work, vss_send_op);
+
+/*
+ * Callback when data is received from user mode.
+ */
+
+static void
+vss_cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
+{
+	struct hv_vss_msg *vss_msg;
+
+	vss_msg = (struct hv_vss_msg *)msg->data;
+
+	if (vss_msg->vss_hdr.operation == VSS_OP_REGISTER) {
+		pr_info("VSS daemon registered\n");
+		vss_transaction.active = false;
+		if (vss_transaction.recv_channel != NULL)
+			hv_vss_onchannelcallback(vss_transaction.recv_channel);
+		return;
+
+	}
+	vss_respond_to_host(vss_msg->error);
+}
+
+
+static void vss_send_op(struct work_struct *dummy)
+{
+	int op = vss_transaction.msg->vss_hdr.operation;
+	struct cn_msg *msg;
+	struct hv_vss_msg *vss_msg;
+
+	msg = kzalloc(sizeof(*msg) + sizeof(*vss_msg), GFP_ATOMIC);
+	if (!msg)
+		return;
+
+	vss_msg = (struct hv_vss_msg *)msg->data;
+
+	msg->id.idx =  CN_VSS_IDX;
+	msg->id.val = CN_VSS_VAL;
+
+	vss_msg->vss_hdr.operation = op;
+	msg->len = sizeof(struct hv_vss_msg);
+
+	cn_netlink_send(msg, 0, GFP_ATOMIC);
+	kfree(msg);
+
+	return;
+}
+
+/*
+ * Send a response back to the host.
+ */
+
+static void
+vss_respond_to_host(int error)
+{
+	struct icmsg_hdr *icmsghdrp;
+	u32	buf_len;
+	struct vmbus_channel *channel;
+	u64	req_id;
+
+	/*
+	 * If a transaction is not active; log and return.
+	 */
+
+	if (!vss_transaction.active) {
+		/*
+		 * This is a spurious call!
+		 */
+		pr_warn("VSS: Transaction not active\n");
+		return;
+	}
+	/*
+	 * Copy the global state for completing the transaction. Note that
+	 * only one transaction can be active at a time.
+	 */
+
+	buf_len = vss_transaction.recv_len;
+	channel = vss_transaction.recv_channel;
+	req_id = vss_transaction.recv_req_id;
+	vss_transaction.active = false;
+
+	icmsghdrp = (struct icmsg_hdr *)
+			&recv_buffer[sizeof(struct vmbuspipe_hdr)];
+
+	if (channel->onchannel_callback == NULL)
+		/*
+		 * We have raced with util driver being unloaded;
+		 * silently return.
+		 */
+		return;
+
+	icmsghdrp->status = error;
+
+	icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION | ICMSGHDRFLAG_RESPONSE;
+
+	vmbus_sendpacket(channel, recv_buffer, buf_len, req_id,
+				VM_PKT_DATA_INBAND, 0);
+
+}
+
+/*
+ * This callback is invoked when we get a VSS message from the host.
+ * The host ensures that only one VSS transaction can be active at a time.
+ */
+
+void hv_vss_onchannelcallback(void *context)
+{
+	struct vmbus_channel *channel = context;
+	u32 recvlen;
+	u64 requestid;
+	struct hv_vss_msg *vss_msg;
+
+
+	struct icmsg_hdr *icmsghdrp;
+	struct icmsg_negotiate *negop = NULL;
+
+	if (vss_transaction.active) {
+		/*
+		 * We will defer processing this callback once
+		 * the current transaction is complete.
+		 */
+		vss_transaction.recv_channel = channel;
+		return;
+	}
+
+	vmbus_recvpacket(channel, recv_buffer, PAGE_SIZE * 2, &recvlen,
+			 &requestid);
+
+	if (recvlen > 0) {
+		icmsghdrp = (struct icmsg_hdr *)&recv_buffer[
+			sizeof(struct vmbuspipe_hdr)];
+
+		if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE) {
+			vmbus_prep_negotiate_resp(icmsghdrp, negop,
+				 recv_buffer, MAX_SRV_VER, MAX_SRV_VER);
+			/*
+			 * We currently negotiate the highest number the
+			 * host has presented. If this version is not
+			 * atleast 5.0, reject.
+			 */
+			negop = (struct icmsg_negotiate *)&recv_buffer[
+				sizeof(struct vmbuspipe_hdr) +
+				sizeof(struct icmsg_hdr)];
+
+			if (negop->icversion_data[1].major < 5)
+				negop->icframe_vercnt = 0;
+		} else {
+			vss_msg = (struct hv_vss_msg *)&recv_buffer[
+				sizeof(struct vmbuspipe_hdr) +
+				sizeof(struct icmsg_hdr)];
+
+			/*
+			 * Stash away this global state for completing the
+			 * transaction; note transactions are serialized.
+			 */
+
+			vss_transaction.recv_len = recvlen;
+			vss_transaction.recv_channel = channel;
+			vss_transaction.recv_req_id = requestid;
+			vss_transaction.active = true;
+			vss_transaction.msg = (struct hv_vss_msg *)vss_msg;
+
+			switch (vss_msg->vss_hdr.operation) {
+				/*
+				 * Initiate a "freeze/thaw"
+				 * operation in the guest.
+				 * We respond to the host once
+				 * the operation is complete.
+				 *
+				 * We send the message to the
+				 * user space daemon and the
+				 * operation is performed in
+				 * the daemon.
+				 */
+			case VSS_OP_FREEZE:
+			case VSS_OP_THAW:
+				schedule_work(&vss_send_op_work);
+				return;
+
+			case VSS_OP_HOT_BACKUP:
+				vss_msg->vss_cf.flags =
+					 VSS_HBU_NO_AUTO_RECOVERY;
+				vss_respond_to_host(0);
+				return;
+
+			case VSS_OP_GET_DM_INFO:
+				vss_msg->dm_info.flags = 0;
+				vss_respond_to_host(0);
+				return;
+
+			default:
+				vss_respond_to_host(0);
+				return;
+
+			}
+
+		}
+
+		icmsghdrp->icflags = ICMSGHDRFLAG_TRANSACTION
+			| ICMSGHDRFLAG_RESPONSE;
+
+		vmbus_sendpacket(channel, recv_buffer,
+				       recvlen, requestid,
+				       VM_PKT_DATA_INBAND, 0);
+	}
+
+}
+
+int
+hv_vss_init(struct hv_util_service *srv)
+{
+	int err;
+
+	err = cn_add_callback(&vss_id, vss_name, vss_cn_callback);
+	if (err)
+		return err;
+	recv_buffer = srv->recv_buffer;
+
+	/*
+	 * When this driver loads, the user level daemon that
+	 * processes the host requests may not yet be running.
+	 * Defer processing channel callbacks until the daemon
+	 * has registered.
+	 */
+	vss_transaction.active = true;
+	return 0;
+}
+
+void hv_vss_deinit(void)
+{
+	cn_del_callback(&vss_id);
+	cancel_work_sync(&vss_send_op_work);
+}
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 1d4cbd8e8261..2f561c5dfe24 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -49,6 +49,12 @@ static struct hv_util_service util_kvp = {
 	.util_deinit = hv_kvp_deinit,
 };
 
+static struct hv_util_service util_vss = {
+	.util_cb = hv_vss_onchannelcallback,
+	.util_init = hv_vss_init,
+	.util_deinit = hv_vss_deinit,
+};
+
 static void perform_shutdown(struct work_struct *dummy)
 {
 	orderly_poweroff(true);
@@ -339,6 +345,10 @@ static const struct hv_vmbus_device_id id_table[] = {
 	{ HV_KVP_GUID,
 	  .driver_data = (unsigned long)&util_kvp
 	},
+	/* VSS GUID */
+	{ HV_VSS_GUID,
+	  .driver_data = (unsigned long)&util_vss
+	},
 	{ },
 };
 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index df77ba9a8166..95d0850584da 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -27,6 +27,63 @@
 
 #include <linux/types.h>
 
+
+/*
+ * Implementation of host controlled snapshot of the guest.
+ */
+
+#define VSS_OP_REGISTER 128
+
+enum hv_vss_op {
+	VSS_OP_CREATE = 0,
+	VSS_OP_DELETE,
+	VSS_OP_HOT_BACKUP,
+	VSS_OP_GET_DM_INFO,
+	VSS_OP_BU_COMPLETE,
+	/*
+	 * Following operations are only supported with IC version >= 5.0
+	 */
+	VSS_OP_FREEZE, /* Freeze the file systems in the VM */
+	VSS_OP_THAW, /* Unfreeze the file systems */
+	VSS_OP_AUTO_RECOVER,
+	VSS_OP_COUNT /* Number of operations, must be last */
+};
+
+
+/*
+ * Header for all VSS messages.
+ */
+struct hv_vss_hdr {
+	__u8 operation;
+	__u8 reserved[7];
+} __attribute__((packed));
+
+
+/*
+ * Flag values for the hv_vss_check_feature. Linux supports only
+ * one value.
+ */
+#define VSS_HBU_NO_AUTO_RECOVERY	0x00000005
+
+struct hv_vss_check_feature {
+	__u32 flags;
+} __attribute__((packed));
+
+struct hv_vss_check_dm_info {
+	__u32 flags;
+} __attribute__((packed));
+
+struct hv_vss_msg {
+	union {
+		struct hv_vss_hdr vss_hdr;
+		int error;
+	};
+	union {
+		struct hv_vss_check_feature vss_cf;
+		struct hv_vss_check_dm_info dm_info;
+	};
+} __attribute__((packed));
+
 /*
  * An implementation of HyperV key value pair (KVP) functionality for Linux.
  *
@@ -1252,6 +1309,14 @@ void vmbus_driver_unregister(struct hv_driver *hv_driver);
 			0xb9, 0x8b, 0x8b, 0xa1, 0xa1, 0xf3, 0xf9, 0x5a \
 		}
 
+/*
+ * VSS (Backup/Restore) GUID
+ */
+#define HV_VSS_GUID \
+	.guid = { \
+			0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42, \
+			0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4,  0x40 \
+		}
 /*
  * Common header for Hyper-V ICs
  */
@@ -1356,6 +1421,10 @@ int hv_kvp_init(struct hv_util_service *);
 void hv_kvp_deinit(void);
 void hv_kvp_onchannelcallback(void *);
 
+int hv_vss_init(struct hv_util_service *);
+void hv_vss_deinit(void);
+void hv_vss_onchannelcallback(void *);
+
 /*
  * Negotiated version with the Host.
  */
diff --git a/include/uapi/linux/connector.h b/include/uapi/linux/connector.h
index 8761a0349c74..4cb283505e45 100644
--- a/include/uapi/linux/connector.h
+++ b/include/uapi/linux/connector.h
@@ -44,8 +44,11 @@
 #define CN_VAL_DRBD			0x1
 #define CN_KVP_IDX			0x9	/* HyperV KVP */
 #define CN_KVP_VAL			0x1	/* queries from the kernel */
+#define CN_VSS_IDX			0xA     /* HyperV VSS */
+#define CN_VSS_VAL			0x1     /* queries from the kernel */
 
-#define CN_NETLINK_USERS		10	/* Highest index + 1 */
+
+#define CN_NETLINK_USERS		11	/* Highest index + 1 */
 
 /*
  * Maximum connector's message size.
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
new file mode 100644
index 000000000000..95269952aa92
--- /dev/null
+++ b/tools/hv/hv_vss_daemon.c
@@ -0,0 +1,220 @@
+/*
+ * An implementation of the host initiated guest snapshot for Hyper-V.
+ *
+ *
+ * Copyright (C) 2013, Microsoft, Inc.
+ * Author : K. Y. Srinivasan <kys@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT.  See the GNU General Public License for more
+ * details.
+ *
+ */
+
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+#include <linux/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <arpa/inet.h>
+#include <linux/connector.h>
+#include <linux/hyperv.h>
+#include <linux/netlink.h>
+#include <syslog.h>
+
+static char vss_recv_buffer[4096];
+static char vss_send_buffer[4096];
+static struct sockaddr_nl addr;
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+
+static int vss_operate(int operation)
+{
+	char *fs_op;
+	char cmd[512];
+	char buf[512];
+	FILE *file;
+	char *p;
+	char *x;
+	int error;
+
+	switch (operation) {
+	case VSS_OP_FREEZE:
+		fs_op = "-f ";
+		break;
+	case VSS_OP_THAW:
+		fs_op = "-u ";
+		break;
+	}
+
+	file = popen("mount | awk '/^\/dev\// { print $3}'", "r");
+	if (file == NULL)
+		return;
+
+	while ((p = fgets(buf, sizeof(buf), file)) != NULL) {
+		x = strchr(p, '\n');
+		*x = '\0';
+		if (!strncmp(p, "/", sizeof("/")))
+			continue;
+
+		sprintf(cmd, "%s %s %s", "fsfreeze ", fs_op, p);
+		syslog(LOG_INFO, "VSS cmd is %s\n", cmd);
+		error = system(cmd);
+	}
+	pclose(file);
+
+	sprintf(cmd, "%s %s %s", "fsfreeze ", fs_op, "/");
+	syslog(LOG_INFO, "VSS cmd is %s\n", cmd);
+	error = system(cmd);
+
+	return error;
+}
+
+static int netlink_send(int fd, struct cn_msg *msg)
+{
+	struct nlmsghdr *nlh;
+	unsigned int size;
+	struct msghdr message;
+	char buffer[64];
+	struct iovec iov[2];
+
+	size = NLMSG_SPACE(sizeof(struct cn_msg) + msg->len);
+
+	nlh = (struct nlmsghdr *)buffer;
+	nlh->nlmsg_seq = 0;
+	nlh->nlmsg_pid = getpid();
+	nlh->nlmsg_type = NLMSG_DONE;
+	nlh->nlmsg_len = NLMSG_LENGTH(size - sizeof(*nlh));
+	nlh->nlmsg_flags = 0;
+
+	iov[0].iov_base = nlh;
+	iov[0].iov_len = sizeof(*nlh);
+
+	iov[1].iov_base = msg;
+	iov[1].iov_len = size;
+
+	memset(&message, 0, sizeof(message));
+	message.msg_name = &addr;
+	message.msg_namelen = sizeof(addr);
+	message.msg_iov = iov;
+	message.msg_iovlen = 2;
+
+	return sendmsg(fd, &message, 0);
+}
+
+int main(void)
+{
+	int fd, len, nl_group;
+	int error;
+	struct cn_msg *message;
+	struct pollfd pfd;
+	struct nlmsghdr *incoming_msg;
+	struct cn_msg	*incoming_cn_msg;
+	int	op;
+	struct hv_vss_msg *vss_msg;
+
+	daemon(1, 0);
+	openlog("Hyper-V VSS", 0, LOG_USER);
+	syslog(LOG_INFO, "VSS starting; pid is:%d", getpid());
+
+	fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+	if (fd < 0) {
+		syslog(LOG_ERR, "netlink socket creation failed; error:%d", fd);
+		exit(EXIT_FAILURE);
+	}
+	addr.nl_family = AF_NETLINK;
+	addr.nl_pad = 0;
+	addr.nl_pid = 0;
+	addr.nl_groups = 0;
+
+
+	error = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+	if (error < 0) {
+		syslog(LOG_ERR, "bind failed; error:%d", error);
+		close(fd);
+		exit(EXIT_FAILURE);
+	}
+	nl_group = CN_VSS_IDX;
+	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &nl_group, sizeof(nl_group));
+	/*
+	 * Register ourselves with the kernel.
+	 */
+	message = (struct cn_msg *)vss_send_buffer;
+	message->id.idx = CN_VSS_IDX;
+	message->id.val = CN_VSS_VAL;
+	message->ack = 0;
+	vss_msg = (struct hv_vss_msg *)message->data;
+	vss_msg->vss_hdr.operation = VSS_OP_REGISTER;
+
+	message->len = sizeof(struct hv_vss_msg);
+
+	len = netlink_send(fd, message);
+	if (len < 0) {
+		syslog(LOG_ERR, "netlink_send failed; error:%d", len);
+		close(fd);
+		exit(EXIT_FAILURE);
+	}
+
+	pfd.fd = fd;
+
+	while (1) {
+		struct sockaddr *addr_p = (struct sockaddr *) &addr;
+		socklen_t addr_l = sizeof(addr);
+		pfd.events = POLLIN;
+		pfd.revents = 0;
+		poll(&pfd, 1, -1);
+
+		len = recvfrom(fd, vss_recv_buffer, sizeof(vss_recv_buffer), 0,
+				addr_p, &addr_l);
+
+		if (len < 0 || addr.nl_pid) {
+			syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s",
+					addr.nl_pid, errno, strerror(errno));
+			close(fd);
+			return -1;
+		}
+
+		incoming_msg = (struct nlmsghdr *)vss_recv_buffer;
+
+		if (incoming_msg->nlmsg_type != NLMSG_DONE)
+			continue;
+
+		incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg);
+		vss_msg = (struct hv_vss_msg *)incoming_cn_msg->data;
+		op = vss_msg->vss_hdr.operation;
+		error =  HV_S_OK;
+
+		switch (op) {
+		case VSS_OP_FREEZE:
+		case VSS_OP_THAW:
+			error = vss_operate(op);
+			if (error)
+				error = HV_E_FAIL;
+			break;
+		default:
+			syslog(LOG_ERR, "Illegal op:%d\n", op);
+		}
+		vss_msg->error = error;
+		len = netlink_send(fd, incoming_cn_msg);
+		if (len < 0) {
+			syslog(LOG_ERR, "net_link send failed; error:%d", len);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+}
-- 
cgit 


From 6681712d67eef14c4ce793561c3231659153a320 Mon Sep 17 00:00:00 2001
From: David Stevens <dlstevens@us.ibm.com>
Date: Fri, 15 Mar 2013 04:35:51 +0000
Subject: vxlan: generalize forwarding tables

This patch generalizes VXLAN forwarding table entries allowing an administrator
to:
	1) specify multiple destinations for a given MAC
	2) specify alternate vni's in the VXLAN header
	3) specify alternate destination UDP ports
	4) use multicast MAC addresses as fdb lookup keys
	5) specify multicast destinations
	6) specify the outgoing interface for forwarded packets

The combination allows configuration of more complex topologies using VXLAN
encapsulation.

Changes since v1: rebase to 3.9.0-rc2

Signed-Off-By: David L Stevens <dlstevens@us.ibm.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c            | 263 ++++++++++++++++++++++++++++++++---------
 include/uapi/linux/neighbour.h |   3 +
 net/core/rtnetlink.c           |   2 +-
 3 files changed, 210 insertions(+), 58 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index db0df07c18dc..33427fd62515 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -81,13 +81,22 @@ struct vxlan_net {
 	struct hlist_head vni_list[VNI_HASH_SIZE];
 };
 
+struct vxlan_rdst {
+	struct rcu_head		 rcu;
+	__be32			 remote_ip;
+	__be16			 remote_port;
+	u32			 remote_vni;
+	u32			 remote_ifindex;
+	struct vxlan_rdst	*remote_next;
+};
+
 /* Forwarding table entry */
 struct vxlan_fdb {
 	struct hlist_node hlist;	/* linked list of entries */
 	struct rcu_head	  rcu;
 	unsigned long	  updated;	/* jiffies */
 	unsigned long	  used;
-	__be32		  remote_ip;
+	struct vxlan_rdst remote;
 	u16		  state;	/* see ndm_state */
 	u8		  eth_addr[ETH_ALEN];
 };
@@ -157,7 +166,8 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id)
 /* Fill in neighbour message in skbuff. */
 static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 			   const struct vxlan_fdb *fdb,
-			   u32 portid, u32 seq, int type, unsigned int flags)
+			   u32 portid, u32 seq, int type, unsigned int flags,
+			   const struct vxlan_rdst *rdst)
 {
 	unsigned long now = jiffies;
 	struct nda_cacheinfo ci;
@@ -176,7 +186,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 
 	if (type == RTM_GETNEIGH) {
 		ndm->ndm_family	= AF_INET;
-		send_ip = fdb->remote_ip != 0;
+		send_ip = rdst->remote_ip != htonl(INADDR_ANY);
 		send_eth = !is_zero_ether_addr(fdb->eth_addr);
 	} else
 		ndm->ndm_family	= AF_BRIDGE;
@@ -188,7 +198,17 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
 		goto nla_put_failure;
 
-	if (send_ip && nla_put_be32(skb, NDA_DST, fdb->remote_ip))
+	if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip))
+		goto nla_put_failure;
+
+	if (rdst->remote_port && rdst->remote_port != vxlan_port &&
+	    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
+		goto nla_put_failure;
+	if (rdst->remote_vni != vxlan->vni &&
+	    nla_put_be32(skb, NDA_VNI, rdst->remote_vni))
+		goto nla_put_failure;
+	if (rdst->remote_ifindex &&
+	    nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
 		goto nla_put_failure;
 
 	ci.ndm_used	 = jiffies_to_clock_t(now - fdb->used);
@@ -211,6 +231,9 @@ static inline size_t vxlan_nlmsg_size(void)
 	return NLMSG_ALIGN(sizeof(struct ndmsg))
 		+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
 		+ nla_total_size(sizeof(__be32)) /* NDA_DST */
+		+ nla_total_size(sizeof(__be32)) /* NDA_PORT */
+		+ nla_total_size(sizeof(__be32)) /* NDA_VNI */
+		+ nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */
 		+ nla_total_size(sizeof(struct nda_cacheinfo));
 }
 
@@ -225,7 +248,7 @@ static void vxlan_fdb_notify(struct vxlan_dev *vxlan,
 	if (skb == NULL)
 		goto errout;
 
-	err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0);
+	err = vxlan_fdb_info(skb, vxlan, fdb, 0, 0, type, 0, &fdb->remote);
 	if (err < 0) {
 		/* -EMSGSIZE implies BUG in vxlan_nlmsg_size() */
 		WARN_ON(err == -EMSGSIZE);
@@ -247,7 +270,8 @@ static void vxlan_ip_miss(struct net_device *dev, __be32 ipa)
 
 	memset(&f, 0, sizeof f);
 	f.state = NUD_STALE;
-	f.remote_ip = ipa; /* goes to NDA_DST */
+	f.remote.remote_ip = ipa; /* goes to NDA_DST */
+	f.remote.remote_vni = VXLAN_N_VID;
 
 	vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH);
 }
@@ -300,10 +324,38 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
 	return NULL;
 }
 
+/* Add/update destinations for multicast */
+static int vxlan_fdb_append(struct vxlan_fdb *f,
+			    __be32 ip, __u32 port, __u32 vni, __u32 ifindex)
+{
+	struct vxlan_rdst *rd_prev, *rd;
+
+	rd_prev = NULL;
+	for (rd = &f->remote; rd; rd = rd->remote_next) {
+		if (rd->remote_ip == ip &&
+		    rd->remote_port == port &&
+		    rd->remote_vni == vni &&
+		    rd->remote_ifindex == ifindex)
+			return 0;
+		rd_prev = rd;
+	}
+	rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
+	if (rd == NULL)
+		return -ENOBUFS;
+	rd->remote_ip = ip;
+	rd->remote_port = port;
+	rd->remote_vni = vni;
+	rd->remote_ifindex = ifindex;
+	rd->remote_next = NULL;
+	rd_prev->remote_next = rd;
+	return 1;
+}
+
 /* Add new entry to forwarding table -- assumes lock held */
 static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 			    const u8 *mac, __be32 ip,
-			    __u16 state, __u16 flags)
+			    __u16 state, __u16 flags,
+			    __u32 port, __u32 vni, __u32 ifindex)
 {
 	struct vxlan_fdb *f;
 	int notify = 0;
@@ -320,6 +372,14 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 			f->updated = jiffies;
 			notify = 1;
 		}
+		if ((flags & NLM_F_APPEND) &&
+		    is_multicast_ether_addr(f->eth_addr)) {
+			int rc = vxlan_fdb_append(f, ip, port, vni, ifindex);
+
+			if (rc < 0)
+				return rc;
+			notify |= rc;
+		}
 	} else {
 		if (!(flags & NLM_F_CREATE))
 			return -ENOENT;
@@ -333,7 +393,11 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 			return -ENOMEM;
 
 		notify = 1;
-		f->remote_ip = ip;
+		f->remote.remote_ip = ip;
+		f->remote.remote_port = port;
+		f->remote.remote_vni = vni;
+		f->remote.remote_ifindex = ifindex;
+		f->remote.remote_next = NULL;
 		f->state = state;
 		f->updated = f->used = jiffies;
 		memcpy(f->eth_addr, mac, ETH_ALEN);
@@ -349,6 +413,19 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 	return 0;
 }
 
+void vxlan_fdb_free(struct rcu_head *head)
+{
+	struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
+
+	while (f->remote.remote_next) {
+		struct vxlan_rdst *rd = f->remote.remote_next;
+
+		f->remote.remote_next = rd->remote_next;
+		kfree(rd);
+	}
+	kfree(f);
+}
+
 static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
 {
 	netdev_dbg(vxlan->dev,
@@ -358,7 +435,7 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
 	vxlan_fdb_notify(vxlan, f, RTM_DELNEIGH);
 
 	hlist_del_rcu(&f->hlist);
-	kfree_rcu(f, rcu);
+	call_rcu(&f->rcu, vxlan_fdb_free);
 }
 
 /* Add static entry (via netlink) */
@@ -367,7 +444,9 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			 const unsigned char *addr, u16 flags)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct net *net = dev_net(vxlan->dev);
 	__be32 ip;
+	u32 port, vni, ifindex;
 	int err;
 
 	if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
@@ -384,8 +463,36 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 
 	ip = nla_get_be32(tb[NDA_DST]);
 
+	if (tb[NDA_PORT]) {
+		if (nla_len(tb[NDA_PORT]) != sizeof(u32))
+			return -EINVAL;
+		port = nla_get_u32(tb[NDA_PORT]);
+	} else
+		port = vxlan_port;
+
+	if (tb[NDA_VNI]) {
+		if (nla_len(tb[NDA_VNI]) != sizeof(u32))
+			return -EINVAL;
+		vni = nla_get_u32(tb[NDA_VNI]);
+	} else
+		vni = vxlan->vni;
+
+	if (tb[NDA_IFINDEX]) {
+		struct net_device *dev;
+
+		if (nla_len(tb[NDA_IFINDEX]) != sizeof(u32))
+			return -EINVAL;
+		ifindex = nla_get_u32(tb[NDA_IFINDEX]);
+		dev = dev_get_by_index(net, ifindex);
+		if (!dev)
+			return -EADDRNOTAVAIL;
+		dev_put(dev);
+	} else
+		ifindex = 0;
+
 	spin_lock_bh(&vxlan->hash_lock);
-	err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags);
+	err = vxlan_fdb_create(vxlan, addr, ip, ndm->ndm_state, flags, port,
+		vni, ifindex);
 	spin_unlock_bh(&vxlan->hash_lock);
 
 	return err;
@@ -423,18 +530,21 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		int err;
 
 		hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
-			if (idx < cb->args[0])
-				goto skip;
-
-			err = vxlan_fdb_info(skb, vxlan, f,
-					     NETLINK_CB(cb->skb).portid,
-					     cb->nlh->nlmsg_seq,
-					     RTM_NEWNEIGH,
-					     NLM_F_MULTI);
-			if (err < 0)
-				break;
+			struct vxlan_rdst *rd;
+			for (rd = &f->remote; rd; rd = rd->remote_next) {
+				if (idx < cb->args[0])
+					goto skip;
+
+				err = vxlan_fdb_info(skb, vxlan, f,
+						     NETLINK_CB(cb->skb).portid,
+						     cb->nlh->nlmsg_seq,
+						     RTM_NEWNEIGH,
+						     NLM_F_MULTI, rd);
+				if (err < 0)
+					break;
 skip:
-			++idx;
+				++idx;
+			}
 		}
 	}
 
@@ -454,22 +564,23 @@ static void vxlan_snoop(struct net_device *dev,
 	f = vxlan_find_mac(vxlan, src_mac);
 	if (likely(f)) {
 		f->used = jiffies;
-		if (likely(f->remote_ip == src_ip))
+		if (likely(f->remote.remote_ip == src_ip))
 			return;
 
 		if (net_ratelimit())
 			netdev_info(dev,
 				    "%pM migrated from %pI4 to %pI4\n",
-				    src_mac, &f->remote_ip, &src_ip);
+				    src_mac, &f->remote.remote_ip, &src_ip);
 
-		f->remote_ip = src_ip;
+		f->remote.remote_ip = src_ip;
 		f->updated = jiffies;
 	} else {
 		/* learned new entry */
 		spin_lock(&vxlan->hash_lock);
 		err = vxlan_fdb_create(vxlan, src_mac, src_ip,
 				       NUD_REACHABLE,
-				       NLM_F_EXCL|NLM_F_CREATE);
+				       NLM_F_EXCL|NLM_F_CREATE,
+				       vxlan_port, vxlan->vni, 0);
 		spin_unlock(&vxlan->hash_lock);
 	}
 }
@@ -701,7 +812,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
 		}
 
 		f = vxlan_find_mac(vxlan, n->ha);
-		if (f && f->remote_ip == 0) {
+		if (f && f->remote.remote_ip == htonl(INADDR_ANY)) {
 			/* bridge-local neighbor */
 			neigh_release(n);
 			goto out;
@@ -834,47 +945,26 @@ static int handle_offloads(struct sk_buff *skb)
 	return 0;
 }
 
-/* Transmit local packets over Vxlan
- *
- * Outer IP header inherits ECN and DF from inner header.
- * Outer UDP destination is the VXLAN assigned port.
- *           source port is based on hash of flow
- */
-static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
+				  struct vxlan_rdst *rdst, bool did_rsc)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct rtable *rt;
 	const struct iphdr *old_iph;
-	struct ethhdr *eth;
 	struct iphdr *iph;
 	struct vxlanhdr *vxh;
 	struct udphdr *uh;
 	struct flowi4 fl4;
 	unsigned int pkt_len = skb->len;
 	__be32 dst;
-	__u16 src_port;
+	__u16 src_port, dst_port;
+        u32 vni;
 	__be16 df = 0;
 	__u8 tos, ttl;
-	bool did_rsc = false;
-	const struct vxlan_fdb *f;
-
-	skb_reset_mac_header(skb);
-	eth = eth_hdr(skb);
-
-	if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP)
-		return arp_reduce(dev, skb);
-	else if ((vxlan->flags&VXLAN_F_RSC) && ntohs(eth->h_proto) == ETH_P_IP)
-		did_rsc = route_shortcircuit(dev, skb);
 
-	f = vxlan_find_mac(vxlan, eth->h_dest);
-	if (f == NULL) {
-		did_rsc = false;
-		dst = vxlan->gaddr;
-		if (!dst && (vxlan->flags & VXLAN_F_L2MISS) &&
-		    !is_multicast_ether_addr(eth->h_dest))
-			vxlan_fdb_miss(vxlan, eth->h_dest);
-	} else
-		dst = f->remote_ip;
+	dst_port = rdst->remote_port ? rdst->remote_port : vxlan_port;
+	vni = rdst->remote_vni;
+	dst = rdst->remote_ip;
 
 	if (!dst) {
 		if (did_rsc) {
@@ -922,7 +1012,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 	src_port = vxlan_src_port(vxlan, skb);
 
 	memset(&fl4, 0, sizeof(fl4));
-	fl4.flowi4_oif = vxlan->link;
+	fl4.flowi4_oif = rdst->remote_ifindex;
 	fl4.flowi4_tos = RT_TOS(tos);
 	fl4.daddr = dst;
 	fl4.saddr = vxlan->saddr;
@@ -949,13 +1039,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = htonl(VXLAN_FLAGS);
-	vxh->vx_vni = htonl(vxlan->vni << 8);
+	vxh->vx_vni = htonl(vni << 8);
 
 	__skb_push(skb, sizeof(*uh));
 	skb_reset_transport_header(skb);
 	uh = udp_hdr(skb);
 
-	uh->dest = htons(vxlan_port);
+	uh->dest = htons(dst_port);
 	uh->source = htons(src_port);
 
 	uh->len = htons(skb->len);
@@ -995,6 +1085,64 @@ tx_free:
 	return NETDEV_TX_OK;
 }
 
+/* Transmit local packets over Vxlan
+ *
+ * Outer IP header inherits ECN and DF from inner header.
+ * Outer UDP destination is the VXLAN assigned port.
+ *           source port is based on hash of flow
+ */
+static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct ethhdr *eth;
+	bool did_rsc = false;
+	struct vxlan_rdst group, *rdst0, *rdst;
+	struct vxlan_fdb *f;
+	int rc1, rc;
+
+	skb_reset_mac_header(skb);
+	eth = eth_hdr(skb);
+
+	if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP)
+		return arp_reduce(dev, skb);
+	else if ((vxlan->flags&VXLAN_F_RSC) && ntohs(eth->h_proto) == ETH_P_IP)
+		did_rsc = route_shortcircuit(dev, skb);
+
+	f = vxlan_find_mac(vxlan, eth->h_dest);
+	if (f == NULL) {
+		did_rsc = false;
+		group.remote_port = vxlan_port;
+		group.remote_vni = vxlan->vni;
+		group.remote_ip = vxlan->gaddr;
+		group.remote_ifindex = vxlan->link;
+		group.remote_next = 0;
+		rdst0 = &group;
+
+		if (group.remote_ip == htonl(INADDR_ANY) &&
+		    (vxlan->flags & VXLAN_F_L2MISS) &&
+		    !is_multicast_ether_addr(eth->h_dest))
+			vxlan_fdb_miss(vxlan, eth->h_dest);
+	} else
+		rdst0 = &f->remote;
+
+	rc = NETDEV_TX_OK;
+
+	/* if there are multiple destinations, send copies */
+	for (rdst = rdst0->remote_next; rdst; rdst = rdst->remote_next) {
+		struct sk_buff *skb1;
+
+		skb1 = skb_clone(skb, GFP_ATOMIC);
+		rc1 = vxlan_xmit_one(skb1, dev, rdst, did_rsc);
+		if (rc == NETDEV_TX_OK)
+			rc = rc1;
+	}
+
+	rc1 = vxlan_xmit_one(skb, dev, rdst0, did_rsc);
+	if (rc == NETDEV_TX_OK)
+		rc = rc1;
+	return rc;
+}
+
 /* Walk the forwarding table and purge stale entries */
 static void vxlan_cleanup(unsigned long arg)
 {
@@ -1558,6 +1706,7 @@ static void __exit vxlan_cleanup_module(void)
 {
 	rtnl_link_unregister(&vxlan_link_ops);
 	unregister_pernet_device(&vxlan_net_ops);
+	rcu_barrier();
 }
 module_exit(vxlan_cleanup_module);
 
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index adb068c53c4e..f175212420ab 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -21,6 +21,9 @@ enum {
 	NDA_CACHEINFO,
 	NDA_PROBES,
 	NDA_VLAN,
+	NDA_PORT,
+	NDA_VNI,
+	NDA_IFINDEX,
 	__NDA_MAX
 };
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 55b5624a4b00..0e86baf8f809 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2112,7 +2112,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 	}
 
 	addr = nla_data(tb[NDA_LLADDR]);
-	if (!is_valid_ether_addr(addr)) {
+	if (is_zero_ether_addr(addr)) {
 		pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ether address\n");
 		return -EINVAL;
 	}
-- 
cgit 


From 1a2c6181c4a1922021b4d7df373bba612c3e5f04 Mon Sep 17 00:00:00 2001
From: Christoph Paasch <christoph.paasch@uclouvain.be>
Date: Sun, 17 Mar 2013 08:23:34 +0000
Subject: tcp: Remove TCPCT

TCPCT uses option-number 253, reserved for experimental use and should
not be used in production environments.
Further, TCPCT does not fully implement RFC 6013.

As a nice side-effect, removing TCPCT increases TCP's performance for
very short flows:

Doing an apache-benchmark with -c 100 -n 100000, sending HTTP-requests
for files of 1KB size.

before this patch:
	average (among 7 runs) of 20845.5 Requests/Second
after:
	average (among 7 runs) of 21403.6 Requests/Second

Signed-off-by: Christoph Paasch <christoph.paasch@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |   8 -
 drivers/infiniband/hw/cxgb4/cm.c       |   2 +-
 include/linux/tcp.h                    |  10 --
 include/net/request_sock.h             |   8 +-
 include/net/tcp.h                      |  89 +----------
 include/uapi/linux/tcp.h               |  26 ----
 net/dccp/ipv4.c                        |   5 +-
 net/dccp/ipv6.c                        |   5 +-
 net/ipv4/inet_connection_sock.c        |   2 +-
 net/ipv4/syncookies.c                  |   3 +-
 net/ipv4/sysctl_net_ipv4.c             |   7 -
 net/ipv4/tcp.c                         | 267 ---------------------------------
 net/ipv4/tcp_input.c                   |  69 +--------
 net/ipv4/tcp_ipv4.c                    |  60 +-------
 net/ipv4/tcp_minisocks.c               |  40 +----
 net/ipv4/tcp_output.c                  | 219 +--------------------------
 net/ipv6/syncookies.c                  |   3 +-
 net/ipv6/tcp_ipv6.c                    |  56 +------
 18 files changed, 38 insertions(+), 841 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 18a24c405ac0..17953e2bc3e9 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -175,14 +175,6 @@ tcp_congestion_control - STRING
 	is inherited.
 	[see setsockopt(listenfd, SOL_TCP, TCP_CONGESTION, "name" ...) ]
 
-tcp_cookie_size - INTEGER
-	Default size of TCP Cookie Transactions (TCPCT) option, that may be
-	overridden on a per socket basis by the TCPCT socket option.
-	Values greater than the maximum (16) are interpreted as the maximum.
-	Values greater than zero and less than the minimum (8) are interpreted
-	as the minimum.  Odd values are interpreted as the next even value.
-	Default: 0 (off).
-
 tcp_dsack - BOOLEAN
 	Allows TCP to send "duplicate" SACKs.
 
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 8dcc84fd9d30..54fd31fcc332 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -2915,7 +2915,7 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
 	 */
 	memset(&tmp_opt, 0, sizeof(tmp_opt));
 	tcp_clear_options(&tmp_opt);
-	tcp_parse_options(skb, &tmp_opt, NULL, 0, NULL);
+	tcp_parse_options(skb, &tmp_opt, 0, NULL);
 
 	req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));
 	memset(req, 0, sizeof(*req));
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 763c108ee03d..ed6a7456eecd 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -90,9 +90,6 @@ struct tcp_options_received {
 		sack_ok : 4,	/* SACK seen on SYN packet		*/
 		snd_wscale : 4,	/* Window scaling received from sender	*/
 		rcv_wscale : 4;	/* Window scaling to send to receiver	*/
-	u8	cookie_plus:6,	/* bytes in authenticator/cookie option	*/
-		cookie_out_never:1,
-		cookie_in_always:1;
 	u8	num_sacks;	/* Number of SACK blocks		*/
 	u16	user_mss;	/* mss requested by user in ioctl	*/
 	u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
@@ -102,7 +99,6 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
 {
 	rx_opt->tstamp_ok = rx_opt->sack_ok = 0;
 	rx_opt->wscale_ok = rx_opt->snd_wscale = 0;
-	rx_opt->cookie_plus = 0;
 }
 
 /* This is the max number of SACKS that we'll generate and process. It's safe
@@ -320,12 +316,6 @@ struct tcp_sock {
 	struct tcp_md5sig_info	__rcu *md5sig_info;
 #endif
 
-	/* When the cookie options are generated and exchanged, then this
-	 * object holds a reference to them (cookie_values->kref).  Also
-	 * contains related tcp_cookie_transactions fields.
-	 */
-	struct tcp_cookie_values  *cookie_values;
-
 /* TCP fastopen related information */
 	struct tcp_fastopen_request *fastopen_req;
 	/* fastopen_rsk points to request_sock that resulted in this big
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index a51dbd17c2de..9069e65c1c56 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -27,19 +27,13 @@ struct sk_buff;
 struct dst_entry;
 struct proto;
 
-/* empty to "strongly type" an otherwise void parameter.
- */
-struct request_values {
-};
-
 struct request_sock_ops {
 	int		family;
 	int		obj_size;
 	struct kmem_cache	*slab;
 	char		*slab_name;
 	int		(*rtx_syn_ack)(struct sock *sk,
-				       struct request_sock *req,
-				       struct request_values *rvp);
+				       struct request_sock *req);
 	void		(*send_ack)(struct sock *sk, struct sk_buff *skb,
 				    struct request_sock *req);
 	void		(*send_reset)(struct sock *sk,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ab9f947b118b..7f2f17198d75 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -179,7 +179,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCPOPT_SACK             5       /* SACK Block */
 #define TCPOPT_TIMESTAMP	8	/* Better RTT estimations/PAWS */
 #define TCPOPT_MD5SIG		19	/* MD5 Signature (RFC2385) */
-#define TCPOPT_COOKIE		253	/* Cookie extension (experimental) */
 #define TCPOPT_EXP		254	/* Experimental */
 /* Magic number to be after the option value for sharing TCP
  * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
@@ -454,7 +453,7 @@ extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req);
 extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		       size_t len, int nonblock, int flags, int *addr_len);
 extern void tcp_parse_options(const struct sk_buff *skb,
-			      struct tcp_options_received *opt_rx, const u8 **hvpp,
+			      struct tcp_options_received *opt_rx,
 			      int estab, struct tcp_fastopen_cookie *foc);
 extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
 
@@ -476,7 +475,6 @@ extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
 extern int tcp_connect(struct sock *sk);
 extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 					struct request_sock *req,
-					struct request_values *rvp,
 					struct tcp_fastopen_cookie *foc);
 extern int tcp_disconnect(struct sock *sk, int flags);
 
@@ -1589,91 +1587,6 @@ struct tcp_request_sock_ops {
 #endif
 };
 
-/* Using SHA1 for now, define some constants.
- */
-#define COOKIE_DIGEST_WORDS (SHA_DIGEST_WORDS)
-#define COOKIE_MESSAGE_WORDS (SHA_MESSAGE_BYTES / 4)
-#define COOKIE_WORKSPACE_WORDS (COOKIE_DIGEST_WORDS + COOKIE_MESSAGE_WORDS)
-
-extern int tcp_cookie_generator(u32 *bakery);
-
-/**
- *	struct tcp_cookie_values - each socket needs extra space for the
- *	cookies, together with (optional) space for any SYN data.
- *
- *	A tcp_sock contains a pointer to the current value, and this is
- *	cloned to the tcp_timewait_sock.
- *
- * @cookie_pair:	variable data from the option exchange.
- *
- * @cookie_desired:	user specified tcpct_cookie_desired.  Zero
- *			indicates default (sysctl_tcp_cookie_size).
- *			After cookie sent, remembers size of cookie.
- *			Range 0, TCP_COOKIE_MIN to TCP_COOKIE_MAX.
- *
- * @s_data_desired:	user specified tcpct_s_data_desired.  When the
- *			constant payload is specified (@s_data_constant),
- *			holds its length instead.
- *			Range 0 to TCP_MSS_DESIRED.
- *
- * @s_data_payload:	constant data that is to be included in the
- *			payload of SYN or SYNACK segments when the
- *			cookie option is present.
- */
-struct tcp_cookie_values {
-	struct kref	kref;
-	u8		cookie_pair[TCP_COOKIE_PAIR_SIZE];
-	u8		cookie_pair_size;
-	u8		cookie_desired;
-	u16		s_data_desired:11,
-			s_data_constant:1,
-			s_data_in:1,
-			s_data_out:1,
-			s_data_unused:2;
-	u8		s_data_payload[0];
-};
-
-static inline void tcp_cookie_values_release(struct kref *kref)
-{
-	kfree(container_of(kref, struct tcp_cookie_values, kref));
-}
-
-/* The length of constant payload data.  Note that s_data_desired is
- * overloaded, depending on s_data_constant: either the length of constant
- * data (returned here) or the limit on variable data.
- */
-static inline int tcp_s_data_size(const struct tcp_sock *tp)
-{
-	return (tp->cookie_values != NULL && tp->cookie_values->s_data_constant)
-		? tp->cookie_values->s_data_desired
-		: 0;
-}
-
-/**
- *	struct tcp_extend_values - tcp_ipv?.c to tcp_output.c workspace.
- *
- *	As tcp_request_sock has already been extended in other places, the
- *	only remaining method is to pass stack values along as function
- *	parameters.  These parameters are not needed after sending SYNACK.
- *
- * @cookie_bakery:	cryptographic secret and message workspace.
- *
- * @cookie_plus:	bytes in authenticator/cookie option, copied from
- *			struct tcp_options_received (above).
- */
-struct tcp_extend_values {
-	struct request_values		rv;
-	u32				cookie_bakery[COOKIE_WORKSPACE_WORDS];
-	u8				cookie_plus:6,
-					cookie_out_never:1,
-					cookie_in_always:1;
-};
-
-static inline struct tcp_extend_values *tcp_xv(struct request_values *rvp)
-{
-	return (struct tcp_extend_values *)rvp;
-}
-
 extern void tcp_v4_init(void);
 extern void tcp_init(void);
 
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 6b1ead0b0c9d..8d776ebc4829 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -102,7 +102,6 @@ enum {
 #define TCP_QUICKACK		12	/* Block/reenable quick acks */
 #define TCP_CONGESTION		13	/* Congestion control algorithm */
 #define TCP_MD5SIG		14	/* TCP MD5 Signature (RFC2385) */
-#define TCP_COOKIE_TRANSACTIONS	15	/* TCP Cookie Transactions */
 #define TCP_THIN_LINEAR_TIMEOUTS 16      /* Use linear timeouts for thin streams*/
 #define TCP_THIN_DUPACK         17      /* Fast retrans. after 1 dupack */
 #define TCP_USER_TIMEOUT	18	/* How long for loss retry before timeout */
@@ -199,29 +198,4 @@ struct tcp_md5sig {
 	__u8	tcpm_key[TCP_MD5SIG_MAXKEYLEN];		/* key (binary) */
 };
 
-/* for TCP_COOKIE_TRANSACTIONS (TCPCT) socket option */
-#define TCP_COOKIE_MIN		 8		/*  64-bits */
-#define TCP_COOKIE_MAX		16		/* 128-bits */
-#define TCP_COOKIE_PAIR_SIZE	(2*TCP_COOKIE_MAX)
-
-/* Flags for both getsockopt and setsockopt */
-#define TCP_COOKIE_IN_ALWAYS	(1 << 0)	/* Discard SYN without cookie */
-#define TCP_COOKIE_OUT_NEVER	(1 << 1)	/* Prohibit outgoing cookies,
-						 * supercedes everything. */
-
-/* Flags for getsockopt */
-#define TCP_S_DATA_IN		(1 << 2)	/* Was data received? */
-#define TCP_S_DATA_OUT		(1 << 3)	/* Was data sent? */
-
-/* TCP_COOKIE_TRANSACTIONS data */
-struct tcp_cookie_transactions {
-	__u16	tcpct_flags;			/* see above */
-	__u8	__tcpct_pad1;			/* zero */
-	__u8	tcpct_cookie_desired;		/* bytes */
-	__u16	tcpct_s_data_desired;		/* bytes of variable data */
-	__u16	tcpct_used;			/* bytes in value */
-	__u8	tcpct_value[TCP_MSS_DEFAULT];
-};
-
-
 #endif /* _UAPI_LINUX_TCP_H */
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 4f9f5eb478f1..ebc54fef85a5 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -500,8 +500,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
 	return &rt->dst;
 }
 
-static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
-				 struct request_values *rv_unused)
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
 {
 	int err = -1;
 	struct sk_buff *skb;
@@ -658,7 +657,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	dreq->dreq_gss     = dreq->dreq_iss;
 	dreq->dreq_service = service;
 
-	if (dccp_v4_send_response(sk, req, NULL))
+	if (dccp_v4_send_response(sk, req))
 		goto drop_and_free;
 
 	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6e05981f271e..9c61f9c02fdb 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -213,8 +213,7 @@ out:
 }
 
 
-static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
-				 struct request_values *rv_unused)
+static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 {
 	struct inet6_request_sock *ireq6 = inet6_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -428,7 +427,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	dreq->dreq_gss     = dreq->dreq_iss;
 	dreq->dreq_service = service;
 
-	if (dccp_v6_send_response(sk, req, NULL))
+	if (dccp_v6_send_response(sk, req))
 		goto drop_and_free;
 
 	inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 786d97aee751..6acb541c9091 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -559,7 +559,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
 
 int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
 {
-	int err = req->rsk_ops->rtx_syn_ack(parent, req, NULL);
+	int err = req->rsk_ops->rtx_syn_ack(parent, req);
 
 	if (!err)
 		req->num_retrans++;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index ef54377fb11c..7f4a5cb8f8d0 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -267,7 +267,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 			     struct ip_options *opt)
 {
 	struct tcp_options_received tcp_opt;
-	const u8 *hash_location;
 	struct inet_request_sock *ireq;
 	struct tcp_request_sock *treq;
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -294,7 +293,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 
 	/* check for timestamp cookie support */
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
+	tcp_parse_options(skb, &tcp_opt, 0, NULL);
 
 	if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
 		goto out;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index cca4550f4082..cb45062c8be0 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -732,13 +732,6 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "tcp_cookie_size",
-		.data		= &sysctl_tcp_cookie_size,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
 	{
 		.procname       = "tcp_thin_linear_timeouts",
 		.data           = &sysctl_tcp_thin_linear_timeouts,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8d14573ade77..17a6810af5c8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -409,15 +409,6 @@ void tcp_init_sock(struct sock *sk)
 
 	icsk->icsk_sync_mss = tcp_sync_mss;
 
-	/* TCP Cookie Transactions */
-	if (sysctl_tcp_cookie_size > 0) {
-		/* Default, cookies without s_data_payload. */
-		tp->cookie_values =
-			kzalloc(sizeof(*tp->cookie_values),
-				sk->sk_allocation);
-		if (tp->cookie_values != NULL)
-			kref_init(&tp->cookie_values->kref);
-	}
 	/* Presumed zeroed, in order of appearance:
 	 *	cookie_in_always, cookie_out_never,
 	 *	s_data_constant, s_data_in, s_data_out
@@ -2397,92 +2388,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		release_sock(sk);
 		return err;
 	}
-	case TCP_COOKIE_TRANSACTIONS: {
-		struct tcp_cookie_transactions ctd;
-		struct tcp_cookie_values *cvp = NULL;
-
-		if (sizeof(ctd) > optlen)
-			return -EINVAL;
-		if (copy_from_user(&ctd, optval, sizeof(ctd)))
-			return -EFAULT;
-
-		if (ctd.tcpct_used > sizeof(ctd.tcpct_value) ||
-		    ctd.tcpct_s_data_desired > TCP_MSS_DESIRED)
-			return -EINVAL;
-
-		if (ctd.tcpct_cookie_desired == 0) {
-			/* default to global value */
-		} else if ((0x1 & ctd.tcpct_cookie_desired) ||
-			   ctd.tcpct_cookie_desired > TCP_COOKIE_MAX ||
-			   ctd.tcpct_cookie_desired < TCP_COOKIE_MIN) {
-			return -EINVAL;
-		}
-
-		if (TCP_COOKIE_OUT_NEVER & ctd.tcpct_flags) {
-			/* Supercedes all other values */
-			lock_sock(sk);
-			if (tp->cookie_values != NULL) {
-				kref_put(&tp->cookie_values->kref,
-					 tcp_cookie_values_release);
-				tp->cookie_values = NULL;
-			}
-			tp->rx_opt.cookie_in_always = 0; /* false */
-			tp->rx_opt.cookie_out_never = 1; /* true */
-			release_sock(sk);
-			return err;
-		}
-
-		/* Allocate ancillary memory before locking.
-		 */
-		if (ctd.tcpct_used > 0 ||
-		    (tp->cookie_values == NULL &&
-		     (sysctl_tcp_cookie_size > 0 ||
-		      ctd.tcpct_cookie_desired > 0 ||
-		      ctd.tcpct_s_data_desired > 0))) {
-			cvp = kzalloc(sizeof(*cvp) + ctd.tcpct_used,
-				      GFP_KERNEL);
-			if (cvp == NULL)
-				return -ENOMEM;
-
-			kref_init(&cvp->kref);
-		}
-		lock_sock(sk);
-		tp->rx_opt.cookie_in_always =
-			(TCP_COOKIE_IN_ALWAYS & ctd.tcpct_flags);
-		tp->rx_opt.cookie_out_never = 0; /* false */
-
-		if (tp->cookie_values != NULL) {
-			if (cvp != NULL) {
-				/* Changed values are recorded by a changed
-				 * pointer, ensuring the cookie will differ,
-				 * without separately hashing each value later.
-				 */
-				kref_put(&tp->cookie_values->kref,
-					 tcp_cookie_values_release);
-			} else {
-				cvp = tp->cookie_values;
-			}
-		}
-
-		if (cvp != NULL) {
-			cvp->cookie_desired = ctd.tcpct_cookie_desired;
-
-			if (ctd.tcpct_used > 0) {
-				memcpy(cvp->s_data_payload, ctd.tcpct_value,
-				       ctd.tcpct_used);
-				cvp->s_data_desired = ctd.tcpct_used;
-				cvp->s_data_constant = 1; /* true */
-			} else {
-				/* No constant payload data. */
-				cvp->s_data_desired = ctd.tcpct_s_data_desired;
-				cvp->s_data_constant = 0; /* false */
-			}
-
-			tp->cookie_values = cvp;
-		}
-		release_sock(sk);
-		return err;
-	}
 	default:
 		/* fallthru */
 		break;
@@ -2902,41 +2807,6 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			return -EFAULT;
 		return 0;
 
-	case TCP_COOKIE_TRANSACTIONS: {
-		struct tcp_cookie_transactions ctd;
-		struct tcp_cookie_values *cvp = tp->cookie_values;
-
-		if (get_user(len, optlen))
-			return -EFAULT;
-		if (len < sizeof(ctd))
-			return -EINVAL;
-
-		memset(&ctd, 0, sizeof(ctd));
-		ctd.tcpct_flags = (tp->rx_opt.cookie_in_always ?
-				   TCP_COOKIE_IN_ALWAYS : 0)
-				| (tp->rx_opt.cookie_out_never ?
-				   TCP_COOKIE_OUT_NEVER : 0);
-
-		if (cvp != NULL) {
-			ctd.tcpct_flags |= (cvp->s_data_in ?
-					    TCP_S_DATA_IN : 0)
-					 | (cvp->s_data_out ?
-					    TCP_S_DATA_OUT : 0);
-
-			ctd.tcpct_cookie_desired = cvp->cookie_desired;
-			ctd.tcpct_s_data_desired = cvp->s_data_desired;
-
-			memcpy(&ctd.tcpct_value[0], &cvp->cookie_pair[0],
-			       cvp->cookie_pair_size);
-			ctd.tcpct_used = cvp->cookie_pair_size;
-		}
-
-		if (put_user(sizeof(ctd), optlen))
-			return -EFAULT;
-		if (copy_to_user(optval, &ctd, sizeof(ctd)))
-			return -EFAULT;
-		return 0;
-	}
 	case TCP_THIN_LINEAR_TIMEOUTS:
 		val = tp->thin_lto;
 		break;
@@ -3409,134 +3279,6 @@ EXPORT_SYMBOL(tcp_md5_hash_key);
 
 #endif
 
-/* Each Responder maintains up to two secret values concurrently for
- * efficient secret rollover.  Each secret value has 4 states:
- *
- * Generating.  (tcp_secret_generating != tcp_secret_primary)
- *    Generates new Responder-Cookies, but not yet used for primary
- *    verification.  This is a short-term state, typically lasting only
- *    one round trip time (RTT).
- *
- * Primary.  (tcp_secret_generating == tcp_secret_primary)
- *    Used both for generation and primary verification.
- *
- * Retiring.  (tcp_secret_retiring != tcp_secret_secondary)
- *    Used for verification, until the first failure that can be
- *    verified by the newer Generating secret.  At that time, this
- *    cookie's state is changed to Secondary, and the Generating
- *    cookie's state is changed to Primary.  This is a short-term state,
- *    typically lasting only one round trip time (RTT).
- *
- * Secondary.  (tcp_secret_retiring == tcp_secret_secondary)
- *    Used for secondary verification, after primary verification
- *    failures.  This state lasts no more than twice the Maximum Segment
- *    Lifetime (2MSL).  Then, the secret is discarded.
- */
-struct tcp_cookie_secret {
-	/* The secret is divided into two parts.  The digest part is the
-	 * equivalent of previously hashing a secret and saving the state,
-	 * and serves as an initialization vector (IV).  The message part
-	 * serves as the trailing secret.
-	 */
-	u32				secrets[COOKIE_WORKSPACE_WORDS];
-	unsigned long			expires;
-};
-
-#define TCP_SECRET_1MSL (HZ * TCP_PAWS_MSL)
-#define TCP_SECRET_2MSL (HZ * TCP_PAWS_MSL * 2)
-#define TCP_SECRET_LIFE (HZ * 600)
-
-static struct tcp_cookie_secret tcp_secret_one;
-static struct tcp_cookie_secret tcp_secret_two;
-
-/* Essentially a circular list, without dynamic allocation. */
-static struct tcp_cookie_secret *tcp_secret_generating;
-static struct tcp_cookie_secret *tcp_secret_primary;
-static struct tcp_cookie_secret *tcp_secret_retiring;
-static struct tcp_cookie_secret *tcp_secret_secondary;
-
-static DEFINE_SPINLOCK(tcp_secret_locker);
-
-/* Select a pseudo-random word in the cookie workspace.
- */
-static inline u32 tcp_cookie_work(const u32 *ws, const int n)
-{
-	return ws[COOKIE_DIGEST_WORDS + ((COOKIE_MESSAGE_WORDS-1) & ws[n])];
-}
-
-/* Fill bakery[COOKIE_WORKSPACE_WORDS] with generator, updating as needed.
- * Called in softirq context.
- * Returns: 0 for success.
- */
-int tcp_cookie_generator(u32 *bakery)
-{
-	unsigned long jiffy = jiffies;
-
-	if (unlikely(time_after_eq(jiffy, tcp_secret_generating->expires))) {
-		spin_lock_bh(&tcp_secret_locker);
-		if (!time_after_eq(jiffy, tcp_secret_generating->expires)) {
-			/* refreshed by another */
-			memcpy(bakery,
-			       &tcp_secret_generating->secrets[0],
-			       COOKIE_WORKSPACE_WORDS);
-		} else {
-			/* still needs refreshing */
-			get_random_bytes(bakery, COOKIE_WORKSPACE_WORDS);
-
-			/* The first time, paranoia assumes that the
-			 * randomization function isn't as strong.  But,
-			 * this secret initialization is delayed until
-			 * the last possible moment (packet arrival).
-			 * Although that time is observable, it is
-			 * unpredictably variable.  Mash in the most
-			 * volatile clock bits available, and expire the
-			 * secret extra quickly.
-			 */
-			if (unlikely(tcp_secret_primary->expires ==
-				     tcp_secret_secondary->expires)) {
-				struct timespec tv;
-
-				getnstimeofday(&tv);
-				bakery[COOKIE_DIGEST_WORDS+0] ^=
-					(u32)tv.tv_nsec;
-
-				tcp_secret_secondary->expires = jiffy
-					+ TCP_SECRET_1MSL
-					+ (0x0f & tcp_cookie_work(bakery, 0));
-			} else {
-				tcp_secret_secondary->expires = jiffy
-					+ TCP_SECRET_LIFE
-					+ (0xff & tcp_cookie_work(bakery, 1));
-				tcp_secret_primary->expires = jiffy
-					+ TCP_SECRET_2MSL
-					+ (0x1f & tcp_cookie_work(bakery, 2));
-			}
-			memcpy(&tcp_secret_secondary->secrets[0],
-			       bakery, COOKIE_WORKSPACE_WORDS);
-
-			rcu_assign_pointer(tcp_secret_generating,
-					   tcp_secret_secondary);
-			rcu_assign_pointer(tcp_secret_retiring,
-					   tcp_secret_primary);
-			/*
-			 * Neither call_rcu() nor synchronize_rcu() needed.
-			 * Retiring data is not freed.  It is replaced after
-			 * further (locked) pointer updates, and a quiet time
-			 * (minimum 1MSL, maximum LIFE - 2MSL).
-			 */
-		}
-		spin_unlock_bh(&tcp_secret_locker);
-	} else {
-		rcu_read_lock_bh();
-		memcpy(bakery,
-		       &rcu_dereference(tcp_secret_generating)->secrets[0],
-		       COOKIE_WORKSPACE_WORDS);
-		rcu_read_unlock_bh();
-	}
-	return 0;
-}
-EXPORT_SYMBOL(tcp_cookie_generator);
-
 void tcp_done(struct sock *sk)
 {
 	struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
@@ -3591,7 +3333,6 @@ void __init tcp_init(void)
 	unsigned long limit;
 	int max_rshare, max_wshare, cnt;
 	unsigned int i;
-	unsigned long jiffy = jiffies;
 
 	BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
@@ -3667,13 +3408,5 @@ void __init tcp_init(void)
 
 	tcp_register_congestion_control(&tcp_reno);
 
-	memset(&tcp_secret_one.secrets[0], 0, sizeof(tcp_secret_one.secrets));
-	memset(&tcp_secret_two.secrets[0], 0, sizeof(tcp_secret_two.secrets));
-	tcp_secret_one.expires = jiffy; /* past due */
-	tcp_secret_two.expires = jiffy; /* past due */
-	tcp_secret_generating = &tcp_secret_one;
-	tcp_secret_primary = &tcp_secret_one;
-	tcp_secret_retiring = &tcp_secret_two;
-	tcp_secret_secondary = &tcp_secret_two;
 	tcp_tasklet_init();
 }
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 836d74dd0187..19f0149fb6a2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3760,8 +3760,8 @@ old_ack:
  * But, this can also be called on packets in the established flow when
  * the fast version below fails.
  */
-void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx,
-		       const u8 **hvpp, int estab,
+void tcp_parse_options(const struct sk_buff *skb,
+		       struct tcp_options_received *opt_rx, int estab,
 		       struct tcp_fastopen_cookie *foc)
 {
 	const unsigned char *ptr;
@@ -3845,31 +3845,6 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
 				 */
 				break;
 #endif
-			case TCPOPT_COOKIE:
-				/* This option is variable length.
-				 */
-				switch (opsize) {
-				case TCPOLEN_COOKIE_BASE:
-					/* not yet implemented */
-					break;
-				case TCPOLEN_COOKIE_PAIR:
-					/* not yet implemented */
-					break;
-				case TCPOLEN_COOKIE_MIN+0:
-				case TCPOLEN_COOKIE_MIN+2:
-				case TCPOLEN_COOKIE_MIN+4:
-				case TCPOLEN_COOKIE_MIN+6:
-				case TCPOLEN_COOKIE_MAX:
-					/* 16-bit multiple */
-					opt_rx->cookie_plus = opsize;
-					*hvpp = ptr;
-					break;
-				default:
-					/* ignore option */
-					break;
-				}
-				break;
-
 			case TCPOPT_EXP:
 				/* Fast Open option shares code 254 using a
 				 * 16 bits magic number. It's valid only in
@@ -3915,8 +3890,7 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
  * If it is wrong it falls back on tcp_parse_options().
  */
 static bool tcp_fast_parse_options(const struct sk_buff *skb,
-				   const struct tcphdr *th,
-				   struct tcp_sock *tp, const u8 **hvpp)
+				   const struct tcphdr *th, struct tcp_sock *tp)
 {
 	/* In the spirit of fast parsing, compare doff directly to constant
 	 * values.  Because equality is used, short doff can be ignored here.
@@ -3930,7 +3904,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
 			return true;
 	}
 
-	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
+	tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
 	if (tp->rx_opt.saw_tstamp)
 		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
 
@@ -5311,12 +5285,10 @@ out:
 static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 				  const struct tcphdr *th, int syn_inerr)
 {
-	const u8 *hash_location;
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* RFC1323: H1. Apply PAWS check first. */
-	if (tcp_fast_parse_options(skb, th, tp, &hash_location) &&
-	    tp->rx_opt.saw_tstamp &&
+	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
 	    tcp_paws_discard(sk, skb)) {
 		if (!th->rst) {
 			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
@@ -5670,12 +5642,11 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 
 	if (mss == tp->rx_opt.user_mss) {
 		struct tcp_options_received opt;
-		const u8 *hash_location;
 
 		/* Get original SYNACK MSS value if user MSS sets mss_clamp */
 		tcp_clear_options(&opt);
 		opt.user_mss = opt.mss_clamp = 0;
-		tcp_parse_options(synack, &opt, &hash_location, 0, NULL);
+		tcp_parse_options(synack, &opt, 0, NULL);
 		mss = opt.mss_clamp;
 	}
 
@@ -5706,14 +5677,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 const struct tcphdr *th, unsigned int len)
 {
-	const u8 *hash_location;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_cookie_values *cvp = tp->cookie_values;
 	struct tcp_fastopen_cookie foc = { .len = -1 };
 	int saved_clamp = tp->rx_opt.mss_clamp;
 
-	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, &foc);
+	tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
 	if (tp->rx_opt.saw_tstamp)
 		tp->rx_opt.rcv_tsecr -= tp->tsoffset;
 
@@ -5810,30 +5779,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 * is initialized. */
 		tp->copied_seq = tp->rcv_nxt;
 
-		if (cvp != NULL &&
-		    cvp->cookie_pair_size > 0 &&
-		    tp->rx_opt.cookie_plus > 0) {
-			int cookie_size = tp->rx_opt.cookie_plus
-					- TCPOLEN_COOKIE_BASE;
-			int cookie_pair_size = cookie_size
-					     + cvp->cookie_desired;
-
-			/* A cookie extension option was sent and returned.
-			 * Note that each incoming SYNACK replaces the
-			 * Responder cookie.  The initial exchange is most
-			 * fragile, as protection against spoofing relies
-			 * entirely upon the sequence and timestamp (above).
-			 * This replacement strategy allows the correct pair to
-			 * pass through, while any others will be filtered via
-			 * Responder verification later.
-			 */
-			if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
-				memcpy(&cvp->cookie_pair[cvp->cookie_desired],
-				       hash_location, cookie_size);
-				cvp->cookie_pair_size = cookie_pair_size;
-			}
-		}
-
 		smp_mb();
 
 		tcp_finish_connect(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index b7ab868c8284..b27c758ca23f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -838,7 +838,6 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
-			      struct request_values *rvp,
 			      u16 queue_mapping,
 			      bool nocache)
 {
@@ -851,7 +850,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, rvp, NULL);
+	skb = tcp_make_synack(sk, dst, req, NULL);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
@@ -868,10 +867,9 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	return err;
 }
 
-static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req,
-			     struct request_values *rvp)
+static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
 {
-	int res = tcp_v4_send_synack(sk, NULL, req, rvp, 0, false);
+	int res = tcp_v4_send_synack(sk, NULL, req, 0, false);
 
 	if (!res)
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -1371,8 +1369,7 @@ static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
 static int tcp_v4_conn_req_fastopen(struct sock *sk,
 				    struct sk_buff *skb,
 				    struct sk_buff *skb_synack,
-				    struct request_sock *req,
-				    struct request_values *rvp)
+				    struct request_sock *req)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
@@ -1467,9 +1464,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk,
 
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcp_extend_values tmp_ext;
 	struct tcp_options_received tmp_opt;
-	const u8 *hash_location;
 	struct request_sock *req;
 	struct inet_request_sock *ireq;
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1519,42 +1514,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
 	tmp_opt.user_mss  = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, &hash_location, 0,
-	    want_cookie ? NULL : &foc);
-
-	if (tmp_opt.cookie_plus > 0 &&
-	    tmp_opt.saw_tstamp &&
-	    !tp->rx_opt.cookie_out_never &&
-	    (sysctl_tcp_cookie_size > 0 ||
-	     (tp->cookie_values != NULL &&
-	      tp->cookie_values->cookie_desired > 0))) {
-		u8 *c;
-		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
-		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
-
-		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
-			goto drop_and_release;
-
-		/* Secret recipe starts with IP addresses */
-		*mess++ ^= (__force u32)daddr;
-		*mess++ ^= (__force u32)saddr;
-
-		/* plus variable length Initiator Cookie */
-		c = (u8 *)mess;
-		while (l-- > 0)
-			*c++ ^= *hash_location++;
-
-		want_cookie = false;	/* not our kind of cookie */
-		tmp_ext.cookie_out_never = 0; /* false */
-		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
-	} else if (!tp->rx_opt.cookie_in_always) {
-		/* redundant indications, but ensure initialization. */
-		tmp_ext.cookie_out_never = 1; /* true */
-		tmp_ext.cookie_plus = 0;
-	} else {
-		goto drop_and_release;
-	}
-	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
+	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
 
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
@@ -1636,7 +1596,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	 * of tcp_v4_send_synack()->tcp_select_initial_window().
 	 */
 	skb_synack = tcp_make_synack(sk, dst, req,
-	    (struct request_values *)&tmp_ext,
 	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
 
 	if (skb_synack) {
@@ -1660,8 +1619,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		if (fastopen_cookie_present(&foc) && foc.len != 0)
 			NET_INC_STATS_BH(sock_net(sk),
 			    LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req,
-	    (struct request_values *)&tmp_ext))
+	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
 		goto drop_and_free;
 
 	return 0;
@@ -2241,12 +2199,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	if (inet_csk(sk)->icsk_bind_hash)
 		inet_put_port(sk);
 
-	/* TCP Cookie Transactions */
-	if (tp->cookie_values != NULL) {
-		kref_put(&tp->cookie_values->kref,
-			 tcp_cookie_values_release);
-		tp->cookie_values = NULL;
-	}
 	BUG_ON(tp->fastopen_rsk != NULL);
 
 	/* If socket is aborted during connect operation */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4bdb09fca401..8f0234f8bb95 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -93,13 +93,12 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 			   const struct tcphdr *th)
 {
 	struct tcp_options_received tmp_opt;
-	const u8 *hash_location;
 	struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
 	bool paws_reject = false;
 
 	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) {
-		tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
+		tcp_parse_options(skb, &tmp_opt, 0, NULL);
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.rcv_tsecr	-= tcptw->tw_ts_offset;
@@ -388,32 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		struct tcp_request_sock *treq = tcp_rsk(req);
 		struct inet_connection_sock *newicsk = inet_csk(newsk);
 		struct tcp_sock *newtp = tcp_sk(newsk);
-		struct tcp_sock *oldtp = tcp_sk(sk);
-		struct tcp_cookie_values *oldcvp = oldtp->cookie_values;
-
-		/* TCP Cookie Transactions require space for the cookie pair,
-		 * as it differs for each connection.  There is no need to
-		 * copy any s_data_payload stored at the original socket.
-		 * Failure will prevent resuming the connection.
-		 *
-		 * Presumed copied, in order of appearance:
-		 *	cookie_in_always, cookie_out_never
-		 */
-		if (oldcvp != NULL) {
-			struct tcp_cookie_values *newcvp =
-				kzalloc(sizeof(*newtp->cookie_values),
-					GFP_ATOMIC);
-
-			if (newcvp != NULL) {
-				kref_init(&newcvp->kref);
-				newcvp->cookie_desired =
-						oldcvp->cookie_desired;
-				newtp->cookie_values = newcvp;
-			} else {
-				/* Not Yet Implemented */
-				newtp->cookie_values = NULL;
-			}
-		}
 
 		/* Now setup tcp_sock */
 		newtp->pred_flags = 0;
@@ -422,8 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->rcv_nxt = treq->rcv_isn + 1;
 
 		newtp->snd_sml = newtp->snd_una =
-		newtp->snd_nxt = newtp->snd_up =
-			treq->snt_isn + 1 + tcp_s_data_size(oldtp);
+		newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
 
 		tcp_prequeue_init(newtp);
 		INIT_LIST_HEAD(&newtp->tsq_node);
@@ -460,8 +432,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		tcp_set_ca_state(newsk, TCP_CA_Open);
 		tcp_init_xmit_timers(newsk);
 		skb_queue_head_init(&newtp->out_of_order_queue);
-		newtp->write_seq = newtp->pushed_seq =
-			treq->snt_isn + 1 + tcp_s_data_size(oldtp);
+		newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
 
 		newtp->rx_opt.saw_tstamp = 0;
 
@@ -538,7 +509,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 			   bool fastopen)
 {
 	struct tcp_options_received tmp_opt;
-	const u8 *hash_location;
 	struct sock *child;
 	const struct tcphdr *th = tcp_hdr(skb);
 	__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
@@ -548,7 +518,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
 	tmp_opt.saw_tstamp = 0;
 	if (th->doff > (sizeof(struct tcphdr)>>2)) {
-		tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
+		tcp_parse_options(skb, &tmp_opt, 0, NULL);
 
 		if (tmp_opt.saw_tstamp) {
 			tmp_opt.ts_recent = req->ts_recent;
@@ -648,7 +618,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	 */
 	if ((flg & TCP_FLAG_ACK) && !fastopen &&
 	    (TCP_SKB_CB(skb)->ack_seq !=
-	     tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk))))
+	     tcp_rsk(req)->snt_isn + 1))
 		return sk;
 
 	/* Also, it would be not so bad idea to check rcv_tsecr, which
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8e7742f0b5d2..ac5871ebe086 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,9 +65,6 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 /* By default, RFC2861 behavior.  */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
-int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
-EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
-
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			   int push_one, gfp_t gfp);
 
@@ -386,7 +383,6 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
 #define OPTION_TS		(1 << 1)
 #define OPTION_MD5		(1 << 2)
 #define OPTION_WSCALE		(1 << 3)
-#define OPTION_COOKIE_EXTENSION	(1 << 4)
 #define OPTION_FAST_OPEN_COOKIE	(1 << 8)
 
 struct tcp_out_options {
@@ -400,36 +396,6 @@ struct tcp_out_options {
 	struct tcp_fastopen_cookie *fastopen_cookie;	/* Fast open cookie */
 };
 
-/* The sysctl int routines are generic, so check consistency here.
- */
-static u8 tcp_cookie_size_check(u8 desired)
-{
-	int cookie_size;
-
-	if (desired > 0)
-		/* previously specified */
-		return desired;
-
-	cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
-	if (cookie_size <= 0)
-		/* no default specified */
-		return 0;
-
-	if (cookie_size <= TCP_COOKIE_MIN)
-		/* value too small, specify minimum */
-		return TCP_COOKIE_MIN;
-
-	if (cookie_size >= TCP_COOKIE_MAX)
-		/* value too large, specify maximum */
-		return TCP_COOKIE_MAX;
-
-	if (cookie_size & 1)
-		/* 8-bit multiple, illegal, fix it */
-		cookie_size++;
-
-	return (u8)cookie_size;
-}
-
 /* Write previously computed TCP options to the packet.
  *
  * Beware: Something in the Internet is very sensitive to the ordering of
@@ -448,27 +414,9 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 {
 	u16 options = opts->options;	/* mungable copy */
 
-	/* Having both authentication and cookies for security is redundant,
-	 * and there's certainly not enough room.  Instead, the cookie-less
-	 * extension variant is proposed.
-	 *
-	 * Consider the pessimal case with authentication.  The options
-	 * could look like:
-	 *   COOKIE|MD5(20) + MSS(4) + SACK|TS(12) + WSCALE(4) == 40
-	 */
 	if (unlikely(OPTION_MD5 & options)) {
-		if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
-			*ptr++ = htonl((TCPOPT_COOKIE << 24) |
-				       (TCPOLEN_COOKIE_BASE << 16) |
-				       (TCPOPT_MD5SIG << 8) |
-				       TCPOLEN_MD5SIG);
-		} else {
-			*ptr++ = htonl((TCPOPT_NOP << 24) |
-				       (TCPOPT_NOP << 16) |
-				       (TCPOPT_MD5SIG << 8) |
-				       TCPOLEN_MD5SIG);
-		}
-		options &= ~OPTION_COOKIE_EXTENSION;
+		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
+			       (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
 		/* overload cookie hash location */
 		opts->hash_location = (__u8 *)ptr;
 		ptr += 4;
@@ -497,44 +445,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 		*ptr++ = htonl(opts->tsecr);
 	}
 
-	/* Specification requires after timestamp, so do it now.
-	 *
-	 * Consider the pessimal case without authentication.  The options
-	 * could look like:
-	 *   MSS(4) + SACK|TS(12) + COOKIE(20) + WSCALE(4) == 40
-	 */
-	if (unlikely(OPTION_COOKIE_EXTENSION & options)) {
-		__u8 *cookie_copy = opts->hash_location;
-		u8 cookie_size = opts->hash_size;
-
-		/* 8-bit multiple handled in tcp_cookie_size_check() above,
-		 * and elsewhere.
-		 */
-		if (0x2 & cookie_size) {
-			__u8 *p = (__u8 *)ptr;
-
-			/* 16-bit multiple */
-			*p++ = TCPOPT_COOKIE;
-			*p++ = TCPOLEN_COOKIE_BASE + cookie_size;
-			*p++ = *cookie_copy++;
-			*p++ = *cookie_copy++;
-			ptr++;
-			cookie_size -= 2;
-		} else {
-			/* 32-bit multiple */
-			*ptr++ = htonl(((TCPOPT_NOP << 24) |
-					(TCPOPT_NOP << 16) |
-					(TCPOPT_COOKIE << 8) |
-					TCPOLEN_COOKIE_BASE) +
-				       cookie_size);
-		}
-
-		if (cookie_size > 0) {
-			memcpy(ptr, cookie_copy, cookie_size);
-			ptr += (cookie_size / 4);
-		}
-	}
-
 	if (unlikely(OPTION_SACK_ADVERTISE & options)) {
 		*ptr++ = htonl((TCPOPT_NOP << 24) |
 			       (TCPOPT_NOP << 16) |
@@ -593,11 +503,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 				struct tcp_md5sig_key **md5)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct tcp_cookie_values *cvp = tp->cookie_values;
 	unsigned int remaining = MAX_TCP_OPTION_SPACE;
-	u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
-			 tcp_cookie_size_check(cvp->cookie_desired) :
-			 0;
 	struct tcp_fastopen_request *fastopen = tp->fastopen_req;
 
 #ifdef CONFIG_TCP_MD5SIG
@@ -649,52 +555,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 			tp->syn_fastopen = 1;
 		}
 	}
-	/* Note that timestamps are required by the specification.
-	 *
-	 * Odd numbers of bytes are prohibited by the specification, ensuring
-	 * that the cookie is 16-bit aligned, and the resulting cookie pair is
-	 * 32-bit aligned.
-	 */
-	if (*md5 == NULL &&
-	    (OPTION_TS & opts->options) &&
-	    cookie_size > 0) {
-		int need = TCPOLEN_COOKIE_BASE + cookie_size;
-
-		if (0x2 & need) {
-			/* 32-bit multiple */
-			need += 2; /* NOPs */
-
-			if (need > remaining) {
-				/* try shrinking cookie to fit */
-				cookie_size -= 2;
-				need -= 4;
-			}
-		}
-		while (need > remaining && TCP_COOKIE_MIN <= cookie_size) {
-			cookie_size -= 4;
-			need -= 4;
-		}
-		if (TCP_COOKIE_MIN <= cookie_size) {
-			opts->options |= OPTION_COOKIE_EXTENSION;
-			opts->hash_location = (__u8 *)&cvp->cookie_pair[0];
-			opts->hash_size = cookie_size;
-
-			/* Remember for future incarnations. */
-			cvp->cookie_desired = cookie_size;
-
-			if (cvp->cookie_desired != cvp->cookie_pair_size) {
-				/* Currently use random bytes as a nonce,
-				 * assuming these are completely unpredictable
-				 * by hostile users of the same system.
-				 */
-				get_random_bytes(&cvp->cookie_pair[0],
-						 cookie_size);
-				cvp->cookie_pair_size = cookie_size;
-			}
 
-			remaining -= need;
-		}
-	}
 	return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -704,14 +565,10 @@ static unsigned int tcp_synack_options(struct sock *sk,
 				   unsigned int mss, struct sk_buff *skb,
 				   struct tcp_out_options *opts,
 				   struct tcp_md5sig_key **md5,
-				   struct tcp_extend_values *xvp,
 				   struct tcp_fastopen_cookie *foc)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	unsigned int remaining = MAX_TCP_OPTION_SPACE;
-	u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
-			 xvp->cookie_plus :
-			 0;
 
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
@@ -759,28 +616,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
 			remaining -= need;
 		}
 	}
-	/* Similar rationale to tcp_syn_options() applies here, too.
-	 * If the <SYN> options fit, the same options should fit now!
-	 */
-	if (*md5 == NULL &&
-	    ireq->tstamp_ok &&
-	    cookie_plus > TCPOLEN_COOKIE_BASE) {
-		int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */
-
-		if (0x2 & need) {
-			/* 32-bit multiple */
-			need += 2; /* NOPs */
-		}
-		if (need <= remaining) {
-			opts->options |= OPTION_COOKIE_EXTENSION;
-			opts->hash_size = cookie_plus - TCPOLEN_COOKIE_BASE;
-			remaining -= need;
-		} else {
-			/* There's no error return, so flag it. */
-			xvp->cookie_out_never = 1; /* true */
-			opts->hash_size = 0;
-		}
-	}
+
 	return MAX_TCP_OPTION_SPACE - remaining;
 }
 
@@ -2802,32 +2638,24 @@ int tcp_send_synack(struct sock *sk)
  * sk: listener socket
  * dst: dst entry attached to the SYNACK
  * req: request_sock pointer
- * rvp: request_values pointer
  *
  * Allocate one skb and build a SYNACK packet.
  * @dst is consumed : Caller should not use it again.
  */
 struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req,
-				struct request_values *rvp,
 				struct tcp_fastopen_cookie *foc)
 {
 	struct tcp_out_options opts;
-	struct tcp_extend_values *xvp = tcp_xv(rvp);
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct tcp_sock *tp = tcp_sk(sk);
-	const struct tcp_cookie_values *cvp = tp->cookie_values;
 	struct tcphdr *th;
 	struct sk_buff *skb;
 	struct tcp_md5sig_key *md5;
 	int tcp_header_size;
 	int mss;
-	int s_data_desired = 0;
 
-	if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
-		s_data_desired = cvp->s_data_desired;
-	skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
-			sk_gfp_atomic(sk, GFP_ATOMIC));
+	skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC));
 	if (unlikely(!skb)) {
 		dst_release(dst);
 		return NULL;
@@ -2869,9 +2697,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	else
 #endif
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_header_size = tcp_synack_options(sk, req, mss,
-					     skb, &opts, &md5, xvp, foc)
-			+ sizeof(*th);
+	tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5,
+					     foc) + sizeof(*th);
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
@@ -2889,40 +2716,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
 			     TCPHDR_SYN | TCPHDR_ACK);
 
-	if (OPTION_COOKIE_EXTENSION & opts.options) {
-		if (s_data_desired) {
-			u8 *buf = skb_put(skb, s_data_desired);
-
-			/* copy data directly from the listening socket. */
-			memcpy(buf, cvp->s_data_payload, s_data_desired);
-			TCP_SKB_CB(skb)->end_seq += s_data_desired;
-		}
-
-		if (opts.hash_size > 0) {
-			__u32 workspace[SHA_WORKSPACE_WORDS];
-			u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
-			u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
-
-			/* Secret recipe depends on the Timestamp, (future)
-			 * Sequence and Acknowledgment Numbers, Initiator
-			 * Cookie, and others handled by IP variant caller.
-			 */
-			*tail-- ^= opts.tsval;
-			*tail-- ^= tcp_rsk(req)->rcv_isn + 1;
-			*tail-- ^= TCP_SKB_CB(skb)->seq + 1;
-
-			/* recommended */
-			*tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
-			*tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
-
-			sha_transform((__u32 *)&xvp->cookie_bakery[0],
-				      (char *)mess,
-				      &workspace[0]);
-			opts.hash_location =
-				(__u8 *)&xvp->cookie_bakery[0];
-		}
-	}
-
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
 	/* XXX data is queued and acked as is. No buffer/window check */
 	th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 8a0848b60b35..d5dda20bd717 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -149,7 +149,6 @@ static inline int cookie_check(const struct sk_buff *skb, __u32 cookie)
 struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tcp_opt;
-	const u8 *hash_location;
 	struct inet_request_sock *ireq;
 	struct inet6_request_sock *ireq6;
 	struct tcp_request_sock *treq;
@@ -177,7 +176,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 
 	/* check for timestamp cookie support */
 	memset(&tcp_opt, 0, sizeof(tcp_opt));
-	tcp_parse_options(skb, &tcp_opt, &hash_location, 0, NULL);
+	tcp_parse_options(skb, &tcp_opt, 0, NULL);
 
 	if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
 		goto out;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9b6460055df5..0a97add2ab74 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -454,7 +454,6 @@ out:
 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct flowi6 *fl6,
 			      struct request_sock *req,
-			      struct request_values *rvp,
 			      u16 queue_mapping)
 {
 	struct inet6_request_sock *treq = inet6_rsk(req);
@@ -466,7 +465,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
 		goto done;
 
-	skb = tcp_make_synack(sk, dst, req, rvp, NULL);
+	skb = tcp_make_synack(sk, dst, req, NULL);
 
 	if (skb) {
 		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
@@ -481,13 +480,12 @@ done:
 	return err;
 }
 
-static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
-			     struct request_values *rvp)
+static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
 {
 	struct flowi6 fl6;
 	int res;
 
-	res = tcp_v6_send_synack(sk, NULL, &fl6, req, rvp, 0);
+	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
 	if (!res)
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
 	return res;
@@ -940,9 +938,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
  */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcp_extend_values tmp_ext;
 	struct tcp_options_received tmp_opt;
-	const u8 *hash_location;
 	struct request_sock *req;
 	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -980,50 +976,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL);
-
-	if (tmp_opt.cookie_plus > 0 &&
-	    tmp_opt.saw_tstamp &&
-	    !tp->rx_opt.cookie_out_never &&
-	    (sysctl_tcp_cookie_size > 0 ||
-	     (tp->cookie_values != NULL &&
-	      tp->cookie_values->cookie_desired > 0))) {
-		u8 *c;
-		u32 *d;
-		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
-		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
-
-		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
-			goto drop_and_free;
-
-		/* Secret recipe starts with IP addresses */
-		d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
-		*mess++ ^= *d++;
-		*mess++ ^= *d++;
-		*mess++ ^= *d++;
-		*mess++ ^= *d++;
-		d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
-		*mess++ ^= *d++;
-		*mess++ ^= *d++;
-		*mess++ ^= *d++;
-		*mess++ ^= *d++;
-
-		/* plus variable length Initiator Cookie */
-		c = (u8 *)mess;
-		while (l-- > 0)
-			*c++ ^= *hash_location++;
-
-		want_cookie = false;	/* not our kind of cookie */
-		tmp_ext.cookie_out_never = 0; /* false */
-		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
-	} else if (!tp->rx_opt.cookie_in_always) {
-		/* redundant indications, but ensure initialization. */
-		tmp_ext.cookie_out_never = 1; /* true */
-		tmp_ext.cookie_plus = 0;
-	} else {
-		goto drop_and_free;
-	}
-	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
+	tcp_parse_options(skb, &tmp_opt, 0, NULL);
 
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
@@ -1101,7 +1054,6 @@ have_isn:
 		goto drop_and_release;
 
 	if (tcp_v6_send_synack(sk, dst, &fl6, req,
-			       (struct request_values *)&tmp_ext,
 			       skb_get_queue_mapping(skb)) ||
 	    want_cookie)
 		goto drop_and_free;
-- 
cgit 


From 77f65ebdca506870d99bfabe52bde222511022ec Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 19 Mar 2013 10:18:11 +0000
Subject: packet: packet fanout rollover during socket overload

Changes:
  v3->v2: rebase (no other changes)
          passes selftest
  v2->v1: read f->num_members only once
          fix bug: test rollover mode + flag

Minimize packet drop in a fanout group. If one socket is full,
roll over packets to another from the group. Maintain flow
affinity during normal load using an rxhash fanout policy, while
dispersing unexpected traffic storms that hit a single cpu, such
as spoofed-source DoS flows. Rollover breaks affinity for flows
arriving at saturated sockets during those conditions.

The patch adds a fanout policy ROLLOVER that rotates between sockets,
filling each socket before moving to the next. It also adds a fanout
flag ROLLOVER. If passed along with any other fanout policy, the
primary policy is applied until the chosen socket is full. Then,
rollover selects another socket, to delay packet drop until the
entire system is saturated.

Probing sockets is not free. Selecting the last used socket, as
rollover does, is a greedy approach that maximizes chance of
success, at the cost of extreme load imbalance. In practice, with
sufficiently long queues to absorb bursts, sockets are drained in
parallel and load balance looks uniform in `top`.

To avoid contention, scales counters with number of sockets and
accesses them lockfree. Values are bounds checked to ensure
correctness.

Tested using an application with 9 threads pinned to CPUs, one socket
per thread and sufficient busywork per packet operation to limits each
thread to handling 32 Kpps. When sent 500 Kpps single UDP stream
packets, a FANOUT_CPU setup processes 32 Kpps in total without this
patch, 270 Kpps with the patch. Tested with read() and with a packet
ring (V1).

Also, passes psock_fanout.c unit test added to selftests.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_packet.h                     |   2 +
 net/packet/af_packet.c                             | 109 +++++--
 net/packet/internal.h                              |   3 +-
 tools/testing/selftests/Makefile                   |   1 +
 tools/testing/selftests/net-afpacket/Makefile      |  18 ++
 .../testing/selftests/net-afpacket/psock_fanout.c  | 326 +++++++++++++++++++++
 .../selftests/net-afpacket/run_afpackettests       |  16 +
 7 files changed, 451 insertions(+), 24 deletions(-)
 create mode 100644 tools/testing/selftests/net-afpacket/Makefile
 create mode 100644 tools/testing/selftests/net-afpacket/psock_fanout.c
 create mode 100644 tools/testing/selftests/net-afpacket/run_afpackettests

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index f9a60375f0d0..8136658ea477 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -55,6 +55,8 @@ struct sockaddr_ll {
 #define PACKET_FANOUT_HASH		0
 #define PACKET_FANOUT_LB		1
 #define PACKET_FANOUT_CPU		2
+#define PACKET_FANOUT_ROLLOVER		3
+#define PACKET_FANOUT_FLAG_ROLLOVER	0x1000
 #define PACKET_FANOUT_FLAG_DEFRAG	0x8000
 
 struct tpacket_stats {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1d6793dbfbae..bd0d14c97d41 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -181,6 +181,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 
 struct packet_sock;
 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
+static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
+		       struct packet_type *pt, struct net_device *orig_dev);
 
 static void *packet_previous_frame(struct packet_sock *po,
 		struct packet_ring_buffer *rb,
@@ -973,11 +975,11 @@ static void *packet_current_rx_frame(struct packet_sock *po,
 
 static void *prb_lookup_block(struct packet_sock *po,
 				     struct packet_ring_buffer *rb,
-				     unsigned int previous,
+				     unsigned int idx,
 				     int status)
 {
 	struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
-	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous);
+	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
 
 	if (status != BLOCK_STATUS(pbd))
 		return NULL;
@@ -1041,6 +1043,29 @@ static void packet_increment_head(struct packet_ring_buffer *buff)
 	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
 }
 
+static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
+{
+	struct sock *sk = &po->sk;
+	bool has_room;
+
+	if (po->prot_hook.func != tpacket_rcv)
+		return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize)
+			<= sk->sk_rcvbuf;
+
+	spin_lock(&sk->sk_receive_queue.lock);
+	if (po->tp_version == TPACKET_V3)
+		has_room = prb_lookup_block(po, &po->rx_ring,
+					    po->rx_ring.prb_bdqc.kactive_blk_num,
+					    TP_STATUS_KERNEL);
+	else
+		has_room = packet_lookup_frame(po, &po->rx_ring,
+					       po->rx_ring.head,
+					       TP_STATUS_KERNEL);
+	spin_unlock(&sk->sk_receive_queue.lock);
+
+	return has_room;
+}
+
 static void packet_sock_destruct(struct sock *sk)
 {
 	skb_queue_purge(&sk->sk_error_queue);
@@ -1066,16 +1091,16 @@ static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
 	return x;
 }
 
-static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
+static unsigned int fanout_demux_hash(struct packet_fanout *f,
+				      struct sk_buff *skb,
+				      unsigned int num)
 {
-	u32 idx, hash = skb->rxhash;
-
-	idx = ((u64)hash * num) >> 32;
-
-	return f->arr[idx];
+	return (((u64)skb->rxhash) * num) >> 32;
 }
 
-static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
+static unsigned int fanout_demux_lb(struct packet_fanout *f,
+				    struct sk_buff *skb,
+				    unsigned int num)
 {
 	int cur, old;
 
@@ -1083,14 +1108,40 @@ static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb
 	while ((old = atomic_cmpxchg(&f->rr_cur, cur,
 				     fanout_rr_next(f, num))) != cur)
 		cur = old;
-	return f->arr[cur];
+	return cur;
+}
+
+static unsigned int fanout_demux_cpu(struct packet_fanout *f,
+				     struct sk_buff *skb,
+				     unsigned int num)
+{
+	return smp_processor_id() % num;
 }
 
-static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
+static unsigned int fanout_demux_rollover(struct packet_fanout *f,
+					  struct sk_buff *skb,
+					  unsigned int idx, unsigned int skip,
+					  unsigned int num)
 {
-	unsigned int cpu = smp_processor_id();
+	unsigned int i, j;
 
-	return f->arr[cpu % num];
+	i = j = min_t(int, f->next[idx], num - 1);
+	do {
+		if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) {
+			if (i != j)
+				f->next[idx] = i;
+			return i;
+		}
+		if (++i == num)
+			i = 0;
+	} while (i != j);
+
+	return idx;
+}
+
+static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
+{
+	return f->flags & (flag >> 8);
 }
 
 static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
@@ -1099,7 +1150,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
 	struct packet_fanout *f = pt->af_packet_priv;
 	unsigned int num = f->num_members;
 	struct packet_sock *po;
-	struct sock *sk;
+	unsigned int idx;
 
 	if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||
 	    !num) {
@@ -1110,23 +1161,31 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
 	switch (f->type) {
 	case PACKET_FANOUT_HASH:
 	default:
-		if (f->defrag) {
+		if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
 			skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
 			if (!skb)
 				return 0;
 		}
 		skb_get_rxhash(skb);
-		sk = fanout_demux_hash(f, skb, num);
+		idx = fanout_demux_hash(f, skb, num);
 		break;
 	case PACKET_FANOUT_LB:
-		sk = fanout_demux_lb(f, skb, num);
+		idx = fanout_demux_lb(f, skb, num);
 		break;
 	case PACKET_FANOUT_CPU:
-		sk = fanout_demux_cpu(f, skb, num);
+		idx = fanout_demux_cpu(f, skb, num);
+		break;
+	case PACKET_FANOUT_ROLLOVER:
+		idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
 		break;
 	}
 
-	po = pkt_sk(sk);
+	po = pkt_sk(f->arr[idx]);
+	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) &&
+	    unlikely(!packet_rcv_has_room(po, skb))) {
+		idx = fanout_demux_rollover(f, skb, idx, idx, num);
+		po = pkt_sk(f->arr[idx]);
+	}
 
 	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
 }
@@ -1175,10 +1234,13 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 	struct packet_sock *po = pkt_sk(sk);
 	struct packet_fanout *f, *match;
 	u8 type = type_flags & 0xff;
-	u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0;
+	u8 flags = type_flags >> 8;
 	int err;
 
 	switch (type) {
+	case PACKET_FANOUT_ROLLOVER:
+		if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
+			return -EINVAL;
 	case PACKET_FANOUT_HASH:
 	case PACKET_FANOUT_LB:
 	case PACKET_FANOUT_CPU:
@@ -1203,7 +1265,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		}
 	}
 	err = -EINVAL;
-	if (match && match->defrag != defrag)
+	if (match && match->flags != flags)
 		goto out;
 	if (!match) {
 		err = -ENOMEM;
@@ -1213,7 +1275,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		write_pnet(&match->net, sock_net(sk));
 		match->id = id;
 		match->type = type;
-		match->defrag = defrag;
+		match->flags = flags;
 		atomic_set(&match->rr_cur, 0);
 		INIT_LIST_HEAD(&match->list);
 		spin_lock_init(&match->lock);
@@ -3240,7 +3302,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 	case PACKET_FANOUT:
 		val = (po->fanout ?
 		       ((u32)po->fanout->id |
-			((u32)po->fanout->type << 16)) :
+			((u32)po->fanout->type << 16) |
+			((u32)po->fanout->flags << 24)) :
 		       0);
 		break;
 	case PACKET_TX_HAS_OFF:
diff --git a/net/packet/internal.h b/net/packet/internal.h
index e84cab8cb7a9..e891f025a1b9 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -77,10 +77,11 @@ struct packet_fanout {
 	unsigned int		num_members;
 	u16			id;
 	u8			type;
-	u8			defrag;
+	u8			flags;
 	atomic_t		rr_cur;
 	struct list_head	list;
 	struct sock		*arr[PACKET_FANOUT_MAX];
+	int			next[PACKET_FANOUT_MAX];
 	spinlock_t		lock;
 	atomic_t		sk_ref;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 7c6280f1cf4d..7f50078d0e8c 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -6,6 +6,7 @@ TARGETS += cpu-hotplug
 TARGETS += memory-hotplug
 TARGETS += efivarfs
 TARGETS += net-socket
+TARGETS += net-afpacket
 
 all:
 	for TARGET in $(TARGETS); do \
diff --git a/tools/testing/selftests/net-afpacket/Makefile b/tools/testing/selftests/net-afpacket/Makefile
new file mode 100644
index 000000000000..45f2ffb7fda7
--- /dev/null
+++ b/tools/testing/selftests/net-afpacket/Makefile
@@ -0,0 +1,18 @@
+# Makefile for net-socket selftests
+
+CC = $(CROSS_COMPILE)gcc
+CFLAGS = -Wall
+
+CFLAGS += -I../../../../usr/include/
+
+AF_PACKET_PROGS = psock_fanout
+
+all: $(AF_PACKET_PROGS)
+%: %.c
+	$(CC) $(CFLAGS) -o $@ $^
+
+run_tests: all
+	@/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]"
+
+clean:
+	$(RM) $(AF_PACKET_PROGS)
diff --git a/tools/testing/selftests/net-afpacket/psock_fanout.c b/tools/testing/selftests/net-afpacket/psock_fanout.c
new file mode 100644
index 000000000000..09dbf93c53d4
--- /dev/null
+++ b/tools/testing/selftests/net-afpacket/psock_fanout.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright 2013 Google Inc.
+ * Author: Willem de Bruijn (willemb@google.com)
+ *
+ * A basic test of packet socket fanout behavior.
+ *
+ * Control:
+ * - create fanout fails as expected with illegal flag combinations
+ * - join   fanout fails as expected with diverging types or flags
+ *
+ * Datapath:
+ *   Open a pair of packet sockets and a pair of INET sockets, send a known
+ *   number of packets across the two INET sockets and count the number of
+ *   packets enqueued onto the two packet sockets.
+ *
+ *   The test currently runs for
+ *   - PACKET_FANOUT_HASH
+ *   - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER
+ *   - PACKET_FANOUT_ROLLOVER
+ *
+ * Todo:
+ * - datapath: PACKET_FANOUT_LB
+ * - datapath: PACKET_FANOUT_CPU
+ * - functionality: PACKET_FANOUT_FLAG_DEFRAG
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* Hack: build even if local includes are old */
+#ifndef PACKET_FANOUT
+#define PACKET_FANOUT			18
+#define PACKET_FANOUT_HASH		0
+#define PACKET_FANOUT_LB		1
+#define PACKET_FANOUT_CPU		2
+#define PACKET_FANOUT_FLAG_DEFRAG	0x8000
+
+#ifndef PACKET_FANOUT_ROLLOVER
+#define PACKET_FANOUT_ROLLOVER		3
+#endif
+
+#ifndef PACKET_FANOUT_FLAG_ROLLOVER
+#define PACKET_FANOUT_FLAG_ROLLOVER	0x1000
+#endif
+
+#endif
+
+#define DATA_LEN			100
+#define DATA_CHAR			'a'
+
+static void pair_udp_open(int fds[], uint16_t port)
+{
+	struct sockaddr_in saddr, daddr;
+
+	fds[0] = socket(PF_INET, SOCK_DGRAM, 0);
+	fds[1] = socket(PF_INET, SOCK_DGRAM, 0);
+	if (fds[0] == -1 || fds[1] == -1) {
+		fprintf(stderr, "ERROR: socket dgram\n");
+		exit(1);
+	}
+
+	memset(&saddr, 0, sizeof(saddr));
+	saddr.sin_family = AF_INET;
+	saddr.sin_port = htons(port);
+	saddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+	memset(&daddr, 0, sizeof(daddr));
+	daddr.sin_family = AF_INET;
+	daddr.sin_port = htons(port + 1);
+	daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+	/* must bind both to get consistent hash result */
+	if (bind(fds[1], (void *) &daddr, sizeof(daddr))) {
+		perror("bind");
+		exit(1);
+	}
+	if (bind(fds[0], (void *) &saddr, sizeof(saddr))) {
+		perror("bind");
+		exit(1);
+	}
+	if (connect(fds[0], (void *) &daddr, sizeof(daddr))) {
+		perror("bind");
+		exit(1);
+	}
+}
+
+static void pair_udp_send(int fds[], int num)
+{
+	char buf[DATA_LEN], rbuf[DATA_LEN];
+
+	memset(buf, DATA_CHAR, sizeof(buf));
+	while (num--) {
+		/* Should really handle EINTR and EAGAIN */
+		if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) {
+			fprintf(stderr, "ERROR: send failed left=%d\n", num);
+			exit(1);
+		}
+		if (read(fds[1], rbuf, sizeof(rbuf)) != sizeof(rbuf)) {
+			fprintf(stderr, "ERROR: recv failed left=%d\n", num);
+			exit(1);
+		}
+		if (memcmp(buf, rbuf, sizeof(buf))) {
+			fprintf(stderr, "ERROR: data failed left=%d\n", num);
+			exit(1);
+		}
+	}
+}
+
+static void sock_fanout_setfilter(int fd)
+{
+	struct sock_filter bpf_filter[] = {
+		{ 0x80, 0, 0, 0x00000000 },  /* LD  pktlen		      */
+		{ 0x35, 0, 5, DATA_LEN   },  /* JGE DATA_LEN  [f goto nomatch]*/
+		{ 0x30, 0, 0, 0x00000050 },  /* LD  ip[80]		      */
+		{ 0x15, 0, 3, DATA_CHAR  },  /* JEQ DATA_CHAR [f goto nomatch]*/
+		{ 0x30, 0, 0, 0x00000051 },  /* LD  ip[81]		      */
+		{ 0x15, 0, 1, DATA_CHAR  },  /* JEQ DATA_CHAR [f goto nomatch]*/
+		{ 0x6, 0, 0, 0x00000060  },  /* RET match	              */
+/* nomatch */	{ 0x6, 0, 0, 0x00000000  },  /* RET no match		      */
+	};
+	struct sock_fprog bpf_prog;
+
+	bpf_prog.filter = bpf_filter;
+	bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
+		       sizeof(bpf_prog))) {
+		perror("setsockopt SO_ATTACH_FILTER");
+		exit(1);
+	}
+}
+
+/* Open a socket in a given fanout mode.
+ * @return -1 if mode is bad, a valid socket otherwise */
+static int sock_fanout_open(uint16_t typeflags, int num_packets)
+{
+	int fd, val;
+
+	fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
+	if (fd < 0) {
+		perror("socket packet");
+		exit(1);
+	}
+
+	/* fanout group ID is always 0: tests whether old groups are deleted */
+	val = ((int) typeflags) << 16;
+	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
+		if (close(fd)) {
+			perror("close packet");
+			exit(1);
+		}
+		return -1;
+	}
+
+	val = sizeof(struct iphdr) + sizeof(struct udphdr) + DATA_LEN;
+	val *= num_packets;
+	/* hack: apparently, the above calculation is too small (TODO: fix) */
+	val *= 3;
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val))) {
+		perror("setsockopt SO_RCVBUF");
+		exit(1);
+	}
+
+	sock_fanout_setfilter(fd);
+	return fd;
+}
+
+static void sock_fanout_read(int fds[], const int expect[])
+{
+	struct tpacket_stats stats;
+	socklen_t ssize;
+	int ret[2];
+
+	ssize = sizeof(stats);
+	if (getsockopt(fds[0], SOL_PACKET, PACKET_STATISTICS, &stats, &ssize)) {
+		perror("getsockopt statistics 0");
+		exit(1);
+	}
+	ret[0] = stats.tp_packets - stats.tp_drops;
+	ssize = sizeof(stats);
+	if (getsockopt(fds[1], SOL_PACKET, PACKET_STATISTICS, &stats, &ssize)) {
+		perror("getsockopt statistics 1");
+		exit(1);
+	}
+	ret[1] = stats.tp_packets - stats.tp_drops;
+
+	fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
+			ret[0], ret[1], expect[0], expect[1]);
+
+	if ((!(ret[0] == expect[0] && ret[1] == expect[1])) &&
+	    (!(ret[0] == expect[1] && ret[1] == expect[0]))) {
+		fprintf(stderr, "ERROR: incorrect queue lengths\n");
+		exit(1);
+	}
+}
+
+/* Test illegal mode + flag combination */
+static void test_control_single(void)
+{
+	fprintf(stderr, "test: control single socket\n");
+
+	if (sock_fanout_open(PACKET_FANOUT_ROLLOVER |
+			       PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
+		fprintf(stderr, "ERROR: opened socket with dual rollover\n");
+		exit(1);
+	}
+}
+
+/* Test illegal group with different modes or flags */
+static void test_control_group(void)
+{
+	int fds[2];
+
+	fprintf(stderr, "test: control multiple sockets\n");
+
+	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 20);
+	if (fds[0] == -1) {
+		fprintf(stderr, "ERROR: failed to open HASH socket\n");
+		exit(1);
+	}
+	if (sock_fanout_open(PACKET_FANOUT_HASH |
+			       PACKET_FANOUT_FLAG_DEFRAG, 10) != -1) {
+		fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
+		exit(1);
+	}
+	if (sock_fanout_open(PACKET_FANOUT_HASH |
+			       PACKET_FANOUT_FLAG_ROLLOVER, 10) != -1) {
+		fprintf(stderr, "ERROR: joined group with wrong flag ro\n");
+		exit(1);
+	}
+	if (sock_fanout_open(PACKET_FANOUT_CPU, 10) != -1) {
+		fprintf(stderr, "ERROR: joined group with wrong mode\n");
+		exit(1);
+	}
+	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 20);
+	if (fds[1] == -1) {
+		fprintf(stderr, "ERROR: failed to join group\n");
+		exit(1);
+	}
+	if (close(fds[1]) || close(fds[0])) {
+		fprintf(stderr, "ERROR: closing sockets\n");
+		exit(1);
+	}
+}
+
+static void test_datapath(uint16_t typeflags,
+			  const int expect1[], const int expect2[])
+{
+	const int expect0[] = { 0, 0 };
+	int fds[2], fds_udp[2][2];
+
+	fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
+
+	fds[0] = sock_fanout_open(typeflags, 20);
+	fds[1] = sock_fanout_open(typeflags, 20);
+	if (fds[0] == -1 || fds[1] == -1) {
+		fprintf(stderr, "ERROR: failed open\n");
+		exit(1);
+	}
+	pair_udp_open(fds_udp[0], 8000);
+	pair_udp_open(fds_udp[1], 8002);
+	sock_fanout_read(fds, expect0);
+
+	/* Send data, but not enough to overflow a queue */
+	pair_udp_send(fds_udp[0], 15);
+	pair_udp_send(fds_udp[1], 5);
+	sock_fanout_read(fds, expect1);
+
+	/* Send more data, overflow the queue */
+	pair_udp_send(fds_udp[0], 15);
+	/* TODO: ensure consistent order between expect1 and expect2 */
+	sock_fanout_read(fds, expect2);
+
+	if (close(fds_udp[1][1]) || close(fds_udp[1][0]) ||
+	    close(fds_udp[0][1]) || close(fds_udp[0][0]) ||
+	    close(fds[1]) || close(fds[0])) {
+		fprintf(stderr, "close datapath\n");
+		exit(1);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	const int expect_hash[2][2]	= { { 15, 5 }, { 5, 0 } };
+	const int expect_hash_rb[2][2]	= { { 15, 5 }, { 5, 10 } };
+	const int expect_rb[2][2]	= { { 20, 0 }, { 0, 15 } };
+
+	test_control_single();
+	test_control_group();
+
+	test_datapath(PACKET_FANOUT_HASH, expect_hash[0], expect_hash[1]);
+	test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
+		      expect_hash_rb[0], expect_hash_rb[1]);
+	test_datapath(PACKET_FANOUT_ROLLOVER, expect_rb[0], expect_rb[1]);
+
+	printf("OK. All tests passed\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net-afpacket/run_afpackettests b/tools/testing/selftests/net-afpacket/run_afpackettests
new file mode 100644
index 000000000000..7907824c6355
--- /dev/null
+++ b/tools/testing/selftests/net-afpacket/run_afpackettests
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+if [ $(id -u) != 0 ]; then
+	echo $msg must be run as root >&2
+	exit 0
+fi
+
+echo "--------------------"
+echo "running psock_fanout test"
+echo "--------------------"
+./psock_fanout
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+else
+	echo "[PASS]"
+fi
-- 
cgit 


From cee7e443344a3845e5b9111614b41e0b1afb60ce Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko.sakkinen@iki.fi>
Date: Tue, 6 Nov 2012 10:17:49 +0200
Subject: smack: SMACK_MAGIC to include/uapi/linux/magic.h

SMACK_MAGIC moved to a proper place for easy user space access
(i.e. libsmack).

Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@iki.fi>
---
 include/uapi/linux/magic.h | 1 +
 security/smack/smack.h     | 5 -----
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 873e086ce3a1..249df3720be2 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -11,6 +11,7 @@
 #define DEBUGFS_MAGIC          0x64626720
 #define SECURITYFS_MAGIC	0x73636673
 #define SELINUX_MAGIC		0xf97cff8c
+#define SMACK_MAGIC		0x43415d53	/* "SMAC" */
 #define RAMFS_MAGIC		0x858458f6	/* some random number */
 #define TMPFS_MAGIC		0x01021994
 #define HUGETLBFS_MAGIC 	0x958458f6	/* some random number */
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 99b36124f712..8ad30955e15d 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -148,11 +148,6 @@ struct smack_known {
 #define SMACK_UNLABELED_SOCKET	0
 #define SMACK_CIPSO_SOCKET	1
 
-/*
- * smackfs magic number
- */
-#define SMACK_MAGIC	0x43415d53 /* "SMAC" */
-
 /*
  * CIPSO defaults.
  */
-- 
cgit 


From 0d2e1a2926b1839a4b74519e660739b2566c9386 Mon Sep 17 00:00:00 2001
From: Erwan Yvin <erwan.yvin@stericsson.com>
Date: Wed, 20 Mar 2013 13:52:24 +1030
Subject: caif_virtio: Introduce caif over virtio

Add the CAIF Virtio shared memory driver for talking
to a modem.

This CAIF Link layer communicates to the modem over
shared memory. It is implemented as a virtio_driver.
The underlying virtio device is managed by the remoteproc
framework. The Virtio queue is used for transmitting data
to the modem, and the new vringh is used for receiving data.

Genalloc is used for managing the shared memory used for TX
data. The default dma-alloc-coherent allocator can only
allocate whole pages, and this wastes too much shared memory.

Flow control is implemented by stopping the TX-queues if the
virtio queues go full or we run out of memory. Queued are
reopened when queues are below the watermark.

NAPI is used in RX path, and a dedicated tasklet is used
for releasing TX buffers.

Signed-off-by: Erwan Yvin <erwan.yvin@stericsson.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au> (minor fixes)
---
 drivers/net/caif/Kconfig        |  14 +
 drivers/net/caif/Makefile       |   3 +
 drivers/net/caif/caif_virtio.c  | 785 ++++++++++++++++++++++++++++++++++++++++
 include/linux/virtio_caif.h     |  24 ++
 include/uapi/linux/virtio_ids.h |   1 +
 5 files changed, 827 insertions(+)
 create mode 100644 drivers/net/caif/caif_virtio.c
 create mode 100644 include/linux/virtio_caif.h

(limited to 'include/uapi/linux')

diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig
index 60c2142373c9..893f9154011e 100644
--- a/drivers/net/caif/Kconfig
+++ b/drivers/net/caif/Kconfig
@@ -47,3 +47,17 @@ config CAIF_HSI
        The caif low level driver for CAIF over HSI.
        Be aware that if you enable this then you also need to
        enable a low-level HSI driver.
+
+config CAIF_VIRTIO
+	tristate "CAIF virtio transport driver"
+	depends on CAIF
+	select VHOST_RING
+	select VIRTIO
+	select GENERIC_ALLOCATOR
+	default n
+	---help---
+	The caif driver for CAIF over Virtio.
+
+if CAIF_VIRTIO
+source "drivers/vhost/Kconfig"
+endif
diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile
index 91dff861560f..d9ee26a96c6e 100644
--- a/drivers/net/caif/Makefile
+++ b/drivers/net/caif/Makefile
@@ -13,3 +13,6 @@ obj-$(CONFIG_CAIF_SHM) += caif_shm.o
 
 # HSI interface
 obj-$(CONFIG_CAIF_HSI) += caif_hsi.o
+
+# Virtio interface
+obj-$(CONFIG_CAIF_VIRTIO) += caif_virtio.o
diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c
new file mode 100644
index 000000000000..b1e1205e4e28
--- /dev/null
+++ b/drivers/net/caif/caif_virtio.c
@@ -0,0 +1,785 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2013
+ * Authors: Vicram Arv / vikram.arv@stericsson.com,
+ *	    Dmitry Tarnyagin / dmitry.tarnyagin@stericsson.com
+ *	    Sjur Brendeland / sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#include <linux/module.h>
+#include <linux/if_arp.h>
+#include <linux/virtio.h>
+#include <linux/vringh.h>
+#include <linux/debugfs.h>
+#include <linux/spinlock.h>
+#include <linux/genalloc.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_caif.h>
+#include <linux/virtio_ring.h>
+#include <linux/dma-mapping.h>
+#include <net/caif/caif_dev.h>
+#include <linux/virtio_config.h>
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Vicram Arv <vikram.arv@stericsson.com>");
+MODULE_AUTHOR("Sjur Brendeland <sjur.brandeland@stericsson.com>");
+MODULE_DESCRIPTION("Virtio CAIF Driver");
+
+/* NAPI schedule quota */
+#define CFV_DEFAULT_QUOTA 32
+
+/* Defaults used if virtio config space is unavailable */
+#define CFV_DEF_MTU_SIZE 4096
+#define CFV_DEF_HEADROOM 32
+#define CFV_DEF_TAILROOM 32
+
+/* Required IP header alignment */
+#define IP_HDR_ALIGN 4
+
+/* struct cfv_napi_contxt - NAPI context info
+ * @riov: IOV holding data read from the ring. Note that riov may
+ *	  still hold data when cfv_rx_poll() returns.
+ * @head: Last descriptor ID we received from vringh_getdesc_kern.
+ *	  We use this to put descriptor back on the used ring. USHRT_MAX is
+ *	  used to indicate invalid head-id.
+ */
+struct cfv_napi_context {
+	struct vringh_kiov riov;
+	unsigned short head;
+};
+
+/* struct cfv_stats - statistics for debugfs
+ * @rx_napi_complete:	Number of NAPI completions (RX)
+ * @rx_napi_resched:	Number of calls where the full quota was used (RX)
+ * @rx_nomem:		Number of SKB alloc failures (RX)
+ * @rx_kicks:		Number of RX kicks
+ * @tx_full_ring:	Number times TX ring was full
+ * @tx_no_mem:		Number of times TX went out of memory
+ * @tx_flow_on:		Number of flow on (TX)
+ * @tx_kicks:		Number of TX kicks
+ */
+struct cfv_stats {
+	u32 rx_napi_complete;
+	u32 rx_napi_resched;
+	u32 rx_nomem;
+	u32 rx_kicks;
+	u32 tx_full_ring;
+	u32 tx_no_mem;
+	u32 tx_flow_on;
+	u32 tx_kicks;
+};
+
+/* struct cfv_info - Caif Virtio control structure
+ * @cfdev:	caif common header
+ * @vdev:	Associated virtio device
+ * @vr_rx:	rx/downlink host vring
+ * @vq_tx:	tx/uplink virtqueue
+ * @ndev:	CAIF link layer device
+ * @watermark_tx: indicates number of free descriptors we need
+ *		to reopen the tx-queues after overload.
+ * @tx_lock:	protects vq_tx from concurrent use
+ * @tx_release_tasklet: Tasklet for freeing consumed TX buffers
+ * @napi:       Napi context used in cfv_rx_poll()
+ * @ctx:        Context data used in cfv_rx_poll()
+ * @tx_hr:	transmit headroom
+ * @rx_hr:	receive headroom
+ * @tx_tr:	transmit tail room
+ * @rx_tr:	receive tail room
+ * @mtu:	transmit max size
+ * @mru:	receive max size
+ * @allocsz:    size of dma memory reserved for TX buffers
+ * @alloc_addr: virtual address to dma memory for TX buffers
+ * @alloc_dma:  dma address to dma memory for TX buffers
+ * @genpool:    Gen Pool used for allocating TX buffers
+ * @reserved_mem: Pointer to memory reserve allocated from genpool
+ * @reserved_size: Size of memory reserve allocated from genpool
+ * @stats:       Statistics exposed in sysfs
+ * @debugfs:    Debugfs dentry for statistic counters
+ */
+struct cfv_info {
+	struct caif_dev_common cfdev;
+	struct virtio_device *vdev;
+	struct vringh *vr_rx;
+	struct virtqueue *vq_tx;
+	struct net_device *ndev;
+	unsigned int watermark_tx;
+	/* Protect access to vq_tx */
+	spinlock_t tx_lock;
+	struct tasklet_struct tx_release_tasklet;
+	struct napi_struct napi;
+	struct cfv_napi_context ctx;
+	u16 tx_hr;
+	u16 rx_hr;
+	u16 tx_tr;
+	u16 rx_tr;
+	u32 mtu;
+	u32 mru;
+	size_t allocsz;
+	void *alloc_addr;
+	dma_addr_t alloc_dma;
+	struct gen_pool *genpool;
+	unsigned long reserved_mem;
+	size_t reserved_size;
+	struct cfv_stats stats;
+	struct dentry *debugfs;
+};
+
+/* struct buf_info - maintains transmit buffer data handle
+ * @size:	size of transmit buffer
+ * @dma_handle: handle to allocated dma device memory area
+ * @vaddr:	virtual address mapping to allocated memory area
+ */
+struct buf_info {
+	size_t size;
+	u8 *vaddr;
+};
+
+/* Called from virtio device, in IRQ context */
+static void cfv_release_cb(struct virtqueue *vq_tx)
+{
+	struct cfv_info *cfv = vq_tx->vdev->priv;
+
+	++cfv->stats.tx_kicks;
+	tasklet_schedule(&cfv->tx_release_tasklet);
+}
+
+static void free_buf_info(struct cfv_info *cfv, struct buf_info *buf_info)
+{
+	if (!buf_info)
+		return;
+	gen_pool_free(cfv->genpool, (unsigned long) buf_info->vaddr,
+		      buf_info->size);
+	kfree(buf_info);
+}
+
+/* This is invoked whenever the remote processor completed processing
+ * a TX msg we just sent, and the buffer is put back to the used ring.
+ */
+static void cfv_release_used_buf(struct virtqueue *vq_tx)
+{
+	struct cfv_info *cfv = vq_tx->vdev->priv;
+	unsigned long flags;
+
+	BUG_ON(vq_tx != cfv->vq_tx);
+
+	for (;;) {
+		unsigned int len;
+		struct buf_info *buf_info;
+
+		/* Get used buffer from used ring to recycle used descriptors */
+		spin_lock_irqsave(&cfv->tx_lock, flags);
+		buf_info = virtqueue_get_buf(vq_tx, &len);
+		spin_unlock_irqrestore(&cfv->tx_lock, flags);
+
+		/* Stop looping if there are no more buffers to free */
+		if (!buf_info)
+			break;
+
+		free_buf_info(cfv, buf_info);
+
+		/* watermark_tx indicates if we previously stopped the tx
+		 * queues. If we have enough free stots in the virtio ring,
+		 * re-establish memory reserved and open up tx queues.
+		 */
+		if (cfv->vq_tx->num_free <= cfv->watermark_tx)
+			continue;
+
+		/* Re-establish memory reserve */
+		if (cfv->reserved_mem == 0 && cfv->genpool)
+			cfv->reserved_mem =
+				gen_pool_alloc(cfv->genpool,
+					       cfv->reserved_size);
+
+		/* Open up the tx queues */
+		if (cfv->reserved_mem) {
+			cfv->watermark_tx =
+				virtqueue_get_vring_size(cfv->vq_tx);
+			netif_tx_wake_all_queues(cfv->ndev);
+			/* Buffers are recycled in cfv_netdev_tx, so
+			 * disable notifications when queues are opened.
+			 */
+			virtqueue_disable_cb(cfv->vq_tx);
+			++cfv->stats.tx_flow_on;
+		} else {
+			/* if no memory reserve, wait for more free slots */
+			WARN_ON(cfv->watermark_tx >
+			       virtqueue_get_vring_size(cfv->vq_tx));
+			cfv->watermark_tx +=
+				virtqueue_get_vring_size(cfv->vq_tx) / 4;
+		}
+	}
+}
+
+/* Allocate a SKB and copy packet data to it */
+static struct sk_buff *cfv_alloc_and_copy_skb(int *err,
+					      struct cfv_info *cfv,
+					      u8 *frm, u32 frm_len)
+{
+	struct sk_buff *skb;
+	u32 cfpkt_len, pad_len;
+
+	*err = 0;
+	/* Verify that packet size with down-link header and mtu size */
+	if (frm_len > cfv->mru || frm_len <= cfv->rx_hr + cfv->rx_tr) {
+		netdev_err(cfv->ndev,
+			   "Invalid frmlen:%u  mtu:%u hr:%d tr:%d\n",
+			   frm_len, cfv->mru,  cfv->rx_hr,
+			   cfv->rx_tr);
+		*err = -EPROTO;
+		return NULL;
+	}
+
+	cfpkt_len = frm_len - (cfv->rx_hr + cfv->rx_tr);
+	pad_len = (unsigned long)(frm + cfv->rx_hr) & (IP_HDR_ALIGN - 1);
+
+	skb = netdev_alloc_skb(cfv->ndev, frm_len + pad_len);
+	if (!skb) {
+		*err = -ENOMEM;
+		return NULL;
+	}
+
+	skb_reserve(skb, cfv->rx_hr + pad_len);
+
+	memcpy(skb_put(skb, cfpkt_len), frm + cfv->rx_hr, cfpkt_len);
+	return skb;
+}
+
+/* Get packets from the host vring */
+static int cfv_rx_poll(struct napi_struct *napi, int quota)
+{
+	struct cfv_info *cfv = container_of(napi, struct cfv_info, napi);
+	int rxcnt = 0;
+	int err = 0;
+	void *buf;
+	struct sk_buff *skb;
+	struct vringh_kiov *riov = &cfv->ctx.riov;
+	unsigned int skb_len;
+
+again:
+	do {
+		skb = NULL;
+
+		/* Put the previous iovec back on the used ring and
+		 * fetch a new iovec if we have processed all elements.
+		 */
+		if (riov->i == riov->used) {
+			if (cfv->ctx.head != USHRT_MAX) {
+				vringh_complete_kern(cfv->vr_rx,
+						     cfv->ctx.head,
+						     0);
+				cfv->ctx.head = USHRT_MAX;
+			}
+
+			err = vringh_getdesc_kern(
+				cfv->vr_rx,
+				riov,
+				NULL,
+				&cfv->ctx.head,
+				GFP_ATOMIC);
+
+			if (err <= 0)
+				goto exit;
+		}
+
+		buf = phys_to_virt((unsigned long) riov->iov[riov->i].iov_base);
+		/* TODO: Add check on valid buffer address */
+
+		skb = cfv_alloc_and_copy_skb(&err, cfv, buf,
+					     riov->iov[riov->i].iov_len);
+		if (unlikely(err))
+			goto exit;
+
+		/* Push received packet up the stack. */
+		skb_len = skb->len;
+		skb->protocol = htons(ETH_P_CAIF);
+		skb_reset_mac_header(skb);
+		skb->dev = cfv->ndev;
+		err = netif_receive_skb(skb);
+		if (unlikely(err)) {
+			++cfv->ndev->stats.rx_dropped;
+		} else {
+			++cfv->ndev->stats.rx_packets;
+			cfv->ndev->stats.rx_bytes += skb_len;
+		}
+
+		++riov->i;
+		++rxcnt;
+	} while (rxcnt < quota);
+
+	++cfv->stats.rx_napi_resched;
+	goto out;
+
+exit:
+	switch (err) {
+	case 0:
+		++cfv->stats.rx_napi_complete;
+
+		/* Really out of patckets? (stolen from virtio_net)*/
+		napi_complete(napi);
+		if (unlikely(vringh_notify_enable_kern(cfv->vr_rx)) &&
+		    napi_schedule_prep(napi)) {
+			vringh_notify_disable_kern(cfv->vr_rx);
+			__napi_schedule(napi);
+			goto again;
+		}
+		break;
+
+	case -ENOMEM:
+		++cfv->stats.rx_nomem;
+		dev_kfree_skb(skb);
+		/* Stop NAPI poll on OOM, we hope to be polled later */
+		napi_complete(napi);
+		vringh_notify_enable_kern(cfv->vr_rx);
+		break;
+
+	default:
+		/* We're doomed, any modem fault is fatal */
+		netdev_warn(cfv->ndev, "Bad ring, disable device\n");
+		cfv->ndev->stats.rx_dropped = riov->used - riov->i;
+		napi_complete(napi);
+		vringh_notify_disable_kern(cfv->vr_rx);
+		netif_carrier_off(cfv->ndev);
+		break;
+	}
+out:
+	if (rxcnt && vringh_need_notify_kern(cfv->vr_rx) > 0)
+		vringh_notify(cfv->vr_rx);
+	return rxcnt;
+}
+
+static void cfv_recv(struct virtio_device *vdev, struct vringh *vr_rx)
+{
+	struct cfv_info *cfv = vdev->priv;
+
+	++cfv->stats.rx_kicks;
+	vringh_notify_disable_kern(cfv->vr_rx);
+	napi_schedule(&cfv->napi);
+}
+
+static void cfv_destroy_genpool(struct cfv_info *cfv)
+{
+	if (cfv->alloc_addr)
+		dma_free_coherent(cfv->vdev->dev.parent->parent,
+				  cfv->allocsz, cfv->alloc_addr,
+				  cfv->alloc_dma);
+
+	if (!cfv->genpool)
+		return;
+	gen_pool_free(cfv->genpool,  cfv->reserved_mem,
+		      cfv->reserved_size);
+	gen_pool_destroy(cfv->genpool);
+	cfv->genpool = NULL;
+}
+
+static int cfv_create_genpool(struct cfv_info *cfv)
+{
+	int err;
+
+	/* dma_alloc can only allocate whole pages, and we need a more
+	 * fine graned allocation so we use genpool. We ask for space needed
+	 * by IP and a full ring. If the dma allcoation fails we retry with a
+	 * smaller allocation size.
+	 */
+	err = -ENOMEM;
+	cfv->allocsz = (virtqueue_get_vring_size(cfv->vq_tx) *
+			(ETH_DATA_LEN + cfv->tx_hr + cfv->tx_tr) * 11)/10;
+	if (cfv->allocsz <= (num_possible_cpus() + 1) * cfv->ndev->mtu)
+		return -EINVAL;
+
+	for (;;) {
+		if (cfv->allocsz <= num_possible_cpus() * cfv->ndev->mtu) {
+			netdev_info(cfv->ndev, "Not enough device memory\n");
+			return -ENOMEM;
+		}
+
+		cfv->alloc_addr = dma_alloc_coherent(
+						cfv->vdev->dev.parent->parent,
+						cfv->allocsz, &cfv->alloc_dma,
+						GFP_ATOMIC);
+		if (cfv->alloc_addr)
+			break;
+
+		cfv->allocsz = (cfv->allocsz * 3) >> 2;
+	}
+
+	netdev_dbg(cfv->ndev, "Allocated %zd bytes from dma-memory\n",
+		   cfv->allocsz);
+
+	/* Allocate on 128 bytes boundaries (1 << 7)*/
+	cfv->genpool = gen_pool_create(7, -1);
+	if (!cfv->genpool)
+		goto err;
+
+	err = gen_pool_add_virt(cfv->genpool, (unsigned long)cfv->alloc_addr,
+				(phys_addr_t)virt_to_phys(cfv->alloc_addr),
+				cfv->allocsz, -1);
+	if (err)
+		goto err;
+
+	/* Reserve some memory for low memory situations. If we hit the roof
+	 * in the memory pool, we stop TX flow and release the reserve.
+	 */
+	cfv->reserved_size = num_possible_cpus() * cfv->ndev->mtu;
+	cfv->reserved_mem = gen_pool_alloc(cfv->genpool,
+					   cfv->reserved_size);
+	if (!cfv->reserved_mem)
+		goto err;
+
+	cfv->watermark_tx = virtqueue_get_vring_size(cfv->vq_tx);
+	return 0;
+err:
+	cfv_destroy_genpool(cfv);
+	return err;
+}
+
+/* Enable the CAIF interface and allocate the memory-pool */
+static int cfv_netdev_open(struct net_device *netdev)
+{
+	struct cfv_info *cfv = netdev_priv(netdev);
+
+	if (cfv_create_genpool(cfv))
+		return -ENOMEM;
+
+	netif_carrier_on(netdev);
+	napi_enable(&cfv->napi);
+
+	/* Schedule NAPI to read any pending packets */
+	napi_schedule(&cfv->napi);
+	return 0;
+}
+
+/* Disable the CAIF interface and free the memory-pool */
+static int cfv_netdev_close(struct net_device *netdev)
+{
+	struct cfv_info *cfv = netdev_priv(netdev);
+	unsigned long flags;
+	struct buf_info *buf_info;
+
+	/* Disable interrupts, queues and NAPI polling */
+	netif_carrier_off(netdev);
+	virtqueue_disable_cb(cfv->vq_tx);
+	vringh_notify_disable_kern(cfv->vr_rx);
+	napi_disable(&cfv->napi);
+
+	/* Release any TX buffers on both used and avilable rings */
+	cfv_release_used_buf(cfv->vq_tx);
+	spin_lock_irqsave(&cfv->tx_lock, flags);
+	while ((buf_info = virtqueue_detach_unused_buf(cfv->vq_tx)))
+		free_buf_info(cfv, buf_info);
+	spin_unlock_irqrestore(&cfv->tx_lock, flags);
+
+	/* Release all dma allocated memory and destroy the pool */
+	cfv_destroy_genpool(cfv);
+	return 0;
+}
+
+/* Allocate a buffer in dma-memory and copy skb to it */
+static struct buf_info *cfv_alloc_and_copy_to_shm(struct cfv_info *cfv,
+						       struct sk_buff *skb,
+						       struct scatterlist *sg)
+{
+	struct caif_payload_info *info = (void *)&skb->cb;
+	struct buf_info *buf_info = NULL;
+	u8 pad_len, hdr_ofs;
+
+	if (!cfv->genpool)
+		goto err;
+
+	if (unlikely(cfv->tx_hr + skb->len + cfv->tx_tr > cfv->mtu)) {
+		netdev_warn(cfv->ndev, "Invalid packet len (%d > %d)\n",
+			    cfv->tx_hr + skb->len + cfv->tx_tr, cfv->mtu);
+		goto err;
+	}
+
+	buf_info = kmalloc(sizeof(struct buf_info), GFP_ATOMIC);
+	if (unlikely(!buf_info))
+		goto err;
+
+	/* Make the IP header aligned in tbe buffer */
+	hdr_ofs = cfv->tx_hr + info->hdr_len;
+	pad_len = hdr_ofs & (IP_HDR_ALIGN - 1);
+	buf_info->size = cfv->tx_hr + skb->len + cfv->tx_tr + pad_len;
+
+	/* allocate dma memory buffer */
+	buf_info->vaddr = (void *)gen_pool_alloc(cfv->genpool, buf_info->size);
+	if (unlikely(!buf_info->vaddr))
+		goto err;
+
+	/* copy skbuf contents to send buffer */
+	skb_copy_bits(skb, 0, buf_info->vaddr + cfv->tx_hr + pad_len, skb->len);
+	sg_init_one(sg, buf_info->vaddr + pad_len,
+		    skb->len + cfv->tx_hr + cfv->rx_hr);
+
+	return buf_info;
+err:
+	kfree(buf_info);
+	return NULL;
+}
+
+/* Put the CAIF packet on the virtio ring and kick the receiver */
+static int cfv_netdev_tx(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct cfv_info *cfv = netdev_priv(netdev);
+	struct buf_info *buf_info;
+	struct scatterlist sg;
+	unsigned long flags;
+	bool flow_off = false;
+	int ret;
+
+	/* garbage collect released buffers */
+	cfv_release_used_buf(cfv->vq_tx);
+	spin_lock_irqsave(&cfv->tx_lock, flags);
+
+	/* Flow-off check takes into account number of cpus to make sure
+	 * virtqueue will not be overfilled in any possible smp conditions.
+	 *
+	 * Flow-on is triggered when sufficient buffers are freed
+	 */
+	if (unlikely(cfv->vq_tx->num_free <= num_present_cpus())) {
+		flow_off = true;
+		cfv->stats.tx_full_ring++;
+	}
+
+	/* If we run out of memory, we release the memory reserve and retry
+	 * allocation.
+	 */
+	buf_info = cfv_alloc_and_copy_to_shm(cfv, skb, &sg);
+	if (unlikely(!buf_info)) {
+		cfv->stats.tx_no_mem++;
+		flow_off = true;
+
+		if (cfv->reserved_mem && cfv->genpool) {
+			gen_pool_free(cfv->genpool,  cfv->reserved_mem,
+				      cfv->reserved_size);
+			cfv->reserved_mem = 0;
+			buf_info = cfv_alloc_and_copy_to_shm(cfv, skb, &sg);
+		}
+	}
+
+	if (unlikely(flow_off)) {
+		/* Turn flow on when a 1/4 of the descriptors are released */
+		cfv->watermark_tx = virtqueue_get_vring_size(cfv->vq_tx) / 4;
+		/* Enable notifications of recycled TX buffers */
+		virtqueue_enable_cb(cfv->vq_tx);
+		netif_tx_stop_all_queues(netdev);
+	}
+
+	if (unlikely(!buf_info)) {
+		/* If the memory reserve does it's job, this shouldn't happen */
+		netdev_warn(cfv->ndev, "Out of gen_pool memory\n");
+		goto err;
+	}
+
+	ret = virtqueue_add_buf(cfv->vq_tx, &sg, 1, 0,
+				buf_info, GFP_ATOMIC);
+	if (unlikely((ret < 0))) {
+		/* If flow control works, this shouldn't happen */
+		netdev_warn(cfv->ndev, "Failed adding buffer to TX vring:%d\n",
+			    ret);
+		goto err;
+	}
+
+	/* update netdev statistics */
+	cfv->ndev->stats.tx_packets++;
+	cfv->ndev->stats.tx_bytes += skb->len;
+	spin_unlock_irqrestore(&cfv->tx_lock, flags);
+
+	/* tell the remote processor it has a pending message to read */
+	virtqueue_kick(cfv->vq_tx);
+
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+err:
+	spin_unlock_irqrestore(&cfv->tx_lock, flags);
+	cfv->ndev->stats.tx_dropped++;
+	free_buf_info(cfv, buf_info);
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static void cfv_tx_release_tasklet(unsigned long drv)
+{
+	struct cfv_info *cfv = (struct cfv_info *)drv;
+	cfv_release_used_buf(cfv->vq_tx);
+}
+
+static const struct net_device_ops cfv_netdev_ops = {
+	.ndo_open = cfv_netdev_open,
+	.ndo_stop = cfv_netdev_close,
+	.ndo_start_xmit = cfv_netdev_tx,
+};
+
+static void cfv_netdev_setup(struct net_device *netdev)
+{
+	netdev->netdev_ops = &cfv_netdev_ops;
+	netdev->type = ARPHRD_CAIF;
+	netdev->tx_queue_len = 100;
+	netdev->flags = IFF_POINTOPOINT | IFF_NOARP;
+	netdev->mtu = CFV_DEF_MTU_SIZE;
+	netdev->destructor = free_netdev;
+}
+
+/* Create debugfs counters for the device */
+static inline void debugfs_init(struct cfv_info *cfv)
+{
+	cfv->debugfs =
+		debugfs_create_dir(netdev_name(cfv->ndev), NULL);
+
+	if (IS_ERR(cfv->debugfs))
+		return;
+
+	debugfs_create_u32("rx-napi-complete", S_IRUSR, cfv->debugfs,
+			   &cfv->stats.rx_napi_complete);
+	debugfs_create_u32("rx-napi-resched", S_IRUSR, cfv->debugfs,
+			   &cfv->stats.rx_napi_resched);
+	debugfs_create_u32("rx-nomem", S_IRUSR, cfv->debugfs,
+			   &cfv->stats.rx_nomem);
+	debugfs_create_u32("rx-kicks", S_IRUSR, cfv->debugfs,
+			   &cfv->stats.rx_kicks);
+	debugfs_create_u32("tx-full-ring", S_IRUSR, cfv->debugfs,
+			   &cfv->stats.tx_full_ring);
+	debugfs_create_u32("tx-no-mem", S_IRUSR, cfv->debugfs,
+			   &cfv->stats.tx_no_mem);
+	debugfs_create_u32("tx-kicks", S_IRUSR, cfv->debugfs,
+			   &cfv->stats.tx_kicks);
+	debugfs_create_u32("tx-flow-on", S_IRUSR, cfv->debugfs,
+			   &cfv->stats.tx_flow_on);
+}
+
+/* Setup CAIF for the a virtio device */
+static int cfv_probe(struct virtio_device *vdev)
+{
+	vq_callback_t *vq_cbs = cfv_release_cb;
+	vrh_callback_t *vrh_cbs = cfv_recv;
+	const char *names =  "output";
+	const char *cfv_netdev_name = "cfvrt";
+	struct net_device *netdev;
+	struct cfv_info *cfv;
+	int err = -EINVAL;
+
+	netdev = alloc_netdev(sizeof(struct cfv_info), cfv_netdev_name,
+			      cfv_netdev_setup);
+	if (!netdev)
+		return -ENOMEM;
+
+	cfv = netdev_priv(netdev);
+	cfv->vdev = vdev;
+	cfv->ndev = netdev;
+
+	spin_lock_init(&cfv->tx_lock);
+
+	/* Get the RX virtio ring. This is a "host side vring". */
+	err = vdev->vringh_config->find_vrhs(vdev, 1, &cfv->vr_rx, &vrh_cbs);
+	if (err)
+		goto err;
+
+	/* Get the TX virtio ring. This is a "guest side vring". */
+	err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names);
+	if (err)
+		goto err;
+
+	/* Get the CAIF configuration from virtio config space, if available */
+#define GET_VIRTIO_CONFIG_OPS(_v, _var, _f) \
+	((_v)->config->get(_v, offsetof(struct virtio_caif_transf_config, _f), \
+			   &_var, \
+			   FIELD_SIZEOF(struct virtio_caif_transf_config, _f)))
+
+	if (vdev->config->get) {
+		GET_VIRTIO_CONFIG_OPS(vdev, cfv->tx_hr, headroom);
+		GET_VIRTIO_CONFIG_OPS(vdev, cfv->rx_hr, headroom);
+		GET_VIRTIO_CONFIG_OPS(vdev, cfv->tx_tr, tailroom);
+		GET_VIRTIO_CONFIG_OPS(vdev, cfv->rx_tr, tailroom);
+		GET_VIRTIO_CONFIG_OPS(vdev, cfv->mtu, mtu);
+		GET_VIRTIO_CONFIG_OPS(vdev, cfv->mru, mtu);
+	} else {
+		cfv->tx_hr = CFV_DEF_HEADROOM;
+		cfv->rx_hr = CFV_DEF_HEADROOM;
+		cfv->tx_tr = CFV_DEF_TAILROOM;
+		cfv->rx_tr = CFV_DEF_TAILROOM;
+		cfv->mtu = CFV_DEF_MTU_SIZE;
+		cfv->mru = CFV_DEF_MTU_SIZE;
+	}
+
+	netdev->needed_headroom = cfv->tx_hr;
+	netdev->needed_tailroom = cfv->tx_tr;
+
+	/* Disable buffer release interrupts unless we have stopped TX queues */
+	virtqueue_disable_cb(cfv->vq_tx);
+
+	netdev->mtu = cfv->mtu - cfv->tx_tr;
+	vdev->priv = cfv;
+
+	/* Initialize NAPI poll context data */
+	vringh_kiov_init(&cfv->ctx.riov, NULL, 0);
+	cfv->ctx.head = USHRT_MAX;
+	netif_napi_add(netdev, &cfv->napi, cfv_rx_poll, CFV_DEFAULT_QUOTA);
+
+	tasklet_init(&cfv->tx_release_tasklet,
+		     cfv_tx_release_tasklet,
+		     (unsigned long)cfv);
+
+	/* Carrier is off until netdevice is opened */
+	netif_carrier_off(netdev);
+
+	/* register Netdev */
+	err = register_netdev(netdev);
+	if (err) {
+		dev_err(&vdev->dev, "Unable to register netdev (%d)\n", err);
+		goto err;
+	}
+
+	debugfs_init(cfv);
+
+	return 0;
+err:
+	netdev_warn(cfv->ndev, "CAIF Virtio probe failed:%d\n", err);
+
+	if (cfv->vr_rx)
+		vdev->vringh_config->del_vrhs(cfv->vdev);
+	if (cfv->vdev)
+		vdev->config->del_vqs(cfv->vdev);
+	free_netdev(netdev);
+	return err;
+}
+
+static void cfv_remove(struct virtio_device *vdev)
+{
+	struct cfv_info *cfv = vdev->priv;
+
+	rtnl_lock();
+	dev_close(cfv->ndev);
+	rtnl_unlock();
+
+	tasklet_kill(&cfv->tx_release_tasklet);
+	debugfs_remove_recursive(cfv->debugfs);
+
+	vringh_kiov_cleanup(&cfv->ctx.riov);
+	vdev->config->reset(vdev);
+	vdev->vringh_config->del_vrhs(cfv->vdev);
+	cfv->vr_rx = NULL;
+	vdev->config->del_vqs(cfv->vdev);
+	unregister_netdev(cfv->ndev);
+}
+
+static struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_CAIF, VIRTIO_DEV_ANY_ID },
+	{ 0 },
+};
+
+static unsigned int features[] = {
+};
+
+static struct virtio_driver caif_virtio_driver = {
+	.feature_table		= features,
+	.feature_table_size	= ARRAY_SIZE(features),
+	.driver.name		= KBUILD_MODNAME,
+	.driver.owner		= THIS_MODULE,
+	.id_table		= id_table,
+	.probe			= cfv_probe,
+	.remove			= cfv_remove,
+};
+
+module_virtio_driver(caif_virtio_driver);
+MODULE_DEVICE_TABLE(virtio, id_table);
diff --git a/include/linux/virtio_caif.h b/include/linux/virtio_caif.h
new file mode 100644
index 000000000000..5d2d3124ca3d
--- /dev/null
+++ b/include/linux/virtio_caif.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2012
+ * Author: Sjur Brændeland <sjur.brandeland@stericsson.com>
+ *
+ * This header is BSD licensed so
+ * anyone can use the definitions to implement compatible remote processors
+ */
+
+#ifndef VIRTIO_CAIF_H
+#define VIRTIO_CAIF_H
+
+#include <linux/types.h>
+struct virtio_caif_transf_config {
+	u16 headroom;
+	u16 tailroom;
+	u32 mtu;
+	u8 reserved[4];
+};
+
+struct virtio_caif_config {
+	struct virtio_caif_transf_config uplink, downlink;
+	u8 reserved[8];
+};
+#endif
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index a7630d04029f..284fc3a05f7b 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -38,5 +38,6 @@
 #define VIRTIO_ID_SCSI		8 /* virtio scsi */
 #define VIRTIO_ID_9P		9 /* 9p virtio console */
 #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
+#define VIRTIO_ID_CAIF	       12 /* Virtio caif */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
-- 
cgit 


From 3e5289d5e3f98b7b5b8cac32e9e5a7004c067436 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 19 Mar 2013 06:39:31 +0000
Subject: filter: add ANC_PAY_OFFSET instruction for loading payload start
 offset

It is very useful to do dynamic truncation of packets. In particular,
we're interested to push the necessary header bytes to the user space and
cut off user payload that should probably not be transferred for some reasons
(e.g. privacy, speed, or others). With the ancillary extension PAY_OFFSET,
we can load it into the accumulator, and return it. E.g. in bpfc syntax ...

        ld #poff        ; { 0x20, 0, 0, 0xfffff034 },
        ret a           ; { 0x16, 0, 0, 0x00000000 },

... as a filter will accomplish this without having to do a big hackery in
a BPF filter itself. Follow-up JIT implementations are welcome.

Thanks to Eric Dumazet for suggesting and discussing this during the
Netfilter Workshop in Copenhagen.

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h      | 1 +
 include/uapi/linux/filter.h | 3 ++-
 net/core/filter.c           | 5 +++++
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index c45eabc135e1..d2059cb4e465 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -126,6 +126,7 @@ enum {
 	BPF_S_ANC_SECCOMP_LD_W,
 	BPF_S_ANC_VLAN_TAG,
 	BPF_S_ANC_VLAN_TAG_PRESENT,
+	BPF_S_ANC_PAY_OFFSET,
 };
 
 #endif /* __LINUX_FILTER_H__ */
diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h
index 9cfde6941099..8eb9ccaa5b48 100644
--- a/include/uapi/linux/filter.h
+++ b/include/uapi/linux/filter.h
@@ -129,7 +129,8 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_ALU_XOR_X	40
 #define SKF_AD_VLAN_TAG	44
 #define SKF_AD_VLAN_TAG_PRESENT 48
-#define SKF_AD_MAX	52
+#define SKF_AD_PAY_OFFSET	52
+#define SKF_AD_MAX	56
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 2e20b55a7830..dad2a178f9f8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -348,6 +348,9 @@ load_b:
 		case BPF_S_ANC_VLAN_TAG_PRESENT:
 			A = !!vlan_tx_tag_present(skb);
 			continue;
+		case BPF_S_ANC_PAY_OFFSET:
+			A = __skb_get_poff(skb);
+			continue;
 		case BPF_S_ANC_NLATTR: {
 			struct nlattr *nla;
 
@@ -612,6 +615,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
 			ANCILLARY(ALU_XOR_X);
 			ANCILLARY(VLAN_TAG);
 			ANCILLARY(VLAN_TAG_PRESENT);
+			ANCILLARY(PAY_OFFSET);
 			}
 
 			/* ancillary operation unknown or unsupported */
@@ -814,6 +818,7 @@ static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
 		[BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS,
 		[BPF_S_ANC_VLAN_TAG]	= BPF_LD|BPF_B|BPF_ABS,
 		[BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS,
+		[BPF_S_ANC_PAY_OFFSET]	= BPF_LD|BPF_B|BPF_ABS,
 		[BPF_S_LD_W_LEN]	= BPF_LD|BPF_W|BPF_LEN,
 		[BPF_S_LD_W_IND]	= BPF_LD|BPF_W|BPF_IND,
 		[BPF_S_LD_H_IND]	= BPF_LD|BPF_H|BPF_IND,
-- 
cgit 


From 2b5faa4c553f90ee2dde1d976b220b1ca9741ef0 Mon Sep 17 00:00:00 2001
From: Jesper Derehag <jderehag@hotmail.com>
Date: Tue, 19 Mar 2013 20:50:05 +0000
Subject: connector: Added coredumping event to the process connector

Process connector can now also detect coredumping events.

Main aim of patch is get notified at start of coredumping, instead of
having to wait for it to finish and then being notified through EXIT
event.

Could be used for instance by process-managers that want to get
notified as soon as possible about process failures, and not
necessarily beeing notified after coredump, which could be in the
order of minutes depending on size of coredump, piping and so on.

Signed-off-by: Jesper Derehag <jderehag@hotmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/connector/cn_proc.c  | 25 +++++++++++++++++++++++++
 include/linux/cn_proc.h      |  4 ++++
 include/uapi/linux/cn_proc.h | 10 +++++++++-
 kernel/signal.c              |  2 ++
 4 files changed, 40 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 1110478dd0fd..08ae128cce9b 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -232,6 +232,31 @@ void proc_comm_connector(struct task_struct *task)
 	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
 }
 
+void proc_coredump_connector(struct task_struct *task)
+{
+	struct cn_msg *msg;
+	struct proc_event *ev;
+	__u8 buffer[CN_PROC_MSG_SIZE];
+	struct timespec ts;
+
+	if (atomic_read(&proc_event_num_listeners) < 1)
+		return;
+
+	msg = (struct cn_msg *)buffer;
+	ev = (struct proc_event *)msg->data;
+	get_seq(&msg->seq, &ev->cpu);
+	ktime_get_ts(&ts); /* get high res monotonic timestamp */
+	put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
+	ev->what = PROC_EVENT_COREDUMP;
+	ev->event_data.coredump.process_pid = task->pid;
+	ev->event_data.coredump.process_tgid = task->tgid;
+
+	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
+	msg->ack = 0; /* not used */
+	msg->len = sizeof(*ev);
+	cn_netlink_send(msg, CN_IDX_PROC, GFP_KERNEL);
+}
+
 void proc_exit_connector(struct task_struct *task)
 {
 	struct cn_msg *msg;
diff --git a/include/linux/cn_proc.h b/include/linux/cn_proc.h
index 2c1bc1ea04ee..1d5b02a96c46 100644
--- a/include/linux/cn_proc.h
+++ b/include/linux/cn_proc.h
@@ -26,6 +26,7 @@ void proc_id_connector(struct task_struct *task, int which_id);
 void proc_sid_connector(struct task_struct *task);
 void proc_ptrace_connector(struct task_struct *task, int which_id);
 void proc_comm_connector(struct task_struct *task);
+void proc_coredump_connector(struct task_struct *task);
 void proc_exit_connector(struct task_struct *task);
 #else
 static inline void proc_fork_connector(struct task_struct *task)
@@ -48,6 +49,9 @@ static inline void proc_ptrace_connector(struct task_struct *task,
 					 int ptrace_id)
 {}
 
+static inline void proc_coredump_connector(struct task_struct *task)
+{}
+
 static inline void proc_exit_connector(struct task_struct *task)
 {}
 #endif	/* CONFIG_PROC_EVENTS */
diff --git a/include/uapi/linux/cn_proc.h b/include/uapi/linux/cn_proc.h
index 0d7b49973bb3..f6c271035bbd 100644
--- a/include/uapi/linux/cn_proc.h
+++ b/include/uapi/linux/cn_proc.h
@@ -56,7 +56,9 @@ struct proc_event {
 		PROC_EVENT_PTRACE = 0x00000100,
 		PROC_EVENT_COMM = 0x00000200,
 		/* "next" should be 0x00000400 */
-		/* "last" is the last process event: exit */
+		/* "last" is the last process event: exit,
+		 * while "next to last" is coredumping event */
+		PROC_EVENT_COREDUMP = 0x40000000,
 		PROC_EVENT_EXIT = 0x80000000
 	} what;
 	__u32 cpu;
@@ -110,11 +112,17 @@ struct proc_event {
 			char           comm[16];
 		} comm;
 
+		struct coredump_proc_event {
+			__kernel_pid_t process_pid;
+			__kernel_pid_t process_tgid;
+		} coredump;
+
 		struct exit_proc_event {
 			__kernel_pid_t process_pid;
 			__kernel_pid_t process_tgid;
 			__u32 exit_code, exit_signal;
 		} exit;
+
 	} event_data;
 };
 
diff --git a/kernel/signal.c b/kernel/signal.c
index dd72567767d9..497330ec2ae9 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -32,6 +32,7 @@
 #include <linux/user_namespace.h>
 #include <linux/uprobes.h>
 #include <linux/compat.h>
+#include <linux/cn_proc.h>
 #define CREATE_TRACE_POINTS
 #include <trace/events/signal.h>
 
@@ -2350,6 +2351,7 @@ relock:
 		if (sig_kernel_coredump(signr)) {
 			if (print_fatal_signals)
 				print_fatal_signal(info->si_signo);
+			proc_coredump_connector(current);
 			/*
 			 * If it was able to dump core, this kills all
 			 * other threads in the group and synchronizes with
-- 
cgit 


From eaaa31392690c7609f7afeec5ba38a79d009842d Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Thu, 21 Mar 2013 20:33:48 +0400
Subject: netlink: Diag core and basic socket info dumping (v2)

The netlink_diag can be built as a module, just like it's done in
unix sockets.

The core dumping message carries the basic info about netlink sockets:
family, type and protocol, portis, dst_group, dst_portid, state.

Groups can be received as an optional parameter NETLINK_DIAG_GROUPS.

Netlink sockets cab be filtered by protocols.

The socket inode number and cookie is reserved for future per-socket info
retrieving. The per-protocol filtering is also reserved for future by
requiring the sdiag_protocol to be zero.

The file /proc/net/netlink doesn't provide enough information for
dumping netlink sockets. It doesn't provide dst_group, dst_portid,
groups above 32.

v2: fix NETLINK_DIAG_MAX. Now it's equal to the last constant.

Acked-by: Pavel Emelyanov <xemul@parallels.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Pablo Neira Ayuso <pablo@netfilter.org>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Andrey Vagin <avagin@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/netlink_diag.h |  42 +++++++++
 net/Kconfig                       |   1 +
 net/netlink/Kconfig               |  10 ++
 net/netlink/Makefile              |   3 +
 net/netlink/diag.c                | 188 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 244 insertions(+)
 create mode 100644 include/uapi/linux/netlink_diag.h
 create mode 100644 net/netlink/Kconfig
 create mode 100644 net/netlink/diag.c

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h
new file mode 100644
index 000000000000..88009a31cd06
--- /dev/null
+++ b/include/uapi/linux/netlink_diag.h
@@ -0,0 +1,42 @@
+#ifndef __NETLINK_DIAG_H__
+#define __NETLINK_DIAG_H__
+
+#include <linux/types.h>
+
+struct netlink_diag_req {
+	__u8	sdiag_family;
+	__u8	sdiag_protocol;
+	__u16	pad;
+	__u32	ndiag_ino;
+	__u32	ndiag_show;
+	__u32	ndiag_cookie[2];
+};
+
+struct netlink_diag_msg {
+	__u8	ndiag_family;
+	__u8	ndiag_type;
+	__u8	ndiag_protocol;
+	__u8	ndiag_state;
+
+	__u32	ndiag_portid;
+	__u32	ndiag_dst_portid;
+	__u32	ndiag_dst_group;
+	__u32	ndiag_ino;
+	__u32	ndiag_cookie[2];
+};
+
+enum {
+	NETLINK_DIAG_MEMINFO,
+	NETLINK_DIAG_GROUPS,
+
+	__NETLINK_DIAG_MAX,
+};
+
+#define NETLINK_DIAG_MAX (__NETLINK_DIAG_MAX - 1)
+
+#define NDIAG_PROTO_ALL		((__u8) ~0)
+
+#define NDIAG_SHOW_MEMINFO	0x00000001 /* show memory info of a socket */
+#define NDIAG_SHOW_GROUPS	0x00000002 /* show groups of a netlink socket */
+
+#endif
diff --git a/net/Kconfig b/net/Kconfig
index 6f676ab885be..2ddc9046868e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -217,6 +217,7 @@ source "net/dns_resolver/Kconfig"
 source "net/batman-adv/Kconfig"
 source "net/openvswitch/Kconfig"
 source "net/vmw_vsock/Kconfig"
+source "net/netlink/Kconfig"
 
 config RPS
 	boolean
diff --git a/net/netlink/Kconfig b/net/netlink/Kconfig
new file mode 100644
index 000000000000..5d6e8c05b3d4
--- /dev/null
+++ b/net/netlink/Kconfig
@@ -0,0 +1,10 @@
+#
+# Netlink Sockets
+#
+
+config NETLINK_DIAG
+	tristate "NETLINK: socket monitoring interface"
+	default n
+	---help---
+	  Support for NETLINK socket monitoring interface used by the ss tool.
+	  If unsure, say Y.
diff --git a/net/netlink/Makefile b/net/netlink/Makefile
index bdd6ddf4e95b..e837917f6c03 100644
--- a/net/netlink/Makefile
+++ b/net/netlink/Makefile
@@ -3,3 +3,6 @@
 #
 
 obj-y  				:= af_netlink.o genetlink.o
+
+obj-$(CONFIG_NETLINK_DIAG)	+= netlink_diag.o
+netlink_diag-y			:= diag.o
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
new file mode 100644
index 000000000000..5ffb1d1cf402
--- /dev/null
+++ b/net/netlink/diag.c
@@ -0,0 +1,188 @@
+#include <linux/module.h>
+
+#include <net/sock.h>
+#include <linux/netlink.h>
+#include <linux/sock_diag.h>
+#include <linux/netlink_diag.h>
+
+#include "af_netlink.h"
+
+static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+
+	if (nlk->groups == NULL)
+		return 0;
+
+	return nla_put(nlskb, NETLINK_DIAG_GROUPS, NLGRPSZ(nlk->ngroups),
+		       nlk->groups);
+}
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+			struct netlink_diag_req *req,
+			u32 portid, u32 seq, u32 flags, int sk_ino)
+{
+	struct nlmsghdr *nlh;
+	struct netlink_diag_msg *rep;
+	struct netlink_sock *nlk = nlk_sk(sk);
+
+	nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
+			flags);
+	if (!nlh)
+		return -EMSGSIZE;
+
+	rep = nlmsg_data(nlh);
+	rep->ndiag_family	= AF_NETLINK;
+	rep->ndiag_type		= sk->sk_type;
+	rep->ndiag_protocol	= sk->sk_protocol;
+	rep->ndiag_state	= sk->sk_state;
+
+	rep->ndiag_ino		= sk_ino;
+	rep->ndiag_portid	= nlk->portid;
+	rep->ndiag_dst_portid	= nlk->dst_portid;
+	rep->ndiag_dst_group	= nlk->dst_group;
+	sock_diag_save_cookie(sk, rep->ndiag_cookie);
+
+	if ((req->ndiag_show & NDIAG_SHOW_GROUPS) &&
+	    sk_diag_dump_groups(sk, skb))
+		goto out_nlmsg_trim;
+
+	if ((req->ndiag_show & NDIAG_SHOW_MEMINFO) &&
+	    sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
+		goto out_nlmsg_trim;
+
+	return nlmsg_end(skb, nlh);
+
+out_nlmsg_trim:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+				int protocol, int s_num)
+{
+	struct netlink_table *tbl = &nl_table[protocol];
+	struct nl_portid_hash *hash = &tbl->hash;
+	struct net *net = sock_net(skb->sk);
+	struct netlink_diag_req *req;
+	struct sock *sk;
+	int ret = 0, num = 0, i;
+
+	req = nlmsg_data(cb->nlh);
+
+	for (i = 0; i <= hash->mask; i++) {
+		sk_for_each(sk, &hash->table[i]) {
+			if (!net_eq(sock_net(sk), net))
+				continue;
+			if (num < s_num) {
+				num++;
+				continue;
+			}
+
+			if (sk_diag_fill(sk, skb, req,
+					 NETLINK_CB(cb->skb).portid,
+					 cb->nlh->nlmsg_seq,
+					 NLM_F_MULTI,
+					 sock_i_ino(sk)) < 0) {
+				ret = 1;
+				goto done;
+			}
+
+			num++;
+		}
+	}
+
+	sk_for_each_bound(sk, &tbl->mc_list) {
+		if (sk_hashed(sk))
+			continue;
+		if (!net_eq(sock_net(sk), net))
+			continue;
+		if (num < s_num) {
+			num++;
+			continue;
+		}
+
+		if (sk_diag_fill(sk, skb, req,
+				 NETLINK_CB(cb->skb).portid,
+				 cb->nlh->nlmsg_seq,
+				 NLM_F_MULTI,
+				 sock_i_ino(sk)) < 0) {
+			ret = 1;
+			goto done;
+		}
+		num++;
+	}
+done:
+	cb->args[0] = num;
+	cb->args[1] = protocol;
+
+	return ret;
+}
+
+static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct netlink_diag_req *req;
+	int s_num = cb->args[0];
+
+	req = nlmsg_data(cb->nlh);
+
+	read_lock(&nl_table_lock);
+
+	if (req->sdiag_protocol == NDIAG_PROTO_ALL) {
+		int i;
+
+		for (i = cb->args[1]; i < MAX_LINKS; i++) {
+			if (__netlink_diag_dump(skb, cb, i, s_num))
+				break;
+			s_num = 0;
+		}
+	} else {
+		if (req->sdiag_protocol >= MAX_LINKS) {
+			read_unlock(&nl_table_lock);
+			return -ENOENT;
+		}
+
+		__netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num);
+	}
+
+	read_unlock(&nl_table_lock);
+
+	return skb->len;
+}
+
+static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+	int hdrlen = sizeof(struct netlink_diag_req);
+	struct net *net = sock_net(skb->sk);
+
+	if (nlmsg_len(h) < hdrlen)
+		return -EINVAL;
+
+	if (h->nlmsg_flags & NLM_F_DUMP) {
+		struct netlink_dump_control c = {
+			.dump = netlink_diag_dump,
+		};
+		return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+	} else
+		return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler netlink_diag_handler = {
+	.family = AF_NETLINK,
+	.dump = netlink_diag_handler_dump,
+};
+
+static int __init netlink_diag_init(void)
+{
+	return sock_diag_register(&netlink_diag_handler);
+}
+
+static void __exit netlink_diag_exit(void)
+{
+	sock_diag_unregister(&netlink_diag_handler);
+}
+
+module_init(netlink_diag_init);
+module_exit(netlink_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 16 /* AF_NETLINK */);
-- 
cgit 


From e64171b97b88a1adf297d429826fdbb9e232ab53 Mon Sep 17 00:00:00 2001
From: Manjunath Hadli <manjunath.hadli@ti.com>
Date: Thu, 7 Feb 2013 13:48:51 -0300
Subject: [media] media: add support for decoder as one of media entity types

A lot of SOCs including Texas Instruments Davinci family mainly use
video decoders as input devices. This patch adds a flag
'MEDIA_ENT_T_V4L2_SUBDEV_DECODER' media entity type for decoder's.
Along side updates the documentation for this media entity type.

Signed-off-by: Manjunath Hadli <manjunath.hadli@ti.com>
Signed-off-by: Lad, Prabhakar <prabhakar.lad@ti.com>
Reviewed-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml | 10 ++++++++++
 include/uapi/linux/media.h                                  |  2 ++
 2 files changed, 12 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml b/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml
index 576b68b33f2c..116c301656e0 100644
--- a/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml
+++ b/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml
@@ -272,6 +272,16 @@
 	    <entry><constant>MEDIA_ENT_T_V4L2_SUBDEV_LENS</constant></entry>
 	    <entry>Lens controller</entry>
 	  </row>
+	  <row>
+	    <entry><constant>MEDIA_ENT_T_V4L2_SUBDEV_DECODER</constant></entry>
+	    <entry>Video decoder, the basic function of the video decoder is to
+	    accept analogue video from a wide variety of sources such as
+	    broadcast, DVD players, cameras and video cassette recorders, in
+	    either NTSC, PAL or HD format and still occasionally SECAM, separate
+	    it into its component parts, luminance and chrominance, and output
+	    it in some digital video standard, with appropriate embedded timing
+	    signals.</entry>
+	  </row>
 	</tbody>
       </tgroup>
     </table>
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 0ef883327de2..ed49574ad757 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -56,6 +56,8 @@ struct media_device_info {
 #define MEDIA_ENT_T_V4L2_SUBDEV_SENSOR	(MEDIA_ENT_T_V4L2_SUBDEV + 1)
 #define MEDIA_ENT_T_V4L2_SUBDEV_FLASH	(MEDIA_ENT_T_V4L2_SUBDEV + 2)
 #define MEDIA_ENT_T_V4L2_SUBDEV_LENS	(MEDIA_ENT_T_V4L2_SUBDEV + 3)
+/* A converter of analogue video to its digital representation. */
+#define MEDIA_ENT_T_V4L2_SUBDEV_DECODER	(MEDIA_ENT_T_V4L2_SUBDEV + 4)
 
 #define MEDIA_ENT_FL_DEFAULT		(1 << 0)
 
-- 
cgit 


From 1ff3c9677bff7e468e0c487d0ffefe4e901d33f4 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 3 May 2012 12:43:40 -0700
Subject: timekeeping: Add CLOCK_TAI clockid

This add a CLOCK_TAI clockid and the needed accessors.

CC: Thomas Gleixner <tglx@linutronix.de>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/time.h      |  1 +
 include/uapi/linux/time.h |  6 ++----
 kernel/posix-timers.c     | 10 ++++++++++
 kernel/time/timekeeping.c | 30 ++++++++++++++++++++++++++++++
 4 files changed, 43 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index 47210a175e78..22d81b3c955b 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -183,6 +183,7 @@ extern u64 timekeeping_max_deferment(void);
 extern int timekeeping_inject_offset(struct timespec *ts);
 extern s32 timekeeping_get_tai_offset(void);
 extern void timekeeping_set_tai_offset(s32 tai_offset);
+extern void timekeeping_clocktai(struct timespec *ts);
 
 struct tms;
 extern void do_sys_times(struct tms *);
diff --git a/include/uapi/linux/time.h b/include/uapi/linux/time.h
index 0d3c0edc3eda..e75e1b6ff27f 100644
--- a/include/uapi/linux/time.h
+++ b/include/uapi/linux/time.h
@@ -54,11 +54,9 @@ struct itimerval {
 #define CLOCK_BOOTTIME			7
 #define CLOCK_REALTIME_ALARM		8
 #define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_SGI_CYCLE			10	/* Hardware specific */
+#define CLOCK_TAI			11
 
-/*
- * The IDs of various hardware clocks:
- */
-#define CLOCK_SGI_CYCLE			10
 #define MAX_CLOCKS			16
 #define CLOCKS_MASK			(CLOCK_REALTIME | CLOCK_MONOTONIC)
 #define CLOCKS_MONO			CLOCK_MONOTONIC
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 6edbb2c55c22..fbfc5f1b7710 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -221,6 +221,11 @@ static int posix_get_boottime(const clockid_t which_clock, struct timespec *tp)
 	return 0;
 }
 
+static int posix_get_tai(clockid_t which_clock, struct timespec *tp)
+{
+	timekeeping_clocktai(tp);
+	return 0;
+}
 
 /*
  * Initialize everything, well, just everything in Posix clocks/timers ;)
@@ -261,6 +266,10 @@ static __init int init_posix_timers(void)
 		.clock_getres	= posix_get_coarse_res,
 		.clock_get	= posix_get_monotonic_coarse,
 	};
+	struct k_clock clock_tai = {
+		.clock_getres	= hrtimer_get_res,
+		.clock_get	= posix_get_tai,
+	};
 	struct k_clock clock_boottime = {
 		.clock_getres	= hrtimer_get_res,
 		.clock_get	= posix_get_boottime,
@@ -278,6 +287,7 @@ static __init int init_posix_timers(void)
 	posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
 	posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
 	posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
+	posix_timers_register_clock(CLOCK_TAI, &clock_tai);
 
 	posix_timers_cache = kmem_cache_create("posix_timers_cache",
 					sizeof (struct k_itimer), 0, SLAB_PANIC,
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 937098aab498..8a842756572d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -379,6 +379,36 @@ void ktime_get_ts(struct timespec *ts)
 }
 EXPORT_SYMBOL_GPL(ktime_get_ts);
 
+
+/**
+ * timekeeping_clocktai - Returns the TAI time of day in a timespec
+ * @ts:		pointer to the timespec to be set
+ *
+ * Returns the time of day in a timespec.
+ */
+void timekeeping_clocktai(struct timespec *ts)
+{
+	struct timekeeper *tk = &timekeeper;
+	unsigned long seq;
+	u64 nsecs;
+
+	WARN_ON(timekeeping_suspended);
+
+	do {
+		seq = read_seqbegin(&tk->lock);
+
+		ts->tv_sec = tk->xtime_sec + tk->tai_offset;
+		nsecs = timekeeping_get_ns(tk);
+
+	} while (read_seqretry(&tk->lock, seq));
+
+	ts->tv_nsec = 0;
+	timespec_add_ns(ts, nsecs);
+
+}
+EXPORT_SYMBOL(timekeeping_clocktai);
+
+
 #ifdef CONFIG_NTP_PPS
 
 /**
-- 
cgit 


From 9ca5470cc1433200698a43de2d6e683815e536e6 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Sun, 17 Mar 2013 10:34:04 -0300
Subject: [media] v4l2-ctrls: add V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER control

Control whether video sequence headers should be repeated.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/v4l/controls.xml | 6 ++++++
 drivers/media/v4l2-core/v4l2-ctrls.c         | 2 ++
 include/uapi/linux/v4l2-controls.h           | 1 +
 3 files changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
index 9e8f85498678..b4952e23201b 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -2299,6 +2299,12 @@ Possible values are:</entry>
 		</entrytbl>
 	      </row>
 	      <row><entry></entry></row>
+	      <row>
+		<entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER</constant>&nbsp;</entry>
+		<entry>boolean</entry>
+	      </row><row><entry spanname="descr">Repeat the video sequence headers. Repeating these
+headers makes random access to the video stream easier. Applicable to the MPEG1, 2 and 4 encoder.</entry>
+	      </row>
 	      <row>
 		<entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_DECODER_MPEG4_DEBLOCK_FILTER</constant>&nbsp;</entry>
 		<entry>boolean</entry>
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index b36d1ec13ee6..f662df3bfe2d 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -695,6 +695,7 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_MPEG_VIDEO_DEC_PTS:			return "Video Decoder PTS";
 	case V4L2_CID_MPEG_VIDEO_DEC_FRAME:			return "Video Decoder Frame Count";
 	case V4L2_CID_MPEG_VIDEO_VBV_DELAY:			return "Initial Delay for VBV Control";
+	case V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER:		return "Repeat Sequence Header";
 
 	/* CAMERA controls */
 	/* Keep the order of the 'case's the same as in videodev2.h! */
@@ -844,6 +845,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM:
 	case V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_ENABLE:
 	case V4L2_CID_MPEG_VIDEO_MPEG4_QPEL:
+	case V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER:
 	case V4L2_CID_WIDE_DYNAMIC_RANGE:
 	case V4L2_CID_IMAGE_STABILIZATION:
 		*type = V4L2_CTRL_TYPE_BOOLEAN;
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 7eab0b91827b..844dc0205037 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -360,6 +360,7 @@ enum v4l2_mpeg_video_multi_slice_mode {
 #define V4L2_CID_MPEG_VIDEO_DEC_PTS			(V4L2_CID_MPEG_BASE+223)
 #define V4L2_CID_MPEG_VIDEO_DEC_FRAME			(V4L2_CID_MPEG_BASE+224)
 #define V4L2_CID_MPEG_VIDEO_VBV_DELAY			(V4L2_CID_MPEG_BASE+225)
+#define V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER		(V4L2_CID_MPEG_BASE+226)
 
 #define V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP		(V4L2_CID_MPEG_BASE+300)
 #define V4L2_CID_MPEG_VIDEO_H263_P_FRAME_QP		(V4L2_CID_MPEG_BASE+301)
-- 
cgit 


From 81b8cd9284945a323009482418c3d80de1d59d25 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Fri, 15 Feb 2013 15:22:56 -0300
Subject: [media] videodev2.h: remove obsolete DV_PRESET API

This API is now obsolete and can be removed.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/uapi/linux/videodev2.h | 54 ------------------------------------------
 1 file changed, 54 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index b5f5cddcf1c3..9d57dba27412 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -981,52 +981,6 @@ struct v4l2_standard {
 	__u32		     reserved[4];
 };
 
-/* The DV Preset API is deprecated in favor of the DV Timings API.
-   New drivers shouldn't use this anymore! */
-
-/*
- *	V I D E O	T I M I N G S	D V	P R E S E T
- */
-struct v4l2_dv_preset {
-	__u32	preset;
-	__u32	reserved[4];
-};
-
-/*
- *	D V	P R E S E T S	E N U M E R A T I O N
- */
-struct v4l2_dv_enum_preset {
-	__u32	index;
-	__u32	preset;
-	__u8	name[32]; /* Name of the preset timing */
-	__u32	width;
-	__u32	height;
-	__u32	reserved[4];
-};
-
-/*
- * 	D V	P R E S E T	V A L U E S
- */
-#define		V4L2_DV_INVALID		0
-#define		V4L2_DV_480P59_94	1 /* BT.1362 */
-#define		V4L2_DV_576P50		2 /* BT.1362 */
-#define		V4L2_DV_720P24		3 /* SMPTE 296M */
-#define		V4L2_DV_720P25		4 /* SMPTE 296M */
-#define		V4L2_DV_720P30		5 /* SMPTE 296M */
-#define		V4L2_DV_720P50		6 /* SMPTE 296M */
-#define		V4L2_DV_720P59_94	7 /* SMPTE 274M */
-#define		V4L2_DV_720P60		8 /* SMPTE 274M/296M */
-#define		V4L2_DV_1080I29_97	9 /* BT.1120/ SMPTE 274M */
-#define		V4L2_DV_1080I30		10 /* BT.1120/ SMPTE 274M */
-#define		V4L2_DV_1080I25		11 /* BT.1120 */
-#define		V4L2_DV_1080I50		12 /* SMPTE 296M */
-#define		V4L2_DV_1080I60		13 /* SMPTE 296M */
-#define		V4L2_DV_1080P24		14 /* SMPTE 296M */
-#define		V4L2_DV_1080P25		15 /* SMPTE 296M */
-#define		V4L2_DV_1080P30		16 /* SMPTE 296M */
-#define		V4L2_DV_1080P50		17 /* BT.1120 */
-#define		V4L2_DV_1080P60		18 /* BT.1120 */
-
 /*
  *	D V 	B T	T I M I N G S
  */
@@ -1240,7 +1194,6 @@ struct v4l2_input {
 #define V4L2_IN_ST_VTR         0x04000000  /* VTR time constant */
 
 /* capabilities flags */
-#define V4L2_IN_CAP_PRESETS		0x00000001 /* Supports S_DV_PRESET */
 #define V4L2_IN_CAP_DV_TIMINGS		0x00000002 /* Supports S_DV_TIMINGS */
 #define V4L2_IN_CAP_CUSTOM_TIMINGS	V4L2_IN_CAP_DV_TIMINGS /* For compatibility */
 #define V4L2_IN_CAP_STD			0x00000004 /* Supports S_STD */
@@ -1264,7 +1217,6 @@ struct v4l2_output {
 #define V4L2_OUTPUT_TYPE_ANALOGVGAOVERLAY	3
 
 /* capabilities flags */
-#define V4L2_OUT_CAP_PRESETS		0x00000001 /* Supports S_DV_PRESET */
 #define V4L2_OUT_CAP_DV_TIMINGS		0x00000002 /* Supports S_DV_TIMINGS */
 #define V4L2_OUT_CAP_CUSTOM_TIMINGS	V4L2_OUT_CAP_DV_TIMINGS /* For compatibility */
 #define V4L2_OUT_CAP_STD		0x00000004 /* Supports S_STD */
@@ -1981,12 +1933,6 @@ struct v4l2_create_buffers {
 
 #define VIDIOC_S_HW_FREQ_SEEK	 _IOW('V', 82, struct v4l2_hw_freq_seek)
 
-/* These four DV Preset ioctls are deprecated in favor of the DV Timings
-   ioctls. */
-#define	VIDIOC_ENUM_DV_PRESETS	_IOWR('V', 83, struct v4l2_dv_enum_preset)
-#define	VIDIOC_S_DV_PRESET	_IOWR('V', 84, struct v4l2_dv_preset)
-#define	VIDIOC_G_DV_PRESET	_IOWR('V', 85, struct v4l2_dv_preset)
-#define	VIDIOC_QUERY_DV_PRESET	_IOR('V',  86, struct v4l2_dv_preset)
 #define	VIDIOC_S_DV_TIMINGS	_IOWR('V', 87, struct v4l2_dv_timings)
 #define	VIDIOC_G_DV_TIMINGS	_IOWR('V', 88, struct v4l2_dv_timings)
 #define	VIDIOC_DQEVENT		 _IOR('V', 89, struct v4l2_event)
-- 
cgit 


From 79b0c6400517456935f84f8d46c8bb0cf73f1813 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 18 Mar 2013 12:16:34 -0300
Subject: [media] v4l2: add new VIDIOC_DBG_G_CHIP_NAME ioctl

Simplify the debugging ioctls by creating the VIDIOC_DBG_G_CHIP_NAME ioctl.
This will eventually replace VIDIOC_DBG_G_CHIP_IDENT. Chip matching is done
by the name or index of subdevices or an index to a bridge chip. Most of this
can all be done automatically, so most drivers just need to provide get/set
register ops.
In particular, it is now possible to get/set subdev registers without
requiring assistance of the bridge driver.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/v4l2-core/v4l2-common.c |   5 +-
 drivers/media/v4l2-core/v4l2-dev.c    |   5 +-
 drivers/media/v4l2-core/v4l2-ioctl.c  | 115 ++++++++++++++++++++++++++++++++--
 include/media/v4l2-ioctl.h            |   3 +
 include/uapi/linux/videodev2.h        |  34 +++++++---
 5 files changed, 146 insertions(+), 16 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-common.c b/drivers/media/v4l2-core/v4l2-common.c
index a9e7703560ad..f8fac9cefc3c 100644
--- a/drivers/media/v4l2-core/v4l2-common.c
+++ b/drivers/media/v4l2-core/v4l2-common.c
@@ -230,7 +230,7 @@ EXPORT_SYMBOL(v4l2_ctrl_next);
 int v4l2_chip_match_host(const struct v4l2_dbg_match *match)
 {
 	switch (match->type) {
-	case V4L2_CHIP_MATCH_HOST:
+	case V4L2_CHIP_MATCH_BRIDGE:
 		return match->addr == 0;
 	default:
 		return 0;
@@ -254,6 +254,9 @@ int v4l2_chip_match_i2c_client(struct i2c_client *c, const struct v4l2_dbg_match
 		return len && !strncmp(c->driver->driver.name, match->name, len);
 	case V4L2_CHIP_MATCH_I2C_ADDR:
 		return c->addr == match->addr;
+	case V4L2_CHIP_MATCH_SUBDEV_IDX:
+	case V4L2_CHIP_MATCH_SUBDEV_NAME:
+		return 1;
 	default:
 		return 0;
 	}
diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index 1fbd95755e75..670b9ca8ecbe 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -591,9 +591,10 @@ static void determine_valid_ioctls(struct video_device *vdev)
 	SET_VALID_IOCTL(ops, VIDIOC_G_FREQUENCY, vidioc_g_frequency);
 	SET_VALID_IOCTL(ops, VIDIOC_S_FREQUENCY, vidioc_s_frequency);
 	SET_VALID_IOCTL(ops, VIDIOC_LOG_STATUS, vidioc_log_status);
+	set_bit(_IOC_NR(VIDIOC_DBG_G_CHIP_NAME), valid_ioctls);
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-	SET_VALID_IOCTL(ops, VIDIOC_DBG_G_REGISTER, vidioc_g_register);
-	SET_VALID_IOCTL(ops, VIDIOC_DBG_S_REGISTER, vidioc_s_register);
+	set_bit(_IOC_NR(VIDIOC_DBG_G_REGISTER), valid_ioctls);
+	set_bit(_IOC_NR(VIDIOC_DBG_S_REGISTER), valid_ioctls);
 #endif
 	SET_VALID_IOCTL(ops, VIDIOC_DBG_G_CHIP_IDENT, vidioc_g_chip_ident);
 	/* yes, really vidioc_subscribe_event */
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 83aa39dfe07c..336ed2dd607c 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -629,7 +629,8 @@ static void v4l_print_dbg_chip_ident(const void *arg, bool write_only)
 	const struct v4l2_dbg_chip_ident *p = arg;
 
 	pr_cont("type=%u, ", p->match.type);
-	if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER)
+	if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER ||
+	    p->match.type == V4L2_CHIP_MATCH_SUBDEV_NAME)
 		pr_cont("name=%.*s, ",
 				(int)sizeof(p->match.name), p->match.name);
 	else
@@ -638,12 +639,27 @@ static void v4l_print_dbg_chip_ident(const void *arg, bool write_only)
 			p->ident, p->revision);
 }
 
+static void v4l_print_dbg_chip_name(const void *arg, bool write_only)
+{
+	const struct v4l2_dbg_chip_name *p = arg;
+
+	pr_cont("type=%u, ", p->match.type);
+	if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER ||
+	    p->match.type == V4L2_CHIP_MATCH_SUBDEV_NAME)
+		pr_cont("name=%.*s, ",
+				(int)sizeof(p->match.name), p->match.name);
+	else
+		pr_cont("addr=%u, ", p->match.addr);
+	pr_cont("name=%.*s\n", (int)sizeof(p->name), p->name);
+}
+
 static void v4l_print_dbg_register(const void *arg, bool write_only)
 {
 	const struct v4l2_dbg_register *p = arg;
 
 	pr_cont("type=%u, ", p->match.type);
-	if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER)
+	if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER ||
+	    p->match.type == V4L2_CHIP_MATCH_SUBDEV_NAME)
 		pr_cont("name=%.*s, ",
 				(int)sizeof(p->match.name), p->match.name);
 	else
@@ -1775,15 +1791,38 @@ static int v4l_log_status(const struct v4l2_ioctl_ops *ops,
 	return ret;
 }
 
+static bool v4l_dbg_found_match(const struct v4l2_dbg_match *match,
+		struct v4l2_subdev *sd, int idx)
+{
+	if (match->type == V4L2_CHIP_MATCH_SUBDEV_IDX)
+		return match->addr == idx;
+	return !strcmp(match->name, sd->name);
+}
+
 static int v4l_dbg_g_register(const struct v4l2_ioctl_ops *ops,
 				struct file *file, void *fh, void *arg)
 {
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	struct v4l2_dbg_register *p = arg;
+	struct video_device *vfd = video_devdata(file);
+	struct v4l2_subdev *sd;
+	int idx = 0;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	return ops->vidioc_g_register(file, fh, p);
+	if (p->match.type == V4L2_CHIP_MATCH_SUBDEV_IDX ||
+	    p->match.type == V4L2_CHIP_MATCH_SUBDEV_NAME) {
+		if (vfd->v4l2_dev == NULL)
+			return -EINVAL;
+		v4l2_device_for_each_subdev(sd, vfd->v4l2_dev) {
+			if (v4l_dbg_found_match(&p->match, sd, idx++))
+				return v4l2_subdev_call(sd, core, g_register, p);
+		}
+		return -EINVAL;
+	}
+	if (ops->vidioc_g_register)
+		return ops->vidioc_g_register(file, fh, p);
+	return -EINVAL;
 #else
 	return -ENOTTY;
 #endif
@@ -1794,10 +1833,25 @@ static int v4l_dbg_s_register(const struct v4l2_ioctl_ops *ops,
 {
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	const struct v4l2_dbg_register *p = arg;
+	struct video_device *vfd = video_devdata(file);
+	struct v4l2_subdev *sd;
+	int idx = 0;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	return ops->vidioc_s_register(file, fh, p);
+	if (p->match.type == V4L2_CHIP_MATCH_SUBDEV_IDX ||
+	    p->match.type == V4L2_CHIP_MATCH_SUBDEV_NAME) {
+		if (vfd->v4l2_dev == NULL)
+			return -EINVAL;
+		v4l2_device_for_each_subdev(sd, vfd->v4l2_dev) {
+			if (v4l_dbg_found_match(&p->match, sd, idx++))
+				return v4l2_subdev_call(sd, core, s_register, p);
+		}
+		return -EINVAL;
+	}
+	if (ops->vidioc_s_register)
+		return ops->vidioc_s_register(file, fh, p);
+	return -EINVAL;
 #else
 	return -ENOTTY;
 #endif
@@ -1810,9 +1864,61 @@ static int v4l_dbg_g_chip_ident(const struct v4l2_ioctl_ops *ops,
 
 	p->ident = V4L2_IDENT_NONE;
 	p->revision = 0;
+	if (p->match.type == V4L2_CHIP_MATCH_SUBDEV_NAME ||
+	    p->match.type == V4L2_CHIP_MATCH_SUBDEV_IDX)
+		return -EINVAL;
 	return ops->vidioc_g_chip_ident(file, fh, p);
 }
 
+static int v4l_dbg_g_chip_name(const struct v4l2_ioctl_ops *ops,
+				struct file *file, void *fh, void *arg)
+{
+	struct video_device *vfd = video_devdata(file);
+	struct v4l2_dbg_chip_name *p = arg;
+	struct v4l2_subdev *sd;
+	int idx = 0;
+
+	switch (p->match.type) {
+	case V4L2_CHIP_MATCH_BRIDGE:
+#ifdef CONFIG_VIDEO_ADV_DEBUG
+		if (ops->vidioc_s_register)
+			p->flags |= V4L2_CHIP_FL_WRITABLE;
+		if (ops->vidioc_g_register)
+			p->flags |= V4L2_CHIP_FL_READABLE;
+#endif
+		if (ops->vidioc_g_chip_name)
+			return ops->vidioc_g_chip_name(file, fh, arg);
+		if (p->match.addr)
+			return -EINVAL;
+		if (vfd->v4l2_dev)
+			strlcpy(p->name, vfd->v4l2_dev->name, sizeof(p->name));
+		else if (vfd->parent)
+			strlcpy(p->name, vfd->parent->driver->name, sizeof(p->name));
+		else
+			strlcpy(p->name, "bridge", sizeof(p->name));
+		return 0;
+
+	case V4L2_CHIP_MATCH_SUBDEV_IDX:
+	case V4L2_CHIP_MATCH_SUBDEV_NAME:
+		if (vfd->v4l2_dev == NULL)
+			break;
+		v4l2_device_for_each_subdev(sd, vfd->v4l2_dev) {
+			if (v4l_dbg_found_match(&p->match, sd, idx++)) {
+#ifdef CONFIG_VIDEO_ADV_DEBUG
+				if (sd->ops->core && sd->ops->core->s_register)
+					p->flags |= V4L2_CHIP_FL_WRITABLE;
+				if (sd->ops->core && sd->ops->core->g_register)
+					p->flags |= V4L2_CHIP_FL_READABLE;
+#endif
+				strlcpy(p->name, sd->name, sizeof(p->name));
+				return 0;
+			}
+		}
+		break;
+	}
+	return -EINVAL;
+}
+
 static int v4l_dqevent(const struct v4l2_ioctl_ops *ops,
 				struct file *file, void *fh, void *arg)
 {
@@ -2027,6 +2133,7 @@ static struct v4l2_ioctl_info v4l2_ioctls[] = {
 	IOCTL_INFO_STD(VIDIOC_QUERY_DV_TIMINGS, vidioc_query_dv_timings, v4l_print_dv_timings, 0),
 	IOCTL_INFO_STD(VIDIOC_DV_TIMINGS_CAP, vidioc_dv_timings_cap, v4l_print_dv_timings_cap, INFO_FL_CLEAR(v4l2_dv_timings_cap, type)),
 	IOCTL_INFO_FNC(VIDIOC_ENUM_FREQ_BANDS, v4l_enum_freq_bands, v4l_print_freq_band, 0),
+	IOCTL_INFO_FNC(VIDIOC_DBG_G_CHIP_NAME, v4l_dbg_g_chip_name, v4l_print_dbg_chip_name, INFO_FL_CLEAR(v4l2_dbg_chip_name, match)),
 };
 #define V4L2_IOCTLS ARRAY_SIZE(v4l2_ioctls)
 
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index e10c2432f902..75a7dfc07dec 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -247,6 +247,9 @@ struct v4l2_ioctl_ops {
 	int (*vidioc_g_chip_ident)     (struct file *file, void *fh,
 					struct v4l2_dbg_chip_ident *chip);
 
+	int (*vidioc_g_chip_name)      (struct file *file, void *fh,
+					struct v4l2_dbg_chip_name *chip);
+
 	int (*vidioc_enum_framesizes)   (struct file *file, void *fh,
 					 struct v4l2_frmsizeenum *fsize);
 
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 9d57dba27412..e9c49c5e6416 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1807,10 +1807,13 @@ struct v4l2_event_subscription {
 
 /* VIDIOC_DBG_G_REGISTER and VIDIOC_DBG_S_REGISTER */
 
-#define V4L2_CHIP_MATCH_HOST       0  /* Match against chip ID on host (0 for the host) */
-#define V4L2_CHIP_MATCH_I2C_DRIVER 1  /* Match against I2C driver name */
-#define V4L2_CHIP_MATCH_I2C_ADDR   2  /* Match against I2C 7-bit address */
-#define V4L2_CHIP_MATCH_AC97       3  /* Match against anciliary AC97 chip */
+#define V4L2_CHIP_MATCH_BRIDGE      0  /* Match against chip ID on the bridge (0 for the bridge) */
+#define V4L2_CHIP_MATCH_HOST V4L2_CHIP_MATCH_BRIDGE
+#define V4L2_CHIP_MATCH_I2C_DRIVER  1  /* Match against I2C driver name */
+#define V4L2_CHIP_MATCH_I2C_ADDR    2  /* Match against I2C 7-bit address */
+#define V4L2_CHIP_MATCH_AC97        3  /* Match against anciliary AC97 chip */
+#define V4L2_CHIP_MATCH_SUBDEV_NAME 4  /* Match against subdev name */
+#define V4L2_CHIP_MATCH_SUBDEV_IDX  5  /* Match against subdev index */
 
 struct v4l2_dbg_match {
 	__u32 type; /* Match type */
@@ -1834,6 +1837,17 @@ struct v4l2_dbg_chip_ident {
 	__u32 revision;    /* chip revision, chip specific */
 } __attribute__ ((packed));
 
+#define V4L2_CHIP_FL_READABLE (1 << 0)
+#define V4L2_CHIP_FL_WRITABLE (1 << 1)
+
+/* VIDIOC_DBG_G_CHIP_NAME */
+struct v4l2_dbg_chip_name {
+	struct v4l2_dbg_match match;
+	char name[32];
+	__u32 flags;
+	__u32 reserved[8];
+} __attribute__ ((packed));
+
 /**
  * struct v4l2_create_buffers - VIDIOC_CREATE_BUFS argument
  * @index:	on return, index of the first created buffer
@@ -1911,15 +1925,12 @@ struct v4l2_create_buffers {
 #define VIDIOC_G_EXT_CTRLS	_IOWR('V', 71, struct v4l2_ext_controls)
 #define VIDIOC_S_EXT_CTRLS	_IOWR('V', 72, struct v4l2_ext_controls)
 #define VIDIOC_TRY_EXT_CTRLS	_IOWR('V', 73, struct v4l2_ext_controls)
-#if 1
 #define VIDIOC_ENUM_FRAMESIZES	_IOWR('V', 74, struct v4l2_frmsizeenum)
 #define VIDIOC_ENUM_FRAMEINTERVALS _IOWR('V', 75, struct v4l2_frmivalenum)
 #define VIDIOC_G_ENC_INDEX       _IOR('V', 76, struct v4l2_enc_idx)
 #define VIDIOC_ENCODER_CMD      _IOWR('V', 77, struct v4l2_encoder_cmd)
 #define VIDIOC_TRY_ENCODER_CMD  _IOWR('V', 78, struct v4l2_encoder_cmd)
-#endif
 
-#if 1
 /* Experimental, meant for debugging, testing and internal use.
    Only implemented if CONFIG_VIDEO_ADV_DEBUG is defined.
    You must be root to use these ioctls. Never use these in applications! */
@@ -1927,9 +1938,10 @@ struct v4l2_create_buffers {
 #define	VIDIOC_DBG_G_REGISTER 	_IOWR('V', 80, struct v4l2_dbg_register)
 
 /* Experimental, meant for debugging, testing and internal use.
-   Never use this ioctl in applications! */
+   Never use this ioctl in applications!
+   Note: this ioctl is deprecated in favor of VIDIOC_DBG_G_CHIP_NAME and
+   will go away in the future. */
 #define VIDIOC_DBG_G_CHIP_IDENT _IOWR('V', 81, struct v4l2_dbg_chip_ident)
-#endif
 
 #define VIDIOC_S_HW_FREQ_SEEK	 _IOW('V', 82, struct v4l2_hw_freq_seek)
 
@@ -1963,6 +1975,10 @@ struct v4l2_create_buffers {
    versions. */
 #define VIDIOC_ENUM_FREQ_BANDS	_IOWR('V', 101, struct v4l2_frequency_band)
 
+/* Experimental, meant for debugging, testing and internal use.
+   Never use these in applications! */
+#define VIDIOC_DBG_G_CHIP_NAME  _IOWR('V', 102, struct v4l2_dbg_chip_name)
+
 /* Reminder: when adding new ioctls please add support for them to
    drivers/media/video/v4l2-compat-ioctl32.c as well! */
 
-- 
cgit 


From 3edce1cf813aa6a087df7730cec0e67d57288300 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Sun, 17 Mar 2013 21:00:06 +0100
Subject: USB: cdc-wdm: implement IOCTL_WDM_MAX_COMMAND
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Userspace applications need to know the maximum supported message
size.

The cdc-wdm driver translates between a character device stream
and a message based protocol.  Each message is transported as a
usb control message with no further encapsulation or syncronization.
Each read or write on the character device should translate to
exactly one usb control message to ensure that message boundaries
are kept intact.  That means that the userspace application must
know the maximum message size supported by the device and driver,
making this size a vital part of the cdc-wdm character device API.

CDC WDM and CDC MBIM functions export the maximum supported
message size through CDC functional descriptors.  The cdc-wdm and
cdc_mbim drivers will parse these descriptors and use the value
chosen by the device.  The only current way for a userspace
application to retrive the value is by duplicating the descriptor
parsing. This is an unnecessary complex task, and application
writers are likely to postpone it, using a fixed value and adding
a "todo" item.

QMI functions have no way to tell the host what message size they
support.  The qmi_wwan driver use a fixed value based on protocol
recommendations and observed device behaviour.  Userspace
applications must know and hard code the same value.  This scheme
will break if we ever encounter a QMI device needing a device
specific message size quirk.  We are currently unable to support
such a device because using a non default size would break the
implicit userspace API.

The message size is currently a hidden attribute of the cdc-wdm
userspace API.  Retrieving it is unnecessarily complex, increasing
the possibility of drivers and applications using different limits.
The resulting errors are hard to debug, and can only be replicated
on identical hardware.

Exporting the maximum message size from the driver simplifies the
task for the userspace application, and creates a unified
information source independent of device and function class. It also
serves to document that the message size is part of the cdc-wdm
userspace API.

This proposed API extension has been presented for the authors of
userspace applications and libraries using the current API: libmbim,
libqmi, uqmi, oFono and ModemManager.  The replies were:

Aleksander Morgado:
 "We do really need max message size for MBIM; and as you say, it may be
  good to have the max message size info also for QMI, so the new ioctl
  seems a good addition. So +1 from my side, for what it's worth."

Dan Williams:
 "Yeah, +1 here.  I'd prefer the sysfs file, but the fact that that
  doesn't work for fd passing pretty much kills it."

No negative replies are so far received.

Cc: Aleksander Morgado <aleksander@lanedo.com>
Cc: Dan Williams <dcbw@redhat.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Acked-by: Oliver Neukum <oliver@neukum.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ioctl/ioctl-number.txt |  1 +
 drivers/usb/class/cdc-wdm.c          | 19 +++++++++++++++++++
 include/linux/usb/cdc-wdm.h          |  2 ++
 include/uapi/linux/usb/cdc-wdm.h     | 21 +++++++++++++++++++++
 4 files changed, 43 insertions(+)
 create mode 100644 include/uapi/linux/usb/cdc-wdm.h

(limited to 'include/uapi/linux')

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 3210540f8bd3..237acab169dd 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -131,6 +131,7 @@ Code  Seq#(hex)	Include File		Comments
 'H'	40-4F	sound/hdspm.h		conflict!
 'H'	40-4F	sound/hdsp.h		conflict!
 'H'	90	sound/usb/usx2y/usb_stream.h
+'H'	A0	uapi/linux/usb/cdc-wdm.h
 'H'	C0-F0	net/bluetooth/hci.h	conflict!
 'H'	C0-DF	net/bluetooth/hidp/hidp.h	conflict!
 'H'	C0-DF	net/bluetooth/cmtp/cmtp.h	conflict!
diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
index 122d056d96d5..8a230f0ef77c 100644
--- a/drivers/usb/class/cdc-wdm.c
+++ b/drivers/usb/class/cdc-wdm.c
@@ -13,6 +13,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <linux/ioctl.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
@@ -644,6 +645,22 @@ static int wdm_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static long wdm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	struct wdm_device *desc = file->private_data;
+	int rv = 0;
+
+	switch (cmd) {
+	case IOCTL_WDM_MAX_COMMAND:
+		if (copy_to_user((void __user *)arg, &desc->wMaxCommand, sizeof(desc->wMaxCommand)))
+			rv = -EFAULT;
+		break;
+	default:
+		rv = -ENOTTY;
+	}
+	return rv;
+}
+
 static const struct file_operations wdm_fops = {
 	.owner =	THIS_MODULE,
 	.read =		wdm_read,
@@ -652,6 +669,8 @@ static const struct file_operations wdm_fops = {
 	.flush =	wdm_flush,
 	.release =	wdm_release,
 	.poll =		wdm_poll,
+	.unlocked_ioctl = wdm_ioctl,
+	.compat_ioctl = wdm_ioctl,
 	.llseek =	noop_llseek,
 };
 
diff --git a/include/linux/usb/cdc-wdm.h b/include/linux/usb/cdc-wdm.h
index 719c332620fa..0b3f4295c025 100644
--- a/include/linux/usb/cdc-wdm.h
+++ b/include/linux/usb/cdc-wdm.h
@@ -11,6 +11,8 @@
 #ifndef __LINUX_USB_CDC_WDM_H
 #define __LINUX_USB_CDC_WDM_H
 
+#include <uapi/linux/usb/cdc-wdm.h>
+
 extern struct usb_driver *usb_cdc_wdm_register(struct usb_interface *intf,
 					struct usb_endpoint_descriptor *ep,
 					int bufsize,
diff --git a/include/uapi/linux/usb/cdc-wdm.h b/include/uapi/linux/usb/cdc-wdm.h
new file mode 100644
index 000000000000..f03134feebd6
--- /dev/null
+++ b/include/uapi/linux/usb/cdc-wdm.h
@@ -0,0 +1,21 @@
+/*
+ * USB CDC Device Management userspace API definitions
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#ifndef _UAPI__LINUX_USB_CDC_WDM_H
+#define _UAPI__LINUX_USB_CDC_WDM_H
+
+/*
+ * This IOCTL is used to retrieve the wMaxCommand for the device,
+ * defining the message limit for both reading and writing.
+ *
+ * For CDC WDM functions this will be the wMaxCommand field of the
+ * Device Management Functional Descriptor.
+ */
+#define IOCTL_WDM_MAX_COMMAND _IOR('H', 0xA0, __u16)
+
+#endif /* _UAPI__LINUX_USB_CDC_WDM_H */
-- 
cgit 


From a88b9ce5ad4fc633b159b37d3ed8af60a4008fbc Mon Sep 17 00:00:00 2001
From: Hong zhi guo <honkiko@gmail.com>
Date: Mon, 25 Mar 2013 19:04:05 +0000
Subject: netlink: remove duplicated NLMSG_ALIGN

NLMSG_HDRLEN is already aligned value. It's for directly reference
without extra alignment.

The redundant alignment here may confuse the API users.

Signed-off-by: Hong Zhiguo <honkiko@gmail.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/netlink.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 78d5b8a546d6..32a354f67ba4 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -78,7 +78,7 @@ struct nlmsghdr {
 #define NLMSG_ALIGNTO	4U
 #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
 #define NLMSG_HDRLEN	 ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
-#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN))
+#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN)
 #define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
 #define NLMSG_DATA(nlh)  ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
 #define NLMSG_NEXT(nlh,len)	 ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
-- 
cgit 


From 876d6dcdf26a2e860801ec61195e580d03f7b204 Mon Sep 17 00:00:00 2001
From: Paul Clothier <Paul.Clothier@imgtec.com>
Date: Tue, 19 Mar 2013 12:04:43 +0000
Subject: metag: ptrace: Implement NT_METAG_TLS

Implement functionality to get the TLS pointer for the metag
architecture using regsets.

This provides multi-threaded debug support for GDB.

Signed-off-by: Paul Clothier <Paul.Clothier@imgtec.com>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
---
 arch/metag/kernel/ptrace.c | 34 ++++++++++++++++++++++++++++++++++
 include/uapi/linux/elf.h   |  1 +
 2 files changed, 35 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c
index 47a8828615a5..7563628822bd 100644
--- a/arch/metag/kernel/ptrace.c
+++ b/arch/metag/kernel/ptrace.c
@@ -288,10 +288,36 @@ static int metag_rp_state_set(struct task_struct *target,
 	return metag_rp_state_copyin(regs, pos, count, kbuf, ubuf);
 }
 
+static int metag_tls_get(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			void *kbuf, void __user *ubuf)
+{
+	void __user *tls = target->thread.tls_ptr;
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
+}
+
+static int metag_tls_set(struct task_struct *target,
+			const struct user_regset *regset,
+			unsigned int pos, unsigned int count,
+			const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+	void __user *tls;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1);
+	if (ret)
+		return ret;
+
+	target->thread.tls_ptr = tls;
+	return ret;
+}
+
 enum metag_regset {
 	REGSET_GENERAL,
 	REGSET_CBUF,
 	REGSET_READPIPE,
+	REGSET_TLS,
 };
 
 static const struct user_regset metag_regsets[] = {
@@ -319,6 +345,14 @@ static const struct user_regset metag_regsets[] = {
 		.get = metag_rp_state_get,
 		.set = metag_rp_state_set,
 	},
+	[REGSET_TLS] = {
+		.core_note_type = NT_METAG_TLS,
+		.n = 1,
+		.size = sizeof(void *),
+		.align = sizeof(void *),
+		.get = metag_tls_get,
+		.set = metag_tls_set,
+	},
 };
 
 static const struct user_regset_view user_metag_view = {
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 8072d352b98f..ef6103bf1f9b 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -397,6 +397,7 @@ typedef struct elf64_shdr {
 #define NT_ARM_HW_WATCH	0x403		/* ARM hardware watchpoint registers */
 #define NT_METAG_CBUF	0x500		/* Metag catch buffer registers */
 #define NT_METAG_RPIPE	0x501		/* Metag read pipeline state */
+#define NT_METAG_TLS	0x502		/* Metag TLS pointer */
 
 
 /* Note header in a PT_NOTE section */
-- 
cgit 


From e5c5d22e8dcf7c2d430336cbf8e180bd38e8daf1 Mon Sep 17 00:00:00 2001
From: Simon Horman <horms@verge.net.au>
Date: Thu, 28 Mar 2013 13:38:25 +0900
Subject: net: add ETH_P_802_3_MIN

Add a new constant ETH_P_802_3_MIN, the minimum ethernet type for
an 802.3 frame. Frames with a lower value in the ethernet type field
are Ethernet II.

Also update all the users of this value that David Miller and
I could find to use the new constant.

Also correct a bug in util.c. The comparison with ETH_P_802_3_MIN
should be >= not >.

As suggested by Jesse Gross.

Compile tested only.

Cc: David Miller <davem@davemloft.net>
Cc: Jesse Gross <jesse@nicira.com>
Cc: Karsten Keil <isdn@linux-pingi.de>
Cc: John W. Linville <linville@tuxdriver.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Bart De Schuymer <bart.de.schuymer@pandora.be>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Patrick McHardy <kaber@trash.net>
Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Cc: linux-bluetooth@vger.kernel.org
Cc: netfilter-devel@vger.kernel.org
Cc: bridge@lists.linux-foundation.org
Cc: linux-wireless@vger.kernel.org
Cc: linux1394-devel@lists.sourceforge.net
Cc: linux-media@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: dev@openvswitch.org
Acked-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Acked-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/firewire/net.c           |  2 +-
 drivers/isdn/i4l/isdn_net.c      |  2 +-
 drivers/media/dvb-core/dvb_net.c | 10 +++++-----
 drivers/net/ethernet/sun/niu.c   |  2 +-
 drivers/net/plip/plip.c          |  2 +-
 drivers/net/wireless/ray_cs.c    |  2 +-
 include/linux/if_vlan.h          |  2 +-
 include/uapi/linux/if_ether.h    |  3 +++
 net/atm/lec.h                    |  2 +-
 net/bluetooth/bnep/netdev.c      |  2 +-
 net/bridge/netfilter/ebtables.c  |  2 +-
 net/ethernet/eth.c               |  2 +-
 net/mac80211/tx.c                |  2 +-
 net/openvswitch/datapath.c       |  2 +-
 net/openvswitch/flow.c           |  6 +++---
 net/wireless/util.c              |  2 +-
 16 files changed, 24 insertions(+), 21 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index 56796330a162..4d565365e476 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -547,7 +547,7 @@ static int fwnet_finish_incoming_packet(struct net_device *net,
 			if (memcmp(eth->h_dest, net->dev_addr, net->addr_len))
 				skb->pkt_type = PACKET_OTHERHOST;
 		}
-		if (ntohs(eth->h_proto) >= 1536) {
+		if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN) {
 			protocol = eth->h_proto;
 		} else {
 			rawp = (u16 *)skb->data;
diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c
index babc621a07fb..88d657dff474 100644
--- a/drivers/isdn/i4l/isdn_net.c
+++ b/drivers/isdn/i4l/isdn_net.c
@@ -1385,7 +1385,7 @@ isdn_net_type_trans(struct sk_buff *skb, struct net_device *dev)
 		if (memcmp(eth->h_dest, dev->dev_addr, ETH_ALEN))
 			skb->pkt_type = PACKET_OTHERHOST;
 	}
-	if (ntohs(eth->h_proto) >= 1536)
+	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
 		return eth->h_proto;
 
 	rawp = skb->data;
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
index 44225b186f6d..83a23afb13ab 100644
--- a/drivers/media/dvb-core/dvb_net.c
+++ b/drivers/media/dvb-core/dvb_net.c
@@ -185,7 +185,7 @@ static __be16 dvb_net_eth_type_trans(struct sk_buff *skb,
 			skb->pkt_type=PACKET_MULTICAST;
 	}
 
-	if (ntohs(eth->h_proto) >= 1536)
+	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
 		return eth->h_proto;
 
 	rawp = skb->data;
@@ -228,9 +228,9 @@ static int ule_test_sndu( struct dvb_net_priv *p )
 static int ule_bridged_sndu( struct dvb_net_priv *p )
 {
 	struct ethhdr *hdr = (struct ethhdr*) p->ule_next_hdr;
-	if(ntohs(hdr->h_proto) < 1536) {
+	if(ntohs(hdr->h_proto) < ETH_P_802_3_MIN) {
 		int framelen = p->ule_sndu_len - ((p->ule_next_hdr+sizeof(struct ethhdr)) - p->ule_skb->data);
-		/* A frame Type < 1536 for a bridged frame, introduces a LLC Length field. */
+		/* A frame Type < ETH_P_802_3_MIN for a bridged frame, introduces a LLC Length field. */
 		if(framelen != ntohs(hdr->h_proto)) {
 			return -1;
 		}
@@ -320,7 +320,7 @@ static int handle_ule_extensions( struct dvb_net_priv *p )
 			(int) p->ule_sndu_type, l, total_ext_len);
 #endif
 
-	} while (p->ule_sndu_type < 1536);
+	} while (p->ule_sndu_type < ETH_P_802_3_MIN);
 
 	return total_ext_len;
 }
@@ -712,7 +712,7 @@ static void dvb_net_ule( struct net_device *dev, const u8 *buf, size_t buf_len )
 				}
 
 				/* Handle ULE Extension Headers. */
-				if (priv->ule_sndu_type < 1536) {
+				if (priv->ule_sndu_type < ETH_P_802_3_MIN) {
 					/* There is an extension header.  Handle it accordingly. */
 					int l = handle_ule_extensions(priv);
 					if (l < 0) {
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index e4c1c88e4c2a..95cff98d8a34 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -6618,7 +6618,7 @@ static u64 niu_compute_tx_flags(struct sk_buff *skb, struct ethhdr *ehdr,
 	       (len << TXHDR_LEN_SHIFT) |
 	       ((l3off / 2) << TXHDR_L3START_SHIFT) |
 	       (ihl << TXHDR_IHL_SHIFT) |
-	       ((eth_proto_inner < 1536) ? TXHDR_LLC : 0) |
+	       ((eth_proto_inner < ETH_P_802_3_MIN) ? TXHDR_LLC : 0) |
 	       ((eth_proto == ETH_P_8021Q) ? TXHDR_VLAN : 0) |
 	       (ipv6 ? TXHDR_IP_VER : 0) |
 	       csum_bits);
diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c
index bed62d9c53c8..1f7bef90b467 100644
--- a/drivers/net/plip/plip.c
+++ b/drivers/net/plip/plip.c
@@ -560,7 +560,7 @@ static __be16 plip_type_trans(struct sk_buff *skb, struct net_device *dev)
 	 *	so don't forget to remove it.
 	 */
 
-	if (ntohs(eth->h_proto) >= 1536)
+	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
 		return eth->h_proto;
 
 	rawp = skb->data;
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 4775b5d172d5..ebada812b3a5 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -953,7 +953,7 @@ static int translate_frame(ray_dev_t *local, struct tx_msg __iomem *ptx,
 			   unsigned char *data, int len)
 {
 	__be16 proto = ((struct ethhdr *)data)->h_proto;
-	if (ntohs(proto) >= 1536) { /* DIX II ethernet frame */
+	if (ntohs(proto) >= ETH_P_802_3_MIN) { /* DIX II ethernet frame */
 		pr_debug("ray_cs translate_frame DIX II\n");
 		/* Copy LLC header to card buffer */
 		memcpy_toio(&ptx->var, eth2_llc, sizeof(eth2_llc));
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 218a3b686d90..70962f3fdb79 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -339,7 +339,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb,
 	 */
 
 	proto = vhdr->h_vlan_encapsulated_proto;
-	if (ntohs(proto) >= 1536) {
+	if (ntohs(proto) >= ETH_P_802_3_MIN) {
 		skb->protocol = proto;
 		return;
 	}
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 798032d01112..ade07f1c491a 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -94,6 +94,9 @@
 #define ETH_P_EDSA	0xDADA		/* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */
 #define ETH_P_AF_IUCV   0xFBFB		/* IBM af_iucv [ NOT AN OFFICIALLY REGISTERED ID ] */
 
+#define ETH_P_802_3_MIN	0x0600		/* If the value in the ethernet type is less than this value
+					 * then the frame is Ethernet II. Else it is 802.3 */
+
 /*
  *	Non DIX types. Won't clash for 1500 types.
  */
diff --git a/net/atm/lec.h b/net/atm/lec.h
index a86aff9a3c04..4149db1b7885 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -58,7 +58,7 @@ struct lane2_ops {
  *    field in h_type field. Data follows immediately after header.
  * 2. LLC Data frames whose total length, including LLC field and data,
  *    but not padding required to meet the minimum data frame length,
- *    is less than 1536(0x0600) MUST be encoded by placing that length
+ *    is less than ETH_P_802_3_MIN MUST be encoded by placing that length
  *    in the h_type field. The LLC field follows header immediately.
  * 3. LLC data frames longer than this maximum MUST be encoded by placing
  *    the value 0 in the h_type field.
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index e58c8b32589c..4b488ec26105 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -136,7 +136,7 @@ static u16 bnep_net_eth_proto(struct sk_buff *skb)
 	struct ethhdr *eh = (void *) skb->data;
 	u16 proto = ntohs(eh->h_proto);
 
-	if (proto >= 1536)
+	if (proto >= ETH_P_802_3_MIN)
 		return proto;
 
 	if (get_unaligned((__be16 *) skb->data) == htons(0xFFFF))
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 8d493c91a562..3d110c4fc787 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -138,7 +138,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
 		ethproto = h->h_proto;
 
 	if (e->bitmask & EBT_802_3) {
-		if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO))
+		if (FWINV2(ntohs(ethproto) >= ETH_P_802_3_MIN, EBT_IPROTO))
 			return 1;
 	} else if (!(e->bitmask & EBT_NOPROTO) &&
 	   FWINV2(e->ethproto != ethproto, EBT_IPROTO))
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index a36c85eab5b4..5359560926bc 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -195,7 +195,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 	if (netdev_uses_trailer_tags(dev))
 		return htons(ETH_P_TRAILER);
 
-	if (ntohs(eth->h_proto) >= 1536)
+	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
 		return eth->h_proto;
 
 	/*
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 8914d2d2881a..4e8a86163fc7 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2085,7 +2085,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 		encaps_data = bridge_tunnel_header;
 		encaps_len = sizeof(bridge_tunnel_header);
 		skip_header_bytes -= 2;
-	} else if (ethertype >= 0x600) {
+	} else if (ethertype >= ETH_P_802_3_MIN) {
 		encaps_data = rfc1042_header;
 		encaps_len = sizeof(rfc1042_header);
 		skip_header_bytes -= 2;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index d61cd9971808..8759265a3e46 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -681,7 +681,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
 	/* Normally, setting the skb 'protocol' field would be handled by a
 	 * call to eth_type_trans(), but it assumes there's a sending
 	 * device, which we may not have. */
-	if (ntohs(eth->h_proto) >= 1536)
+	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
 		packet->protocol = eth->h_proto;
 	else
 		packet->protocol = htons(ETH_P_802_2);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index fe0e4215c73d..332486839347 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -466,7 +466,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 	proto = *(__be16 *) skb->data;
 	__skb_pull(skb, sizeof(__be16));
 
-	if (ntohs(proto) >= 1536)
+	if (ntohs(proto) >= ETH_P_802_3_MIN)
 		return proto;
 
 	if (skb->len < sizeof(struct llc_snap_hdr))
@@ -483,7 +483,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 
 	__skb_pull(skb, sizeof(struct llc_snap_hdr));
 
-	if (ntohs(llc->ethertype) >= 1536)
+	if (ntohs(llc->ethertype) >= ETH_P_802_3_MIN)
 		return llc->ethertype;
 
 	return htons(ETH_P_802_2);
@@ -1038,7 +1038,7 @@ int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
 
 	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
 		swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
-		if (ntohs(swkey->eth.type) < 1536)
+		if (ntohs(swkey->eth.type) < ETH_P_802_3_MIN)
 			return -EINVAL;
 		attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
 	} else {
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 37a56ee1e1ed..6cbac99ae03d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -511,7 +511,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
 		encaps_data = bridge_tunnel_header;
 		encaps_len = sizeof(bridge_tunnel_header);
 		skip_header_bytes -= 2;
-	} else if (ethertype > 0x600) {
+	} else if (ethertype >= ETH_P_802_3_MIN) {
 		encaps_data = rfc1042_header;
 		encaps_len = sizeof(rfc1042_header);
 		skip_header_bytes -= 2;
-- 
cgit 


From aec330a8cbd373d78c327201ec28fd5f252f3846 Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Tue, 26 Mar 2013 22:47:23 -0300
Subject: [media] v4l2: Add standard controls for FM receivers

This commit introduces new class of standard controls
V4L2_CTRL_CLASS_FM_RX. This class is intended to all controls
pertaining to FM receiver chips. Also, two controls belonging to said
class are added as a part of this commit: V4L2_CID_TUNE_DEEMPHASIS and
V4L2_CID_RDS_RECEPTION.
This patch is based on the code found in the patch by Manjunatha Halli [1]
[1] http://lists-archives.com/linux-kernel/27641307-new-control-class-and-features-for-fm-rx.html

Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/v4l2-core/v4l2-ctrls.c | 14 +++++++++++---
 include/uapi/linux/v4l2-controls.h   | 13 +++++++++++++
 2 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index f662df3bfe2d..ec89fd16361c 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -297,8 +297,8 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		"Text",
 		NULL
 	};
-	static const char * const tune_preemphasis[] = {
-		"No Preemphasis",
+	static const char * const tune_emphasis[] = {
+		"None",
 		"50 Microseconds",
 		"75 Microseconds",
 		NULL,
@@ -508,7 +508,9 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 	case V4L2_CID_SCENE_MODE:
 		return scene_mode;
 	case V4L2_CID_TUNE_PREEMPHASIS:
-		return tune_preemphasis;
+		return tune_emphasis;
+	case V4L2_CID_TUNE_DEEMPHASIS:
+		return tune_emphasis;
 	case V4L2_CID_FLASH_LED_MODE:
 		return flash_led_mode;
 	case V4L2_CID_FLASH_STROBE_SOURCE:
@@ -800,6 +802,9 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_DV_RX_POWER_PRESENT:	return "Power Present";
 	case V4L2_CID_DV_RX_RGB_RANGE:		return "Rx RGB Quantization Range";
 
+	case V4L2_CID_FM_RX_CLASS:		return "FM Radio Receiver Controls";
+	case V4L2_CID_TUNE_DEEMPHASIS:		return "De-Emphasis";
+	case V4L2_CID_RDS_RECEPTION:		return "RDS Reception";
 	default:
 		return NULL;
 	}
@@ -848,6 +853,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER:
 	case V4L2_CID_WIDE_DYNAMIC_RANGE:
 	case V4L2_CID_IMAGE_STABILIZATION:
+	case V4L2_CID_RDS_RECEPTION:
 		*type = V4L2_CTRL_TYPE_BOOLEAN;
 		*min = 0;
 		*max = *step = 1;
@@ -906,6 +912,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_DV_TX_RGB_RANGE:
 	case V4L2_CID_DV_RX_RGB_RANGE:
 	case V4L2_CID_TEST_PATTERN:
+	case V4L2_CID_TUNE_DEEMPHASIS:
 		*type = V4L2_CTRL_TYPE_MENU;
 		break;
 	case V4L2_CID_LINK_FREQ:
@@ -928,6 +935,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type,
 	case V4L2_CID_IMAGE_SOURCE_CLASS:
 	case V4L2_CID_IMAGE_PROC_CLASS:
 	case V4L2_CID_DV_CLASS:
+	case V4L2_CID_FM_RX_CLASS:
 		*type = V4L2_CTRL_TYPE_CTRL_CLASS;
 		/* You can neither read not write these */
 		*flags |= V4L2_CTRL_FLAG_READ_ONLY | V4L2_CTRL_FLAG_WRITE_ONLY;
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 844dc0205037..7d2604877a73 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -59,6 +59,7 @@
 #define V4L2_CTRL_CLASS_IMAGE_SOURCE	0x009e0000	/* Image source controls */
 #define V4L2_CTRL_CLASS_IMAGE_PROC	0x009f0000	/* Image processing controls */
 #define V4L2_CTRL_CLASS_DV		0x00a00000	/* Digital Video controls */
+#define V4L2_CTRL_CLASS_FM_RX		0x00a10000	/* Digital Video controls */
 
 /* User-class control IDs */
 
@@ -835,4 +836,16 @@ enum v4l2_dv_rgb_range {
 #define	V4L2_CID_DV_RX_POWER_PRESENT		(V4L2_CID_DV_CLASS_BASE + 100)
 #define V4L2_CID_DV_RX_RGB_RANGE		(V4L2_CID_DV_CLASS_BASE + 101)
 
+#define V4L2_CID_FM_RX_CLASS_BASE		(V4L2_CTRL_CLASS_FM_RX | 0x900)
+#define V4L2_CID_FM_RX_CLASS			(V4L2_CTRL_CLASS_FM_RX | 1)
+
+#define V4L2_CID_TUNE_DEEMPHASIS		(V4L2_CID_FM_RX_CLASS_BASE + 1)
+enum v4l2_deemphasis {
+	V4L2_DEEMPHASIS_DISABLED	= V4L2_PREEMPHASIS_DISABLED,
+	V4L2_DEEMPHASIS_50_uS		= V4L2_PREEMPHASIS_50_uS,
+	V4L2_DEEMPHASIS_75_uS		= V4L2_PREEMPHASIS_75_uS,
+};
+
+#define V4L2_CID_RDS_RECEPTION			(V4L2_CID_FM_RX_CLASS_BASE + 2)
+
 #endif
-- 
cgit 


From 33a80fc2eb43afbc25f04cc1d5ed899da213c21f Mon Sep 17 00:00:00 2001
From: Andrey Smirnov <andrew.smirnov@gmail.com>
Date: Tue, 26 Mar 2013 22:47:25 -0300
Subject: [media] v4l2: Add private controls base for SI476X

Add a base to be used for allocation of all the SI476X specific
controls in the corresponding driver.

Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/uapi/linux/v4l2-controls.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 7d2604877a73..910d7cd3d9f4 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -156,6 +156,10 @@ enum v4l2_colorfx {
  * We reserve 8 controls for this driver. */
 #define V4L2_CID_USER_S2255_BASE		(V4L2_CID_USER_BASE + 0x1010)
 
+/* The base for the si476x driver controls. See include/media/si476x.h for the list
+ * of controls. Total of 16 controls is reserved for that driver */
+#define V4L2_CID_USER_SI476X_BASE		(V4L2_CID_USER_BASE + 0x1010)
+
 /* MPEG-class control IDs */
 
 #define V4L2_CID_MPEG_BASE 			(V4L2_CTRL_CLASS_MPEG | 0x900)
-- 
cgit 


From 2cb5972da8ee95c58db3375c288563f3b75bc61c Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 29 Mar 2013 10:14:32 -0300
Subject: [media] v4l2-controls.h: update private control ranges to prevent
 overlap

These ranges shouldn't overlap.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/uapi/linux/v4l2-controls.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 910d7cd3d9f4..7da22cec30cd 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -153,12 +153,12 @@ enum v4l2_colorfx {
 
 
 /* The base for the s2255 driver controls.
- * We reserve 8 controls for this driver. */
-#define V4L2_CID_USER_S2255_BASE		(V4L2_CID_USER_BASE + 0x1010)
+ * We reserve 16 controls for this driver. */
+#define V4L2_CID_USER_S2255_BASE		(V4L2_CID_USER_BASE + 0x1030)
 
 /* The base for the si476x driver controls. See include/media/si476x.h for the list
- * of controls. Total of 16 controls is reserved for that driver */
-#define V4L2_CID_USER_SI476X_BASE		(V4L2_CID_USER_BASE + 0x1010)
+ * of controls. Total of 16 controls is reserved for this driver */
+#define V4L2_CID_USER_SI476X_BASE		(V4L2_CID_USER_BASE + 0x1040)
 
 /* MPEG-class control IDs */
 
-- 
cgit 


From 22e3880a76bb9a0c4fa5c8fefdc8697a36a4dae1 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Fri, 29 Mar 2013 14:46:52 +0100
Subject: openvswitch: Expose <linux/openvswitch.h> to userspace

It contains the public netlink interface bits required by userspace to
make use of the interface.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 include/linux/openvswitch.h      | 433 +------------------------------------
 include/uapi/linux/Kbuild        |   1 +
 include/uapi/linux/openvswitch.h | 456 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 458 insertions(+), 432 deletions(-)
 create mode 100644 include/uapi/linux/openvswitch.h

(limited to 'include/uapi/linux')

diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
index 8b9d7217eddc..e6b240b6196c 100644
--- a/include/linux/openvswitch.h
+++ b/include/linux/openvswitch.h
@@ -19,437 +19,6 @@
 #ifndef _LINUX_OPENVSWITCH_H
 #define _LINUX_OPENVSWITCH_H 1
 
-#include <linux/types.h>
-#include <linux/if_ether.h>
-
-/**
- * struct ovs_header - header for OVS Generic Netlink messages.
- * @dp_ifindex: ifindex of local port for datapath (0 to make a request not
- * specific to a datapath).
- *
- * Attributes following the header are specific to a particular OVS Generic
- * Netlink family, but all of the OVS families use this header.
- */
-
-struct ovs_header {
-	int dp_ifindex;
-};
-
-/* Datapaths. */
-
-#define OVS_DATAPATH_FAMILY  "ovs_datapath"
-#define OVS_DATAPATH_MCGROUP "ovs_datapath"
-#define OVS_DATAPATH_VERSION 0x1
-
-enum ovs_datapath_cmd {
-	OVS_DP_CMD_UNSPEC,
-	OVS_DP_CMD_NEW,
-	OVS_DP_CMD_DEL,
-	OVS_DP_CMD_GET,
-	OVS_DP_CMD_SET
-};
-
-/**
- * enum ovs_datapath_attr - attributes for %OVS_DP_* commands.
- * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local
- * port".  This is the name of the network device whose dp_ifindex is given in
- * the &struct ovs_header.  Always present in notifications.  Required in
- * %OVS_DP_NEW requests.  May be used as an alternative to specifying
- * dp_ifindex in other requests (with a dp_ifindex of 0).
- * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially
- * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
- * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
- * not be sent.
- * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
- * datapath.  Always present in notifications.
- *
- * These attributes follow the &struct ovs_header within the Generic Netlink
- * payload for %OVS_DP_* commands.
- */
-enum ovs_datapath_attr {
-	OVS_DP_ATTR_UNSPEC,
-	OVS_DP_ATTR_NAME,       /* name of dp_ifindex netdev */
-	OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */
-	OVS_DP_ATTR_STATS,      /* struct ovs_dp_stats */
-	__OVS_DP_ATTR_MAX
-};
-
-#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
-
-struct ovs_dp_stats {
-	__u64 n_hit;             /* Number of flow table matches. */
-	__u64 n_missed;          /* Number of flow table misses. */
-	__u64 n_lost;            /* Number of misses not sent to userspace. */
-	__u64 n_flows;           /* Number of flows present */
-};
-
-struct ovs_vport_stats {
-	__u64   rx_packets;		/* total packets received       */
-	__u64   tx_packets;		/* total packets transmitted    */
-	__u64   rx_bytes;		/* total bytes received         */
-	__u64   tx_bytes;		/* total bytes transmitted      */
-	__u64   rx_errors;		/* bad packets received         */
-	__u64   tx_errors;		/* packet transmit problems     */
-	__u64   rx_dropped;		/* no space in linux buffers    */
-	__u64   tx_dropped;		/* no space available in linux  */
-};
-
-/* Fixed logical ports. */
-#define OVSP_LOCAL      ((__u32)0)
-
-/* Packet transfer. */
-
-#define OVS_PACKET_FAMILY "ovs_packet"
-#define OVS_PACKET_VERSION 0x1
-
-enum ovs_packet_cmd {
-	OVS_PACKET_CMD_UNSPEC,
-
-	/* Kernel-to-user notifications. */
-	OVS_PACKET_CMD_MISS,    /* Flow table miss. */
-	OVS_PACKET_CMD_ACTION,  /* OVS_ACTION_ATTR_USERSPACE action. */
-
-	/* Userspace commands. */
-	OVS_PACKET_CMD_EXECUTE  /* Apply actions to a packet. */
-};
-
-/**
- * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands.
- * @OVS_PACKET_ATTR_PACKET: Present for all notifications.  Contains the entire
- * packet as received, from the start of the Ethernet header onward.  For
- * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by
- * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is
- * the flow key extracted from the packet as originally received.
- * @OVS_PACKET_ATTR_KEY: Present for all notifications.  Contains the flow key
- * extracted from the packet as nested %OVS_KEY_ATTR_* attributes.  This allows
- * userspace to adapt its flow setup strategy by comparing its notion of the
- * flow key against the kernel's.
- * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet.  Used
- * for %OVS_PACKET_CMD_EXECUTE.  It has nested %OVS_ACTION_ATTR_* attributes.
- * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
- * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
- * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content
- * specified there.
- *
- * These attributes follow the &struct ovs_header within the Generic Netlink
- * payload for %OVS_PACKET_* commands.
- */
-enum ovs_packet_attr {
-	OVS_PACKET_ATTR_UNSPEC,
-	OVS_PACKET_ATTR_PACKET,      /* Packet data. */
-	OVS_PACKET_ATTR_KEY,         /* Nested OVS_KEY_ATTR_* attributes. */
-	OVS_PACKET_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
-	OVS_PACKET_ATTR_USERDATA,    /* OVS_ACTION_ATTR_USERSPACE arg. */
-	__OVS_PACKET_ATTR_MAX
-};
-
-#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1)
-
-/* Virtual ports. */
-
-#define OVS_VPORT_FAMILY  "ovs_vport"
-#define OVS_VPORT_MCGROUP "ovs_vport"
-#define OVS_VPORT_VERSION 0x1
-
-enum ovs_vport_cmd {
-	OVS_VPORT_CMD_UNSPEC,
-	OVS_VPORT_CMD_NEW,
-	OVS_VPORT_CMD_DEL,
-	OVS_VPORT_CMD_GET,
-	OVS_VPORT_CMD_SET
-};
-
-enum ovs_vport_type {
-	OVS_VPORT_TYPE_UNSPEC,
-	OVS_VPORT_TYPE_NETDEV,   /* network device */
-	OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
-	__OVS_VPORT_TYPE_MAX
-};
-
-#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1)
-
-/**
- * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands.
- * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath.
- * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type
- * of vport.
- * @OVS_VPORT_ATTR_NAME: Name of vport.  For a vport based on a network device
- * this is the name of the network device.  Maximum length %IFNAMSIZ-1 bytes
- * plus a null terminator.
- * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
- * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
- * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
- * this port.  A value of zero indicates that upcalls should not be sent.
- * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
- * packets sent or received through the vport.
- *
- * These attributes follow the &struct ovs_header within the Generic Netlink
- * payload for %OVS_VPORT_* commands.
- *
- * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and
- * %OVS_VPORT_ATTR_NAME attributes are required.  %OVS_VPORT_ATTR_PORT_NO is
- * optional; if not specified a free port number is automatically selected.
- * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type
- * of vport.
- * and other attributes are ignored.
- *
- * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to
- * look up the vport to operate on; otherwise dp_idx from the &struct
- * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport.
- */
-enum ovs_vport_attr {
-	OVS_VPORT_ATTR_UNSPEC,
-	OVS_VPORT_ATTR_PORT_NO,	/* u32 port number within datapath */
-	OVS_VPORT_ATTR_TYPE,	/* u32 OVS_VPORT_TYPE_* constant. */
-	OVS_VPORT_ATTR_NAME,	/* string name, up to IFNAMSIZ bytes long */
-	OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
-	OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */
-	OVS_VPORT_ATTR_STATS,	/* struct ovs_vport_stats */
-	__OVS_VPORT_ATTR_MAX
-};
-
-#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
-
-/* Flows. */
-
-#define OVS_FLOW_FAMILY  "ovs_flow"
-#define OVS_FLOW_MCGROUP "ovs_flow"
-#define OVS_FLOW_VERSION 0x1
-
-enum ovs_flow_cmd {
-	OVS_FLOW_CMD_UNSPEC,
-	OVS_FLOW_CMD_NEW,
-	OVS_FLOW_CMD_DEL,
-	OVS_FLOW_CMD_GET,
-	OVS_FLOW_CMD_SET
-};
-
-struct ovs_flow_stats {
-	__u64 n_packets;         /* Number of matched packets. */
-	__u64 n_bytes;           /* Number of matched bytes. */
-};
-
-enum ovs_key_attr {
-	OVS_KEY_ATTR_UNSPEC,
-	OVS_KEY_ATTR_ENCAP,	/* Nested set of encapsulated attributes. */
-	OVS_KEY_ATTR_PRIORITY,  /* u32 skb->priority */
-	OVS_KEY_ATTR_IN_PORT,   /* u32 OVS dp port number */
-	OVS_KEY_ATTR_ETHERNET,  /* struct ovs_key_ethernet */
-	OVS_KEY_ATTR_VLAN,	/* be16 VLAN TCI */
-	OVS_KEY_ATTR_ETHERTYPE,	/* be16 Ethernet type */
-	OVS_KEY_ATTR_IPV4,      /* struct ovs_key_ipv4 */
-	OVS_KEY_ATTR_IPV6,      /* struct ovs_key_ipv6 */
-	OVS_KEY_ATTR_TCP,       /* struct ovs_key_tcp */
-	OVS_KEY_ATTR_UDP,       /* struct ovs_key_udp */
-	OVS_KEY_ATTR_ICMP,      /* struct ovs_key_icmp */
-	OVS_KEY_ATTR_ICMPV6,    /* struct ovs_key_icmpv6 */
-	OVS_KEY_ATTR_ARP,       /* struct ovs_key_arp */
-	OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
-	OVS_KEY_ATTR_SKB_MARK,  /* u32 skb mark */
-	__OVS_KEY_ATTR_MAX
-};
-
-#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
-
-/**
- * enum ovs_frag_type - IPv4 and IPv6 fragment type
- * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
- * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
- * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
- *
- * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct
- * ovs_key_ipv6.
- */
-enum ovs_frag_type {
-	OVS_FRAG_TYPE_NONE,
-	OVS_FRAG_TYPE_FIRST,
-	OVS_FRAG_TYPE_LATER,
-	__OVS_FRAG_TYPE_MAX
-};
-
-#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
-
-struct ovs_key_ethernet {
-	__u8	 eth_src[ETH_ALEN];
-	__u8	 eth_dst[ETH_ALEN];
-};
-
-struct ovs_key_ipv4 {
-	__be32 ipv4_src;
-	__be32 ipv4_dst;
-	__u8   ipv4_proto;
-	__u8   ipv4_tos;
-	__u8   ipv4_ttl;
-	__u8   ipv4_frag;	/* One of OVS_FRAG_TYPE_*. */
-};
-
-struct ovs_key_ipv6 {
-	__be32 ipv6_src[4];
-	__be32 ipv6_dst[4];
-	__be32 ipv6_label;	/* 20-bits in least-significant bits. */
-	__u8   ipv6_proto;
-	__u8   ipv6_tclass;
-	__u8   ipv6_hlimit;
-	__u8   ipv6_frag;	/* One of OVS_FRAG_TYPE_*. */
-};
-
-struct ovs_key_tcp {
-	__be16 tcp_src;
-	__be16 tcp_dst;
-};
-
-struct ovs_key_udp {
-	__be16 udp_src;
-	__be16 udp_dst;
-};
-
-struct ovs_key_icmp {
-	__u8 icmp_type;
-	__u8 icmp_code;
-};
-
-struct ovs_key_icmpv6 {
-	__u8 icmpv6_type;
-	__u8 icmpv6_code;
-};
-
-struct ovs_key_arp {
-	__be32 arp_sip;
-	__be32 arp_tip;
-	__be16 arp_op;
-	__u8   arp_sha[ETH_ALEN];
-	__u8   arp_tha[ETH_ALEN];
-};
-
-struct ovs_key_nd {
-	__u32 nd_target[4];
-	__u8  nd_sll[ETH_ALEN];
-	__u8  nd_tll[ETH_ALEN];
-};
-
-/**
- * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
- * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
- * key.  Always present in notifications.  Required for all requests (except
- * dumps).
- * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
- * the actions to take for packets that match the key.  Always present in
- * notifications.  Required for %OVS_FLOW_CMD_NEW requests, optional for
- * %OVS_FLOW_CMD_SET requests.
- * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
- * flow.  Present in notifications if the stats would be nonzero.  Ignored in
- * requests.
- * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the
- * TCP flags seen on packets in this flow.  Only present in notifications for
- * TCP flows, and only if it would be nonzero.  Ignored in requests.
- * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on
- * the system monotonic clock, at which a packet was last processed for this
- * flow.  Only present in notifications if a packet has been processed for this
- * flow.  Ignored in requests.
- * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
- * last-used time, accumulated TCP flags, and statistics for this flow.
- * Otherwise ignored in requests.  Never present in notifications.
- *
- * These attributes follow the &struct ovs_header within the Generic Netlink
- * payload for %OVS_FLOW_* commands.
- */
-enum ovs_flow_attr {
-	OVS_FLOW_ATTR_UNSPEC,
-	OVS_FLOW_ATTR_KEY,       /* Sequence of OVS_KEY_ATTR_* attributes. */
-	OVS_FLOW_ATTR_ACTIONS,   /* Nested OVS_ACTION_ATTR_* attributes. */
-	OVS_FLOW_ATTR_STATS,     /* struct ovs_flow_stats. */
-	OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
-	OVS_FLOW_ATTR_USED,      /* u64 msecs last used in monotonic time. */
-	OVS_FLOW_ATTR_CLEAR,     /* Flag to clear stats, tcp_flags, used. */
-	__OVS_FLOW_ATTR_MAX
-};
-
-#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
-
-/**
- * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
- * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
- * @OVS_ACTION_ATTR_SAMPLE.  A value of 0 samples no packets, a value of
- * %UINT32_MAX samples all packets and intermediate values sample intermediate
- * fractions of packets.
- * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
- * Actions are passed as nested attributes.
- *
- * Executes the specified actions with the given probability on a per-packet
- * basis.
- */
-enum ovs_sample_attr {
-	OVS_SAMPLE_ATTR_UNSPEC,
-	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
-	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
-	__OVS_SAMPLE_ATTR_MAX,
-};
-
-#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
-
-/**
- * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
- * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
- * message should be sent.  Required.
- * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is
- * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA.
- */
-enum ovs_userspace_attr {
-	OVS_USERSPACE_ATTR_UNSPEC,
-	OVS_USERSPACE_ATTR_PID,	      /* u32 Netlink PID to receive upcalls. */
-	OVS_USERSPACE_ATTR_USERDATA,  /* Optional user-specified cookie. */
-	__OVS_USERSPACE_ATTR_MAX
-};
-
-#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
-
-/**
- * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
- * @vlan_tpid: Tag protocol identifier (TPID) to push.
- * @vlan_tci: Tag control identifier (TCI) to push.  The CFI bit must be set
- * (but it will not be set in the 802.1Q header that is pushed).
- *
- * The @vlan_tpid value is typically %ETH_P_8021Q.  The only acceptable TPID
- * values are those that the kernel module also parses as 802.1Q headers, to
- * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN
- * from having surprising results.
- */
-struct ovs_action_push_vlan {
-	__be16 vlan_tpid;	/* 802.1Q TPID. */
-	__be16 vlan_tci;	/* 802.1Q TCI (VLAN ID and priority). */
-};
-
-/**
- * enum ovs_action_attr - Action types.
- *
- * @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
- * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested
- * %OVS_USERSPACE_ATTR_* attributes.
- * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header.  The
- * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
- * value.
- * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
- * packet.
- * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
- * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in
- * the nested %OVS_SAMPLE_ATTR_* attributes.
- *
- * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
- * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
- * type may not be changed.
- */
-
-enum ovs_action_attr {
-	OVS_ACTION_ATTR_UNSPEC,
-	OVS_ACTION_ATTR_OUTPUT,	      /* u32 port number. */
-	OVS_ACTION_ATTR_USERSPACE,    /* Nested OVS_USERSPACE_ATTR_*. */
-	OVS_ACTION_ATTR_SET,          /* One nested OVS_KEY_ATTR_*. */
-	OVS_ACTION_ATTR_PUSH_VLAN,    /* struct ovs_action_push_vlan. */
-	OVS_ACTION_ATTR_POP_VLAN,     /* No argument. */
-	OVS_ACTION_ATTR_SAMPLE,       /* Nested OVS_SAMPLE_ATTR_*. */
-	__OVS_ACTION_ATTR_MAX
-};
-
-#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
+#include <uapi/linux/openvswitch.h>
 
 #endif /* _LINUX_OPENVSWITCH_H */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 5c8a1d25e21c..d8fbc6aeac86 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -285,6 +285,7 @@ header-y += nvram.h
 header-y += omap3isp.h
 header-y += omapfb.h
 header-y += oom.h
+header-y += openvswitch.h
 header-y += packet_diag.h
 header-y += param.h
 header-y += parport.h
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
new file mode 100644
index 000000000000..405918dd7b3f
--- /dev/null
+++ b/include/uapi/linux/openvswitch.h
@@ -0,0 +1,456 @@
+
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef _UAPI__LINUX_OPENVSWITCH_H
+#define _UAPI__LINUX_OPENVSWITCH_H 1
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+/**
+ * struct ovs_header - header for OVS Generic Netlink messages.
+ * @dp_ifindex: ifindex of local port for datapath (0 to make a request not
+ * specific to a datapath).
+ *
+ * Attributes following the header are specific to a particular OVS Generic
+ * Netlink family, but all of the OVS families use this header.
+ */
+
+struct ovs_header {
+	int dp_ifindex;
+};
+
+/* Datapaths. */
+
+#define OVS_DATAPATH_FAMILY  "ovs_datapath"
+#define OVS_DATAPATH_MCGROUP "ovs_datapath"
+#define OVS_DATAPATH_VERSION 0x1
+
+enum ovs_datapath_cmd {
+	OVS_DP_CMD_UNSPEC,
+	OVS_DP_CMD_NEW,
+	OVS_DP_CMD_DEL,
+	OVS_DP_CMD_GET,
+	OVS_DP_CMD_SET
+};
+
+/**
+ * enum ovs_datapath_attr - attributes for %OVS_DP_* commands.
+ * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local
+ * port".  This is the name of the network device whose dp_ifindex is given in
+ * the &struct ovs_header.  Always present in notifications.  Required in
+ * %OVS_DP_NEW requests.  May be used as an alternative to specifying
+ * dp_ifindex in other requests (with a dp_ifindex of 0).
+ * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially
+ * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
+ * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
+ * not be sent.
+ * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
+ * datapath.  Always present in notifications.
+ *
+ * These attributes follow the &struct ovs_header within the Generic Netlink
+ * payload for %OVS_DP_* commands.
+ */
+enum ovs_datapath_attr {
+	OVS_DP_ATTR_UNSPEC,
+	OVS_DP_ATTR_NAME,       /* name of dp_ifindex netdev */
+	OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */
+	OVS_DP_ATTR_STATS,      /* struct ovs_dp_stats */
+	__OVS_DP_ATTR_MAX
+};
+
+#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
+
+struct ovs_dp_stats {
+	__u64 n_hit;             /* Number of flow table matches. */
+	__u64 n_missed;          /* Number of flow table misses. */
+	__u64 n_lost;            /* Number of misses not sent to userspace. */
+	__u64 n_flows;           /* Number of flows present */
+};
+
+struct ovs_vport_stats {
+	__u64   rx_packets;		/* total packets received       */
+	__u64   tx_packets;		/* total packets transmitted    */
+	__u64   rx_bytes;		/* total bytes received         */
+	__u64   tx_bytes;		/* total bytes transmitted      */
+	__u64   rx_errors;		/* bad packets received         */
+	__u64   tx_errors;		/* packet transmit problems     */
+	__u64   rx_dropped;		/* no space in linux buffers    */
+	__u64   tx_dropped;		/* no space available in linux  */
+};
+
+/* Fixed logical ports. */
+#define OVSP_LOCAL      ((__u32)0)
+
+/* Packet transfer. */
+
+#define OVS_PACKET_FAMILY "ovs_packet"
+#define OVS_PACKET_VERSION 0x1
+
+enum ovs_packet_cmd {
+	OVS_PACKET_CMD_UNSPEC,
+
+	/* Kernel-to-user notifications. */
+	OVS_PACKET_CMD_MISS,    /* Flow table miss. */
+	OVS_PACKET_CMD_ACTION,  /* OVS_ACTION_ATTR_USERSPACE action. */
+
+	/* Userspace commands. */
+	OVS_PACKET_CMD_EXECUTE  /* Apply actions to a packet. */
+};
+
+/**
+ * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands.
+ * @OVS_PACKET_ATTR_PACKET: Present for all notifications.  Contains the entire
+ * packet as received, from the start of the Ethernet header onward.  For
+ * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by
+ * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is
+ * the flow key extracted from the packet as originally received.
+ * @OVS_PACKET_ATTR_KEY: Present for all notifications.  Contains the flow key
+ * extracted from the packet as nested %OVS_KEY_ATTR_* attributes.  This allows
+ * userspace to adapt its flow setup strategy by comparing its notion of the
+ * flow key against the kernel's.
+ * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet.  Used
+ * for %OVS_PACKET_CMD_EXECUTE.  It has nested %OVS_ACTION_ATTR_* attributes.
+ * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
+ * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
+ * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content
+ * specified there.
+ *
+ * These attributes follow the &struct ovs_header within the Generic Netlink
+ * payload for %OVS_PACKET_* commands.
+ */
+enum ovs_packet_attr {
+	OVS_PACKET_ATTR_UNSPEC,
+	OVS_PACKET_ATTR_PACKET,      /* Packet data. */
+	OVS_PACKET_ATTR_KEY,         /* Nested OVS_KEY_ATTR_* attributes. */
+	OVS_PACKET_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
+	OVS_PACKET_ATTR_USERDATA,    /* OVS_ACTION_ATTR_USERSPACE arg. */
+	__OVS_PACKET_ATTR_MAX
+};
+
+#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1)
+
+/* Virtual ports. */
+
+#define OVS_VPORT_FAMILY  "ovs_vport"
+#define OVS_VPORT_MCGROUP "ovs_vport"
+#define OVS_VPORT_VERSION 0x1
+
+enum ovs_vport_cmd {
+	OVS_VPORT_CMD_UNSPEC,
+	OVS_VPORT_CMD_NEW,
+	OVS_VPORT_CMD_DEL,
+	OVS_VPORT_CMD_GET,
+	OVS_VPORT_CMD_SET
+};
+
+enum ovs_vport_type {
+	OVS_VPORT_TYPE_UNSPEC,
+	OVS_VPORT_TYPE_NETDEV,   /* network device */
+	OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
+	__OVS_VPORT_TYPE_MAX
+};
+
+#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1)
+
+/**
+ * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands.
+ * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath.
+ * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type
+ * of vport.
+ * @OVS_VPORT_ATTR_NAME: Name of vport.  For a vport based on a network device
+ * this is the name of the network device.  Maximum length %IFNAMSIZ-1 bytes
+ * plus a null terminator.
+ * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
+ * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
+ * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
+ * this port.  A value of zero indicates that upcalls should not be sent.
+ * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
+ * packets sent or received through the vport.
+ *
+ * These attributes follow the &struct ovs_header within the Generic Netlink
+ * payload for %OVS_VPORT_* commands.
+ *
+ * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and
+ * %OVS_VPORT_ATTR_NAME attributes are required.  %OVS_VPORT_ATTR_PORT_NO is
+ * optional; if not specified a free port number is automatically selected.
+ * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type
+ * of vport.
+ * and other attributes are ignored.
+ *
+ * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to
+ * look up the vport to operate on; otherwise dp_idx from the &struct
+ * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport.
+ */
+enum ovs_vport_attr {
+	OVS_VPORT_ATTR_UNSPEC,
+	OVS_VPORT_ATTR_PORT_NO,	/* u32 port number within datapath */
+	OVS_VPORT_ATTR_TYPE,	/* u32 OVS_VPORT_TYPE_* constant. */
+	OVS_VPORT_ATTR_NAME,	/* string name, up to IFNAMSIZ bytes long */
+	OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
+	OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */
+	OVS_VPORT_ATTR_STATS,	/* struct ovs_vport_stats */
+	__OVS_VPORT_ATTR_MAX
+};
+
+#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
+
+/* Flows. */
+
+#define OVS_FLOW_FAMILY  "ovs_flow"
+#define OVS_FLOW_MCGROUP "ovs_flow"
+#define OVS_FLOW_VERSION 0x1
+
+enum ovs_flow_cmd {
+	OVS_FLOW_CMD_UNSPEC,
+	OVS_FLOW_CMD_NEW,
+	OVS_FLOW_CMD_DEL,
+	OVS_FLOW_CMD_GET,
+	OVS_FLOW_CMD_SET
+};
+
+struct ovs_flow_stats {
+	__u64 n_packets;         /* Number of matched packets. */
+	__u64 n_bytes;           /* Number of matched bytes. */
+};
+
+enum ovs_key_attr {
+	OVS_KEY_ATTR_UNSPEC,
+	OVS_KEY_ATTR_ENCAP,	/* Nested set of encapsulated attributes. */
+	OVS_KEY_ATTR_PRIORITY,  /* u32 skb->priority */
+	OVS_KEY_ATTR_IN_PORT,   /* u32 OVS dp port number */
+	OVS_KEY_ATTR_ETHERNET,  /* struct ovs_key_ethernet */
+	OVS_KEY_ATTR_VLAN,	/* be16 VLAN TCI */
+	OVS_KEY_ATTR_ETHERTYPE,	/* be16 Ethernet type */
+	OVS_KEY_ATTR_IPV4,      /* struct ovs_key_ipv4 */
+	OVS_KEY_ATTR_IPV6,      /* struct ovs_key_ipv6 */
+	OVS_KEY_ATTR_TCP,       /* struct ovs_key_tcp */
+	OVS_KEY_ATTR_UDP,       /* struct ovs_key_udp */
+	OVS_KEY_ATTR_ICMP,      /* struct ovs_key_icmp */
+	OVS_KEY_ATTR_ICMPV6,    /* struct ovs_key_icmpv6 */
+	OVS_KEY_ATTR_ARP,       /* struct ovs_key_arp */
+	OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
+	OVS_KEY_ATTR_SKB_MARK,  /* u32 skb mark */
+	__OVS_KEY_ATTR_MAX
+};
+
+#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
+
+/**
+ * enum ovs_frag_type - IPv4 and IPv6 fragment type
+ * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
+ * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
+ * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
+ *
+ * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct
+ * ovs_key_ipv6.
+ */
+enum ovs_frag_type {
+	OVS_FRAG_TYPE_NONE,
+	OVS_FRAG_TYPE_FIRST,
+	OVS_FRAG_TYPE_LATER,
+	__OVS_FRAG_TYPE_MAX
+};
+
+#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
+
+struct ovs_key_ethernet {
+	__u8	 eth_src[ETH_ALEN];
+	__u8	 eth_dst[ETH_ALEN];
+};
+
+struct ovs_key_ipv4 {
+	__be32 ipv4_src;
+	__be32 ipv4_dst;
+	__u8   ipv4_proto;
+	__u8   ipv4_tos;
+	__u8   ipv4_ttl;
+	__u8   ipv4_frag;	/* One of OVS_FRAG_TYPE_*. */
+};
+
+struct ovs_key_ipv6 {
+	__be32 ipv6_src[4];
+	__be32 ipv6_dst[4];
+	__be32 ipv6_label;	/* 20-bits in least-significant bits. */
+	__u8   ipv6_proto;
+	__u8   ipv6_tclass;
+	__u8   ipv6_hlimit;
+	__u8   ipv6_frag;	/* One of OVS_FRAG_TYPE_*. */
+};
+
+struct ovs_key_tcp {
+	__be16 tcp_src;
+	__be16 tcp_dst;
+};
+
+struct ovs_key_udp {
+	__be16 udp_src;
+	__be16 udp_dst;
+};
+
+struct ovs_key_icmp {
+	__u8 icmp_type;
+	__u8 icmp_code;
+};
+
+struct ovs_key_icmpv6 {
+	__u8 icmpv6_type;
+	__u8 icmpv6_code;
+};
+
+struct ovs_key_arp {
+	__be32 arp_sip;
+	__be32 arp_tip;
+	__be16 arp_op;
+	__u8   arp_sha[ETH_ALEN];
+	__u8   arp_tha[ETH_ALEN];
+};
+
+struct ovs_key_nd {
+	__u32 nd_target[4];
+	__u8  nd_sll[ETH_ALEN];
+	__u8  nd_tll[ETH_ALEN];
+};
+
+/**
+ * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
+ * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
+ * key.  Always present in notifications.  Required for all requests (except
+ * dumps).
+ * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
+ * the actions to take for packets that match the key.  Always present in
+ * notifications.  Required for %OVS_FLOW_CMD_NEW requests, optional for
+ * %OVS_FLOW_CMD_SET requests.
+ * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
+ * flow.  Present in notifications if the stats would be nonzero.  Ignored in
+ * requests.
+ * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the
+ * TCP flags seen on packets in this flow.  Only present in notifications for
+ * TCP flows, and only if it would be nonzero.  Ignored in requests.
+ * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on
+ * the system monotonic clock, at which a packet was last processed for this
+ * flow.  Only present in notifications if a packet has been processed for this
+ * flow.  Ignored in requests.
+ * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
+ * last-used time, accumulated TCP flags, and statistics for this flow.
+ * Otherwise ignored in requests.  Never present in notifications.
+ *
+ * These attributes follow the &struct ovs_header within the Generic Netlink
+ * payload for %OVS_FLOW_* commands.
+ */
+enum ovs_flow_attr {
+	OVS_FLOW_ATTR_UNSPEC,
+	OVS_FLOW_ATTR_KEY,       /* Sequence of OVS_KEY_ATTR_* attributes. */
+	OVS_FLOW_ATTR_ACTIONS,   /* Nested OVS_ACTION_ATTR_* attributes. */
+	OVS_FLOW_ATTR_STATS,     /* struct ovs_flow_stats. */
+	OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
+	OVS_FLOW_ATTR_USED,      /* u64 msecs last used in monotonic time. */
+	OVS_FLOW_ATTR_CLEAR,     /* Flag to clear stats, tcp_flags, used. */
+	__OVS_FLOW_ATTR_MAX
+};
+
+#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
+
+/**
+ * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
+ * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
+ * @OVS_ACTION_ATTR_SAMPLE.  A value of 0 samples no packets, a value of
+ * %UINT32_MAX samples all packets and intermediate values sample intermediate
+ * fractions of packets.
+ * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
+ * Actions are passed as nested attributes.
+ *
+ * Executes the specified actions with the given probability on a per-packet
+ * basis.
+ */
+enum ovs_sample_attr {
+	OVS_SAMPLE_ATTR_UNSPEC,
+	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
+	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
+	__OVS_SAMPLE_ATTR_MAX,
+};
+
+#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
+
+/**
+ * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
+ * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
+ * message should be sent.  Required.
+ * @OVS_USERSPACE_ATTR_USERDATA: If present, its variable-length argument is
+ * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA.
+ */
+enum ovs_userspace_attr {
+	OVS_USERSPACE_ATTR_UNSPEC,
+	OVS_USERSPACE_ATTR_PID,	      /* u32 Netlink PID to receive upcalls. */
+	OVS_USERSPACE_ATTR_USERDATA,  /* Optional user-specified cookie. */
+	__OVS_USERSPACE_ATTR_MAX
+};
+
+#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
+
+/**
+ * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
+ * @vlan_tpid: Tag protocol identifier (TPID) to push.
+ * @vlan_tci: Tag control identifier (TCI) to push.  The CFI bit must be set
+ * (but it will not be set in the 802.1Q header that is pushed).
+ *
+ * The @vlan_tpid value is typically %ETH_P_8021Q.  The only acceptable TPID
+ * values are those that the kernel module also parses as 802.1Q headers, to
+ * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN
+ * from having surprising results.
+ */
+struct ovs_action_push_vlan {
+	__be16 vlan_tpid;	/* 802.1Q TPID. */
+	__be16 vlan_tci;	/* 802.1Q TCI (VLAN ID and priority). */
+};
+
+/**
+ * enum ovs_action_attr - Action types.
+ *
+ * @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
+ * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested
+ * %OVS_USERSPACE_ATTR_* attributes.
+ * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header.  The
+ * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
+ * value.
+ * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
+ * packet.
+ * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
+ * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in
+ * the nested %OVS_SAMPLE_ATTR_* attributes.
+ *
+ * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
+ * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
+ * type may not be changed.
+ */
+
+enum ovs_action_attr {
+	OVS_ACTION_ATTR_UNSPEC,
+	OVS_ACTION_ATTR_OUTPUT,	      /* u32 port number. */
+	OVS_ACTION_ATTR_USERSPACE,    /* Nested OVS_USERSPACE_ATTR_*. */
+	OVS_ACTION_ATTR_SET,          /* One nested OVS_KEY_ATTR_*. */
+	OVS_ACTION_ATTR_PUSH_VLAN,    /* struct ovs_action_push_vlan. */
+	OVS_ACTION_ATTR_POP_VLAN,     /* No argument. */
+	OVS_ACTION_ATTR_SAMPLE,       /* Nested OVS_SAMPLE_ATTR_*. */
+	__OVS_ACTION_ATTR_MAX
+};
+
+#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
+
+#endif /* _LINUX_OPENVSWITCH_H */
-- 
cgit 


From fc39f46b54b600f053bf9bab757023344e97925e Mon Sep 17 00:00:00 2001
From: Sylwester Nawrocki <s.nawrocki@samsung.com>
Date: Thu, 14 Mar 2013 07:01:24 -0300
Subject: [media] V4L: Add MATRIX option to V4L2_CID_EXPOSURE_METERING control

This patch adds a menu option to the V4L2_CID_EXPOSURE_METERING
control for multi-zone metering.

Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/v4l/controls.xml | 7 +++++++
 drivers/media/v4l2-core/v4l2-ctrls.c         | 1 +
 include/uapi/linux/v4l2-controls.h           | 1 +
 3 files changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
index c8eb6c222274..8d7a77928d49 100644
--- a/Documentation/DocBook/media/v4l/controls.xml
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -3142,6 +3142,13 @@ giving priority to the center of the metered area.</entry>
 		  <entry><constant>V4L2_EXPOSURE_METERING_SPOT</constant>&nbsp;</entry>
 		  <entry>Measure only very small area at the center of the frame.</entry>
 		</row>
+		<row>
+		  <entry><constant>V4L2_EXPOSURE_METERING_MATRIX</constant>&nbsp;</entry>
+		  <entry>A multi-zone metering. The light intensity is measured
+in several points of the frame and the the results are combined. The
+algorithm of the zones selection and their significance in calculating the
+final value is device dependant.</entry>
+		</row>
 	      </tbody>
 	    </entrytbl>
 	  </row>
diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c
index ec89fd16361c..ebb8e48619a2 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls.c
@@ -234,6 +234,7 @@ const char * const *v4l2_ctrl_get_menu(u32 id)
 		"Average",
 		"Center Weighted",
 		"Spot",
+		"Matrix",
 		NULL
 	};
 	static const char * const camera_auto_focus_range[] = {
diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h
index 7da22cec30cd..69bd5bb0d5af 100644
--- a/include/uapi/linux/v4l2-controls.h
+++ b/include/uapi/linux/v4l2-controls.h
@@ -658,6 +658,7 @@ enum v4l2_exposure_metering {
 	V4L2_EXPOSURE_METERING_AVERAGE		= 0,
 	V4L2_EXPOSURE_METERING_CENTER_WEIGHTED	= 1,
 	V4L2_EXPOSURE_METERING_SPOT		= 2,
+	V4L2_EXPOSURE_METERING_MATRIX		= 3,
 };
 
 #define V4L2_CID_SCENE_MODE			(V4L2_CID_CAMERA_CLASS_BASE+26)
-- 
cgit 


From c3feedf2aaf9ac8bad6f19f5d21e4ee0b4b87e9c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 24 Jan 2013 16:10:28 +0100
Subject: perf/core: Add weighted samples

For some events it's useful to weight sample with a hardware
provided number. This expresses how expensive the action the
sample represent was.  This allows the profiler to scale
the samples to be more informative to the programmer.

There is already the period which is used similarly, but it
means something different, so I chose to not overload it.
Instead a new sample type for WEIGHT is added.

Can be used for multiple things. Initially it is used for TSX
abort costs and profiling by memory latencies (so to make
expensive load appear higher up in the histograms). The concept
is quite generic and can be extended to many other kinds of
events or architectures, as long as the hardware provides
suitable auxillary values. In principle it could be also used
for software tracepoints.

This adds the generic glue. A new optional sample format for a
64-bit weight value.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: namhyung.kim@lge.com
Link: http://lkml.kernel.org/r/1359040242-8269-5-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/perf_event.h      | 2 ++
 include/uapi/linux/perf_event.h | 6 +++++-
 kernel/events/core.c            | 6 ++++++
 3 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index cd3bb2cd9494..7ce0b37b155b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -573,6 +573,7 @@ struct perf_sample_data {
 	struct perf_branch_stack	*br_stack;
 	struct perf_regs_user		regs_user;
 	u64				stack_user_size;
+	u64				weight;
 };
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
@@ -586,6 +587,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
 	data->regs_user.regs = NULL;
 	data->stack_user_size = 0;
+	data->weight = 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9fa9c622a7f4..cdc255da02e2 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -132,8 +132,10 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_BRANCH_STACK		= 1U << 11,
 	PERF_SAMPLE_REGS_USER			= 1U << 12,
 	PERF_SAMPLE_STACK_USER			= 1U << 13,
+	PERF_SAMPLE_WEIGHT			= 1U << 14,
+
+	PERF_SAMPLE_MAX = 1U << 15,		/* non-ABI */
 
-	PERF_SAMPLE_MAX = 1U << 14,		/* non-ABI */
 };
 
 /*
@@ -588,6 +590,8 @@ enum perf_event_type {
 	 * 	{ u64			size;
 	 * 	  char			data[size];
 	 * 	  u64			dyn_size; } && PERF_SAMPLE_STACK_USER
+	 *
+	 *	{ u64			weight;   } && PERF_SAMPLE_WEIGHT
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7b4a55d41efc..9e3edb272b3e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -976,6 +976,9 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_PERIOD)
 		size += sizeof(data->period);
 
+	if (sample_type & PERF_SAMPLE_WEIGHT)
+		size += sizeof(data->weight);
+
 	if (sample_type & PERF_SAMPLE_READ)
 		size += event->read_size;
 
@@ -4193,6 +4196,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 		perf_output_sample_ustack(handle,
 					  data->stack_user_size,
 					  data->regs_user.regs);
+
+	if (sample_type & PERF_SAMPLE_WEIGHT)
+		perf_output_put(handle, data->weight);
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
-- 
cgit 


From d6be9ad6c960f43800a6f118932bc8a5a4eadcd1 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 24 Jan 2013 16:10:31 +0100
Subject: perf: Add generic memory sampling interface

This patch adds PERF_SAMPLE_DATA_SRC.

PERF_SAMPLE_DATA_SRC collects the data source, i.e., where
did the data associated with the sampled instruction
come from. Information is stored in a perf_mem_data_src
structure. It contains opcode, mem level, tlb, snoop,
lock information, subject to availability in hardware.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: ak@linux.intel.com
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: namhyung.kim@lge.com
Link: http://lkml.kernel.org/r/1359040242-8269-8-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/perf_event.h      |  2 ++
 include/uapi/linux/perf_event.h | 68 +++++++++++++++++++++++++++++++++++++++--
 kernel/events/core.c            |  6 ++++
 3 files changed, 74 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7ce0b37b155b..42a6daaf4e0a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -568,6 +568,7 @@ struct perf_sample_data {
 		u32	reserved;
 	}				cpu_entry;
 	u64				period;
+	union  perf_mem_data_src	data_src;
 	struct perf_callchain_entry	*callchain;
 	struct perf_raw_record		*raw;
 	struct perf_branch_stack	*br_stack;
@@ -588,6 +589,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->regs_user.regs = NULL;
 	data->stack_user_size = 0;
 	data->weight = 0;
+	data->data_src.val = 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index cdc255da02e2..5b5762006855 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -133,9 +133,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_REGS_USER			= 1U << 12,
 	PERF_SAMPLE_STACK_USER			= 1U << 13,
 	PERF_SAMPLE_WEIGHT			= 1U << 14,
+	PERF_SAMPLE_DATA_SRC			= 1U << 15,
 
-	PERF_SAMPLE_MAX = 1U << 15,		/* non-ABI */
-
+	PERF_SAMPLE_MAX = 1U << 16,		/* non-ABI */
 };
 
 /*
@@ -592,6 +592,7 @@ enum perf_event_type {
 	 * 	  u64			dyn_size; } && PERF_SAMPLE_STACK_USER
 	 *
 	 *	{ u64			weight;   } && PERF_SAMPLE_WEIGHT
+	 *	{ u64			data_src;     } && PERF_SAMPLE_DATA_SRC
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
@@ -617,4 +618,67 @@ enum perf_callchain_context {
 #define PERF_FLAG_FD_OUTPUT		(1U << 1)
 #define PERF_FLAG_PID_CGROUP		(1U << 2) /* pid=cgroup id, per-cpu mode only */
 
+union perf_mem_data_src {
+	__u64 val;
+	struct {
+		__u64   mem_op:5,	/* type of opcode */
+			mem_lvl:14,	/* memory hierarchy level */
+			mem_snoop:5,	/* snoop mode */
+			mem_lock:2,	/* lock instr */
+			mem_dtlb:7,	/* tlb access */
+			mem_rsvd:31;
+	};
+};
+
+/* type of opcode (load/store/prefetch,code) */
+#define PERF_MEM_OP_NA		0x01 /* not available */
+#define PERF_MEM_OP_LOAD	0x02 /* load instruction */
+#define PERF_MEM_OP_STORE	0x04 /* store instruction */
+#define PERF_MEM_OP_PFETCH	0x08 /* prefetch */
+#define PERF_MEM_OP_EXEC	0x10 /* code (execution) */
+#define PERF_MEM_OP_SHIFT	0
+
+/* memory hierarchy (memory level, hit or miss) */
+#define PERF_MEM_LVL_NA		0x01  /* not available */
+#define PERF_MEM_LVL_HIT	0x02  /* hit level */
+#define PERF_MEM_LVL_MISS	0x04  /* miss level  */
+#define PERF_MEM_LVL_L1		0x08  /* L1 */
+#define PERF_MEM_LVL_LFB	0x10  /* Line Fill Buffer */
+#define PERF_MEM_LVL_L2		0x20  /* L2 hit */
+#define PERF_MEM_LVL_L3		0x40  /* L3 hit */
+#define PERF_MEM_LVL_LOC_RAM	0x80  /* Local DRAM */
+#define PERF_MEM_LVL_REM_RAM1	0x100 /* Remote DRAM (1 hop) */
+#define PERF_MEM_LVL_REM_RAM2	0x200 /* Remote DRAM (2 hops) */
+#define PERF_MEM_LVL_REM_CCE1	0x400 /* Remote Cache (1 hop) */
+#define PERF_MEM_LVL_REM_CCE2	0x800 /* Remote Cache (2 hops) */
+#define PERF_MEM_LVL_IO		0x1000 /* I/O memory */
+#define PERF_MEM_LVL_UNC	0x2000 /* Uncached memory */
+#define PERF_MEM_LVL_SHIFT	5
+
+/* snoop mode */
+#define PERF_MEM_SNOOP_NA	0x01 /* not available */
+#define PERF_MEM_SNOOP_NONE	0x02 /* no snoop */
+#define PERF_MEM_SNOOP_HIT	0x04 /* snoop hit */
+#define PERF_MEM_SNOOP_MISS	0x08 /* snoop miss */
+#define PERF_MEM_SNOOP_HITM	0x10 /* snoop hit modified */
+#define PERF_MEM_SNOOP_SHIFT	19
+
+/* locked instruction */
+#define PERF_MEM_LOCK_NA	0x01 /* not available */
+#define PERF_MEM_LOCK_LOCKED	0x02 /* locked transaction */
+#define PERF_MEM_LOCK_SHIFT	24
+
+/* TLB access */
+#define PERF_MEM_TLB_NA		0x01 /* not available */
+#define PERF_MEM_TLB_HIT	0x02 /* hit level */
+#define PERF_MEM_TLB_MISS	0x04 /* miss level */
+#define PERF_MEM_TLB_L1		0x08 /* L1 */
+#define PERF_MEM_TLB_L2		0x10 /* L2 */
+#define PERF_MEM_TLB_WK		0x20 /* Hardware Walker*/
+#define PERF_MEM_TLB_OS		0x40 /* OS fault handler */
+#define PERF_MEM_TLB_SHIFT	26
+
+#define PERF_MEM_S(a, s) \
+	(((u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
+
 #endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9e3edb272b3e..77c96d18c23a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -982,6 +982,9 @@ static void perf_event__header_size(struct perf_event *event)
 	if (sample_type & PERF_SAMPLE_READ)
 		size += event->read_size;
 
+	if (sample_type & PERF_SAMPLE_DATA_SRC)
+		size += sizeof(data->data_src.val);
+
 	event->header_size = size;
 }
 
@@ -4199,6 +4202,9 @@ void perf_output_sample(struct perf_output_handle *handle,
 
 	if (sample_type & PERF_SAMPLE_WEIGHT)
 		perf_output_put(handle, data->weight);
+
+	if (sample_type & PERF_SAMPLE_DATA_SRC)
+		perf_output_put(handle, data->data_src.val);
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
-- 
cgit 


From 2fe85427e3bf65d791700d065132772fc26e4d75 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 24 Jan 2013 16:10:39 +0100
Subject: perf: Add PERF_RECORD_MISC_MMAP_DATA to RECORD_MMAP

Type of mapping was lost and made it hard for a tool
to distinguish code vs. data mmaps. Perf has the ability
to distinguish the two.

Use a bit in the header->misc bitmask to keep track of
the mmap type. If PERF_RECORD_MISC_MMAP_DATA is set then
the mapping is not executable (!VM_EXEC). If not set, then
the mapping is executable.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: ak@linux.intel.com
Cc: acme@redhat.com
Cc: jolsa@redhat.com
Cc: namhyung.kim@lge.com
Link: http://lkml.kernel.org/r/1359040242-8269-16-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/uapi/linux/perf_event.h | 1 +
 kernel/events/core.c            | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 5b5762006855..964a450a6e2c 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -445,6 +445,7 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
 #define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
 
+#define PERF_RECORD_MISC_MMAP_DATA		(1 << 13)
 /*
  * Indicates that the content of PERF_SAMPLE_IP points to
  * the actual instruction that triggered the event. See also
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 77c96d18c23a..98c0845fcd20 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4791,6 +4791,9 @@ got_name:
 	mmap_event->file_name = name;
 	mmap_event->file_size = size;
 
+	if (!(vma->vm_flags & VM_EXEC))
+		mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
+
 	mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
 
 	rcu_read_lock();
-- 
cgit 


From 8746ddcf12bb263ad240e095ef16531006caeb50 Mon Sep 17 00:00:00 2001
From: "holger@eitzenberger.org" <holger@eitzenberger.org>
Date: Sat, 23 Mar 2013 10:04:03 +0000
Subject: netfilter: xt_NFQUEUE: introduce CPU fanout

Current NFQUEUE target uses a hash, computed over source and
destination address (and other parameters), for steering the packet
to the actual NFQUEUE. This, however forgets about the fact that the
packet eventually is handled by a particular CPU on user request.

If E. g.

  1) IRQ affinity is used to handle packets on a particular CPU already
     (both single-queue or multi-queue case)

and/or

  2) RPS is used to steer packets to a specific softirq

the target easily chooses an NFQUEUE which is not handled by a process
pinned to the same CPU.

The idea is therefore to use the CPU index for determining the
NFQUEUE handling the packet.

E. g. when having a system with 4 CPUs, 4 MQ queues and 4 NFQUEUEs it
looks like this:

 +-----+  +-----+  +-----+  +-----+
 |NFQ#0|  |NFQ#1|  |NFQ#2|  |NFQ#3|
 +-----+  +-----+  +-----+  +-----+
    ^        ^        ^        ^
    |        |NFQUEUE |        |
    +        +        +        +
 +-----+  +-----+  +-----+  +-----+
 |rx-0 |  |rx-1 |  |rx-2 |  |rx-3 |
 +-----+  +-----+  +-----+  +-----+

The NFQUEUEs not necessarily have to start with number 0, setups with
less NFQUEUEs than packet-handling CPUs are not a problem as well.

This patch extends the NFQUEUE target to accept a new
NFQ_FLAG_CPU_FANOUT flag. If this is specified the target uses the
CPU index for determining the NFQUEUE being used. I have to introduce
rev3 for this. The 'flags' are folded into _v2 'bypass'.

By changing the way which queue is assigned, I'm able to improve the
performance if the processes reading on the NFQUEUs are pinned
correctly.

Signed-off-by: Holger Eitzenberger <holger@eitzenberger.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/xt_NFQUEUE.h |  9 +++++++
 net/netfilter/xt_NFQUEUE.c                | 41 +++++++++++++++++++++++++++++--
 2 files changed, 48 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/xt_NFQUEUE.h b/include/uapi/linux/netfilter/xt_NFQUEUE.h
index 9eafdbbb401c..8bb5fe657d34 100644
--- a/include/uapi/linux/netfilter/xt_NFQUEUE.h
+++ b/include/uapi/linux/netfilter/xt_NFQUEUE.h
@@ -26,4 +26,13 @@ struct xt_NFQ_info_v2 {
 	__u16 bypass;
 };
 
+struct xt_NFQ_info_v3 {
+	__u16 queuenum;
+	__u16 queues_total;
+	__u16 flags;
+#define NFQ_FLAG_BYPASS		0x01 /* for compatibility with v2 */
+#define NFQ_FLAG_CPU_FANOUT	0x02 /* use current CPU (no hashing) */
+#define NFQ_FLAG_MASK		0x03
+};
+
 #endif /* _XT_NFQ_TARGET_H */
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 817f9e9f2b16..a287ef262d41 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -108,7 +108,7 @@ nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par)
 
 static int nfqueue_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct xt_NFQ_info_v2 *info = par->targinfo;
+	const struct xt_NFQ_info_v3 *info = par->targinfo;
 	u32 maxid;
 
 	if (unlikely(!rnd_inited)) {
@@ -125,11 +125,39 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par)
 		       info->queues_total, maxid);
 		return -ERANGE;
 	}
-	if (par->target->revision == 2 && info->bypass > 1)
+	if (par->target->revision == 2 && info->flags > 1)
+		return -EINVAL;
+	if (par->target->revision == 3 && info->flags & ~NFQ_FLAG_MASK)
 		return -EINVAL;
+
 	return 0;
 }
 
+static unsigned int
+nfqueue_tg_v3(struct sk_buff *skb, const struct xt_action_param *par)
+{
+	const struct xt_NFQ_info_v3 *info = par->targinfo;
+	u32 queue = info->queuenum;
+
+	if (info->queues_total > 1) {
+		if (info->flags & NFQ_FLAG_CPU_FANOUT) {
+			int cpu = smp_processor_id();
+
+			queue = info->queuenum + cpu % info->queues_total;
+		} else {
+			if (par->family == NFPROTO_IPV4)
+				queue = (((u64) hash_v4(skb) * info->queues_total) >>
+						 32) + queue;
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
+			else if (par->family == NFPROTO_IPV6)
+				queue = (((u64) hash_v6(skb) * info->queues_total) >>
+						 32) + queue;
+#endif
+		}
+	}
+	return NF_QUEUE_NR(queue);
+}
+
 static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 	{
 		.name		= "NFQUEUE",
@@ -156,6 +184,15 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 		.targetsize	= sizeof(struct xt_NFQ_info_v2),
 		.me		= THIS_MODULE,
 	},
+	{
+		.name		= "NFQUEUE",
+		.revision	= 3,
+		.family		= NFPROTO_UNSPEC,
+		.checkentry	= nfqueue_tg_check,
+		.target		= nfqueue_tg_v3,
+		.targetsize	= sizeof(struct xt_NFQ_info_v3),
+		.me		= THIS_MODULE,
+	},
 };
 
 static int __init nfqueue_tg_init(void)
-- 
cgit 


From 608c380c304ce017dfddbceff988ffd0a36636d9 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Tue, 2 Apr 2013 09:59:17 +1030
Subject: virtio: do not export "u16" and "u64" to userspace

virtio_balloon.h exports "u16" and "u64" to userspace. Use "__u16" and
"__u64" instead.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 include/uapi/linux/virtio_balloon.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 652dc8bea921..5e26f61b5df5 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -52,8 +52,8 @@ struct virtio_balloon_config
 #define VIRTIO_BALLOON_S_NR       6
 
 struct virtio_balloon_stat {
-	u16 tag;
-	u64 val;
+	__u16 tag;
+	__u64 val;
 } __attribute__((packed));
 
 #endif /* _LINUX_VIRTIO_BALLOON_H */
-- 
cgit 


From 152b0f5da798c56566737f4d0bd85f69688e7d7b Mon Sep 17 00:00:00 2001
From: Michal Kubeček <mkubecek@suse.cz>
Date: Tue, 2 Apr 2013 08:12:11 +0200
Subject: netfilter: fix struct ip6t_frag field description

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter_ipv6/ip6t_frag.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_frag.h b/include/uapi/linux/netfilter_ipv6/ip6t_frag.h
index b47f61b9e082..dfd8bc2268cf 100644
--- a/include/uapi/linux/netfilter_ipv6/ip6t_frag.h
+++ b/include/uapi/linux/netfilter_ipv6/ip6t_frag.h
@@ -4,9 +4,9 @@
 #include <linux/types.h>
 
 struct ip6t_frag {
-	__u32 ids[2];			/* Security Parameter Index */
+	__u32 ids[2];			/* Identification range */
 	__u32 hdrlen;			/* Header Length */
-	__u8  flags;			/*  */
+	__u8  flags;			/* Flags */
 	__u8  invflags;			/* Inverse flags */
 };
 
-- 
cgit 


From 7b88fc086a217be7d16ec68a7f66093d344e39d7 Mon Sep 17 00:00:00 2001
From: Phil Edworthy <phil.edworthy@renesas.com>
Date: Mon, 18 Mar 2013 08:47:59 -0300
Subject: [media] soc_camera: Add RGB666 & RGB888 formats

Based on work done by Katsuya Matsubara.

Signed-off-by: Phil Edworthy <phil.edworthy@renesas.com>
Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/v4l/subdev-formats.xml | 206 ++++++++++++++++++++-
 Documentation/DocBook/media_api.tmpl               |   1 +
 drivers/media/platform/soc_camera/soc_mediabus.c   |  42 +++++
 include/media/soc_camera.h                         |   7 +-
 include/media/soc_mediabus.h                       |   3 +
 include/uapi/linux/v4l2-mediabus.h                 |   6 +-
 6 files changed, 253 insertions(+), 12 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index cc51372ed5e0..adc61982df7b 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -93,19 +93,35 @@
 
       <table pgwide="0" frame="none" id="v4l2-mbus-pixelcode-rgb">
 	<title>RGB formats</title>
-	<tgroup cols="11">
+	<tgroup cols="27">
 	  <colspec colname="id" align="left" />
 	  <colspec colname="code" align="center"/>
 	  <colspec colname="bit" />
-	  <colspec colnum="4" colname="b07" align="center" />
-	  <colspec colnum="5" colname="b06" align="center" />
-	  <colspec colnum="6" colname="b05" align="center" />
-	  <colspec colnum="7" colname="b04" align="center" />
-	  <colspec colnum="8" colname="b03" align="center" />
-	  <colspec colnum="9" colname="b02" align="center" />
-	  <colspec colnum="10" colname="b01" align="center" />
-	  <colspec colnum="11" colname="b00" align="center" />
-	  <spanspec namest="b07" nameend="b00" spanname="b0" />
+	  <colspec colnum="4" colname="b23" align="center" />
+	  <colspec colnum="5" colname="b22" align="center" />
+	  <colspec colnum="6" colname="b21" align="center" />
+	  <colspec colnum="7" colname="b20" align="center" />
+	  <colspec colnum="8" colname="b19" align="center" />
+	  <colspec colnum="9" colname="b18" align="center" />
+	  <colspec colnum="10" colname="b17" align="center" />
+	  <colspec colnum="11" colname="b16" align="center" />
+	  <colspec colnum="12" colname="b15" align="center" />
+	  <colspec colnum="13" colname="b14" align="center" />
+	  <colspec colnum="14" colname="b13" align="center" />
+	  <colspec colnum="15" colname="b12" align="center" />
+	  <colspec colnum="16" colname="b11" align="center" />
+	  <colspec colnum="17" colname="b10" align="center" />
+	  <colspec colnum="18" colname="b09" align="center" />
+	  <colspec colnum="19" colname="b08" align="center" />
+	  <colspec colnum="20" colname="b07" align="center" />
+	  <colspec colnum="21" colname="b06" align="center" />
+	  <colspec colnum="22" colname="b05" align="center" />
+	  <colspec colnum="23" colname="b04" align="center" />
+	  <colspec colnum="24" colname="b03" align="center" />
+	  <colspec colnum="25" colname="b02" align="center" />
+	  <colspec colnum="26" colname="b01" align="center" />
+	  <colspec colnum="27" colname="b00" align="center" />
+	  <spanspec namest="b23" nameend="b00" spanname="b0" />
 	  <thead>
 	    <row>
 	      <entry>Identifier</entry>
@@ -117,6 +133,22 @@
 	      <entry></entry>
 	      <entry></entry>
 	      <entry>Bit</entry>
+	      <entry>23</entry>
+	      <entry>22</entry>
+	      <entry>21</entry>
+	      <entry>20</entry>
+	      <entry>19</entry>
+	      <entry>18</entry>
+	      <entry>17</entry>
+	      <entry>16</entry>
+	      <entry>15</entry>
+	      <entry>14</entry>
+	      <entry>13</entry>
+	      <entry>12</entry>
+	      <entry>11</entry>
+	      <entry>10</entry>
+	      <entry>9</entry>
+	      <entry>8</entry>
 	      <entry>7</entry>
 	      <entry>6</entry>
 	      <entry>5</entry>
@@ -132,6 +164,7 @@
 	      <entry>V4L2_MBUS_FMT_RGB444_2X8_PADHI_BE</entry>
 	      <entry>0x1001</entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>0</entry>
 	      <entry>0</entry>
 	      <entry>0</entry>
@@ -145,6 +178,7 @@
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>g<subscript>3</subscript></entry>
 	      <entry>g<subscript>2</subscript></entry>
 	      <entry>g<subscript>1</subscript></entry>
@@ -158,6 +192,7 @@
 	      <entry>V4L2_MBUS_FMT_RGB444_2X8_PADHI_LE</entry>
 	      <entry>0x1002</entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>g<subscript>3</subscript></entry>
 	      <entry>g<subscript>2</subscript></entry>
 	      <entry>g<subscript>1</subscript></entry>
@@ -171,6 +206,7 @@
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>0</entry>
 	      <entry>0</entry>
 	      <entry>0</entry>
@@ -184,6 +220,7 @@
 	      <entry>V4L2_MBUS_FMT_RGB555_2X8_PADHI_BE</entry>
 	      <entry>0x1003</entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>0</entry>
 	      <entry>r<subscript>4</subscript></entry>
 	      <entry>r<subscript>3</subscript></entry>
@@ -197,6 +234,7 @@
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>g<subscript>2</subscript></entry>
 	      <entry>g<subscript>1</subscript></entry>
 	      <entry>g<subscript>0</subscript></entry>
@@ -210,6 +248,7 @@
 	      <entry>V4L2_MBUS_FMT_RGB555_2X8_PADHI_LE</entry>
 	      <entry>0x1004</entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>g<subscript>2</subscript></entry>
 	      <entry>g<subscript>1</subscript></entry>
 	      <entry>g<subscript>0</subscript></entry>
@@ -223,6 +262,7 @@
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>0</entry>
 	      <entry>r<subscript>4</subscript></entry>
 	      <entry>r<subscript>3</subscript></entry>
@@ -236,6 +276,7 @@
 	      <entry>V4L2_MBUS_FMT_BGR565_2X8_BE</entry>
 	      <entry>0x1005</entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>b<subscript>4</subscript></entry>
 	      <entry>b<subscript>3</subscript></entry>
 	      <entry>b<subscript>2</subscript></entry>
@@ -249,6 +290,7 @@
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>g<subscript>2</subscript></entry>
 	      <entry>g<subscript>1</subscript></entry>
 	      <entry>g<subscript>0</subscript></entry>
@@ -262,6 +304,7 @@
 	      <entry>V4L2_MBUS_FMT_BGR565_2X8_LE</entry>
 	      <entry>0x1006</entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>g<subscript>2</subscript></entry>
 	      <entry>g<subscript>1</subscript></entry>
 	      <entry>g<subscript>0</subscript></entry>
@@ -275,6 +318,7 @@
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>b<subscript>4</subscript></entry>
 	      <entry>b<subscript>3</subscript></entry>
 	      <entry>b<subscript>2</subscript></entry>
@@ -288,6 +332,7 @@
 	      <entry>V4L2_MBUS_FMT_RGB565_2X8_BE</entry>
 	      <entry>0x1007</entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>r<subscript>4</subscript></entry>
 	      <entry>r<subscript>3</subscript></entry>
 	      <entry>r<subscript>2</subscript></entry>
@@ -301,6 +346,7 @@
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>g<subscript>2</subscript></entry>
 	      <entry>g<subscript>1</subscript></entry>
 	      <entry>g<subscript>0</subscript></entry>
@@ -314,6 +360,7 @@
 	      <entry>V4L2_MBUS_FMT_RGB565_2X8_LE</entry>
 	      <entry>0x1008</entry>
 	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>g<subscript>2</subscript></entry>
 	      <entry>g<subscript>1</subscript></entry>
 	      <entry>g<subscript>0</subscript></entry>
@@ -327,6 +374,27 @@
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
+	      &dash-ent-16;
+	      <entry>r<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	    </row>
+	    <row id="V4L2-MBUS-FMT-RGB666-1X18">
+	      <entry>V4L2_MBUS_FMT_RGB666_1X18</entry>
+	      <entry>0x1009</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>r<subscript>5</subscript></entry>
 	      <entry>r<subscript>4</subscript></entry>
 	      <entry>r<subscript>3</subscript></entry>
 	      <entry>r<subscript>2</subscript></entry>
@@ -335,6 +403,124 @@
 	      <entry>g<subscript>5</subscript></entry>
 	      <entry>g<subscript>4</subscript></entry>
 	      <entry>g<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	      <entry>b<subscript>5</subscript></entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>b<subscript>0</subscript></entry>
+	    </row>
+	    <row id="V4L2-MBUS-FMT-RGB888-1X24">
+	      <entry>V4L2_MBUS_FMT_RGB888_1X24</entry>
+	      <entry>0x100a</entry>
+	      <entry></entry>
+	      <entry>r<subscript>7</subscript></entry>
+	      <entry>r<subscript>6</subscript></entry>
+	      <entry>r<subscript>5</subscript></entry>
+	      <entry>r<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	      <entry>g<subscript>7</subscript></entry>
+	      <entry>g<subscript>6</subscript></entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	      <entry>b<subscript>7</subscript></entry>
+	      <entry>b<subscript>6</subscript></entry>
+	      <entry>b<subscript>5</subscript></entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>b<subscript>0</subscript></entry>
+	    </row>
+	    <row id="V4L2-MBUS-FMT-RGB888-2X12-BE">
+	      <entry>V4L2_MBUS_FMT_RGB888_2X12_BE</entry>
+	      <entry>0x100b</entry>
+	      <entry></entry>
+	      &dash-ent-10;
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>r<subscript>7</subscript></entry>
+	      <entry>r<subscript>6</subscript></entry>
+	      <entry>r<subscript>5</subscript></entry>
+	      <entry>r<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	      <entry>g<subscript>7</subscript></entry>
+	      <entry>g<subscript>6</subscript></entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry></entry>
+	      &dash-ent-10;
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	      <entry>b<subscript>7</subscript></entry>
+	      <entry>b<subscript>6</subscript></entry>
+	      <entry>b<subscript>5</subscript></entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>b<subscript>0</subscript></entry>
+	    </row>
+	    <row id="V4L2-MBUS-FMT-RGB888-2X12-LE">
+	      <entry>V4L2_MBUS_FMT_RGB888_2X12_LE</entry>
+	      <entry>0x100c</entry>
+	      <entry></entry>
+	      &dash-ent-10;
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	      <entry>b<subscript>7</subscript></entry>
+	      <entry>b<subscript>6</subscript></entry>
+	      <entry>b<subscript>5</subscript></entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>b<subscript>0</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry></entry>
+	      &dash-ent-10;
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>r<subscript>7</subscript></entry>
+	      <entry>r<subscript>6</subscript></entry>
+	      <entry>r<subscript>5</subscript></entry>
+	      <entry>r<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	      <entry>g<subscript>7</subscript></entry>
+	      <entry>g<subscript>6</subscript></entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
 	    </row>
 	  </tbody>
 	</tgroup>
diff --git a/Documentation/DocBook/media_api.tmpl b/Documentation/DocBook/media_api.tmpl
index 1f6593deb995..6a8b7158697f 100644
--- a/Documentation/DocBook/media_api.tmpl
+++ b/Documentation/DocBook/media_api.tmpl
@@ -23,6 +23,7 @@
 <!-- LinuxTV v4l-dvb repository. -->
 <!ENTITY v4l-dvb		"<ulink url='http://linuxtv.org/repo/'>http://linuxtv.org/repo/</ulink>">
 <!ENTITY dash-ent-10            "<entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry>">
+<!ENTITY dash-ent-16            "<entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry>">
 ]>
 
 <book id="media_api">
diff --git a/drivers/media/platform/soc_camera/soc_mediabus.c b/drivers/media/platform/soc_camera/soc_mediabus.c
index 89dce097a827..7569e7746c92 100644
--- a/drivers/media/platform/soc_camera/soc_mediabus.c
+++ b/drivers/media/platform/soc_camera/soc_mediabus.c
@@ -96,6 +96,42 @@ static const struct soc_mbus_lookup mbus_fmt[] = {
 		.order			= SOC_MBUS_ORDER_LE,
 		.layout			= SOC_MBUS_LAYOUT_PACKED,
 	},
+}, {
+	.code = V4L2_MBUS_FMT_RGB666_1X18,
+	.fmt = {
+		.fourcc			= V4L2_PIX_FMT_RGB32,
+		.name			= "RGB666/32bpp",
+		.bits_per_sample	= 18,
+		.packing		= SOC_MBUS_PACKING_EXTEND32,
+		.order			= SOC_MBUS_ORDER_LE,
+	},
+}, {
+	.code = V4L2_MBUS_FMT_RGB888_1X24,
+	.fmt = {
+		.fourcc			= V4L2_PIX_FMT_RGB32,
+		.name			= "RGB888/32bpp",
+		.bits_per_sample	= 24,
+		.packing		= SOC_MBUS_PACKING_EXTEND32,
+		.order			= SOC_MBUS_ORDER_LE,
+	},
+}, {
+	.code = V4L2_MBUS_FMT_RGB888_2X12_BE,
+	.fmt = {
+		.fourcc			= V4L2_PIX_FMT_RGB32,
+		.name			= "RGB888/32bpp",
+		.bits_per_sample	= 12,
+		.packing		= SOC_MBUS_PACKING_EXTEND32,
+		.order			= SOC_MBUS_ORDER_BE,
+	},
+}, {
+	.code = V4L2_MBUS_FMT_RGB888_2X12_LE,
+	.fmt = {
+		.fourcc			= V4L2_PIX_FMT_RGB32,
+		.name			= "RGB888/32bpp",
+		.bits_per_sample	= 12,
+		.packing		= SOC_MBUS_PACKING_EXTEND32,
+		.order			= SOC_MBUS_ORDER_LE,
+	},
 }, {
 	.code = V4L2_MBUS_FMT_SBGGR8_1X8,
 	.fmt = {
@@ -358,6 +394,10 @@ int soc_mbus_samples_per_pixel(const struct soc_mbus_pixelfmt *mf,
 		*numerator = 1;
 		*denominator = 1;
 		return 0;
+	case SOC_MBUS_PACKING_EXTEND32:
+		*numerator = 1;
+		*denominator = 1;
+		return 0;
 	case SOC_MBUS_PACKING_2X8_PADHI:
 	case SOC_MBUS_PACKING_2X8_PADLO:
 		*numerator = 2;
@@ -392,6 +432,8 @@ s32 soc_mbus_bytes_per_line(u32 width, const struct soc_mbus_pixelfmt *mf)
 		return width * 3 / 2;
 	case SOC_MBUS_PACKING_VARIABLE:
 		return 0;
+	case SOC_MBUS_PACKING_EXTEND32:
+		return width * 4;
 	}
 	return -EINVAL;
 }
diff --git a/include/media/soc_camera.h b/include/media/soc_camera.h
index 2cc70cf318bf..ff77d08c30fd 100644
--- a/include/media/soc_camera.h
+++ b/include/media/soc_camera.h
@@ -292,12 +292,17 @@ struct soc_camera_sense {
 #define SOCAM_DATAWIDTH_8	SOCAM_DATAWIDTH(8)
 #define SOCAM_DATAWIDTH_9	SOCAM_DATAWIDTH(9)
 #define SOCAM_DATAWIDTH_10	SOCAM_DATAWIDTH(10)
+#define SOCAM_DATAWIDTH_12	SOCAM_DATAWIDTH(12)
 #define SOCAM_DATAWIDTH_15	SOCAM_DATAWIDTH(15)
 #define SOCAM_DATAWIDTH_16	SOCAM_DATAWIDTH(16)
+#define SOCAM_DATAWIDTH_18	SOCAM_DATAWIDTH(18)
+#define SOCAM_DATAWIDTH_24	SOCAM_DATAWIDTH(24)
 
 #define SOCAM_DATAWIDTH_MASK (SOCAM_DATAWIDTH_4 | SOCAM_DATAWIDTH_8 | \
 			      SOCAM_DATAWIDTH_9 | SOCAM_DATAWIDTH_10 | \
-			      SOCAM_DATAWIDTH_15 | SOCAM_DATAWIDTH_16)
+			      SOCAM_DATAWIDTH_12 | SOCAM_DATAWIDTH_15 | \
+			      SOCAM_DATAWIDTH_16 | SOCAM_DATAWIDTH_18 | \
+			      SOCAM_DATAWIDTH_24)
 
 static inline void soc_camera_limit_side(int *start, int *length,
 		unsigned int start_min,
diff --git a/include/media/soc_mediabus.h b/include/media/soc_mediabus.h
index 0dc6f4625b92..d33f6d059692 100644
--- a/include/media/soc_mediabus.h
+++ b/include/media/soc_mediabus.h
@@ -26,6 +26,8 @@
  * @SOC_MBUS_PACKING_VARIABLE:	compressed formats with variable packing
  * @SOC_MBUS_PACKING_1_5X8:	used for packed YUV 4:2:0 formats, where 4
  *				pixels occupy 6 bytes in RAM
+ * @SOC_MBUS_PACKING_EXTEND32:	sample width (e.g., 24 bits) has to be extended
+ *				to 32 bits
  */
 enum soc_mbus_packing {
 	SOC_MBUS_PACKING_NONE,
@@ -34,6 +36,7 @@ enum soc_mbus_packing {
 	SOC_MBUS_PACKING_EXTEND16,
 	SOC_MBUS_PACKING_VARIABLE,
 	SOC_MBUS_PACKING_1_5X8,
+	SOC_MBUS_PACKING_EXTEND32,
 };
 
 /**
diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h
index b9b7bea04537..6ee63d09b32d 100644
--- a/include/uapi/linux/v4l2-mediabus.h
+++ b/include/uapi/linux/v4l2-mediabus.h
@@ -37,7 +37,7 @@
 enum v4l2_mbus_pixelcode {
 	V4L2_MBUS_FMT_FIXED = 0x0001,
 
-	/* RGB - next is 0x1009 */
+	/* RGB - next is 0x100d */
 	V4L2_MBUS_FMT_RGB444_2X8_PADHI_BE = 0x1001,
 	V4L2_MBUS_FMT_RGB444_2X8_PADHI_LE = 0x1002,
 	V4L2_MBUS_FMT_RGB555_2X8_PADHI_BE = 0x1003,
@@ -46,6 +46,10 @@ enum v4l2_mbus_pixelcode {
 	V4L2_MBUS_FMT_BGR565_2X8_LE = 0x1006,
 	V4L2_MBUS_FMT_RGB565_2X8_BE = 0x1007,
 	V4L2_MBUS_FMT_RGB565_2X8_LE = 0x1008,
+	V4L2_MBUS_FMT_RGB666_1X18 = 0x1009,
+	V4L2_MBUS_FMT_RGB888_1X24 = 0x100a,
+	V4L2_MBUS_FMT_RGB888_2X12_BE = 0x100b,
+	V4L2_MBUS_FMT_RGB888_2X12_LE = 0x100c,
 
 	/* YUV (including grey) - next is 0x2017 */
 	V4L2_MBUS_FMT_Y8_1X8 = 0x2001,
-- 
cgit 


From fde04ab95d43e55959f12b92711b0ca4fed40637 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Thu, 4 Apr 2013 13:25:30 -0300
Subject: [media] demux.h: Remove duplicated enum

"enum dmx_ts_pes" and "typedef enum dmx_pes_type_t" are just the
same enum declared twice, since Kernel (2.6.12). There's no reason
to duplicate it there, and sparse complains about that:
	drivers/media/dvb-core/dmxdev.c:600:55: warning: mixing different enum types
So, remove the internal define, keeping just the external one.
Internally, use only "enum dmx_ts_pes", as it is too late to drop
dmx_pes_type_t from the userspace API.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb-core/demux.h                    | 39 -----------------------
 drivers/media/dvb-core/dmxdev.c                   |  2 +-
 drivers/media/dvb-core/dvb_demux.c                |  6 ++--
 drivers/media/dvb-core/dvb_demux.h                |  4 +--
 drivers/media/dvb-core/dvb_net.c                  |  2 +-
 drivers/media/firewire/firedtv-dvb.c              | 14 ++++----
 drivers/media/pci/ttpci/av7110.c                  |  6 ++--
 drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c | 10 +++---
 drivers/media/usb/ttusb-dec/ttusb_dec.c           | 20 ++++++------
 include/uapi/linux/dvb/dmx.h                      |  2 +-
 10 files changed, 33 insertions(+), 72 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/dvb-core/demux.h b/drivers/media/dvb-core/demux.h
index eb91fd808c16..833191bcd810 100644
--- a/drivers/media/dvb-core/demux.h
+++ b/drivers/media/dvb-core/demux.h
@@ -83,45 +83,6 @@ enum dmx_success {
 #define TS_DEMUX        8   /* in case TS_PACKET is set, send the TS to
 			       the demux device, not to the dvr device */
 
-/* PES type for filters which write to built-in decoder */
-/* these should be kept identical to the types in dmx.h */
-
-enum dmx_ts_pes
-{  /* also send packets to decoder (if it exists) */
-	DMX_TS_PES_AUDIO0,
-	DMX_TS_PES_VIDEO0,
-	DMX_TS_PES_TELETEXT0,
-	DMX_TS_PES_SUBTITLE0,
-	DMX_TS_PES_PCR0,
-
-	DMX_TS_PES_AUDIO1,
-	DMX_TS_PES_VIDEO1,
-	DMX_TS_PES_TELETEXT1,
-	DMX_TS_PES_SUBTITLE1,
-	DMX_TS_PES_PCR1,
-
-	DMX_TS_PES_AUDIO2,
-	DMX_TS_PES_VIDEO2,
-	DMX_TS_PES_TELETEXT2,
-	DMX_TS_PES_SUBTITLE2,
-	DMX_TS_PES_PCR2,
-
-	DMX_TS_PES_AUDIO3,
-	DMX_TS_PES_VIDEO3,
-	DMX_TS_PES_TELETEXT3,
-	DMX_TS_PES_SUBTITLE3,
-	DMX_TS_PES_PCR3,
-
-	DMX_TS_PES_OTHER
-};
-
-#define DMX_TS_PES_AUDIO    DMX_TS_PES_AUDIO0
-#define DMX_TS_PES_VIDEO    DMX_TS_PES_VIDEO0
-#define DMX_TS_PES_TELETEXT DMX_TS_PES_TELETEXT0
-#define DMX_TS_PES_SUBTITLE DMX_TS_PES_SUBTITLE0
-#define DMX_TS_PES_PCR      DMX_TS_PES_PCR0
-
-
 struct dmx_ts_feed {
 	int is_filtering; /* Set to non-zero when filtering in progress */
 	struct dmx_demux *parent; /* Back-pointer */
diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c
index 8896993e631f..a1a3a5159d71 100644
--- a/drivers/media/dvb-core/dmxdev.c
+++ b/drivers/media/dvb-core/dmxdev.c
@@ -569,7 +569,7 @@ static int dvb_dmxdev_start_feed(struct dmxdev *dmxdev,
 	dmx_output_t otype;
 	int ret;
 	int ts_type;
-	dmx_pes_type_t ts_pes;
+	enum dmx_ts_pes ts_pes;
 	struct dmx_ts_feed *tsfeed;
 
 	feed->ts = NULL;
diff --git a/drivers/media/dvb-core/dvb_demux.c b/drivers/media/dvb-core/dvb_demux.c
index 71641b2dde6b..3485655fa082 100644
--- a/drivers/media/dvb-core/dvb_demux.c
+++ b/drivers/media/dvb-core/dvb_demux.c
@@ -674,7 +674,7 @@ static int dmx_ts_feed_set(struct dmx_ts_feed *ts_feed, u16 pid, int ts_type,
 		return -ERESTARTSYS;
 
 	if (ts_type & TS_DECODER) {
-		if (pes_type >= DMX_TS_PES_OTHER) {
+		if (pes_type >= DMX_PES_OTHER) {
 			mutex_unlock(&demux->mutex);
 			return -EINVAL;
 		}
@@ -846,7 +846,7 @@ static int dvbdmx_release_ts_feed(struct dmx_demux *dmx,
 
 	feed->pid = 0xffff;
 
-	if (feed->ts_type & TS_DECODER && feed->pes_type < DMX_TS_PES_OTHER)
+	if (feed->ts_type & TS_DECODER && feed->pes_type < DMX_PES_OTHER)
 		demux->pesfilter[feed->pes_type] = NULL;
 
 	mutex_unlock(&demux->mutex);
@@ -1268,7 +1268,7 @@ int dvb_dmx_init(struct dvb_demux *dvbdemux)
 
 	INIT_LIST_HEAD(&dvbdemux->frontend_list);
 
-	for (i = 0; i < DMX_TS_PES_OTHER; i++) {
+	for (i = 0; i < DMX_PES_OTHER; i++) {
 		dvbdemux->pesfilter[i] = NULL;
 		dvbdemux->pids[i] = 0xffff;
 	}
diff --git a/drivers/media/dvb-core/dvb_demux.h b/drivers/media/dvb-core/dvb_demux.h
index fa7188a253aa..ae7fc33c3231 100644
--- a/drivers/media/dvb-core/dvb_demux.h
+++ b/drivers/media/dvb-core/dvb_demux.h
@@ -119,8 +119,8 @@ struct dvb_demux {
 
 	struct list_head frontend_list;
 
-	struct dvb_demux_feed *pesfilter[DMX_TS_PES_OTHER];
-	u16 pids[DMX_TS_PES_OTHER];
+	struct dvb_demux_feed *pesfilter[DMX_PES_OTHER];
+	u16 pids[DMX_PES_OTHER];
 	int playing;
 	int recording;
 
diff --git a/drivers/media/dvb-core/dvb_net.c b/drivers/media/dvb-core/dvb_net.c
index 44225b186f6d..e17cb85d3ecf 100644
--- a/drivers/media/dvb-core/dvb_net.c
+++ b/drivers/media/dvb-core/dvb_net.c
@@ -1044,7 +1044,7 @@ static int dvb_net_feed_start(struct net_device *dev)
 		ret = priv->tsfeed->set(priv->tsfeed,
 					priv->pid, /* pid */
 					TS_PACKET, /* type */
-					DMX_TS_PES_OTHER, /* pes type */
+					DMX_PES_OTHER, /* pes type */
 					32768,     /* circular buffer size */
 					timeout    /* timeout */
 					);
diff --git a/drivers/media/firewire/firedtv-dvb.c b/drivers/media/firewire/firedtv-dvb.c
index eb7496eab130..f710e17953e3 100644
--- a/drivers/media/firewire/firedtv-dvb.c
+++ b/drivers/media/firewire/firedtv-dvb.c
@@ -71,11 +71,11 @@ int fdtv_start_feed(struct dvb_demux_feed *dvbdmxfeed)
 
 	if (dvbdmxfeed->type == DMX_TYPE_TS) {
 		switch (dvbdmxfeed->pes_type) {
-		case DMX_TS_PES_VIDEO:
-		case DMX_TS_PES_AUDIO:
-		case DMX_TS_PES_TELETEXT:
-		case DMX_TS_PES_PCR:
-		case DMX_TS_PES_OTHER:
+		case DMX_PES_VIDEO:
+		case DMX_PES_AUDIO:
+		case DMX_PES_TELETEXT:
+		case DMX_PES_PCR:
+		case DMX_PES_OTHER:
 			c = alloc_channel(fdtv);
 			break;
 		default:
@@ -132,7 +132,7 @@ int fdtv_stop_feed(struct dvb_demux_feed *dvbdmxfeed)
 	      (demux->dmx.frontend->source != DMX_MEMORY_FE))) {
 
 		if (dvbdmxfeed->ts_type & TS_DECODER) {
-			if (dvbdmxfeed->pes_type >= DMX_TS_PES_OTHER ||
+			if (dvbdmxfeed->pes_type >= DMX_PES_OTHER ||
 			    !demux->pesfilter[dvbdmxfeed->pes_type])
 				return -EINVAL;
 
@@ -141,7 +141,7 @@ int fdtv_stop_feed(struct dvb_demux_feed *dvbdmxfeed)
 		}
 
 		if (!(dvbdmxfeed->ts_type & TS_DECODER &&
-		      dvbdmxfeed->pes_type < DMX_TS_PES_OTHER))
+		      dvbdmxfeed->pes_type < DMX_PES_OTHER))
 			return 0;
 	}
 
diff --git a/drivers/media/pci/ttpci/av7110.c b/drivers/media/pci/ttpci/av7110.c
index 3dc7aa9b6f40..f38329d29daa 100644
--- a/drivers/media/pci/ttpci/av7110.c
+++ b/drivers/media/pci/ttpci/av7110.c
@@ -990,7 +990,7 @@ static int av7110_start_feed(struct dvb_demux_feed *feed)
 
 	if (feed->type == DMX_TYPE_TS) {
 		if ((feed->ts_type & TS_DECODER) &&
-		    (feed->pes_type <= DMX_TS_PES_PCR)) {
+		    (feed->pes_type <= DMX_PES_PCR)) {
 			switch (demux->dmx.frontend->source) {
 			case DMX_MEMORY_FE:
 				if (feed->ts_type & TS_DECODER)
@@ -1051,14 +1051,14 @@ static int av7110_stop_feed(struct dvb_demux_feed *feed)
 
 	if (feed->type == DMX_TYPE_TS) {
 		if (feed->ts_type & TS_DECODER) {
-			if (feed->pes_type >= DMX_TS_PES_OTHER ||
+			if (feed->pes_type >= DMX_PES_OTHER ||
 			    !demux->pesfilter[feed->pes_type])
 				return -EINVAL;
 			demux->pids[feed->pes_type] |= 0x8000;
 			demux->pesfilter[feed->pes_type] = NULL;
 		}
 		if (feed->ts_type & TS_DECODER &&
-		    feed->pes_type < DMX_TS_PES_OTHER) {
+		    feed->pes_type < DMX_PES_OTHER) {
 			ret = dvb_feed_stop_pid(feed);
 		} else
 			if ((feed->ts_type & TS_PACKET) &&
diff --git a/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c b/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
index e40718552850..21b9049c7b3f 100644
--- a/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
+++ b/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
@@ -930,11 +930,11 @@ static int ttusb_start_feed(struct dvb_demux_feed *dvbdmxfeed)
 
 	if (dvbdmxfeed->type == DMX_TYPE_TS) {
 		switch (dvbdmxfeed->pes_type) {
-		case DMX_TS_PES_VIDEO:
-		case DMX_TS_PES_AUDIO:
-		case DMX_TS_PES_TELETEXT:
-		case DMX_TS_PES_PCR:
-		case DMX_TS_PES_OTHER:
+		case DMX_PES_VIDEO:
+		case DMX_PES_AUDIO:
+		case DMX_PES_TELETEXT:
+		case DMX_PES_PCR:
+		case DMX_PES_OTHER:
 			break;
 		default:
 			return -EINVAL;
diff --git a/drivers/media/usb/ttusb-dec/ttusb_dec.c b/drivers/media/usb/ttusb-dec/ttusb_dec.c
index 504c81230339..e52c3b97f304 100644
--- a/drivers/media/usb/ttusb-dec/ttusb_dec.c
+++ b/drivers/media/usb/ttusb-dec/ttusb_dec.c
@@ -951,34 +951,34 @@ static int ttusb_dec_start_ts_feed(struct dvb_demux_feed *dvbdmxfeed)
 
 	switch (dvbdmxfeed->pes_type) {
 
-	case DMX_TS_PES_VIDEO:
-		dprintk("  pes_type: DMX_TS_PES_VIDEO\n");
+	case DMX_PES_VIDEO:
+		dprintk("  pes_type: DMX_PES_VIDEO\n");
 		dec->pid[DMX_PES_PCR] = dvbdmxfeed->pid;
 		dec->pid[DMX_PES_VIDEO] = dvbdmxfeed->pid;
 		dec->video_filter = dvbdmxfeed->filter;
 		ttusb_dec_set_pids(dec);
 		break;
 
-	case DMX_TS_PES_AUDIO:
-		dprintk("  pes_type: DMX_TS_PES_AUDIO\n");
+	case DMX_PES_AUDIO:
+		dprintk("  pes_type: DMX_PES_AUDIO\n");
 		dec->pid[DMX_PES_AUDIO] = dvbdmxfeed->pid;
 		dec->audio_filter = dvbdmxfeed->filter;
 		ttusb_dec_set_pids(dec);
 		break;
 
-	case DMX_TS_PES_TELETEXT:
+	case DMX_PES_TELETEXT:
 		dec->pid[DMX_PES_TELETEXT] = dvbdmxfeed->pid;
-		dprintk("  pes_type: DMX_TS_PES_TELETEXT(not supported)\n");
+		dprintk("  pes_type: DMX_PES_TELETEXT(not supported)\n");
 		return -ENOSYS;
 
-	case DMX_TS_PES_PCR:
-		dprintk("  pes_type: DMX_TS_PES_PCR\n");
+	case DMX_PES_PCR:
+		dprintk("  pes_type: DMX_PES_PCR\n");
 		dec->pid[DMX_PES_PCR] = dvbdmxfeed->pid;
 		ttusb_dec_set_pids(dec);
 		break;
 
-	case DMX_TS_PES_OTHER:
-		dprintk("  pes_type: DMX_TS_PES_OTHER(not supported)\n");
+	case DMX_PES_OTHER:
+		dprintk("  pes_type: DMX_PES_OTHER(not supported)\n");
 		return -ENOSYS;
 
 	default:
diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h
index b2a9ad8cafdc..b4fb650d9d4f 100644
--- a/include/uapi/linux/dvb/dmx.h
+++ b/include/uapi/linux/dvb/dmx.h
@@ -51,7 +51,7 @@ typedef enum
 } dmx_input_t;
 
 
-typedef enum
+typedef enum dmx_ts_pes
 {
 	DMX_PES_AUDIO0,
 	DMX_PES_VIDEO0,
-- 
cgit 


From cc2f5a8adbc7ab1fdb7d9bcf4ea9838c73e82dfe Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Fri, 5 Apr 2013 16:49:41 +0200
Subject: perf: Fix comments in PERF_MEM_LVL bitmask

This small patch fixes a mistake in the comments
for the PERF_MEM_LVL_* events. The L2, L3 bits simply
represent cache levels, not hits or misses. That is
encoded in PERF_MEM_LVL_MISS/PERF_MEM_LVL_HIT.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: jolsa@redhat.com
Link: http://lkml.kernel.org/r/20130405144941.GA30503@quad
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 964a450a6e2c..fb104e51496e 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -645,8 +645,8 @@ union perf_mem_data_src {
 #define PERF_MEM_LVL_MISS	0x04  /* miss level  */
 #define PERF_MEM_LVL_L1		0x08  /* L1 */
 #define PERF_MEM_LVL_LFB	0x10  /* Line Fill Buffer */
-#define PERF_MEM_LVL_L2		0x20  /* L2 hit */
-#define PERF_MEM_LVL_L3		0x40  /* L3 hit */
+#define PERF_MEM_LVL_L2		0x20  /* L2 */
+#define PERF_MEM_LVL_L3		0x40  /* L3 */
 #define PERF_MEM_LVL_LOC_RAM	0x80  /* Local DRAM */
 #define PERF_MEM_LVL_REM_RAM1	0x100 /* Remote DRAM (1 hop) */
 #define PERF_MEM_LVL_REM_RAM2	0x200 /* Remote DRAM (2 hops) */
-- 
cgit 


From 197c58f624bfb98d3f7f381e57b5d09b2360266f Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Mon, 8 Apr 2013 12:20:49 +0800
Subject: USB: fix an incorrect table index in comment

Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/usb/ch9.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h
index f738e25377ff..aa33fd1b2d4f 100644
--- a/include/uapi/linux/usb/ch9.h
+++ b/include/uapi/linux/usb/ch9.h
@@ -138,7 +138,7 @@
 
 /*
  * New Feature Selectors as added by USB 3.0
- * See USB 3.0 spec Table 9-6
+ * See USB 3.0 spec Table 9-7
  */
 #define USB_DEVICE_U1_ENABLE	48	/* dev may initiate U1 transition */
 #define USB_DEVICE_U2_ENABLE	49	/* dev may initiate U2 transition */
@@ -147,7 +147,7 @@
 
 #define USB_INTR_FUNC_SUSPEND_OPT_MASK	0xFF00
 /*
- * Suspend Options, Table 9-7 USB 3.0 spec
+ * Suspend Options, Table 9-8 USB 3.0 spec
  */
 #define USB_INTRF_FUNC_SUSPEND_LP	(1 << (8 + 0))
 #define USB_INTRF_FUNC_SUSPEND_RW	(1 << (8 + 1))
-- 
cgit 


From f53adae4eae5ad9f7343ff4a0fc68b468c981138 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Mon, 8 Apr 2013 04:01:30 +0000
Subject: net: ipv6: add tokenized interface identifier support

This patch adds support for IPv6 tokenized IIDs, that allow
for administrators to assign well-known host-part addresses
to nodes whilst still obtaining global network prefix from
Router Advertisements. It is currently in draft status.

  The primary target for such support is server platforms
  where addresses are usually manually configured, rather
  than using DHCPv6 or SLAAC. By using tokenised identifiers,
  hosts can still determine their network prefix by use of
  SLAAC, but more readily be automatically renumbered should
  their network prefix change. [...]

  The disadvantage with static addresses is that they are
  likely to require manual editing should the network prefix
  in use change.  If instead there were a method to only
  manually configure the static identifier part of the IPv6
  address, then the address could be automatically updated
  when a new prefix was introduced, as described in [RFC4192]
  for example.  In such cases a DNS server might be
  configured with such a tokenised interface identifier of
  ::53, and SLAAC would use the token in constructing the
  interface address, using the advertised prefix. [...]

  http://tools.ietf.org/html/draft-chown-6man-tokenised-ipv6-identifiers-02

The implementation is partially based on top of Mark K.
Thompson's proof of concept. However, it uses the Netlink
interface for configuration resp. data retrival, so that
it can be easily extended in future. Successfully tested
by myself.

Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h       |  2 +
 include/uapi/linux/if_link.h |  1 +
 net/ipv6/addrconf.c          | 87 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 87 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 93563221d29a..f1063d62cd13 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -187,6 +187,8 @@ struct inet6_dev {
 	struct list_head	tempaddr_list;
 #endif
 
+	struct in6_addr		token;
+
 	struct neigh_parms	*nd_parms;
 	struct inet6_dev	*next;
 	struct ipv6_devconf	cnf;
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index c4edfe11f1f7..6b35c4211f65 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -201,6 +201,7 @@ enum {
 	IFLA_INET6_MCAST,	/* MC things. What of them?	*/
 	IFLA_INET6_CACHEINFO,	/* time values and max reasm size */
 	IFLA_INET6_ICMP6STATS,	/* statistics (icmpv6)		*/
+	IFLA_INET6_TOKEN,	/* device token			*/
 	__IFLA_INET6_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a33b157d9ccf..65d8139d60c5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -422,6 +422,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 		ipv6_regen_rndid((unsigned long) ndev);
 	}
 #endif
+	memset(ndev->token.s6_addr, 0, sizeof(ndev->token.s6_addr));
 
 	if (netif_running(dev) && addrconf_qdisc_ok(dev))
 		ndev->if_flags |= IF_READY;
@@ -2136,8 +2137,14 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 
 		if (pinfo->prefix_len == 64) {
 			memcpy(&addr, &pinfo->prefix, 8);
-			if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
-			    ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
+
+			if (!ipv6_addr_any(&in6_dev->token)) {
+				read_lock_bh(&in6_dev->lock);
+				memcpy(addr.s6_addr + 8,
+				       in6_dev->token.s6_addr + 8, 8);
+				read_unlock_bh(&in6_dev->lock);
+			} else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
+				   ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
 				in6_dev_put(in6_dev);
 				return;
 			}
@@ -4165,7 +4172,8 @@ static inline size_t inet6_ifla6_size(void)
 	     + nla_total_size(sizeof(struct ifla_cacheinfo))
 	     + nla_total_size(DEVCONF_MAX * 4) /* IFLA_INET6_CONF */
 	     + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
-	     + nla_total_size(ICMP6_MIB_MAX * 8); /* IFLA_INET6_ICMP6STATS */
+	     + nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
+	     + nla_total_size(sizeof(struct in6_addr)); /* IFLA_INET6_TOKEN */
 }
 
 static inline size_t inet6_if_nlmsg_size(void)
@@ -4252,6 +4260,13 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
 		goto nla_put_failure;
 	snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
 
+	nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
+	if (nla == NULL)
+		goto nla_put_failure;
+	read_lock_bh(&idev->lock);
+	memcpy(nla_data(nla), idev->token.s6_addr, nla_len(nla));
+	read_unlock_bh(&idev->lock);
+
 	return 0;
 
 nla_put_failure:
@@ -4279,6 +4294,71 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
 	return 0;
 }
 
+static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
+{
+	struct in6_addr ll_addr;
+	struct inet6_ifaddr *ifp;
+	struct net_device *dev = idev->dev;
+
+	if (token == NULL)
+		return -EINVAL;
+	if (ipv6_addr_any(token))
+		return -EINVAL;
+	if (dev->flags & (IFF_LOOPBACK | IFF_NOARP))
+		return -EINVAL;
+	if (idev->dead || !(idev->if_flags & IF_READY))
+		return -EINVAL;
+	if (!ipv6_accept_ra(idev))
+		return -EINVAL;
+	if (idev->cnf.rtr_solicits <= 0)
+		return -EINVAL;
+
+	write_lock_bh(&idev->lock);
+
+	BUILD_BUG_ON(sizeof(token->s6_addr) != 16);
+	memcpy(idev->token.s6_addr + 8, token->s6_addr + 8, 8);
+
+	write_unlock_bh(&idev->lock);
+
+	ipv6_get_lladdr(dev, &ll_addr, IFA_F_TENTATIVE | IFA_F_OPTIMISTIC);
+	ndisc_send_rs(dev, &ll_addr, &in6addr_linklocal_allrouters);
+
+	write_lock_bh(&idev->lock);
+	idev->if_flags |= IF_RS_SENT;
+
+	/* Well, that's kinda nasty ... */
+	list_for_each_entry(ifp, &idev->addr_list, if_list) {
+		spin_lock(&ifp->lock);
+		if (ipv6_addr_src_scope(&ifp->addr) ==
+		    IPV6_ADDR_SCOPE_GLOBAL) {
+			ifp->valid_lft = 0;
+			ifp->prefered_lft = 0;
+		}
+		spin_unlock(&ifp->lock);
+	}
+
+	write_unlock_bh(&idev->lock);
+	return 0;
+}
+
+static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
+{
+	int err = -EINVAL;
+	struct inet6_dev *idev = __in6_dev_get(dev);
+	struct nlattr *tb[IFLA_INET6_MAX + 1];
+
+	if (!idev)
+		return -EAFNOSUPPORT;
+
+	if (nla_parse_nested(tb, IFLA_INET6_MAX, nla, NULL) < 0)
+		BUG();
+
+	if (tb[IFLA_INET6_TOKEN])
+		err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
+
+	return err;
+}
+
 static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
 			     u32 portid, u32 seq, int event, unsigned int flags)
 {
@@ -4981,6 +5061,7 @@ static struct rtnl_af_ops inet6_ops = {
 	.family		  = AF_INET6,
 	.fill_link_af	  = inet6_fill_link_af,
 	.get_link_af_size = inet6_get_link_af_size,
+	.set_link_af	  = inet6_set_link_af,
 };
 
 /*
-- 
cgit 


From 1b8664341100716202c29d67f24d67094a82971e Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 9 Apr 2013 05:54:01 +0000
Subject: net: sctp: introduce uapi header for sctp

This patch introduces an UAPI header for the SCTP protocol,
so that we can facilitate the maintenance and development of
user land applications or libraries, in particular in terms
of header synchronization.

To not break compatibility, some fragments from lksctp-tools'
netinet/sctp.h have been carefully included, while taking care
that neither kernel nor user land breaks, so both compile fine
with this change (for lksctp-tools I tested with the old
netinet/sctp.h header and with a newly adapted one that includes
the uapi sctp header). lksctp-tools smoke test run through
successfully as well in both cases.

Suggested-by: Neil Horman <nhorman@tuxdriver.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/dlm/lowcomms.c            |   2 +-
 include/linux/sctp.h         |   6 +-
 include/net/sctp/constants.h |   1 -
 include/net/sctp/user.h      | 782 ---------------------------------------
 include/uapi/linux/Kbuild    |   1 +
 include/uapi/linux/sctp.h    | 846 +++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 850 insertions(+), 788 deletions(-)
 delete mode 100644 include/net/sctp/user.h
 create mode 100644 include/uapi/linux/sctp.h

(limited to 'include/uapi/linux')

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 4f5ad246582f..d0ccd2fd79eb 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -52,8 +52,8 @@
 #include <linux/mutex.h>
 #include <linux/sctp.h>
 #include <linux/slab.h>
+#include <linux/sctp.h>
 #include <net/sctp/sctp.h>
-#include <net/sctp/user.h>
 #include <net/ipv6.h>
 
 #include "dlm_internal.h"
diff --git a/include/linux/sctp.h b/include/linux/sctp.h
index c11a28706fa4..3bfe8d6ee248 100644
--- a/include/linux/sctp.h
+++ b/include/linux/sctp.h
@@ -53,7 +53,9 @@
 
 #include <linux/in.h>		/* We need in_addr.  */
 #include <linux/in6.h>		/* We need in6_addr.  */
+#include <linux/skbuff.h>
 
+#include <uapi/linux/sctp.h>
 
 /* Section 3.1.  SCTP Common Header Format */
 typedef struct sctphdr {
@@ -63,14 +65,10 @@ typedef struct sctphdr {
 	__le32 checksum;
 } __packed sctp_sctphdr_t;
 
-#ifdef __KERNEL__
-#include <linux/skbuff.h>
-
 static inline struct sctphdr *sctp_hdr(const struct sk_buff *skb)
 {
 	return (struct sctphdr *)skb_transport_header(skb);
 }
-#endif
 
 /* Section 3.2.  Chunk Field Descriptions. */
 typedef struct sctp_chunkhdr {
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index a7dd5c50df79..ca50e0751e47 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -49,7 +49,6 @@
 
 #include <linux/sctp.h>
 #include <linux/ipv6.h> /* For ipv6hdr. */
-#include <net/sctp/user.h>
 #include <net/tcp_states.h>  /* For TCP states used in sctp_sock_state_t */
 
 /* Value used for stream negotiation. */
diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
deleted file mode 100644
index 9a0ae091366d..000000000000
--- a/include/net/sctp/user.h
+++ /dev/null
@@ -1,782 +0,0 @@
-/* SCTP kernel implementation
- * (C) Copyright IBM Corp. 2001, 2004
- * Copyright (c) 1999-2000 Cisco, Inc.
- * Copyright (c) 1999-2001 Motorola, Inc.
- * Copyright (c) 2002 Intel Corp.
- *
- * This file is part of the SCTP kernel implementation
- *
- * This header represents the structures and constants needed to support
- * the SCTP Extension to the Sockets API.
- *
- * This SCTP implementation is free software;
- * you can redistribute it and/or modify it under the terms of
- * the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This SCTP implementation is distributed in the hope that it
- * will be useful, but WITHOUT ANY WARRANTY; without even the implied
- *                 ************************
- * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU CC; see the file COPYING.  If not, write to
- * the Free Software Foundation, 59 Temple Place - Suite 330,
- * Boston, MA 02111-1307, USA.
- *
- * Please send any bug reports or fixes you make to the
- * email address(es):
- *    lksctp developers <lksctp-developers@lists.sourceforge.net>
- *
- * Or submit a bug report through the following website:
- *    http://www.sf.net/projects/lksctp
- *
- * Written or modified by:
- *    La Monte H.P. Yarroll    <piggy@acm.org>
- *    R. Stewart               <randall@sctp.chicago.il.us>
- *    K. Morneau               <kmorneau@cisco.com>
- *    Q. Xie                   <qxie1@email.mot.com>
- *    Karl Knutson             <karl@athena.chicago.il.us>
- *    Jon Grimm                <jgrimm@us.ibm.com>
- *    Daisy Chang              <daisyc@us.ibm.com>
- *    Ryan Layer               <rmlayer@us.ibm.com>
- *    Ardelle Fan	       <ardelle.fan@intel.com>
- *    Sridhar Samudrala        <sri@us.ibm.com>
- *
- * Any bugs reported given to us we will try to fix... any fixes shared will
- * be incorporated into the next SCTP release.
- */
-
-#ifndef __net_sctp_user_h__
-#define __net_sctp_user_h__
-
-#include <linux/types.h>
-#include <linux/socket.h>
-
-typedef __s32 sctp_assoc_t;
-
-/* The following symbols come from the Sockets API Extensions for
- * SCTP <draft-ietf-tsvwg-sctpsocket-07.txt>.
- */
-#define SCTP_RTOINFO	0
-#define SCTP_ASSOCINFO  1
-#define SCTP_INITMSG	2
-#define SCTP_NODELAY	3		/* Get/set nodelay option. */
-#define SCTP_AUTOCLOSE	4
-#define SCTP_SET_PEER_PRIMARY_ADDR 5
-#define SCTP_PRIMARY_ADDR	6
-#define SCTP_ADAPTATION_LAYER	7
-#define SCTP_DISABLE_FRAGMENTS	8
-#define SCTP_PEER_ADDR_PARAMS	9
-#define SCTP_DEFAULT_SEND_PARAM	10
-#define SCTP_EVENTS	11
-#define SCTP_I_WANT_MAPPED_V4_ADDR 12	/* Turn on/off mapped v4 addresses  */
-#define SCTP_MAXSEG	13		/* Get/set maximum fragment. */
-#define SCTP_STATUS	14
-#define SCTP_GET_PEER_ADDR_INFO	15
-#define SCTP_DELAYED_ACK_TIME	16
-#define SCTP_DELAYED_ACK SCTP_DELAYED_ACK_TIME
-#define SCTP_DELAYED_SACK SCTP_DELAYED_ACK_TIME
-#define SCTP_CONTEXT	17
-#define SCTP_FRAGMENT_INTERLEAVE	18
-#define SCTP_PARTIAL_DELIVERY_POINT	19 /* Set/Get partial delivery point */
-#define SCTP_MAX_BURST	20		/* Set/Get max burst */
-#define SCTP_AUTH_CHUNK	21	/* Set only: add a chunk type to authenticate */
-#define SCTP_HMAC_IDENT	22
-#define SCTP_AUTH_KEY	23
-#define SCTP_AUTH_ACTIVE_KEY	24
-#define SCTP_AUTH_DELETE_KEY	25
-#define SCTP_PEER_AUTH_CHUNKS	26	/* Read only */
-#define SCTP_LOCAL_AUTH_CHUNKS	27	/* Read only */
-#define SCTP_GET_ASSOC_NUMBER	28	/* Read only */
-#define SCTP_GET_ASSOC_ID_LIST	29	/* Read only */
-#define SCTP_AUTO_ASCONF       30
-#define SCTP_PEER_ADDR_THLDS	31
-
-/* Internal Socket Options. Some of the sctp library functions are
- * implemented using these socket options.
- */
-#define SCTP_SOCKOPT_BINDX_ADD	100	/* BINDX requests for adding addrs */
-#define SCTP_SOCKOPT_BINDX_REM	101	/* BINDX requests for removing addrs. */
-#define SCTP_SOCKOPT_PEELOFF	102	/* peel off association. */
-/* Options 104-106 are deprecated and removed. Do not use this space */
-#define SCTP_SOCKOPT_CONNECTX_OLD	107	/* CONNECTX old requests. */
-#define SCTP_GET_PEER_ADDRS	108		/* Get all peer address. */
-#define SCTP_GET_LOCAL_ADDRS	109		/* Get all local address. */
-#define SCTP_SOCKOPT_CONNECTX	110		/* CONNECTX requests. */
-#define SCTP_SOCKOPT_CONNECTX3	111	/* CONNECTX requests (updated) */
-#define SCTP_GET_ASSOC_STATS	112	/* Read only */
-
-/*
- * 5.2.1 SCTP Initiation Structure (SCTP_INIT)
- *
- *   This cmsghdr structure provides information for initializing new
- *   SCTP associations with sendmsg().  The SCTP_INITMSG socket option
- *   uses this same data structure.  This structure is not used for
- *   recvmsg().
- *
- *   cmsg_level    cmsg_type      cmsg_data[]
- *   ------------  ------------   ----------------------
- *   IPPROTO_SCTP  SCTP_INIT      struct sctp_initmsg
- *
- */
-struct sctp_initmsg {
-	__u16 sinit_num_ostreams;
-	__u16 sinit_max_instreams;
-	__u16 sinit_max_attempts;
-	__u16 sinit_max_init_timeo;
-};
-
-/*
- * 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV)
- *
- *   This cmsghdr structure specifies SCTP options for sendmsg() and
- *   describes SCTP header information about a received message through
- *   recvmsg().
- *
- *   cmsg_level    cmsg_type      cmsg_data[]
- *   ------------  ------------   ----------------------
- *   IPPROTO_SCTP  SCTP_SNDRCV    struct sctp_sndrcvinfo
- *
- */
-struct sctp_sndrcvinfo {
-	__u16 sinfo_stream;
-	__u16 sinfo_ssn;
-	__u16 sinfo_flags;
-	__u32 sinfo_ppid;
-	__u32 sinfo_context;
-	__u32 sinfo_timetolive;
-	__u32 sinfo_tsn;
-	__u32 sinfo_cumtsn;
-	sctp_assoc_t sinfo_assoc_id;
-};
-
-/*
- *  sinfo_flags: 16 bits (unsigned integer)
- *
- *   This field may contain any of the following flags and is composed of
- *   a bitwise OR of these values.
- */
-
-enum sctp_sinfo_flags {
-	SCTP_UNORDERED = 1,  /* Send/receive message unordered. */
-	SCTP_ADDR_OVER = 2,  /* Override the primary destination. */
-	SCTP_ABORT=4,        /* Send an ABORT message to the peer. */
-	SCTP_SACK_IMMEDIATELY = 8,	/* SACK should be sent without delay */
-	SCTP_EOF=MSG_FIN,    /* Initiate graceful shutdown process. */	
-};
-
-
-/* These are cmsg_types.  */
-typedef enum sctp_cmsg_type {
-	SCTP_INIT,              /* 5.2.1 SCTP Initiation Structure */
-	SCTP_SNDRCV,            /* 5.2.2 SCTP Header Information Structure */
-} sctp_cmsg_t;
-
-
-/*
- * 5.3.1.1 SCTP_ASSOC_CHANGE
- *
- *   Communication notifications inform the ULP that an SCTP association
- *   has either begun or ended. The identifier for a new association is
- *   provided by this notificaion. The notification information has the
- *   following format:
- *
- */
-struct sctp_assoc_change {
-	__u16 sac_type;
-	__u16 sac_flags;
-	__u32 sac_length;
-	__u16 sac_state;
-	__u16 sac_error;
-	__u16 sac_outbound_streams;
-	__u16 sac_inbound_streams;
-	sctp_assoc_t sac_assoc_id;
-	__u8 sac_info[0];
-};
-
-/*
- *   sac_state: 32 bits (signed integer)
- *
- *   This field holds one of a number of values that communicate the
- *   event that happened to the association.  They include:
- *
- *   Note:  The following state names deviate from the API draft as
- *   the names clash too easily with other kernel symbols.
- */
-enum sctp_sac_state {
-	SCTP_COMM_UP,
-	SCTP_COMM_LOST,
-	SCTP_RESTART,
-	SCTP_SHUTDOWN_COMP,
-	SCTP_CANT_STR_ASSOC,
-};
-
-/*
- * 5.3.1.2 SCTP_PEER_ADDR_CHANGE
- *
- *   When a destination address on a multi-homed peer encounters a change
- *   an interface details event is sent.  The information has the
- *   following structure:
- */
-struct sctp_paddr_change {
-	__u16 spc_type;
-	__u16 spc_flags;
-	__u32 spc_length;
-	struct sockaddr_storage spc_aaddr;
-	int spc_state;
-	int spc_error;
-	sctp_assoc_t spc_assoc_id;
-} __attribute__((packed, aligned(4)));
-
-/*
- *    spc_state:  32 bits (signed integer)
- *
- *   This field holds one of a number of values that communicate the
- *   event that happened to the address.  They include:
- */
-enum sctp_spc_state {
-	SCTP_ADDR_AVAILABLE,
-	SCTP_ADDR_UNREACHABLE,
-	SCTP_ADDR_REMOVED,
-	SCTP_ADDR_ADDED,
-	SCTP_ADDR_MADE_PRIM,
-	SCTP_ADDR_CONFIRMED,
-};
-
-
-/*
- * 5.3.1.3 SCTP_REMOTE_ERROR
- *
- *   A remote peer may send an Operational Error message to its peer.
- *   This message indicates a variety of error conditions on an
- *   association. The entire error TLV as it appears on the wire is
- *   included in a SCTP_REMOTE_ERROR event.  Please refer to the SCTP
- *   specification [SCTP] and any extensions for a list of possible
- *   error formats. SCTP error TLVs have the format:
- */
-struct sctp_remote_error {
-	__u16 sre_type;
-	__u16 sre_flags;
-	__u32 sre_length;
-	__u16 sre_error;
-	sctp_assoc_t sre_assoc_id;
-	__u8 sre_data[0];
-};
-
-
-/*
- * 5.3.1.4 SCTP_SEND_FAILED
- *
- *   If SCTP cannot deliver a message it may return the message as a
- *   notification.
- */
-struct sctp_send_failed {
-	__u16 ssf_type;
-	__u16 ssf_flags;
-	__u32 ssf_length;
-	__u32 ssf_error;
-	struct sctp_sndrcvinfo ssf_info;
-	sctp_assoc_t ssf_assoc_id;
-	__u8 ssf_data[0];
-};
-
-/*
- *   ssf_flags: 16 bits (unsigned integer)
- *
- *   The flag value will take one of the following values
- *
- *   SCTP_DATA_UNSENT  - Indicates that the data was never put on
- *                       the wire.
- *
- *   SCTP_DATA_SENT    - Indicates that the data was put on the wire.
- *                       Note that this does not necessarily mean that the
- *                       data was (or was not) successfully delivered.
- */
-enum sctp_ssf_flags {
-	SCTP_DATA_UNSENT,
-	SCTP_DATA_SENT,
-};
-
-/*
- * 5.3.1.5 SCTP_SHUTDOWN_EVENT
- *
- *   When a peer sends a SHUTDOWN, SCTP delivers this notification to
- *   inform the application that it should cease sending data.
- */
-struct sctp_shutdown_event {
-	__u16 sse_type;
-	__u16 sse_flags;
-	__u32 sse_length;
-	sctp_assoc_t sse_assoc_id;
-};
-
-/*
- * 5.3.1.6 SCTP_ADAPTATION_INDICATION
- *
- *   When a peer sends a Adaptation Layer Indication parameter , SCTP
- *   delivers this notification to inform the application
- *   that of the peers requested adaptation layer.
- */
-struct sctp_adaptation_event {
-	__u16 sai_type;
-	__u16 sai_flags;
-	__u32 sai_length;
-	__u32 sai_adaptation_ind;
-	sctp_assoc_t sai_assoc_id;
-};
-
-/*
- * 5.3.1.7 SCTP_PARTIAL_DELIVERY_EVENT
- *
- *   When a receiver is engaged in a partial delivery of a
- *   message this notification will be used to indicate
- *   various events.
- */
-struct sctp_pdapi_event {
-	__u16 pdapi_type;
-	__u16 pdapi_flags;
-	__u32 pdapi_length;
-	__u32 pdapi_indication;
-	sctp_assoc_t pdapi_assoc_id;
-};
-
-enum { SCTP_PARTIAL_DELIVERY_ABORTED=0, };
-
-struct sctp_authkey_event {
-	__u16 auth_type;
-	__u16 auth_flags;
-	__u32 auth_length;
-	__u16 auth_keynumber;
-	__u16 auth_altkeynumber;
-	__u32 auth_indication;
-	sctp_assoc_t auth_assoc_id;
-};
-
-enum { SCTP_AUTH_NEWKEY = 0, };
-
-/*
- * 6.1.9. SCTP_SENDER_DRY_EVENT
- *
- * When the SCTP stack has no more user data to send or retransmit, this
- * notification is given to the user. Also, at the time when a user app
- * subscribes to this event, if there is no data to be sent or
- * retransmit, the stack will immediately send up this notification.
- */
-struct sctp_sender_dry_event {
-	__u16 sender_dry_type;
-	__u16 sender_dry_flags;
-	__u32 sender_dry_length;
-	sctp_assoc_t sender_dry_assoc_id;
-};
-
-/*
- * Described in Section 7.3
- *   Ancillary Data and Notification Interest Options
- */
-struct sctp_event_subscribe {
-	__u8 sctp_data_io_event;
-	__u8 sctp_association_event;
-	__u8 sctp_address_event;
-	__u8 sctp_send_failure_event;
-	__u8 sctp_peer_error_event;
-	__u8 sctp_shutdown_event;
-	__u8 sctp_partial_delivery_event;
-	__u8 sctp_adaptation_layer_event;
-	__u8 sctp_authentication_event;
-	__u8 sctp_sender_dry_event;
-};
-
-/*
- * 5.3.1 SCTP Notification Structure
- *
- *   The notification structure is defined as the union of all
- *   notification types.
- *
- */
-union sctp_notification {
-	struct {
-		__u16 sn_type;             /* Notification type. */
-		__u16 sn_flags;
-		__u32 sn_length;
-	} sn_header;
-	struct sctp_assoc_change sn_assoc_change;
-	struct sctp_paddr_change sn_paddr_change;
-	struct sctp_remote_error sn_remote_error;
-	struct sctp_send_failed sn_send_failed;
-	struct sctp_shutdown_event sn_shutdown_event;
-	struct sctp_adaptation_event sn_adaptation_event;
-	struct sctp_pdapi_event sn_pdapi_event;
-	struct sctp_authkey_event sn_authkey_event;
-	struct sctp_sender_dry_event sn_sender_dry_event;
-};
-
-/* Section 5.3.1
- * All standard values for sn_type flags are greater than 2^15.
- * Values from 2^15 and down are reserved.
- */
-
-enum sctp_sn_type {
-	SCTP_SN_TYPE_BASE     = (1<<15),
-	SCTP_ASSOC_CHANGE,
-	SCTP_PEER_ADDR_CHANGE,
-	SCTP_SEND_FAILED,
-	SCTP_REMOTE_ERROR,
-	SCTP_SHUTDOWN_EVENT,
-	SCTP_PARTIAL_DELIVERY_EVENT,
-	SCTP_ADAPTATION_INDICATION,
-	SCTP_AUTHENTICATION_EVENT,
-#define SCTP_AUTHENTICATION_INDICATION	SCTP_AUTHENTICATION_EVENT
-	SCTP_SENDER_DRY_EVENT,
-};
-
-/* Notification error codes used to fill up the error fields in some
- * notifications.
- * SCTP_PEER_ADDRESS_CHAGE 	: spc_error
- * SCTP_ASSOC_CHANGE		: sac_error
- * These names should be potentially included in the draft 04 of the SCTP
- * sockets API specification.
- */
-typedef enum sctp_sn_error {
-	SCTP_FAILED_THRESHOLD,
-	SCTP_RECEIVED_SACK,
-	SCTP_HEARTBEAT_SUCCESS,
-	SCTP_RESPONSE_TO_USER_REQ,
-	SCTP_INTERNAL_ERROR,
-	SCTP_SHUTDOWN_GUARD_EXPIRES,
-	SCTP_PEER_FAULTY,
-} sctp_sn_error_t;
-
-/*
- * 7.1.1 Retransmission Timeout Parameters (SCTP_RTOINFO)
- *
- *   The protocol parameters used to initialize and bound retransmission
- *   timeout (RTO) are tunable.  See [SCTP] for more information on how
- *   these parameters are used in RTO calculation. 
- */
-struct sctp_rtoinfo {
-	sctp_assoc_t	srto_assoc_id;
-	__u32		srto_initial;
-	__u32		srto_max;
-	__u32		srto_min;
-};
-
-/*
- * 7.1.2 Association Parameters (SCTP_ASSOCINFO)
- *
- *   This option is used to both examine and set various association and
- *   endpoint parameters.
- */
-struct sctp_assocparams {
-	sctp_assoc_t	sasoc_assoc_id;
-	__u16		sasoc_asocmaxrxt;
-	__u16		sasoc_number_peer_destinations;
-	__u32		sasoc_peer_rwnd;
-	__u32		sasoc_local_rwnd;
-	__u32		sasoc_cookie_life;
-};
-
-/*
- * 7.1.9 Set Peer Primary Address (SCTP_SET_PEER_PRIMARY_ADDR)
- *
- *  Requests that the peer mark the enclosed address as the association
- *  primary. The enclosed address must be one of the association's
- *  locally bound addresses. The following structure is used to make a
- *   set primary request:
- */
-struct sctp_setpeerprim {
-	sctp_assoc_t            sspp_assoc_id;
-	struct sockaddr_storage sspp_addr;
-} __attribute__((packed, aligned(4)));
-
-/*
- * 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR)
- *
- *  Requests that the local SCTP stack use the enclosed peer address as
- *  the association primary. The enclosed address must be one of the
- *  association peer's addresses. The following structure is used to
- *  make a set peer primary request:
- */
-struct sctp_prim {
-	sctp_assoc_t            ssp_assoc_id;
-	struct sockaddr_storage ssp_addr;
-} __attribute__((packed, aligned(4)));
-
-/*
- * 7.1.11 Set Adaptation Layer Indicator (SCTP_ADAPTATION_LAYER)
- *
- * Requests that the local endpoint set the specified Adaptation Layer
- * Indication parameter for all future INIT and INIT-ACK exchanges.
- */
-struct sctp_setadaptation {
-	__u32	ssb_adaptation_ind;
-};
-
-/*
- * 7.1.13 Peer Address Parameters  (SCTP_PEER_ADDR_PARAMS)
- *
- *   Applications can enable or disable heartbeats for any peer address
- *   of an association, modify an address's heartbeat interval, force a
- *   heartbeat to be sent immediately, and adjust the address's maximum
- *   number of retransmissions sent before an address is considered
- *   unreachable. The following structure is used to access and modify an
- *   address's parameters:
- */
-enum  sctp_spp_flags {
-	SPP_HB_ENABLE = 1<<0,		/*Enable heartbeats*/
-	SPP_HB_DISABLE = 1<<1,		/*Disable heartbeats*/
-	SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE,
-	SPP_HB_DEMAND = 1<<2,		/*Send heartbeat immediately*/
-	SPP_PMTUD_ENABLE = 1<<3,	/*Enable PMTU discovery*/
-	SPP_PMTUD_DISABLE = 1<<4,	/*Disable PMTU discovery*/
-	SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE,
-	SPP_SACKDELAY_ENABLE = 1<<5,	/*Enable SACK*/
-	SPP_SACKDELAY_DISABLE = 1<<6,	/*Disable SACK*/
-	SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE,
-	SPP_HB_TIME_IS_ZERO = 1<<7,	/* Set HB delay to 0 */
-};
-
-struct sctp_paddrparams {
-	sctp_assoc_t		spp_assoc_id;
-	struct sockaddr_storage	spp_address;
-	__u32			spp_hbinterval;
-	__u16			spp_pathmaxrxt;
-	__u32			spp_pathmtu;
-	__u32			spp_sackdelay;
-	__u32			spp_flags;
-} __attribute__((packed, aligned(4)));
-
-/*
- * 7.1.18.  Add a chunk that must be authenticated (SCTP_AUTH_CHUNK)
- *
- * This set option adds a chunk type that the user is requesting to be
- * received only in an authenticated way.  Changes to the list of chunks
- * will only effect future associations on the socket.
- */
-struct sctp_authchunk {
-	__u8		sauth_chunk;
-};
-
-/*
- * 7.1.19.  Get or set the list of supported HMAC Identifiers (SCTP_HMAC_IDENT)
- *
- * This option gets or sets the list of HMAC algorithms that the local
- * endpoint requires the peer to use.
-*/
-struct sctp_hmacalgo {
-	__u32		shmac_num_idents;
-	__u16		shmac_idents[];
-};
-
-/*
- * 7.1.20.  Set a shared key (SCTP_AUTH_KEY)
- *
- * This option will set a shared secret key which is used to build an
- * association shared key.
- */
-struct sctp_authkey {
-	sctp_assoc_t	sca_assoc_id;
-	__u16		sca_keynumber;
-	__u16		sca_keylength;
-	__u8		sca_key[];
-};
-
-/*
- * 7.1.21.  Get or set the active shared key (SCTP_AUTH_ACTIVE_KEY)
- *
- * This option will get or set the active shared key to be used to build
- * the association shared key.
- */
-
-struct sctp_authkeyid {
-	sctp_assoc_t	scact_assoc_id;
-	__u16		scact_keynumber;
-};
-
-
-/*
- * 7.1.23.  Get or set delayed ack timer (SCTP_DELAYED_SACK)
- *
- * This option will effect the way delayed acks are performed.  This
- * option allows you to get or set the delayed ack time, in
- * milliseconds.  It also allows changing the delayed ack frequency.
- * Changing the frequency to 1 disables the delayed sack algorithm.  If
- * the assoc_id is 0, then this sets or gets the endpoints default
- * values.  If the assoc_id field is non-zero, then the set or get
- * effects the specified association for the one to many model (the
- * assoc_id field is ignored by the one to one model).  Note that if
- * sack_delay or sack_freq are 0 when setting this option, then the
- * current values will remain unchanged.
- */
-struct sctp_sack_info {
-	sctp_assoc_t	sack_assoc_id;
-	uint32_t	sack_delay;
-	uint32_t	sack_freq;
-};
-
-struct sctp_assoc_value {
-    sctp_assoc_t            assoc_id;
-    uint32_t                assoc_value;
-};
-
-/*
- * 7.2.2 Peer Address Information
- *
- *   Applications can retrieve information about a specific peer address
- *   of an association, including its reachability state, congestion
- *   window, and retransmission timer values.  This information is
- *   read-only. The following structure is used to access this
- *   information:
- */
-struct sctp_paddrinfo {
-	sctp_assoc_t		spinfo_assoc_id;
-	struct sockaddr_storage	spinfo_address;
-	__s32			spinfo_state;
-	__u32			spinfo_cwnd;
-	__u32			spinfo_srtt;
-	__u32			spinfo_rto;
-	__u32			spinfo_mtu;
-} __attribute__((packed, aligned(4)));
-
-/* Peer addresses's state. */
-/* UNKNOWN: Peer address passed by the upper layer in sendmsg or connect[x]
- * calls.
- * UNCONFIRMED: Peer address received in INIT/INIT-ACK address parameters.
- *              Not yet confirmed by a heartbeat and not available for data
- *		transfers.
- * ACTIVE : Peer address confirmed, active and available for data transfers.
- * INACTIVE: Peer address inactive and not available for data transfers.
- */
-enum sctp_spinfo_state {
-	SCTP_INACTIVE,
-	SCTP_PF,
-	SCTP_ACTIVE,
-	SCTP_UNCONFIRMED,
-	SCTP_UNKNOWN = 0xffff  /* Value used for transport state unknown */
-};
-
-/*
- * 7.2.1 Association Status (SCTP_STATUS)
- *
- *   Applications can retrieve current status information about an
- *   association, including association state, peer receiver window size,
- *   number of unacked data chunks, and number of data chunks pending
- *   receipt.  This information is read-only.  The following structure is
- *   used to access this information:
- */
-struct sctp_status {
-	sctp_assoc_t		sstat_assoc_id;
-	__s32			sstat_state;
-	__u32			sstat_rwnd;
-	__u16			sstat_unackdata;
-	__u16			sstat_penddata;
-	__u16			sstat_instrms;
-	__u16			sstat_outstrms;
-	__u32			sstat_fragmentation_point;
-	struct sctp_paddrinfo	sstat_primary;
-};
-
-/*
- * 7.2.3.  Get the list of chunks the peer requires to be authenticated
- *         (SCTP_PEER_AUTH_CHUNKS)
- *
- * This option gets a list of chunks for a specified association that
- * the peer requires to be received authenticated only.
- */
-struct sctp_authchunks {
-	sctp_assoc_t	gauth_assoc_id;
-	__u32		gauth_number_of_chunks;
-	uint8_t		gauth_chunks[];
-};
-
-/*
- * 8.2.6. Get the Current Identifiers of Associations
- *        (SCTP_GET_ASSOC_ID_LIST)
- *
- * This option gets the current list of SCTP association identifiers of
- * the SCTP associations handled by a one-to-many style socket.
- */
-struct sctp_assoc_ids {
-	__u32		gaids_number_of_ids;
-	sctp_assoc_t	gaids_assoc_id[];
-};
-
-/*
- * 8.3, 8.5 get all peer/local addresses in an association.
- * This parameter struct is used by SCTP_GET_PEER_ADDRS and 
- * SCTP_GET_LOCAL_ADDRS socket options used internally to implement
- * sctp_getpaddrs() and sctp_getladdrs() API. 
- */
-struct sctp_getaddrs_old {
-	sctp_assoc_t            assoc_id;
-	int			addr_num;
-	struct sockaddr		__user *addrs;
-};
-struct sctp_getaddrs {
-	sctp_assoc_t		assoc_id; /*input*/
-	__u32			addr_num; /*output*/
-	__u8			addrs[0]; /*output, variable size*/
-};
-
-/* A socket user request obtained via SCTP_GET_ASSOC_STATS that retrieves
- * association stats. All stats are counts except sas_maxrto and
- * sas_obs_rto_ipaddr. maxrto is the max observed rto + transport since
- * the last call. Will return 0 when RTO was not update since last call
- */
-struct sctp_assoc_stats {
-	sctp_assoc_t	sas_assoc_id;    /* Input */
-					 /* Transport of observed max RTO */
-	struct sockaddr_storage sas_obs_rto_ipaddr;
-	__u64		sas_maxrto;      /* Maximum Observed RTO for period */
-	__u64		sas_isacks;	 /* SACKs received */
-	__u64		sas_osacks;	 /* SACKs sent */
-	__u64		sas_opackets;	 /* Packets sent */
-	__u64		sas_ipackets;	 /* Packets received */
-	__u64		sas_rtxchunks;   /* Retransmitted Chunks */
-	__u64		sas_outofseqtsns;/* TSN received > next expected */
-	__u64		sas_idupchunks;  /* Dups received (ordered+unordered) */
-	__u64		sas_gapcnt;      /* Gap Acknowledgements Received */
-	__u64		sas_ouodchunks;  /* Unordered data chunks sent */
-	__u64		sas_iuodchunks;  /* Unordered data chunks received */
-	__u64		sas_oodchunks;	 /* Ordered data chunks sent */
-	__u64		sas_iodchunks;	 /* Ordered data chunks received */
-	__u64		sas_octrlchunks; /* Control chunks sent */
-	__u64		sas_ictrlchunks; /* Control chunks received */
-};
-
-/* These are bit fields for msghdr->msg_flags.  See section 5.1.  */
-/* On user space Linux, these live in <bits/socket.h> as an enum.  */
-enum sctp_msg_flags {
-	MSG_NOTIFICATION = 0x8000,
-#define MSG_NOTIFICATION MSG_NOTIFICATION
-};
-
-/*
- * 8.1 sctp_bindx()
- *
- * The flags parameter is formed from the bitwise OR of zero or more of the
- * following currently defined flags:
- */
-#define SCTP_BINDX_ADD_ADDR 0x01
-#define SCTP_BINDX_REM_ADDR 0x02
-
-/* This is the structure that is passed as an argument(optval) to
- * getsockopt(SCTP_SOCKOPT_PEELOFF).
- */
-typedef struct {
-	sctp_assoc_t associd;
-	int sd;
-} sctp_peeloff_arg_t;
-
-/*
- *  Peer Address Thresholds socket option
- */
-struct sctp_paddrthlds {
-	sctp_assoc_t spt_assoc_id;
-	struct sockaddr_storage spt_address;
-	__u16 spt_pathmaxrxt;
-	__u16 spt_pathpfthld;
-};
-#endif /* __net_sctp_user_h__ */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 5c8a1d25e21c..7df190525337 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -331,6 +331,7 @@ header-y += rtnetlink.h
 header-y += scc.h
 header-y += sched.h
 header-y += screen_info.h
+header-y += sctp.h
 header-y += sdla.h
 header-y += seccomp.h
 header-y += securebits.h
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
new file mode 100644
index 000000000000..66b466e4ca08
--- /dev/null
+++ b/include/uapi/linux/sctp.h
@@ -0,0 +1,846 @@
+/* SCTP kernel implementation
+ * (C) Copyright IBM Corp. 2001, 2004
+ * Copyright (c) 1999-2000 Cisco, Inc.
+ * Copyright (c) 1999-2001 Motorola, Inc.
+ * Copyright (c) 2002 Intel Corp.
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * This header represents the structures and constants needed to support
+ * the SCTP Extension to the Sockets API.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ *                 ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email address(es):
+ *    lksctp developers <lksctp-developers@lists.sourceforge.net>
+ *
+ * Or submit a bug report through the following website:
+ *    http://www.sf.net/projects/lksctp
+ *
+ * Written or modified by:
+ *    La Monte H.P. Yarroll    <piggy@acm.org>
+ *    R. Stewart               <randall@sctp.chicago.il.us>
+ *    K. Morneau               <kmorneau@cisco.com>
+ *    Q. Xie                   <qxie1@email.mot.com>
+ *    Karl Knutson             <karl@athena.chicago.il.us>
+ *    Jon Grimm                <jgrimm@us.ibm.com>
+ *    Daisy Chang              <daisyc@us.ibm.com>
+ *    Ryan Layer               <rmlayer@us.ibm.com>
+ *    Ardelle Fan              <ardelle.fan@intel.com>
+ *    Sridhar Samudrala        <sri@us.ibm.com>
+ *    Inaky Perez-Gonzalez     <inaky.gonzalez@intel.com>
+ *    Vlad Yasevich            <vladislav.yasevich@hp.com>
+ *
+ * Any bugs reported given to us we will try to fix... any fixes shared will
+ * be incorporated into the next SCTP release.
+ */
+
+#ifndef _UAPI_SCTP_H
+#define _UAPI_SCTP_H
+
+#include <linux/types.h>
+#include <linux/socket.h>
+
+typedef __s32 sctp_assoc_t;
+
+/* The following symbols come from the Sockets API Extensions for
+ * SCTP <draft-ietf-tsvwg-sctpsocket-07.txt>.
+ */
+#define SCTP_RTOINFO	0
+#define SCTP_ASSOCINFO  1
+#define SCTP_INITMSG	2
+#define SCTP_NODELAY	3		/* Get/set nodelay option. */
+#define SCTP_AUTOCLOSE	4
+#define SCTP_SET_PEER_PRIMARY_ADDR 5
+#define SCTP_PRIMARY_ADDR	6
+#define SCTP_ADAPTATION_LAYER	7
+#define SCTP_DISABLE_FRAGMENTS	8
+#define SCTP_PEER_ADDR_PARAMS	9
+#define SCTP_DEFAULT_SEND_PARAM	10
+#define SCTP_EVENTS	11
+#define SCTP_I_WANT_MAPPED_V4_ADDR 12	/* Turn on/off mapped v4 addresses  */
+#define SCTP_MAXSEG	13		/* Get/set maximum fragment. */
+#define SCTP_STATUS	14
+#define SCTP_GET_PEER_ADDR_INFO	15
+#define SCTP_DELAYED_ACK_TIME	16
+#define SCTP_DELAYED_ACK SCTP_DELAYED_ACK_TIME
+#define SCTP_DELAYED_SACK SCTP_DELAYED_ACK_TIME
+#define SCTP_CONTEXT	17
+#define SCTP_FRAGMENT_INTERLEAVE	18
+#define SCTP_PARTIAL_DELIVERY_POINT	19 /* Set/Get partial delivery point */
+#define SCTP_MAX_BURST	20		/* Set/Get max burst */
+#define SCTP_AUTH_CHUNK	21	/* Set only: add a chunk type to authenticate */
+#define SCTP_HMAC_IDENT	22
+#define SCTP_AUTH_KEY	23
+#define SCTP_AUTH_ACTIVE_KEY	24
+#define SCTP_AUTH_DELETE_KEY	25
+#define SCTP_PEER_AUTH_CHUNKS	26	/* Read only */
+#define SCTP_LOCAL_AUTH_CHUNKS	27	/* Read only */
+#define SCTP_GET_ASSOC_NUMBER	28	/* Read only */
+#define SCTP_GET_ASSOC_ID_LIST	29	/* Read only */
+#define SCTP_AUTO_ASCONF       30
+#define SCTP_PEER_ADDR_THLDS	31
+
+/* Internal Socket Options. Some of the sctp library functions are
+ * implemented using these socket options.
+ */
+#define SCTP_SOCKOPT_BINDX_ADD	100	/* BINDX requests for adding addrs */
+#define SCTP_SOCKOPT_BINDX_REM	101	/* BINDX requests for removing addrs. */
+#define SCTP_SOCKOPT_PEELOFF	102	/* peel off association. */
+/* Options 104-106 are deprecated and removed. Do not use this space */
+#define SCTP_SOCKOPT_CONNECTX_OLD	107	/* CONNECTX old requests. */
+#define SCTP_GET_PEER_ADDRS	108		/* Get all peer address. */
+#define SCTP_GET_LOCAL_ADDRS	109		/* Get all local address. */
+#define SCTP_SOCKOPT_CONNECTX	110		/* CONNECTX requests. */
+#define SCTP_SOCKOPT_CONNECTX3	111	/* CONNECTX requests (updated) */
+#define SCTP_GET_ASSOC_STATS	112	/* Read only */
+
+/*
+ * 5.2.1 SCTP Initiation Structure (SCTP_INIT)
+ *
+ *   This cmsghdr structure provides information for initializing new
+ *   SCTP associations with sendmsg().  The SCTP_INITMSG socket option
+ *   uses this same data structure.  This structure is not used for
+ *   recvmsg().
+ *
+ *   cmsg_level    cmsg_type      cmsg_data[]
+ *   ------------  ------------   ----------------------
+ *   IPPROTO_SCTP  SCTP_INIT      struct sctp_initmsg
+ *
+ */
+struct sctp_initmsg {
+	__u16 sinit_num_ostreams;
+	__u16 sinit_max_instreams;
+	__u16 sinit_max_attempts;
+	__u16 sinit_max_init_timeo;
+};
+
+/*
+ * 5.2.2 SCTP Header Information Structure (SCTP_SNDRCV)
+ *
+ *   This cmsghdr structure specifies SCTP options for sendmsg() and
+ *   describes SCTP header information about a received message through
+ *   recvmsg().
+ *
+ *   cmsg_level    cmsg_type      cmsg_data[]
+ *   ------------  ------------   ----------------------
+ *   IPPROTO_SCTP  SCTP_SNDRCV    struct sctp_sndrcvinfo
+ *
+ */
+struct sctp_sndrcvinfo {
+	__u16 sinfo_stream;
+	__u16 sinfo_ssn;
+	__u16 sinfo_flags;
+	__u32 sinfo_ppid;
+	__u32 sinfo_context;
+	__u32 sinfo_timetolive;
+	__u32 sinfo_tsn;
+	__u32 sinfo_cumtsn;
+	sctp_assoc_t sinfo_assoc_id;
+};
+
+/*
+ *  sinfo_flags: 16 bits (unsigned integer)
+ *
+ *   This field may contain any of the following flags and is composed of
+ *   a bitwise OR of these values.
+ */
+
+enum sctp_sinfo_flags {
+	SCTP_UNORDERED = 1,  /* Send/receive message unordered. */
+	SCTP_ADDR_OVER = 2,  /* Override the primary destination. */
+	SCTP_ABORT=4,        /* Send an ABORT message to the peer. */
+	SCTP_SACK_IMMEDIATELY = 8,	/* SACK should be sent without delay */
+	SCTP_EOF=MSG_FIN,    /* Initiate graceful shutdown process. */
+};
+
+typedef union {
+	__u8   			raw;
+	struct sctp_initmsg	init;
+	struct sctp_sndrcvinfo	sndrcv;
+} sctp_cmsg_data_t;
+
+/* These are cmsg_types.  */
+typedef enum sctp_cmsg_type {
+	SCTP_INIT,              /* 5.2.1 SCTP Initiation Structure */
+#define SCTP_INIT	SCTP_INIT
+	SCTP_SNDRCV,            /* 5.2.2 SCTP Header Information Structure */
+#define SCTP_SNDRCV	SCTP_SNDRCV
+} sctp_cmsg_t;
+
+/*
+ * 5.3.1.1 SCTP_ASSOC_CHANGE
+ *
+ *   Communication notifications inform the ULP that an SCTP association
+ *   has either begun or ended. The identifier for a new association is
+ *   provided by this notificaion. The notification information has the
+ *   following format:
+ *
+ */
+struct sctp_assoc_change {
+	__u16 sac_type;
+	__u16 sac_flags;
+	__u32 sac_length;
+	__u16 sac_state;
+	__u16 sac_error;
+	__u16 sac_outbound_streams;
+	__u16 sac_inbound_streams;
+	sctp_assoc_t sac_assoc_id;
+	__u8 sac_info[0];
+};
+
+/*
+ *   sac_state: 32 bits (signed integer)
+ *
+ *   This field holds one of a number of values that communicate the
+ *   event that happened to the association.  They include:
+ *
+ *   Note:  The following state names deviate from the API draft as
+ *   the names clash too easily with other kernel symbols.
+ */
+enum sctp_sac_state {
+	SCTP_COMM_UP,
+	SCTP_COMM_LOST,
+	SCTP_RESTART,
+	SCTP_SHUTDOWN_COMP,
+	SCTP_CANT_STR_ASSOC,
+};
+
+/*
+ * 5.3.1.2 SCTP_PEER_ADDR_CHANGE
+ *
+ *   When a destination address on a multi-homed peer encounters a change
+ *   an interface details event is sent.  The information has the
+ *   following structure:
+ */
+struct sctp_paddr_change {
+	__u16 spc_type;
+	__u16 spc_flags;
+	__u32 spc_length;
+	struct sockaddr_storage spc_aaddr;
+	int spc_state;
+	int spc_error;
+	sctp_assoc_t spc_assoc_id;
+} __attribute__((packed, aligned(4)));
+
+/*
+ *    spc_state:  32 bits (signed integer)
+ *
+ *   This field holds one of a number of values that communicate the
+ *   event that happened to the address.  They include:
+ */
+enum sctp_spc_state {
+	SCTP_ADDR_AVAILABLE,
+	SCTP_ADDR_UNREACHABLE,
+	SCTP_ADDR_REMOVED,
+	SCTP_ADDR_ADDED,
+	SCTP_ADDR_MADE_PRIM,
+	SCTP_ADDR_CONFIRMED,
+};
+
+
+/*
+ * 5.3.1.3 SCTP_REMOTE_ERROR
+ *
+ *   A remote peer may send an Operational Error message to its peer.
+ *   This message indicates a variety of error conditions on an
+ *   association. The entire error TLV as it appears on the wire is
+ *   included in a SCTP_REMOTE_ERROR event.  Please refer to the SCTP
+ *   specification [SCTP] and any extensions for a list of possible
+ *   error formats. SCTP error TLVs have the format:
+ */
+struct sctp_remote_error {
+	__u16 sre_type;
+	__u16 sre_flags;
+	__u32 sre_length;
+	__u16 sre_error;
+	sctp_assoc_t sre_assoc_id;
+	__u8 sre_data[0];
+};
+
+
+/*
+ * 5.3.1.4 SCTP_SEND_FAILED
+ *
+ *   If SCTP cannot deliver a message it may return the message as a
+ *   notification.
+ */
+struct sctp_send_failed {
+	__u16 ssf_type;
+	__u16 ssf_flags;
+	__u32 ssf_length;
+	__u32 ssf_error;
+	struct sctp_sndrcvinfo ssf_info;
+	sctp_assoc_t ssf_assoc_id;
+	__u8 ssf_data[0];
+};
+
+/*
+ *   ssf_flags: 16 bits (unsigned integer)
+ *
+ *   The flag value will take one of the following values
+ *
+ *   SCTP_DATA_UNSENT  - Indicates that the data was never put on
+ *                       the wire.
+ *
+ *   SCTP_DATA_SENT    - Indicates that the data was put on the wire.
+ *                       Note that this does not necessarily mean that the
+ *                       data was (or was not) successfully delivered.
+ */
+enum sctp_ssf_flags {
+	SCTP_DATA_UNSENT,
+	SCTP_DATA_SENT,
+};
+
+/*
+ * 5.3.1.5 SCTP_SHUTDOWN_EVENT
+ *
+ *   When a peer sends a SHUTDOWN, SCTP delivers this notification to
+ *   inform the application that it should cease sending data.
+ */
+struct sctp_shutdown_event {
+	__u16 sse_type;
+	__u16 sse_flags;
+	__u32 sse_length;
+	sctp_assoc_t sse_assoc_id;
+};
+
+/*
+ * 5.3.1.6 SCTP_ADAPTATION_INDICATION
+ *
+ *   When a peer sends a Adaptation Layer Indication parameter , SCTP
+ *   delivers this notification to inform the application
+ *   that of the peers requested adaptation layer.
+ */
+struct sctp_adaptation_event {
+	__u16 sai_type;
+	__u16 sai_flags;
+	__u32 sai_length;
+	__u32 sai_adaptation_ind;
+	sctp_assoc_t sai_assoc_id;
+};
+
+/*
+ * 5.3.1.7 SCTP_PARTIAL_DELIVERY_EVENT
+ *
+ *   When a receiver is engaged in a partial delivery of a
+ *   message this notification will be used to indicate
+ *   various events.
+ */
+struct sctp_pdapi_event {
+	__u16 pdapi_type;
+	__u16 pdapi_flags;
+	__u32 pdapi_length;
+	__u32 pdapi_indication;
+	sctp_assoc_t pdapi_assoc_id;
+};
+
+enum { SCTP_PARTIAL_DELIVERY_ABORTED=0, };
+
+/*
+ * 5.3.1.8.  SCTP_AUTHENTICATION_EVENT
+ *
+ *  When a receiver is using authentication this message will provide
+ *  notifications regarding new keys being made active as well as errors.
+ */
+struct sctp_authkey_event {
+	__u16 auth_type;
+	__u16 auth_flags;
+	__u32 auth_length;
+	__u16 auth_keynumber;
+	__u16 auth_altkeynumber;
+	__u32 auth_indication;
+	sctp_assoc_t auth_assoc_id;
+};
+
+enum { SCTP_AUTH_NEWKEY = 0, };
+
+/*
+ * 6.1.9. SCTP_SENDER_DRY_EVENT
+ *
+ * When the SCTP stack has no more user data to send or retransmit, this
+ * notification is given to the user. Also, at the time when a user app
+ * subscribes to this event, if there is no data to be sent or
+ * retransmit, the stack will immediately send up this notification.
+ */
+struct sctp_sender_dry_event {
+	__u16 sender_dry_type;
+	__u16 sender_dry_flags;
+	__u32 sender_dry_length;
+	sctp_assoc_t sender_dry_assoc_id;
+};
+
+/*
+ * Described in Section 7.3
+ *   Ancillary Data and Notification Interest Options
+ */
+struct sctp_event_subscribe {
+	__u8 sctp_data_io_event;
+	__u8 sctp_association_event;
+	__u8 sctp_address_event;
+	__u8 sctp_send_failure_event;
+	__u8 sctp_peer_error_event;
+	__u8 sctp_shutdown_event;
+	__u8 sctp_partial_delivery_event;
+	__u8 sctp_adaptation_layer_event;
+	__u8 sctp_authentication_event;
+	__u8 sctp_sender_dry_event;
+};
+
+/*
+ * 5.3.1 SCTP Notification Structure
+ *
+ *   The notification structure is defined as the union of all
+ *   notification types.
+ *
+ */
+union sctp_notification {
+	struct {
+		__u16 sn_type;             /* Notification type. */
+		__u16 sn_flags;
+		__u32 sn_length;
+	} sn_header;
+	struct sctp_assoc_change sn_assoc_change;
+	struct sctp_paddr_change sn_paddr_change;
+	struct sctp_remote_error sn_remote_error;
+	struct sctp_send_failed sn_send_failed;
+	struct sctp_shutdown_event sn_shutdown_event;
+	struct sctp_adaptation_event sn_adaptation_event;
+	struct sctp_pdapi_event sn_pdapi_event;
+	struct sctp_authkey_event sn_authkey_event;
+	struct sctp_sender_dry_event sn_sender_dry_event;
+};
+
+/* Section 5.3.1
+ * All standard values for sn_type flags are greater than 2^15.
+ * Values from 2^15 and down are reserved.
+ */
+
+enum sctp_sn_type {
+	SCTP_SN_TYPE_BASE     = (1<<15),
+	SCTP_ASSOC_CHANGE,
+#define SCTP_ASSOC_CHANGE		SCTP_ASSOC_CHANGE
+	SCTP_PEER_ADDR_CHANGE,
+#define SCTP_PEER_ADDR_CHANGE		SCTP_PEER_ADDR_CHANGE
+	SCTP_SEND_FAILED,
+#define SCTP_SEND_FAILED		SCTP_SEND_FAILED
+	SCTP_REMOTE_ERROR,
+#define SCTP_REMOTE_ERROR		SCTP_REMOTE_ERROR
+	SCTP_SHUTDOWN_EVENT,
+#define SCTP_SHUTDOWN_EVENT		SCTP_SHUTDOWN_EVENT
+	SCTP_PARTIAL_DELIVERY_EVENT,
+#define SCTP_PARTIAL_DELIVERY_EVENT	SCTP_PARTIAL_DELIVERY_EVENT
+	SCTP_ADAPTATION_INDICATION,
+#define SCTP_ADAPTATION_INDICATION	SCTP_ADAPTATION_INDICATION
+	SCTP_AUTHENTICATION_EVENT,
+#define SCTP_AUTHENTICATION_INDICATION	SCTP_AUTHENTICATION_EVENT
+	SCTP_SENDER_DRY_EVENT,
+#define SCTP_SENDER_DRY_EVENT		SCTP_SENDER_DRY_EVENT
+};
+
+/* Notification error codes used to fill up the error fields in some
+ * notifications.
+ * SCTP_PEER_ADDRESS_CHAGE 	: spc_error
+ * SCTP_ASSOC_CHANGE		: sac_error
+ * These names should be potentially included in the draft 04 of the SCTP
+ * sockets API specification.
+ */
+typedef enum sctp_sn_error {
+	SCTP_FAILED_THRESHOLD,
+	SCTP_RECEIVED_SACK,
+	SCTP_HEARTBEAT_SUCCESS,
+	SCTP_RESPONSE_TO_USER_REQ,
+	SCTP_INTERNAL_ERROR,
+	SCTP_SHUTDOWN_GUARD_EXPIRES,
+	SCTP_PEER_FAULTY,
+} sctp_sn_error_t;
+
+/*
+ * 7.1.1 Retransmission Timeout Parameters (SCTP_RTOINFO)
+ *
+ *   The protocol parameters used to initialize and bound retransmission
+ *   timeout (RTO) are tunable.  See [SCTP] for more information on how
+ *   these parameters are used in RTO calculation.
+ */
+struct sctp_rtoinfo {
+	sctp_assoc_t	srto_assoc_id;
+	__u32		srto_initial;
+	__u32		srto_max;
+	__u32		srto_min;
+};
+
+/*
+ * 7.1.2 Association Parameters (SCTP_ASSOCINFO)
+ *
+ *   This option is used to both examine and set various association and
+ *   endpoint parameters.
+ */
+struct sctp_assocparams {
+	sctp_assoc_t	sasoc_assoc_id;
+	__u16		sasoc_asocmaxrxt;
+	__u16		sasoc_number_peer_destinations;
+	__u32		sasoc_peer_rwnd;
+	__u32		sasoc_local_rwnd;
+	__u32		sasoc_cookie_life;
+};
+
+/*
+ * 7.1.9 Set Peer Primary Address (SCTP_SET_PEER_PRIMARY_ADDR)
+ *
+ *  Requests that the peer mark the enclosed address as the association
+ *  primary. The enclosed address must be one of the association's
+ *  locally bound addresses. The following structure is used to make a
+ *   set primary request:
+ */
+struct sctp_setpeerprim {
+	sctp_assoc_t            sspp_assoc_id;
+	struct sockaddr_storage sspp_addr;
+} __attribute__((packed, aligned(4)));
+
+/*
+ * 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR)
+ *
+ *  Requests that the local SCTP stack use the enclosed peer address as
+ *  the association primary. The enclosed address must be one of the
+ *  association peer's addresses. The following structure is used to
+ *  make a set peer primary request:
+ */
+struct sctp_prim {
+	sctp_assoc_t            ssp_assoc_id;
+	struct sockaddr_storage ssp_addr;
+} __attribute__((packed, aligned(4)));
+
+/* For backward compatibility use, define the old name too */
+#define sctp_setprim	sctp_prim
+
+/*
+ * 7.1.11 Set Adaptation Layer Indicator (SCTP_ADAPTATION_LAYER)
+ *
+ * Requests that the local endpoint set the specified Adaptation Layer
+ * Indication parameter for all future INIT and INIT-ACK exchanges.
+ */
+struct sctp_setadaptation {
+	__u32	ssb_adaptation_ind;
+};
+
+/*
+ * 7.1.13 Peer Address Parameters  (SCTP_PEER_ADDR_PARAMS)
+ *
+ *   Applications can enable or disable heartbeats for any peer address
+ *   of an association, modify an address's heartbeat interval, force a
+ *   heartbeat to be sent immediately, and adjust the address's maximum
+ *   number of retransmissions sent before an address is considered
+ *   unreachable. The following structure is used to access and modify an
+ *   address's parameters:
+ */
+enum  sctp_spp_flags {
+	SPP_HB_ENABLE = 1<<0,		/*Enable heartbeats*/
+	SPP_HB_DISABLE = 1<<1,		/*Disable heartbeats*/
+	SPP_HB = SPP_HB_ENABLE | SPP_HB_DISABLE,
+	SPP_HB_DEMAND = 1<<2,		/*Send heartbeat immediately*/
+	SPP_PMTUD_ENABLE = 1<<3,	/*Enable PMTU discovery*/
+	SPP_PMTUD_DISABLE = 1<<4,	/*Disable PMTU discovery*/
+	SPP_PMTUD = SPP_PMTUD_ENABLE | SPP_PMTUD_DISABLE,
+	SPP_SACKDELAY_ENABLE = 1<<5,	/*Enable SACK*/
+	SPP_SACKDELAY_DISABLE = 1<<6,	/*Disable SACK*/
+	SPP_SACKDELAY = SPP_SACKDELAY_ENABLE | SPP_SACKDELAY_DISABLE,
+	SPP_HB_TIME_IS_ZERO = 1<<7,	/* Set HB delay to 0 */
+};
+
+struct sctp_paddrparams {
+	sctp_assoc_t		spp_assoc_id;
+	struct sockaddr_storage	spp_address;
+	__u32			spp_hbinterval;
+	__u16			spp_pathmaxrxt;
+	__u32			spp_pathmtu;
+	__u32			spp_sackdelay;
+	__u32			spp_flags;
+} __attribute__((packed, aligned(4)));
+
+/*
+ * 7.1.18.  Add a chunk that must be authenticated (SCTP_AUTH_CHUNK)
+ *
+ * This set option adds a chunk type that the user is requesting to be
+ * received only in an authenticated way.  Changes to the list of chunks
+ * will only effect future associations on the socket.
+ */
+struct sctp_authchunk {
+	__u8		sauth_chunk;
+};
+
+/*
+ * 7.1.19.  Get or set the list of supported HMAC Identifiers (SCTP_HMAC_IDENT)
+ *
+ * This option gets or sets the list of HMAC algorithms that the local
+ * endpoint requires the peer to use.
+ */
+#ifndef __KERNEL__
+/* This here is only used by user space as is. It might not be a good idea
+ * to export/reveal the whole structure with reserved fields etc.
+ */
+enum {
+	SCTP_AUTH_HMAC_ID_SHA1 = 1,
+	SCTP_AUTH_HMAC_ID_SHA256 = 3,
+};
+#endif
+
+struct sctp_hmacalgo {
+	__u32		shmac_num_idents;
+	__u16		shmac_idents[];
+};
+
+/* Sadly, user and kernel space have different names for
+ * this structure member, so this is to not break anything.
+ */
+#define shmac_number_of_idents	shmac_num_idents
+
+/*
+ * 7.1.20.  Set a shared key (SCTP_AUTH_KEY)
+ *
+ * This option will set a shared secret key which is used to build an
+ * association shared key.
+ */
+struct sctp_authkey {
+	sctp_assoc_t	sca_assoc_id;
+	__u16		sca_keynumber;
+	__u16		sca_keylength;
+	__u8		sca_key[];
+};
+
+/*
+ * 7.1.21.  Get or set the active shared key (SCTP_AUTH_ACTIVE_KEY)
+ *
+ * This option will get or set the active shared key to be used to build
+ * the association shared key.
+ */
+
+struct sctp_authkeyid {
+	sctp_assoc_t	scact_assoc_id;
+	__u16		scact_keynumber;
+};
+
+
+/*
+ * 7.1.23.  Get or set delayed ack timer (SCTP_DELAYED_SACK)
+ *
+ * This option will effect the way delayed acks are performed.  This
+ * option allows you to get or set the delayed ack time, in
+ * milliseconds.  It also allows changing the delayed ack frequency.
+ * Changing the frequency to 1 disables the delayed sack algorithm.  If
+ * the assoc_id is 0, then this sets or gets the endpoints default
+ * values.  If the assoc_id field is non-zero, then the set or get
+ * effects the specified association for the one to many model (the
+ * assoc_id field is ignored by the one to one model).  Note that if
+ * sack_delay or sack_freq are 0 when setting this option, then the
+ * current values will remain unchanged.
+ */
+struct sctp_sack_info {
+	sctp_assoc_t	sack_assoc_id;
+	uint32_t	sack_delay;
+	uint32_t	sack_freq;
+};
+
+struct sctp_assoc_value {
+    sctp_assoc_t            assoc_id;
+    uint32_t                assoc_value;
+};
+
+/*
+ * 7.2.2 Peer Address Information
+ *
+ *   Applications can retrieve information about a specific peer address
+ *   of an association, including its reachability state, congestion
+ *   window, and retransmission timer values.  This information is
+ *   read-only. The following structure is used to access this
+ *   information:
+ */
+struct sctp_paddrinfo {
+	sctp_assoc_t		spinfo_assoc_id;
+	struct sockaddr_storage	spinfo_address;
+	__s32			spinfo_state;
+	__u32			spinfo_cwnd;
+	__u32			spinfo_srtt;
+	__u32			spinfo_rto;
+	__u32			spinfo_mtu;
+} __attribute__((packed, aligned(4)));
+
+/* Peer addresses's state. */
+/* UNKNOWN: Peer address passed by the upper layer in sendmsg or connect[x]
+ * calls.
+ * UNCONFIRMED: Peer address received in INIT/INIT-ACK address parameters.
+ *              Not yet confirmed by a heartbeat and not available for data
+ *		transfers.
+ * ACTIVE : Peer address confirmed, active and available for data transfers.
+ * INACTIVE: Peer address inactive and not available for data transfers.
+ */
+enum sctp_spinfo_state {
+	SCTP_INACTIVE,
+	SCTP_PF,
+	SCTP_ACTIVE,
+	SCTP_UNCONFIRMED,
+	SCTP_UNKNOWN = 0xffff  /* Value used for transport state unknown */
+};
+
+/*
+ * 7.2.1 Association Status (SCTP_STATUS)
+ *
+ *   Applications can retrieve current status information about an
+ *   association, including association state, peer receiver window size,
+ *   number of unacked data chunks, and number of data chunks pending
+ *   receipt.  This information is read-only.  The following structure is
+ *   used to access this information:
+ */
+struct sctp_status {
+	sctp_assoc_t		sstat_assoc_id;
+	__s32			sstat_state;
+	__u32			sstat_rwnd;
+	__u16			sstat_unackdata;
+	__u16			sstat_penddata;
+	__u16			sstat_instrms;
+	__u16			sstat_outstrms;
+	__u32			sstat_fragmentation_point;
+	struct sctp_paddrinfo	sstat_primary;
+};
+
+/*
+ * 7.2.3.  Get the list of chunks the peer requires to be authenticated
+ *         (SCTP_PEER_AUTH_CHUNKS)
+ *
+ * This option gets a list of chunks for a specified association that
+ * the peer requires to be received authenticated only.
+ */
+struct sctp_authchunks {
+	sctp_assoc_t	gauth_assoc_id;
+	__u32		gauth_number_of_chunks;
+	uint8_t		gauth_chunks[];
+};
+
+/* The broken spelling has been released already in lksctp-tools header,
+ * so don't break anyone, now that it's fixed.
+ */
+#define guth_number_of_chunks	gauth_number_of_chunks
+
+/* Association states.  */
+enum sctp_sstat_state {
+	SCTP_EMPTY                = 0,
+	SCTP_CLOSED               = 1,
+	SCTP_COOKIE_WAIT          = 2,
+	SCTP_COOKIE_ECHOED        = 3,
+	SCTP_ESTABLISHED          = 4,
+	SCTP_SHUTDOWN_PENDING     = 5,
+	SCTP_SHUTDOWN_SENT        = 6,
+	SCTP_SHUTDOWN_RECEIVED    = 7,
+	SCTP_SHUTDOWN_ACK_SENT    = 8,
+};
+
+/*
+ * 8.2.6. Get the Current Identifiers of Associations
+ *        (SCTP_GET_ASSOC_ID_LIST)
+ *
+ * This option gets the current list of SCTP association identifiers of
+ * the SCTP associations handled by a one-to-many style socket.
+ */
+struct sctp_assoc_ids {
+	__u32		gaids_number_of_ids;
+	sctp_assoc_t	gaids_assoc_id[];
+};
+
+/*
+ * 8.3, 8.5 get all peer/local addresses in an association.
+ * This parameter struct is used by SCTP_GET_PEER_ADDRS and
+ * SCTP_GET_LOCAL_ADDRS socket options used internally to implement
+ * sctp_getpaddrs() and sctp_getladdrs() API.
+ */
+struct sctp_getaddrs_old {
+	sctp_assoc_t            assoc_id;
+	int			addr_num;
+#ifdef __KERNEL__
+	struct sockaddr		__user *addrs;
+#else
+	struct sockaddr		*addrs;
+#endif
+};
+
+struct sctp_getaddrs {
+	sctp_assoc_t		assoc_id; /*input*/
+	__u32			addr_num; /*output*/
+	__u8			addrs[0]; /*output, variable size*/
+};
+
+/* A socket user request obtained via SCTP_GET_ASSOC_STATS that retrieves
+ * association stats. All stats are counts except sas_maxrto and
+ * sas_obs_rto_ipaddr. maxrto is the max observed rto + transport since
+ * the last call. Will return 0 when RTO was not update since last call
+ */
+struct sctp_assoc_stats {
+	sctp_assoc_t	sas_assoc_id;    /* Input */
+					 /* Transport of observed max RTO */
+	struct sockaddr_storage sas_obs_rto_ipaddr;
+	__u64		sas_maxrto;      /* Maximum Observed RTO for period */
+	__u64		sas_isacks;	 /* SACKs received */
+	__u64		sas_osacks;	 /* SACKs sent */
+	__u64		sas_opackets;	 /* Packets sent */
+	__u64		sas_ipackets;	 /* Packets received */
+	__u64		sas_rtxchunks;   /* Retransmitted Chunks */
+	__u64		sas_outofseqtsns;/* TSN received > next expected */
+	__u64		sas_idupchunks;  /* Dups received (ordered+unordered) */
+	__u64		sas_gapcnt;      /* Gap Acknowledgements Received */
+	__u64		sas_ouodchunks;  /* Unordered data chunks sent */
+	__u64		sas_iuodchunks;  /* Unordered data chunks received */
+	__u64		sas_oodchunks;	 /* Ordered data chunks sent */
+	__u64		sas_iodchunks;	 /* Ordered data chunks received */
+	__u64		sas_octrlchunks; /* Control chunks sent */
+	__u64		sas_ictrlchunks; /* Control chunks received */
+};
+
+/* These are bit fields for msghdr->msg_flags.  See section 5.1.  */
+/* On user space Linux, these live in <bits/socket.h> as an enum.  */
+enum sctp_msg_flags {
+	MSG_NOTIFICATION = 0x8000,
+#define MSG_NOTIFICATION MSG_NOTIFICATION
+};
+
+/*
+ * 8.1 sctp_bindx()
+ *
+ * The flags parameter is formed from the bitwise OR of zero or more of the
+ * following currently defined flags:
+ */
+#define SCTP_BINDX_ADD_ADDR 0x01
+#define SCTP_BINDX_REM_ADDR 0x02
+
+/* This is the structure that is passed as an argument(optval) to
+ * getsockopt(SCTP_SOCKOPT_PEELOFF).
+ */
+typedef struct {
+	sctp_assoc_t associd;
+	int sd;
+} sctp_peeloff_arg_t;
+
+/*
+ *  Peer Address Thresholds socket option
+ */
+struct sctp_paddrthlds {
+	sctp_assoc_t spt_assoc_id;
+	struct sockaddr_storage spt_address;
+	__u16 spt_pathmaxrxt;
+	__u16 spt_pathpfthld;
+};
+
+#endif /* _UAPI_SCTP_H */
-- 
cgit 


From 064f370c5fd982e1264c03f5b704e00f5e41eb36 Mon Sep 17 00:00:00 2001
From: Thierry Escande <thierry.escande@linux.intel.com>
Date: Tue, 2 Apr 2013 10:25:16 +0200
Subject: NFC: llcp: Add support in getsockopt for RW, LTO, and MIU remote
 parameters

Useful for LLCP validation tests.

Signed-off-by: Thierry Escande <thierry.escande@linux.intel.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/uapi/linux/nfc.h |  7 +++++--
 net/nfc/llcp/sock.c      | 23 ++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index 7440bc81a04b..7c6f627a717d 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -233,7 +233,10 @@ struct sockaddr_nfc_llcp {
 #define NFC_LLCP_DIRECTION_TX		0x01
 
 /* socket option names */
-#define NFC_LLCP_RW   0
-#define NFC_LLCP_MIUX 1
+#define NFC_LLCP_RW		0
+#define NFC_LLCP_MIUX		1
+#define NFC_LLCP_REMOTE_MIU	2
+#define NFC_LLCP_REMOTE_LTO	3
+#define NFC_LLCP_REMOTE_RW	4
 
 #endif /*__LINUX_NFC_H */
diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c
index 641c535be3d4..fd01ac6e0bf4 100644
--- a/net/nfc/llcp/sock.c
+++ b/net/nfc/llcp/sock.c
@@ -303,7 +303,7 @@ static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname,
 	struct sock *sk = sock->sk;
 	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
 	int len, err = 0;
-	u16 miux;
+	u16 miux, remote_miu;
 	u8 rw;
 
 	pr_debug("%p optname %d\n", sk, optname);
@@ -339,6 +339,27 @@ static int nfc_llcp_getsockopt(struct socket *sock, int level, int optname,
 
 		break;
 
+	case NFC_LLCP_REMOTE_MIU:
+		remote_miu = llcp_sock->remote_miu > LLCP_MAX_MIU ?
+				local->remote_miu : llcp_sock->remote_miu;
+
+		if (put_user(remote_miu, (u32 __user *) optval))
+			err = -EFAULT;
+
+		break;
+
+	case NFC_LLCP_REMOTE_LTO:
+		if (put_user(local->remote_lto / 10, (u32 __user *) optval))
+			err = -EFAULT;
+
+		break;
+
+	case NFC_LLCP_REMOTE_RW:
+		if (put_user(llcp_sock->remote_rw, (u32 __user *) optval))
+			err = -EFAULT;
+
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
-- 
cgit 


From 44b3decb414919760c7327df05e63372c1bf5d9a Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Thu, 11 Apr 2013 11:51:36 +0200
Subject: rfkill: Add NFC to the list of supported radios

And return the proper string for it.

Acked-by: Johannes Berg <johannes@sipsolutions.net>
Acked-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/uapi/linux/rfkill.h | 2 ++
 net/rfkill/core.c           | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rfkill.h b/include/uapi/linux/rfkill.h
index 2753c6cc9740..058757f7a733 100644
--- a/include/uapi/linux/rfkill.h
+++ b/include/uapi/linux/rfkill.h
@@ -37,6 +37,7 @@
  * @RFKILL_TYPE_WWAN: switch is on a wireless WAN device.
  * @RFKILL_TYPE_GPS: switch is on a GPS device.
  * @RFKILL_TYPE_FM: switch is on a FM radio device.
+ * @RFKILL_TYPE_NFC: switch is on an NFC device.
  * @NUM_RFKILL_TYPES: number of defined rfkill types
  */
 enum rfkill_type {
@@ -48,6 +49,7 @@ enum rfkill_type {
 	RFKILL_TYPE_WWAN,
 	RFKILL_TYPE_GPS,
 	RFKILL_TYPE_FM,
+	RFKILL_TYPE_NFC,
 	NUM_RFKILL_TYPES,
 };
 
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 9b9be5279f5d..1cec5e4f3a5e 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -587,7 +587,7 @@ static ssize_t rfkill_name_show(struct device *dev,
 
 static const char *rfkill_get_type_str(enum rfkill_type type)
 {
-	BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_FM + 1);
+	BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_NFC + 1);
 
 	switch (type) {
 	case RFKILL_TYPE_WLAN:
@@ -604,6 +604,8 @@ static const char *rfkill_get_type_str(enum rfkill_type type)
 		return "gps";
 	case RFKILL_TYPE_FM:
 		return "fm";
+	case RFKILL_TYPE_NFC:
+		return "nfc";
 	default:
 		BUG();
 	}
-- 
cgit 


From 3eef25107cab65a1158b11ba373fb9b4fc25b4b8 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Wed, 3 Apr 2013 04:08:19 -0300
Subject: [media] v4l2: drop V4L2_CHIP_MATCH_SUBDEV_NAME

After using the new VIDIOC_DBG_G_CHIP_NAME ioctl I realized that the matching
by name possibility is useless. Just drop it and rename MATCH_SUBDEV_IDX to
just MATCH_SUBDEV.
The v4l2-dbg utility is much better placed to match by name by just enumerating
all bridge and subdev devices until chip_name.name matches.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 .../DocBook/media/v4l/vidioc-dbg-g-chip-ident.xml  |  7 +--
 .../DocBook/media/v4l/vidioc-dbg-g-chip-name.xml   | 18 +------
 .../DocBook/media/v4l/vidioc-dbg-g-register.xml    | 17 +------
 drivers/media/v4l2-core/v4l2-common.c              |  3 +-
 drivers/media/v4l2-core/v4l2-ioctl.c               | 55 ++++++++--------------
 include/uapi/linux/videodev2.h                     |  3 +-
 6 files changed, 26 insertions(+), 77 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-ident.xml b/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-ident.xml
index 82e43c6c72b8..921e18550d26 100644
--- a/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-ident.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-ident.xml
@@ -221,13 +221,8 @@ the values from <xref linkend="chip-ids" />.</entry>
 	    <entry>Match the nth anciliary AC97 chip.</entry>
 	  </row>
 	  <row>
-	    <entry><constant>V4L2_CHIP_MATCH_SUBDEV_NAME</constant></entry>
+	    <entry><constant>V4L2_CHIP_MATCH_SUBDEV</constant></entry>
 	    <entry>4</entry>
-	    <entry>Match the sub-device by name. Can't be used with this ioctl.</entry>
-	  </row>
-	  <row>
-	    <entry><constant>V4L2_CHIP_MATCH_SUBDEV_IDX</constant></entry>
-	    <entry>5</entry>
 	    <entry>Match the nth sub-device. Can't be used with this ioctl.</entry>
 	  </row>
 	</tbody>
diff --git a/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-name.xml b/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-name.xml
index 5fce8d84288e..fa3bd42ab167 100644
--- a/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-name.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-name.xml
@@ -87,16 +87,7 @@ connected to the PCI or USB bus. Non-zero numbers identify specific
 parts of the bridge chip such as an AC97 register block.</para>
 
     <para>When <structfield>match.type</structfield> is
-<constant>V4L2_CHIP_MATCH_SUBDEV_NAME</constant>,
-<structfield>match.name</structfield> contains the name of a sub-device.
-For instance
-<constant>"saa7127 6-0044"</constant> will match the saa7127 sub-device
-at the given i2c bus. This match type is not very useful for this ioctl
-and is here only for consistency.
-</para>
-
-    <para>When <structfield>match.type</structfield> is
-<constant>V4L2_CHIP_MATCH_SUBDEV_IDX</constant>,
+<constant>V4L2_CHIP_MATCH_SUBDEV</constant>,
 <structfield>match.addr</structfield> selects the nth sub-device. This
 allows you to enumerate over all sub-devices.</para>
 
@@ -207,13 +198,8 @@ is set, then the driver supports reading registers from the device. If
 	    <entry>Match the nth anciliary AC97 chip. Can't be used with this ioctl.</entry>
 	  </row>
 	  <row>
-	    <entry><constant>V4L2_CHIP_MATCH_SUBDEV_NAME</constant></entry>
+	    <entry><constant>V4L2_CHIP_MATCH_SUBDEV</constant></entry>
 	    <entry>4</entry>
-	    <entry>Match the sub-device by name.</entry>
-	  </row>
-	  <row>
-	    <entry><constant>V4L2_CHIP_MATCH_SUBDEV_IDX</constant></entry>
-	    <entry>5</entry>
 	    <entry>Match the nth sub-device.</entry>
 	  </row>
 	</tbody>
diff --git a/Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml b/Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml
index 3082b4149dbe..db7844f2439f 100644
--- a/Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml
@@ -123,15 +123,7 @@ bus address.</para>
 on the TV card.</para>
 
     <para>When <structfield>match.type</structfield> is
-<constant>V4L2_CHIP_MATCH_SUBDEV_NAME</constant>,
-<structfield>match.name</structfield> contains the sub-device name.
-For instance
-<constant>"saa7127 6-0044"</constant> will match this specific saa7127
-sub-device. Again with the &VIDIOC-DBG-G-CHIP-NAME; ioctl you can find
-out which sub-devices are present.</para>
-
-    <para>When <structfield>match.type</structfield> is
-<constant>V4L2_CHIP_MATCH_SUBDEV_IDX</constant>,
+<constant>V4L2_CHIP_MATCH_SUBDEV</constant>,
 <structfield>match.addr</structfield> selects the nth sub-device.</para>
 
     <note>
@@ -250,13 +242,8 @@ register.</entry>
 	    <entry>Match the nth anciliary AC97 chip.</entry>
 	  </row>
 	  <row>
-	    <entry><constant>V4L2_CHIP_MATCH_SUBDEV_NAME</constant></entry>
+	    <entry><constant>V4L2_CHIP_MATCH_SUBDEV</constant></entry>
 	    <entry>4</entry>
-	    <entry>Match the sub-device by name.</entry>
-	  </row>
-	  <row>
-	    <entry><constant>V4L2_CHIP_MATCH_SUBDEV_IDX</constant></entry>
-	    <entry>5</entry>
 	    <entry>Match the nth sub-device.</entry>
 	  </row>
 	</tbody>
diff --git a/drivers/media/v4l2-core/v4l2-common.c b/drivers/media/v4l2-core/v4l2-common.c
index f8fac9cefc3c..3fed63f4e026 100644
--- a/drivers/media/v4l2-core/v4l2-common.c
+++ b/drivers/media/v4l2-core/v4l2-common.c
@@ -254,8 +254,7 @@ int v4l2_chip_match_i2c_client(struct i2c_client *c, const struct v4l2_dbg_match
 		return len && !strncmp(c->driver->driver.name, match->name, len);
 	case V4L2_CHIP_MATCH_I2C_ADDR:
 		return c->addr == match->addr;
-	case V4L2_CHIP_MATCH_SUBDEV_IDX:
-	case V4L2_CHIP_MATCH_SUBDEV_NAME:
+	case V4L2_CHIP_MATCH_SUBDEV:
 		return 1;
 	default:
 		return 0;
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index feac07e50293..7a96162f544f 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -629,8 +629,7 @@ static void v4l_print_dbg_chip_ident(const void *arg, bool write_only)
 	const struct v4l2_dbg_chip_ident *p = arg;
 
 	pr_cont("type=%u, ", p->match.type);
-	if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER ||
-	    p->match.type == V4L2_CHIP_MATCH_SUBDEV_NAME)
+	if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER)
 		pr_cont("name=%.*s, ",
 				(int)sizeof(p->match.name), p->match.name);
 	else
@@ -644,8 +643,7 @@ static void v4l_print_dbg_chip_name(const void *arg, bool write_only)
 	const struct v4l2_dbg_chip_name *p = arg;
 
 	pr_cont("type=%u, ", p->match.type);
-	if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER ||
-	    p->match.type == V4L2_CHIP_MATCH_SUBDEV_NAME)
+	if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER)
 		pr_cont("name=%.*s, ",
 				(int)sizeof(p->match.name), p->match.name);
 	else
@@ -658,8 +656,7 @@ static void v4l_print_dbg_register(const void *arg, bool write_only)
 	const struct v4l2_dbg_register *p = arg;
 
 	pr_cont("type=%u, ", p->match.type);
-	if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER ||
-	    p->match.type == V4L2_CHIP_MATCH_SUBDEV_NAME)
+	if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER)
 		pr_cont("name=%.*s, ",
 				(int)sizeof(p->match.name), p->match.name);
 	else
@@ -1791,14 +1788,6 @@ static int v4l_log_status(const struct v4l2_ioctl_ops *ops,
 	return ret;
 }
 
-static bool v4l_dbg_found_match(const struct v4l2_dbg_match *match,
-		struct v4l2_subdev *sd, int idx)
-{
-	if (match->type == V4L2_CHIP_MATCH_SUBDEV_IDX)
-		return match->addr == idx;
-	return !strcmp(match->name, sd->name);
-}
-
 static int v4l_dbg_g_register(const struct v4l2_ioctl_ops *ops,
 				struct file *file, void *fh, void *arg)
 {
@@ -1810,14 +1799,12 @@ static int v4l_dbg_g_register(const struct v4l2_ioctl_ops *ops,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	if (p->match.type == V4L2_CHIP_MATCH_SUBDEV_IDX ||
-	    p->match.type == V4L2_CHIP_MATCH_SUBDEV_NAME) {
+	if (p->match.type == V4L2_CHIP_MATCH_SUBDEV) {
 		if (vfd->v4l2_dev == NULL)
 			return -EINVAL;
-		v4l2_device_for_each_subdev(sd, vfd->v4l2_dev) {
-			if (v4l_dbg_found_match(&p->match, sd, idx++))
+		v4l2_device_for_each_subdev(sd, vfd->v4l2_dev)
+			if (p->match.addr == idx++)
 				return v4l2_subdev_call(sd, core, g_register, p);
-		}
 		return -EINVAL;
 	}
 	if (ops->vidioc_g_register)
@@ -1839,14 +1826,12 @@ static int v4l_dbg_s_register(const struct v4l2_ioctl_ops *ops,
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-	if (p->match.type == V4L2_CHIP_MATCH_SUBDEV_IDX ||
-	    p->match.type == V4L2_CHIP_MATCH_SUBDEV_NAME) {
+	if (p->match.type == V4L2_CHIP_MATCH_SUBDEV) {
 		if (vfd->v4l2_dev == NULL)
 			return -EINVAL;
-		v4l2_device_for_each_subdev(sd, vfd->v4l2_dev) {
-			if (v4l_dbg_found_match(&p->match, sd, idx++))
+		v4l2_device_for_each_subdev(sd, vfd->v4l2_dev)
+			if (p->match.addr == idx++)
 				return v4l2_subdev_call(sd, core, s_register, p);
-		}
 		return -EINVAL;
 	}
 	if (ops->vidioc_s_register)
@@ -1864,8 +1849,7 @@ static int v4l_dbg_g_chip_ident(const struct v4l2_ioctl_ops *ops,
 
 	p->ident = V4L2_IDENT_NONE;
 	p->revision = 0;
-	if (p->match.type == V4L2_CHIP_MATCH_SUBDEV_NAME ||
-	    p->match.type == V4L2_CHIP_MATCH_SUBDEV_IDX)
+	if (p->match.type == V4L2_CHIP_MATCH_SUBDEV)
 		return -EINVAL;
 	return ops->vidioc_g_chip_ident(file, fh, p);
 }
@@ -1897,19 +1881,18 @@ static int v4l_dbg_g_chip_name(const struct v4l2_ioctl_ops *ops,
 			strlcpy(p->name, "bridge", sizeof(p->name));
 		return 0;
 
-	case V4L2_CHIP_MATCH_SUBDEV_IDX:
-	case V4L2_CHIP_MATCH_SUBDEV_NAME:
+	case V4L2_CHIP_MATCH_SUBDEV:
 		if (vfd->v4l2_dev == NULL)
 			break;
 		v4l2_device_for_each_subdev(sd, vfd->v4l2_dev) {
-			if (v4l_dbg_found_match(&p->match, sd, idx++)) {
-				if (sd->ops->core && sd->ops->core->s_register)
-					p->flags |= V4L2_CHIP_FL_WRITABLE;
-				if (sd->ops->core && sd->ops->core->g_register)
-					p->flags |= V4L2_CHIP_FL_READABLE;
-				strlcpy(p->name, sd->name, sizeof(p->name));
-				return 0;
-			}
+			if (p->match.addr != idx++)
+				continue;
+			if (sd->ops->core && sd->ops->core->s_register)
+				p->flags |= V4L2_CHIP_FL_WRITABLE;
+			if (sd->ops->core && sd->ops->core->g_register)
+				p->flags |= V4L2_CHIP_FL_READABLE;
+			strlcpy(p->name, sd->name, sizeof(p->name));
+			return 0;
 		}
 		break;
 	}
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index e9c49c5e6416..4c941c103c44 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1812,8 +1812,7 @@ struct v4l2_event_subscription {
 #define V4L2_CHIP_MATCH_I2C_DRIVER  1  /* Match against I2C driver name */
 #define V4L2_CHIP_MATCH_I2C_ADDR    2  /* Match against I2C 7-bit address */
 #define V4L2_CHIP_MATCH_AC97        3  /* Match against anciliary AC97 chip */
-#define V4L2_CHIP_MATCH_SUBDEV_NAME 4  /* Match against subdev name */
-#define V4L2_CHIP_MATCH_SUBDEV_IDX  5  /* Match against subdev index */
+#define V4L2_CHIP_MATCH_SUBDEV      4  /* Match against subdev index */
 
 struct v4l2_dbg_match {
 	__u32 type; /* Match type */
-- 
cgit 


From 96b03d2a3078d5e95a8b106634faa7cea88ebe5e Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Sat, 6 Apr 2013 06:16:58 -0300
Subject: [media] v4l2: rename VIDIOC_DBG_G_CHIP_NAME to _CHIP_INFO

This ioctl will be extended to return more information than just the name.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/media/v4l/compat.xml         |   2 +-
 Documentation/DocBook/media/v4l/v4l2.xml           |   4 +-
 .../DocBook/media/v4l/vidioc-dbg-g-chip-info.xml   | 223 +++++++++++++++++++++
 .../DocBook/media/v4l/vidioc-dbg-g-chip-name.xml   | 223 ---------------------
 .../DocBook/media/v4l/vidioc-dbg-g-register.xml    |  10 +-
 drivers/media/usb/em28xx/em28xx-video.c            |   8 +-
 drivers/media/v4l2-core/v4l2-dev.c                 |   2 +-
 drivers/media/v4l2-core/v4l2-ioctl.c               |  14 +-
 include/media/v4l2-ioctl.h                         |   4 +-
 include/uapi/linux/videodev2.h                     |   8 +-
 10 files changed, 249 insertions(+), 249 deletions(-)
 create mode 100644 Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-info.xml
 delete mode 100644 Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-name.xml

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml
index e44161ffdd07..f43542ae2981 100644
--- a/Documentation/DocBook/media/v4l/compat.xml
+++ b/Documentation/DocBook/media/v4l/compat.xml
@@ -2507,7 +2507,7 @@ that used it. It was originally scheduled for removal in 2.6.35.
 	  </para>
         </listitem>
         <listitem>
-	  <para>Added new debugging ioctl &VIDIOC-DBG-G-CHIP-NAME;.
+	  <para>Added new debugging ioctl &VIDIOC-DBG-G-CHIP-INFO;.
 	  </para>
         </listitem>
       </orderedlist>
diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml
index c1f334084213..bfc93cdcf696 100644
--- a/Documentation/DocBook/media/v4l/v4l2.xml
+++ b/Documentation/DocBook/media/v4l/v4l2.xml
@@ -147,7 +147,7 @@ applications. -->
 	<revremark>Remove obsolete and unused DV_PRESET ioctls:
 	VIDIOC_G_DV_PRESET, VIDIOC_S_DV_PRESET, VIDIOC_QUERY_DV_PRESET and
 	VIDIOC_ENUM_DV_PRESET. Remove the related v4l2_input/output capability
-	flags V4L2_IN_CAP_PRESETS and V4L2_OUT_CAP_PRESETS. Added VIDIOC_DBG_G_CHIP_NAME.
+	flags V4L2_IN_CAP_PRESETS and V4L2_OUT_CAP_PRESETS. Added VIDIOC_DBG_G_CHIP_INFO.
 	</revremark>
       </revision>
 
@@ -548,7 +548,7 @@ and discussions on the V4L mailing list.</revremark>
     &sub-create-bufs;
     &sub-cropcap;
     &sub-dbg-g-chip-ident;
-    &sub-dbg-g-chip-name;
+    &sub-dbg-g-chip-info;
     &sub-dbg-g-register;
     &sub-decoder-cmd;
     &sub-dqevent;
diff --git a/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-info.xml b/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-info.xml
new file mode 100644
index 000000000000..e1cece6c5de1
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-info.xml
@@ -0,0 +1,223 @@
+<refentry id="vidioc-dbg-g-chip-info">
+  <refmeta>
+    <refentrytitle>ioctl VIDIOC_DBG_G_CHIP_INFO</refentrytitle>
+    &manvol;
+  </refmeta>
+
+  <refnamediv>
+    <refname>VIDIOC_DBG_G_CHIP_INFO</refname>
+    <refpurpose>Identify the chips on a TV card</refpurpose>
+  </refnamediv>
+
+  <refsynopsisdiv>
+    <funcsynopsis>
+      <funcprototype>
+	<funcdef>int <function>ioctl</function></funcdef>
+	<paramdef>int <parameter>fd</parameter></paramdef>
+	<paramdef>int <parameter>request</parameter></paramdef>
+	<paramdef>struct v4l2_dbg_chip_info
+*<parameter>argp</parameter></paramdef>
+      </funcprototype>
+    </funcsynopsis>
+  </refsynopsisdiv>
+
+  <refsect1>
+    <title>Arguments</title>
+
+    <variablelist>
+      <varlistentry>
+	<term><parameter>fd</parameter></term>
+	<listitem>
+	  <para>&fd;</para>
+	</listitem>
+      </varlistentry>
+      <varlistentry>
+	<term><parameter>request</parameter></term>
+	<listitem>
+	  <para>VIDIOC_DBG_G_CHIP_INFO</para>
+	</listitem>
+      </varlistentry>
+      <varlistentry>
+	<term><parameter>argp</parameter></term>
+	<listitem>
+	  <para></para>
+	</listitem>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>Description</title>
+
+    <note>
+      <title>Experimental</title>
+
+      <para>This is an <link
+linkend="experimental">experimental</link> interface and may change in
+the future.</para>
+    </note>
+
+    <para>For driver debugging purposes this ioctl allows test
+applications to query the driver about the chips present on the TV
+card. Regular applications must not use it. When you found a chip
+specific bug, please contact the linux-media mailing list (&v4l-ml;)
+so it can be fixed.</para>
+
+    <para>Additionally the Linux kernel must be compiled with the
+<constant>CONFIG_VIDEO_ADV_DEBUG</constant> option to enable this ioctl.</para>
+
+    <para>To query the driver applications must initialize the
+<structfield>match.type</structfield> and
+<structfield>match.addr</structfield> or <structfield>match.name</structfield>
+fields of a &v4l2-dbg-chip-info;
+and call <constant>VIDIOC_DBG_G_CHIP_INFO</constant> with a pointer to
+this structure. On success the driver stores information about the
+selected chip in the <structfield>name</structfield> and
+<structfield>flags</structfield> fields. On failure the structure
+remains unchanged.</para>
+
+    <para>When <structfield>match.type</structfield> is
+<constant>V4L2_CHIP_MATCH_BRIDGE</constant>,
+<structfield>match.addr</structfield> selects the nth bridge 'chip'
+on the TV card. You can enumerate all chips by starting at zero and
+incrementing <structfield>match.addr</structfield> by one until
+<constant>VIDIOC_DBG_G_CHIP_INFO</constant> fails with an &EINVAL;.
+The number zero always selects the bridge chip itself, &eg; the chip
+connected to the PCI or USB bus. Non-zero numbers identify specific
+parts of the bridge chip such as an AC97 register block.</para>
+
+    <para>When <structfield>match.type</structfield> is
+<constant>V4L2_CHIP_MATCH_SUBDEV</constant>,
+<structfield>match.addr</structfield> selects the nth sub-device. This
+allows you to enumerate over all sub-devices.</para>
+
+    <para>On success, the <structfield>name</structfield> field will
+contain a chip name and the <structfield>flags</structfield> field will
+contain <constant>V4L2_CHIP_FL_READABLE</constant> if the driver supports
+reading registers from the device or <constant>V4L2_CHIP_FL_WRITABLE</constant>
+if the driver supports writing registers to the device.</para>
+
+    <para>We recommended the <application>v4l2-dbg</application>
+utility over calling this ioctl directly. It is available from the
+LinuxTV v4l-dvb repository; see <ulink
+url="http://linuxtv.org/repo/">http://linuxtv.org/repo/</ulink> for
+access instructions.</para>
+
+    <!-- Note for convenience vidioc-dbg-g-register.sgml
+	 contains a duplicate of this table. -->
+    <table pgwide="1" frame="none" id="name-v4l2-dbg-match">
+      <title>struct <structname>v4l2_dbg_match</structname></title>
+      <tgroup cols="4">
+	&cs-ustr;
+	<tbody valign="top">
+	  <row>
+	    <entry>__u32</entry>
+	    <entry><structfield>type</structfield></entry>
+	    <entry>See <xref linkend="name-chip-match-types" /> for a list of
+possible types.</entry>
+	  </row>
+	  <row>
+	    <entry>union</entry>
+	    <entry>(anonymous)</entry>
+	  </row>
+	  <row>
+	    <entry></entry>
+	    <entry>__u32</entry>
+	    <entry><structfield>addr</structfield></entry>
+	    <entry>Match a chip by this number, interpreted according
+to the <structfield>type</structfield> field.</entry>
+	  </row>
+	  <row>
+	    <entry></entry>
+	    <entry>char</entry>
+	    <entry><structfield>name[32]</structfield></entry>
+	    <entry>Match a chip by this name, interpreted according
+to the <structfield>type</structfield> field.</entry>
+	  </row>
+	</tbody>
+      </tgroup>
+    </table>
+
+    <table pgwide="1" frame="none" id="v4l2-dbg-chip-info">
+      <title>struct <structname>v4l2_dbg_chip_info</structname></title>
+      <tgroup cols="3">
+	&cs-str;
+	<tbody valign="top">
+	  <row>
+	    <entry>struct v4l2_dbg_match</entry>
+	    <entry><structfield>match</structfield></entry>
+	    <entry>How to match the chip, see <xref linkend="name-v4l2-dbg-match" />.</entry>
+	  </row>
+	  <row>
+	    <entry>char</entry>
+	    <entry><structfield>name[32]</structfield></entry>
+	    <entry>The name of the chip.</entry>
+	  </row>
+	  <row>
+	    <entry>__u32</entry>
+	    <entry><structfield>flags</structfield></entry>
+	    <entry>Set by the driver. If <constant>V4L2_CHIP_FL_READABLE</constant>
+is set, then the driver supports reading registers from the device. If
+<constant>V4L2_CHIP_FL_WRITABLE</constant> is set, then it supports writing registers.</entry>
+	  </row>
+	  <row>
+	    <entry>__u32</entry>
+	    <entry><structfield>reserved[8]</structfield></entry>
+	    <entry>Reserved fields, both application and driver must set these to 0.</entry>
+	  </row>
+	</tbody>
+      </tgroup>
+    </table>
+
+    <!-- Note for convenience vidioc-dbg-g-register.sgml
+	 contains a duplicate of this table. -->
+    <table pgwide="1" frame="none" id="name-chip-match-types">
+      <title>Chip Match Types</title>
+      <tgroup cols="3">
+	&cs-def;
+	<tbody valign="top">
+	  <row>
+	    <entry><constant>V4L2_CHIP_MATCH_BRIDGE</constant></entry>
+	    <entry>0</entry>
+	    <entry>Match the nth chip on the card, zero for the
+	    bridge chip. Does not match sub-devices.</entry>
+	  </row>
+	  <row>
+	    <entry><constant>V4L2_CHIP_MATCH_I2C_DRIVER</constant></entry>
+	    <entry>1</entry>
+	    <entry>Match an &i2c; chip by its driver name. Can't be used with this ioctl.</entry>
+	  </row>
+	  <row>
+	    <entry><constant>V4L2_CHIP_MATCH_I2C_ADDR</constant></entry>
+	    <entry>2</entry>
+	    <entry>Match a chip by its 7 bit &i2c; bus address. Can't be used with this ioctl.</entry>
+	  </row>
+	  <row>
+	    <entry><constant>V4L2_CHIP_MATCH_AC97</constant></entry>
+	    <entry>3</entry>
+	    <entry>Match the nth anciliary AC97 chip. Can't be used with this ioctl.</entry>
+	  </row>
+	  <row>
+	    <entry><constant>V4L2_CHIP_MATCH_SUBDEV</constant></entry>
+	    <entry>4</entry>
+	    <entry>Match the nth sub-device.</entry>
+	  </row>
+	</tbody>
+      </tgroup>
+    </table>
+  </refsect1>
+
+  <refsect1>
+    &return-value;
+
+    <variablelist>
+      <varlistentry>
+	<term><errorcode>EINVAL</errorcode></term>
+	<listitem>
+	  <para>The <structfield>match_type</structfield> is invalid or
+no device could be matched.</para>
+	</listitem>
+      </varlistentry>
+     </variablelist>
+  </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-name.xml b/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-name.xml
deleted file mode 100644
index fa3bd42ab167..000000000000
--- a/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-name.xml
+++ /dev/null
@@ -1,223 +0,0 @@
-<refentry id="vidioc-dbg-g-chip-name">
-  <refmeta>
-    <refentrytitle>ioctl VIDIOC_DBG_G_CHIP_NAME</refentrytitle>
-    &manvol;
-  </refmeta>
-
-  <refnamediv>
-    <refname>VIDIOC_DBG_G_CHIP_NAME</refname>
-    <refpurpose>Identify the chips on a TV card</refpurpose>
-  </refnamediv>
-
-  <refsynopsisdiv>
-    <funcsynopsis>
-      <funcprototype>
-	<funcdef>int <function>ioctl</function></funcdef>
-	<paramdef>int <parameter>fd</parameter></paramdef>
-	<paramdef>int <parameter>request</parameter></paramdef>
-	<paramdef>struct v4l2_dbg_chip_name
-*<parameter>argp</parameter></paramdef>
-      </funcprototype>
-    </funcsynopsis>
-  </refsynopsisdiv>
-
-  <refsect1>
-    <title>Arguments</title>
-
-    <variablelist>
-      <varlistentry>
-	<term><parameter>fd</parameter></term>
-	<listitem>
-	  <para>&fd;</para>
-	</listitem>
-      </varlistentry>
-      <varlistentry>
-	<term><parameter>request</parameter></term>
-	<listitem>
-	  <para>VIDIOC_DBG_G_CHIP_NAME</para>
-	</listitem>
-      </varlistentry>
-      <varlistentry>
-	<term><parameter>argp</parameter></term>
-	<listitem>
-	  <para></para>
-	</listitem>
-      </varlistentry>
-    </variablelist>
-  </refsect1>
-
-  <refsect1>
-    <title>Description</title>
-
-    <note>
-      <title>Experimental</title>
-
-      <para>This is an <link
-linkend="experimental">experimental</link> interface and may change in
-the future.</para>
-    </note>
-
-    <para>For driver debugging purposes this ioctl allows test
-applications to query the driver about the chips present on the TV
-card. Regular applications must not use it. When you found a chip
-specific bug, please contact the linux-media mailing list (&v4l-ml;)
-so it can be fixed.</para>
-
-    <para>Additionally the Linux kernel must be compiled with the
-<constant>CONFIG_VIDEO_ADV_DEBUG</constant> option to enable this ioctl.</para>
-
-    <para>To query the driver applications must initialize the
-<structfield>match.type</structfield> and
-<structfield>match.addr</structfield> or <structfield>match.name</structfield>
-fields of a &v4l2-dbg-chip-name;
-and call <constant>VIDIOC_DBG_G_CHIP_NAME</constant> with a pointer to
-this structure. On success the driver stores information about the
-selected chip in the <structfield>name</structfield> and
-<structfield>flags</structfield> fields. On failure the structure
-remains unchanged.</para>
-
-    <para>When <structfield>match.type</structfield> is
-<constant>V4L2_CHIP_MATCH_BRIDGE</constant>,
-<structfield>match.addr</structfield> selects the nth bridge 'chip'
-on the TV card. You can enumerate all chips by starting at zero and
-incrementing <structfield>match.addr</structfield> by one until
-<constant>VIDIOC_DBG_G_CHIP_NAME</constant> fails with an &EINVAL;.
-The number zero always selects the bridge chip itself, &eg; the chip
-connected to the PCI or USB bus. Non-zero numbers identify specific
-parts of the bridge chip such as an AC97 register block.</para>
-
-    <para>When <structfield>match.type</structfield> is
-<constant>V4L2_CHIP_MATCH_SUBDEV</constant>,
-<structfield>match.addr</structfield> selects the nth sub-device. This
-allows you to enumerate over all sub-devices.</para>
-
-    <para>On success, the <structfield>name</structfield> field will
-contain a chip name and the <structfield>flags</structfield> field will
-contain <constant>V4L2_CHIP_FL_READABLE</constant> if the driver supports
-reading registers from the device or <constant>V4L2_CHIP_FL_WRITABLE</constant>
-if the driver supports writing registers to the device.</para>
-
-    <para>We recommended the <application>v4l2-dbg</application>
-utility over calling this ioctl directly. It is available from the
-LinuxTV v4l-dvb repository; see <ulink
-url="http://linuxtv.org/repo/">http://linuxtv.org/repo/</ulink> for
-access instructions.</para>
-
-    <!-- Note for convenience vidioc-dbg-g-register.sgml
-	 contains a duplicate of this table. -->
-    <table pgwide="1" frame="none" id="name-v4l2-dbg-match">
-      <title>struct <structname>v4l2_dbg_match</structname></title>
-      <tgroup cols="4">
-	&cs-ustr;
-	<tbody valign="top">
-	  <row>
-	    <entry>__u32</entry>
-	    <entry><structfield>type</structfield></entry>
-	    <entry>See <xref linkend="name-chip-match-types" /> for a list of
-possible types.</entry>
-	  </row>
-	  <row>
-	    <entry>union</entry>
-	    <entry>(anonymous)</entry>
-	  </row>
-	  <row>
-	    <entry></entry>
-	    <entry>__u32</entry>
-	    <entry><structfield>addr</structfield></entry>
-	    <entry>Match a chip by this number, interpreted according
-to the <structfield>type</structfield> field.</entry>
-	  </row>
-	  <row>
-	    <entry></entry>
-	    <entry>char</entry>
-	    <entry><structfield>name[32]</structfield></entry>
-	    <entry>Match a chip by this name, interpreted according
-to the <structfield>type</structfield> field.</entry>
-	  </row>
-	</tbody>
-      </tgroup>
-    </table>
-
-    <table pgwide="1" frame="none" id="v4l2-dbg-chip-name">
-      <title>struct <structname>v4l2_dbg_chip_name</structname></title>
-      <tgroup cols="3">
-	&cs-str;
-	<tbody valign="top">
-	  <row>
-	    <entry>struct v4l2_dbg_match</entry>
-	    <entry><structfield>match</structfield></entry>
-	    <entry>How to match the chip, see <xref linkend="name-v4l2-dbg-match" />.</entry>
-	  </row>
-	  <row>
-	    <entry>char</entry>
-	    <entry><structfield>name[32]</structfield></entry>
-	    <entry>The name of the chip.</entry>
-	  </row>
-	  <row>
-	    <entry>__u32</entry>
-	    <entry><structfield>flags</structfield></entry>
-	    <entry>Set by the driver. If <constant>V4L2_CHIP_FL_READABLE</constant>
-is set, then the driver supports reading registers from the device. If
-<constant>V4L2_CHIP_FL_WRITABLE</constant> is set, then it supports writing registers.</entry>
-	  </row>
-	  <row>
-	    <entry>__u32</entry>
-	    <entry><structfield>reserved[8]</structfield></entry>
-	    <entry>Reserved fields, both application and driver must set these to 0.</entry>
-	  </row>
-	</tbody>
-      </tgroup>
-    </table>
-
-    <!-- Note for convenience vidioc-dbg-g-register.sgml
-	 contains a duplicate of this table. -->
-    <table pgwide="1" frame="none" id="name-chip-match-types">
-      <title>Chip Match Types</title>
-      <tgroup cols="3">
-	&cs-def;
-	<tbody valign="top">
-	  <row>
-	    <entry><constant>V4L2_CHIP_MATCH_BRIDGE</constant></entry>
-	    <entry>0</entry>
-	    <entry>Match the nth chip on the card, zero for the
-	    bridge chip. Does not match sub-devices.</entry>
-	  </row>
-	  <row>
-	    <entry><constant>V4L2_CHIP_MATCH_I2C_DRIVER</constant></entry>
-	    <entry>1</entry>
-	    <entry>Match an &i2c; chip by its driver name. Can't be used with this ioctl.</entry>
-	  </row>
-	  <row>
-	    <entry><constant>V4L2_CHIP_MATCH_I2C_ADDR</constant></entry>
-	    <entry>2</entry>
-	    <entry>Match a chip by its 7 bit &i2c; bus address. Can't be used with this ioctl.</entry>
-	  </row>
-	  <row>
-	    <entry><constant>V4L2_CHIP_MATCH_AC97</constant></entry>
-	    <entry>3</entry>
-	    <entry>Match the nth anciliary AC97 chip. Can't be used with this ioctl.</entry>
-	  </row>
-	  <row>
-	    <entry><constant>V4L2_CHIP_MATCH_SUBDEV</constant></entry>
-	    <entry>4</entry>
-	    <entry>Match the nth sub-device.</entry>
-	  </row>
-	</tbody>
-      </tgroup>
-    </table>
-  </refsect1>
-
-  <refsect1>
-    &return-value;
-
-    <variablelist>
-      <varlistentry>
-	<term><errorcode>EINVAL</errorcode></term>
-	<listitem>
-	  <para>The <structfield>match_type</structfield> is invalid or
-no device could be matched.</para>
-	</listitem>
-      </varlistentry>
-     </variablelist>
-  </refsect1>
-</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml b/Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml
index db7844f2439f..d13bac9e2445 100644
--- a/Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml
+++ b/Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml
@@ -99,7 +99,7 @@ unchanged.</para>
 <structfield>match.addr</structfield> selects the nth non-sub-device chip
 on the TV card.  The number zero always selects the host chip, &eg; the
 chip connected to the PCI or USB bus. You can find out which chips are
-present with the &VIDIOC-DBG-G-CHIP-NAME; ioctl.</para>
+present with the &VIDIOC-DBG-G-CHIP-INFO; ioctl.</para>
 
     <para>When <structfield>match.type</structfield> is
 <constant>V4L2_CHIP_MATCH_I2C_DRIVER</constant>,
@@ -109,7 +109,7 @@ For instance
 supported by the saa7127 driver, regardless of its &i2c; bus address.
 When multiple chips supported by the same driver are present, the
 effect of these ioctls is undefined. Again with the
-&VIDIOC-DBG-G-CHIP-NAME; ioctl you can find out which &i2c; chips are
+&VIDIOC-DBG-G-CHIP-INFO; ioctl you can find out which &i2c; chips are
 present.</para>
 
     <para>When <structfield>match.type</structfield> is
@@ -131,14 +131,14 @@ on the TV card.</para>
 
       <para>Due to a flaw in the Linux &i2c; bus driver these ioctls may
 return successfully without actually reading or writing a register. To
-catch the most likely failure we recommend a &VIDIOC-DBG-G-CHIP-NAME;
+catch the most likely failure we recommend a &VIDIOC-DBG-G-CHIP-INFO;
 call confirming the presence of the selected &i2c; chip.</para>
     </note>
 
     <para>These ioctls are optional, not all drivers may support them.
 However when a driver supports these ioctls it must also support
-&VIDIOC-DBG-G-CHIP-NAME;. Conversely it may support
-<constant>VIDIOC_DBG_G_CHIP_NAME</constant> but not these ioctls.</para>
+&VIDIOC-DBG-G-CHIP-INFO;. Conversely it may support
+<constant>VIDIOC_DBG_G_CHIP_INFO</constant> but not these ioctls.</para>
 
     <para><constant>VIDIOC_DBG_G_REGISTER</constant> and
 <constant>VIDIOC_DBG_S_REGISTER</constant> were introduced in Linux
diff --git a/drivers/media/usb/em28xx/em28xx-video.c b/drivers/media/usb/em28xx/em28xx-video.c
index 39951f5731e1..c27c1f671396 100644
--- a/drivers/media/usb/em28xx/em28xx-video.c
+++ b/drivers/media/usb/em28xx/em28xx-video.c
@@ -1332,8 +1332,8 @@ static int vidioc_g_chip_ident(struct file *file, void *priv,
 }
 
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-static int vidioc_g_chip_name(struct file *file, void *priv,
-	       struct v4l2_dbg_chip_name *chip)
+static int vidioc_g_chip_info(struct file *file, void *priv,
+	       struct v4l2_dbg_chip_info *chip)
 {
 	struct em28xx_fh      *fh  = priv;
 	struct em28xx         *dev = fh->dev;
@@ -1797,7 +1797,7 @@ static const struct v4l2_ioctl_ops video_ioctl_ops = {
 	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
 	.vidioc_g_chip_ident        = vidioc_g_chip_ident,
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-	.vidioc_g_chip_name         = vidioc_g_chip_name,
+	.vidioc_g_chip_info         = vidioc_g_chip_info,
 	.vidioc_g_register          = vidioc_g_register,
 	.vidioc_s_register          = vidioc_s_register,
 #endif
@@ -1827,7 +1827,7 @@ static const struct v4l2_ioctl_ops radio_ioctl_ops = {
 	.vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
 	.vidioc_unsubscribe_event = v4l2_event_unsubscribe,
 	.vidioc_g_chip_ident  = vidioc_g_chip_ident,
-	.vidioc_g_chip_name   = vidioc_g_chip_name,
+	.vidioc_g_chip_info   = vidioc_g_chip_info,
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	.vidioc_g_register    = vidioc_g_register,
 	.vidioc_s_register    = vidioc_s_register,
diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index 1c3b43cf773d..5923c5dfacd5 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -592,7 +592,7 @@ static void determine_valid_ioctls(struct video_device *vdev)
 	SET_VALID_IOCTL(ops, VIDIOC_S_FREQUENCY, vidioc_s_frequency);
 	SET_VALID_IOCTL(ops, VIDIOC_LOG_STATUS, vidioc_log_status);
 #ifdef CONFIG_VIDEO_ADV_DEBUG
-	set_bit(_IOC_NR(VIDIOC_DBG_G_CHIP_NAME), valid_ioctls);
+	set_bit(_IOC_NR(VIDIOC_DBG_G_CHIP_INFO), valid_ioctls);
 	set_bit(_IOC_NR(VIDIOC_DBG_G_REGISTER), valid_ioctls);
 	set_bit(_IOC_NR(VIDIOC_DBG_S_REGISTER), valid_ioctls);
 #endif
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index c48d0acd8bb9..f81bda1a48ec 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -638,9 +638,9 @@ static void v4l_print_dbg_chip_ident(const void *arg, bool write_only)
 			p->ident, p->revision);
 }
 
-static void v4l_print_dbg_chip_name(const void *arg, bool write_only)
+static void v4l_print_dbg_chip_info(const void *arg, bool write_only)
 {
-	const struct v4l2_dbg_chip_name *p = arg;
+	const struct v4l2_dbg_chip_info *p = arg;
 
 	pr_cont("type=%u, ", p->match.type);
 	if (p->match.type == V4L2_CHIP_MATCH_I2C_DRIVER)
@@ -1854,12 +1854,12 @@ static int v4l_dbg_g_chip_ident(const struct v4l2_ioctl_ops *ops,
 	return ops->vidioc_g_chip_ident(file, fh, p);
 }
 
-static int v4l_dbg_g_chip_name(const struct v4l2_ioctl_ops *ops,
+static int v4l_dbg_g_chip_info(const struct v4l2_ioctl_ops *ops,
 				struct file *file, void *fh, void *arg)
 {
 #ifdef CONFIG_VIDEO_ADV_DEBUG
 	struct video_device *vfd = video_devdata(file);
-	struct v4l2_dbg_chip_name *p = arg;
+	struct v4l2_dbg_chip_info *p = arg;
 	struct v4l2_subdev *sd;
 	int idx = 0;
 
@@ -1875,8 +1875,8 @@ static int v4l_dbg_g_chip_name(const struct v4l2_ioctl_ops *ops,
 			strlcpy(p->name, vfd->parent->driver->name, sizeof(p->name));
 		else
 			strlcpy(p->name, "bridge", sizeof(p->name));
-		if (ops->vidioc_g_chip_name)
-			return ops->vidioc_g_chip_name(file, fh, arg);
+		if (ops->vidioc_g_chip_info)
+			return ops->vidioc_g_chip_info(file, fh, arg);
 		if (p->match.addr)
 			return -EINVAL;
 		return 0;
@@ -2116,7 +2116,7 @@ static struct v4l2_ioctl_info v4l2_ioctls[] = {
 	IOCTL_INFO_STD(VIDIOC_QUERY_DV_TIMINGS, vidioc_query_dv_timings, v4l_print_dv_timings, 0),
 	IOCTL_INFO_STD(VIDIOC_DV_TIMINGS_CAP, vidioc_dv_timings_cap, v4l_print_dv_timings_cap, INFO_FL_CLEAR(v4l2_dv_timings_cap, type)),
 	IOCTL_INFO_FNC(VIDIOC_ENUM_FREQ_BANDS, v4l_enum_freq_bands, v4l_print_freq_band, 0),
-	IOCTL_INFO_FNC(VIDIOC_DBG_G_CHIP_NAME, v4l_dbg_g_chip_name, v4l_print_dbg_chip_name, INFO_FL_CLEAR(v4l2_dbg_chip_name, match)),
+	IOCTL_INFO_FNC(VIDIOC_DBG_G_CHIP_INFO, v4l_dbg_g_chip_info, v4l_print_dbg_chip_info, INFO_FL_CLEAR(v4l2_dbg_chip_info, match)),
 };
 #define V4L2_IOCTLS ARRAY_SIZE(v4l2_ioctls)
 
diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h
index 6b917d69e408..931652f0e2af 100644
--- a/include/media/v4l2-ioctl.h
+++ b/include/media/v4l2-ioctl.h
@@ -244,8 +244,8 @@ struct v4l2_ioctl_ops {
 	int (*vidioc_s_register)       (struct file *file, void *fh,
 					const struct v4l2_dbg_register *reg);
 
-	int (*vidioc_g_chip_name)      (struct file *file, void *fh,
-					struct v4l2_dbg_chip_name *chip);
+	int (*vidioc_g_chip_info)      (struct file *file, void *fh,
+					struct v4l2_dbg_chip_info *chip);
 #endif
 	int (*vidioc_g_chip_ident)     (struct file *file, void *fh,
 					struct v4l2_dbg_chip_ident *chip);
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 4c941c103c44..be43b4659527 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1839,8 +1839,8 @@ struct v4l2_dbg_chip_ident {
 #define V4L2_CHIP_FL_READABLE (1 << 0)
 #define V4L2_CHIP_FL_WRITABLE (1 << 1)
 
-/* VIDIOC_DBG_G_CHIP_NAME */
-struct v4l2_dbg_chip_name {
+/* VIDIOC_DBG_G_CHIP_INFO */
+struct v4l2_dbg_chip_info {
 	struct v4l2_dbg_match match;
 	char name[32];
 	__u32 flags;
@@ -1938,7 +1938,7 @@ struct v4l2_create_buffers {
 
 /* Experimental, meant for debugging, testing and internal use.
    Never use this ioctl in applications!
-   Note: this ioctl is deprecated in favor of VIDIOC_DBG_G_CHIP_NAME and
+   Note: this ioctl is deprecated in favor of VIDIOC_DBG_G_CHIP_INFO and
    will go away in the future. */
 #define VIDIOC_DBG_G_CHIP_IDENT _IOWR('V', 81, struct v4l2_dbg_chip_ident)
 
@@ -1976,7 +1976,7 @@ struct v4l2_create_buffers {
 
 /* Experimental, meant for debugging, testing and internal use.
    Never use these in applications! */
-#define VIDIOC_DBG_G_CHIP_NAME  _IOWR('V', 102, struct v4l2_dbg_chip_name)
+#define VIDIOC_DBG_G_CHIP_INFO  _IOWR('V', 102, struct v4l2_dbg_chip_info)
 
 /* Reminder: when adding new ioctls please add support for them to
    drivers/media/video/v4l2-compat-ioctl32.c as well! */
-- 
cgit 


From b8399b83058848979538932473d817559f7ff8fb Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Mon, 8 Apr 2013 11:53:55 -0300
Subject: [media] videodev2.h: increase size of 'reserved' array

Increase the size of the 'reserved' array to give more room for future
extensions.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/uapi/linux/videodev2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index be43b4659527..1282cd38e86a 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1844,7 +1844,7 @@ struct v4l2_dbg_chip_info {
 	struct v4l2_dbg_match match;
 	char name[32];
 	__u32 flags;
-	__u32 reserved[8];
+	__u32 reserved[32];
 } __attribute__ ((packed));
 
 /**
-- 
cgit 


From 292a878720b26213bc773619715525e9f7a4f4a1 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Wed, 20 Mar 2013 15:26:36 -0300
Subject: [media] videodev2.h: fix incorrect V4L2_DV_FL_HALF_LINE bitmask

This was set to 1 << 0 which is the same as V4L2_DV_FL_REDUCED_BLANKING.
It should be 1 << 3 instead. Luckily interlaced formats are rarely used,
which is why this bug wasn't seen until now.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/uapi/linux/videodev2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 1282cd38e86a..97fb392bb2d9 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1074,7 +1074,7 @@ struct v4l2_bt_timings {
    longer and field 2 is really one half-line shorter, so each field has
    exactly the same number of half-lines. Whether half-lines can be detected
    or used depends on the hardware. */
-#define V4L2_DV_FL_HALF_LINE			(1 << 0)
+#define V4L2_DV_FL_HALF_LINE			(1 << 3)
 
 
 /** struct v4l2_dv_timings - DV timings
-- 
cgit 


From a7b74bd82a26379495e0e727d2c0fa9b0b97d917 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Wed, 20 Mar 2013 14:31:34 -0300
Subject: [media] v4l2-dv-timings.h: add 480i59.94 and 576i50 CEA-861-E timings

These formats are supported by the HDPVR, but they were missing in the list.
Note that these formats are different from the common PAL/NTSC/SECAM formats
since all color channels are transmitted separately and so there is no PAL
or NTSC or SECAM color encoding involved.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/uapi/linux/v4l2-dv-timings.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h
index 9ef8172e5ed0..4e0c58d25ff0 100644
--- a/include/uapi/linux/v4l2-dv-timings.h
+++ b/include/uapi/linux/v4l2-dv-timings.h
@@ -42,6 +42,15 @@
 		V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_CEA861, 0) \
 }
 
+/* Note: these are the nominal timings, for HDMI links this format is typically
+ * double-clocked to meet the minimum pixelclock requirements.  */
+#define V4L2_DV_BT_CEA_720X480I59_94 { \
+	.type = V4L2_DV_BT_656_1120, \
+	V4L2_INIT_BT_TIMINGS(720, 480, 1, 0, \
+		13500000, 19, 62, 57, 4, 3, 15, 4, 3, 16, \
+		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_HALF_LINE) \
+}
+
 #define V4L2_DV_BT_CEA_720X480P59_94 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(720, 480, 0, 0, \
@@ -49,6 +58,15 @@
 		V4L2_DV_BT_STD_CEA861, 0) \
 }
 
+/* Note: these are the nominal timings, for HDMI links this format is typically
+ * double-clocked to meet the minimum pixelclock requirements.  */
+#define V4L2_DV_BT_CEA_720X576I50 { \
+	.type = V4L2_DV_BT_656_1120, \
+	V4L2_INIT_BT_TIMINGS(720, 576, 1, 0, \
+		13500000, 12, 63, 69, 2, 3, 19, 2, 3, 20, \
+		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_HALF_LINE) \
+}
+
 #define V4L2_DV_BT_CEA_720X576P50 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(720, 576, 0, 0, \
-- 
cgit 


From c7995c43facc6e5dea4de63fa9d283a337aabeb1 Mon Sep 17 00:00:00 2001
From: Atzm Watanabe <atzm@stratosphere.co.jp>
Date: Tue, 16 Apr 2013 02:50:52 +0000
Subject: vxlan: Allow setting destination to unicast address.

This patch allows setting VXLAN destination to unicast address.
It allows that VXLAN can be used as peer-to-peer tunnel without
multicast.

v4: generalize struct vxlan_dev, "gaddr" is replaced with vxlan_rdst.
    "GROUP" attribute is replaced with "REMOTE".
    they are based by David Stevens's comments.

v3: move a new attribute REMOTE into the last of an enum list
    based by Stephen Hemminger's comments.

v2: use a new attribute REMOTE instead of GROUP based by
    Cong Wang's comments.

Signed-off-by: Atzm Watanabe <atzm@stratosphere.co.jp>
Acked-by: David L Stevens <dlstevens@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c          | 73 ++++++++++++++++++--------------------------
 include/uapi/linux/if_link.h |  2 +-
 2 files changed, 31 insertions(+), 44 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 97a306c9e65d..916a62149a12 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -105,10 +105,8 @@ struct vxlan_fdb {
 struct vxlan_dev {
 	struct hlist_node hlist;
 	struct net_device *dev;
-	__u32		  vni;		/* virtual network id */
-	__be32	          gaddr;	/* multicast group */
+	struct vxlan_rdst default_dst;	/* default destination */
 	__be32		  saddr;	/* source address */
-	unsigned int      link;		/* link to multicast over */
 	__u16		  port_min;	/* source port range */
 	__u16		  port_max;
 	__u8		  tos;		/* TOS override */
@@ -146,7 +144,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id)
 	struct vxlan_dev *vxlan;
 
 	hlist_for_each_entry_rcu(vxlan, vni_head(net, id), hlist) {
-		if (vxlan->vni == id)
+		if (vxlan->default_dst.remote_vni == id)
 			return vxlan;
 	}
 
@@ -194,7 +192,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 	if (rdst->remote_port && rdst->remote_port != vxlan_port &&
 	    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
 		goto nla_put_failure;
-	if (rdst->remote_vni != vxlan->vni &&
+	if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
 	    nla_put_be32(skb, NDA_VNI, rdst->remote_vni))
 		goto nla_put_failure;
 	if (rdst->remote_ifindex &&
@@ -465,7 +463,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			return -EINVAL;
 		vni = nla_get_u32(tb[NDA_VNI]);
 	} else
-		vni = vxlan->vni;
+		vni = vxlan->default_dst.remote_vni;
 
 	if (tb[NDA_IFINDEX]) {
 		struct net_device *tdev;
@@ -570,7 +568,7 @@ static void vxlan_snoop(struct net_device *dev,
 		err = vxlan_fdb_create(vxlan, src_mac, src_ip,
 				       NUD_REACHABLE,
 				       NLM_F_EXCL|NLM_F_CREATE,
-				       vxlan_port, vxlan->vni, 0);
+				       vxlan_port, vxlan->default_dst.remote_vni, 0);
 		spin_unlock(&vxlan->hash_lock);
 	}
 }
@@ -591,7 +589,7 @@ static bool vxlan_group_used(struct vxlan_net *vn,
 			if (!netif_running(vxlan->dev))
 				continue;
 
-			if (vxlan->gaddr == this->gaddr)
+			if (vxlan->default_dst.remote_ip == this->default_dst.remote_ip)
 				return true;
 		}
 
@@ -605,8 +603,8 @@ static int vxlan_join_group(struct net_device *dev)
 	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct sock *sk = vn->sock->sk;
 	struct ip_mreqn mreq = {
-		.imr_multiaddr.s_addr	= vxlan->gaddr,
-		.imr_ifindex		= vxlan->link,
+		.imr_multiaddr.s_addr	= vxlan->default_dst.remote_ip,
+		.imr_ifindex		= vxlan->default_dst.remote_ifindex,
 	};
 	int err;
 
@@ -633,8 +631,8 @@ static int vxlan_leave_group(struct net_device *dev)
 	int err = 0;
 	struct sock *sk = vn->sock->sk;
 	struct ip_mreqn mreq = {
-		.imr_multiaddr.s_addr	= vxlan->gaddr,
-		.imr_ifindex		= vxlan->link,
+		.imr_multiaddr.s_addr	= vxlan->default_dst.remote_ip,
+		.imr_ifindex		= vxlan->default_dst.remote_ifindex,
 	};
 
 	/* Only leave group when last vxlan is done. */
@@ -1091,7 +1089,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct ethhdr *eth;
 	bool did_rsc = false;
-	struct vxlan_rdst group, *rdst0, *rdst;
+	struct vxlan_rdst *rdst0, *rdst;
 	struct vxlan_fdb *f;
 	int rc1, rc;
 
@@ -1106,14 +1104,9 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 	f = vxlan_find_mac(vxlan, eth->h_dest);
 	if (f == NULL) {
 		did_rsc = false;
-		group.remote_port = vxlan_port;
-		group.remote_vni = vxlan->vni;
-		group.remote_ip = vxlan->gaddr;
-		group.remote_ifindex = vxlan->link;
-		group.remote_next = NULL;
-		rdst0 = &group;
-
-		if (group.remote_ip == htonl(INADDR_ANY) &&
+		rdst0 = &vxlan->default_dst;
+
+		if (rdst0->remote_ip == htonl(INADDR_ANY) &&
 		    (vxlan->flags & VXLAN_F_L2MISS) &&
 		    !is_multicast_ether_addr(eth->h_dest))
 			vxlan_fdb_miss(vxlan, eth->h_dest);
@@ -1191,7 +1184,7 @@ static int vxlan_open(struct net_device *dev)
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	int err;
 
-	if (vxlan->gaddr) {
+	if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
 		err = vxlan_join_group(dev);
 		if (err)
 			return err;
@@ -1225,7 +1218,7 @@ static int vxlan_stop(struct net_device *dev)
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 
-	if (vxlan->gaddr)
+	if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip)))
 		vxlan_leave_group(dev);
 
 	del_timer_sync(&vxlan->age_timer);
@@ -1311,7 +1304,7 @@ static void vxlan_setup(struct net_device *dev)
 
 static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_ID]		= { .type = NLA_U32 },
-	[IFLA_VXLAN_GROUP]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+	[IFLA_VXLAN_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
 	[IFLA_VXLAN_LINK]	= { .type = NLA_U32 },
 	[IFLA_VXLAN_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
 	[IFLA_VXLAN_TOS]	= { .type = NLA_U8 },
@@ -1349,14 +1342,6 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
 			return -ERANGE;
 	}
 
-	if (data[IFLA_VXLAN_GROUP]) {
-		__be32 gaddr = nla_get_be32(data[IFLA_VXLAN_GROUP]);
-		if (!IN_MULTICAST(ntohl(gaddr))) {
-			pr_debug("group address is not IPv4 multicast\n");
-			return -EADDRNOTAVAIL;
-		}
-	}
-
 	if (data[IFLA_VXLAN_PORT_RANGE]) {
 		const struct ifla_vxlan_port_range *p
 			= nla_data(data[IFLA_VXLAN_PORT_RANGE]);
@@ -1387,6 +1372,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[])
 {
 	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct vxlan_rdst *dst = &vxlan->default_dst;
 	__u32 vni;
 	int err;
 
@@ -1398,21 +1384,21 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 		pr_info("duplicate VNI %u\n", vni);
 		return -EEXIST;
 	}
-	vxlan->vni = vni;
+	dst->remote_vni = vni;
 
-	if (data[IFLA_VXLAN_GROUP])
-		vxlan->gaddr = nla_get_be32(data[IFLA_VXLAN_GROUP]);
+	if (data[IFLA_VXLAN_REMOTE])
+		dst->remote_ip = nla_get_be32(data[IFLA_VXLAN_REMOTE]);
 
 	if (data[IFLA_VXLAN_LOCAL])
 		vxlan->saddr = nla_get_be32(data[IFLA_VXLAN_LOCAL]);
 
 	if (data[IFLA_VXLAN_LINK] &&
-	    (vxlan->link = nla_get_u32(data[IFLA_VXLAN_LINK]))) {
+	    (dst->remote_ifindex = nla_get_u32(data[IFLA_VXLAN_LINK]))) {
 		struct net_device *lowerdev
-			 = __dev_get_by_index(net, vxlan->link);
+			 = __dev_get_by_index(net, dst->remote_ifindex);
 
 		if (!lowerdev) {
-			pr_info("ifindex %d does not exist\n", vxlan->link);
+			pr_info("ifindex %d does not exist\n", dst->remote_ifindex);
 			return -ENODEV;
 		}
 
@@ -1464,7 +1450,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 
 	err = register_netdevice(dev);
 	if (!err)
-		hlist_add_head_rcu(&vxlan->hlist, vni_head(net, vxlan->vni));
+		hlist_add_head_rcu(&vxlan->hlist, vni_head(net, dst->remote_vni));
 
 	return err;
 }
@@ -1482,7 +1468,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
 {
 
 	return nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_ID */
-		nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_GROUP */
+		nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_REMOTE */
 		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_LINK */
 		nla_total_size(sizeof(__be32))+	/* IFLA_VXLAN_LOCAL */
 		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_TTL */
@@ -1501,18 +1487,19 @@ static size_t vxlan_get_size(const struct net_device *dev)
 static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 {
 	const struct vxlan_dev *vxlan = netdev_priv(dev);
+	const struct vxlan_rdst *dst = &vxlan->default_dst;
 	struct ifla_vxlan_port_range ports = {
 		.low =  htons(vxlan->port_min),
 		.high = htons(vxlan->port_max),
 	};
 
-	if (nla_put_u32(skb, IFLA_VXLAN_ID, vxlan->vni))
+	if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni))
 		goto nla_put_failure;
 
-	if (vxlan->gaddr && nla_put_be32(skb, IFLA_VXLAN_GROUP, vxlan->gaddr))
+	if (dst->remote_ip && nla_put_be32(skb, IFLA_VXLAN_REMOTE, dst->remote_ip))
 		goto nla_put_failure;
 
-	if (vxlan->link && nla_put_u32(skb, IFLA_VXLAN_LINK, vxlan->link))
+	if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex))
 		goto nla_put_failure;
 
 	if (vxlan->saddr && nla_put_be32(skb, IFLA_VXLAN_LOCAL, vxlan->saddr))
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 6b35c4211f65..9922704f08af 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -296,7 +296,7 @@ enum macvlan_mode {
 enum {
 	IFLA_VXLAN_UNSPEC,
 	IFLA_VXLAN_ID,
-	IFLA_VXLAN_GROUP,
+	IFLA_VXLAN_REMOTE,
 	IFLA_VXLAN_LINK,
 	IFLA_VXLAN_LOCAL,
 	IFLA_VXLAN_TTL,
-- 
cgit 


From 4c82456eeb4da081dd63dc69e91aa6deabd29e03 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 17 Apr 2013 12:30:40 +0200
Subject: fuse: fix type definitions in uapi header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 7e98d53086d18c877cb44e9065219335184024de (Synchronize fuse header with
one used in library) added #ifdef __linux__ around defines if it is not set.
The kernel build is self-contained and can be built on non-Linux toolchains.
After the mentioned commit builds on non-Linux toolchains will try to include
stdint.h and fail due to -nostdinc, and then fail with a bunch of undefined type
errors.

Fix by checking for __KERNEL__ instead of __linux__ and using the standard int
types instead of the linux specific ones.

Reported-by: Arve Hjønnevåg <arve@android.com>
Reported-by: Colin Cross <ccross@android.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 include/uapi/linux/fuse.h | 436 +++++++++++++++++++++++-----------------------
 1 file changed, 216 insertions(+), 220 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 4c43b4448792..706d035fa748 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -95,15 +95,10 @@
 #ifndef _LINUX_FUSE_H
 #define _LINUX_FUSE_H
 
-#ifdef __linux__
+#ifdef __KERNEL__
 #include <linux/types.h>
 #else
 #include <stdint.h>
-#define __u64 uint64_t
-#define __s64 int64_t
-#define __u32 uint32_t
-#define __s32 int32_t
-#define __u16 uint16_t
 #endif
 
 /*
@@ -139,42 +134,42 @@
    userspace works under 64bit kernels */
 
 struct fuse_attr {
-	__u64	ino;
-	__u64	size;
-	__u64	blocks;
-	__u64	atime;
-	__u64	mtime;
-	__u64	ctime;
-	__u32	atimensec;
-	__u32	mtimensec;
-	__u32	ctimensec;
-	__u32	mode;
-	__u32	nlink;
-	__u32	uid;
-	__u32	gid;
-	__u32	rdev;
-	__u32	blksize;
-	__u32	padding;
+	uint64_t	ino;
+	uint64_t	size;
+	uint64_t	blocks;
+	uint64_t	atime;
+	uint64_t	mtime;
+	uint64_t	ctime;
+	uint32_t	atimensec;
+	uint32_t	mtimensec;
+	uint32_t	ctimensec;
+	uint32_t	mode;
+	uint32_t	nlink;
+	uint32_t	uid;
+	uint32_t	gid;
+	uint32_t	rdev;
+	uint32_t	blksize;
+	uint32_t	padding;
 };
 
 struct fuse_kstatfs {
-	__u64	blocks;
-	__u64	bfree;
-	__u64	bavail;
-	__u64	files;
-	__u64	ffree;
-	__u32	bsize;
-	__u32	namelen;
-	__u32	frsize;
-	__u32	padding;
-	__u32	spare[6];
+	uint64_t	blocks;
+	uint64_t	bfree;
+	uint64_t	bavail;
+	uint64_t	files;
+	uint64_t	ffree;
+	uint32_t	bsize;
+	uint32_t	namelen;
+	uint32_t	frsize;
+	uint32_t	padding;
+	uint32_t	spare[6];
 };
 
 struct fuse_file_lock {
-	__u64	start;
-	__u64	end;
-	__u32	type;
-	__u32	pid; /* tgid */
+	uint64_t	start;
+	uint64_t	end;
+	uint32_t	type;
+	uint32_t	pid; /* tgid */
 };
 
 /**
@@ -364,143 +359,143 @@ enum fuse_notify_code {
 #define FUSE_COMPAT_ENTRY_OUT_SIZE 120
 
 struct fuse_entry_out {
-	__u64	nodeid;		/* Inode ID */
-	__u64	generation;	/* Inode generation: nodeid:gen must
-				   be unique for the fs's lifetime */
-	__u64	entry_valid;	/* Cache timeout for the name */
-	__u64	attr_valid;	/* Cache timeout for the attributes */
-	__u32	entry_valid_nsec;
-	__u32	attr_valid_nsec;
+	uint64_t	nodeid;		/* Inode ID */
+	uint64_t	generation;	/* Inode generation: nodeid:gen must
+					   be unique for the fs's lifetime */
+	uint64_t	entry_valid;	/* Cache timeout for the name */
+	uint64_t	attr_valid;	/* Cache timeout for the attributes */
+	uint32_t	entry_valid_nsec;
+	uint32_t	attr_valid_nsec;
 	struct fuse_attr attr;
 };
 
 struct fuse_forget_in {
-	__u64	nlookup;
+	uint64_t	nlookup;
 };
 
 struct fuse_forget_one {
-	__u64	nodeid;
-	__u64	nlookup;
+	uint64_t	nodeid;
+	uint64_t	nlookup;
 };
 
 struct fuse_batch_forget_in {
-	__u32	count;
-	__u32	dummy;
+	uint32_t	count;
+	uint32_t	dummy;
 };
 
 struct fuse_getattr_in {
-	__u32	getattr_flags;
-	__u32	dummy;
-	__u64	fh;
+	uint32_t	getattr_flags;
+	uint32_t	dummy;
+	uint64_t	fh;
 };
 
 #define FUSE_COMPAT_ATTR_OUT_SIZE 96
 
 struct fuse_attr_out {
-	__u64	attr_valid;	/* Cache timeout for the attributes */
-	__u32	attr_valid_nsec;
-	__u32	dummy;
+	uint64_t	attr_valid;	/* Cache timeout for the attributes */
+	uint32_t	attr_valid_nsec;
+	uint32_t	dummy;
 	struct fuse_attr attr;
 };
 
 #define FUSE_COMPAT_MKNOD_IN_SIZE 8
 
 struct fuse_mknod_in {
-	__u32	mode;
-	__u32	rdev;
-	__u32	umask;
-	__u32	padding;
+	uint32_t	mode;
+	uint32_t	rdev;
+	uint32_t	umask;
+	uint32_t	padding;
 };
 
 struct fuse_mkdir_in {
-	__u32	mode;
-	__u32	umask;
+	uint32_t	mode;
+	uint32_t	umask;
 };
 
 struct fuse_rename_in {
-	__u64	newdir;
+	uint64_t	newdir;
 };
 
 struct fuse_link_in {
-	__u64	oldnodeid;
+	uint64_t	oldnodeid;
 };
 
 struct fuse_setattr_in {
-	__u32	valid;
-	__u32	padding;
-	__u64	fh;
-	__u64	size;
-	__u64	lock_owner;
-	__u64	atime;
-	__u64	mtime;
-	__u64	unused2;
-	__u32	atimensec;
-	__u32	mtimensec;
-	__u32	unused3;
-	__u32	mode;
-	__u32	unused4;
-	__u32	uid;
-	__u32	gid;
-	__u32	unused5;
+	uint32_t	valid;
+	uint32_t	padding;
+	uint64_t	fh;
+	uint64_t	size;
+	uint64_t	lock_owner;
+	uint64_t	atime;
+	uint64_t	mtime;
+	uint64_t	unused2;
+	uint32_t	atimensec;
+	uint32_t	mtimensec;
+	uint32_t	unused3;
+	uint32_t	mode;
+	uint32_t	unused4;
+	uint32_t	uid;
+	uint32_t	gid;
+	uint32_t	unused5;
 };
 
 struct fuse_open_in {
-	__u32	flags;
-	__u32	unused;
+	uint32_t	flags;
+	uint32_t	unused;
 };
 
 struct fuse_create_in {
-	__u32	flags;
-	__u32	mode;
-	__u32	umask;
-	__u32	padding;
+	uint32_t	flags;
+	uint32_t	mode;
+	uint32_t	umask;
+	uint32_t	padding;
 };
 
 struct fuse_open_out {
-	__u64	fh;
-	__u32	open_flags;
-	__u32	padding;
+	uint64_t	fh;
+	uint32_t	open_flags;
+	uint32_t	padding;
 };
 
 struct fuse_release_in {
-	__u64	fh;
-	__u32	flags;
-	__u32	release_flags;
-	__u64	lock_owner;
+	uint64_t	fh;
+	uint32_t	flags;
+	uint32_t	release_flags;
+	uint64_t	lock_owner;
 };
 
 struct fuse_flush_in {
-	__u64	fh;
-	__u32	unused;
-	__u32	padding;
-	__u64	lock_owner;
+	uint64_t	fh;
+	uint32_t	unused;
+	uint32_t	padding;
+	uint64_t	lock_owner;
 };
 
 struct fuse_read_in {
-	__u64	fh;
-	__u64	offset;
-	__u32	size;
-	__u32	read_flags;
-	__u64	lock_owner;
-	__u32	flags;
-	__u32	padding;
+	uint64_t	fh;
+	uint64_t	offset;
+	uint32_t	size;
+	uint32_t	read_flags;
+	uint64_t	lock_owner;
+	uint32_t	flags;
+	uint32_t	padding;
 };
 
 #define FUSE_COMPAT_WRITE_IN_SIZE 24
 
 struct fuse_write_in {
-	__u64	fh;
-	__u64	offset;
-	__u32	size;
-	__u32	write_flags;
-	__u64	lock_owner;
-	__u32	flags;
-	__u32	padding;
+	uint64_t	fh;
+	uint64_t	offset;
+	uint32_t	size;
+	uint32_t	write_flags;
+	uint64_t	lock_owner;
+	uint32_t	flags;
+	uint32_t	padding;
 };
 
 struct fuse_write_out {
-	__u32	size;
-	__u32	padding;
+	uint32_t	size;
+	uint32_t	padding;
 };
 
 #define FUSE_COMPAT_STATFS_SIZE 48
@@ -510,32 +505,32 @@ struct fuse_statfs_out {
 };
 
 struct fuse_fsync_in {
-	__u64	fh;
-	__u32	fsync_flags;
-	__u32	padding;
+	uint64_t	fh;
+	uint32_t	fsync_flags;
+	uint32_t	padding;
 };
 
 struct fuse_setxattr_in {
-	__u32	size;
-	__u32	flags;
+	uint32_t	size;
+	uint32_t	flags;
 };
 
 struct fuse_getxattr_in {
-	__u32	size;
-	__u32	padding;
+	uint32_t	size;
+	uint32_t	padding;
 };
 
 struct fuse_getxattr_out {
-	__u32	size;
-	__u32	padding;
+	uint32_t	size;
+	uint32_t	padding;
 };
 
 struct fuse_lk_in {
-	__u64	fh;
-	__u64	owner;
+	uint64_t	fh;
+	uint64_t	owner;
 	struct fuse_file_lock lk;
-	__u32	lk_flags;
-	__u32	padding;
+	uint32_t	lk_flags;
+	uint32_t	padding;
 };
 
 struct fuse_lk_out {
@@ -543,134 +538,135 @@ struct fuse_lk_out {
 };
 
 struct fuse_access_in {
-	__u32	mask;
-	__u32	padding;
+	uint32_t	mask;
+	uint32_t	padding;
 };
 
 struct fuse_init_in {
-	__u32	major;
-	__u32	minor;
-	__u32	max_readahead;
-	__u32	flags;
+	uint32_t	major;
+	uint32_t	minor;
+	uint32_t	max_readahead;
+	uint32_t	flags;
 };
 
 struct fuse_init_out {
-	__u32	major;
-	__u32	minor;
-	__u32	max_readahead;
-	__u32	flags;
-	__u16   max_background;
-	__u16   congestion_threshold;
-	__u32	max_write;
+	uint32_t	major;
+	uint32_t	minor;
+	uint32_t	max_readahead;
+	uint32_t	flags;
+	uint16_t	max_background;
+	uint16_t	congestion_threshold;
+	uint32_t	max_write;
 };
 
 #define CUSE_INIT_INFO_MAX 4096
 
 struct cuse_init_in {
-	__u32	major;
-	__u32	minor;
-	__u32	unused;
-	__u32	flags;
+	uint32_t	major;
+	uint32_t	minor;
+	uint32_t	unused;
+	uint32_t	flags;
 };
 
 struct cuse_init_out {
-	__u32	major;
-	__u32	minor;
-	__u32	unused;
-	__u32	flags;
-	__u32	max_read;
-	__u32	max_write;
-	__u32	dev_major;		/* chardev major */
-	__u32	dev_minor;		/* chardev minor */
-	__u32	spare[10];
+	uint32_t	major;
+	uint32_t	minor;
+	uint32_t	unused;
+	uint32_t	flags;
+	uint32_t	max_read;
+	uint32_t	max_write;
+	uint32_t	dev_major;		/* chardev major */
+	uint32_t	dev_minor;		/* chardev minor */
+	uint32_t	spare[10];
 };
 
 struct fuse_interrupt_in {
-	__u64	unique;
+	uint64_t	unique;
 };
 
 struct fuse_bmap_in {
-	__u64	block;
-	__u32	blocksize;
-	__u32	padding;
+	uint64_t	block;
+	uint32_t	blocksize;
+	uint32_t	padding;
 };
 
 struct fuse_bmap_out {
-	__u64	block;
+	uint64_t	block;
 };
 
 struct fuse_ioctl_in {
-	__u64	fh;
-	__u32	flags;
-	__u32	cmd;
-	__u64	arg;
-	__u32	in_size;
-	__u32	out_size;
+	uint64_t	fh;
+	uint32_t	flags;
+	uint32_t	cmd;
+	uint64_t	arg;
+	uint32_t	in_size;
+	uint32_t	out_size;
 };
 
 struct fuse_ioctl_iovec {
-	__u64	base;
-	__u64	len;
+	uint64_t	base;
+	uint64_t	len;
 };
 
 struct fuse_ioctl_out {
-	__s32	result;
-	__u32	flags;
-	__u32	in_iovs;
-	__u32	out_iovs;
+	int32_t		result;
+	uint32_t	flags;
+	uint32_t	in_iovs;
+	uint32_t	out_iovs;
 };
 
 struct fuse_poll_in {
-	__u64	fh;
-	__u64	kh;
-	__u32	flags;
-	__u32   events;
+	uint64_t	fh;
+	uint64_t	kh;
+	uint32_t	flags;
+	uint32_t	events;
 };
 
 struct fuse_poll_out {
-	__u32	revents;
-	__u32	padding;
+	uint32_t	revents;
+	uint32_t	padding;
 };
 
 struct fuse_notify_poll_wakeup_out {
-	__u64	kh;
+	uint64_t	kh;
 };
 
 struct fuse_fallocate_in {
-	__u64	fh;
-	__u64	offset;
-	__u64	length;
-	__u32	mode;
-	__u32	padding;
+	uint64_t	fh;
+	uint64_t	offset;
+	uint64_t	length;
+	uint32_t	mode;
+	uint32_t	padding;
 };
 
 struct fuse_in_header {
-	__u32	len;
-	__u32	opcode;
-	__u64	unique;
-	__u64	nodeid;
-	__u32	uid;
-	__u32	gid;
-	__u32	pid;
-	__u32	padding;
+	uint32_t	len;
+	uint32_t	opcode;
+	uint64_t	unique;
+	uint64_t	nodeid;
+	uint32_t	uid;
+	uint32_t	gid;
+	uint32_t	pid;
+	uint32_t	padding;
 };
 
 struct fuse_out_header {
-	__u32	len;
-	__s32	error;
-	__u64	unique;
+	uint32_t	len;
+	int32_t		error;
+	uint64_t	unique;
 };
 
 struct fuse_dirent {
-	__u64	ino;
-	__u64	off;
-	__u32	namelen;
-	__u32	type;
+	uint64_t	ino;
+	uint64_t	off;
+	uint32_t	namelen;
+	uint32_t	type;
 	char name[];
 };
 
 #define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
-#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
+#define FUSE_DIRENT_ALIGN(x) \
+	(((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
 #define FUSE_DIRENT_SIZE(d) \
 	FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
 
@@ -685,47 +681,47 @@ struct fuse_direntplus {
 	FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)
 
 struct fuse_notify_inval_inode_out {
-	__u64	ino;
-	__s64	off;
-	__s64	len;
+	uint64_t	ino;
+	int64_t		off;
+	int64_t		len;
 };
 
 struct fuse_notify_inval_entry_out {
-	__u64	parent;
-	__u32	namelen;
-	__u32	padding;
+	uint64_t	parent;
+	uint32_t	namelen;
+	uint32_t	padding;
 };
 
 struct fuse_notify_delete_out {
-	__u64	parent;
-	__u64	child;
-	__u32	namelen;
-	__u32	padding;
+	uint64_t	parent;
+	uint64_t	child;
+	uint32_t	namelen;
+	uint32_t	padding;
 };
 
 struct fuse_notify_store_out {
-	__u64	nodeid;
-	__u64	offset;
-	__u32	size;
-	__u32	padding;
+	uint64_t	nodeid;
+	uint64_t	offset;
+	uint32_t	size;
+	uint32_t	padding;
 };
 
 struct fuse_notify_retrieve_out {
-	__u64	notify_unique;
-	__u64	nodeid;
-	__u64	offset;
-	__u32	size;
-	__u32	padding;
+	uint64_t	notify_unique;
+	uint64_t	nodeid;
+	uint64_t	offset;
+	uint32_t	size;
+	uint32_t	padding;
 };
 
 /* Matches the size of fuse_write_in */
 struct fuse_notify_retrieve_in {
-	__u64	dummy1;
-	__u64	offset;
-	__u32	size;
-	__u32	dummy2;
-	__u64	dummy3;
-	__u64	dummy4;
+	uint64_t	dummy1;
+	uint64_t	offset;
+	uint32_t	size;
+	uint32_t	dummy2;
+	uint64_t	dummy3;
+	uint64_t	dummy4;
 };
 
 #endif /* _LINUX_FUSE_H */
-- 
cgit 


From 0e280af026a5662ffd57c4e623b822df1f7f47ff Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 18 Apr 2013 06:52:51 +0000
Subject: tcp: introduce TCPSpuriousRtxHostQueues SNMP counter

Host queues (Qdisc + NIC) can hold packets so long that TCP can
eventually retransmit a packet before the first transmit even left
the host.

Its not clear right now if we could avoid this in the first place :

- We could arm RTO timer not at the time we enqueue packets, but
  at the time we TX complete them (tcp_wfree())

- Cancel the sending of the new copy of the packet if prior one
  is still in queue.

This patch adds instrumentation so that we can at least see how
often this problem happens.

TCPSpuriousRtxHostQueues SNMP counter is incremented every time
we detect the fast clone is not yet freed in tcp_transmit_skb()

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h | 1 +
 net/ipv4/proc.c           | 1 +
 net/ipv4/tcp_output.c     | 7 +++++++
 3 files changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index e00013a1debc..fefdec91c68b 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -247,6 +247,7 @@ enum
 	LINUX_MIB_TCPFASTOPENPASSIVEFAIL,	/* TCPFastOpenPassiveFail */
 	LINUX_MIB_TCPFASTOPENLISTENOVERFLOW,	/* TCPFastOpenListenOverflow */
 	LINUX_MIB_TCPFASTOPENCOOKIEREQD,	/* TCPFastOpenCookieReqd */
+	LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index b6f2ea174898..6da51d55d03a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -269,6 +269,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPFastOpenPassiveFail", LINUX_MIB_TCPFASTOPENPASSIVEFAIL),
 	SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
 	SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
+	SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d12694353540..5f28131eb37e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -846,6 +846,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		__net_timestamp(skb);
 
 	if (likely(clone_it)) {
+		const struct sk_buff *fclone = skb + 1;
+
+		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
+			     fclone->fclone == SKB_FCLONE_CLONE))
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+
 		if (unlikely(skb_cloned(skb)))
 			skb = pskb_copy(skb, gfp_mask);
 		else
-- 
cgit 


From 8ad227ff89a7e6f05d07cd0acfd95ed3a24450ca Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 19 Apr 2013 02:04:31 +0000
Subject: net: vlan: add 802.1ad support

Add support for 802.1ad VLAN devices. This mainly consists of checking for
ETH_P_8021AD in addition to ETH_P_8021Q in a couple of places and check
offloading capabilities based on the used protocol.

Configuration is done using "ip link":

# ip link add link eth0 eth0.1000 \
	type vlan proto 802.1ad id 1000
# ip link add link eth0.1000 eth0.1000.1000 \
	type vlan proto 802.1q id 1000

52:54:00:12:34:56 > 92:b1:54:28:e4:8c, ethertype 802.1Q (0x8100), length 106: vlan 1000, p 0, ethertype 802.1Q, vlan 1000, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto ICMP (1), length 84)
    20.1.0.2 > 20.1.0.1: ICMP echo request, id 3003, seq 8, length 64
92:b1:54:28:e4:8c > 52:54:00:12:34:56, ethertype 802.1Q-QinQ (0x88a8), length 106: vlan 1000, p 0, ethertype 802.1Q, vlan 1000, p 0, ethertype IPv4, (tos 0x0, ttl 64, id 47944, offset 0, flags [none], proto ICMP (1), length 84)
    20.1.0.1 > 20.1.0.2: ICMP echo reply, id 3003, seq 8, length 64

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h         |  6 ++++--
 include/linux/netdev_features.h |  6 ++++++
 include/uapi/linux/if_link.h    |  1 +
 net/8021q/Kconfig               |  2 +-
 net/8021q/vlan.h                |  3 +++
 net/8021q/vlan_core.c           | 18 ++++++++++++++----
 net/8021q/vlan_netlink.c        | 25 ++++++++++++++++++++++---
 net/core/dev.c                  | 16 ++++++++++------
 8 files changed, 61 insertions(+), 16 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 8086ff9988b1..a78f9390da87 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -162,6 +162,8 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features,
 {
 	if (proto == htons(ETH_P_8021Q) && features & NETIF_F_HW_VLAN_CTAG_TX)
 		return true;
+	if (proto == htons(ETH_P_8021AD) && features & NETIF_F_HW_VLAN_STAG_TX)
+		return true;
 	return false;
 }
 
@@ -271,9 +273,9 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
 {
 	struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data;
 
-	if (veth->h_vlan_proto != htons(ETH_P_8021Q)) {
+	if (veth->h_vlan_proto != htons(ETH_P_8021Q) &&
+	    veth->h_vlan_proto != htons(ETH_P_8021AD))
 		return -EINVAL;
-	}
 
 	*vlan_tci = ntohs(veth->h_vlan_TCI);
 	return 0;
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 785913b8983d..cbaa027ef5a7 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -25,6 +25,9 @@ enum {
 	NETIF_F_HW_VLAN_CTAG_TX_BIT,	/* Transmit VLAN CTAG HW acceleration */
 	NETIF_F_HW_VLAN_CTAG_RX_BIT,	/* Receive VLAN CTAG HW acceleration */
 	NETIF_F_HW_VLAN_CTAG_FILTER_BIT,/* Receive filtering on VLAN CTAGs */
+	NETIF_F_HW_VLAN_STAG_TX_BIT,	/* Transmit VLAN STAG HW acceleration */
+	NETIF_F_HW_VLAN_STAG_RX_BIT,	/* Receive VLAN STAG HW acceleration */
+	NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */
 	NETIF_F_VLAN_CHALLENGED_BIT,	/* Device cannot handle VLAN packets */
 	NETIF_F_GSO_BIT,		/* Enable software GSO. */
 	NETIF_F_LLTX_BIT,		/* LockLess TX - deprecated. Please */
@@ -83,6 +86,9 @@ enum {
 #define NETIF_F_HW_VLAN_CTAG_FILTER __NETIF_F(HW_VLAN_CTAG_FILTER)
 #define NETIF_F_HW_VLAN_CTAG_RX	__NETIF_F(HW_VLAN_CTAG_RX)
 #define NETIF_F_HW_VLAN_CTAG_TX	__NETIF_F(HW_VLAN_CTAG_TX)
+#define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
+#define NETIF_F_HW_VLAN_STAG_RX	__NETIF_F(HW_VLAN_STAG_RX)
+#define NETIF_F_HW_VLAN_STAG_TX	__NETIF_F(HW_VLAN_STAG_TX)
 #define NETIF_F_IP_CSUM		__NETIF_F(IP_CSUM)
 #define NETIF_F_IPV6_CSUM	__NETIF_F(IPV6_CSUM)
 #define NETIF_F_LLTX		__NETIF_F(LLTX)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 9922704f08af..e3163544f339 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -250,6 +250,7 @@ enum {
 	IFLA_VLAN_FLAGS,
 	IFLA_VLAN_EGRESS_QOS,
 	IFLA_VLAN_INGRESS_QOS,
+	IFLA_VLAN_PROTOCOL,
 	__IFLA_VLAN_MAX,
 };
 
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
index 8f7517df41a5..b85a91fa61f1 100644
--- a/net/8021q/Kconfig
+++ b/net/8021q/Kconfig
@@ -3,7 +3,7 @@
 #
 
 config VLAN_8021Q
-	tristate "802.1Q VLAN Support"
+	tristate "802.1Q/802.1ad VLAN Support"
 	---help---
 	  Select this and you will be able to create 802.1Q VLAN interfaces
 	  on your ethernet interfaces.  802.1Q VLAN supports almost
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 245de9653db0..abc9cb631c47 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -91,6 +91,7 @@ static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
 
 enum vlan_protos {
 	VLAN_PROTO_8021Q	= 0,
+	VLAN_PROTO_8021AD,
 	VLAN_PROTO_NUM,
 };
 
@@ -116,6 +117,8 @@ static inline unsigned int vlan_proto_idx(__be16 proto)
 	switch (proto) {
 	case __constant_htons(ETH_P_8021Q):
 		return VLAN_PROTO_8021Q;
+	case __constant_htons(ETH_P_8021AD):
+		return VLAN_PROTO_8021AD;
 	default:
 		BUG();
 	}
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index bdb0b9d2e9cf..ebfa2fceb88b 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -194,6 +194,18 @@ struct vlan_vid_info {
 	int refcount;
 };
 
+static bool vlan_hw_filter_capable(const struct net_device *dev,
+				     const struct vlan_vid_info *vid_info)
+{
+	if (vid_info->proto == htons(ETH_P_8021Q) &&
+	    dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
+		return true;
+	if (vid_info->proto == htons(ETH_P_8021AD) &&
+	    dev->features & NETIF_F_HW_VLAN_STAG_FILTER)
+		return true;
+	return false;
+}
+
 static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info,
 					       __be16 proto, u16 vid)
 {
@@ -231,8 +243,7 @@ static int __vlan_vid_add(struct vlan_info *vlan_info, __be16 proto, u16 vid,
 	if (!vid_info)
 		return -ENOMEM;
 
-	if (proto == htons(ETH_P_8021Q) &&
-	    dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+	if (vlan_hw_filter_capable(dev, vid_info)) {
 		err =  ops->ndo_vlan_rx_add_vid(dev, proto, vid);
 		if (err) {
 			kfree(vid_info);
@@ -290,8 +301,7 @@ static void __vlan_vid_del(struct vlan_info *vlan_info,
 	u16 vid = vid_info->vid;
 	int err;
 
-	if (proto == htons(ETH_P_8021Q) &&
-	    dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) {
+	if (vlan_hw_filter_capable(dev, vid_info)) {
 		err = ops->ndo_vlan_rx_kill_vid(dev, proto, vid);
 		if (err) {
 			pr_warn("failed to kill vid %04x/%d for device %s\n",
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index a1a956ab39a5..309129732285 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -23,6 +23,7 @@ static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = {
 	[IFLA_VLAN_FLAGS]	= { .len = sizeof(struct ifla_vlan_flags) },
 	[IFLA_VLAN_EGRESS_QOS]	= { .type = NLA_NESTED },
 	[IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED },
+	[IFLA_VLAN_PROTOCOL]	= { .type = NLA_U16 },
 };
 
 static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = {
@@ -53,6 +54,16 @@ static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
 	if (!data)
 		return -EINVAL;
 
+	if (data[IFLA_VLAN_PROTOCOL]) {
+		switch (nla_get_be16(data[IFLA_VLAN_PROTOCOL])) {
+		case __constant_htons(ETH_P_8021Q):
+		case __constant_htons(ETH_P_8021AD):
+			break;
+		default:
+			return -EPROTONOSUPPORT;
+		}
+	}
+
 	if (data[IFLA_VLAN_ID]) {
 		id = nla_get_u16(data[IFLA_VLAN_ID]);
 		if (id >= VLAN_VID_MASK)
@@ -107,6 +118,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev;
+	__be16 proto;
 	int err;
 
 	if (!data[IFLA_VLAN_ID])
@@ -118,7 +130,12 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 	if (!real_dev)
 		return -ENODEV;
 
-	vlan->vlan_proto = htons(ETH_P_8021Q);
+	if (data[IFLA_VLAN_PROTOCOL])
+		proto = nla_get_be16(data[IFLA_VLAN_PROTOCOL]);
+	else
+		proto = htons(ETH_P_8021Q);
+
+	vlan->vlan_proto = proto;
 	vlan->vlan_id	 = nla_get_u16(data[IFLA_VLAN_ID]);
 	vlan->real_dev	 = real_dev;
 	vlan->flags	 = VLAN_FLAG_REORDER_HDR;
@@ -152,7 +169,8 @@ static size_t vlan_get_size(const struct net_device *dev)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
-	return nla_total_size(2) +	/* IFLA_VLAN_ID */
+	return nla_total_size(2) +	/* IFLA_VLAN_PROTOCOL */
+	       nla_total_size(2) +	/* IFLA_VLAN_ID */
 	       sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
 	       vlan_qos_map_size(vlan->nr_ingress_mappings) +
 	       vlan_qos_map_size(vlan->nr_egress_mappings);
@@ -167,7 +185,8 @@ static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	struct nlattr *nest;
 	unsigned int i;
 
-	if (nla_put_u16(skb, IFLA_VLAN_ID, vlan_dev_priv(dev)->vlan_id))
+	if (nla_put_be16(skb, IFLA_VLAN_PROTOCOL, vlan->vlan_proto) ||
+	    nla_put_u16(skb, IFLA_VLAN_ID, vlan->vlan_id))
 		goto nla_put_failure;
 	if (vlan->flags) {
 		f.flags = vlan->flags;
diff --git a/net/core/dev.c b/net/core/dev.c
index 3a12ee132b59..fad4c385f7a1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2212,7 +2212,7 @@ __be16 skb_network_protocol(struct sk_buff *skb)
 	__be16 type = skb->protocol;
 	int vlan_depth = ETH_HLEN;
 
-	while (type == htons(ETH_P_8021Q)) {
+	while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
 		struct vlan_hdr *vh;
 
 		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
@@ -2428,20 +2428,22 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
-	if (protocol == htons(ETH_P_8021Q)) {
+	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
 		protocol = veh->h_vlan_encapsulated_proto;
 	} else if (!vlan_tx_tag_present(skb)) {
 		return harmonize_features(skb, protocol, features);
 	}
 
-	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX);
+	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+					       NETIF_F_HW_VLAN_STAG_TX);
 
-	if (protocol != htons(ETH_P_8021Q)) {
+	if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) {
 		return harmonize_features(skb, protocol, features);
 	} else {
 		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
-				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX;
+				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
+				NETIF_F_HW_VLAN_STAG_TX;
 		return harmonize_features(skb, protocol, features);
 	}
 }
@@ -3360,6 +3362,7 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
 	case __constant_htons(ETH_P_IP):
 	case __constant_htons(ETH_P_IPV6):
 	case __constant_htons(ETH_P_8021Q):
+	case __constant_htons(ETH_P_8021AD):
 		return true;
 	default:
 		return false;
@@ -3400,7 +3403,8 @@ another_round:
 
 	__this_cpu_inc(softnet_data.processed);
 
-	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
+	if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
+	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
 		skb = vlan_untag(skb);
 		if (unlikely(!skb))
 			goto unlock;
-- 
cgit 


From ccdfcc398594ddf3f77348c5a10938dbe9efefbe Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 17 Apr 2013 06:47:01 +0000
Subject: netlink: mmaped netlink: ring setup

Add support for mmap'ed RX and TX ring setup and teardown based on the
af_packet.c code. The following patches will use this to add the real
mmap'ed receive and transmit functionality.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/netlink.h |  32 ++++++
 net/Kconfig                  |   9 ++
 net/netlink/af_netlink.c     | 268 ++++++++++++++++++++++++++++++++++++++++++-
 net/netlink/af_netlink.h     |  20 ++++
 4 files changed, 327 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 32a354f67ba4..1a85940f8ab7 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -1,6 +1,7 @@
 #ifndef _UAPI__LINUX_NETLINK_H
 #define _UAPI__LINUX_NETLINK_H
 
+#include <linux/kernel.h>
 #include <linux/socket.h> /* for __kernel_sa_family_t */
 #include <linux/types.h>
 
@@ -105,11 +106,42 @@ struct nlmsgerr {
 #define NETLINK_PKTINFO		3
 #define NETLINK_BROADCAST_ERROR	4
 #define NETLINK_NO_ENOBUFS	5
+#define NETLINK_RX_RING		6
+#define NETLINK_TX_RING		7
 
 struct nl_pktinfo {
 	__u32	group;
 };
 
+struct nl_mmap_req {
+	unsigned int	nm_block_size;
+	unsigned int	nm_block_nr;
+	unsigned int	nm_frame_size;
+	unsigned int	nm_frame_nr;
+};
+
+struct nl_mmap_hdr {
+	unsigned int	nm_status;
+	unsigned int	nm_len;
+	__u32		nm_group;
+	/* credentials */
+	__u32		nm_pid;
+	__u32		nm_uid;
+	__u32		nm_gid;
+};
+
+enum nl_mmap_status {
+	NL_MMAP_STATUS_UNUSED,
+	NL_MMAP_STATUS_RESERVED,
+	NL_MMAP_STATUS_VALID,
+	NL_MMAP_STATUS_COPY,
+	NL_MMAP_STATUS_SKIP,
+};
+
+#define NL_MMAP_MSG_ALIGNMENT		NLMSG_ALIGNTO
+#define NL_MMAP_MSG_ALIGN(sz)		__ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
+#define NL_MMAP_HDRLEN			NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
+
 #define NET_MAJOR 36		/* Major 36 is reserved for networking 						*/
 
 enum {
diff --git a/net/Kconfig b/net/Kconfig
index 2ddc9046868e..1a2221630e6a 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -23,6 +23,15 @@ menuconfig NET
 
 if NET
 
+config NETLINK_MMAP
+	bool "Netlink: mmaped IO"
+	help
+	  This option enables support for memory mapped netlink IO. This
+	  reduces overhead by avoiding copying data between kernel- and
+	  userspace.
+
+	  If unsure, say N.
+
 config WANT_COMPAT_NETLINK_MESSAGES
 	bool
 	help
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 58b9025978fa..1d3c7128e90e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -55,6 +55,7 @@
 #include <linux/types.h>
 #include <linux/audit.h>
 #include <linux/mutex.h>
+#include <linux/vmalloc.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -107,6 +108,234 @@ static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u
 	return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
 }
 
+#ifdef CONFIG_NETLINK_MMAP
+static __pure struct page *pgvec_to_page(const void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		return vmalloc_to_page(addr);
+	else
+		return virt_to_page(addr);
+}
+
+static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
+{
+	unsigned int i;
+
+	for (i = 0; i < len; i++) {
+		if (pg_vec[i] != NULL) {
+			if (is_vmalloc_addr(pg_vec[i]))
+				vfree(pg_vec[i]);
+			else
+				free_pages((unsigned long)pg_vec[i], order);
+		}
+	}
+	kfree(pg_vec);
+}
+
+static void *alloc_one_pg_vec_page(unsigned long order)
+{
+	void *buffer;
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
+			  __GFP_NOWARN | __GFP_NORETRY;
+
+	buffer = (void *)__get_free_pages(gfp_flags, order);
+	if (buffer != NULL)
+		return buffer;
+
+	buffer = vzalloc((1 << order) * PAGE_SIZE);
+	if (buffer != NULL)
+		return buffer;
+
+	gfp_flags &= ~__GFP_NORETRY;
+	return (void *)__get_free_pages(gfp_flags, order);
+}
+
+static void **alloc_pg_vec(struct netlink_sock *nlk,
+			   struct nl_mmap_req *req, unsigned int order)
+{
+	unsigned int block_nr = req->nm_block_nr;
+	unsigned int i;
+	void **pg_vec, *ptr;
+
+	pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
+	if (pg_vec == NULL)
+		return NULL;
+
+	for (i = 0; i < block_nr; i++) {
+		pg_vec[i] = ptr = alloc_one_pg_vec_page(order);
+		if (pg_vec[i] == NULL)
+			goto err1;
+	}
+
+	return pg_vec;
+err1:
+	free_pg_vec(pg_vec, order, block_nr);
+	return NULL;
+}
+
+static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
+			    bool closing, bool tx_ring)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_ring *ring;
+	struct sk_buff_head *queue;
+	void **pg_vec = NULL;
+	unsigned int order = 0;
+	int err;
+
+	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+
+	if (!closing) {
+		if (atomic_read(&nlk->mapped))
+			return -EBUSY;
+		if (atomic_read(&ring->pending))
+			return -EBUSY;
+	}
+
+	if (req->nm_block_nr) {
+		if (ring->pg_vec != NULL)
+			return -EBUSY;
+
+		if ((int)req->nm_block_size <= 0)
+			return -EINVAL;
+		if (!IS_ALIGNED(req->nm_block_size, PAGE_SIZE))
+			return -EINVAL;
+		if (req->nm_frame_size < NL_MMAP_HDRLEN)
+			return -EINVAL;
+		if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
+			return -EINVAL;
+
+		ring->frames_per_block = req->nm_block_size /
+					 req->nm_frame_size;
+		if (ring->frames_per_block == 0)
+			return -EINVAL;
+		if (ring->frames_per_block * req->nm_block_nr !=
+		    req->nm_frame_nr)
+			return -EINVAL;
+
+		order = get_order(req->nm_block_size);
+		pg_vec = alloc_pg_vec(nlk, req, order);
+		if (pg_vec == NULL)
+			return -ENOMEM;
+	} else {
+		if (req->nm_frame_nr)
+			return -EINVAL;
+	}
+
+	err = -EBUSY;
+	mutex_lock(&nlk->pg_vec_lock);
+	if (closing || atomic_read(&nlk->mapped) == 0) {
+		err = 0;
+		spin_lock_bh(&queue->lock);
+
+		ring->frame_max		= req->nm_frame_nr - 1;
+		ring->head		= 0;
+		ring->frame_size	= req->nm_frame_size;
+		ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;
+
+		swap(ring->pg_vec_len, req->nm_block_nr);
+		swap(ring->pg_vec_order, order);
+		swap(ring->pg_vec, pg_vec);
+
+		__skb_queue_purge(queue);
+		spin_unlock_bh(&queue->lock);
+
+		WARN_ON(atomic_read(&nlk->mapped));
+	}
+	mutex_unlock(&nlk->pg_vec_lock);
+
+	if (pg_vec)
+		free_pg_vec(pg_vec, order, req->nm_block_nr);
+	return err;
+}
+
+static void netlink_mm_open(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct socket *sock = file->private_data;
+	struct sock *sk = sock->sk;
+
+	if (sk)
+		atomic_inc(&nlk_sk(sk)->mapped);
+}
+
+static void netlink_mm_close(struct vm_area_struct *vma)
+{
+	struct file *file = vma->vm_file;
+	struct socket *sock = file->private_data;
+	struct sock *sk = sock->sk;
+
+	if (sk)
+		atomic_dec(&nlk_sk(sk)->mapped);
+}
+
+static const struct vm_operations_struct netlink_mmap_ops = {
+	.open	= netlink_mm_open,
+	.close	= netlink_mm_close,
+};
+
+static int netlink_mmap(struct file *file, struct socket *sock,
+			struct vm_area_struct *vma)
+{
+	struct sock *sk = sock->sk;
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_ring *ring;
+	unsigned long start, size, expected;
+	unsigned int i;
+	int err = -EINVAL;
+
+	if (vma->vm_pgoff)
+		return -EINVAL;
+
+	mutex_lock(&nlk->pg_vec_lock);
+
+	expected = 0;
+	for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
+		if (ring->pg_vec == NULL)
+			continue;
+		expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
+	}
+
+	if (expected == 0)
+		goto out;
+
+	size = vma->vm_end - vma->vm_start;
+	if (size != expected)
+		goto out;
+
+	start = vma->vm_start;
+	for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
+		if (ring->pg_vec == NULL)
+			continue;
+
+		for (i = 0; i < ring->pg_vec_len; i++) {
+			struct page *page;
+			void *kaddr = ring->pg_vec[i];
+			unsigned int pg_num;
+
+			for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
+				page = pgvec_to_page(kaddr);
+				err = vm_insert_page(vma, start, page);
+				if (err < 0)
+					goto out;
+				start += PAGE_SIZE;
+				kaddr += PAGE_SIZE;
+			}
+		}
+	}
+
+	atomic_inc(&nlk->mapped);
+	vma->vm_ops = &netlink_mmap_ops;
+	err = 0;
+out:
+	mutex_unlock(&nlk->pg_vec_lock);
+	return 0;
+}
+#else /* CONFIG_NETLINK_MMAP */
+#define netlink_mmap			sock_no_mmap
+#endif /* CONFIG_NETLINK_MMAP */
+
 static void netlink_destroy_callback(struct netlink_callback *cb)
 {
 	kfree_skb(cb->skb);
@@ -146,6 +375,18 @@ static void netlink_sock_destruct(struct sock *sk)
 	}
 
 	skb_queue_purge(&sk->sk_receive_queue);
+#ifdef CONFIG_NETLINK_MMAP
+	if (1) {
+		struct nl_mmap_req req;
+
+		memset(&req, 0, sizeof(req));
+		if (nlk->rx_ring.pg_vec)
+			netlink_set_ring(sk, &req, true, false);
+		memset(&req, 0, sizeof(req));
+		if (nlk->tx_ring.pg_vec)
+			netlink_set_ring(sk, &req, true, true);
+	}
+#endif /* CONFIG_NETLINK_MMAP */
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
@@ -409,6 +650,9 @@ static int __netlink_create(struct net *net, struct socket *sock,
 		mutex_init(nlk->cb_mutex);
 	}
 	init_waitqueue_head(&nlk->wait);
+#ifdef CONFIG_NETLINK_MMAP
+	mutex_init(&nlk->pg_vec_lock);
+#endif
 
 	sk->sk_destruct = netlink_sock_destruct;
 	sk->sk_protocol = protocol;
@@ -1211,7 +1455,8 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 	if (level != SOL_NETLINK)
 		return -ENOPROTOOPT;
 
-	if (optlen >= sizeof(int) &&
+	if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
+	    optlen >= sizeof(int) &&
 	    get_user(val, (unsigned int __user *)optval))
 		return -EFAULT;
 
@@ -1260,6 +1505,25 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 		}
 		err = 0;
 		break;
+#ifdef CONFIG_NETLINK_MMAP
+	case NETLINK_RX_RING:
+	case NETLINK_TX_RING: {
+		struct nl_mmap_req req;
+
+		/* Rings might consume more memory than queue limits, require
+		 * CAP_NET_ADMIN.
+		 */
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
+		if (optlen < sizeof(req))
+			return -EINVAL;
+		if (copy_from_user(&req, optval, sizeof(req)))
+			return -EFAULT;
+		err = netlink_set_ring(sk, &req, false,
+				       optname == NETLINK_TX_RING);
+		break;
+	}
+#endif /* CONFIG_NETLINK_MMAP */
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -2093,7 +2357,7 @@ static const struct proto_ops netlink_ops = {
 	.getsockopt =	netlink_getsockopt,
 	.sendmsg =	netlink_sendmsg,
 	.recvmsg =	netlink_recvmsg,
-	.mmap =		sock_no_mmap,
+	.mmap =		netlink_mmap,
 	.sendpage =	sock_no_sendpage,
 };
 
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index d9acb2a1d855..ed8522265f4e 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -6,6 +6,20 @@
 #define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
 #define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long))
 
+struct netlink_ring {
+	void			**pg_vec;
+	unsigned int		head;
+	unsigned int		frames_per_block;
+	unsigned int		frame_size;
+	unsigned int		frame_max;
+
+	unsigned int		pg_vec_order;
+	unsigned int		pg_vec_pages;
+	unsigned int		pg_vec_len;
+
+	atomic_t		pending;
+};
+
 struct netlink_sock {
 	/* struct sock has to be the first member of netlink_sock */
 	struct sock		sk;
@@ -24,6 +38,12 @@ struct netlink_sock {
 	void			(*netlink_rcv)(struct sk_buff *skb);
 	void			(*netlink_bind)(int group);
 	struct module		*module;
+#ifdef CONFIG_NETLINK_MMAP
+	struct mutex		pg_vec_lock;
+	struct netlink_ring	rx_ring;
+	struct netlink_ring	tx_ring;
+	atomic_t		mapped;
+#endif /* CONFIG_NETLINK_MMAP */
 };
 
 static inline struct netlink_sock *nlk_sk(struct sock *sk)
-- 
cgit 


From 4ae9fbee1690848a6aace1e0193ab27e981e35a5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 17 Apr 2013 06:47:06 +0000
Subject: netlink: add RX/TX-ring support to netlink diag

Based on AF_PACKET.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/netlink_diag.h | 10 ++++++++++
 net/netlink/diag.c                | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netlink_diag.h b/include/uapi/linux/netlink_diag.h
index 88009a31cd06..4e31db4eea41 100644
--- a/include/uapi/linux/netlink_diag.h
+++ b/include/uapi/linux/netlink_diag.h
@@ -25,9 +25,18 @@ struct netlink_diag_msg {
 	__u32	ndiag_cookie[2];
 };
 
+struct netlink_diag_ring {
+	__u32	ndr_block_size;
+	__u32	ndr_block_nr;
+	__u32	ndr_frame_size;
+	__u32	ndr_frame_nr;
+};
+
 enum {
 	NETLINK_DIAG_MEMINFO,
 	NETLINK_DIAG_GROUPS,
+	NETLINK_DIAG_RX_RING,
+	NETLINK_DIAG_TX_RING,
 
 	__NETLINK_DIAG_MAX,
 };
@@ -38,5 +47,6 @@ enum {
 
 #define NDIAG_SHOW_MEMINFO	0x00000001 /* show memory info of a socket */
 #define NDIAG_SHOW_GROUPS	0x00000002 /* show groups of a netlink socket */
+#define NDIAG_SHOW_RING_CFG	0x00000004 /* show ring configuration */
 
 #endif
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 5ffb1d1cf402..4e4aa471cd05 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -7,6 +7,34 @@
 
 #include "af_netlink.h"
 
+static int sk_diag_put_ring(struct netlink_ring *ring, int nl_type,
+			    struct sk_buff *nlskb)
+{
+	struct netlink_diag_ring ndr;
+
+	ndr.ndr_block_size = ring->pg_vec_pages << PAGE_SHIFT;
+	ndr.ndr_block_nr   = ring->pg_vec_len;
+	ndr.ndr_frame_size = ring->frame_size;
+	ndr.ndr_frame_nr   = ring->frame_max + 1;
+
+	return nla_put(nlskb, nl_type, sizeof(ndr), &ndr);
+}
+
+static int sk_diag_put_rings_cfg(struct sock *sk, struct sk_buff *nlskb)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	int ret;
+
+	mutex_lock(&nlk->pg_vec_lock);
+	ret = sk_diag_put_ring(&nlk->rx_ring, NETLINK_DIAG_RX_RING, nlskb);
+	if (!ret)
+		ret = sk_diag_put_ring(&nlk->tx_ring, NETLINK_DIAG_TX_RING,
+				       nlskb);
+	mutex_unlock(&nlk->pg_vec_lock);
+
+	return ret;
+}
+
 static int sk_diag_dump_groups(struct sock *sk, struct sk_buff *nlskb)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
@@ -51,6 +79,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 	    sock_diag_put_meminfo(sk, skb, NETLINK_DIAG_MEMINFO))
 		goto out_nlmsg_trim;
 
+	if ((req->ndiag_show & NDIAG_SHOW_RING_CFG) &&
+	    sk_diag_put_rings_cfg(sk, skb))
+		goto out_nlmsg_trim;
+
 	return nlmsg_end(skb, nlh);
 
 out_nlmsg_trim:
-- 
cgit 


From 5de17984898c5758fc6ebe08eccea9f4b6548914 Mon Sep 17 00:00:00 2001
From: Arend van Spriel <arend@broadcom.com>
Date: Thu, 18 Apr 2013 15:49:00 +0200
Subject: cfg80211: introduce critical protocol indication from user-space

Some protocols need a more reliable connection to complete
successful in reasonable time. This patch adds a user-space
API to indicate the wireless driver that a critical protocol
is about to commence and when it is done, using nl80211 primitives
NL80211_CMD_CRIT_PROTOCOL_START and NL80211_CRIT_PROTOCOL_STOP.

There can be only on critical protocol session started per
registered cfg80211 device.

The driver can support this by implementing the cfg80211 callbacks
.crit_proto_start() and .crit_proto_stop(). Examples of protocols
that can benefit from this are DHCP, EAPOL, APIPA. Exactly how the
link can/should be made more reliable is up to the driver. Things
to consider are avoid scanning, no multi-channel operations, and
alter coexistence schemes.

Reviewed-by: Pieter-Paul Giesberts <pieterpg@broadcom.com>
Reviewed-by: Franky (Zhenhui) Lin <frankyl@broadcom.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  23 +++++++++
 include/uapi/linux/nl80211.h |  39 +++++++++++++++
 net/wireless/core.h          |   3 ++
 net/wireless/mlme.c          |   5 ++
 net/wireless/nl80211.c       | 117 +++++++++++++++++++++++++++++++++++++++++++
 net/wireless/rdev-ops.h      |  24 ++++++++-
 net/wireless/trace.h         |  35 +++++++++++++
 7 files changed, 245 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index dff96d8cafcd..26b5b692c22b 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2002,6 +2002,12 @@ struct cfg80211_update_ft_ies_params {
  * @update_ft_ies: Provide updated Fast BSS Transition information to the
  *	driver. If the SME is in the driver/firmware, this information can be
  *	used in building Authentication and Reassociation Request frames.
+ *
+ * @crit_proto_start: Indicates a critical protocol needs more link reliability
+ *	for a given duration (milliseconds). The protocol is provided so the
+ *	driver can take the most appropriate actions.
+ * @crit_proto_stop: Indicates critical protocol no longer needs increased link
+ *	reliability. This operation can not fail.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -2231,6 +2237,12 @@ struct cfg80211_ops {
 					 struct cfg80211_chan_def *chandef);
 	int	(*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev,
 				 struct cfg80211_update_ft_ies_params *ftie);
+	int	(*crit_proto_start)(struct wiphy *wiphy,
+				    struct wireless_dev *wdev,
+				    enum nl80211_crit_proto_id protocol,
+				    u16 duration);
+	void	(*crit_proto_stop)(struct wiphy *wiphy,
+				   struct wireless_dev *wdev);
 };
 
 /*
@@ -4137,6 +4149,17 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev,
 				   struct cfg80211_wowlan_wakeup *wakeup,
 				   gfp_t gfp);
 
+/**
+ * cfg80211_crit_proto_stopped() - indicate critical protocol stopped by driver.
+ *
+ * @wdev: the wireless device for which critical protocol is stopped.
+ *
+ * This function can be called by the driver to indicate it has reverted
+ * operation back to normal. One reason could be that the duration given
+ * by .crit_proto_start() has expired.
+ */
+void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 79da8710448e..d1e48b5e348f 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -639,6 +639,13 @@
  *	with the relevant Information Elements. This event is used to report
  *	received FT IEs (MDIE, FTIE, RSN IE, TIE, RICIE).
  *
+ * @NL80211_CMD_CRIT_PROTOCOL_START: Indicates user-space will start running
+ *	a critical protocol that needs more reliability in the connection to
+ *	complete.
+ *
+ * @NL80211_CMD_CRIT_PROTOCOL_STOP: Indicates the connection reliability can
+ *	return back to normal.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -798,6 +805,9 @@ enum nl80211_commands {
 	NL80211_CMD_UPDATE_FT_IES,
 	NL80211_CMD_FT_EVENT,
 
+	NL80211_CMD_CRIT_PROTOCOL_START,
+	NL80211_CMD_CRIT_PROTOCOL_STOP,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -1414,6 +1424,11 @@ enum nl80211_commands {
  * @NL80211_ATTR_IE_RIC: Resource Information Container Information
  *	Element
  *
+ * @NL80211_ATTR_CRIT_PROT_ID: critical protocol identifier requiring increased
+ *	reliability, see &enum nl80211_crit_proto_id (u16).
+ * @NL80211_ATTR_MAX_CRIT_PROT_DURATION: duration in milliseconds in which
+ *      the connection should have increased reliability (u16).
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1709,6 +1724,9 @@ enum nl80211_attrs {
 	NL80211_ATTR_MDID,
 	NL80211_ATTR_IE_RIC,
 
+	NL80211_ATTR_CRIT_PROT_ID,
+	NL80211_ATTR_MAX_CRIT_PROT_DURATION,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -3682,4 +3700,25 @@ enum nl80211_protocol_features {
 	NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP =	1 << 0,
 };
 
+/**
+ * enum nl80211_crit_proto_id - nl80211 critical protocol identifiers
+ *
+ * @NL80211_CRIT_PROTO_UNSPEC: protocol unspecified.
+ * @NL80211_CRIT_PROTO_DHCP: BOOTP or DHCPv6 protocol.
+ * @NL80211_CRIT_PROTO_EAPOL: EAPOL protocol.
+ * @NL80211_CRIT_PROTO_APIPA: APIPA protocol.
+ * @NUM_NL80211_CRIT_PROTO: must be kept last.
+ */
+enum nl80211_crit_proto_id {
+	NL80211_CRIT_PROTO_UNSPEC,
+	NL80211_CRIT_PROTO_DHCP,
+	NL80211_CRIT_PROTO_EAPOL,
+	NL80211_CRIT_PROTO_APIPA,
+	/* add other protocols before this one */
+	NUM_NL80211_CRIT_PROTO
+};
+
+/* maximum duration for critical protocol measures */
+#define NL80211_CRIT_PROTO_MAX_DURATION		5000 /* msec */
+
 #endif /* __LINUX_NL80211_H */
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 124e5e773fbc..fd35dae547c4 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -88,6 +88,9 @@ struct cfg80211_registered_device {
 
 	struct delayed_work dfs_update_channels_wk;
 
+	/* netlink port which started critical protocol (0 means not started) */
+	u32 crit_proto_nlportid;
+
 	/* must be last because of the way we do wiphy_priv(),
 	 * and it should at least be aligned to NETDEV_ALIGN */
 	struct wiphy wiphy __aligned(NETDEV_ALIGN);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 390198bf4b36..0c7b7dd855f6 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -648,6 +648,11 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
 
 	spin_unlock_bh(&wdev->mgmt_registrations_lock);
 
+	if (nlportid && rdev->crit_proto_nlportid == nlportid) {
+		rdev->crit_proto_nlportid = 0;
+		rdev_crit_proto_stop(rdev, wdev);
+	}
+
 	if (nlportid == wdev->ap_unexpected_nlportid)
 		wdev->ap_unexpected_nlportid = 0;
 }
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 3abcbbada6d4..afa283841e8c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1424,6 +1424,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *dev,
 		}
 		CMD(start_p2p_device, START_P2P_DEVICE);
 		CMD(set_mcast_rate, SET_MCAST_RATE);
+		if (split) {
+			CMD(crit_proto_start, CRIT_PROTOCOL_START);
+			CMD(crit_proto_stop, CRIT_PROTOCOL_STOP);
+		}
 
 #ifdef CONFIG_NL80211_TESTMODE
 		CMD(testmode_cmd, TESTMODE);
@@ -8216,6 +8220,64 @@ static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info)
 	return rdev_update_ft_ies(rdev, dev, &ft_params);
 }
 
+static int nl80211_crit_protocol_start(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+	enum nl80211_crit_proto_id proto = NL80211_CRIT_PROTO_UNSPEC;
+	u16 duration;
+	int ret;
+
+	if (!rdev->ops->crit_proto_start)
+		return -EOPNOTSUPP;
+
+	if (WARN_ON(!rdev->ops->crit_proto_stop))
+		return -EINVAL;
+
+	if (rdev->crit_proto_nlportid)
+		return -EBUSY;
+
+	/* determine protocol if provided */
+	if (info->attrs[NL80211_ATTR_CRIT_PROT_ID])
+		proto = nla_get_u16(info->attrs[NL80211_ATTR_CRIT_PROT_ID]);
+
+	if (proto >= NUM_NL80211_CRIT_PROTO)
+		return -EINVAL;
+
+	/* timeout must be provided */
+	if (!info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION])
+		return -EINVAL;
+
+	duration =
+		nla_get_u16(info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]);
+
+	if (duration > NL80211_CRIT_PROTO_MAX_DURATION)
+		return -ERANGE;
+
+	ret = rdev_crit_proto_start(rdev, wdev, proto, duration);
+	if (!ret)
+		rdev->crit_proto_nlportid = info->snd_portid;
+
+	return ret;
+}
+
+static int nl80211_crit_protocol_stop(struct sk_buff *skb,
+				      struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct wireless_dev *wdev = info->user_ptr[1];
+
+	if (!rdev->ops->crit_proto_stop)
+		return -EOPNOTSUPP;
+
+	if (rdev->crit_proto_nlportid) {
+		rdev->crit_proto_nlportid = 0;
+		rdev_crit_proto_stop(rdev, wdev);
+	}
+	return 0;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -8905,6 +8967,22 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_CRIT_PROTOCOL_START,
+		.doit = nl80211_crit_protocol_start,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_CRIT_PROTOCOL_STOP,
+		.doit = nl80211_crit_protocol_stop,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
+	}
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -10650,6 +10728,45 @@ void cfg80211_ft_event(struct net_device *netdev,
 }
 EXPORT_SYMBOL(cfg80211_ft_event);
 
+void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp)
+{
+	struct cfg80211_registered_device *rdev;
+	struct sk_buff *msg;
+	void *hdr;
+	u32 nlportid;
+
+	rdev = wiphy_to_dev(wdev->wiphy);
+	if (!rdev->crit_proto_nlportid)
+		return;
+
+	nlportid = rdev->crit_proto_nlportid;
+	rdev->crit_proto_nlportid = 0;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CRIT_PROTOCOL_STOP);
+	if (!hdr)
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+	    nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
+	return;
+
+ nla_put_failure:
+	if (hdr)
+		genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+
+}
+EXPORT_SYMBOL(cfg80211_crit_proto_stopped);
+
 /* initialisation/exit functions */
 
 int nl80211_init(void)
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index d77e1c1d3a0e..9f15f0ac824d 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -875,7 +875,7 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev,
 	trace_rdev_stop_p2p_device(&rdev->wiphy, wdev);
 	rdev->ops->stop_p2p_device(&rdev->wiphy, wdev);
 	trace_rdev_return_void(&rdev->wiphy);
-}					
+}
 
 static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev,
 				   struct net_device *dev,
@@ -901,4 +901,26 @@ static inline int rdev_update_ft_ies(struct cfg80211_registered_device *rdev,
 	return ret;
 }
 
+static inline int rdev_crit_proto_start(struct cfg80211_registered_device *rdev,
+					struct wireless_dev *wdev,
+					enum nl80211_crit_proto_id protocol,
+					u16 duration)
+{
+	int ret;
+
+	trace_rdev_crit_proto_start(&rdev->wiphy, wdev, protocol, duration);
+	ret = rdev->ops->crit_proto_start(&rdev->wiphy, wdev,
+					  protocol, duration);
+	trace_rdev_return_int(&rdev->wiphy, ret);
+	return ret;
+}
+
+static inline void rdev_crit_proto_stop(struct cfg80211_registered_device *rdev,
+				       struct wireless_dev *wdev)
+{
+	trace_rdev_crit_proto_stop(&rdev->wiphy, wdev);
+	rdev->ops->crit_proto_stop(&rdev->wiphy, wdev);
+	trace_rdev_return_void(&rdev->wiphy);
+}
+
 #endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 3c2033b8f596..ecd4fcec3c94 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1806,6 +1806,41 @@ TRACE_EVENT(rdev_update_ft_ies,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->md)
 );
 
+TRACE_EVENT(rdev_crit_proto_start,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
+		 enum nl80211_crit_proto_id protocol, u16 duration),
+	TP_ARGS(wiphy, wdev, protocol, duration),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+		__field(u16, proto)
+		__field(u16, duration)
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+		__entry->proto = protocol;
+		__entry->duration = duration;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", proto=%x, duration=%u",
+		  WIPHY_PR_ARG, WDEV_PR_ARG, __entry->proto, __entry->duration)
+);
+
+TRACE_EVENT(rdev_crit_proto_stop,
+	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev),
+	TP_ARGS(wiphy, wdev),
+	TP_STRUCT__entry(
+		WIPHY_ENTRY
+		WDEV_ENTRY
+	),
+	TP_fast_assign(
+		WIPHY_ASSIGN;
+		WDEV_ASSIGN;
+	),
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT,
+		  WIPHY_PR_ARG, WDEV_PR_ARG)
+);
+
 /*************************************************************
  *	     cfg80211 exported functions traces		     *
  *************************************************************/
-- 
cgit 


From 0a925864c1038a78fd1cc9b048d9a2b1ae04b63e Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Wed, 17 Apr 2013 23:50:49 +0300
Subject: ipvs: fix sparse warnings for some parameters

Some service fields are in network order:

- netmask: used once in network order and also as prefix len for IPv6
- port

Other parameters are in host order:

- struct ip_vs_flags: flags and mask moved between user and kernel only
- sync state: moved between user and kernel only
- syncid: sent over network as single octet

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h             |  8 ++++----
 include/uapi/linux/ip_vs.h      |  4 ++--
 net/netfilter/ipvs/ip_vs_core.c |  3 ++-
 net/netfilter/ipvs/ip_vs_ctl.c  | 40 ++++++++++++++++++++++++----------------
 4 files changed, 32 insertions(+), 23 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index f9f5b057b480..4c062ccff9aa 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -678,7 +678,7 @@ struct ip_vs_service_user_kern {
 	u16			af;
 	u16			protocol;
 	union nf_inet_addr	addr;		/* virtual ip address */
-	u16			port;
+	__be16			port;
 	u32			fwmark;		/* firwall mark of service */
 
 	/* virtual service options */
@@ -686,14 +686,14 @@ struct ip_vs_service_user_kern {
 	char			*pe_name;
 	unsigned int		flags;		/* virtual service flags */
 	unsigned int		timeout;	/* persistent timeout in sec */
-	u32			netmask;	/* persistent netmask */
+	__be32			netmask;	/* persistent netmask or plen */
 };
 
 
 struct ip_vs_dest_user_kern {
 	/* destination server address */
 	union nf_inet_addr	addr;
-	u16			port;
+	__be16			port;
 
 	/* real server options */
 	unsigned int		conn_flags;	/* connection flags */
@@ -721,7 +721,7 @@ struct ip_vs_service {
 	__u32                   fwmark;   /* firewall mark of the service */
 	unsigned int		flags;	  /* service status flags */
 	unsigned int		timeout;  /* persistent timeout in ticks */
-	__be32			netmask;  /* grouping granularity */
+	__be32			netmask;  /* grouping granularity, mask/plen */
 	struct net		*net;
 
 	struct list_head	destinations;  /* real server d-linked list */
diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h
index 8a2d438dc499..a24537725e80 100644
--- a/include/uapi/linux/ip_vs.h
+++ b/include/uapi/linux/ip_vs.h
@@ -280,8 +280,8 @@ struct ip_vs_daemon_user {
 #define IPVS_GENL_VERSION	0x1
 
 struct ip_vs_flags {
-	__be32 flags;
-	__be32 mask;
+	__u32 flags;
+	__u32 mask;
 };
 
 /* Generic Netlink command attributes */
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index f26fe3353a30..a0d7bd342775 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -235,7 +235,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
 	/* Mask saddr with the netmask to adjust template granularity */
 #ifdef CONFIG_IP_VS_IPV6
 	if (svc->af == AF_INET6)
-		ipv6_addr_prefix(&snet.in6, &iph->saddr.in6, svc->netmask);
+		ipv6_addr_prefix(&snet.in6, &iph->saddr.in6,
+				 (__force __u32) svc->netmask);
 	else
 #endif
 		snet.ip = iph->saddr.ip & svc->netmask;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 64075a775e35..5b142fb16480 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1164,9 +1164,13 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
 	}
 
 #ifdef CONFIG_IP_VS_IPV6
-	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
-		ret = -EINVAL;
-		goto out_err;
+	if (u->af == AF_INET6) {
+		__u32 plen = (__force __u32) u->netmask;
+
+		if (plen < 1 || plen > 128) {
+			ret = -EINVAL;
+			goto out_err;
+		}
 	}
 #endif
 
@@ -1277,9 +1281,13 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
 	}
 
 #ifdef CONFIG_IP_VS_IPV6
-	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
-		ret = -EINVAL;
-		goto out;
+	if (u->af == AF_INET6) {
+		__u32 plen = (__force __u32) u->netmask;
+
+		if (plen < 1 || plen > 128) {
+			ret = -EINVAL;
+			goto out;
+		}
 	}
 #endif
 
@@ -2892,7 +2900,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
 	} else {
 		if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
 		    nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
-		    nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
+		    nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port))
 			goto nla_put_failure;
 	}
 
@@ -2902,7 +2910,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
 	    (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
 	    nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
 	    nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
-	    nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
+	    nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
 		goto nla_put_failure;
 	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
 		goto nla_put_failure;
@@ -3015,7 +3023,7 @@ static int ip_vs_genl_parse_service(struct net *net,
 	} else {
 		usvc->protocol = nla_get_u16(nla_protocol);
 		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
-		usvc->port = nla_get_u16(nla_port);
+		usvc->port = nla_get_be16(nla_port);
 		usvc->fwmark = 0;
 	}
 
@@ -3055,7 +3063,7 @@ static int ip_vs_genl_parse_service(struct net *net,
 		usvc->sched_name = nla_data(nla_sched);
 		usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
 		usvc->timeout = nla_get_u32(nla_timeout);
-		usvc->netmask = nla_get_u32(nla_netmask);
+		usvc->netmask = nla_get_be32(nla_netmask);
 	}
 
 	return 0;
@@ -3081,7 +3089,7 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
 		return -EMSGSIZE;
 
 	if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
-	    nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
+	    nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
 	    nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
 			(atomic_read(&dest->conn_flags) &
 			 IP_VS_CONN_F_FWD_MASK)) ||
@@ -3190,7 +3198,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
 	memset(udest, 0, sizeof(*udest));
 
 	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
-	udest->port = nla_get_u16(nla_port);
+	udest->port = nla_get_be16(nla_port);
 
 	/* If a full entry was requested, check for the additional fields */
 	if (full_entry) {
@@ -3215,8 +3223,8 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
 	return 0;
 }
 
-static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
-				  const char *mcast_ifn, __be32 syncid)
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state,
+				  const char *mcast_ifn, __u32 syncid)
 {
 	struct nlattr *nl_daemon;
 
@@ -3237,8 +3245,8 @@ nla_put_failure:
 	return -EMSGSIZE;
 }
 
-static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
-				  const char *mcast_ifn, __be32 syncid,
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state,
+				  const char *mcast_ifn, __u32 syncid,
 				  struct netlink_callback *cb)
 {
 	void *hdr;
-- 
cgit 


From 24bc69da32a93edac91b4dfb7806a7fb9c24c625 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 17 Apr 2013 17:26:23 -0600
Subject: PCI: Clean up MSI/MSI-X capability #defines

This doesn't change any existing symbols, but it puts them in logical
order and uses explicit masks instead of shifts, like the rest of the
file.

It also adds new symbols for PCI_MSIX_TABLE_BIR,
PCI_MSIX_TABLE_OFFSET, PCI_MSIX_PBA_BIR, and PCI_MSIX_PBA_OFFSET to
replace the mis-named PCI_MSIX_FLAGS_BIRMASK (the BAR index fields
are part of the Table and PBA registers, not the flags register).

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/uapi/linux/pci_regs.h | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index ebfadc56d1b4..864e324da80d 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -292,12 +292,12 @@
 
 /* Message Signalled Interrupts registers */
 
-#define PCI_MSI_FLAGS		2	/* Various flags */
-#define  PCI_MSI_FLAGS_64BIT	0x80	/* 64-bit addresses allowed */
-#define  PCI_MSI_FLAGS_QSIZE	0x70	/* Message queue size configured */
-#define  PCI_MSI_FLAGS_QMASK	0x0e	/* Maximum queue size available */
-#define  PCI_MSI_FLAGS_ENABLE	0x01	/* MSI feature enabled */
-#define  PCI_MSI_FLAGS_MASKBIT	0x100	/* 64-bit mask bits allowed */
+#define PCI_MSI_FLAGS		2	/* Message Control */
+#define  PCI_MSI_FLAGS_ENABLE	0x0001	/* MSI feature enabled */
+#define  PCI_MSI_FLAGS_QMASK	0x000e	/* Maximum queue size available */
+#define  PCI_MSI_FLAGS_QSIZE	0x0070	/* Message queue size configured */
+#define  PCI_MSI_FLAGS_64BIT	0x0080	/* 64-bit addresses allowed */
+#define  PCI_MSI_FLAGS_MASKBIT	0x0100	/* Per-vector masking capable */
 #define PCI_MSI_RFU		3	/* Rest of capability flags */
 #define PCI_MSI_ADDRESS_LO	4	/* Lower 32 bits */
 #define PCI_MSI_ADDRESS_HI	8	/* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */
@@ -309,13 +309,17 @@
 #define PCI_MSI_PENDING_64	20	/* Pending intrs for 64-bit devices */
 
 /* MSI-X registers */
-#define PCI_MSIX_FLAGS		2
-#define  PCI_MSIX_FLAGS_QSIZE	0x7FF
-#define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
-#define  PCI_MSIX_FLAGS_MASKALL	(1 << 14)
-#define PCI_MSIX_TABLE		4
-#define PCI_MSIX_PBA		8
-#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
+#define PCI_MSIX_FLAGS		2	/* Message Control */
+#define  PCI_MSIX_FLAGS_QSIZE	0x07FF	/* Table size */
+#define  PCI_MSIX_FLAGS_MASKALL	0x4000	/* Mask all vectors for this function */
+#define  PCI_MSIX_FLAGS_ENABLE	0x8000	/* MSI-X enable */
+#define PCI_MSIX_TABLE		4	/* Table offset */
+#define  PCI_MSIX_TABLE_BIR	0x00000007 /* BAR index */
+#define  PCI_MSIX_TABLE_OFFSET	0xfffffff8 /* Offset into specified BAR */
+#define PCI_MSIX_PBA		8	/* Pending Bit Array offset */
+#define  PCI_MSIX_PBA_BIR	0x00000007 /* BAR index */
+#define  PCI_MSIX_PBA_OFFSET	0xfffffff8 /* Offset into specified BAR */
+#define  PCI_MSIX_FLAGS_BIRMASK	(7 << 0)   /* deprecated */
 #define PCI_CAP_MSIX_SIZEOF	12	/* size of MSIX registers */
 
 /* MSI-X entry's format */
-- 
cgit 


From 26ee65e680f4a2291f6258e11beceae0ad4eeba3 Mon Sep 17 00:00:00 2001
From: "sjur.brandeland@stericsson.com" <sjur.brandeland@stericsson.com>
Date: Mon, 22 Apr 2013 23:57:01 +0000
Subject: caif: Remove my bouncing email address.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove my soon bouncing email address.
Also remove the "Contact:" line in file header.
The MAINTAINERS file is a better place to find the
contact person anyway.

Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/caif/caif_hsi.c           | 1 -
 drivers/net/caif/caif_serial.c        | 4 ++--
 drivers/net/caif/caif_spi.c           | 1 -
 drivers/net/caif/caif_spi_slave.c     | 1 -
 include/net/caif/caif_dev.h           | 2 +-
 include/net/caif/caif_device.h        | 2 +-
 include/net/caif/caif_hsi.h           | 1 -
 include/net/caif/caif_layer.h         | 2 +-
 include/net/caif/cfcnfg.h             | 2 +-
 include/net/caif/cfctrl.h             | 2 +-
 include/net/caif/cffrml.h             | 2 +-
 include/net/caif/cfmuxl.h             | 2 +-
 include/net/caif/cfpkt.h              | 2 +-
 include/net/caif/cfserl.h             | 2 +-
 include/net/caif/cfsrvl.h             | 2 +-
 include/uapi/linux/caif/caif_socket.h | 2 +-
 include/uapi/linux/caif/if_caif.h     | 2 +-
 net/caif/caif_dev.c                   | 2 +-
 net/caif/caif_socket.c                | 2 +-
 net/caif/caif_usb.c                   | 2 +-
 net/caif/cfcnfg.c                     | 2 +-
 net/caif/cfctrl.c                     | 2 +-
 net/caif/cfdbgl.c                     | 2 +-
 net/caif/cfdgml.c                     | 2 +-
 net/caif/cffrml.c                     | 2 +-
 net/caif/cfmuxl.c                     | 2 +-
 net/caif/cfpkt_skbuff.c               | 2 +-
 net/caif/cfrfml.c                     | 2 +-
 net/caif/cfserl.c                     | 2 +-
 net/caif/cfsrvl.c                     | 2 +-
 net/caif/cfutill.c                    | 2 +-
 net/caif/cfveil.c                     | 2 +-
 net/caif/cfvidl.c                     | 2 +-
 net/caif/chnl_net.c                   | 2 +-
 34 files changed, 31 insertions(+), 35 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c
index 0def8b3106f4..77f50749acb1 100644
--- a/drivers/net/caif/caif_hsi.c
+++ b/drivers/net/caif/caif_hsi.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com
  * Author:  Daniel Martensson / daniel.martensson@stericsson.com
  *	    Dmitry.Tarnyagin  / dmitry.tarnyagin@stericsson.com
  * License terms: GNU General Public License (GPL) version 2.
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index 666891a9a248..e56b56c08b27 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland / sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
@@ -21,7 +21,7 @@
 #include <linux/debugfs.h>
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Sjur Brendeland<sjur.brandeland@stericsson.com>");
+MODULE_AUTHOR("Sjur Brendeland");
 MODULE_DESCRIPTION("CAIF serial device TTY line discipline");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_LDISC(N_CAIF);
diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c
index b71ce9bf0afb..c2cb852e13b8 100644
--- a/drivers/net/caif/caif_spi.c
+++ b/drivers/net/caif/caif_spi.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com
  * Author:  Daniel Martensson / Daniel.Martensson@stericsson.com
  * License terms: GNU General Public License (GPL) version 2.
  */
diff --git a/drivers/net/caif/caif_spi_slave.c b/drivers/net/caif/caif_spi_slave.c
index e139e133fc79..98f03663aa22 100644
--- a/drivers/net/caif/caif_spi_slave.c
+++ b/drivers/net/caif/caif_spi_slave.c
@@ -1,6 +1,5 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com
  * Author:  Daniel Martensson / Daniel.Martensson@stericsson.com
  * License terms: GNU General Public License (GPL) version 2.
  */
diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h
index ef2dd9438bb1..028b754ae9b1 100644
--- a/include/net/caif/caif_dev.h
+++ b/include/net/caif/caif_dev.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/ sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/include/net/caif/caif_device.h b/include/net/caif/caif_device.h
index d02f044adb8a..d6e3c4267c81 100644
--- a/include/net/caif/caif_device.h
+++ b/include/net/caif/caif_device.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/ sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/include/net/caif/caif_hsi.h b/include/net/caif/caif_hsi.h
index bcb9cc3ce98b..4795e817afe5 100644
--- a/include/net/caif/caif_hsi.h
+++ b/include/net/caif/caif_hsi.h
@@ -1,6 +1,5 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Contact: Sjur Brendeland / sjur.brandeland@stericsson.com
  * Author:  Daniel Martensson / daniel.martensson@stericsson.com
  *	    Dmitry.Tarnyagin  / dmitry.tarnyagin@stericsson.com
  * License terms: GNU General Public License (GPL) version 2
diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h
index 0f3a39125f90..94e5ed64dc6d 100644
--- a/include/net/caif/caif_layer.h
+++ b/include/net/caif/caif_layer.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland / sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h
index 90b4ff8bad83..70bfd017581f 100644
--- a/include/net/caif/cfcnfg.h
+++ b/include/net/caif/cfcnfg.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/include/net/caif/cfctrl.h b/include/net/caif/cfctrl.h
index 9e5425b4a1d7..f2ae33d23baf 100644
--- a/include/net/caif/cfctrl.h
+++ b/include/net/caif/cfctrl.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/include/net/caif/cffrml.h b/include/net/caif/cffrml.h
index afac1a48cce7..a06e33fbaa8b 100644
--- a/include/net/caif/cffrml.h
+++ b/include/net/caif/cffrml.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/include/net/caif/cfmuxl.h b/include/net/caif/cfmuxl.h
index 5847a196b8ad..752999572f21 100644
--- a/include/net/caif/cfmuxl.h
+++ b/include/net/caif/cfmuxl.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/include/net/caif/cfpkt.h b/include/net/caif/cfpkt.h
index 83a89ba3005b..1c1ad46250d5 100644
--- a/include/net/caif/cfpkt.h
+++ b/include/net/caif/cfpkt.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/include/net/caif/cfserl.h b/include/net/caif/cfserl.h
index f121299a3427..b5b020f3c72e 100644
--- a/include/net/caif/cfserl.h
+++ b/include/net/caif/cfserl.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h
index 0f5905241843..cd47705c2cc3 100644
--- a/include/net/caif/cfsrvl.h
+++ b/include/net/caif/cfsrvl.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/include/uapi/linux/caif/caif_socket.h b/include/uapi/linux/caif/caif_socket.h
index 3f3bac6af7bc..586e9f98184f 100644
--- a/include/uapi/linux/caif/caif_socket.h
+++ b/include/uapi/linux/caif/caif_socket.h
@@ -1,7 +1,7 @@
 /* linux/caif_socket.h
  * CAIF Definitions for CAIF socket and network layer
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	 Sjur Brendeland/ sjur.brandeland@stericsson.com
+ * Author:	 Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/include/uapi/linux/caif/if_caif.h b/include/uapi/linux/caif/if_caif.h
index 5e7eed4edf51..7618aabe8c6b 100644
--- a/include/uapi/linux/caif/if_caif.h
+++ b/include/uapi/linux/caif/if_caif.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/ sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index df6d56d8689a..1f9ece1a9c34 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -1,7 +1,7 @@
 /*
  * CAIF Interface registration.
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  *
  * Borrowed heavily from file: pn_dev.c. Thanks to Remi Denis-Courmont
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 630b8be6e748..05a41c7ec304 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index d76278d644b8..942e00a425fd 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -1,7 +1,7 @@
 /*
  * CAIF USB handler
  * Copyright (C) ST-Ericsson AB 2011
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  *
  */
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 246ac3aa8de5..fa39fc298708 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 9cd057c59c59..2bd4b58f4372 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 2914659eb9b2..7aae0b56829e 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index a63f4a5f5aff..3bdddb32d55a 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index 204c5e226a61..8bc7caa28e64 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -2,7 +2,7 @@
  * CAIF Framing Layer.
  *
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 154d9f8f964c..8c5d6386319f 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index e8f9c149504d..6493351f39c6 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index db51830c8587..61d7617d9249 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index 147c232b1285..ce60f06d76de 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 95f7f5ea30ef..353f793d1b3b 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 86d2dadb4b73..1728fa4471cf 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 910ab0661f66..262224581efa 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index a8e2a2d758a5..b3b110e8a350 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Author:	Sjur Brendeland
  * License terms: GNU General Public License (GPL) version 2
  */
 
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 26a4e4e3a767..89586c05a479 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
- * Authors:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * Authors:	Sjur Brendeland
  *		Daniel Martensson / Daniel.Martensson@stericsson.com
  * License terms: GNU General Public License (GPL) version 2
  */
-- 
cgit 


From 7276d5d743d775388bf382cd7bdea1a14e486d32 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 23 Apr 2013 00:39:30 +0000
Subject: packet: minor: convert status bits into shifting format

This makes it more readable and clearer what bits are still free to
use. The compiler reduces this to a constant for us anyway.

Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_packet.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index 8136658ea477..4dfc234d80e5 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -86,19 +86,19 @@ struct tpacket_auxdata {
 };
 
 /* Rx ring - header status */
-#define TP_STATUS_KERNEL	0x0
-#define TP_STATUS_USER		0x1
-#define TP_STATUS_COPY		0x2
-#define TP_STATUS_LOSING	0x4
-#define TP_STATUS_CSUMNOTREADY	0x8
-#define TP_STATUS_VLAN_VALID   0x10 /* auxdata has valid tp_vlan_tci */
-#define TP_STATUS_BLK_TMO	0x20
+#define TP_STATUS_KERNEL	      0
+#define TP_STATUS_USER		(1 << 0)
+#define TP_STATUS_COPY		(1 << 1)
+#define TP_STATUS_LOSING	(1 << 2)
+#define TP_STATUS_CSUMNOTREADY	(1 << 3)
+#define TP_STATUS_VLAN_VALID	(1 << 4) /* auxdata has valid tp_vlan_tci */
+#define TP_STATUS_BLK_TMO	(1 << 5)
 
 /* Tx ring - header status */
-#define TP_STATUS_AVAILABLE	0x0
-#define TP_STATUS_SEND_REQUEST	0x1
-#define TP_STATUS_SENDING	0x2
-#define TP_STATUS_WRONG_FORMAT	0x4
+#define TP_STATUS_AVAILABLE	      0
+#define TP_STATUS_SEND_REQUEST	(1 << 0)
+#define TP_STATUS_SENDING	(1 << 1)
+#define TP_STATUS_WRONG_FORMAT	(1 << 2)
 
 /* Rx ring - feature request bits */
 #define TP_FT_REQ_FILL_RXHASH	0x1
-- 
cgit 


From b9c32fb2717094231b31a7d7dcf5fd7f3638ac2f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 23 Apr 2013 00:39:31 +0000
Subject: packet: if hw/sw ts enabled in rx/tx ring, report which ts we got

Currently, there is no way to find out which timestamp is reported in
tpacket{,2,3}_hdr's tp_sec, tp_{n,u}sec members. It can be one of
SOF_TIMESTAMPING_SYS_HARDWARE, SOF_TIMESTAMPING_RAW_HARDWARE,
SOF_TIMESTAMPING_SOFTWARE, or a fallback variant late call from the
PF_PACKET code in software.

Therefore, report in the tp_status member of the ring buffer which
timestamp has been reported for RX and TX path. This should not break
anything for the following reasons: i) in RX ring path, the user needs
to test for tp_status & TP_STATUS_USER, and later for other flags as
well such as TP_STATUS_VLAN_VALID et al, so adding other flags will
do no harm; ii) in TX ring path, time stamps with PACKET_TIMESTAMP
socketoption are not available resp. had no effect except that the
application setting this is buggy. Next to TP_STATUS_AVAILABLE, the
user also should check for other flags such as TP_STATUS_WRONG_FORMAT
to reclaim frames to the application. Thus, in case TX ts are turned
off (default case), nothing happens to the application logic, and in
case we want to use this new feature, we now can also check which of
the ts source is reported in the status field as provided in the docs.

Reported-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_packet.h |  5 +++++
 net/packet/af_packet.c         | 36 +++++++++++++++++++++++-------------
 2 files changed, 28 insertions(+), 13 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index 4dfc234d80e5..b950c02030c0 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -100,6 +100,11 @@ struct tpacket_auxdata {
 #define TP_STATUS_SENDING	(1 << 1)
 #define TP_STATUS_WRONG_FORMAT	(1 << 2)
 
+/* Rx and Tx ring - header status */
+#define TP_STATUS_TS_SOFTWARE		(1 << 29)
+#define TP_STATUS_TS_SYS_HARDWARE	(1 << 30)
+#define TP_STATUS_TS_RAW_HARDWARE	(1 << 31)
+
 /* Rx ring - feature request bits */
 #define TP_FT_REQ_FILL_RXHASH	0x1
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9d46a07a15c0..ba8309a3e01b 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -339,34 +339,35 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
 	}
 }
 
-static bool tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
-				  unsigned int flags)
+static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
+				   unsigned int flags)
 {
 	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
 
 	if (shhwtstamps) {
 		if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) &&
 		    ktime_to_timespec_cond(shhwtstamps->syststamp, ts))
-			return true;
+			return TP_STATUS_TS_SYS_HARDWARE;
 		if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
 		    ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
-			return true;
+			return TP_STATUS_TS_RAW_HARDWARE;
 	}
 
 	if (ktime_to_timespec_cond(skb->tstamp, ts))
-		return true;
+		return TP_STATUS_TS_SOFTWARE;
 
-	return false;
+	return 0;
 }
 
-static void __packet_set_timestamp(struct packet_sock *po, void *frame,
-				   struct sk_buff *skb)
+static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
+				    struct sk_buff *skb)
 {
 	union tpacket_uhdr h;
 	struct timespec ts;
+	__u32 ts_status;
 
-	if (!tpacket_get_timestamp(skb, &ts, po->tp_tstamp))
-		return;
+	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
+		return 0;
 
 	h.raw = frame;
 	switch (po->tp_version) {
@@ -387,6 +388,8 @@ static void __packet_set_timestamp(struct packet_sock *po, void *frame,
 	/* one flush is safe, as both fields always lie on the same cacheline */
 	flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
 	smp_wmb();
+
+	return ts_status;
 }
 
 static void *packet_lookup_frame(struct packet_sock *po,
@@ -1721,6 +1724,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	unsigned short macoff, netoff, hdrlen;
 	struct sk_buff *copy_skb = NULL;
 	struct timespec ts;
+	__u32 ts_status;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -1803,9 +1807,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	spin_unlock(&sk->sk_receive_queue.lock);
 
 	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
-	if (!tpacket_get_timestamp(skb, &ts, po->tp_tstamp))
+
+	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
 		getnstimeofday(&ts);
 
+	status |= ts_status;
+
 	switch (po->tp_version) {
 	case TPACKET_V1:
 		h.h1->tp_len = skb->len;
@@ -1905,11 +1912,14 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
 	void *ph;
 
 	if (likely(po->tx_ring.pg_vec)) {
+		__u32 ts;
+
 		ph = skb_shinfo(skb)->destructor_arg;
 		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
 		atomic_dec(&po->tx_ring.pending);
-		__packet_set_timestamp(po, ph, skb);
-		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
+
+		ts = __packet_set_timestamp(po, ph, skb);
+		__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
 	}
 
 	sock_wfree(skb);
-- 
cgit 


From 4ed7e7bae6a4edc90bfa82b55716e4000b584436 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Sat, 20 Apr 2013 12:07:16 -0300
Subject: [media] videodev2.h: Remove the unused old V4L1 buffer types

Those aren't used anywhere for a long time. Drop it.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/uapi/linux/videodev2.h | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 97fb392bb2d9..f40b41c7e108 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -72,27 +72,6 @@
 #define VIDEO_MAX_FRAME               32
 #define VIDEO_MAX_PLANES               8
 
-#ifndef __KERNEL__
-
-/* These defines are V4L1 specific and should not be used with the V4L2 API!
-   They will be removed from this header in the future. */
-
-#define VID_TYPE_CAPTURE	1	/* Can capture */
-#define VID_TYPE_TUNER		2	/* Can tune */
-#define VID_TYPE_TELETEXT	4	/* Does teletext */
-#define VID_TYPE_OVERLAY	8	/* Overlay onto frame buffer */
-#define VID_TYPE_CHROMAKEY	16	/* Overlay by chromakey */
-#define VID_TYPE_CLIPPING	32	/* Can clip */
-#define VID_TYPE_FRAMERAM	64	/* Uses the frame buffer memory */
-#define VID_TYPE_SCALES		128	/* Scalable */
-#define VID_TYPE_MONOCHROME	256	/* Monochrome only */
-#define VID_TYPE_SUBCAPTURE	512	/* Can capture subareas of the image */
-#define VID_TYPE_MPEG_DECODER	1024	/* Can decode MPEG streams */
-#define VID_TYPE_MPEG_ENCODER	2048	/* Can encode MPEG streams */
-#define VID_TYPE_MJPEG_DECODER	4096	/* Can decode MJPEG streams */
-#define VID_TYPE_MJPEG_ENCODER	8192	/* Can encode MJPEG streams */
-#endif
-
 /*
  *	M I S C E L L A N E O U S
  */
-- 
cgit 


From 2171364d1a92d0a101b455315de7a92efb566008 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Wed, 17 Apr 2013 17:33:11 +0000
Subject: powerpc: Add HWCAP2 aux entry

We are currently out of free bits in AT_HWCAP. With POWER8, we have
several hardware features that we need to advertise.

Tested on POWER and x86.

Signed-off-by: Michael Neuling <michael@neuling.org>
Signed-off-by: Nishanth Aravamudan <nacc@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/cputable.h | 1 +
 arch/powerpc/include/asm/elf.h      | 1 +
 fs/binfmt_elf.c                     | 3 +++
 fs/binfmt_elf_fdpic.c               | 8 ++++----
 include/uapi/linux/auxvec.h         | 1 +
 5 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index fb3245e928ea..ccadad6db4e4 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -52,6 +52,7 @@ struct cpu_spec {
 	char		*cpu_name;
 	unsigned long	cpu_features;		/* Kernel features */
 	unsigned int	cpu_user_features;	/* Userland features */
+	unsigned int	cpu_user_features2;	/* Userland features v2 */
 	unsigned int	mmu_features;		/* MMU features */
 
 	/* cache line sizes */
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index ac9790fc3836..cc0655a702a7 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -61,6 +61,7 @@ typedef elf_vrregset_t elf_fpxregset_t;
    instruction set this cpu supports.  This could be done in userspace,
    but it's not easy, and we've already done it here.  */
 # define ELF_HWCAP	(cur_cpu_spec->cpu_user_features)
+# define ELF_HWCAP2	(cur_cpu_spec->cpu_user_features2)
 
 /* This yields a string that ld.so will use to load implementation
    specific libraries for optimization.  This is more specific in
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 86af964c2425..f1c64a27257e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -240,6 +240,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
 	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
  	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
 	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
+#ifdef ELF_HWCAP2
+	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
+#endif
 	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
 	if (k_platform) {
 		NEW_AUX_ENT(AT_PLATFORM,
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 9c13e023e2b7..bf2381d00132 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -483,7 +483,6 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 	size_t platform_len = 0, len;
 	char *k_platform, *k_base_platform;
 	char __user *u_platform, *u_base_platform, *p;
-	long hwcap;
 	int loop;
 	int nr;	/* reset for each csp adjustment */
 
@@ -502,8 +501,6 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 		return -EFAULT;
 #endif
 
-	hwcap = ELF_HWCAP;
-
 	/*
 	 * If this architecture has a platform capability string, copy it
 	 * to userspace.  In some cases (Sparc), this info is impossible
@@ -617,7 +614,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
 
 	nr = 0;
 	csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long);
-	NEW_AUX_ENT(AT_HWCAP,	hwcap);
+	NEW_AUX_ENT(AT_HWCAP,	ELF_HWCAP);
+#ifdef ELF_HWCAP2
+	NEW_AUX_ENT(AT_HWCAP2,	ELF_HWCAP2);
+#endif
 	NEW_AUX_ENT(AT_PAGESZ,	PAGE_SIZE);
 	NEW_AUX_ENT(AT_CLKTCK,	CLOCKS_PER_SEC);
 	NEW_AUX_ENT(AT_PHDR,	exec_params->ph_addr);
diff --git a/include/uapi/linux/auxvec.h b/include/uapi/linux/auxvec.h
index 61594d598e7b..835c065cc7e1 100644
--- a/include/uapi/linux/auxvec.h
+++ b/include/uapi/linux/auxvec.h
@@ -28,6 +28,7 @@
 #define AT_BASE_PLATFORM 24	/* string identifying real platform, may
 				 * differ from AT_PLATFORM. */
 #define AT_RANDOM 25	/* address of 16 random bytes */
+#define AT_HWCAP2 26	/* extension of AT_HWCAP */
 
 #define AT_EXECFN  31	/* filename of program */
 
-- 
cgit 


From 948a902c842aaef49af9e48b00469229b04a43a9 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 15 Apr 2013 10:49:31 +0200
Subject: KVM: Drop __KVM_HAVE_IOAPIC condition on irq routing

We have a capability enquire system that allows user space to ask kvm
whether a feature is available.

The point behind this system is that we can have different kernel
configurations with different capabilities and user space can adjust
accordingly.

Because features can always be non existent, we can drop any #ifdefs
on CAP defines that could be used generically, like the irq routing
bits. These can be easily reused for non-IOAPIC systems as well.

Signed-off-by: Alexander Graf <agraf@suse.de>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/kvm.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 74d0ff3dfd66..c741902c9e0b 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -579,9 +579,7 @@ struct kvm_ppc_smmu_info {
 #ifdef __KVM_HAVE_PIT
 #define KVM_CAP_REINJECT_CONTROL 24
 #endif
-#ifdef __KVM_HAVE_IOAPIC
 #define KVM_CAP_IRQ_ROUTING 25
-#endif
 #define KVM_CAP_IRQ_INJECT_STATUS 26
 #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
 #define KVM_CAP_DEVICE_DEASSIGNMENT 27
-- 
cgit 


From 852b6d57dc7fa378019786fa84727036e56839ea Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Fri, 12 Apr 2013 14:08:42 +0000
Subject: kvm: add device control API

Currently, devices that are emulated inside KVM are configured in a
hardcoded manner based on an assumption that any given architecture
only has one way to do it.  If there's any need to access device state,
it is done through inflexible one-purpose-only IOCTLs (e.g.
KVM_GET/SET_LAPIC).  Defining new IOCTLs for every little thing is
cumbersome and depletes a limited numberspace.

This API provides a mechanism to instantiate a device of a certain
type, returning an ID that can be used to set/get attributes of the
device.  Attributes may include configuration parameters (e.g.
register base address), device state, operational commands, etc.  It
is similar to the ONE_REG API, except that it acts on devices rather
than vcpus.

Both device types and individual attributes can be tested without having
to create the device or get/set the attribute, without the need for
separately managing enumerated capabilities.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt        |  70 +++++++++++++++++
 Documentation/virtual/kvm/devices/README |   1 +
 include/linux/kvm_host.h                 |  35 +++++++++
 include/uapi/linux/kvm.h                 |  27 +++++++
 virt/kvm/kvm_main.c                      | 129 +++++++++++++++++++++++++++++++
 5 files changed, 262 insertions(+)
 create mode 100644 Documentation/virtual/kvm/devices/README

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a1f2200e43d0..66b58e494935 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2189,6 +2189,76 @@ header; first `n_valid' valid entries with contents from the data
 written, then `n_invalid' invalid entries, invalidating any previously
 valid entries found.
 
+4.79 KVM_CREATE_DEVICE
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: vm ioctl
+Parameters: struct kvm_create_device (in/out)
+Returns: 0 on success, -1 on error
+Errors:
+  ENODEV: The device type is unknown or unsupported
+  EEXIST: Device already created, and this type of device may not
+          be instantiated multiple times
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
+Creates an emulated device in the kernel.  The file descriptor returned
+in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
+
+If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
+device type is supported (not necessarily whether it can be created
+in the current vm).
+
+Individual devices should not define flags.  Attributes should be used
+for specifying any behavior that is not implied by the device type
+number.
+
+struct kvm_create_device {
+	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
+	__u32	fd;	/* out: device handle */
+	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
+};
+
+4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: device ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+  ENXIO:  The group or attribute is unknown/unsupported for this device
+  EPERM:  The attribute cannot (currently) be accessed this way
+          (e.g. read-only attribute, or attribute that only makes
+          sense when the device is in a different state)
+
+  Other error conditions may be defined by individual device types.
+
+Gets/sets a specified piece of device configuration and/or state.  The
+semantics are device-specific.  See individual device documentation in
+the "devices" directory.  As with ONE_REG, the size of the data
+transferred is defined by the particular attribute.
+
+struct kvm_device_attr {
+	__u32	flags;		/* no flags currently defined */
+	__u32	group;		/* device-defined */
+	__u64	attr;		/* group-defined */
+	__u64	addr;		/* userspace address of attr data */
+};
+
+4.81 KVM_HAS_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: device ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+  ENXIO:  The group or attribute is unknown/unsupported for this device
+
+Tests whether a device supports a particular attribute.  A successful
+return indicates the attribute is implemented.  It does not necessarily
+indicate that the attribute can be read or written in the device's
+current state.  "addr" is ignored.
 
 4.77 KVM_ARM_VCPU_INIT
 
diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virtual/kvm/devices/README
new file mode 100644
index 000000000000..34a69834124a
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/README
@@ -0,0 +1 @@
+This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dcef724f4ba6..6dab6b5b3e3b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1064,6 +1064,41 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 
 extern bool kvm_rebooting;
 
+struct kvm_device_ops;
+
+struct kvm_device {
+	struct kvm_device_ops *ops;
+	struct kvm *kvm;
+	atomic_t users;
+	void *private;
+};
+
+/* create, destroy, and name are mandatory */
+struct kvm_device_ops {
+	const char *name;
+	int (*create)(struct kvm_device *dev, u32 type);
+
+	/*
+	 * Destroy is responsible for freeing dev.
+	 *
+	 * Destroy may be called before or after destructors are called
+	 * on emulated I/O regions, depending on whether a reference is
+	 * held by a vcpu or other kvm component that gets destroyed
+	 * after the emulated I/O.
+	 */
+	void (*destroy)(struct kvm_device *dev);
+
+	int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
+	int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
+	int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
+	long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
+		      unsigned long arg);
+};
+
+void kvm_device_get(struct kvm_device *dev);
+void kvm_device_put(struct kvm_device *dev);
+struct kvm_device *kvm_device_from_filp(struct file *filp);
+
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index c741902c9e0b..38a0be0c199f 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -666,6 +666,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_EPR 86
 #define KVM_CAP_ARM_PSCI 87
 #define KVM_CAP_ARM_SET_DEVICE_ADDR 88
+#define KVM_CAP_DEVICE_CTRL 89
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -818,6 +819,24 @@ struct kvm_arm_device_addr {
 	__u64 addr;
 };
 
+/*
+ * Device control API, available with KVM_CAP_DEVICE_CTRL
+ */
+#define KVM_CREATE_DEVICE_TEST		1
+
+struct kvm_create_device {
+	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
+	__u32	fd;	/* out: device handle */
+	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
+};
+
+struct kvm_device_attr {
+	__u32	flags;		/* no flags currently defined */
+	__u32	group;		/* device-defined */
+	__u64	attr;		/* group-defined */
+	__u64	addr;		/* userspace address of attr data */
+};
+
 /*
  * ioctls for VM fds
  */
@@ -906,6 +925,14 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */
 #define KVM_ARM_SET_DEVICE_ADDR	  _IOW(KVMIO,  0xab, struct kvm_arm_device_addr)
 
+/* ioctl for vm fd */
+#define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
+
+/* ioctls for fds returned by KVM_CREATE_DEVICE */
+#define KVM_SET_DEVICE_ATTR	  _IOW(KVMIO,  0xe1, struct kvm_device_attr)
+#define KVM_GET_DEVICE_ATTR	  _IOW(KVMIO,  0xe2, struct kvm_device_attr)
+#define KVM_HAS_DEVICE_ATTR	  _IOW(KVMIO,  0xe3, struct kvm_device_attr)
+
 /*
  * ioctls for vcpu fds
  */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f9492f3847d6..5f0d78c2537b 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2159,6 +2159,117 @@ out:
 }
 #endif
 
+static int kvm_device_ioctl_attr(struct kvm_device *dev,
+				 int (*accessor)(struct kvm_device *dev,
+						 struct kvm_device_attr *attr),
+				 unsigned long arg)
+{
+	struct kvm_device_attr attr;
+
+	if (!accessor)
+		return -EPERM;
+
+	if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
+		return -EFAULT;
+
+	return accessor(dev, &attr);
+}
+
+static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
+			     unsigned long arg)
+{
+	struct kvm_device *dev = filp->private_data;
+
+	switch (ioctl) {
+	case KVM_SET_DEVICE_ATTR:
+		return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);
+	case KVM_GET_DEVICE_ATTR:
+		return kvm_device_ioctl_attr(dev, dev->ops->get_attr, arg);
+	case KVM_HAS_DEVICE_ATTR:
+		return kvm_device_ioctl_attr(dev, dev->ops->has_attr, arg);
+	default:
+		if (dev->ops->ioctl)
+			return dev->ops->ioctl(dev, ioctl, arg);
+
+		return -ENOTTY;
+	}
+}
+
+void kvm_device_get(struct kvm_device *dev)
+{
+	atomic_inc(&dev->users);
+}
+
+void kvm_device_put(struct kvm_device *dev)
+{
+	if (atomic_dec_and_test(&dev->users))
+		dev->ops->destroy(dev);
+}
+
+static int kvm_device_release(struct inode *inode, struct file *filp)
+{
+	struct kvm_device *dev = filp->private_data;
+	struct kvm *kvm = dev->kvm;
+
+	kvm_device_put(dev);
+	kvm_put_kvm(kvm);
+	return 0;
+}
+
+static const struct file_operations kvm_device_fops = {
+	.unlocked_ioctl = kvm_device_ioctl,
+	.release = kvm_device_release,
+};
+
+struct kvm_device *kvm_device_from_filp(struct file *filp)
+{
+	if (filp->f_op != &kvm_device_fops)
+		return NULL;
+
+	return filp->private_data;
+}
+
+static int kvm_ioctl_create_device(struct kvm *kvm,
+				   struct kvm_create_device *cd)
+{
+	struct kvm_device_ops *ops = NULL;
+	struct kvm_device *dev;
+	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
+	int ret;
+
+	switch (cd->type) {
+	default:
+		return -ENODEV;
+	}
+
+	if (test)
+		return 0;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return -ENOMEM;
+
+	dev->ops = ops;
+	dev->kvm = kvm;
+	atomic_set(&dev->users, 1);
+
+	ret = ops->create(dev, cd->type);
+	if (ret < 0) {
+		kfree(dev);
+		return ret;
+	}
+
+	ret = anon_inode_getfd(ops->name, &kvm_device_fops, dev, O_RDWR);
+	if (ret < 0) {
+		ops->destroy(dev);
+		return ret;
+	}
+
+	kvm_get_kvm(kvm);
+	cd->fd = ret;
+	return 0;
+}
+
 static long kvm_vm_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -2304,6 +2415,24 @@ static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 #endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */
+	case KVM_CREATE_DEVICE: {
+		struct kvm_create_device cd;
+
+		r = -EFAULT;
+		if (copy_from_user(&cd, argp, sizeof(cd)))
+			goto out;
+
+		r = kvm_ioctl_create_device(kvm, &cd);
+		if (r)
+			goto out;
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &cd, sizeof(cd)))
+			goto out;
+
+		r = 0;
+		break;
+	}
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 		if (r == -ENOTTY)
-- 
cgit 


From 5df554ad5b7522ea62b0ff9d5be35183494efc21 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Fri, 12 Apr 2013 14:08:46 +0000
Subject: kvm/ppc/mpic: in-kernel MPIC emulation

Hook the MPIC code up to the KVM interfaces, add locking, etc.

Signed-off-by: Scott Wood <scottwood@freescale.com>
[agraf: add stub function for kvmppc_mpic_set_epr, non-booke, 64bit]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/devices/mpic.txt |  37 ++
 arch/powerpc/include/asm/kvm_host.h        |   8 +-
 arch/powerpc/include/asm/kvm_ppc.h         |  17 +
 arch/powerpc/include/uapi/asm/kvm.h        |   8 +
 arch/powerpc/kvm/Kconfig                   |   9 +
 arch/powerpc/kvm/Makefile                  |   2 +
 arch/powerpc/kvm/booke.c                   |   8 +-
 arch/powerpc/kvm/mpic.c                    | 762 ++++++++++++++++++++++-------
 arch/powerpc/kvm/powerpc.c                 |  12 +-
 include/linux/kvm_host.h                   |   2 +
 include/uapi/linux/kvm.h                   |   3 +
 virt/kvm/kvm_main.c                        |   6 +
 12 files changed, 674 insertions(+), 200 deletions(-)
 create mode 100644 Documentation/virtual/kvm/devices/mpic.txt

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virtual/kvm/devices/mpic.txt
new file mode 100644
index 000000000000..ce98e3264fa8
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/mpic.txt
@@ -0,0 +1,37 @@
+MPIC interrupt controller
+=========================
+
+Device types supported:
+  KVM_DEV_TYPE_FSL_MPIC_20     Freescale MPIC v2.0
+  KVM_DEV_TYPE_FSL_MPIC_42     Freescale MPIC v4.2
+
+Only one MPIC instance, of any type, may be instantiated.  The created
+MPIC will act as the system interrupt controller, connecting to each
+vcpu's interrupt inputs.
+
+Groups:
+  KVM_DEV_MPIC_GRP_MISC
+  Attributes:
+    KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
+      Base address of the 256 KiB MPIC register space.  Must be
+      naturally aligned.  A value of zero disables the mapping.
+      Reset value is zero.
+
+  KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit)
+    Access an MPIC register, as if the access were made from the guest.
+    "attr" is the byte offset into the MPIC register space.  Accesses
+    must be 4-byte aligned.
+
+    MSIs may be signaled by using this attribute group to write
+    to the relevant MSIIR.
+
+  KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit)
+    IRQ input line for each standard openpic source.  0 is inactive and 1
+    is active, regardless of interrupt sense.
+
+    For edge-triggered interrupts:  Writing 1 is considered an activating
+    edge, and writing 0 is ignored.  Reading returns 1 if a previously
+    signaled edge has not been acknowledged, and 0 otherwise.
+
+    "attr" is the IRQ number.  IRQ numbers for standard sources are the
+    byte offset of the relevant IVPR from EIVPR0, divided by 32.
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1443768a6588..153c8c2b0f88 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -361,6 +361,11 @@ struct kvmppc_slb {
 #define KVMPPC_BOOKE_MAX_IAC	4
 #define KVMPPC_BOOKE_MAX_DAC	2
 
+/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */
+#define KVMPPC_EPR_NONE		0 /* EPR not supported */
+#define KVMPPC_EPR_USER		1 /* exit to userspace to fill EPR */
+#define KVMPPC_EPR_KERNEL	2 /* in-kernel irqchip */
+
 struct kvmppc_booke_debug_reg {
 	u32 dbcr0;
 	u32 dbcr1;
@@ -526,7 +531,7 @@ struct kvm_vcpu_arch {
 	u8 sane;
 	u8 cpu_type;
 	u8 hcall_needed;
-	u8 epr_enabled;
+	u8 epr_flags; /* KVMPPC_EPR_xxx */
 	u8 epr_needed;
 
 	u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
@@ -593,5 +598,6 @@ struct kvm_vcpu_arch {
 #define KVM_MMIO_REG_FQPR	0x0060
 
 #define __KVM_HAVE_ARCH_WQP
+#define __KVM_HAVE_CREATE_DEVICE
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index bcc68b1afc66..3810f9c7616c 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -164,6 +164,8 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
 
 extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
 
+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
@@ -245,6 +247,9 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
 
 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 
+struct openpic;
+void kvmppc_mpic_put(struct openpic *opp);
+
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 {
@@ -270,6 +275,18 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
 #endif
 }
 
+#ifdef CONFIG_KVM_MPIC
+
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu);
+
+#else
+
+static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+}
+
+#endif /* CONFIG_KVM_MPIC */
+
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 			      struct kvm_config_tlb *cfg);
 int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 41d59d8bdfe8..02ad96606860 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -382,6 +382,14 @@ struct kvm_get_htab_header {
 	__u16	n_invalid;
 };
 
+/* Device control API: PPC-specific devices */
+#define KVM_DEV_MPIC_GRP_MISC		1
+#define   KVM_DEV_MPIC_BASE_ADDR	0	/* 64-bit */
+
+#define KVM_DEV_MPIC_GRP_REGISTER	2	/* 32-bit */
+#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE	3	/* 32-bit */
+
+/* One-Reg API: PPC-specific registers */
 #define KVM_REG_PPC_HIOR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
 #define KVM_REG_PPC_IAC1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
 #define KVM_REG_PPC_IAC2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 448952035b6e..f47e95e0b6de 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -151,6 +151,15 @@ config KVM_E500MC
 
 	  If unsure, say N.
 
+config KVM_MPIC
+	bool "KVM in-kernel MPIC emulation"
+	depends on KVM
+	help
+	  Enable support for emulating MPIC devices inside the
+          host kernel, rather than relying on userspace to emulate.
+          Currently, support is limited to certain versions of
+          Freescale's MPIC implementation.
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index b772eded8c26..4a2277a221bb 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -103,6 +103,8 @@ kvm-book3s_32-objs := \
 	book3s_32_mmu.o
 kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
 
+kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
+
 kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
 
 obj-$(CONFIG_KVM_440) += kvm.o
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 02756537cd90..4da11ed48c59 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -346,7 +346,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 		keep_irq = true;
 	}
 
-	if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled)
+	if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_flags)
 		update_epr = true;
 
 	switch (priority) {
@@ -427,8 +427,10 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 			set_guest_esr(vcpu, vcpu->arch.queued_esr);
 		if (update_dear == true)
 			set_guest_dear(vcpu, vcpu->arch.queued_dear);
-		if (update_epr == true)
-			kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+		if (update_epr == true) {
+			if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
+				kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+		}
 
 		new_msr &= msr_mask;
 #if defined(CONFIG_64BIT)
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 1df67aed7a91..cb451b91e342 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -23,6 +23,19 @@
  * THE SOFTWARE.
  */
 
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/anon_inodes.h>
+#include <asm/uaccess.h>
+#include <asm/mpic.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm_host.h>
+#include <asm/kvm_ppc.h>
+#include "iodev.h"
+
 #define MAX_CPU     32
 #define MAX_SRC     256
 #define MAX_TMR     4
@@ -36,6 +49,7 @@
 #define OPENPIC_FLAG_ILR          (2 << 0)
 
 /* OpenPIC address map */
+#define OPENPIC_REG_SIZE             0x40000
 #define OPENPIC_GLB_REG_START        0x0
 #define OPENPIC_GLB_REG_SIZE         0x10F0
 #define OPENPIC_TMR_REG_START        0x10F0
@@ -89,6 +103,7 @@ static struct fsl_mpic_info fsl_mpic_42 = {
 #define ILR_INTTGT_INT    0x00
 #define ILR_INTTGT_CINT   0x01	/* critical */
 #define ILR_INTTGT_MCP    0x02	/* machine check */
+#define NUM_OUTPUTS       3
 
 #define MSIIR_OFFSET       0x140
 #define MSIIR_SRS_SHIFT    29
@@ -98,18 +113,19 @@ static struct fsl_mpic_info fsl_mpic_42 = {
 
 static int get_current_cpu(void)
 {
-	CPUState *cpu_single_cpu;
-
-	if (!cpu_single_env)
-		return -1;
-
-	cpu_single_cpu = ENV_GET_CPU(cpu_single_env);
-	return cpu_single_cpu->cpu_index;
+#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
+	struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;
+	return vcpu ? vcpu->vcpu_id : -1;
+#else
+	/* XXX */
+	return -1;
+#endif
 }
 
-static uint32_t openpic_cpu_read_internal(void *opaque, gpa_t addr, int idx);
-static void openpic_cpu_write_internal(void *opaque, gpa_t addr,
-				       uint32_t val, int idx);
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+				      u32 val, int idx);
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+				     u32 *ptr, int idx);
 
 enum irq_type {
 	IRQ_TYPE_NORMAL = 0,
@@ -131,7 +147,7 @@ struct irq_source {
 	uint32_t idr;		/* IRQ destination register */
 	uint32_t destmask;	/* bitmap of CPU destinations */
 	int last_cpu;
-	int output;		/* IRQ level, e.g. OPENPIC_OUTPUT_INT */
+	int output;		/* IRQ level, e.g. ILR_INTTGT_INT */
 	int pending;		/* TRUE if IRQ is pending */
 	enum irq_type type;
 	bool level:1;		/* level-triggered */
@@ -158,16 +174,27 @@ struct irq_source {
 #define IDR_CI      0x40000000	/* critical interrupt */
 
 struct irq_dest {
+	struct kvm_vcpu *vcpu;
+
 	int32_t ctpr;		/* CPU current task priority */
 	struct irq_queue raised;
 	struct irq_queue servicing;
-	qemu_irq *irqs;
 
 	/* Count of IRQ sources asserting on non-INT outputs */
-	uint32_t outputs_active[OPENPIC_OUTPUT_NB];
+	uint32_t outputs_active[NUM_OUTPUTS];
 };
 
 struct openpic {
+	struct kvm *kvm;
+	struct kvm_device *dev;
+	struct kvm_io_device mmio;
+	struct list_head mmio_regions;
+	atomic_t users;
+	bool mmio_mapped;
+
+	gpa_t reg_base;
+	spinlock_t lock;
+
 	/* Behavior control */
 	struct fsl_mpic_info *fsl;
 	uint32_t model;
@@ -208,6 +235,47 @@ struct openpic {
 	uint32_t irq_msi;
 };
 
+
+static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
+			   int output)
+{
+	struct kvm_interrupt irq = {
+		.irq = KVM_INTERRUPT_SET_LEVEL,
+	};
+
+	if (!dst->vcpu) {
+		pr_debug("%s: destination cpu %d does not exist\n",
+			 __func__, (int)(dst - &opp->dst[0]));
+		return;
+	}
+
+	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->vcpu_id,
+		output);
+
+	if (output != ILR_INTTGT_INT)	/* TODO */
+		return;
+
+	kvm_vcpu_ioctl_interrupt(dst->vcpu, &irq);
+}
+
+static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
+			   int output)
+{
+	if (!dst->vcpu) {
+		pr_debug("%s: destination cpu %d does not exist\n",
+			 __func__, (int)(dst - &opp->dst[0]));
+		return;
+	}
+
+	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->vcpu_id,
+		output);
+
+	if (output != ILR_INTTGT_INT)	/* TODO */
+		return;
+
+	kvmppc_core_dequeue_external(dst->vcpu);
+}
+
 static inline void IRQ_setbit(struct irq_queue *q, int n_IRQ)
 {
 	set_bit(n_IRQ, q->queue);
@@ -268,7 +336,7 @@ static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
 	pr_debug("%s: IRQ %d active %d was %d\n",
 		__func__, n_IRQ, active, was_active);
 
-	if (src->output != OPENPIC_OUTPUT_INT) {
+	if (src->output != ILR_INTTGT_INT) {
 		pr_debug("%s: output %d irq %d active %d was %d count %d\n",
 			__func__, src->output, n_IRQ, active, was_active,
 			dst->outputs_active[src->output]);
@@ -282,14 +350,14 @@ static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
 			    dst->outputs_active[src->output]++ == 0) {
 				pr_debug("%s: Raise OpenPIC output %d cpu %d irq %d\n",
 					__func__, src->output, n_CPU, n_IRQ);
-				qemu_irq_raise(dst->irqs[src->output]);
+				mpic_irq_raise(opp, dst, src->output);
 			}
 		} else {
 			if (was_active &&
 			    --dst->outputs_active[src->output] == 0) {
 				pr_debug("%s: Lower OpenPIC output %d cpu %d irq %d\n",
 					__func__, src->output, n_CPU, n_IRQ);
-				qemu_irq_lower(dst->irqs[src->output]);
+				mpic_irq_lower(opp, dst, src->output);
 			}
 		}
 
@@ -322,8 +390,7 @@ static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
 		} else {
 			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d/%d\n",
 				__func__, n_CPU, n_IRQ, dst->raised.next);
-			qemu_irq_raise(opp->dst[n_CPU].
-				       irqs[OPENPIC_OUTPUT_INT]);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
 		}
 	} else {
 		IRQ_get_next(opp, &dst->servicing);
@@ -338,8 +405,7 @@ static void IRQ_local_pipe(struct openpic *opp, int n_CPU, int n_IRQ,
 			pr_debug("%s: IRQ %d inactive, current prio %d/%d, CPU %d\n",
 				__func__, n_IRQ, dst->ctpr,
 				dst->servicing.priority, n_CPU);
-			qemu_irq_lower(opp->dst[n_CPU].
-				       irqs[OPENPIC_OUTPUT_INT]);
+			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
 		}
 	}
 }
@@ -415,8 +481,8 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level)
 	struct irq_source *src;
 
 	if (n_IRQ >= MAX_IRQ) {
-		pr_err("%s: IRQ %d out of range\n", __func__, n_IRQ);
-		abort();
+		WARN_ONCE(1, "%s: IRQ %d out of range\n", __func__, n_IRQ);
+		return;
 	}
 
 	src = &opp->src[n_IRQ];
@@ -433,7 +499,7 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level)
 			openpic_update_irq(opp, n_IRQ);
 		}
 
-		if (src->output != OPENPIC_OUTPUT_INT) {
+		if (src->output != ILR_INTTGT_INT) {
 			/* Edge-triggered interrupts shouldn't be used
 			 * with non-INT delivery, but just in case,
 			 * try to make it do something sane rather than
@@ -446,15 +512,13 @@ static void openpic_set_irq(void *opaque, int n_IRQ, int level)
 	}
 }
 
-static void openpic_reset(DeviceState *d)
+static void openpic_reset(struct openpic *opp)
 {
-	struct openpic *opp = FROM_SYSBUS(typeof(*opp), SYS_BUS_DEVICE(d));
 	int i;
 
 	opp->gcr = GCR_RESET;
 	/* Initialise controller registers */
 	opp->frr = ((opp->nb_irqs - 1) << FRR_NIRQ_SHIFT) |
-	    ((opp->nb_cpus - 1) << FRR_NCPU_SHIFT) |
 	    (opp->vid << FRR_VID_SHIFT);
 
 	opp->pir = 0;
@@ -504,7 +568,7 @@ static inline uint32_t read_IRQreg_idr(struct openpic *opp, int n_IRQ)
 static inline uint32_t read_IRQreg_ilr(struct openpic *opp, int n_IRQ)
 {
 	if (opp->flags & OPENPIC_FLAG_ILR)
-		return output_to_inttgt(opp->src[n_IRQ].output);
+		return opp->src[n_IRQ].output;
 
 	return 0xffffffff;
 }
@@ -539,7 +603,7 @@ static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
 					__func__);
 			}
 
-			src->output = OPENPIC_OUTPUT_CINT;
+			src->output = ILR_INTTGT_CINT;
 			src->nomask = true;
 			src->destmask = 0;
 
@@ -550,7 +614,7 @@ static inline void write_IRQreg_idr(struct openpic *opp, int n_IRQ,
 					src->destmask |= 1UL << i;
 			}
 		} else {
-			src->output = OPENPIC_OUTPUT_INT;
+			src->output = ILR_INTTGT_INT;
 			src->nomask = false;
 			src->destmask = src->idr & normal_mask;
 		}
@@ -565,7 +629,7 @@ static inline void write_IRQreg_ilr(struct openpic *opp, int n_IRQ,
 	if (opp->flags & OPENPIC_FLAG_ILR) {
 		struct irq_source *src = &opp->src[n_IRQ];
 
-		src->output = inttgt_to_output(val & ILR_INTTGT_MASK);
+		src->output = val & ILR_INTTGT_MASK;
 		pr_debug("Set ILR %d to 0x%08x, output %d\n", n_IRQ, src->idr,
 			src->output);
 
@@ -614,34 +678,23 @@ static inline void write_IRQreg_ivpr(struct openpic *opp, int n_IRQ,
 
 static void openpic_gcr_write(struct openpic *opp, uint64_t val)
 {
-	bool mpic_proxy = false;
-
 	if (val & GCR_RESET) {
-		openpic_reset(&opp->busdev.qdev);
+		openpic_reset(opp);
 		return;
 	}
 
 	opp->gcr &= ~opp->mpic_mode_mask;
 	opp->gcr |= val & opp->mpic_mode_mask;
-
-	/* Set external proxy mode */
-	if ((val & opp->mpic_mode_mask) == GCR_MODE_PROXY)
-		mpic_proxy = true;
-
-	ppce500_set_mpic_proxy(mpic_proxy);
 }
 
-static void openpic_gbl_write(void *opaque, gpa_t addr, uint64_t val,
-			      unsigned len)
+static int openpic_gbl_write(void *opaque, gpa_t addr, u32 val)
 {
 	struct openpic *opp = opaque;
-	struct irq_dest *dst;
-	int idx;
+	int err = 0;
 
-	pr_debug("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64 "\n",
-		__func__, addr, val);
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
 	if (addr & 0xF)
-		return;
+		return 0;
 
 	switch (addr) {
 	case 0x00:	/* Block Revision Register1 (BRR1) is Readonly */
@@ -654,7 +707,8 @@ static void openpic_gbl_write(void *opaque, gpa_t addr, uint64_t val,
 	case 0x90:
 	case 0xA0:
 	case 0xB0:
-		openpic_cpu_write_internal(opp, addr, val, get_current_cpu());
+		err = openpic_cpu_write_internal(opp, addr, val,
+						 get_current_cpu());
 		break;
 	case 0x1000:		/* FRR */
 		break;
@@ -664,21 +718,11 @@ static void openpic_gbl_write(void *opaque, gpa_t addr, uint64_t val,
 	case 0x1080:		/* VIR */
 		break;
 	case 0x1090:		/* PIR */
-		for (idx = 0; idx < opp->nb_cpus; idx++) {
-			if ((val & (1 << idx)) && !(opp->pir & (1 << idx))) {
-				pr_debug("Raise OpenPIC RESET output for CPU %d\n",
-					idx);
-				dst = &opp->dst[idx];
-				qemu_irq_raise(dst->irqs[OPENPIC_OUTPUT_RESET]);
-			} else if (!(val & (1 << idx)) &&
-				   (opp->pir & (1 << idx))) {
-				pr_debug("Lower OpenPIC RESET output for CPU %d\n",
-					idx);
-				dst = &opp->dst[idx];
-				qemu_irq_lower(dst->irqs[OPENPIC_OUTPUT_RESET]);
-			}
-		}
-		opp->pir = val;
+		/*
+		 * This register is used to reset a CPU core --
+		 * let userspace handle it.
+		 */
+		err = -ENXIO;
 		break;
 	case 0x10A0:		/* IPI_IVPR */
 	case 0x10B0:
@@ -695,21 +739,25 @@ static void openpic_gbl_write(void *opaque, gpa_t addr, uint64_t val,
 	default:
 		break;
 	}
+
+	return err;
 }
 
-static uint64_t openpic_gbl_read(void *opaque, gpa_t addr, unsigned len)
+static int openpic_gbl_read(void *opaque, gpa_t addr, u32 *ptr)
 {
 	struct openpic *opp = opaque;
-	uint32_t retval;
+	u32 retval;
+	int err = 0;
 
-	pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr);
+	pr_debug("%s: addr %#llx\n", __func__, addr);
 	retval = 0xFFFFFFFF;
 	if (addr & 0xF)
-		return retval;
+		goto out;
 
 	switch (addr) {
 	case 0x1000:		/* FRR */
 		retval = opp->frr;
+		retval |= (opp->nb_cpus - 1) << FRR_NCPU_SHIFT;
 		break;
 	case 0x1020:		/* GCR */
 		retval = opp->gcr;
@@ -731,8 +779,8 @@ static uint64_t openpic_gbl_read(void *opaque, gpa_t addr, unsigned len)
 	case 0x90:
 	case 0xA0:
 	case 0xB0:
-		retval =
-		    openpic_cpu_read_internal(opp, addr, get_current_cpu());
+		err = openpic_cpu_read_internal(opp, addr,
+			&retval, get_current_cpu());
 		break;
 	case 0x10A0:		/* IPI_IVPR */
 	case 0x10B0:
@@ -750,28 +798,28 @@ static uint64_t openpic_gbl_read(void *opaque, gpa_t addr, unsigned len)
 	default:
 		break;
 	}
-	pr_debug("%s: => 0x%08x\n", __func__, retval);
 
-	return retval;
+out:
+	pr_debug("%s: => 0x%08x\n", __func__, retval);
+	*ptr = retval;
+	return err;
 }
 
-static void openpic_tmr_write(void *opaque, gpa_t addr, uint64_t val,
-			      unsigned len)
+static int openpic_tmr_write(void *opaque, gpa_t addr, u32 val)
 {
 	struct openpic *opp = opaque;
 	int idx;
 
 	addr += 0x10f0;
 
-	pr_debug("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64 "\n",
-		__func__, addr, val);
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
 	if (addr & 0xF)
-		return;
+		return 0;
 
 	if (addr == 0x10f0) {
 		/* TFRR */
 		opp->tfrr = val;
-		return;
+		return 0;
 	}
 
 	idx = (addr >> 6) & 0x3;
@@ -795,15 +843,17 @@ static void openpic_tmr_write(void *opaque, gpa_t addr, uint64_t val,
 		write_IRQreg_idr(opp, opp->irq_tim0 + idx, val);
 		break;
 	}
+
+	return 0;
 }
 
-static uint64_t openpic_tmr_read(void *opaque, gpa_t addr, unsigned len)
+static int openpic_tmr_read(void *opaque, gpa_t addr, u32 *ptr)
 {
 	struct openpic *opp = opaque;
 	uint32_t retval = -1;
 	int idx;
 
-	pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr);
+	pr_debug("%s: addr %#llx\n", __func__, addr);
 	if (addr & 0xF)
 		goto out;
 
@@ -813,6 +863,7 @@ static uint64_t openpic_tmr_read(void *opaque, gpa_t addr, unsigned len)
 		retval = opp->tfrr;
 		goto out;
 	}
+
 	switch (addr & 0x30) {
 	case 0x00:		/* TCCR */
 		retval = opp->timers[idx].tccr;
@@ -830,18 +881,16 @@ static uint64_t openpic_tmr_read(void *opaque, gpa_t addr, unsigned len)
 
 out:
 	pr_debug("%s: => 0x%08x\n", __func__, retval);
-
-	return retval;
+	*ptr = retval;
+	return 0;
 }
 
-static void openpic_src_write(void *opaque, gpa_t addr, uint64_t val,
-			      unsigned len)
+static int openpic_src_write(void *opaque, gpa_t addr, u32 val)
 {
 	struct openpic *opp = opaque;
 	int idx;
 
-	pr_debug("%s: addr %#" HWADDR_PRIx " <= %08" PRIx64 "\n",
-		__func__, addr, val);
+	pr_debug("%s: addr %#llx <= %08x\n", __func__, addr, val);
 
 	addr = addr & 0xffff;
 	idx = addr >> 5;
@@ -857,15 +906,17 @@ static void openpic_src_write(void *opaque, gpa_t addr, uint64_t val,
 		write_IRQreg_ilr(opp, idx, val);
 		break;
 	}
+
+	return 0;
 }
 
-static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len)
+static int openpic_src_read(void *opaque, gpa_t addr, u32 *ptr)
 {
 	struct openpic *opp = opaque;
 	uint32_t retval;
 	int idx;
 
-	pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr);
+	pr_debug("%s: addr %#llx\n", __func__, addr);
 	retval = 0xFFFFFFFF;
 
 	addr = addr & 0xffff;
@@ -884,20 +935,19 @@ static uint64_t openpic_src_read(void *opaque, uint64_t addr, unsigned len)
 	}
 
 	pr_debug("%s: => 0x%08x\n", __func__, retval);
-	return retval;
+	*ptr = retval;
+	return 0;
 }
 
-static void openpic_msi_write(void *opaque, gpa_t addr, uint64_t val,
-			      unsigned size)
+static int openpic_msi_write(void *opaque, gpa_t addr, u32 val)
 {
 	struct openpic *opp = opaque;
 	int idx = opp->irq_msi;
 	int srs, ibs;
 
-	pr_debug("%s: addr %#" HWADDR_PRIx " <= 0x%08" PRIx64 "\n",
-		__func__, addr, val);
+	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
 	if (addr & 0xF)
-		return;
+		return 0;
 
 	switch (addr) {
 	case MSIIR_OFFSET:
@@ -911,17 +961,19 @@ static void openpic_msi_write(void *opaque, gpa_t addr, uint64_t val,
 		/* most registers are read-only, thus ignored */
 		break;
 	}
+
+	return 0;
 }
 
-static uint64_t openpic_msi_read(void *opaque, gpa_t addr, unsigned size)
+static int openpic_msi_read(void *opaque, gpa_t addr, u32 *ptr)
 {
 	struct openpic *opp = opaque;
-	uint64_t r = 0;
+	uint32_t r = 0;
 	int i, srs;
 
-	pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr);
+	pr_debug("%s: addr %#llx\n", __func__, addr);
 	if (addr & 0xF)
-		return -1;
+		return -ENXIO;
 
 	srs = addr >> 4;
 
@@ -945,45 +997,47 @@ static uint64_t openpic_msi_read(void *opaque, gpa_t addr, unsigned size)
 		break;
 	}
 
-	return r;
+	pr_debug("%s: => 0x%08x\n", __func__, r);
+	*ptr = r;
+	return 0;
 }
 
-static uint64_t openpic_summary_read(void *opaque, gpa_t addr, unsigned size)
+static int openpic_summary_read(void *opaque, gpa_t addr, u32 *ptr)
 {
-	uint64_t r = 0;
+	uint32_t r = 0;
 
-	pr_debug("%s: addr %#" HWADDR_PRIx "\n", __func__, addr);
+	pr_debug("%s: addr %#llx\n", __func__, addr);
 
 	/* TODO: EISR/EIMR */
 
-	return r;
+	*ptr = r;
+	return 0;
 }
 
-static void openpic_summary_write(void *opaque, gpa_t addr, uint64_t val,
-				  unsigned size)
+static int openpic_summary_write(void *opaque, gpa_t addr, u32 val)
 {
-	pr_debug("%s: addr %#" HWADDR_PRIx " <= 0x%08" PRIx64 "\n",
-		__func__, addr, val);
+	pr_debug("%s: addr %#llx <= 0x%08x\n", __func__, addr, val);
 
 	/* TODO: EISR/EIMR */
+	return 0;
 }
 
-static void openpic_cpu_write_internal(void *opaque, gpa_t addr,
-				       uint32_t val, int idx)
+static int openpic_cpu_write_internal(void *opaque, gpa_t addr,
+				      u32 val, int idx)
 {
 	struct openpic *opp = opaque;
 	struct irq_source *src;
 	struct irq_dest *dst;
 	int s_IRQ, n_IRQ;
 
-	pr_debug("%s: cpu %d addr %#" HWADDR_PRIx " <= 0x%08x\n", __func__, idx,
+	pr_debug("%s: cpu %d addr %#llx <= 0x%08x\n", __func__, idx,
 		addr, val);
 
 	if (idx < 0)
-		return;
+		return 0;
 
 	if (addr & 0xF)
-		return;
+		return 0;
 
 	dst = &opp->dst[idx];
 	addr &= 0xFF0;
@@ -1008,11 +1062,11 @@ static void openpic_cpu_write_internal(void *opaque, gpa_t addr,
 		if (dst->raised.priority <= dst->ctpr) {
 			pr_debug("%s: Lower OpenPIC INT output cpu %d due to ctpr\n",
 				__func__, idx);
-			qemu_irq_lower(dst->irqs[OPENPIC_OUTPUT_INT]);
+			mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
 		} else if (dst->raised.priority > dst->servicing.priority) {
 			pr_debug("%s: Raise OpenPIC INT output cpu %d irq %d\n",
 				__func__, idx, dst->raised.next);
-			qemu_irq_raise(dst->irqs[OPENPIC_OUTPUT_INT]);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
 		}
 
 		break;
@@ -1043,18 +1097,22 @@ static void openpic_cpu_write_internal(void *opaque, gpa_t addr,
 		     IVPR_PRIORITY(src->ivpr) > dst->servicing.priority)) {
 			pr_debug("Raise OpenPIC INT output cpu %d irq %d\n",
 				idx, n_IRQ);
-			qemu_irq_raise(opp->dst[idx].irqs[OPENPIC_OUTPUT_INT]);
+			mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
 		}
 		break;
 	default:
 		break;
 	}
+
+	return 0;
 }
 
-static void openpic_cpu_write(void *opaque, gpa_t addr, uint64_t val,
-			      unsigned len)
+static int openpic_cpu_write(void *opaque, gpa_t addr, u32 val)
 {
-	openpic_cpu_write_internal(opaque, addr, val, (addr & 0x1f000) >> 12);
+	struct openpic *opp = opaque;
+
+	return openpic_cpu_write_internal(opp, addr, val,
+					 (addr & 0x1f000) >> 12);
 }
 
 static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
@@ -1064,7 +1122,7 @@ static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
 	int retval, irq;
 
 	pr_debug("Lower OpenPIC INT output\n");
-	qemu_irq_lower(dst->irqs[OPENPIC_OUTPUT_INT]);
+	mpic_irq_lower(opp, dst, ILR_INTTGT_INT);
 
 	irq = IRQ_get_next(opp, &dst->raised);
 	pr_debug("IACK: irq=%d\n", irq);
@@ -1107,20 +1165,21 @@ static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
 	return retval;
 }
 
-static uint32_t openpic_cpu_read_internal(void *opaque, gpa_t addr, int idx)
+static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
+				     u32 *ptr, int idx)
 {
 	struct openpic *opp = opaque;
 	struct irq_dest *dst;
 	uint32_t retval;
 
-	pr_debug("%s: cpu %d addr %#" HWADDR_PRIx "\n", __func__, idx, addr);
+	pr_debug("%s: cpu %d addr %#llx\n", __func__, idx, addr);
 	retval = 0xFFFFFFFF;
 
 	if (idx < 0)
-		return retval;
+		goto out;
 
 	if (addr & 0xF)
-		return retval;
+		goto out;
 
 	dst = &opp->dst[idx];
 	addr &= 0xFF0;
@@ -1142,49 +1201,67 @@ static uint32_t openpic_cpu_read_internal(void *opaque, gpa_t addr, int idx)
 	}
 	pr_debug("%s: => 0x%08x\n", __func__, retval);
 
-	return retval;
+out:
+	*ptr = retval;
+	return 0;
 }
 
-static uint64_t openpic_cpu_read(void *opaque, gpa_t addr, unsigned len)
+static int openpic_cpu_read(void *opaque, gpa_t addr, u32 *ptr)
 {
-	return openpic_cpu_read_internal(opaque, addr, (addr & 0x1f000) >> 12);
+	struct openpic *opp = opaque;
+
+	return openpic_cpu_read_internal(opp, addr, ptr,
+					 (addr & 0x1f000) >> 12);
 }
 
-static const struct kvm_io_device_ops openpic_glb_ops_be = {
+struct mem_reg {
+	struct list_head list;
+	int (*read)(void *opaque, gpa_t addr, u32 *ptr);
+	int (*write)(void *opaque, gpa_t addr, u32 val);
+	gpa_t start_addr;
+	int size;
+};
+
+static struct mem_reg openpic_gbl_mmio = {
 	.write = openpic_gbl_write,
 	.read = openpic_gbl_read,
+	.start_addr = OPENPIC_GLB_REG_START,
+	.size = OPENPIC_GLB_REG_SIZE,
 };
 
-static const struct kvm_io_device_ops openpic_tmr_ops_be = {
+static struct mem_reg openpic_tmr_mmio = {
 	.write = openpic_tmr_write,
 	.read = openpic_tmr_read,
+	.start_addr = OPENPIC_TMR_REG_START,
+	.size = OPENPIC_TMR_REG_SIZE,
 };
 
-static const struct kvm_io_device_ops openpic_cpu_ops_be = {
+static struct mem_reg openpic_cpu_mmio = {
 	.write = openpic_cpu_write,
 	.read = openpic_cpu_read,
+	.start_addr = OPENPIC_CPU_REG_START,
+	.size = OPENPIC_CPU_REG_SIZE,
 };
 
-static const struct kvm_io_device_ops openpic_src_ops_be = {
+static struct mem_reg openpic_src_mmio = {
 	.write = openpic_src_write,
 	.read = openpic_src_read,
+	.start_addr = OPENPIC_SRC_REG_START,
+	.size = OPENPIC_SRC_REG_SIZE,
 };
 
-static const struct kvm_io_device_ops openpic_msi_ops_be = {
+static struct mem_reg openpic_msi_mmio = {
 	.read = openpic_msi_read,
 	.write = openpic_msi_write,
+	.start_addr = OPENPIC_MSI_REG_START,
+	.size = OPENPIC_MSI_REG_SIZE,
 };
 
-static const struct kvm_io_device_ops openpic_summary_ops_be = {
+static struct mem_reg openpic_summary_mmio = {
 	.read = openpic_summary_read,
 	.write = openpic_summary_write,
-};
-
-struct mem_reg {
-	const char *name;
-	const struct kvm_io_device_ops *ops;
-	gpa_t start_addr;
-	int size;
+	.start_addr = OPENPIC_SUMMARY_REG_START,
+	.size = OPENPIC_SUMMARY_REG_SIZE,
 };
 
 static void fsl_common_init(struct openpic *opp)
@@ -1192,6 +1269,9 @@ static void fsl_common_init(struct openpic *opp)
 	int i;
 	int virq = MAX_SRC;
 
+	list_add(&openpic_msi_mmio.list, &opp->mmio_regions);
+	list_add(&openpic_summary_mmio.list, &opp->mmio_regions);
+
 	opp->vid = VID_REVISION_1_2;
 	opp->vir = VIR_GENERIC;
 	opp->vector_mask = 0xFFFF;
@@ -1205,11 +1285,10 @@ static void fsl_common_init(struct openpic *opp)
 	opp->irq_tim0 = virq;
 	virq += MAX_TMR;
 
-	assert(virq <= MAX_IRQ);
+	BUG_ON(virq > MAX_IRQ);
 
 	opp->irq_msi = 224;
 
-	msi_supported = true;
 	for (i = 0; i < opp->fsl->max_ext; i++)
 		opp->src[i].level = false;
 
@@ -1226,63 +1305,352 @@ static void fsl_common_init(struct openpic *opp)
 	}
 }
 
-static void map_list(struct openpic *opp, const struct mem_reg *list,
-		     int *count)
+static int kvm_mpic_read_internal(struct openpic *opp, gpa_t addr, u32 *ptr)
 {
-	while (list->name) {
-		assert(*count < ARRAY_SIZE(opp->sub_io_mem));
+	struct list_head *node;
 
-		memory_region_init_io(&opp->sub_io_mem[*count], list->ops, opp,
-				      list->name, list->size);
+	list_for_each(node, &opp->mmio_regions) {
+		struct mem_reg *mr = list_entry(node, struct mem_reg, list);
 
-		memory_region_add_subregion(&opp->mem, list->start_addr,
-					    &opp->sub_io_mem[*count]);
+		if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
+			continue;
 
-		(*count)++;
-		list++;
+		return mr->read(opp, addr - mr->start_addr, ptr);
 	}
+
+	return -ENXIO;
 }
 
-static int openpic_init(SysBusDevice *dev)
+static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
 {
-	struct openpic *opp = FROM_SYSBUS(typeof(*opp), dev);
-	int i, j;
-	int list_count = 0;
-	static const struct mem_reg list_le[] = {
-		{"glb", &openpic_glb_ops_le,
-		 OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE},
-		{"tmr", &openpic_tmr_ops_le,
-		 OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE},
-		{"src", &openpic_src_ops_le,
-		 OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE},
-		{"cpu", &openpic_cpu_ops_le,
-		 OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE},
-		{NULL}
-	};
-	static const struct mem_reg list_be[] = {
-		{"glb", &openpic_glb_ops_be,
-		 OPENPIC_GLB_REG_START, OPENPIC_GLB_REG_SIZE},
-		{"tmr", &openpic_tmr_ops_be,
-		 OPENPIC_TMR_REG_START, OPENPIC_TMR_REG_SIZE},
-		{"src", &openpic_src_ops_be,
-		 OPENPIC_SRC_REG_START, OPENPIC_SRC_REG_SIZE},
-		{"cpu", &openpic_cpu_ops_be,
-		 OPENPIC_CPU_REG_START, OPENPIC_CPU_REG_SIZE},
-		{NULL}
-	};
-	static const struct mem_reg list_fsl[] = {
-		{"msi", &openpic_msi_ops_be,
-		 OPENPIC_MSI_REG_START, OPENPIC_MSI_REG_SIZE},
-		{"summary", &openpic_summary_ops_be,
-		 OPENPIC_SUMMARY_REG_START, OPENPIC_SUMMARY_REG_SIZE},
-		{NULL}
-	};
+	struct list_head *node;
+
+	list_for_each(node, &opp->mmio_regions) {
+		struct mem_reg *mr = list_entry(node, struct mem_reg, list);
+
+		if (mr->start_addr > addr || addr >= mr->start_addr + mr->size)
+			continue;
 
-	memory_region_init(&opp->mem, "openpic", 0x40000);
+		return mr->write(opp, addr - mr->start_addr, val);
+	}
+
+	return -ENXIO;
+}
+
+static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
+			 int len, void *ptr)
+{
+	struct openpic *opp = container_of(this, struct openpic, mmio);
+	int ret;
+	union {
+		u32 val;
+		u8 bytes[4];
+	} u;
+
+	if (addr & (len - 1)) {
+		pr_debug("%s: bad alignment %llx/%d\n",
+			 __func__, addr, len);
+		return -EINVAL;
+	}
+
+	spin_lock_irq(&opp->lock);
+	ret = kvm_mpic_read_internal(opp, addr - opp->reg_base, &u.val);
+	spin_unlock_irq(&opp->lock);
+
+	/*
+	 * Technically only 32-bit accesses are allowed, but be nice to
+	 * people dumping registers a byte at a time -- it works in real
+	 * hardware (reads only, not writes).
+	 */
+	if (len == 4) {
+		*(u32 *)ptr = u.val;
+		pr_debug("%s: addr %llx ret %d len 4 val %x\n",
+			 __func__, addr, ret, u.val);
+	} else if (len == 1) {
+		*(u8 *)ptr = u.bytes[addr & 3];
+		pr_debug("%s: addr %llx ret %d len 1 val %x\n",
+			 __func__, addr, ret, u.bytes[addr & 3]);
+	} else {
+		pr_debug("%s: bad length %d\n", __func__, len);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
+			  int len, const void *ptr)
+{
+	struct openpic *opp = container_of(this, struct openpic, mmio);
+	int ret;
+
+	if (len != 4) {
+		pr_debug("%s: bad length %d\n", __func__, len);
+		return -EOPNOTSUPP;
+	}
+	if (addr & 3) {
+		pr_debug("%s: bad alignment %llx/%d\n", __func__, addr, len);
+		return -EOPNOTSUPP;
+	}
+
+	spin_lock_irq(&opp->lock);
+	ret = kvm_mpic_write_internal(opp, addr - opp->reg_base,
+				      *(const u32 *)ptr);
+	spin_unlock_irq(&opp->lock);
+
+	pr_debug("%s: addr %llx ret %d val %x\n",
+		 __func__, addr, ret, *(const u32 *)ptr);
+
+	return ret;
+}
+
+static void kvm_mpic_dtor(struct kvm_io_device *this)
+{
+	struct openpic *opp = container_of(this, struct openpic, mmio);
+
+	opp->mmio_mapped = false;
+}
+
+static const struct kvm_io_device_ops mpic_mmio_ops = {
+	.read = kvm_mpic_read,
+	.write = kvm_mpic_write,
+	.destructor = kvm_mpic_dtor,
+};
+
+static void map_mmio(struct openpic *opp)
+{
+	BUG_ON(opp->mmio_mapped);
+	opp->mmio_mapped = true;
+
+	kvm_iodevice_init(&opp->mmio, &mpic_mmio_ops);
+
+	kvm_io_bus_register_dev(opp->kvm, KVM_MMIO_BUS,
+				opp->reg_base, OPENPIC_REG_SIZE,
+				&opp->mmio);
+}
+
+static void unmap_mmio(struct openpic *opp)
+{
+	BUG_ON(opp->mmio_mapped);
+	opp->mmio_mapped = false;
+
+	kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
+}
+
+static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
+{
+	u64 base;
+
+	if (copy_from_user(&base, (u64 __user *)(long)attr->addr, sizeof(u64)))
+		return -EFAULT;
+
+	if (base & 0x3ffff) {
+		pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx not aligned\n",
+			 __func__, base);
+		return -EINVAL;
+	}
+
+	if (base == opp->reg_base)
+		return 0;
+
+	mutex_lock(&opp->kvm->slots_lock);
+
+	unmap_mmio(opp);
+	opp->reg_base = base;
+
+	pr_debug("kvm mpic %s: KVM_DEV_MPIC_BASE_ADDR %08llx\n",
+		 __func__, base);
+
+	if (base == 0)
+		goto out;
+
+	map_mmio(opp);
+
+	mutex_unlock(&opp->kvm->slots_lock);
+out:
+	return 0;
+}
+
+#define ATTR_SET		0
+#define ATTR_GET		1
+
+static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type)
+{
+	int ret;
+
+	if (addr & 3)
+		return -ENXIO;
+
+	spin_lock_irq(&opp->lock);
+
+	if (type == ATTR_SET)
+		ret = kvm_mpic_write_internal(opp, addr, *val);
+	else
+		ret = kvm_mpic_read_internal(opp, addr, val);
+
+	spin_unlock_irq(&opp->lock);
+
+	pr_debug("%s: type %d addr %llx val %x\n", __func__, type, addr, *val);
+
+	return ret;
+}
+
+static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct openpic *opp = dev->private;
+	u32 attr32;
+
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			return set_base_addr(opp, attr);
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		if (get_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return access_reg(opp, attr->attr, &attr32, ATTR_SET);
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr > MAX_SRC)
+			return -EINVAL;
+
+		if (get_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		if (attr32 != 0 && attr32 != 1)
+			return -EINVAL;
+
+		spin_lock_irq(&opp->lock);
+		openpic_set_irq(opp, attr->attr, attr32);
+		spin_unlock_irq(&opp->lock);
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct openpic *opp = dev->private;
+	u64 attr64;
+	u32 attr32;
+	int ret;
+
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			mutex_lock(&opp->kvm->slots_lock);
+			attr64 = opp->reg_base;
+			mutex_unlock(&opp->kvm->slots_lock);
+
+			if (copy_to_user((u64 __user *)(long)attr->addr,
+					 &attr64, sizeof(u64)))
+				return -EFAULT;
+
+			return 0;
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		ret = access_reg(opp, attr->attr, &attr32, ATTR_GET);
+		if (ret)
+			return ret;
+
+		if (put_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return 0;
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr > MAX_SRC)
+			return -EINVAL;
+
+		spin_lock_irq(&opp->lock);
+		attr32 = opp->src[attr->attr].pending;
+		spin_unlock_irq(&opp->lock);
+
+		if (put_user(attr32, (u32 __user *)(long)attr->addr))
+			return -EFAULT;
+
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+static int mpic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	switch (attr->group) {
+	case KVM_DEV_MPIC_GRP_MISC:
+		switch (attr->attr) {
+		case KVM_DEV_MPIC_BASE_ADDR:
+			return 0;
+		}
+
+		break;
+
+	case KVM_DEV_MPIC_GRP_REGISTER:
+		return 0;
+
+	case KVM_DEV_MPIC_GRP_IRQ_ACTIVE:
+		if (attr->attr > MAX_SRC)
+			break;
+
+		return 0;
+	}
+
+	return -ENXIO;
+}
+
+static void mpic_destroy(struct kvm_device *dev)
+{
+	struct openpic *opp = dev->private;
+
+	if (opp->mmio_mapped) {
+		/*
+		 * Normally we get unmapped by kvm_io_bus_destroy(),
+		 * which happens before the VCPUs release their references.
+		 *
+		 * Thus, we should only get here if no VCPUs took a reference
+		 * to us in the first place.
+		 */
+		WARN_ON(opp->nb_cpus != 0);
+		unmap_mmio(opp);
+	}
+
+	kfree(opp);
+}
+
+static int mpic_create(struct kvm_device *dev, u32 type)
+{
+	struct openpic *opp;
+	int ret;
+
+	opp = kzalloc(sizeof(struct openpic), GFP_KERNEL);
+	if (!opp)
+		return -ENOMEM;
+
+	dev->private = opp;
+	opp->kvm = dev->kvm;
+	opp->dev = dev;
+	opp->model = type;
+	spin_lock_init(&opp->lock);
+
+	INIT_LIST_HEAD(&opp->mmio_regions);
+	list_add(&openpic_gbl_mmio.list, &opp->mmio_regions);
+	list_add(&openpic_tmr_mmio.list, &opp->mmio_regions);
+	list_add(&openpic_src_mmio.list, &opp->mmio_regions);
+	list_add(&openpic_cpu_mmio.list, &opp->mmio_regions);
 
 	switch (opp->model) {
-	case OPENPIC_MODEL_FSL_MPIC_20:
-	default:
+	case KVM_DEV_TYPE_FSL_MPIC_20:
 		opp->fsl = &fsl_mpic_20;
 		opp->brr1 = 0x00400200;
 		opp->flags |= OPENPIC_FLAG_IDR_CRIT;
@@ -1290,12 +1658,10 @@ static int openpic_init(SysBusDevice *dev)
 		opp->mpic_mode_mask = GCR_MODE_MIXED;
 
 		fsl_common_init(opp);
-		map_list(opp, list_be, &list_count);
-		map_list(opp, list_fsl, &list_count);
 
 		break;
 
-	case OPENPIC_MODEL_FSL_MPIC_42:
+	case KVM_DEV_TYPE_FSL_MPIC_42:
 		opp->fsl = &fsl_mpic_42;
 		opp->brr1 = 0x00400402;
 		opp->flags |= OPENPIC_FLAG_ILR;
@@ -1303,11 +1669,27 @@ static int openpic_init(SysBusDevice *dev)
 		opp->mpic_mode_mask = GCR_MODE_PROXY;
 
 		fsl_common_init(opp);
-		map_list(opp, list_be, &list_count);
-		map_list(opp, list_fsl, &list_count);
 
 		break;
+
+	default:
+		ret = -ENODEV;
+		goto err;
 	}
 
+	openpic_reset(opp);
 	return 0;
+
+err:
+	kfree(opp);
+	return ret;
 }
+
+struct kvm_device_ops kvm_mpic_ops = {
+	.name = "kvm-mpic",
+	.create = mpic_create,
+	.destroy = mpic_destroy,
+	.set_attr = mpic_set_attr,
+	.get_attr = mpic_get_attr,
+	.has_attr = mpic_has_attr,
+};
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6b8108624851..88d69cf1f953 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -317,6 +317,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_ONE_REG:
 	case KVM_CAP_IOEVENTFD:
+	case KVM_CAP_DEVICE_CTRL:
 		r = 1;
 		break;
 #ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -762,7 +763,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		break;
 	case KVM_CAP_PPC_EPR:
 		r = 0;
-		vcpu->arch.epr_enabled = cap->args[0];
+		if (cap->args[0])
+			vcpu->arch.epr_flags |= KVMPPC_EPR_USER;
+		else
+			vcpu->arch.epr_flags &= ~KVMPPC_EPR_USER;
 		break;
 #ifdef CONFIG_BOOKE
 	case KVM_CAP_PPC_BOOKE_WATCHDOG:
@@ -908,6 +912,7 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 long kvm_arch_vm_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
 {
+	struct kvm *kvm __maybe_unused = filp->private_data;
 	void __user *argp = (void __user *)arg;
 	long r;
 
@@ -926,7 +931,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CREATE_SPAPR_TCE: {
 		struct kvm_create_spapr_tce create_tce;
-		struct kvm *kvm = filp->private_data;
 
 		r = -EFAULT;
 		if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
@@ -938,7 +942,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	case KVM_ALLOCATE_RMA: {
-		struct kvm *kvm = filp->private_data;
 		struct kvm_allocate_rma rma;
 
 		r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
@@ -948,7 +951,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 
 	case KVM_PPC_ALLOCATE_HTAB: {
-		struct kvm *kvm = filp->private_data;
 		u32 htab_order;
 
 		r = -EFAULT;
@@ -965,7 +967,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	}
 
 	case KVM_PPC_GET_HTAB_FD: {
-		struct kvm *kvm = filp->private_data;
 		struct kvm_get_htab_fd ghf;
 
 		r = -EFAULT;
@@ -978,7 +979,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_PPC_GET_SMMU_INFO: {
-		struct kvm *kvm = filp->private_data;
 		struct kvm_ppc_smmu_info info;
 
 		memset(&info, 0, sizeof(info));
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6dab6b5b3e3b..feffbdaf8986 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1099,6 +1099,8 @@ void kvm_device_get(struct kvm_device *dev);
 void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
 
+extern struct kvm_device_ops kvm_mpic_ops;
+
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 38a0be0c199f..4148becdc93f 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -837,6 +837,9 @@ struct kvm_device_attr {
 	__u64	addr;		/* userspace address of attr data */
 };
 
+#define KVM_DEV_TYPE_FSL_MPIC_20	1
+#define KVM_DEV_TYPE_FSL_MPIC_42	2
+
 /*
  * ioctls for VM fds
  */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5f0d78c2537b..f6cd14d2f0d9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2238,6 +2238,12 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	int ret;
 
 	switch (cd->type) {
+#ifdef CONFIG_KVM_MPIC
+	case KVM_DEV_TYPE_FSL_MPIC_20:
+	case KVM_DEV_TYPE_FSL_MPIC_42:
+		ops = &kvm_mpic_ops;
+		break;
+#endif
 	default:
 		return -ENODEV;
 	}
-- 
cgit 


From eb1e4f43e0f47f2655372c7d32c43db9711c278e Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Fri, 12 Apr 2013 14:08:47 +0000
Subject: kvm/ppc/mpic: add KVM_CAP_IRQ_MPIC

Enabling this capability connects the vcpu to the designated in-kernel
MPIC.  Using explicit connections between vcpus and irqchips allows
for flexibility, but the main benefit at the moment is that it
simplifies the code -- KVM doesn't need vm-global state to remember
which MPIC object is associated with this vm, and it doesn't need to
care about ordering between irqchip creation and vcpu creation.

Signed-off-by: Scott Wood <scottwood@freescale.com>
[agraf: add stub functions for kvmppc_mpic_{dis,}connect_vcpu]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt   |  8 ++++
 arch/powerpc/include/asm/kvm_host.h |  9 ++++
 arch/powerpc/include/asm/kvm_ppc.h  | 15 ++++++-
 arch/powerpc/kvm/booke.c            |  4 ++
 arch/powerpc/kvm/mpic.c             | 82 +++++++++++++++++++++++++++++++++----
 arch/powerpc/kvm/powerpc.c          | 30 ++++++++++++++
 include/uapi/linux/kvm.h            |  1 +
 7 files changed, 141 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 66b58e494935..149558b1e81e 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2744,3 +2744,11 @@ to receive the topmost interrupt vector.
 When disabled (args[0] == 0), behavior is as if this facility is unsupported.
 
 When this capability is enabled, KVM_EXIT_EPR can occur.
+
+6.6 KVM_CAP_IRQ_MPIC
+
+Architectures: ppc
+Parameters: args[0] is the MPIC device fd
+            args[1] is the MPIC CPU number for this vcpu
+
+This capability connects the vcpu to an in-kernel MPIC device.
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 153c8c2b0f88..c3f8ceffa412 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -377,6 +377,11 @@ struct kvmppc_booke_debug_reg {
 	u64 dac[KVMPPC_BOOKE_MAX_DAC];
 };
 
+#define KVMPPC_IRQ_DEFAULT	0
+#define KVMPPC_IRQ_MPIC		1
+
+struct openpic;
+
 struct kvm_vcpu_arch {
 	ulong host_stack;
 	u32 host_pid;
@@ -558,6 +563,10 @@ struct kvm_vcpu_arch {
 	unsigned long magic_page_pa; /* phys addr to map the magic page to */
 	unsigned long magic_page_ea; /* effect. addr to map the magic page to */
 
+	int irq_type;		/* one of KVM_IRQ_* */
+	int irq_cpu_id;
+	struct openpic *mpic;	/* KVM_IRQ_MPIC */
+
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	struct kvm_vcpu_arch_shared shregs;
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 3810f9c7616c..df9c80b37905 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -248,7 +248,6 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 
 struct openpic;
-void kvmppc_mpic_put(struct openpic *opp);
 
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
@@ -278,6 +277,9 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr)
 #ifdef CONFIG_KVM_MPIC
 
 void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu);
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 cpu);
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu);
 
 #else
 
@@ -285,6 +287,17 @@ static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
 {
 }
 
+static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev,
+		struct kvm_vcpu *vcpu, u32 cpu)
+{
+	return -EINVAL;
+}
+
+static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp,
+		struct kvm_vcpu *vcpu)
+{
+}
+
 #endif /* CONFIG_KVM_MPIC */
 
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 4da11ed48c59..1020119226db 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -430,6 +430,10 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 		if (update_epr == true) {
 			if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
 				kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
+			else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) {
+				BUG_ON(vcpu->arch.irq_type != KVMPPC_IRQ_MPIC);
+				kvmppc_mpic_set_epr(vcpu);
+			}
 		}
 
 		new_msr &= msr_mask;
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index cb451b91e342..10bc08a246fd 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -115,7 +115,7 @@ static int get_current_cpu(void)
 {
 #if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
 	struct kvm_vcpu *vcpu = current->thread.kvm_vcpu;
-	return vcpu ? vcpu->vcpu_id : -1;
+	return vcpu ? vcpu->arch.irq_cpu_id : -1;
 #else
 	/* XXX */
 	return -1;
@@ -249,7 +249,7 @@ static void mpic_irq_raise(struct openpic *opp, struct irq_dest *dst,
 		return;
 	}
 
-	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->vcpu_id,
+	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
 		output);
 
 	if (output != ILR_INTTGT_INT)	/* TODO */
@@ -267,7 +267,7 @@ static void mpic_irq_lower(struct openpic *opp, struct irq_dest *dst,
 		return;
 	}
 
-	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->vcpu_id,
+	pr_debug("%s: cpu %d output %d\n", __func__, dst->vcpu->arch.irq_cpu_id,
 		output);
 
 	if (output != ILR_INTTGT_INT)	/* TODO */
@@ -1165,6 +1165,20 @@ static uint32_t openpic_iack(struct openpic *opp, struct irq_dest *dst,
 	return retval;
 }
 
+void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
+{
+	struct openpic *opp = vcpu->arch.mpic;
+	int cpu = vcpu->arch.irq_cpu_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&opp->lock, flags);
+
+	if ((opp->gcr & opp->mpic_mode_mask) == GCR_MODE_PROXY)
+		kvmppc_set_epr(vcpu, openpic_iack(opp, &opp->dst[cpu], cpu));
+
+	spin_unlock_irqrestore(&opp->lock, flags);
+}
+
 static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
 				     u32 *ptr, int idx)
 {
@@ -1431,10 +1445,10 @@ static void map_mmio(struct openpic *opp)
 
 static void unmap_mmio(struct openpic *opp)
 {
-	BUG_ON(opp->mmio_mapped);
-	opp->mmio_mapped = false;
-
-	kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
+	if (opp->mmio_mapped) {
+		opp->mmio_mapped = false;
+		kvm_io_bus_unregister_dev(opp->kvm, KVM_MMIO_BUS, &opp->mmio);
+	}
 }
 
 static int set_base_addr(struct openpic *opp, struct kvm_device_attr *attr)
@@ -1693,3 +1707,57 @@ struct kvm_device_ops kvm_mpic_ops = {
 	.get_attr = mpic_get_attr,
 	.has_attr = mpic_has_attr,
 };
+
+int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 cpu)
+{
+	struct openpic *opp = dev->private;
+	int ret = 0;
+
+	if (dev->ops != &kvm_mpic_ops)
+		return -EPERM;
+	if (opp->kvm != vcpu->kvm)
+		return -EPERM;
+	if (cpu < 0 || cpu >= MAX_CPU)
+		return -EPERM;
+
+	spin_lock_irq(&opp->lock);
+
+	if (opp->dst[cpu].vcpu) {
+		ret = -EEXIST;
+		goto out;
+	}
+	if (vcpu->arch.irq_type) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	opp->dst[cpu].vcpu = vcpu;
+	opp->nb_cpus = max(opp->nb_cpus, cpu + 1);
+
+	vcpu->arch.mpic = opp;
+	vcpu->arch.irq_cpu_id = cpu;
+	vcpu->arch.irq_type = KVMPPC_IRQ_MPIC;
+
+	/* This might need to be changed if GCR gets extended */
+	if (opp->mpic_mode_mask == GCR_MODE_PROXY)
+		vcpu->arch.epr_flags |= KVMPPC_EPR_KERNEL;
+
+	kvm_device_get(dev);
+out:
+	spin_unlock_irq(&opp->lock);
+	return ret;
+}
+
+/*
+ * This should only happen immediately before the mpic is destroyed,
+ * so we shouldn't need to worry about anything still trying to
+ * access the vcpu pointer.
+ */
+void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu)
+{
+	BUG_ON(!opp->dst[vcpu->arch.irq_cpu_id].vcpu);
+
+	opp->dst[vcpu->arch.irq_cpu_id].vcpu = NULL;
+	kvm_device_put(opp->dev);
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 88d69cf1f953..5d046bbdf11f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -25,6 +25,7 @@
 #include <linux/hrtimer.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/file.h>
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
@@ -326,6 +327,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_GET_PVINFO:
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
 	case KVM_CAP_SW_TLB:
+#endif
+#ifdef CONFIG_KVM_MPIC
+	case KVM_CAP_IRQ_MPIC:
 #endif
 		r = 1;
 		break;
@@ -460,6 +464,13 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 	tasklet_kill(&vcpu->arch.tasklet);
 
 	kvmppc_remove_vcpu_debugfs(vcpu);
+
+	switch (vcpu->arch.irq_type) {
+	case KVMPPC_IRQ_MPIC:
+		kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
+		break;
+	}
+
 	kvmppc_core_vcpu_free(vcpu);
 }
 
@@ -786,6 +797,25 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		r = kvm_vcpu_ioctl_config_tlb(vcpu, &cfg);
 		break;
 	}
+#endif
+#ifdef CONFIG_KVM_MPIC
+	case KVM_CAP_IRQ_MPIC: {
+		struct file *filp;
+		struct kvm_device *dev;
+
+		r = -EBADF;
+		filp = fget(cap->args[0]);
+		if (!filp)
+			break;
+
+		r = -EPERM;
+		dev = kvm_device_from_filp(filp);
+		if (dev)
+			r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
+
+		fput(filp);
+		break;
+	}
 #endif
 	default:
 		r = -EINVAL;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4148becdc93f..0ebf59b50ed0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -667,6 +667,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_ARM_PSCI 87
 #define KVM_CAP_ARM_SET_DEVICE_ADDR 88
 #define KVM_CAP_DEVICE_CTRL 89
+#define KVM_CAP_IRQ_MPIC 90
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit 


From 8e591cb7204739efa8e15967ea334eb367039dde Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Wed, 17 Apr 2013 20:30:00 +0000
Subject: KVM: PPC: Book3S: Add infrastructure to implement kernel-side RTAS
 calls

For pseries machine emulation, in order to move the interrupt
controller code to the kernel, we need to intercept some RTAS
calls in the kernel itself.  This adds an infrastructure to allow
in-kernel handlers to be registered for RTAS services by name.
A new ioctl, KVM_PPC_RTAS_DEFINE_TOKEN, then allows userspace to
associate token values with those service names.  Then, when the
guest requests an RTAS service with one of those token values, it
will be handled by the relevant in-kernel handler rather than being
passed up to userspace as at present.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
[agraf: fix warning]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt   |  19 ++++
 arch/powerpc/include/asm/hvcall.h   |   3 +
 arch/powerpc/include/asm/kvm_host.h |   1 +
 arch/powerpc/include/asm/kvm_ppc.h  |   4 +
 arch/powerpc/include/uapi/asm/kvm.h |   6 ++
 arch/powerpc/kvm/Makefile           |   1 +
 arch/powerpc/kvm/book3s_hv.c        |  18 +++-
 arch/powerpc/kvm/book3s_pr.c        |   1 +
 arch/powerpc/kvm/book3s_pr_papr.c   |   7 ++
 arch/powerpc/kvm/book3s_rtas.c      | 182 ++++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/powerpc.c          |   8 ++
 include/uapi/linux/kvm.h            |   3 +
 12 files changed, 252 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/kvm/book3s_rtas.c

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 149558b1e81e..fb308be8521b 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2341,6 +2341,25 @@ and distributor interface, the ioctl must be called after calling
 KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs.  Calling
 this ioctl twice for any of the base addresses will return -EEXIST.
 
+4.82 KVM_PPC_RTAS_DEFINE_TOKEN
+
+Capability: KVM_CAP_PPC_RTAS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_rtas_token_args
+Returns: 0 on success, -1 on error
+
+Defines a token value for a RTAS (Run Time Abstraction Services)
+service in order to allow it to be handled in the kernel.  The
+argument struct gives the name of the service, which must be the name
+of a service that has a kernel-side implementation.  If the token
+value is non-zero, it will be associated with that service, and
+subsequent RTAS calls by the guest specifying that token will be
+handled by the kernel.  If the token value is 0, then any token
+associated with the service will be forgotten, and subsequent RTAS
+calls by the guest for that service will be passed to userspace to be
+handled.
+
 
 5. The kvm_run structure
 ------------------------
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 4bc2c3dad6ad..cf4df8e2139a 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -270,6 +270,9 @@
 #define H_SET_MODE		0x31C
 #define MAX_HCALL_OPCODE	H_SET_MODE
 
+/* Platform specific hcalls, used by KVM */
+#define H_RTAS			0xf000
+
 #ifndef __ASSEMBLY__
 
 /**
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 13740a645a6d..311f7e6f09e9 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -259,6 +259,7 @@ struct kvm_arch {
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
+	struct list_head rtas_tokens;
 #endif
 #ifdef CONFIG_KVM_MPIC
 	struct openpic *mpic;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index df9c80b37905..8a30eb7f2bec 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -166,6 +166,10 @@ extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
 
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
 
+extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
+extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
+extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 03c7819a44a3..eb9e25c194ad 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -324,6 +324,12 @@ struct kvm_allocate_rma {
 	__u64 rma_size;
 };
 
+/* for KVM_CAP_PPC_RTAS */
+struct kvm_rtas_token_args {
+	char name[120];
+	__u64 token;	/* Use a token of 0 to undefine a mapping */
+};
+
 struct kvm_book3e_206_tlb_entry {
 	__u32 mas8;
 	__u32 mas1;
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 4eada0c01082..3faf5c07329c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -86,6 +86,7 @@ kvm-book3s_64-module-objs := \
 	emulate.o \
 	book3s.o \
 	book3s_64_vio.o \
+	book3s_rtas.o \
 	$(kvm-book3s_64-objs-y)
 
 kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 5af0f2979833..f3d7af7981c7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -483,7 +483,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
 	unsigned long target, ret = H_SUCCESS;
 	struct kvm_vcpu *tvcpu;
-	int idx;
+	int idx, rc;
 
 	switch (req) {
 	case H_ENTER:
@@ -519,6 +519,19 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 					kvmppc_get_gpr(vcpu, 5),
 					kvmppc_get_gpr(vcpu, 6));
 		break;
+	case H_RTAS:
+		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+			return RESUME_HOST;
+
+		rc = kvmppc_rtas_hcall(vcpu);
+
+		if (rc == -ENOENT)
+			return RESUME_HOST;
+		else if (rc == 0)
+			break;
+
+		/* Send the error out to userspace via KVM_RUN */
+		return rc;
 	default:
 		return RESUME_HOST;
 	}
@@ -1829,6 +1842,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 	cpumask_setall(&kvm->arch.need_tlb_flush);
 
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 
 	kvm->arch.rma = NULL;
 
@@ -1874,6 +1888,8 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
 		kvm->arch.rma = NULL;
 	}
 
+	kvmppc_rtas_tokens_free(kvm);
+
 	kvmppc_free_hpt(kvm);
 	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
 }
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index c1cffa882a69..d09baf143500 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1296,6 +1296,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 {
 #ifdef CONFIG_PPC64
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 #endif
 
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index ee02b30878ed..4efa4a4f3722 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -246,6 +246,13 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		vcpu->stat.halt_wakeup++;
 		return EMULATE_DONE;
+	case H_RTAS:
+		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+			return RESUME_HOST;
+		if (kvmppc_rtas_hcall(vcpu))
+			break;
+		kvmppc_set_gpr(vcpu, 3, 0);
+		return EMULATE_DONE;
 	}
 
 	return EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
new file mode 100644
index 000000000000..6ad7050eb67d
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/err.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/rtas.h>
+
+
+struct rtas_handler {
+	void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
+	char *name;
+};
+
+static struct rtas_handler rtas_handlers[] = { };
+
+struct rtas_token_definition {
+	struct list_head list;
+	struct rtas_handler *handler;
+	u64 token;
+};
+
+static int rtas_name_matches(char *s1, char *s2)
+{
+	struct kvm_rtas_token_args args;
+	return !strncmp(s1, s2, sizeof(args.name));
+}
+
+static int rtas_token_undefine(struct kvm *kvm, char *name)
+{
+	struct rtas_token_definition *d, *tmp;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+		if (rtas_name_matches(d->handler->name, name)) {
+			list_del(&d->list);
+			kfree(d);
+			return 0;
+		}
+	}
+
+	/* It's not an error to undefine an undefined token */
+	return 0;
+}
+
+static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
+{
+	struct rtas_token_definition *d;
+	struct rtas_handler *h = NULL;
+	bool found;
+	int i;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry(d, &kvm->arch.rtas_tokens, list) {
+		if (d->token == token)
+			return -EEXIST;
+	}
+
+	found = false;
+	for (i = 0; i < ARRAY_SIZE(rtas_handlers); i++) {
+		h = &rtas_handlers[i];
+		if (rtas_name_matches(h->name, name)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		return -ENOENT;
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+
+	d->handler = h;
+	d->token = token;
+
+	list_add_tail(&d->list, &kvm->arch.rtas_tokens);
+
+	return 0;
+}
+
+int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
+{
+	struct kvm_rtas_token_args args;
+	int rc;
+
+	if (copy_from_user(&args, argp, sizeof(args)))
+		return -EFAULT;
+
+	mutex_lock(&kvm->lock);
+
+	if (args.token)
+		rc = rtas_token_define(kvm, args.name, args.token);
+	else
+		rc = rtas_token_undefine(kvm, args.name);
+
+	mutex_unlock(&kvm->lock);
+
+	return rc;
+}
+
+int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
+{
+	struct rtas_token_definition *d;
+	struct rtas_args args;
+	rtas_arg_t *orig_rets;
+	gpa_t args_phys;
+	int rc;
+
+	/* r4 contains the guest physical address of the RTAS args */
+	args_phys = kvmppc_get_gpr(vcpu, 4);
+
+	rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+	if (rc)
+		goto fail;
+
+	/*
+	 * args->rets is a pointer into args->args. Now that we've
+	 * copied args we need to fix it up to point into our copy,
+	 * not the guest args. We also need to save the original
+	 * value so we can restore it on the way out.
+	 */
+	orig_rets = args.rets;
+	args.rets = &args.args[args.nargs];
+
+	mutex_lock(&vcpu->kvm->lock);
+
+	rc = -ENOENT;
+	list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
+		if (d->token == args.token) {
+			d->handler->handler(vcpu, &args);
+			rc = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&vcpu->kvm->lock);
+
+	if (rc == 0) {
+		args.rets = orig_rets;
+		rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+		if (rc)
+			goto fail;
+	}
+
+	return rc;
+
+fail:
+	/*
+	 * We only get here if the guest has called RTAS with a bogus
+	 * args pointer. That means we can't get to the args, and so we
+	 * can't fail the RTAS call. So fail right out to userspace,
+	 * which should kill the guest.
+	 */
+	return rc;
+}
+
+void kvmppc_rtas_tokens_free(struct kvm *kvm)
+{
+	struct rtas_token_definition *d, *tmp;
+
+	lockdep_assert_held(&kvm->lock);
+
+	list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
+		list_del(&d->list);
+		kfree(d);
+	}
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index d8e81e6c1afe..d4fd443ae7bd 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -341,6 +341,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_SPAPR_TCE:
 	case KVM_CAP_PPC_ALLOC_HTAB:
+	case KVM_CAP_PPC_RTAS:
 		r = 1;
 		break;
 #endif /* CONFIG_PPC_BOOK3S_64 */
@@ -986,6 +987,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	case KVM_ALLOCATE_RMA: {
 		struct kvm_allocate_rma rma;
+		struct kvm *kvm = filp->private_data;
 
 		r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
 		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
@@ -1030,6 +1032,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			r = -EFAULT;
 		break;
 	}
+	case KVM_PPC_RTAS_DEFINE_TOKEN: {
+		struct kvm *kvm = filp->private_data;
+
+		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
+		break;
+	}
 #endif /* CONFIG_PPC_BOOK3S_64 */
 	default:
 		r = -ENOTTY;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0ebf59b50ed0..d4005192ad6e 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -668,6 +668,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_ARM_SET_DEVICE_ADDR 88
 #define KVM_CAP_DEVICE_CTRL 89
 #define KVM_CAP_IRQ_MPIC 90
+#define KVM_CAP_PPC_RTAS 91
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -928,6 +929,8 @@ struct kvm_s390_ucas_mapping {
 #define KVM_PPC_GET_HTAB_FD	  _IOW(KVMIO,  0xaa, struct kvm_get_htab_fd)
 /* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */
 #define KVM_ARM_SET_DEVICE_ADDR	  _IOW(KVMIO,  0xab, struct kvm_arm_device_addr)
+/* Available with KVM_CAP_PPC_RTAS */
+#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO,  0xac, struct kvm_rtas_token_args)
 
 /* ioctl for vm fd */
 #define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
-- 
cgit 


From 2a5bab1004729f3302c776e53ee7c895b98bb1ce Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 16 Apr 2013 13:49:18 -0600
Subject: kvm: Allow build-time configuration of KVM device assignment

We hope to at some point deprecate KVM legacy device assignment in
favor of VFIO-based assignment.  Towards that end, allow legacy
device assignment to be deconfigured.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Alexander Graf <agraf@suse.de>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/ia64/include/uapi/asm/kvm.h |  1 -
 arch/ia64/kvm/Kconfig            | 13 +++++++++++--
 arch/ia64/kvm/Makefile           |  6 +++---
 arch/ia64/kvm/kvm-ia64.c         |  2 --
 arch/x86/include/uapi/asm/kvm.h  |  1 -
 arch/x86/kvm/Kconfig             | 13 +++++++++++--
 arch/x86/kvm/Makefile            |  5 +++--
 arch/x86/kvm/x86.c               |  6 ++++--
 include/linux/kvm_host.h         | 30 ++++++++----------------------
 include/uapi/linux/kvm.h         |  4 ----
 10 files changed, 40 insertions(+), 41 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h
index ec6c6b301238..99503c284400 100644
--- a/arch/ia64/include/uapi/asm/kvm.h
+++ b/arch/ia64/include/uapi/asm/kvm.h
@@ -27,7 +27,6 @@
 /* Select x86 specific features in <linux/kvm.h> */
 #define __KVM_HAVE_IOAPIC
 #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_DEVICE_ASSIGNMENT
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
index 043183a06409..990b86420cc6 100644
--- a/arch/ia64/kvm/Kconfig
+++ b/arch/ia64/kvm/Kconfig
@@ -21,8 +21,6 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on BROKEN
 	depends on HAVE_KVM && MODULES
-	# for device assignment:
-	depends on PCI
 	depends on BROKEN
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
@@ -51,6 +49,17 @@ config KVM_INTEL
 	  Provides support for KVM on Itanium 2 processors equipped with the VT
 	  extensions.
 
+config KVM_DEVICE_ASSIGNMENT
+	bool "KVM legacy PCI device assignment support"
+	depends on KVM && PCI && IOMMU_API
+	default y
+	---help---
+	  Provide support for legacy PCI device assignment through KVM.  The
+	  kernel now also supports a full featured userspace device driver
+	  framework through VFIO, which supersedes much of this support.
+
+	  If unsure, say Y.
+
 source drivers/vhost/Kconfig
 
 endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 511f64a78d56..1a4053789d01 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -49,10 +49,10 @@ ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
 asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-		coalesced_mmio.o irq_comm.o assigned-dev.o irqchip.o)
+		coalesced_mmio.o irq_comm.o)
 
-ifeq ($(CONFIG_IOMMU_API),y)
-common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
+ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
+common-objs += $(addprefix ../../../virt/kvm/, assigned-dev.o iommu.o)
 endif
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 032c54d63c3d..dcc560729c20 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1368,9 +1368,7 @@ void kvm_arch_sync_events(struct kvm *kvm)
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	kvm_iommu_unmap_guest(kvm);
-#ifdef  KVM_CAP_DEVICE_ASSIGNMENT
 	kvm_free_all_assigned_devices(kvm);
-#endif
 	kfree(kvm->arch.vioapic);
 	kvm_release_vm_pages(kvm);
 }
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index a65ec29e6ffb..5d9a3033b3d7 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -29,7 +29,6 @@
 #define __KVM_HAVE_PIT
 #define __KVM_HAVE_IOAPIC
 #define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_DEVICE_ASSIGNMENT
 #define __KVM_HAVE_MSI
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 9d50efdb719d..a47a3e54b964 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -21,8 +21,6 @@ config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
 	depends on HIGH_RES_TIMERS
-	# for device assignment:
-	depends on PCI
 	# for TASKSTATS/TASK_DELAY_ACCT:
 	depends on NET
 	select PREEMPT_NOTIFIERS
@@ -83,6 +81,17 @@ config KVM_MMU_AUDIT
 	 This option adds a R/W kVM module parameter 'mmu_audit', which allows
 	 audit  KVM MMU at runtime.
 
+config KVM_DEVICE_ASSIGNMENT
+	bool "KVM legacy PCI device assignment support"
+	depends on KVM && PCI && IOMMU_API
+	default y
+	---help---
+	  Provide support for legacy PCI device assignment through KVM.  The
+	  kernel now also supports a full featured userspace device driver
+	  framework through VFIO, which supersedes much of this support.
+
+	  If unsure, say Y.
+
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/vhost/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index a797b8e43ade..d609e1d84048 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,8 +7,9 @@ CFLAGS_vmx.o := -I.
 
 kvm-y			+= $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 				coalesced_mmio.o irq_comm.o eventfd.o \
-				assigned-dev.o irqchip.o)
-kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
+				irqchip.o)
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(addprefix ../../../virt/kvm/, \
+				assigned-dev.o iommu.o)
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 145b1c81011b..8747fef7fd59 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2501,7 +2501,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_USER_NMI:
 	case KVM_CAP_REINJECT_CONTROL:
 	case KVM_CAP_IRQ_INJECT_STATUS:
-	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_IRQFD:
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_PIT2:
@@ -2519,9 +2518,12 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_XSAVE:
 	case KVM_CAP_ASYNC_PF:
 	case KVM_CAP_GET_TSC_KHZ:
-	case KVM_CAP_PCI_2_3:
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_READONLY_MEM:
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+	case KVM_CAP_ASSIGN_DEV_IRQ:
+	case KVM_CAP_PCI_2_3:
+#endif
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 309774715897..996661eb8e99 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -667,7 +667,6 @@ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
 void kvm_arch_destroy_vm(struct kvm *kvm);
-void kvm_free_all_assigned_devices(struct kvm *kvm);
 void kvm_arch_sync_events(struct kvm *kvm);
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
@@ -736,7 +735,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
 /* For vcpu->arch.iommu_flags */
 #define KVM_IOMMU_CACHE_COHERENCY	0x1
 
-#ifdef CONFIG_IOMMU_API
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 int kvm_iommu_map_guest(struct kvm *kvm);
@@ -745,7 +744,7 @@ int kvm_assign_device(struct kvm *kvm,
 		      struct kvm_assigned_dev_kernel *assigned_dev);
 int kvm_deassign_device(struct kvm *kvm,
 			struct kvm_assigned_dev_kernel *assigned_dev);
-#else /* CONFIG_IOMMU_API */
+#else
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
 				      struct kvm_memory_slot *slot)
 {
@@ -757,28 +756,11 @@ static inline void kvm_iommu_unmap_pages(struct kvm *kvm,
 {
 }
 
-static inline int kvm_iommu_map_guest(struct kvm *kvm)
-{
-	return -ENODEV;
-}
-
 static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
 {
 	return 0;
 }
-
-static inline int kvm_assign_device(struct kvm *kvm,
-		struct kvm_assigned_dev_kernel *assigned_dev)
-{
-	return 0;
-}
-
-static inline int kvm_deassign_device(struct kvm *kvm,
-		struct kvm_assigned_dev_kernel *assigned_dev)
-{
-	return 0;
-}
-#endif /* CONFIG_IOMMU_API */
+#endif
 
 static inline void __guest_enter(void)
 {
@@ -1032,11 +1014,13 @@ static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
 
 #endif
 
-#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 
 long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 				  unsigned long arg);
 
+void kvm_free_all_assigned_devices(struct kvm *kvm);
+
 #else
 
 static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
@@ -1045,6 +1029,8 @@ static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
 	return -ENOTTY;
 }
 
+static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
+
 #endif
 
 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d4005192ad6e..965e5b52dee0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -561,9 +561,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_MP_STATE 14
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
-#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
 #define KVM_CAP_DEVICE_ASSIGNMENT 17
-#endif
 #define KVM_CAP_IOMMU 18
 #ifdef __KVM_HAVE_MSI
 #define KVM_CAP_DEVICE_MSI 20
@@ -581,9 +579,7 @@ struct kvm_ppc_smmu_info {
 #endif
 #define KVM_CAP_IRQ_ROUTING 25
 #define KVM_CAP_IRQ_INJECT_STATUS 26
-#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
 #define KVM_CAP_DEVICE_DEASSIGNMENT 27
-#endif
 #ifdef __KVM_HAVE_MSIX
 #define KVM_CAP_DEVICE_MSIX 28
 #endif
-- 
cgit 


From 5d174dd80ce94b7ed0950e31fc9a0122c689523b Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Sat, 27 Apr 2013 11:31:55 +0000
Subject: vxlan: source compatiablity with IFLA_VXLAN_GROUP (v2)

Source compatiability for build iproute2 was broken by:
  commit c7995c43facc6e5dea4de63fa9d283a337aabeb1
  Author: Atzm Watanabe <atzm@stratosphere.co.jp>
    vxlan: Allow setting destination to unicast address.

Since this commit has not made it upstream (still net-next),
and better to avoid gratitious changes to exported API's;
go back to original definition, and add a comment.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c          | 10 +++++-----
 include/uapi/linux/if_link.h |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index e6ac16cd9ba2..d8de8a1303eb 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1324,7 +1324,7 @@ static void vxlan_setup(struct net_device *dev)
 
 static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_ID]		= { .type = NLA_U32 },
-	[IFLA_VXLAN_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+	[IFLA_VXLAN_GROUP]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
 	[IFLA_VXLAN_LINK]	= { .type = NLA_U32 },
 	[IFLA_VXLAN_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
 	[IFLA_VXLAN_TOS]	= { .type = NLA_U8 },
@@ -1406,8 +1406,8 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 	}
 	dst->remote_vni = vni;
 
-	if (data[IFLA_VXLAN_REMOTE])
-		dst->remote_ip = nla_get_be32(data[IFLA_VXLAN_REMOTE]);
+	if (data[IFLA_VXLAN_GROUP])
+		dst->remote_ip = nla_get_be32(data[IFLA_VXLAN_GROUP]);
 
 	if (data[IFLA_VXLAN_LOCAL])
 		vxlan->saddr = nla_get_be32(data[IFLA_VXLAN_LOCAL]);
@@ -1488,7 +1488,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
 {
 
 	return nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_ID */
-		nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_REMOTE */
+		nla_total_size(sizeof(__be32)) +/* IFLA_VXLAN_GROUP */
 		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_LINK */
 		nla_total_size(sizeof(__be32))+	/* IFLA_VXLAN_LOCAL */
 		nla_total_size(sizeof(__u8)) +	/* IFLA_VXLAN_TTL */
@@ -1516,7 +1516,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni))
 		goto nla_put_failure;
 
-	if (dst->remote_ip && nla_put_be32(skb, IFLA_VXLAN_REMOTE, dst->remote_ip))
+	if (dst->remote_ip && nla_put_be32(skb, IFLA_VXLAN_GROUP, dst->remote_ip))
 		goto nla_put_failure;
 
 	if (dst->remote_ifindex && nla_put_u32(skb, IFLA_VXLAN_LINK, dst->remote_ifindex))
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index e3163544f339..5346131524e4 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -297,7 +297,7 @@ enum macvlan_mode {
 enum {
 	IFLA_VXLAN_UNSPEC,
 	IFLA_VXLAN_ID,
-	IFLA_VXLAN_REMOTE,
+	IFLA_VXLAN_GROUP,	/* group or remote address */
 	IFLA_VXLAN_LINK,
 	IFLA_VXLAN_LOCAL,
 	IFLA_VXLAN_TTL,
-- 
cgit 


From 823aa873bc782f1c51b1ce8ec6da7cfcaf93836e Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Sat, 27 Apr 2013 11:31:57 +0000
Subject: vxlan: allow choosing destination port per vxlan

Allow configuring the default destination port on a per-device basis.
Adds new netlink paramater IFLA_VXLAN_PORT to allow setting destination
port when creating new vxlan.

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c          | 18 +++++++++++++-----
 include/uapi/linux/if_link.h |  3 ++-
 2 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 66fd7bef0a4d..f1d9e981e541 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -110,6 +110,7 @@ struct vxlan_dev {
 	struct net_device *dev;
 	struct vxlan_rdst default_dst;	/* default destination */
 	__be32		  saddr;	/* source address */
+	__be16		  dst_port;
 	__u16		  port_min;	/* source port range */
 	__u16		  port_max;
 	__u8		  tos;		/* TOS override */
@@ -192,7 +193,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 	if (send_ip && nla_put_be32(skb, NDA_DST, rdst->remote_ip))
 		goto nla_put_failure;
 
-	if (rdst->remote_port && rdst->remote_port != htons(vxlan_port) &&
+	if (rdst->remote_port && rdst->remote_port != vxlan->dst_port &&
 	    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
 		goto nla_put_failure;
 	if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
@@ -467,7 +468,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 			return -EINVAL;
 		port = nla_get_be16(tb[NDA_PORT]);
 	} else
-		port = htons(vxlan_port);
+		port = vxlan->dst_port;
 
 	if (tb[NDA_VNI]) {
 		if (nla_len(tb[NDA_VNI]) != sizeof(u32))
@@ -579,7 +580,7 @@ static void vxlan_snoop(struct net_device *dev,
 		err = vxlan_fdb_create(vxlan, src_mac, src_ip,
 				       NUD_REACHABLE,
 				       NLM_F_EXCL|NLM_F_CREATE,
-				       vxlan_port,
+				       vxlan->dst_port,
 				       vxlan->default_dst.remote_vni,
 				       0, NTF_SELF);
 		spin_unlock(&vxlan->hash_lock);
@@ -970,7 +971,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	__be16 df = 0;
 	__u8 tos, ttl;
 
-	dst_port = rdst->remote_port ? rdst->remote_port : htons(vxlan_port);
+	dst_port = rdst->remote_port ? rdst->remote_port : vxlan->dst_port;
 	vni = rdst->remote_vni;
 	dst = rdst->remote_ip;
 
@@ -1314,6 +1315,7 @@ static void vxlan_setup(struct net_device *dev)
 	inet_get_local_port_range(&low, &high);
 	vxlan->port_min = low;
 	vxlan->port_max = high;
+	vxlan->dst_port = htons(vxlan_port);
 
 	vxlan->dev = dev;
 
@@ -1336,6 +1338,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
 	[IFLA_VXLAN_RSC]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_L2MISS]	= { .type = NLA_U8 },
 	[IFLA_VXLAN_L3MISS]	= { .type = NLA_U8 },
+	[IFLA_VXLAN_PORT]	= { .type = NLA_U16 },
 };
 
 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -1465,6 +1468,9 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
 		vxlan->port_max = ntohs(p->high);
 	}
 
+	if (data[IFLA_VXLAN_PORT])
+		vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
+
 	SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops);
 
 	err = register_netdevice(dev);
@@ -1500,6 +1506,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
 		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_AGEING */
 		nla_total_size(sizeof(__u32)) +	/* IFLA_VXLAN_LIMIT */
 		nla_total_size(sizeof(struct ifla_vxlan_port_range)) +
+		nla_total_size(sizeof(__be16))+ /* IFLA_VXLAN_PORT */
 		0;
 }
 
@@ -1536,7 +1543,8 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	    nla_put_u8(skb, IFLA_VXLAN_L3MISS,
 			!!(vxlan->flags & VXLAN_F_L3MISS)) ||
 	    nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) ||
-	    nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax))
+	    nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax) ||
+	    nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->dst_port))
 		goto nla_put_failure;
 
 	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 5346131524e4..b05823cae784 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -305,11 +305,12 @@ enum {
 	IFLA_VXLAN_LEARNING,
 	IFLA_VXLAN_AGEING,
 	IFLA_VXLAN_LIMIT,
-	IFLA_VXLAN_PORT_RANGE,
+	IFLA_VXLAN_PORT_RANGE,	/* source port */
 	IFLA_VXLAN_PROXY,
 	IFLA_VXLAN_RSC,
 	IFLA_VXLAN_L2MISS,
 	IFLA_VXLAN_L3MISS,
+	IFLA_VXLAN_PORT,	/* destination port */
 	__IFLA_VXLAN_MAX
 };
 #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
-- 
cgit 


From 626419038a3e4a1f61119a4af08d01415961eb4e Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 25 Apr 2013 06:53:52 +0000
Subject: packet_diag: disclose uid value

This value is disclosed via /proc/net/packet but not via netlink messages.
The goal is to have the same level of information.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/packet_diag.h |  1 +
 net/packet/diag.c                | 19 ++++++++++++++-----
 2 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h
index afafd703ad92..84f83a47b6f6 100644
--- a/include/uapi/linux/packet_diag.h
+++ b/include/uapi/linux/packet_diag.h
@@ -32,6 +32,7 @@ enum {
 	PACKET_DIAG_RX_RING,
 	PACKET_DIAG_TX_RING,
 	PACKET_DIAG_FANOUT,
+	PACKET_DIAG_UID,
 
 	__PACKET_DIAG_MAX,
 };
diff --git a/net/packet/diag.c b/net/packet/diag.c
index d3fcd1ebef7e..04c8219a2d06 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -125,8 +125,10 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb)
 	return ret;
 }
 
-static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req,
-		u32 portid, u32 seq, u32 flags, int sk_ino)
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+			struct packet_diag_req *req,
+			struct user_namespace *user_ns,
+			u32 portid, u32 seq, u32 flags, int sk_ino)
 {
 	struct nlmsghdr *nlh;
 	struct packet_diag_msg *rp;
@@ -147,6 +149,11 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag
 			pdiag_put_info(po, skb))
 		goto out_nlmsg_trim;
 
+	if ((req->pdiag_show & PACKET_SHOW_INFO) &&
+	    nla_put_u32(skb, PACKET_DIAG_UID,
+			from_kuid_munged(user_ns, sock_i_uid(sk))))
+		goto out_nlmsg_trim;
+
 	if ((req->pdiag_show & PACKET_SHOW_MCLIST) &&
 			pdiag_put_mclist(po, skb))
 		goto out_nlmsg_trim;
@@ -183,9 +190,11 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 		if (num < s_num)
 			goto next;
 
-		if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid,
-					cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					sock_i_ino(sk)) < 0)
+		if (sk_diag_fill(sk, skb, req,
+				 sk_user_ns(NETLINK_CB(cb->skb).sk),
+				 NETLINK_CB(cb->skb).portid,
+				 cb->nlh->nlmsg_seq, NLM_F_MULTI,
+				 sock_i_ino(sk)) < 0)
 			goto done;
 next:
 		num++;
-- 
cgit 


From 76d0eeb1a1579453cfd7c4da22004d4b34187ab4 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 25 Apr 2013 06:53:53 +0000
Subject: packet_diag: disclose meminfo values

sk_rmem_alloc is disclosed via /proc/net/packet but not via netlink messages.
The goal is to have the same level of information.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/packet_diag.h | 2 ++
 net/packet/diag.c                | 4 ++++
 2 files changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h
index 84f83a47b6f6..c0802c18c8ad 100644
--- a/include/uapi/linux/packet_diag.h
+++ b/include/uapi/linux/packet_diag.h
@@ -16,6 +16,7 @@ struct packet_diag_req {
 #define PACKET_SHOW_MCLIST	0x00000002 /* A set of packet_diag_mclist-s */
 #define PACKET_SHOW_RING_CFG	0x00000004 /* Rings configuration parameters */
 #define PACKET_SHOW_FANOUT	0x00000008
+#define PACKET_SHOW_MEMINFO	0x00000010
 
 struct packet_diag_msg {
 	__u8	pdiag_family;
@@ -33,6 +34,7 @@ enum {
 	PACKET_DIAG_TX_RING,
 	PACKET_DIAG_FANOUT,
 	PACKET_DIAG_UID,
+	PACKET_DIAG_MEMINFO,
 
 	__PACKET_DIAG_MAX,
 };
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 04c8219a2d06..822fe9b33a49 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -166,6 +166,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 			pdiag_put_fanout(po, skb))
 		goto out_nlmsg_trim;
 
+	if ((req->pdiag_show & PACKET_SHOW_MEMINFO) &&
+	    sock_diag_put_meminfo(sk, skb, PACKET_DIAG_MEMINFO))
+		goto out_nlmsg_trim;
+
 	return nlmsg_end(skb, nlh);
 
 out_nlmsg_trim:
-- 
cgit 


From e8d9612c181b1a68ba5f71384629343466f1bd13 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 25 Apr 2013 06:53:54 +0000
Subject: sock_diag: allow to dump bpf filters

This patch allows to dump BPF filters attached to a socket with
SO_ATTACH_FILTER.
Note that we check CAP_SYS_ADMIN before allowing to dump this info.

For now, only AF_PACKET sockets use this feature.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h        |  3 +++
 include/uapi/linux/packet_diag.h |  2 ++
 net/core/sock_diag.c             | 33 +++++++++++++++++++++++++++++++++
 net/packet/diag.c                |  4 ++++
 4 files changed, 42 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index e8d702e0fd89..54f91d35e5fd 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -1,6 +1,7 @@
 #ifndef __SOCK_DIAG_H__
 #define __SOCK_DIAG_H__
 
+#include <linux/user_namespace.h>
 #include <uapi/linux/sock_diag.h>
 
 struct sk_buff;
@@ -22,5 +23,7 @@ int sock_diag_check_cookie(void *sk, __u32 *cookie);
 void sock_diag_save_cookie(void *sk, __u32 *cookie);
 
 int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
+int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
+			     struct sk_buff *skb, int attrtype);
 
 #endif
diff --git a/include/uapi/linux/packet_diag.h b/include/uapi/linux/packet_diag.h
index c0802c18c8ad..b2cc0cd9c4d9 100644
--- a/include/uapi/linux/packet_diag.h
+++ b/include/uapi/linux/packet_diag.h
@@ -17,6 +17,7 @@ struct packet_diag_req {
 #define PACKET_SHOW_RING_CFG	0x00000004 /* Rings configuration parameters */
 #define PACKET_SHOW_FANOUT	0x00000008
 #define PACKET_SHOW_MEMINFO	0x00000010
+#define PACKET_SHOW_FILTER	0x00000020
 
 struct packet_diag_msg {
 	__u8	pdiag_family;
@@ -35,6 +36,7 @@ enum {
 	PACKET_DIAG_FANOUT,
 	PACKET_DIAG_UID,
 	PACKET_DIAG_MEMINFO,
+	PACKET_DIAG_FILTER,
 
 	__PACKET_DIAG_MAX,
 };
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a29e90cf36b7..d5bef0b0f639 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -49,6 +49,39 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
 }
 EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
 
+int sock_diag_put_filterinfo(struct user_namespace *user_ns, struct sock *sk,
+			     struct sk_buff *skb, int attrtype)
+{
+	struct nlattr *attr;
+	struct sk_filter *filter;
+	unsigned int len;
+	int err = 0;
+
+	if (!ns_capable(user_ns, CAP_NET_ADMIN)) {
+		nla_reserve(skb, attrtype, 0);
+		return 0;
+	}
+
+	rcu_read_lock();
+
+	filter = rcu_dereference(sk->sk_filter);
+	len = filter ? filter->len * sizeof(struct sock_filter) : 0;
+
+	attr = nla_reserve(skb, attrtype, len);
+	if (attr == NULL) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	if (filter)
+		memcpy(nla_data(attr), filter->insns, len);
+
+out:
+	rcu_read_unlock();
+	return err;
+}
+EXPORT_SYMBOL(sock_diag_put_filterinfo);
+
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
 {
 	mutex_lock(&sock_diag_table_mutex);
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 822fe9b33a49..a9584a2f6d69 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -170,6 +170,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 	    sock_diag_put_meminfo(sk, skb, PACKET_DIAG_MEMINFO))
 		goto out_nlmsg_trim;
 
+	if ((req->pdiag_show & PACKET_SHOW_FILTER) &&
+	    sock_diag_put_filterinfo(user_ns, sk, skb, PACKET_DIAG_FILTER))
+		goto out_nlmsg_trim;
+
 	return nlmsg_end(skb, nlh);
 
 out_nlmsg_trim:
-- 
cgit 


From 34d666d489cf70c246ca99b2387741915c34b88c Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 27 Apr 2013 14:38:56 +0200
Subject: netfilter: ipset: Introduce the counter extension in the core

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h      | 75 +++++++++++++++++++++++++++--
 include/uapi/linux/netfilter/ipset/ip_set.h |  5 ++
 net/netfilter/ipset/ip_set_core.c           | 10 ++++
 3 files changed, 86 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index bf0220cbf46a..0f978ebfaefb 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -52,18 +52,24 @@ enum ip_set_extension {
 	IPSET_EXT_NONE = 0,
 	IPSET_EXT_BIT_TIMEOUT = 1,
 	IPSET_EXT_TIMEOUT = (1 << IPSET_EXT_BIT_TIMEOUT),
+	IPSET_EXT_BIT_COUNTER = 2,
+	IPSET_EXT_COUNTER = (1 << IPSET_EXT_BIT_COUNTER),
 };
 
 /* Extension offsets */
 enum ip_set_offset {
 	IPSET_OFFSET_TIMEOUT = 0,
+	IPSET_OFFSET_COUNTER,
 	IPSET_OFFSET_MAX,
 };
 
 #define SET_WITH_TIMEOUT(s)	((s)->extensions & IPSET_EXT_TIMEOUT)
+#define SET_WITH_COUNTER(s)	((s)->extensions & IPSET_EXT_COUNTER)
 
 struct ip_set_ext {
 	unsigned long timeout;
+	u64 packets;
+	u64 bytes;
 };
 
 struct ip_set;
@@ -177,6 +183,65 @@ struct ip_set {
 	void *data;
 };
 
+struct ip_set_counter {
+	atomic64_t bytes;
+	atomic64_t packets;
+};
+
+static inline void
+ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
+{
+	atomic64_add((long long)bytes, &(counter)->bytes);
+}
+
+static inline void
+ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
+{
+	atomic64_add((long long)packets, &(counter)->packets);
+}
+
+static inline u64
+ip_set_get_bytes(const struct ip_set_counter *counter)
+{
+	return (u64)atomic64_read(&(counter)->bytes);
+}
+
+static inline u64
+ip_set_get_packets(const struct ip_set_counter *counter)
+{
+	return (u64)atomic64_read(&(counter)->packets);
+}
+
+static inline void
+ip_set_update_counter(struct ip_set_counter *counter,
+		      const struct ip_set_ext *ext,
+		      struct ip_set_ext *mext, u32 flags)
+{
+	if (ext->packets != ULLONG_MAX) {
+		ip_set_add_bytes(ext->bytes, counter);
+		ip_set_add_packets(ext->packets, counter);
+	}
+}
+
+static inline bool
+ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter)
+{
+	return nla_put_net64(skb, IPSET_ATTR_BYTES,
+			     cpu_to_be64(ip_set_get_bytes(counter))) ||
+	       nla_put_net64(skb, IPSET_ATTR_PACKETS,
+			     cpu_to_be64(ip_set_get_packets(counter)));
+}
+
+static inline void
+ip_set_init_counter(struct ip_set_counter *counter,
+		    const struct ip_set_ext *ext)
+{
+	if (ext->bytes != ULLONG_MAX)
+		atomic64_set(&(counter)->bytes, (long long)(ext->bytes));
+	if (ext->packets != ULLONG_MAX)
+		atomic64_set(&(counter)->packets, (long long)(ext->packets));
+}
+
 /* register and unregister set references */
 extern ip_set_id_t ip_set_get_byname(const char *name, struct ip_set **set);
 extern void ip_set_put_byindex(ip_set_id_t index);
@@ -318,10 +383,12 @@ bitmap_bytes(u32 a, u32 b)
 
 #include <linux/netfilter/ipset/ip_set_timeout.h>
 
-#define IP_SET_INIT_KEXT(skb, opt, map)		\
-	{ .timeout = ip_set_adt_opt_timeout(opt, map) }
+#define IP_SET_INIT_KEXT(skb, opt, map)			\
+	{ .bytes = (skb)->len, .packets = 1,		\
+	  .timeout = ip_set_adt_opt_timeout(opt, map) }
 
-#define IP_SET_INIT_UEXT(map)			\
-	{ .timeout = (map)->timeout }
+#define IP_SET_INIT_UEXT(map)				\
+	{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX,	\
+	  .timeout = (map)->timeout }
 
 #endif /*_IP_SET_H */
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index fbee42807a11..ed452675d153 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -108,6 +108,8 @@ enum {
 	IPSET_ATTR_CIDR2,
 	IPSET_ATTR_IP2_TO,
 	IPSET_ATTR_IFACE,
+	IPSET_ATTR_BYTES,
+	IPSET_ATTR_PACKETS,
 	__IPSET_ATTR_ADT_MAX,
 };
 #define IPSET_ATTR_ADT_MAX	(__IPSET_ATTR_ADT_MAX - 1)
@@ -137,6 +139,7 @@ enum ipset_errno {
 	IPSET_ERR_REFERENCED,
 	IPSET_ERR_IPADDR_IPV4,
 	IPSET_ERR_IPADDR_IPV6,
+	IPSET_ERR_COUNTER,
 
 	/* Type specific error codes */
 	IPSET_ERR_TYPE_SPECIFIC = 4352,
@@ -161,6 +164,8 @@ enum ipset_cadt_flags {
 	IPSET_FLAG_PHYSDEV	= (1 << IPSET_FLAG_BIT_PHYSDEV),
 	IPSET_FLAG_BIT_NOMATCH	= 2,
 	IPSET_FLAG_NOMATCH	= (1 << IPSET_FLAG_BIT_NOMATCH),
+	IPSET_FLAG_BIT_WITH_COUNTERS = 3,
+	IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS),
 	IPSET_FLAG_CADT_MAX	= 15,	/* Upper half */
 };
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 4486285d10da..f6d878a46c43 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -324,6 +324,16 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
 			return -IPSET_ERR_TIMEOUT;
 		ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]);
 	}
+	if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) {
+		if (!(set->extensions & IPSET_EXT_COUNTER))
+			return -IPSET_ERR_COUNTER;
+		if (tb[IPSET_ATTR_BYTES])
+			ext->bytes = be64_to_cpu(nla_get_be64(
+						 tb[IPSET_ATTR_BYTES]));
+		if (tb[IPSET_ATTR_PACKETS])
+			ext->packets = be64_to_cpu(nla_get_be64(
+						   tb[IPSET_ATTR_PACKETS]));
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ip_set_get_extensions);
-- 
cgit 


From 6e01781d1c80e2e8263471252a631e86165b15c5 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 27 Apr 2013 14:40:50 +0200
Subject: netfilter: ipset: set match: add support to match the counters

The new revision of the set match supports to match the counters
and to suppress updating the counters at matching too.

At the set:list types, the updating of the subcounters can be
suppressed as well.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h      |  9 +++-
 include/uapi/linux/netfilter/ipset/ip_set.h | 31 +++++++++++--
 include/uapi/linux/netfilter/xt_set.h       |  9 ++++
 net/netfilter/ipset/ip_set_core.c           |  2 +-
 net/netfilter/ipset/ip_set_list_set.c       |  8 +++-
 net/netfilter/xt_set.c                      | 70 +++++++++++++++++++++++++++++
 6 files changed, 120 insertions(+), 9 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 0f978ebfaefb..d80e2753847c 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -76,7 +76,7 @@ struct ip_set;
 
 typedef int (*ipset_adtfn)(struct ip_set *set, void *value,
 			   const struct ip_set_ext *ext,
-			   struct ip_set_ext *mext, u32 flags);
+			   struct ip_set_ext *mext, u32 cmdflags);
 
 /* Kernel API function options */
 struct ip_set_adt_opt {
@@ -217,10 +217,15 @@ ip_set_update_counter(struct ip_set_counter *counter,
 		      const struct ip_set_ext *ext,
 		      struct ip_set_ext *mext, u32 flags)
 {
-	if (ext->packets != ULLONG_MAX) {
+	if (ext->packets != ULLONG_MAX &&
+	    !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
 		ip_set_add_bytes(ext->bytes, counter);
 		ip_set_add_packets(ext->packets, counter);
 	}
+	if (flags & IPSET_FLAG_MATCH_COUNTERS) {
+		mext->packets = ip_set_get_packets(counter);
+		mext->bytes = ip_set_get_bytes(counter);
+	}
 }
 
 static inline bool
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index ed452675d153..8024cdf13b70 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -145,7 +145,7 @@ enum ipset_errno {
 	IPSET_ERR_TYPE_SPECIFIC = 4352,
 };
 
-/* Flags at command level */
+/* Flags at command level or match/target flags, lower half of cmdattrs*/
 enum ipset_cmd_flags {
 	IPSET_FLAG_BIT_EXIST	= 0,
 	IPSET_FLAG_EXIST	= (1 << IPSET_FLAG_BIT_EXIST),
@@ -153,10 +153,20 @@ enum ipset_cmd_flags {
 	IPSET_FLAG_LIST_SETNAME	= (1 << IPSET_FLAG_BIT_LIST_SETNAME),
 	IPSET_FLAG_BIT_LIST_HEADER = 2,
 	IPSET_FLAG_LIST_HEADER	= (1 << IPSET_FLAG_BIT_LIST_HEADER),
-	IPSET_FLAG_CMD_MAX = 15,	/* Lower half */
+	IPSET_FLAG_BIT_SKIP_COUNTER_UPDATE = 3,
+	IPSET_FLAG_SKIP_COUNTER_UPDATE =
+		(1 << IPSET_FLAG_BIT_SKIP_COUNTER_UPDATE),
+	IPSET_FLAG_BIT_SKIP_SUBCOUNTER_UPDATE = 4,
+	IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE =
+		(1 << IPSET_FLAG_BIT_SKIP_SUBCOUNTER_UPDATE),
+	IPSET_FLAG_BIT_MATCH_COUNTERS = 5,
+	IPSET_FLAG_MATCH_COUNTERS = (1 << IPSET_FLAG_BIT_MATCH_COUNTERS),
+	IPSET_FLAG_BIT_RETURN_NOMATCH = 7,
+	IPSET_FLAG_RETURN_NOMATCH = (1 << IPSET_FLAG_BIT_RETURN_NOMATCH),
+	IPSET_FLAG_CMD_MAX = 15,
 };
 
-/* Flags at CADT attribute level */
+/* Flags at CADT attribute level, upper half of cmdattrs */
 enum ipset_cadt_flags {
 	IPSET_FLAG_BIT_BEFORE	= 0,
 	IPSET_FLAG_BEFORE	= (1 << IPSET_FLAG_BIT_BEFORE),
@@ -166,7 +176,7 @@ enum ipset_cadt_flags {
 	IPSET_FLAG_NOMATCH	= (1 << IPSET_FLAG_BIT_NOMATCH),
 	IPSET_FLAG_BIT_WITH_COUNTERS = 3,
 	IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS),
-	IPSET_FLAG_CADT_MAX	= 15,	/* Upper half */
+	IPSET_FLAG_CADT_MAX	= 15,
 };
 
 /* Commands with settype-specific attributes */
@@ -195,6 +205,7 @@ enum ip_set_dim {
 	 * If changed, new revision of iptables match/target is required.
 	 */
 	IPSET_DIM_MAX = 6,
+	/* Backward compatibility: set match revision 2 */
 	IPSET_BIT_RETURN_NOMATCH = 7,
 };
 
@@ -207,6 +218,18 @@ enum ip_set_kopt {
 	IPSET_RETURN_NOMATCH = (1 << IPSET_BIT_RETURN_NOMATCH),
 };
 
+enum {
+	IPSET_COUNTER_NONE = 0,
+	IPSET_COUNTER_EQ,
+	IPSET_COUNTER_NE,
+	IPSET_COUNTER_LT,
+	IPSET_COUNTER_GT,
+};
+
+struct ip_set_counter_match {
+	__u8 op;
+	__u64 value;
+};
 
 /* Interface to iptables/ip6tables */
 
diff --git a/include/uapi/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h
index e3a9978f259f..964d3d42f874 100644
--- a/include/uapi/linux/netfilter/xt_set.h
+++ b/include/uapi/linux/netfilter/xt_set.h
@@ -62,4 +62,13 @@ struct xt_set_info_target_v2 {
 	__u32 timeout;
 };
 
+/* Revision 3 match */
+
+struct xt_set_info_match_v3 {
+	struct xt_set_info match_set;
+	struct ip_set_counter_match packets;
+	struct ip_set_counter_match bytes;
+	__u32 flags;
+};
+
 #endif /*_XT_SET_H*/
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index f6d878a46c43..f77139007983 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -413,7 +413,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 		ret = 1;
 	} else {
 		/* --return-nomatch: invert matched element */
-		if ((opt->flags & IPSET_RETURN_NOMATCH) &&
+		if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) &&
 		    (set->type->features & IPSET_TYPE_NOMATCH) &&
 		    (ret > 0 || ret == -ENOTEMPTY))
 			ret = -ret;
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index c09022e37406..979b8c90e422 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -84,9 +84,13 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
 {
 	struct list_set *map = set->data;
 	struct set_elem *e;
-	u32 i;
+	u32 i, cmdflags = opt->cmdflags;
 	int ret;
 
+	/* Don't lookup sub-counters at all */
+	opt->cmdflags &= ~IPSET_FLAG_MATCH_COUNTERS;
+	if (opt->cmdflags & IPSET_FLAG_SKIP_SUBCOUNTER_UPDATE)
+		opt->cmdflags &= ~IPSET_FLAG_SKIP_COUNTER_UPDATE;
 	for (i = 0; i < map->size; i++) {
 		e = list_set_elem(map, i);
 		if (e->id == IPSET_INVALID_ID)
@@ -99,7 +103,7 @@ list_set_ktest(struct ip_set *set, const struct sk_buff *skb,
 			if (SET_WITH_COUNTER(set))
 				ip_set_update_counter(ext_counter(e, map),
 						      ext, &opt->ext,
-						      opt->cmdflags);
+						      cmdflags);
 			return ret;
 		}
 	}
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index 636c5199ff35..31790e789e22 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -189,6 +189,9 @@ set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
 	ADT_OPT(opt, par->family, info->match_set.dim,
 		info->match_set.flags, 0, UINT_MAX);
 
+	if (opt.flags & IPSET_RETURN_NOMATCH)
+		opt.cmdflags |= IPSET_FLAG_RETURN_NOMATCH;
+
 	return match_set(info->match_set.index, skb, par, &opt,
 			 info->match_set.flags & IPSET_INV_MATCH);
 }
@@ -317,6 +320,52 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
 #define set_target_v2_checkentry	set_target_v1_checkentry
 #define set_target_v2_destroy		set_target_v1_destroy
 
+/* Revision 3 match */
+
+static bool
+match_counter(u64 counter, const struct ip_set_counter_match *info)
+{
+	switch (info->op) {
+	case IPSET_COUNTER_NONE:
+		return true;
+	case IPSET_COUNTER_EQ:
+		return counter == info->value;
+	case IPSET_COUNTER_NE:
+		return counter != info->value;
+	case IPSET_COUNTER_LT:
+		return counter < info->value;
+	case IPSET_COUNTER_GT:
+		return counter > info->value;
+	}
+	return false;
+}
+
+static bool
+set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_set_info_match_v3 *info = par->matchinfo;
+	ADT_OPT(opt, par->family, info->match_set.dim,
+		info->match_set.flags, info->flags, UINT_MAX);
+	int ret;
+
+	if (info->packets.op != IPSET_COUNTER_NONE ||
+	    info->bytes.op != IPSET_COUNTER_NONE)
+		opt.cmdflags |= IPSET_FLAG_MATCH_COUNTERS;
+
+	ret = match_set(info->match_set.index, skb, par, &opt,
+			info->match_set.flags & IPSET_INV_MATCH);
+
+	if (!(ret && opt.cmdflags & IPSET_FLAG_MATCH_COUNTERS))
+		return ret;
+
+	if (!match_counter(opt.ext.packets, &info->packets))
+		return 0;
+	return match_counter(opt.ext.bytes, &info->bytes);
+}
+
+#define set_match_v3_checkentry	set_match_v1_checkentry
+#define set_match_v3_destroy	set_match_v1_destroy
+
 static struct xt_match set_matches[] __read_mostly = {
 	{
 		.name		= "set",
@@ -369,6 +418,27 @@ static struct xt_match set_matches[] __read_mostly = {
 		.destroy	= set_match_v1_destroy,
 		.me		= THIS_MODULE
 	},
+	/* counters support: update, match */
+	{
+		.name		= "set",
+		.family		= NFPROTO_IPV4,
+		.revision	= 3,
+		.match		= set_match_v3,
+		.matchsize	= sizeof(struct xt_set_info_match_v3),
+		.checkentry	= set_match_v3_checkentry,
+		.destroy	= set_match_v3_destroy,
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "set",
+		.family		= NFPROTO_IPV6,
+		.revision	= 3,
+		.match		= set_match_v3,
+		.matchsize	= sizeof(struct xt_set_info_match_v3),
+		.checkentry	= set_match_v3_checkentry,
+		.destroy	= set_match_v3_destroy,
+		.me		= THIS_MODULE
+	},
 };
 
 static struct xt_target set_targets[] __read_mostly = {
-- 
cgit 


From 7237190df8c4129241697530a4eecabdc4ecc66e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 19 Apr 2013 04:58:26 +0000
Subject: netfilter: nfnetlink_queue: add skb info attribute

Once we allow userspace to receive gso/gro packets, userspace
needs to be able to determine when checksums appear to be
broken, but are not.

NFQA_SKB_CSUMNOTREADY means 'checksums will be fixed in kernel
later, pretend they are ok'.

NFQA_SKB_GSO could be used for statistics, or to determine when
packet size exceeds mtu.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nfnetlink_queue.h |  7 +++++++
 net/netfilter/nfnetlink_queue_core.c           | 16 ++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h
index 70ec8c2bc11a..0069da370464 100644
--- a/include/uapi/linux/netfilter/nfnetlink_queue.h
+++ b/include/uapi/linux/netfilter/nfnetlink_queue.h
@@ -45,6 +45,7 @@ enum nfqnl_attr_type {
 	NFQA_CT,			/* nf_conntrack_netlink.h */
 	NFQA_CT_INFO,			/* enum ip_conntrack_info */
 	NFQA_CAP_LEN,			/* __u32 length of captured packet */
+	NFQA_SKB_INFO,			/* __u32 skb meta information */
 
 	__NFQA_MAX
 };
@@ -98,4 +99,10 @@ enum nfqnl_attr_config {
 #define NFQA_CFG_F_CONNTRACK			(1 << 1)
 #define NFQA_CFG_F_MAX				(1 << 2)
 
+/* flags for NFQA_SKB_INFO */
+/* packet appears to have wrong checksums, but they are ok */
+#define NFQA_SKB_CSUMNOTREADY (1 << 0)
+/* packet is GSO (i.e., exceeds device mtu) */
+#define NFQA_SKB_GSO (1 << 1)
+
 #endif /* _NFNETLINK_QUEUE_H */
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index edbae4caf753..d052cd6da5d2 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -272,6 +272,18 @@ nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
 	skb_shinfo(to)->nr_frags = j;
 }
 
+static int nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet)
+{
+	__u32 flags = 0;
+
+	if (packet->ip_summed == CHECKSUM_PARTIAL)
+		flags = NFQA_SKB_CSUMNOTREADY;
+	if (skb_is_gso(packet))
+		flags |= NFQA_SKB_GSO;
+
+	return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0;
+}
+
 static struct sk_buff *
 nfqnl_build_packet_message(struct nfqnl_instance *queue,
 			   struct nf_queue_entry *entry,
@@ -301,6 +313,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 #endif
 		+ nla_total_size(sizeof(u_int32_t))	/* mark */
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
+		+ nla_total_size(sizeof(u_int32_t))	/* skbinfo */
 		+ nla_total_size(sizeof(u_int32_t));	/* cap_len */
 
 	if (entskb->tstamp.tv64)
@@ -454,6 +467,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 	if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
 		goto nla_put_failure;
 
+	if (nfqnl_put_packet_info(skb, entskb))
+		goto nla_put_failure;
+
 	if (data_len) {
 		struct nlattr *nla;
 
-- 
cgit 


From 00bd1cc24a7dd295ee095dc50791aab6ede46c7a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 19 Apr 2013 04:58:27 +0000
Subject: netfilter: nfnetlink_queue: avoid expensive gso segmentation and
 checksum fixup

Userspace can now indicate that it can cope with larger-than-mtu sized
packets and packets that have invalid ipv4/tcp checksums.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nfnetlink_queue.h | 3 ++-
 net/netfilter/nfnetlink_queue_core.c           | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h
index 0069da370464..a2308ae5a73d 100644
--- a/include/uapi/linux/netfilter/nfnetlink_queue.h
+++ b/include/uapi/linux/netfilter/nfnetlink_queue.h
@@ -97,7 +97,8 @@ enum nfqnl_attr_config {
 /* Flags for NFQA_CFG_FLAGS */
 #define NFQA_CFG_F_FAIL_OPEN			(1 << 0)
 #define NFQA_CFG_F_CONNTRACK			(1 << 1)
-#define NFQA_CFG_F_MAX				(1 << 2)
+#define NFQA_CFG_F_GSO				(1 << 2)
+#define NFQA_CFG_F_MAX				(1 << 3)
 
 /* flags for NFQA_SKB_INFO */
 /* packet appears to have wrong checksums, but they are ok */
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index d052cd6da5d2..2e0e835baf72 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -327,7 +327,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		break;
 
 	case NFQNL_COPY_PACKET:
-		if (entskb->ip_summed == CHECKSUM_PARTIAL &&
+		if (!(queue->flags & NFQA_CFG_F_GSO) &&
+		    entskb->ip_summed == CHECKSUM_PARTIAL &&
 		    skb_checksum_help(entskb))
 			return NULL;
 
@@ -636,7 +637,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
 	if (queue->copy_mode == NFQNL_COPY_NONE)
 		return -EINVAL;
 
-	if (!skb_is_gso(entry->skb))
+	if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(entry->skb))
 		return __nfqnl_enqueue_packet(net, queue, entry);
 
 	skb = entry->skb;
-- 
cgit 


From 6a5dc9e598fe90160fee7de098fa319665f5253e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 29 Apr 2013 08:39:56 +0000
Subject: net: Add MIB counters for checksum errors

Add MIB counters for checksum errors in IP layer,
and TCP/UDP/ICMP layers, to help diagnose problems.

$ nstat -a | grep  Csum
IcmpInCsumErrors                72                 0.0
TcpInCsumErrors                 382                0.0
UdpInCsumErrors                 463221             0.0
Icmp6InCsumErrors               75                 0.0
Udp6InCsumErrors                173442             0.0
IpExtInCsumErrors               10884              0.0

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h |  5 +++++
 net/ipv4/icmp.c           |  4 +++-
 net/ipv4/ip_input.c       |  4 +++-
 net/ipv4/proc.c           | 10 +++++++---
 net/ipv4/tcp_input.c      |  1 +
 net/ipv4/tcp_ipv4.c       | 14 ++++++++++----
 net/ipv4/tcp_output.c     |  3 ++-
 net/ipv4/udp.c            | 11 +++++++++--
 net/ipv6/icmp.c           |  4 +++-
 net/ipv6/proc.c           |  4 ++++
 net/ipv6/tcp_ipv6.c       | 19 ++++++++++---------
 net/ipv6/udp.c            | 21 +++++++++++++++------
 12 files changed, 72 insertions(+), 28 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index fefdec91c68b..df2e8b4f9c03 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -50,6 +50,7 @@ enum
 	IPSTATS_MIB_OUTMCASTOCTETS,		/* OutMcastOctets */
 	IPSTATS_MIB_INBCASTOCTETS,		/* InBcastOctets */
 	IPSTATS_MIB_OUTBCASTOCTETS,		/* OutBcastOctets */
+	IPSTATS_MIB_CSUMERRORS,			/* InCsumErrors */
 	__IPSTATS_MIB_MAX
 };
 
@@ -87,6 +88,7 @@ enum
 	ICMP_MIB_OUTTIMESTAMPREPS,		/* OutTimestampReps */
 	ICMP_MIB_OUTADDRMASKS,			/* OutAddrMasks */
 	ICMP_MIB_OUTADDRMASKREPS,		/* OutAddrMaskReps */
+	ICMP_MIB_CSUMERRORS,			/* InCsumErrors */
 	__ICMP_MIB_MAX
 };
 
@@ -103,6 +105,7 @@ enum
 	ICMP6_MIB_INERRORS,			/* InErrors */
 	ICMP6_MIB_OUTMSGS,			/* OutMsgs */
 	ICMP6_MIB_OUTERRORS,			/* OutErrors */
+	ICMP6_MIB_CSUMERRORS,			/* InCsumErrors */
 	__ICMP6_MIB_MAX
 };
 
@@ -130,6 +133,7 @@ enum
 	TCP_MIB_RETRANSSEGS,			/* RetransSegs */
 	TCP_MIB_INERRS,				/* InErrs */
 	TCP_MIB_OUTRSTS,			/* OutRsts */
+	TCP_MIB_CSUMERRORS,			/* InCsumErrors */
 	__TCP_MIB_MAX
 };
 
@@ -147,6 +151,7 @@ enum
 	UDP_MIB_OUTDATAGRAMS,			/* OutDatagrams */
 	UDP_MIB_RCVBUFERRORS,			/* RcvbufErrors */
 	UDP_MIB_SNDBUFERRORS,			/* SndbufErrors */
+	UDP_MIB_CSUMERRORS,			/* InCsumErrors */
 	__UDP_MIB_MAX
 };
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 3ac5dff79627..76e10b47e053 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -881,7 +881,7 @@ int icmp_rcv(struct sk_buff *skb)
 	case CHECKSUM_NONE:
 		skb->csum = 0;
 		if (__skb_checksum_complete(skb))
-			goto error;
+			goto csum_error;
 	}
 
 	if (!pskb_pull(skb, sizeof(*icmph)))
@@ -929,6 +929,8 @@ int icmp_rcv(struct sk_buff *skb)
 drop:
 	kfree_skb(skb);
 	return 0;
+csum_error:
+	ICMP_INC_STATS_BH(net, ICMP_MIB_CSUMERRORS);
 error:
 	ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
 	goto drop;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 2bdf802e28e2..3da817b89e9b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -419,7 +419,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	iph = ip_hdr(skb);
 
 	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
-		goto inhdr_error;
+		goto csum_error;
 
 	len = ntohs(iph->tot_len);
 	if (skb->len < len) {
@@ -446,6 +446,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL,
 		       ip_rcv_finish);
 
+csum_error:
+	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS);
 inhdr_error:
 	IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
 drop:
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 6da51d55d03a..2a5bf86d2415 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -125,6 +125,7 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
 	SNMP_MIB_ITEM("OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
 	SNMP_MIB_ITEM("InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
 	SNMP_MIB_ITEM("OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
+	SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS),
 	SNMP_MIB_SENTINEL
 };
 
@@ -162,6 +163,7 @@ static const struct snmp_mib snmp4_tcp_list[] = {
 	SNMP_MIB_ITEM("RetransSegs", TCP_MIB_RETRANSSEGS),
 	SNMP_MIB_ITEM("InErrs", TCP_MIB_INERRS),
 	SNMP_MIB_ITEM("OutRsts", TCP_MIB_OUTRSTS),
+	SNMP_MIB_ITEM("InCsumErrors", TCP_MIB_CSUMERRORS),
 	SNMP_MIB_SENTINEL
 };
 
@@ -172,6 +174,7 @@ static const struct snmp_mib snmp4_udp_list[] = {
 	SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS),
 	SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS),
 	SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS),
+	SNMP_MIB_ITEM("InCsumErrors", UDP_MIB_CSUMERRORS),
 	SNMP_MIB_SENTINEL
 };
 
@@ -322,15 +325,16 @@ static void icmp_put(struct seq_file *seq)
 	struct net *net = seq->private;
 	atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs;
 
-	seq_puts(seq, "\nIcmp: InMsgs InErrors");
+	seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " In%s", icmpmibmap[i].name);
 	seq_printf(seq, " OutMsgs OutErrors");
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " Out%s", icmpmibmap[i].name);
-	seq_printf(seq, "\nIcmp: %lu %lu",
+	seq_printf(seq, "\nIcmp: %lu %lu %lu",
 		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INMSGS),
-		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS));
+		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS),
+		snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS));
 	for (i=0; icmpmibmap[i].name != NULL; i++)
 		seq_printf(seq, " %lu",
 			   atomic_long_read(ptr + icmpmibmap[i].index));
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index aafd052865ba..08bbe6096528 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5273,6 +5273,7 @@ step5:
 	return 0;
 
 csum_error:
+	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 
 discard:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2278669b1d85..8ea975164596 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1866,6 +1866,7 @@ discard:
 	return 0;
 
 csum_err:
+	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 	goto discard;
 }
@@ -1985,7 +1986,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
 	 * provided case of th->doff==0 is eliminated.
 	 * So, we defer the checks. */
 	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
-		goto bad_packet;
+		goto csum_error;
 
 	th = tcp_hdr(skb);
 	iph = ip_hdr(skb);
@@ -2051,6 +2052,8 @@ no_tcp_socket:
 		goto discard_it;
 
 	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
+csum_error:
+		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
 bad_packet:
 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
 	} else {
@@ -2072,10 +2075,13 @@ do_time_wait:
 		goto discard_it;
 	}
 
-	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
-		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+	if (skb->len < (th->doff << 2)) {
 		inet_twsk_put(inet_twsk(sk));
-		goto discard_it;
+		goto bad_packet;
+	}
+	if (tcp_checksum_complete(skb)) {
+		inet_twsk_put(inet_twsk(sk));
+		goto csum_error;
 	}
 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
 	case TCP_TW_SYN: {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b735c23a961d..536d40929ba6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -80,8 +80,9 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 
 	tp->packets_out += tcp_skb_pcount(skb);
 	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
-	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
+	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		tcp_rearm_rto(sk);
+	}
 }
 
 /* SND.NXT, if window was not shrunk.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2722db024a0b..3159d16441d0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1131,6 +1131,8 @@ static unsigned int first_packet_length(struct sock *sk)
 	spin_lock_bh(&rcvq->lock);
 	while ((skb = skb_peek(rcvq)) != NULL &&
 		udp_lib_checksum_complete(skb)) {
+		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS,
+				 IS_UDPLITE(sk));
 		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
 				 IS_UDPLITE(sk));
 		atomic_inc(&sk->sk_drops);
@@ -1286,8 +1288,10 @@ out:
 
 csum_copy_err:
 	slow = lock_sock_fast(sk);
-	if (!skb_kill_datagram(sk, skb, flags))
+	if (!skb_kill_datagram(sk, skb, flags)) {
+		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
 		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
+	}
 	unlock_sock_fast(sk, slow);
 
 	if (noblock)
@@ -1513,7 +1517,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 	if (rcu_access_pointer(sk->sk_filter) &&
 	    udp_lib_checksum_complete(skb))
-		goto drop;
+		goto csum_error;
 
 
 	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
@@ -1533,6 +1537,8 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 	return rc;
 
+csum_error:
+	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
 drop:
 	UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 	atomic_inc(&sk->sk_drops);
@@ -1749,6 +1755,7 @@ csum_error:
 		       proto == IPPROTO_UDPLITE ? "Lite" : "",
 		       &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest),
 		       ulen);
+	UDP_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
 drop:
 	UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
 	kfree_skb(skb);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 2a53a790514d..b4ff0a42b8c7 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -699,7 +699,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
 		if (__skb_checksum_complete(skb)) {
 			LIMIT_NETDEBUG(KERN_DEBUG "ICMPv6 checksum failed [%pI6 > %pI6]\n",
 				       saddr, daddr);
-			goto discard_it;
+			goto csum_error;
 		}
 	}
 
@@ -785,6 +785,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
 	kfree_skb(skb);
 	return 0;
 
+csum_error:
+	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
 discard_it:
 	ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS);
 drop_no_count:
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index bbbe53a99b57..115cc58898f5 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -90,6 +90,7 @@ static const struct snmp_mib snmp6_ipstats_list[] = {
 	SNMP_MIB_ITEM("Ip6OutMcastOctets", IPSTATS_MIB_OUTMCASTOCTETS),
 	SNMP_MIB_ITEM("Ip6InBcastOctets", IPSTATS_MIB_INBCASTOCTETS),
 	SNMP_MIB_ITEM("Ip6OutBcastOctets", IPSTATS_MIB_OUTBCASTOCTETS),
+	SNMP_MIB_ITEM("InCsumErrors", IPSTATS_MIB_CSUMERRORS),
 	SNMP_MIB_SENTINEL
 };
 
@@ -99,6 +100,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = {
 	SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
 	SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
 	SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
+	SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS),
 	SNMP_MIB_SENTINEL
 };
 
@@ -129,6 +131,7 @@ static const struct snmp_mib snmp6_udp6_list[] = {
 	SNMP_MIB_ITEM("Udp6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
 	SNMP_MIB_ITEM("Udp6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
 	SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
+	SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS),
 	SNMP_MIB_SENTINEL
 };
 
@@ -139,6 +142,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
 	SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
 	SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
 	SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
+	SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e51bd1a58264..71167069b394 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1405,6 +1405,7 @@ discard:
 	kfree_skb(skb);
 	return 0;
 csum_err:
+	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_CSUMERRORS);
 	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
 	goto discard;
 
@@ -1466,7 +1467,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 		goto discard_it;
 
 	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
-		goto bad_packet;
+		goto csum_error;
 
 	th = tcp_hdr(skb);
 	hdr = ipv6_hdr(skb);
@@ -1530,6 +1531,8 @@ no_tcp_socket:
 		goto discard_it;
 
 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+csum_error:
+		TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS);
 bad_packet:
 		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
 	} else {
@@ -1537,11 +1540,6 @@ bad_packet:
 	}
 
 discard_it:
-
-	/*
-	 *	Discard frame
-	 */
-
 	kfree_skb(skb);
 	return 0;
 
@@ -1555,10 +1553,13 @@ do_time_wait:
 		goto discard_it;
 	}
 
-	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
-		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
+	if (skb->len < (th->doff<<2)) {
 		inet_twsk_put(inet_twsk(sk));
-		goto discard_it;
+		goto bad_packet;
+	}
+	if (tcp_checksum_complete(skb)) {
+		inet_twsk_put(inet_twsk(sk));
+		goto csum_error;
 	}
 
 	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index da6019b63730..d4defdd44937 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -483,12 +483,17 @@ out:
 csum_copy_err:
 	slow = lock_sock_fast(sk);
 	if (!skb_kill_datagram(sk, skb, flags)) {
-		if (is_udp4)
+		if (is_udp4) {
+			UDP_INC_STATS_USER(sock_net(sk),
+					UDP_MIB_CSUMERRORS, is_udplite);
 			UDP_INC_STATS_USER(sock_net(sk),
 					UDP_MIB_INERRORS, is_udplite);
-		else
+		} else {
+			UDP6_INC_STATS_USER(sock_net(sk),
+					UDP_MIB_CSUMERRORS, is_udplite);
 			UDP6_INC_STATS_USER(sock_net(sk),
 					UDP_MIB_INERRORS, is_udplite);
+		}
 	}
 	unlock_sock_fast(sk, slow);
 
@@ -637,7 +642,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 	if (rcu_access_pointer(sk->sk_filter)) {
 		if (udp_lib_checksum_complete(skb))
-			goto drop;
+			goto csum_error;
 	}
 
 	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf))
@@ -656,6 +661,8 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	bh_unlock_sock(sk);
 
 	return rc;
+csum_error:
+	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
 drop:
 	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
 	atomic_inc(&sk->sk_drops);
@@ -817,7 +824,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	}
 
 	if (udp6_csum_init(skb, uh, proto))
-		goto discard;
+		goto csum_error;
 
 	/*
 	 *	Multicast receive code
@@ -850,7 +857,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 		goto discard;
 
 	if (udp_lib_checksum_complete(skb))
-		goto discard;
+		goto csum_error;
 
 	UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
@@ -867,7 +874,9 @@ short_packet:
 		       skb->len,
 		       daddr,
 		       ntohs(uh->dest));
-
+	goto discard;
+csum_error:
+	UDP6_INC_STATS_BH(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE);
 discard:
 	UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
 	kfree_skb(skb);
-- 
cgit 


From 7136851117744f1d291bed6d307432699d405109 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Mon, 29 Apr 2013 15:07:25 -0700
Subject: mm: make snapshotting pages for stable writes a per-bio operation

Walking a bio's page mappings has proved problematic, so create a new
bio flag to indicate that a bio's data needs to be snapshotted in order
to guarantee stable pages during writeback.  Next, for the one user
(ext3/jbd) of snapshotting, hook all the places where writes can be
initiated without PG_writeback set, and set BIO_SNAP_STABLE there.

We must also flag journal "metadata" bios for stable writeout, since
file data can be written through the journal.  Finally, the
MS_SNAP_STABLE mount flag (only used by ext3) is now superfluous, so get
rid of it.

[akpm@linux-foundation.org: rename _submit_bh()'s `flags' to `bio_flags', delobotomize the _submit_bh declaration]
[akpm@linux-foundation.org: teeny cleanup]
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Artem Bityutskiy <dedekind1@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c                 |  9 ++++++++-
 fs/ext3/super.c             |  1 -
 fs/jbd/commit.c             | 25 ++++++++++++++++++++++---
 include/linux/blk_types.h   |  3 ++-
 include/linux/buffer_head.h |  1 +
 include/uapi/linux/fs.h     |  1 -
 mm/bounce.c                 | 21 +--------------------
 mm/page-writeback.c         |  4 ----
 8 files changed, 34 insertions(+), 31 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index b4dcb34c9635..71578d69b82d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2949,7 +2949,7 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
 	}
 }
 
-int submit_bh(int rw, struct buffer_head * bh)
+int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
 {
 	struct bio *bio;
 	int ret = 0;
@@ -2984,6 +2984,7 @@ int submit_bh(int rw, struct buffer_head * bh)
 
 	bio->bi_end_io = end_bio_bh_io_sync;
 	bio->bi_private = bh;
+	bio->bi_flags |= bio_flags;
 
 	/* Take care of bh's that straddle the end of the device */
 	guard_bh_eod(rw, bio, bh);
@@ -2997,6 +2998,12 @@ int submit_bh(int rw, struct buffer_head * bh)
 	bio_put(bio);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(_submit_bh);
+
+int submit_bh(int rw, struct buffer_head *bh)
+{
+	return _submit_bh(rw, bh, 0);
+}
 EXPORT_SYMBOL(submit_bh);
 
 /**
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index fb5120a5505c..3dc48cc8b6eb 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2067,7 +2067,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
 		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
 		"writeback");
-	sb->s_flags |= MS_SNAP_STABLE;
 
 	return 0;
 
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 86b39b167c23..11bb11f48b3a 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -162,8 +162,17 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs,
 
 	for (i = 0; i < bufs; i++) {
 		wbuf[i]->b_end_io = end_buffer_write_sync;
-		/* We use-up our safety reference in submit_bh() */
-		submit_bh(write_op, wbuf[i]);
+		/*
+		 * Here we write back pagecache data that may be mmaped. Since
+		 * we cannot afford to clean the page and set PageWriteback
+		 * here due to lock ordering (page lock ranks above transaction
+		 * start), the data can change while IO is in flight. Tell the
+		 * block layer it should bounce the bio pages if stable data
+		 * during write is required.
+		 *
+		 * We use up our safety reference in submit_bh().
+		 */
+		_submit_bh(write_op, wbuf[i], 1 << BIO_SNAP_STABLE);
 	}
 }
 
@@ -667,7 +676,17 @@ start_journal_io:
 				clear_buffer_dirty(bh);
 				set_buffer_uptodate(bh);
 				bh->b_end_io = journal_end_buffer_io_sync;
-				submit_bh(write_op, bh);
+				/*
+				 * In data=journal mode, here we can end up
+				 * writing pagecache data that might be
+				 * mmapped. Since we can't afford to clean the
+				 * page and set PageWriteback (see the comment
+				 * near the other use of _submit_bh()), the
+				 * data can change while the write is in
+				 * flight.  Tell the block layer to bounce the
+				 * bio pages if stable pages are required.
+				 */
+				_submit_bh(write_op, bh, 1 << BIO_SNAP_STABLE);
 			}
 			cond_resched();
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index cdf11191e645..22990cf4439d 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -111,12 +111,13 @@ struct bio {
 #define BIO_FS_INTEGRITY 9	/* fs owns integrity data, not block layer */
 #define BIO_QUIET	10	/* Make BIO Quiet */
 #define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */
+#define BIO_SNAP_STABLE	12	/* bio data must be snapshotted during write */
 
 /*
  * Flags starting here get preserved by bio_reset() - this includes
  * BIO_POOL_IDX()
  */
-#define BIO_RESET_BITS	12
+#define BIO_RESET_BITS	13
 
 #define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
 
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 5afc4f94d110..4c16c4a88d47 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -181,6 +181,7 @@ void ll_rw_block(int, int, struct buffer_head * bh[]);
 int sync_dirty_buffer(struct buffer_head *bh);
 int __sync_dirty_buffer(struct buffer_head *bh, int rw);
 void write_dirty_buffer(struct buffer_head *bh, int rw);
+int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags);
 int submit_bh(int, struct buffer_head *);
 void write_boundary_block(struct block_device *bdev,
 			sector_t bblock, unsigned blocksize);
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index c7fc1e6517c3..a4ed56cf0eac 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -88,7 +88,6 @@ struct inodes_stat_t {
 #define MS_STRICTATIME	(1<<24) /* Always perform atime updates */
 
 /* These sb flags are internal to the kernel */
-#define MS_SNAP_STABLE	(1<<27) /* Snapshot pages during writeback, if needed */
 #define MS_NOSEC	(1<<28)
 #define MS_BORN		(1<<29)
 #define MS_ACTIVE	(1<<30)
diff --git a/mm/bounce.c b/mm/bounce.c
index 5f8901768602..a5c2ec3589cb 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -181,32 +181,13 @@ static void bounce_end_io_read_isa(struct bio *bio, int err)
 #ifdef CONFIG_NEED_BOUNCE_POOL
 static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
 {
-	struct page *page;
-	struct backing_dev_info *bdi;
-	struct address_space *mapping;
-	struct bio_vec *from;
-	int i;
-
 	if (bio_data_dir(bio) != WRITE)
 		return 0;
 
 	if (!bdi_cap_stable_pages_required(&q->backing_dev_info))
 		return 0;
 
-	/*
-	 * Based on the first page that has a valid mapping, decide whether or
-	 * not we have to employ bounce buffering to guarantee stable pages.
-	 */
-	bio_for_each_segment(from, bio, i) {
-		page = from->bv_page;
-		mapping = page_mapping(page);
-		if (!mapping)
-			continue;
-		bdi = mapping->backing_dev_info;
-		return mapping->host->i_sb->s_flags & MS_SNAP_STABLE;
-	}
-
-	return 0;
+	return test_bit(BIO_SNAP_STABLE, &bio->bi_flags);
 }
 #else
 static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index efe68148f621..4514ad7415c3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2311,10 +2311,6 @@ void wait_for_stable_page(struct page *page)
 
 	if (!bdi_cap_stable_pages_required(bdi))
 		return;
-#ifdef CONFIG_NEED_BOUNCE_POOL
-	if (mapping->host->i_sb->s_flags & MS_SNAP_STABLE)
-		return;
-#endif /* CONFIG_NEED_BOUNCE_POOL */
 
 	wait_on_page_writeback(page);
 }
-- 
cgit 


From 46e959ea2969cc1668d09b0dc55226946cf781f1 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Fri, 3 May 2013 14:03:50 -0400
Subject: audit: add an option to control logging of passwords with
 pam_tty_audit

Most commands are entered one line at a time and processed as complete lines
in non-canonical mode.  Commands that interactively require a password, enter
canonical mode to do this while shutting off echo.  This pair of features
(icanon and !echo) can be used to avoid logging passwords by audit while still
logging the rest of the command.

Adding a member (log_passwd) to the struct audit_tty_status passed in by
pam_tty_audit allows control of canonical mode without echo per task.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 drivers/tty/tty_audit.c    |  9 +++++++++
 include/linux/sched.h      |  1 +
 include/uapi/linux/audit.h |  3 ++-
 kernel/audit.c             | 16 ++++++++++------
 4 files changed, 22 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c
index 755d418019c8..5f3868202183 100644
--- a/drivers/tty/tty_audit.c
+++ b/drivers/tty/tty_audit.c
@@ -138,6 +138,7 @@ void tty_audit_fork(struct signal_struct *sig)
 
 	spin_lock_irqsave(&current->sighand->siglock, flags);
 	sig->audit_tty = current->signal->audit_tty;
+	sig->audit_tty_log_passwd = current->signal->audit_tty_log_passwd;
 	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 }
 
@@ -275,10 +276,18 @@ void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
 {
 	struct tty_audit_buf *buf;
 	int major, minor;
+	int audit_log_tty_passwd;
+	unsigned long flags;
 
 	if (unlikely(size == 0))
 		return;
 
+	spin_lock_irqsave(&current->sighand->siglock, flags);
+	audit_log_tty_passwd = current->signal->audit_tty_log_passwd;
+	spin_unlock_irqrestore(&current->sighand->siglock, flags);
+	if (!audit_log_tty_passwd && icanon && !L_ECHO(tty))
+		return;
+
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY
 	    && tty->driver->subtype == PTY_TYPE_MASTER)
 		return;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d2112477ff5e..c4689fe92864 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -641,6 +641,7 @@ struct signal_struct {
 #endif
 #ifdef CONFIG_AUDIT
 	unsigned audit_tty;
+	unsigned audit_tty_log_passwd;
 	struct tty_audit_buf *tty_audit_buf;
 #endif
 #ifdef CONFIG_CGROUPS
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 9f096f1c0907..c058c24b97ac 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -369,7 +369,8 @@ struct audit_status {
 };
 
 struct audit_tty_status {
-	__u32		enabled; /* 1 = enabled, 0 = disabled */
+	__u32		enabled;	/* 1 = enabled, 0 = disabled */
+	__u32		log_passwd;	/* 1 = enabled, 0 = disabled */
 };
 
 /* audit_rule_data supports filter rules with both integer and string
diff --git a/kernel/audit.c b/kernel/audit.c
index 241aa8593fa8..998a0d4155cf 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -49,6 +49,7 @@
 #include <linux/slab.h>
 #include <linux/err.h>
 #include <linux/kthread.h>
+#include <linux/kernel.h>
 
 #include <linux/audit.h>
 
@@ -808,6 +809,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 		spin_lock_irqsave(&tsk->sighand->siglock, flags);
 		s.enabled = tsk->signal->audit_tty != 0;
+		s.log_passwd = tsk->signal->audit_tty_log_passwd;
 		spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
 
 		audit_send_reply(NETLINK_CB(skb).portid, seq,
@@ -815,18 +817,20 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		break;
 	}
 	case AUDIT_TTY_SET: {
-		struct audit_tty_status *s;
+		struct audit_tty_status s;
 		struct task_struct *tsk = current;
 		unsigned long flags;
 
-		if (nlh->nlmsg_len < sizeof(struct audit_tty_status))
-			return -EINVAL;
-		s = data;
-		if (s->enabled != 0 && s->enabled != 1)
+		memset(&s, 0, sizeof(s));
+		/* guard against past and future API changes */
+		memcpy(&s, data, min(sizeof(s), (size_t)nlh->nlmsg_len));
+		if ((s.enabled != 0 && s.enabled != 1) ||
+		    (s.log_passwd != 0 && s.log_passwd != 1))
 			return -EINVAL;
 
 		spin_lock_irqsave(&tsk->sighand->siglock, flags);
-		tsk->signal->audit_tty = s->enabled != 0;
+		tsk->signal->audit_tty = s.enabled;
+		tsk->signal->audit_tty_log_passwd = s.log_passwd;
 		spin_unlock_irqrestore(&tsk->sighand->siglock, flags);
 		break;
 	}
-- 
cgit 


From 84c751bd4aebbaae995fe32279d3dba48327bad4 Mon Sep 17 00:00:00 2001
From: Andrey Vagin <avagin@openvz.org>
Date: Tue, 30 Apr 2013 15:27:59 -0700
Subject: ptrace: add ability to retrieve signals without removing from a queue
 (v4)

This patch adds a new ptrace request PTRACE_PEEKSIGINFO.

This request is used to retrieve information about pending signals
starting with the specified sequence number.  Siginfo_t structures are
copied from the child into the buffer starting at "data".

The argument "addr" is a pointer to struct ptrace_peeksiginfo_args.
struct ptrace_peeksiginfo_args {
	u64 off;	/* from which siginfo to start */
	u32 flags;
	s32 nr;		/* how may siginfos to take */
};

"nr" has type "s32", because ptrace() returns "long", which has 32 bits on
i386 and a negative values is used for errors.

Currently here is only one flag PTRACE_PEEKSIGINFO_SHARED for dumping
signals from process-wide queue.  If this flag is not set, signals are
read from a per-thread queue.

The request PTRACE_PEEKSIGINFO returns a number of dumped signals.  If a
signal with the specified sequence number doesn't exist, ptrace returns
zero.  The request returns an error, if no signal has been dumped.

Errors:
EINVAL - one or more specified flags are not supported or nr is negative
EFAULT - buf or addr is outside your accessible address space.

A result siginfo contains a kernel part of si_code which usually striped,
but it's required for queuing the same siginfo back during restore of
pending signals.

This functionality is required for checkpointing pending signals.  Pedro
Alves suggested using it in "gdb" to peek at pending signals.  gdb already
uses PTRACE_GETSIGINFO to get the siginfo for the signal which was already
dequeued.  This functionality allows gdb to look at the pending signals
which were not reported yet.

The prototype of this code was developed by Oleg Nesterov.

Signed-off-by: Andrew Vagin <avagin@openvz.org>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Dave Jones <davej@redhat.com>
Cc: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Pedro Alves <palves@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/ptrace.h | 12 +++++++
 kernel/ptrace.c             | 80 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 022ab186a812..52ebcc89f306 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -5,6 +5,7 @@
 
 /* has the defines to get at the registers. */
 
+#include <linux/types.h>
 
 #define PTRACE_TRACEME		   0
 #define PTRACE_PEEKTEXT		   1
@@ -52,6 +53,17 @@
 #define PTRACE_INTERRUPT	0x4207
 #define PTRACE_LISTEN		0x4208
 
+#define PTRACE_PEEKSIGINFO	0x4209
+
+struct ptrace_peeksiginfo_args {
+	__u64 off;	/* from which siginfo to start */
+	__u32 flags;
+	__s32 nr;	/* how may siginfos to take */
+};
+
+/* Read signals from a shared (process wide) queue */
+#define PTRACE_PEEKSIGINFO_SHARED	(1 << 0)
+
 /* Wait extended result codes for the above trace options.  */
 #define PTRACE_EVENT_FORK	1
 #define PTRACE_EVENT_VFORK	2
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index acbd28424d81..17ae54da0ec2 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -24,6 +24,7 @@
 #include <linux/regset.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/cn_proc.h>
+#include <linux/compat.h>
 
 
 static int ptrace_trapping_sleep_fn(void *flags)
@@ -618,6 +619,81 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info)
 	return error;
 }
 
+static int ptrace_peek_siginfo(struct task_struct *child,
+				unsigned long addr,
+				unsigned long data)
+{
+	struct ptrace_peeksiginfo_args arg;
+	struct sigpending *pending;
+	struct sigqueue *q;
+	int ret, i;
+
+	ret = copy_from_user(&arg, (void __user *) addr,
+				sizeof(struct ptrace_peeksiginfo_args));
+	if (ret)
+		return -EFAULT;
+
+	if (arg.flags & ~PTRACE_PEEKSIGINFO_SHARED)
+		return -EINVAL; /* unknown flags */
+
+	if (arg.nr < 0)
+		return -EINVAL;
+
+	if (arg.flags & PTRACE_PEEKSIGINFO_SHARED)
+		pending = &child->signal->shared_pending;
+	else
+		pending = &child->pending;
+
+	for (i = 0; i < arg.nr; ) {
+		siginfo_t info;
+		s32 off = arg.off + i;
+
+		spin_lock_irq(&child->sighand->siglock);
+		list_for_each_entry(q, &pending->list, list) {
+			if (!off--) {
+				copy_siginfo(&info, &q->info);
+				break;
+			}
+		}
+		spin_unlock_irq(&child->sighand->siglock);
+
+		if (off >= 0) /* beyond the end of the list */
+			break;
+
+#ifdef CONFIG_COMPAT
+		if (unlikely(is_compat_task())) {
+			compat_siginfo_t __user *uinfo = compat_ptr(data);
+
+			ret = copy_siginfo_to_user32(uinfo, &info);
+			ret |= __put_user(info.si_code, &uinfo->si_code);
+		} else
+#endif
+		{
+			siginfo_t __user *uinfo = (siginfo_t __user *) data;
+
+			ret = copy_siginfo_to_user(uinfo, &info);
+			ret |= __put_user(info.si_code, &uinfo->si_code);
+		}
+
+		if (ret) {
+			ret = -EFAULT;
+			break;
+		}
+
+		data += sizeof(siginfo_t);
+		i++;
+
+		if (signal_pending(current))
+			break;
+
+		cond_resched();
+	}
+
+	if (i > 0)
+		return i;
+
+	return ret;
+}
 
 #ifdef PTRACE_SINGLESTEP
 #define is_singlestep(request)		((request) == PTRACE_SINGLESTEP)
@@ -748,6 +824,10 @@ int ptrace_request(struct task_struct *child, long request,
 		ret = put_user(child->ptrace_message, datalp);
 		break;
 
+	case PTRACE_PEEKSIGINFO:
+		ret = ptrace_peek_siginfo(child, addr, data);
+		break;
+
 	case PTRACE_GETSIGINFO:
 		ret = ptrace_getsiginfo(child, &siginfo);
 		if (!ret)
-- 
cgit 


From 60b9df7a54804a965850db00beec4d3a2c002536 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 1 May 2013 14:37:21 +0200
Subject: fuse: add flag to turn on async direct IO

Without async DIO write requests to a single file were always serialized.
With async DIO that's no longer the case.

So don't turn on async DIO by default for fear of breaking backward
compatibility.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c            | 8 ++++----
 fs/fuse/fuse_i.h          | 3 +++
 fs/fuse/inode.c           | 4 +++-
 include/uapi/linux/fuse.h | 7 ++++++-
 4 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 1f8e3d60dc07..6ab7ca43f9e0 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2371,14 +2371,14 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs)
 {
 	ssize_t ret = 0;
-	struct file *file = NULL;
+	struct file *file = iocb->ki_filp;
+	struct fuse_file *ff = file->private_data;
 	loff_t pos = 0;
 	struct inode *inode;
 	loff_t i_size;
 	size_t count = iov_length(iov, nr_segs);
 	struct fuse_io_priv *io;
 
-	file = iocb->ki_filp;
 	pos = offset;
 	inode = file->f_mapping->host;
 	i_size = i_size_read(inode);
@@ -2403,9 +2403,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	io->file = file;
 	/*
 	 * By default, we want to optimize all I/Os with async request
-	 * submission to the client filesystem.
+	 * submission to the client filesystem if supported.
 	 */
-	io->async = 1;
+	io->async = ff->fc->async_dio;
 	io->iocb = iocb;
 
 	/*
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 53b830e3b38f..fde7249a3a96 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -541,6 +541,9 @@ struct fuse_conn {
 	/** Does the filesystem want adaptive readdirplus? */
 	unsigned readdirplus_auto:1;
 
+	/** Does the filesystem support asynchronous direct-IO submission? */
+	unsigned async_dio:1;
+
 	/** The number of requests waiting for completion */
 	atomic_t num_waiting;
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 17ec1f70524d..6201f81e4d3a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -871,6 +871,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
 				fc->do_readdirplus = 1;
 			if (arg->flags & FUSE_READDIRPLUS_AUTO)
 				fc->readdirplus_auto = 1;
+			if (arg->flags & FUSE_ASYNC_DIO)
+				fc->async_dio = 1;
 		} else {
 			ra_pages = fc->max_read / PAGE_CACHE_SIZE;
 			fc->no_lock = 1;
@@ -898,7 +900,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
 		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
 		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
 		FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
-		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO;
+		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO;
 	req->in.h.opcode = FUSE_INIT;
 	req->in.numargs = 1;
 	req->in.args[0].size = sizeof(*arg);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 706d035fa748..60bb2f9f7b74 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -90,6 +90,9 @@
  * 7.21
  *  - add FUSE_READDIRPLUS
  *  - send the requested events in POLL request
+ *
+ * 7.22
+ *  - add FUSE_ASYNC_DIO
  */
 
 #ifndef _LINUX_FUSE_H
@@ -125,7 +128,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 21
+#define FUSE_KERNEL_MINOR_VERSION 22
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -215,6 +218,7 @@ struct fuse_file_lock {
  * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages
  * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
  * FUSE_READDIRPLUS_AUTO: adaptive readdirplus
+ * FUSE_ASYNC_DIO: asynchronous direct I/O submission
  */
 #define FUSE_ASYNC_READ		(1 << 0)
 #define FUSE_POSIX_LOCKS	(1 << 1)
@@ -231,6 +235,7 @@ struct fuse_file_lock {
 #define FUSE_AUTO_INVAL_DATA	(1 << 12)
 #define FUSE_DO_READDIRPLUS	(1 << 13)
 #define FUSE_READDIRPLUS_AUTO	(1 << 14)
+#define FUSE_ASYNC_DIO		(1 << 15)
 
 /**
  * CUSE INIT request/reply flags
-- 
cgit 


From 5012a3a384ad917b5c72a918607bd0cc64452ff8 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 2 May 2013 03:50:34 +0300
Subject: tcm_vhost: header split up

move uapi parts to vhost.h
move .c private parts to .c itself

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Asias He <asias@redhat.com>
Acked-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/vhost/scsi.c       | 112 +++++++++++++++++++++++++++++++++++---
 drivers/vhost/tcm_vhost.h  | 131 ---------------------------------------------
 include/uapi/linux/vhost.h |  28 ++++++++++
 3 files changed, 132 insertions(+), 139 deletions(-)
 delete mode 100644 drivers/vhost/tcm_vhost.h

(limited to 'include/uapi/linux')

diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 19ca021bf88c..eb1aa56dced8 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -51,7 +51,110 @@
 
 #include "vhost.c"
 #include "vhost.h"
-#include "tcm_vhost.h"
+
+#define TCM_VHOST_VERSION  "v0.1"
+#define TCM_VHOST_NAMELEN 256
+#define TCM_VHOST_MAX_CDB_SIZE 32
+
+struct vhost_scsi_inflight {
+	/* Wait for the flush operation to finish */
+	struct completion comp;
+	/* Refcount for the inflight reqs */
+	struct kref kref;
+};
+
+struct tcm_vhost_cmd {
+	/* Descriptor from vhost_get_vq_desc() for virt_queue segment */
+	int tvc_vq_desc;
+	/* virtio-scsi initiator task attribute */
+	int tvc_task_attr;
+	/* virtio-scsi initiator data direction */
+	enum dma_data_direction tvc_data_direction;
+	/* Expected data transfer length from virtio-scsi header */
+	u32 tvc_exp_data_len;
+	/* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */
+	u64 tvc_tag;
+	/* The number of scatterlists associated with this cmd */
+	u32 tvc_sgl_count;
+	/* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
+	u32 tvc_lun;
+	/* Pointer to the SGL formatted memory from virtio-scsi */
+	struct scatterlist *tvc_sgl;
+	/* Pointer to response */
+	struct virtio_scsi_cmd_resp __user *tvc_resp;
+	/* Pointer to vhost_scsi for our device */
+	struct vhost_scsi *tvc_vhost;
+	/* Pointer to vhost_virtqueue for the cmd */
+	struct vhost_virtqueue *tvc_vq;
+	/* Pointer to vhost nexus memory */
+	struct tcm_vhost_nexus *tvc_nexus;
+	/* The TCM I/O descriptor that is accessed via container_of() */
+	struct se_cmd tvc_se_cmd;
+	/* work item used for cmwq dispatch to tcm_vhost_submission_work() */
+	struct work_struct work;
+	/* Copy of the incoming SCSI command descriptor block (CDB) */
+	unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE];
+	/* Sense buffer that will be mapped into outgoing status */
+	unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
+	/* Completed commands list, serviced from vhost worker thread */
+	struct llist_node tvc_completion_list;
+	/* Used to track inflight cmd */
+	struct vhost_scsi_inflight *inflight;
+};
+
+struct tcm_vhost_nexus {
+	/* Pointer to TCM session for I_T Nexus */
+	struct se_session *tvn_se_sess;
+};
+
+struct tcm_vhost_nacl {
+	/* Binary World Wide unique Port Name for Vhost Initiator port */
+	u64 iport_wwpn;
+	/* ASCII formatted WWPN for Sas Initiator port */
+	char iport_name[TCM_VHOST_NAMELEN];
+	/* Returned by tcm_vhost_make_nodeacl() */
+	struct se_node_acl se_node_acl;
+};
+
+struct vhost_scsi;
+struct tcm_vhost_tpg {
+	/* Vhost port target portal group tag for TCM */
+	u16 tport_tpgt;
+	/* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus shutdown */
+	int tv_tpg_port_count;
+	/* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
+	int tv_tpg_vhost_count;
+	/* list for tcm_vhost_list */
+	struct list_head tv_tpg_list;
+	/* Used to protect access for tpg_nexus */
+	struct mutex tv_tpg_mutex;
+	/* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
+	struct tcm_vhost_nexus *tpg_nexus;
+	/* Pointer back to tcm_vhost_tport */
+	struct tcm_vhost_tport *tport;
+	/* Returned by tcm_vhost_make_tpg() */
+	struct se_portal_group se_tpg;
+	/* Pointer back to vhost_scsi, protected by tv_tpg_mutex */
+	struct vhost_scsi *vhost_scsi;
+};
+
+struct tcm_vhost_tport {
+	/* SCSI protocol the tport is providing */
+	u8 tport_proto_id;
+	/* Binary World Wide unique Port Name for Vhost Target port */
+	u64 tport_wwpn;
+	/* ASCII formatted WWPN for Vhost Target port */
+	char tport_name[TCM_VHOST_NAMELEN];
+	/* Returned by tcm_vhost_make_tport() */
+	struct se_wwn tport_wwn;
+};
+
+struct tcm_vhost_evt {
+	/* event to be sent to guest */
+	struct virtio_scsi_event event;
+	/* event list, serviced from vhost worker thread */
+	struct llist_node list;
+};
 
 enum {
 	VHOST_SCSI_VQ_CTL = 0,
@@ -73,13 +176,6 @@ enum {
 #define VHOST_SCSI_MAX_VQ	128
 #define VHOST_SCSI_MAX_EVENT	128
 
-struct vhost_scsi_inflight {
-	/* Wait for the flush operation to finish */
-	struct completion comp;
-	/* Refcount for the inflight reqs */
-	struct kref kref;
-};
-
 struct vhost_scsi_virtqueue {
 	struct vhost_virtqueue vq;
 	/*
diff --git a/drivers/vhost/tcm_vhost.h b/drivers/vhost/tcm_vhost.h
deleted file mode 100644
index 26a57c2fdf92..000000000000
--- a/drivers/vhost/tcm_vhost.h
+++ /dev/null
@@ -1,131 +0,0 @@
-#define TCM_VHOST_VERSION  "v0.1"
-#define TCM_VHOST_NAMELEN 256
-#define TCM_VHOST_MAX_CDB_SIZE 32
-
-struct vhost_scsi_inflight;
-struct tcm_vhost_cmd {
-	/* Descriptor from vhost_get_vq_desc() for virt_queue segment */
-	int tvc_vq_desc;
-	/* virtio-scsi initiator task attribute */
-	int tvc_task_attr;
-	/* virtio-scsi initiator data direction */
-	enum dma_data_direction tvc_data_direction;
-	/* Expected data transfer length from virtio-scsi header */
-	u32 tvc_exp_data_len;
-	/* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */
-	u64 tvc_tag;
-	/* The number of scatterlists associated with this cmd */
-	u32 tvc_sgl_count;
-	/* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
-	u32 tvc_lun;
-	/* Pointer to the SGL formatted memory from virtio-scsi */
-	struct scatterlist *tvc_sgl;
-	/* Pointer to response */
-	struct virtio_scsi_cmd_resp __user *tvc_resp;
-	/* Pointer to vhost_scsi for our device */
-	struct vhost_scsi *tvc_vhost;
-	/* Pointer to vhost_virtqueue for the cmd */
-	struct vhost_virtqueue *tvc_vq;
-	/* Pointer to vhost nexus memory */
-	struct tcm_vhost_nexus *tvc_nexus;
-	/* The TCM I/O descriptor that is accessed via container_of() */
-	struct se_cmd tvc_se_cmd;
-	/* work item used for cmwq dispatch to tcm_vhost_submission_work() */
-	struct work_struct work;
-	/* Copy of the incoming SCSI command descriptor block (CDB) */
-	unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE];
-	/* Sense buffer that will be mapped into outgoing status */
-	unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
-	/* Completed commands list, serviced from vhost worker thread */
-	struct llist_node tvc_completion_list;
-	/* Used to track inflight cmd */
-	struct vhost_scsi_inflight *inflight;
-};
-
-struct tcm_vhost_nexus {
-	/* Pointer to TCM session for I_T Nexus */
-	struct se_session *tvn_se_sess;
-};
-
-struct tcm_vhost_nacl {
-	/* Binary World Wide unique Port Name for Vhost Initiator port */
-	u64 iport_wwpn;
-	/* ASCII formatted WWPN for Sas Initiator port */
-	char iport_name[TCM_VHOST_NAMELEN];
-	/* Returned by tcm_vhost_make_nodeacl() */
-	struct se_node_acl se_node_acl;
-};
-
-struct vhost_scsi;
-struct tcm_vhost_tpg {
-	/* Vhost port target portal group tag for TCM */
-	u16 tport_tpgt;
-	/* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus shutdown */
-	int tv_tpg_port_count;
-	/* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
-	int tv_tpg_vhost_count;
-	/* list for tcm_vhost_list */
-	struct list_head tv_tpg_list;
-	/* Used to protect access for tpg_nexus */
-	struct mutex tv_tpg_mutex;
-	/* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
-	struct tcm_vhost_nexus *tpg_nexus;
-	/* Pointer back to tcm_vhost_tport */
-	struct tcm_vhost_tport *tport;
-	/* Returned by tcm_vhost_make_tpg() */
-	struct se_portal_group se_tpg;
-	/* Pointer back to vhost_scsi, protected by tv_tpg_mutex */
-	struct vhost_scsi *vhost_scsi;
-};
-
-struct tcm_vhost_tport {
-	/* SCSI protocol the tport is providing */
-	u8 tport_proto_id;
-	/* Binary World Wide unique Port Name for Vhost Target port */
-	u64 tport_wwpn;
-	/* ASCII formatted WWPN for Vhost Target port */
-	char tport_name[TCM_VHOST_NAMELEN];
-	/* Returned by tcm_vhost_make_tport() */
-	struct se_wwn tport_wwn;
-};
-
-struct tcm_vhost_evt {
-	/* event to be sent to guest */
-	struct virtio_scsi_event event;
-	/* event list, serviced from vhost worker thread */
-	struct llist_node list;
-};
-
-/*
- * As per request from MST, keep TCM_VHOST related ioctl defines out of
- * linux/vhost.h (user-space) for now..
- */
-
-#include <linux/vhost.h>
-
-/*
- * Used by QEMU userspace to ensure a consistent vhost-scsi ABI.
- *
- * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate +
- *            RFC-v2 vhost-scsi userspace.  Add GET_ABI_VERSION ioctl usage
- * ABI Rev 1: January 2013. Ignore vhost_tpgt filed in struct vhost_scsi_target.
- *            All the targets under vhost_wwpn can be seen and used by guset.
- */
-
-#define VHOST_SCSI_ABI_VERSION	1
-
-struct vhost_scsi_target {
-	int abi_version;
-	char vhost_wwpn[TRANSPORT_IQN_LEN];
-	unsigned short vhost_tpgt;
-	unsigned short reserved;
-};
-
-/* VHOST_SCSI specific defines */
-#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
-#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
-/* Changing this breaks userspace. */
-#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
-/* Set and get the events missed flag */
-#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
-#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index e847f1e30756..bb6a5b4cb3c5 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -127,4 +127,32 @@ struct vhost_memory {
 /* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
 #define VHOST_NET_F_VIRTIO_NET_HDR 27
 
+/* VHOST_SCSI specific definitions */
+
+/*
+ * Used by QEMU userspace to ensure a consistent vhost-scsi ABI.
+ *
+ * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate +
+ *            RFC-v2 vhost-scsi userspace.  Add GET_ABI_VERSION ioctl usage
+ * ABI Rev 1: January 2013. Ignore vhost_tpgt filed in struct vhost_scsi_target.
+ *            All the targets under vhost_wwpn can be seen and used by guset.
+ */
+
+#define VHOST_SCSI_ABI_VERSION	1
+
+struct vhost_scsi_target {
+	int abi_version;
+	char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */
+	unsigned short vhost_tpgt;
+	unsigned short reserved;
+};
+
+#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
+#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
+/* Changing this breaks userspace. */
+#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
+/* Set and get the events missed flag */
+#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
+#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
+
 #endif
-- 
cgit 


From 5975a2e0950291a6bfe9fd5880e7952ff87764be Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sat, 27 Apr 2013 00:28:37 +0000
Subject: KVM: PPC: Book3S: Add API for in-kernel XICS emulation

This adds the API for userspace to instantiate an XICS device in a VM
and connect VCPUs to it.  The API consists of a new device type for
the KVM_CREATE_DEVICE ioctl, a new capability KVM_CAP_IRQ_XICS, which
functions similarly to KVM_CAP_IRQ_MPIC, and the KVM_IRQ_LINE ioctl,
which is used to assert and deassert interrupt inputs of the XICS.

The XICS device has one attribute group, KVM_DEV_XICS_GRP_SOURCES.
Each attribute within this group corresponds to the state of one
interrupt source.  The attribute number is the same as the interrupt
source number.

This does not support irq routing or irqfd yet.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt          |   8 ++
 Documentation/virtual/kvm/devices/xics.txt |  66 ++++++++++
 arch/powerpc/include/asm/kvm_ppc.h         |   2 +
 arch/powerpc/include/uapi/asm/kvm.h        |  12 ++
 arch/powerpc/kvm/book3s_xics.c             | 190 +++++++++++++++++++++++++----
 arch/powerpc/kvm/book3s_xics.h             |   1 +
 arch/powerpc/kvm/irq.h                     |   3 +
 arch/powerpc/kvm/powerpc.c                 |  22 ++++
 include/linux/kvm_host.h                   |   1 +
 include/uapi/linux/kvm.h                   |   2 +
 virt/kvm/kvm_main.c                        |   5 +
 11 files changed, 287 insertions(+), 25 deletions(-)
 create mode 100644 Documentation/virtual/kvm/devices/xics.txt

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index c09d1832e935..03492f95ed39 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2772,3 +2772,11 @@ Parameters: args[0] is the MPIC device fd
             args[1] is the MPIC CPU number for this vcpu
 
 This capability connects the vcpu to an in-kernel MPIC device.
+
+6.7 KVM_CAP_IRQ_XICS
+
+Architectures: ppc
+Parameters: args[0] is the XICS device fd
+            args[1] is the XICS CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XICS device.
diff --git a/Documentation/virtual/kvm/devices/xics.txt b/Documentation/virtual/kvm/devices/xics.txt
new file mode 100644
index 000000000000..42864935ac5d
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/xics.txt
@@ -0,0 +1,66 @@
+XICS interrupt controller
+
+Device type supported: KVM_DEV_TYPE_XICS
+
+Groups:
+  KVM_DEV_XICS_SOURCES
+  Attributes: One per interrupt source, indexed by the source number.
+
+This device emulates the XICS (eXternal Interrupt Controller
+Specification) defined in PAPR.  The XICS has a set of interrupt
+sources, each identified by a 20-bit source number, and a set of
+Interrupt Control Presentation (ICP) entities, also called "servers",
+each associated with a virtual CPU.
+
+The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH
+capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and
+the interrupt server number (i.e. the vcpu number from the XICS's
+point of view) in args[1] of the kvm_enable_cap struct.  Each ICP has
+64 bits of state which can be read and written using the
+KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu.  The 64 bit
+state word has the following bitfields, starting at the
+least-significant end of the word:
+
+* Unused, 16 bits
+
+* Pending interrupt priority, 8 bits
+  Zero is the highest priority, 255 means no interrupt is pending.
+
+* Pending IPI (inter-processor interrupt) priority, 8 bits
+  Zero is the highest priority, 255 means no IPI is pending.
+
+* Pending interrupt source number, 24 bits
+  Zero means no interrupt pending, 2 means an IPI is pending
+
+* Current processor priority, 8 bits
+  Zero is the highest priority, meaning no interrupts can be
+  delivered, and 255 is the lowest priority.
+
+Each source has 64 bits of state that can be read and written using
+the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
+KVM_DEV_XICS_SOURCES attribute group, with the attribute number being
+the interrupt source number.  The 64 bit state word has the following
+bitfields, starting from the least-significant end of the word:
+
+* Destination (server number), 32 bits
+  This specifies where the interrupt should be sent, and is the
+  interrupt server number specified for the destination vcpu.
+
+* Priority, 8 bits
+  This is the priority specified for this interrupt source, where 0 is
+  the highest priority and 255 is the lowest.  An interrupt with a
+  priority of 255 will never be delivered.
+
+* Level sensitive flag, 1 bit
+  This bit is 1 for a level-sensitive interrupt source, or 0 for
+  edge-sensitive (or MSI).
+
+* Masked flag, 1 bit
+  This bit is set to 1 if the interrupt is masked (cannot be delivered
+  regardless of its priority), for example by the ibm,int-off RTAS
+  call, or 0 if it is not masked.
+
+* Pending flag, 1 bit
+  This bit is 1 if the source has a pending interrupt, otherwise 0.
+
+Only one XICS instance may be created per VM.
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index d7339df19259..a5287fe03d77 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -315,6 +315,8 @@ extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
 extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
 extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
 extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
+extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
+			struct kvm_vcpu *vcpu, u32 cpu);
 #else
 static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
 	{ return 0; }
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 427b9aca2a0f..0fb1a6e9ff90 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -499,4 +499,16 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_TLB3PS	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
 #define KVM_REG_PPC_EPTCFG	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
 
+/* PPC64 eXternal Interrupt Controller Specification */
+#define KVM_DEV_XICS_GRP_SOURCES	1	/* 64-bit source attributes */
+
+/* Layout of 64-bit source attribute values */
+#define  KVM_XICS_DESTINATION_SHIFT	0
+#define  KVM_XICS_DESTINATION_MASK	0xffffffffULL
+#define  KVM_XICS_PRIORITY_SHIFT	32
+#define  KVM_XICS_PRIORITY_MASK		0xff
+#define  KVM_XICS_LEVEL_SENSITIVE	(1ULL << 40)
+#define  KVM_XICS_MASKED		(1ULL << 41)
+#define  KVM_XICS_PENDING		(1ULL << 42)
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index ee841ed8a690..f7a103756618 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -11,6 +11,7 @@
 #include <linux/kvm_host.h>
 #include <linux/err.h>
 #include <linux/gfp.h>
+#include <linux/anon_inodes.h>
 
 #include <asm/uaccess.h>
 #include <asm/kvm_book3s.h>
@@ -55,8 +56,6 @@
  *
  * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
  *   locks array to improve scalability
- *
- * - ioctl's to save/restore the entire state for snapshot & migration
  */
 
 /* -- ICS routines -- */
@@ -64,7 +63,8 @@
 static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
 			    u32 new_irq);
 
-static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
+static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level,
+			   bool report_status)
 {
 	struct ics_irq_state *state;
 	struct kvmppc_ics *ics;
@@ -81,6 +81,9 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
 	if (!state->exists)
 		return -EINVAL;
 
+	if (report_status)
+		return state->asserted;
+
 	/*
 	 * We set state->asserted locklessly. This should be fine as
 	 * we are the only setter, thus concurrent access is undefined
@@ -96,7 +99,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
 	/* Attempt delivery */
 	icp_deliver_irq(xics, NULL, irq);
 
-	return 0;
+	return state->asserted;
 }
 
 static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
@@ -891,8 +894,8 @@ static void xics_debugfs_init(struct kvmppc_xics *xics)
 	kfree(name);
 }
 
-struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
-					  struct kvmppc_xics *xics, int irq)
+static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
+					struct kvmppc_xics *xics, int irq)
 {
 	struct kvmppc_ics *ics;
 	int i, icsid;
@@ -1044,34 +1047,138 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
 	return 0;
 }
 
-/* -- ioctls -- */
+static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
+{
+	int ret;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *irqp;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 idx;
+	u64 val, prio;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &idx);
+	if (!ics)
+		return -ENOENT;
 
-int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args)
+	irqp = &ics->irq_state[idx];
+	mutex_lock(&ics->lock);
+	ret = -ENOENT;
+	if (irqp->exists) {
+		val = irqp->server;
+		prio = irqp->priority;
+		if (prio == MASKED) {
+			val |= KVM_XICS_MASKED;
+			prio = irqp->saved_priority;
+		}
+		val |= prio << KVM_XICS_PRIORITY_SHIFT;
+		if (irqp->asserted)
+			val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
+		else if (irqp->masked_pending || irqp->resend)
+			val |= KVM_XICS_PENDING;
+		ret = 0;
+	}
+	mutex_unlock(&ics->lock);
+
+	if (!ret && put_user(val, ubufp))
+		ret = -EFAULT;
+
+	return ret;
+}
+
+static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
 {
-	struct kvmppc_xics *xics;
-	int r;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *irqp;
+	u64 __user *ubufp = (u64 __user *) addr;
+	u16 idx;
+	u64 val;
+	u8 prio;
+	u32 server;
+
+	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
+		return -ENOENT;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &idx);
+	if (!ics) {
+		ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
+		if (!ics)
+			return -ENOMEM;
+	}
+	irqp = &ics->irq_state[idx];
+	if (get_user(val, ubufp))
+		return -EFAULT;
+
+	server = val & KVM_XICS_DESTINATION_MASK;
+	prio = val >> KVM_XICS_PRIORITY_SHIFT;
+	if (prio != MASKED &&
+	    kvmppc_xics_find_server(xics->kvm, server) == NULL)
+		return -EINVAL;
 
-	/* locking against multiple callers? */
+	mutex_lock(&ics->lock);
+	irqp->server = server;
+	irqp->saved_priority = prio;
+	if (val & KVM_XICS_MASKED)
+		prio = MASKED;
+	irqp->priority = prio;
+	irqp->resend = 0;
+	irqp->masked_pending = 0;
+	irqp->asserted = 0;
+	if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
+		irqp->asserted = 1;
+	irqp->exists = 1;
+	mutex_unlock(&ics->lock);
 
-	xics = kvm->arch.xics;
-	if (!xics)
-		return -ENODEV;
+	if (val & KVM_XICS_PENDING)
+		icp_deliver_irq(xics, NULL, irqp->number);
 
-	switch (args->level) {
-	case KVM_INTERRUPT_SET:
-	case KVM_INTERRUPT_SET_LEVEL:
-	case KVM_INTERRUPT_UNSET:
-		r = ics_deliver_irq(xics, args->irq, args->level);
-		break;
-	default:
-		r = -EINVAL;
+	return 0;
+}
+
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+		bool line_status)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+
+	return ics_deliver_irq(xics, irq, level, line_status);
+}
+
+static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvmppc_xics *xics = dev->private;
+
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		return xics_set_source(xics, attr->attr, attr->addr);
 	}
+	return -ENXIO;
+}
 
-	return r;
+static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	struct kvmppc_xics *xics = dev->private;
+
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		return xics_get_source(xics, attr->attr, attr->addr);
+	}
+	return -ENXIO;
 }
 
-void kvmppc_xics_free(struct kvmppc_xics *xics)
+static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 {
+	switch (attr->group) {
+	case KVM_DEV_XICS_GRP_SOURCES:
+		if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
+		    attr->attr < KVMPPC_XICS_NR_IRQS)
+			return 0;
+		break;
+	}
+	return -ENXIO;
+}
+
+static void kvmppc_xics_free(struct kvm_device *dev)
+{
+	struct kvmppc_xics *xics = dev->private;
 	int i;
 	struct kvm *kvm = xics->kvm;
 
@@ -1083,17 +1190,21 @@ void kvmppc_xics_free(struct kvmppc_xics *xics)
 	for (i = 0; i <= xics->max_icsid; i++)
 		kfree(xics->ics[i]);
 	kfree(xics);
+	kfree(dev);
 }
 
-int kvm_xics_create(struct kvm *kvm, u32 type)
+static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
 {
 	struct kvmppc_xics *xics;
+	struct kvm *kvm = dev->kvm;
 	int ret = 0;
 
 	xics = kzalloc(sizeof(*xics), GFP_KERNEL);
 	if (!xics)
 		return -ENOMEM;
 
+	dev->private = xics;
+	xics->dev = dev;
 	xics->kvm = kvm;
 
 	/* Already there ? */
@@ -1120,6 +1231,35 @@ int kvm_xics_create(struct kvm *kvm, u32 type)
 	return 0;
 }
 
+struct kvm_device_ops kvm_xics_ops = {
+	.name = "kvm-xics",
+	.create = kvmppc_xics_create,
+	.destroy = kvmppc_xics_free,
+	.set_attr = xics_set_attr,
+	.get_attr = xics_get_attr,
+	.has_attr = xics_has_attr,
+};
+
+int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
+			     u32 xcpu)
+{
+	struct kvmppc_xics *xics = dev->private;
+	int r = -EBUSY;
+
+	if (dev->ops != &kvm_xics_ops)
+		return -EPERM;
+	if (xics->kvm != vcpu->kvm)
+		return -EPERM;
+	if (vcpu->arch.irq_type)
+		return -EBUSY;
+
+	r = kvmppc_xics_create_icp(vcpu, xcpu);
+	if (!r)
+		vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
+
+	return r;
+}
+
 void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
 {
 	if (!vcpu->arch.icp)
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index e4fdec3dde77..dd9326c5c19b 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -88,6 +88,7 @@ struct kvmppc_ics {
 
 struct kvmppc_xics {
 	struct kvm *kvm;
+	struct kvm_device *dev;
 	struct dentry *dentry;
 	u32 max_icsid;
 	bool real_mode;
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
index f1e27fdc8c2e..5a9a10b90762 100644
--- a/arch/powerpc/kvm/irq.h
+++ b/arch/powerpc/kvm/irq.h
@@ -9,6 +9,9 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 
 #ifdef CONFIG_KVM_MPIC
 	ret = ret || (kvm->arch.mpic != NULL);
+#endif
+#ifdef CONFIG_KVM_XICS
+	ret = ret || (kvm->arch.xics != NULL);
 #endif
 	smp_rmb();
 	return ret;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 270773f0d955..6316ee336e88 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -342,6 +342,9 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_SPAPR_TCE:
 	case KVM_CAP_PPC_ALLOC_HTAB:
 	case KVM_CAP_PPC_RTAS:
+#ifdef CONFIG_KVM_XICS
+	case KVM_CAP_IRQ_XICS:
+#endif
 		r = 1;
 		break;
 #endif /* CONFIG_PPC_BOOK3S_64 */
@@ -837,6 +840,25 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		break;
 	}
 #endif
+#ifdef CONFIG_KVM_XICS
+	case KVM_CAP_IRQ_XICS: {
+		struct file *filp;
+		struct kvm_device *dev;
+
+		r = -EBADF;
+		filp = fget(cap->args[0]);
+		if (!filp)
+			break;
+
+		r = -EPERM;
+		dev = kvm_device_from_filp(filp);
+		if (dev)
+			r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
+
+		fput(filp);
+		break;
+	}
+#endif /* CONFIG_KVM_XICS */
 	default:
 		r = -EINVAL;
 		break;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 996661eb8e99..7823b6369c5e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1086,6 +1086,7 @@ void kvm_device_put(struct kvm_device *dev);
 struct kvm_device *kvm_device_from_filp(struct file *filp);
 
 extern struct kvm_device_ops kvm_mpic_ops;
+extern struct kvm_device_ops kvm_xics_ops;
 
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 965e5b52dee0..a5c86fc34a37 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -665,6 +665,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_DEVICE_CTRL 89
 #define KVM_CAP_IRQ_MPIC 90
 #define KVM_CAP_PPC_RTAS 91
+#define KVM_CAP_IRQ_XICS 92
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -837,6 +838,7 @@ struct kvm_device_attr {
 
 #define KVM_DEV_TYPE_FSL_MPIC_20	1
 #define KVM_DEV_TYPE_FSL_MPIC_42	2
+#define KVM_DEV_TYPE_XICS		3
 
 /*
  * ioctls for VM fds
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5da9f02a2a67..e6e7abe16d05 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2246,6 +2246,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
 	case KVM_DEV_TYPE_FSL_MPIC_42:
 		ops = &kvm_mpic_ops;
 		break;
+#endif
+#ifdef CONFIG_KVM_XICS
+	case KVM_DEV_TYPE_XICS:
+		ops = &kvm_xics_ops;
+		break;
 #endif
 	default:
 		return -ENODEV;
-- 
cgit 


From 2b3b9bb03a9fb1e4c72947cc235771c6455ec7c9 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Wed, 27 Mar 2013 10:47:13 +0000
Subject: hostfs: move HOSTFS_SUPER_MAGIC to <linux/magic.h>

Move HOSTFS_SUPER_MAGIC to <linux/magic.h> to be with it's magical
friends from other file systems.

Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/hostfs/hostfs_kern.c    | 3 +--
 include/uapi/linux/magic.h | 1 +
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 95b9c87cb24a..f2372ef80850 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/magic.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
@@ -45,8 +46,6 @@ static const struct dentry_operations hostfs_dentry_ops = {
 static char *root_ino = "";
 static int append = 0;
 
-#define HOSTFS_SUPER_MAGIC 0x00c0ffee
-
 static const struct inode_operations hostfs_iops;
 static const struct inode_operations hostfs_dir_iops;
 static const struct inode_operations hostfs_link_iops;
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 249df3720be2..2944278a8ba7 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -30,6 +30,7 @@
 #define JFFS2_SUPER_MAGIC	0x72b6
 #define PSTOREFS_MAGIC		0x6165676C
 #define EFIVARFS_MAGIC		0xde5e81e4
+#define HOSTFS_SUPER_MAGIC	0x00c0ffee
 
 #define MINIX_SUPER_MAGIC	0x137F		/* minix v1 fs, 14 char names */
 #define MINIX_SUPER_MAGIC2	0x138F		/* minix v1 fs, 30 char names */
-- 
cgit 


From 77d21f23a1e4db8639e3916547c903a3b3c7a07c Mon Sep 17 00:00:00 2001
From: stephen hemminger <stephen@networkplumber.org>
Date: Fri, 3 May 2013 14:49:41 +0000
Subject: virtio: don't expose u16 in userspace api

Programs using virtio headers outside of kernel will no longer
build because u16 type does not exist in userspace. All user ABI
must use __u16 typedef instead.

Bug introduce by:
  commit 986a4f4d452dec004697f667439d27c3fda9c928
  Author: Jason Wang <jasowang@redhat.com>
  Date:   Fri Dec 7 07:04:56 2012 +0000

    virtio_net: multiqueue support

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/virtio_net.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index a5a8c88753b9..c520203fac2f 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -191,7 +191,7 @@ struct virtio_net_ctrl_mac {
  * specified.
  */
 struct virtio_net_ctrl_mq {
-	u16 virtqueue_pairs;
+	__u16 virtqueue_pairs;
 };
 
 #define VIRTIO_NET_CTRL_MQ   4
-- 
cgit 


From c2c71324ecb471c932bc1ff59e46ffcf82f274fc Mon Sep 17 00:00:00 2001
From: Stefan Behrens <sbehrens@giantdisaster.de>
Date: Wed, 10 Apr 2013 17:10:52 +0000
Subject: Btrfs: allow omitting stream header and end-cmd for btrfs send

Two new flags are added to allow omitting the stream header and the
end command for btrfs send streams. This is used in cases where you
send multiple snapshots back-to-back in one stream.

This used to be encoded like this (with 2 snapshots in this example):
<stream header> + <sequence of commands> + <end cmd> +
<stream header> + <sequence of commands> + <end cmd> + EOF

The new format (if the two new flags are used) is this one:
<stream header> + <sequence of commands> +
                  <sequence of commands> + <end cmd>

Note that the currently existing receivers treat <end cmd> only as
an indication that a new <stream header> is following. This means,
you can just skip the sequence <end cmd> <stream header> without
loosing compatibility. As long as an EOF is following, the currently
existing receivers handle the new format (if the two new flags are
used) exactly as the old one.

So what is the benefit of this change? The goal is to be able to use
a single stream (one TCP connection) to multiplex a request/response
handshake plus Btrfs send streams, all in the same stream. In this
case you cannot evaluate an EOF condition as an end of the Btrfs send
stream. You need something else, and the <end cmd> is just perfect
for this purpose.

The summary is:
The format change is driven by the need to send several Btrfs send
streams over a single TCP connections, with the ability for a repeated
request/response handshake in the middle. And this format change does
not break any existing tool, it is completely compatible.

You could compare the old behaviour of the Btrfs send stream to the
one of ftp where you need a seperate request/response channel and
newly opened data transfer channels for each file, while the new
behaviour is more like http using a single stream for everything.

Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
---
 fs/btrfs/send.c            | 24 ++++++++++++++----------
 include/uapi/linux/btrfs.h | 20 +++++++++++++++++++-
 2 files changed, 33 insertions(+), 11 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c85e7c6b4598..e0c69a106c77 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4529,9 +4529,11 @@ static int send_subvol(struct send_ctx *sctx)
 {
 	int ret;
 
-	ret = send_header(sctx);
-	if (ret < 0)
-		goto out;
+	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_STREAM_HEADER)) {
+		ret = send_header(sctx);
+		if (ret < 0)
+			goto out;
+	}
 
 	ret = send_subvol_begin(sctx);
 	if (ret < 0)
@@ -4593,7 +4595,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 		goto out;
 	}
 
-	if (arg->flags & ~BTRFS_SEND_FLAG_NO_FILE_DATA) {
+	if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -4704,12 +4706,14 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
 	if (ret < 0)
 		goto out;
 
-	ret = begin_cmd(sctx, BTRFS_SEND_C_END);
-	if (ret < 0)
-		goto out;
-	ret = send_cmd(sctx);
-	if (ret < 0)
-		goto out;
+	if (!(sctx->flags & BTRFS_SEND_FLAG_OMIT_END_CMD)) {
+		ret = begin_cmd(sctx, BTRFS_SEND_C_END);
+		if (ret < 0)
+			goto out;
+		ret = send_cmd(sctx);
+		if (ret < 0)
+			goto out;
+	}
 
 out:
 	kfree(arg);
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index fa3a5f9338fc..5e39e859a848 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -412,7 +412,25 @@ struct btrfs_ioctl_received_subvol_args {
  * search of clone sources doesn't find an extent. UPDATE_EXTENT
  * commands will be sent instead of WRITE commands.
  */
-#define BTRFS_SEND_FLAG_NO_FILE_DATA     0x1
+#define BTRFS_SEND_FLAG_NO_FILE_DATA		0x1
+
+/*
+ * Do not add the leading stream header. Used when multiple snapshots
+ * are sent back to back.
+ */
+#define BTRFS_SEND_FLAG_OMIT_STREAM_HEADER	0x2
+
+/*
+ * Omit the command at the end of the stream that indicated the end
+ * of the stream. This option is used when multiple snapshots are
+ * sent back to back.
+ */
+#define BTRFS_SEND_FLAG_OMIT_END_CMD		0x4
+
+#define BTRFS_SEND_FLAG_MASK \
+	(BTRFS_SEND_FLAG_NO_FILE_DATA | \
+	 BTRFS_SEND_FLAG_OMIT_STREAM_HEADER | \
+	 BTRFS_SEND_FLAG_OMIT_END_CMD)
 
 struct btrfs_ioctl_send_args {
 	__s64 send_fd;			/* in */
-- 
cgit 


From 2f2320360b0c35b86938bfc561124474f0dac6e4 Mon Sep 17 00:00:00 2001
From: Jan Schmidt <list.btrfs@jan-o-sch.net>
Date: Thu, 25 Apr 2013 16:04:51 +0000
Subject: Btrfs: rescan for qgroups

If qgroup tracking is out of sync, a rescan operation can be started. It
iterates the complete extent tree and recalculates all qgroup tracking data.
This is an expensive operation and should not be used unless required.

A filesystem under rescan can still be umounted. The rescan continues on the
next mount.  Status information is provided with a separate ioctl while a
rescan operation is in progress.

Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
---
 fs/btrfs/ctree.h           |  17 ++-
 fs/btrfs/disk-io.c         |   5 +
 fs/btrfs/ioctl.c           |  83 ++++++++++--
 fs/btrfs/qgroup.c          | 318 +++++++++++++++++++++++++++++++++++++++++++--
 include/uapi/linux/btrfs.h |  12 +-
 5 files changed, 400 insertions(+), 35 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c48f52aba40..d9bed5fd3347 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1021,9 +1021,9 @@ struct btrfs_block_group_item {
  */
 #define BTRFS_QGROUP_STATUS_FLAG_ON		(1ULL << 0)
 /*
- * SCANNING is set during the initialization phase
+ * RESCAN is set during the initialization phase
  */
-#define BTRFS_QGROUP_STATUS_FLAG_SCANNING	(1ULL << 1)
+#define BTRFS_QGROUP_STATUS_FLAG_RESCAN		(1ULL << 1)
 /*
  * Some qgroup entries are known to be out of date,
  * either because the configuration has changed in a way that
@@ -1052,7 +1052,7 @@ struct btrfs_qgroup_status_item {
 	 * only used during scanning to record the progress
 	 * of the scan. It contains a logical address
 	 */
-	__le64 scan;
+	__le64 rescan;
 } __attribute__ ((__packed__));
 
 struct btrfs_qgroup_info_item {
@@ -1603,6 +1603,11 @@ struct btrfs_fs_info {
 	/* used by btrfs_qgroup_record_ref for an efficient tree traversal */
 	u64 qgroup_seq;
 
+	/* qgroup rescan items */
+	struct mutex qgroup_rescan_lock; /* protects the progress item */
+	struct btrfs_key qgroup_rescan_progress;
+	struct btrfs_workers qgroup_rescan_workers;
+
 	/* filesystem state */
 	unsigned long fs_state;
 
@@ -2886,8 +2891,8 @@ BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
 		   version, 64);
 BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
 		   flags, 64);
-BTRFS_SETGET_FUNCS(qgroup_status_scan, struct btrfs_qgroup_status_item,
-		   scan, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item,
+		   rescan, 64);
 
 /* btrfs_qgroup_info_item */
 BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
@@ -3828,7 +3833,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 		       struct btrfs_fs_info *fs_info);
 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
 			struct btrfs_fs_info *fs_info);
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
 			      struct btrfs_fs_info *fs_info, u64 src, u64 dst);
 int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 92c44ed78de1..d96305e5cc93 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1976,6 +1976,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 	btrfs_stop_workers(&fs_info->caching_workers);
 	btrfs_stop_workers(&fs_info->readahead_workers);
 	btrfs_stop_workers(&fs_info->flush_workers);
+	btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
 }
 
 /* helper to cleanup tree roots */
@@ -2267,6 +2268,7 @@ int open_ctree(struct super_block *sb,
 	fs_info->qgroup_seq = 1;
 	fs_info->quota_enabled = 0;
 	fs_info->pending_quota_state = 0;
+	mutex_init(&fs_info->qgroup_rescan_lock);
 
 	btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
 	btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2476,6 +2478,8 @@ int open_ctree(struct super_block *sb,
 	btrfs_init_workers(&fs_info->readahead_workers, "readahead",
 			   fs_info->thread_pool_size,
 			   &fs_info->generic_worker);
+	btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1,
+			   &fs_info->generic_worker);
 
 	/*
 	 * endios are largely parallel and should have a very
@@ -2510,6 +2514,7 @@ int open_ctree(struct super_block *sb,
 	ret |= btrfs_start_workers(&fs_info->caching_workers);
 	ret |= btrfs_start_workers(&fs_info->readahead_workers);
 	ret |= btrfs_start_workers(&fs_info->flush_workers);
+	ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
 	if (ret) {
 		err = -ENOMEM;
 		goto fail_sb_buffer;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a74edc797531..f5f6af338b53 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3701,12 +3701,10 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
 	}
 
 	down_write(&root->fs_info->subvol_sem);
-	if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) {
-		trans = btrfs_start_transaction(root->fs_info->tree_root, 2);
-		if (IS_ERR(trans)) {
-			ret = PTR_ERR(trans);
-			goto out;
-		}
+	trans = btrfs_start_transaction(root->fs_info->tree_root, 2);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		goto out;
 	}
 
 	switch (sa->cmd) {
@@ -3716,9 +3714,6 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
 	case BTRFS_QUOTA_CTL_DISABLE:
 		ret = btrfs_quota_disable(trans, root->fs_info);
 		break;
-	case BTRFS_QUOTA_CTL_RESCAN:
-		ret = btrfs_quota_rescan(root->fs_info);
-		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -3727,11 +3722,9 @@ static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
 	if (copy_to_user(arg, sa, sizeof(*sa)))
 		ret = -EFAULT;
 
-	if (trans) {
-		err = btrfs_commit_transaction(trans, root->fs_info->tree_root);
-		if (err && !ret)
-			ret = err;
-	}
+	err = btrfs_commit_transaction(trans, root->fs_info->tree_root);
+	if (err && !ret)
+		ret = err;
 out:
 	kfree(sa);
 	up_write(&root->fs_info->subvol_sem);
@@ -3886,6 +3879,64 @@ drop_write:
 	return ret;
 }
 
+static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
+{
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_ioctl_quota_rescan_args *qsa;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	ret = mnt_want_write_file(file);
+	if (ret)
+		return ret;
+
+	qsa = memdup_user(arg, sizeof(*qsa));
+	if (IS_ERR(qsa)) {
+		ret = PTR_ERR(qsa);
+		goto drop_write;
+	}
+
+	if (qsa->flags) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = btrfs_qgroup_rescan(root->fs_info);
+
+out:
+	kfree(qsa);
+drop_write:
+	mnt_drop_write_file(file);
+	return ret;
+}
+
+static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
+{
+	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
+	struct btrfs_ioctl_quota_rescan_args *qsa;
+	int ret = 0;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
+	if (!qsa)
+		return -ENOMEM;
+
+	if (root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		qsa->flags = 1;
+		qsa->progress = root->fs_info->qgroup_rescan_progress.objectid;
+	}
+
+	if (copy_to_user(arg, qsa, sizeof(*qsa)))
+		ret = -EFAULT;
+
+	kfree(qsa);
+	return ret;
+}
+
 static long btrfs_ioctl_set_received_subvol(struct file *file,
 					    void __user *arg)
 {
@@ -4124,6 +4175,10 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_qgroup_create(file, argp);
 	case BTRFS_IOC_QGROUP_LIMIT:
 		return btrfs_ioctl_qgroup_limit(file, argp);
+	case BTRFS_IOC_QUOTA_RESCAN:
+		return btrfs_ioctl_quota_rescan(file, argp);
+	case BTRFS_IOC_QUOTA_RESCAN_STATUS:
+		return btrfs_ioctl_quota_rescan_status(file, argp);
 	case BTRFS_IOC_DEV_REPLACE:
 		return btrfs_ioctl_dev_replace(root, argp);
 	case BTRFS_IOC_GET_FSLABEL:
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1fb7d8da3084..da8458357b57 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -31,13 +31,13 @@
 #include "locking.h"
 #include "ulist.h"
 #include "backref.h"
+#include "extent_io.h"
 
 /* TODO XXX FIXME
  *  - subvol delete -> delete when ref goes to 0? delete limits also?
  *  - reorganize keys
  *  - compressed
  *  - sync
- *  - rescan
  *  - copy also limits on subvol creation
  *  - limit
  *  - caches fuer ulists
@@ -98,6 +98,14 @@ struct btrfs_qgroup_list {
 	struct btrfs_qgroup *member;
 };
 
+struct qgroup_rescan {
+	struct btrfs_work	work;
+	struct btrfs_fs_info	*fs_info;
+};
+
+static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
+				struct qgroup_rescan *qscan);
+
 /* must be called with qgroup_ioctl_lock held */
 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
 					   u64 qgroupid)
@@ -298,7 +306,20 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
 			}
 			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
 									  ptr);
-			/* FIXME read scan element */
+			fs_info->qgroup_rescan_progress.objectid =
+					btrfs_qgroup_status_rescan(l, ptr);
+			if (fs_info->qgroup_flags &
+			    BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+				struct qgroup_rescan *qscan =
+					kmalloc(sizeof(*qscan), GFP_NOFS);
+				if (!qscan) {
+					ret = -ENOMEM;
+					goto out;
+				}
+				fs_info->qgroup_rescan_progress.type = 0;
+				fs_info->qgroup_rescan_progress.offset = 0;
+				qgroup_rescan_start(fs_info, qscan);
+			}
 			goto next1;
 		}
 
@@ -719,7 +740,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
 	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
 	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
 	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
-	/* XXX scan */
+	btrfs_set_qgroup_status_rescan(l, ptr,
+				fs_info->qgroup_rescan_progress.objectid);
 
 	btrfs_mark_buffer_dirty(l);
 
@@ -830,7 +852,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
 				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
-	btrfs_set_qgroup_status_scan(leaf, ptr, 0);
+	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
 
 	btrfs_mark_buffer_dirty(leaf);
 
@@ -944,10 +966,11 @@ out:
 	return ret;
 }
 
-int btrfs_quota_rescan(struct btrfs_fs_info *fs_info)
+static void qgroup_dirty(struct btrfs_fs_info *fs_info,
+			 struct btrfs_qgroup *qgroup)
 {
-	/* FIXME */
-	return 0;
+	if (list_empty(&qgroup->dirty))
+		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
 }
 
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
@@ -1155,13 +1178,6 @@ out:
 	return ret;
 }
 
-static void qgroup_dirty(struct btrfs_fs_info *fs_info,
-			 struct btrfs_qgroup *qgroup)
-{
-	if (list_empty(&qgroup->dirty))
-		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
-}
-
 /*
  * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
  * the modification into a list that's later used by btrfs_end_transaction to
@@ -1390,6 +1406,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 		BUG();
 	}
 
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+			mutex_unlock(&fs_info->qgroup_rescan_lock);
+			return 0;
+		}
+	}
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
 	/*
 	 * the delayed ref sequence number we pass depends on the direction of
 	 * the operation. for add operations, we pass
@@ -1403,7 +1428,15 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 	if (ret < 0)
 		return ret;
 
+	mutex_lock(&fs_info->qgroup_rescan_lock);
 	spin_lock(&fs_info->qgroup_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
+		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
+			ret = 0;
+			goto unlock;
+		}
+	}
+
 	quota_root = fs_info->quota_root;
 	if (!quota_root)
 		goto unlock;
@@ -1445,6 +1478,7 @@ int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
 
 unlock:
 	spin_unlock(&fs_info->qgroup_lock);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
 	ulist_free(roots);
 	ulist_free(tmp);
 
@@ -1823,3 +1857,259 @@ void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
 		(u32)trans->delayed_ref_elem.seq);
 	BUG();
 }
+
+/*
+ * returns < 0 on error, 0 when more leafs are to be scanned.
+ * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
+ */
+static int
+qgroup_rescan_leaf(struct qgroup_rescan *qscan, struct btrfs_path *path,
+		   struct btrfs_trans_handle *trans, struct ulist *tmp,
+		   struct extent_buffer *scratch_leaf)
+{
+	struct btrfs_key found;
+	struct btrfs_fs_info *fs_info = qscan->fs_info;
+	struct ulist *roots = NULL;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct seq_list tree_mod_seq_elem = {};
+	u64 seq;
+	int slot;
+	int ret;
+
+	path->leave_spinning = 1;
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	ret = btrfs_search_slot_for_read(fs_info->extent_root,
+					 &fs_info->qgroup_rescan_progress,
+					 path, 1, 0);
+
+	pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
+		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
+		 fs_info->qgroup_rescan_progress.type,
+		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
+		 ret);
+
+	if (ret) {
+		/*
+		 * The rescan is about to end, we will not be scanning any
+		 * further blocks. We cannot unset the RESCAN flag here, because
+		 * we want to commit the transaction if everything went well.
+		 * To make the live accounting work in this phase, we set our
+		 * scan progress pointer such that every real extent objectid
+		 * will be smaller.
+		 */
+		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
+		btrfs_release_path(path);
+		mutex_unlock(&fs_info->qgroup_rescan_lock);
+		return ret;
+	}
+
+	btrfs_item_key_to_cpu(path->nodes[0], &found,
+			      btrfs_header_nritems(path->nodes[0]) - 1);
+	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;
+
+	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
+	slot = path->slots[0];
+	btrfs_release_path(path);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
+		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
+		if (found.type != BTRFS_EXTENT_ITEM_KEY)
+			continue;
+		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
+					   tree_mod_seq_elem.seq, &roots);
+		if (ret < 0)
+			goto out;
+		spin_lock(&fs_info->qgroup_lock);
+		seq = fs_info->qgroup_seq;
+		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */
+
+		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
+		if (ret) {
+			spin_unlock(&fs_info->qgroup_lock);
+			ulist_free(roots);
+			goto out;
+		}
+
+		/*
+		 * step2 of btrfs_qgroup_account_ref works from a single root,
+		 * we're doing all at once here.
+		 */
+		ulist_reinit(tmp);
+		ULIST_ITER_INIT(&uiter);
+		while ((unode = ulist_next(roots, &uiter))) {
+			struct btrfs_qgroup *qg;
+
+			qg = find_qgroup_rb(fs_info, unode->val);
+			if (!qg)
+				continue;
+
+			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
+					GFP_ATOMIC);
+			if (ret < 0) {
+				spin_unlock(&fs_info->qgroup_lock);
+				ulist_free(roots);
+				goto out;
+			}
+		}
+
+		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
+		ULIST_ITER_INIT(&uiter);
+		while ((unode = ulist_next(tmp, &uiter))) {
+			struct btrfs_qgroup *qg;
+			struct btrfs_qgroup_list *glist;
+
+			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
+			qg->rfer += found.offset;
+			qg->rfer_cmpr += found.offset;
+			WARN_ON(qg->tag >= seq);
+			if (qg->refcnt - seq == roots->nnodes) {
+				qg->excl += found.offset;
+				qg->excl_cmpr += found.offset;
+			}
+			qgroup_dirty(fs_info, qg);
+
+			list_for_each_entry(glist, &qg->groups, next_group) {
+				ret = ulist_add(tmp, glist->group->qgroupid,
+						(uintptr_t)glist->group,
+						GFP_ATOMIC);
+				if (ret < 0) {
+					spin_unlock(&fs_info->qgroup_lock);
+					ulist_free(roots);
+					goto out;
+				}
+			}
+		}
+
+		spin_unlock(&fs_info->qgroup_lock);
+		ulist_free(roots);
+		ret = 0;
+	}
+
+out:
+	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+
+	return ret;
+}
+
+static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
+{
+	struct qgroup_rescan *qscan = container_of(work, struct qgroup_rescan,
+						   work);
+	struct btrfs_path *path;
+	struct btrfs_trans_handle *trans = NULL;
+	struct btrfs_fs_info *fs_info = qscan->fs_info;
+	struct ulist *tmp = NULL;
+	struct extent_buffer *scratch_leaf = NULL;
+	int err = -ENOMEM;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		goto out;
+	tmp = ulist_alloc(GFP_NOFS);
+	if (!tmp)
+		goto out;
+	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
+	if (!scratch_leaf)
+		goto out;
+
+	err = 0;
+	while (!err) {
+		trans = btrfs_start_transaction(fs_info->fs_root, 0);
+		if (IS_ERR(trans)) {
+			err = PTR_ERR(trans);
+			break;
+		}
+		if (!fs_info->quota_enabled) {
+			err = -EINTR;
+		} else {
+			err = qgroup_rescan_leaf(qscan, path, trans,
+						 tmp, scratch_leaf);
+		}
+		if (err > 0)
+			btrfs_commit_transaction(trans, fs_info->fs_root);
+		else
+			btrfs_end_transaction(trans, fs_info->fs_root);
+	}
+
+out:
+	kfree(scratch_leaf);
+	ulist_free(tmp);
+	btrfs_free_path(path);
+	kfree(qscan);
+
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+
+	if (err == 2 &&
+	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
+		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+	} else if (err < 0) {
+		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
+	}
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	if (err >= 0) {
+		pr_info("btrfs: qgroup scan completed%s\n",
+			err == 2 ? " (inconsistency flag cleared)" : "");
+	} else {
+		pr_err("btrfs: qgroup scan failed with %d\n", err);
+	}
+}
+
+static void
+qgroup_rescan_start(struct btrfs_fs_info *fs_info, struct qgroup_rescan *qscan)
+{
+	memset(&qscan->work, 0, sizeof(qscan->work));
+	qscan->work.func = btrfs_qgroup_rescan_worker;
+	qscan->fs_info = fs_info;
+
+	pr_info("btrfs: qgroup scan started\n");
+	btrfs_queue_worker(&fs_info->qgroup_rescan_workers, &qscan->work);
+}
+
+int
+btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
+{
+	int ret = 0;
+	struct rb_node *n;
+	struct btrfs_qgroup *qgroup;
+	struct qgroup_rescan *qscan = kmalloc(sizeof(*qscan), GFP_NOFS);
+
+	if (!qscan)
+		return -ENOMEM;
+
+	mutex_lock(&fs_info->qgroup_rescan_lock);
+	spin_lock(&fs_info->qgroup_lock);
+	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
+		ret = -EINPROGRESS;
+	else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
+		ret = -EINVAL;
+	if (ret) {
+		spin_unlock(&fs_info->qgroup_lock);
+		mutex_unlock(&fs_info->qgroup_rescan_lock);
+		kfree(qscan);
+		return ret;
+	}
+
+	fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+	memset(&fs_info->qgroup_rescan_progress, 0,
+		sizeof(fs_info->qgroup_rescan_progress));
+
+	/* clear all current qgroup tracking information */
+	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
+		qgroup = rb_entry(n, struct btrfs_qgroup, node);
+		qgroup->rfer = 0;
+		qgroup->rfer_cmpr = 0;
+		qgroup->excl = 0;
+		qgroup->excl_cmpr = 0;
+	}
+	spin_unlock(&fs_info->qgroup_lock);
+	mutex_unlock(&fs_info->qgroup_rescan_lock);
+
+	qgroup_rescan_start(fs_info, qscan);
+
+	return 0;
+}
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index 5e39e859a848..5ef0df545a2a 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -376,12 +376,18 @@ struct btrfs_ioctl_get_dev_stats {
 
 #define BTRFS_QUOTA_CTL_ENABLE	1
 #define BTRFS_QUOTA_CTL_DISABLE	2
-#define BTRFS_QUOTA_CTL_RESCAN	3
+#define BTRFS_QUOTA_CTL_RESCAN__NOTUSED	3
 struct btrfs_ioctl_quota_ctl_args {
 	__u64 cmd;
 	__u64 status;
 };
 
+struct btrfs_ioctl_quota_rescan_args {
+	__u64	flags;
+	__u64   progress;
+	__u64   reserved[6];
+};
+
 struct btrfs_ioctl_qgroup_assign_args {
 	__u64 assign;
 	__u64 src;
@@ -520,6 +526,10 @@ struct btrfs_ioctl_send_args {
 			       struct btrfs_ioctl_qgroup_create_args)
 #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
 			       struct btrfs_ioctl_qgroup_limit_args)
+#define BTRFS_IOC_QUOTA_RESCAN _IOW(BTRFS_IOCTL_MAGIC, 44, \
+			       struct btrfs_ioctl_quota_rescan_args)
+#define BTRFS_IOC_QUOTA_RESCAN_STATUS _IOR(BTRFS_IOCTL_MAGIC, 45, \
+			       struct btrfs_ioctl_quota_rescan_args)
 #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \
 				   char[BTRFS_LABEL_SIZE])
 #define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \
-- 
cgit 


From 780a7654cee8d61819512385e778e4827db4bfbc Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 9 Apr 2013 02:22:10 -0700
Subject: audit: Make testing for a valid loginuid explicit.

audit rule additions containing "-F auid!=4294967295" were failing
with EINVAL because of a regression caused by e1760bd.

Apparently some userland audit rule sets want to know if loginuid uid
has been set and are using a test for auid != 4294967295 to determine
that.

In practice that is a horrible way to ask if a value has been set,
because it relies on subtle implementation details and will break
every time the uid implementation in the kernel changes.

So add a clean way to test if the audit loginuid has been set, and
silently convert the old idiom to the cleaner and more comprehensible
new idiom.

Cc: <stable@vger.kernel.org> # 3.7
Reported-By: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Tested-by: Richard Guy Briggs <rgb@redhat.com>
Signed-off-by: Eric Paris <eparis@redhat.com>
---
 include/linux/audit.h      |  5 +++++
 include/uapi/linux/audit.h |  1 +
 kernel/auditfilter.c       | 17 +++++++++++++++--
 kernel/auditsc.c           |  5 ++++-
 4 files changed, 25 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 469d11755e46..b20b03852f21 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -391,6 +391,11 @@ static inline void audit_ptrace(struct task_struct *t)
 #define audit_signals 0
 #endif /* CONFIG_AUDITSYSCALL */
 
+static inline bool audit_loginuid_set(struct task_struct *tsk)
+{
+	return uid_valid(audit_get_loginuid(tsk));
+}
+
 #ifdef CONFIG_AUDIT
 /* These are defined in audit.c */
 				/* Public API */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index c058c24b97ac..75cef3fd97ad 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -246,6 +246,7 @@
 #define AUDIT_OBJ_TYPE	21
 #define AUDIT_OBJ_LEV_LOW	22
 #define AUDIT_OBJ_LEV_HIGH	23
+#define AUDIT_LOGINUID_SET	24
 
 				/* These are ONLY useful when checking
 				 * at syscall exit time (AUDIT_AT_EXIT). */
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 478f4602c96b..bc6595fe952e 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -365,7 +365,10 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
 	case AUDIT_DIR:
 	case AUDIT_FILTERKEY:
 		break;
-	/* arch is only allowed to be = or != */
+	case AUDIT_LOGINUID_SET:
+		if ((f->val != 0) && (f->val != 1))
+			return -EINVAL;
+	/* FALL THROUGH */
 	case AUDIT_ARCH:
 		if (f->op != Audit_not_equal && f->op != Audit_equal)
 			return -EINVAL;
@@ -419,17 +422,23 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		f->lsm_str = NULL;
 		f->lsm_rule = NULL;
 
+		/* Support legacy tests for a valid loginuid */
+		if ((f->type == AUDIT_LOGINUID) && (f->val == 4294967295)) {
+			f->type = AUDIT_LOGINUID_SET;
+			f->val = 0;
+		}
+
 		err = audit_field_valid(entry, f);
 		if (err)
 			goto exit_free;
 
 		err = -EINVAL;
 		switch (f->type) {
+		case AUDIT_LOGINUID:
 		case AUDIT_UID:
 		case AUDIT_EUID:
 		case AUDIT_SUID:
 		case AUDIT_FSUID:
-		case AUDIT_LOGINUID:
 		case AUDIT_OBJ_UID:
 			f->uid = make_kuid(current_user_ns(), f->val);
 			if (!uid_valid(f->uid))
@@ -1222,6 +1231,10 @@ static int audit_filter_user_rules(struct audit_krule *rule, int type,
 			result = audit_uid_comparator(audit_get_loginuid(current),
 						  f->op, f->uid);
 			break;
+		case AUDIT_LOGINUID_SET:
+			result = audit_comparator(audit_loginuid_set(current),
+						  f->op, f->val);
+			break;
 		case AUDIT_MSGTYPE:
 			result = audit_comparator(type, f->op, f->val);
 			break;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index add3086bdb02..3c8a601324a2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -613,6 +613,9 @@ static int audit_filter_rules(struct task_struct *tsk,
 			if (ctx)
 				result = audit_uid_comparator(tsk->loginuid, f->op, f->uid);
 			break;
+		case AUDIT_LOGINUID_SET:
+			result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
+			break;
 		case AUDIT_SUBJ_USER:
 		case AUDIT_SUBJ_ROLE:
 		case AUDIT_SUBJ_TYPE:
@@ -1970,7 +1973,7 @@ int audit_set_loginuid(kuid_t loginuid)
 	unsigned int sessionid;
 
 #ifdef CONFIG_AUDIT_LOGINUID_IMMUTABLE
-	if (uid_valid(task->loginuid))
+	if (audit_loginuid_set(task))
 		return -EPERM;
 #else /* CONFIG_AUDIT_LOGINUID_IMMUTABLE */
 	if (!capable(CAP_AUDIT_CONTROL))
-- 
cgit 


From 4f924b2aa4d3cb30f07e57d6b608838edcbc0d88 Mon Sep 17 00:00:00 2001
From: Josh Boyer <jwboyer@redhat.com>
Date: Wed, 8 May 2013 09:45:47 +0000
Subject: if_cablemodem.h: Add parenthesis around ioctl macros

Protect the SIOCGCM* ioctl macros with parenthesis.

Reported-by: Paul Wouters <pwouters@redhat.com>
Signed-off-by: Josh Boyer <jwboyer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_cablemodem.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_cablemodem.h b/include/uapi/linux/if_cablemodem.h
index 9ca1007edd93..ee6b3c442baf 100644
--- a/include/uapi/linux/if_cablemodem.h
+++ b/include/uapi/linux/if_cablemodem.h
@@ -12,11 +12,11 @@
  */
 
 /* some useful defines for sb1000.c e cmconfig.c - fv */
-#define SIOCGCMSTATS		SIOCDEVPRIVATE+0	/* get cable modem stats */
-#define SIOCGCMFIRMWARE		SIOCDEVPRIVATE+1	/* get cm firmware version */
-#define SIOCGCMFREQUENCY	SIOCDEVPRIVATE+2	/* get cable modem frequency */
-#define SIOCSCMFREQUENCY	SIOCDEVPRIVATE+3	/* set cable modem frequency */
-#define SIOCGCMPIDS			SIOCDEVPRIVATE+4	/* get cable modem PIDs */
-#define SIOCSCMPIDS			SIOCDEVPRIVATE+5	/* set cable modem PIDs */
+#define SIOCGCMSTATS		(SIOCDEVPRIVATE+0)	/* get cable modem stats */
+#define SIOCGCMFIRMWARE		(SIOCDEVPRIVATE+1)	/* get cm firmware version */
+#define SIOCGCMFREQUENCY	(SIOCDEVPRIVATE+2)	/* get cable modem frequency */
+#define SIOCSCMFREQUENCY	(SIOCDEVPRIVATE+3)	/* set cable modem frequency */
+#define SIOCGCMPIDS			(SIOCDEVPRIVATE+4)	/* get cable modem PIDs */
+#define SIOCSCMPIDS			(SIOCDEVPRIVATE+5)	/* set cable modem PIDs */
 
 #endif
-- 
cgit