From a7b8829d242b1a58107e9c02b09e93aec446d55c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi83@gmail.com>
Date: Wed, 5 Jul 2017 20:30:01 +0200
Subject: iio: accel: st_accel: add SPI-3wire support

Add SPI Serial Interface Mode (SIM) register information
in st_sensor_settings look up table to support devices
(like LSM303AGR accel sensor) that allow just SPI-3wire
communication mode. SIM mode has to be configured before any
other operation since it is not enabled by default and the driver
is not able to read without that configuration

Whilst a fairly substantial patch, the actual logic is simple and it
is better to have the generic fix than a band aid.

Fixes: ddc05fa28606 (iio: st-accel: add support for lsm303agr accel)
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@st.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/common/st_sensors.h          | 7 +++++++
 include/linux/platform_data/st_sensors_pdata.h | 2 ++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 497f2b3a5a62..97f1b465d04f 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -105,6 +105,11 @@ struct st_sensor_fullscale {
 	struct st_sensor_fullscale_avl fs_avl[ST_SENSORS_FULLSCALE_AVL_MAX];
 };
 
+struct st_sensor_sim {
+	u8 addr;
+	u8 value;
+};
+
 /**
  * struct st_sensor_bdu - ST sensor device block data update
  * @addr: address of the register.
@@ -197,6 +202,7 @@ struct st_sensor_transfer_function {
  * @bdu: Block data update register.
  * @das: Data Alignment Selection register.
  * @drdy_irq: Data ready register of the sensor.
+ * @sim: SPI serial interface mode register of the sensor.
  * @multi_read_bit: Use or not particular bit for [I2C/SPI] multi-read.
  * @bootime: samples to discard when sensor passing from power-down to power-up.
  */
@@ -213,6 +219,7 @@ struct st_sensor_settings {
 	struct st_sensor_bdu bdu;
 	struct st_sensor_das das;
 	struct st_sensor_data_ready_irq drdy_irq;
+	struct st_sensor_sim sim;
 	bool multi_read_bit;
 	unsigned int bootime;
 };
diff --git a/include/linux/platform_data/st_sensors_pdata.h b/include/linux/platform_data/st_sensors_pdata.h
index 79b0e4cdb814..f8274b0c6888 100644
--- a/include/linux/platform_data/st_sensors_pdata.h
+++ b/include/linux/platform_data/st_sensors_pdata.h
@@ -17,10 +17,12 @@
  *	Available only for accelerometer and pressure sensors.
  *	Accelerometer DRDY on LSM330 available only on pin 1 (see datasheet).
  * @open_drain: set the interrupt line to be open drain if possible.
+ * @spi_3wire: enable spi-3wire mode.
  */
 struct st_sensors_platform_data {
 	u8 drdy_int_pin;
 	bool open_drain;
+	bool spi_3wire;
 };
 
 #endif /* ST_SENSORS_PDATA_H */
-- 
cgit 


From 7cfdfdc82a467c78af9132cb9c98e84415df34bc Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Mon, 10 Jul 2017 14:45:20 +0900
Subject: libata: Cleanup ata_read_log_page()

The warning message "READ LOG DMA EXT failed, trying unqueued" in
ata_read_log_page() as well as the macro name ATA_HORKAGE_NO_NCQ_LOG
are confusing: the command READ LOG DMA EXT is not an queued NCQ command
unless it is encapsulated in a RECEIVE FPDMA QUEUED command.
From ACS-4 READ LOG DMA EXT description:

"The device processes the READ LOG DMA EXT command in the NCQ feature
set environment (see 4.13.6) if the READ LOG DMA EXT command is
encapsulated in a RECEIVE FPDMA QUEUED command (see 7.30) with the
inputs encapsulated as shown in 7.23.6."

To avoid confusion, fix the warning messsage to mention switching to PIO and
not "unqueued" and rename the macro ATA_HORKAGE_NO_NCQ_LOG to
ATA_HORKAGE_NO_DMA_LOG.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/ata/libata-core.c | 6 +++---
 include/linux/libata.h    | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 8453f9a4682f..fa7dd4394c02 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2083,7 +2083,7 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
 retry:
 	ata_tf_init(dev, &tf);
 	if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id) &&
-	    !(dev->horkage & ATA_HORKAGE_NO_NCQ_LOG)) {
+	    !(dev->horkage & ATA_HORKAGE_NO_DMA_LOG)) {
 		tf.command = ATA_CMD_READ_LOG_DMA_EXT;
 		tf.protocol = ATA_PROT_DMA;
 		dma = true;
@@ -2102,8 +2102,8 @@ retry:
 				     buf, sectors * ATA_SECT_SIZE, 0);
 
 	if (err_mask && dma) {
-		dev->horkage |= ATA_HORKAGE_NO_NCQ_LOG;
-		ata_dev_warn(dev, "READ LOG DMA EXT failed, trying unqueued\n");
+		dev->horkage |= ATA_HORKAGE_NO_DMA_LOG;
+		ata_dev_warn(dev, "READ LOG DMA EXT failed, trying PIO\n");
 		goto retry;
 	}
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 55de3da58b1c..931c32f1f18d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -435,7 +435,7 @@ enum {
 	ATA_HORKAGE_NOLPM	= (1 << 20),	/* don't use LPM */
 	ATA_HORKAGE_WD_BROKEN_LPM = (1 << 21),	/* some WDs have broken LPM */
 	ATA_HORKAGE_ZERO_AFTER_TRIM = (1 << 22),/* guarantees zero after trim */
-	ATA_HORKAGE_NO_NCQ_LOG	= (1 << 23),	/* don't use NCQ for log read */
+	ATA_HORKAGE_NO_DMA_LOG	= (1 << 23),	/* don't use DMA for log read */
 	ATA_HORKAGE_NOTRIM	= (1 << 24),	/* don't use TRIM */
 	ATA_HORKAGE_MAX_SEC_1024 = (1 << 25),	/* Limit max sects to 1024 */
 
-- 
cgit 


From 36acbd9e8377c27570b887e2332a5e1f0b140e16 Mon Sep 17 00:00:00 2001
From: Faiz Abbas <faiz_abbas@ti.com>
Date: Fri, 14 Jul 2017 18:16:44 +0530
Subject: mmc: host: omap_hsmmc: remove unused platform callbacks

Remove unused callbacks in the omap_hsmmc_platform_data structure

Signed-off-by: Faiz Abbas <faiz_abbas@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/omap_hsmmc.c            | 11 -----------
 include/linux/platform_data/hsmmc-omap.h | 10 ----------
 2 files changed, 21 deletions(-)

(limited to 'include')

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 7c12f3715676..04ff3c97a535 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -356,9 +356,6 @@ static int omap_hsmmc_set_power(struct omap_hsmmc_host *host, int power_on,
 	struct mmc_host *mmc = host->mmc;
 	int ret = 0;
 
-	if (mmc_pdata(host)->set_power)
-		return mmc_pdata(host)->set_power(host->dev, power_on, vdd);
-
 	/*
 	 * If we don't see a Vcc regulator, assume it's a fixed
 	 * voltage always-on regulator.
@@ -366,9 +363,6 @@ static int omap_hsmmc_set_power(struct omap_hsmmc_host *host, int power_on,
 	if (IS_ERR(mmc->supply.vmmc))
 		return 0;
 
-	if (mmc_pdata(host)->before_set_reg)
-		mmc_pdata(host)->before_set_reg(host->dev, power_on, vdd);
-
 	ret = omap_hsmmc_set_pbias(host, false, 0);
 	if (ret)
 		return ret;
@@ -400,9 +394,6 @@ static int omap_hsmmc_set_power(struct omap_hsmmc_host *host, int power_on,
 			return ret;
 	}
 
-	if (mmc_pdata(host)->after_set_reg)
-		mmc_pdata(host)->after_set_reg(host->dev, power_on, vdd);
-
 	return 0;
 
 err_set_voltage:
@@ -469,8 +460,6 @@ static int omap_hsmmc_reg_get(struct omap_hsmmc_host *host)
 	int ret;
 	struct mmc_host *mmc = host->mmc;
 
-	if (mmc_pdata(host)->set_power)
-		return 0;
 
 	ret = mmc_regulator_get_supply(mmc);
 	if (ret == -EPROBE_DEFER)
diff --git a/include/linux/platform_data/hsmmc-omap.h b/include/linux/platform_data/hsmmc-omap.h
index 8e981be2e2c2..0ff1e0dba720 100644
--- a/include/linux/platform_data/hsmmc-omap.h
+++ b/include/linux/platform_data/hsmmc-omap.h
@@ -55,9 +55,6 @@ struct omap_hsmmc_platform_data {
 	u32 caps;	/* Used for the MMC driver on 2430 and later */
 	u32 pm_caps;	/* PM capabilities of the mmc */
 
-	/* use the internal clock */
-	unsigned internal_clock:1;
-
 	/* nonremovable e.g. eMMC */
 	unsigned nonremovable:1;
 
@@ -73,13 +70,6 @@ struct omap_hsmmc_platform_data {
 	int gpio_cd;			/* gpio (card detect) */
 	int gpio_cod;			/* gpio (cover detect) */
 	int gpio_wp;			/* gpio (write protect) */
-
-	int (*set_power)(struct device *dev, int power_on, int vdd);
-	void (*remux)(struct device *dev, int power_on);
-	/* Call back before enabling / disabling regulators */
-	void (*before_set_reg)(struct device *dev, int power_on, int vdd);
-	/* Call back after enabling / disabling regulators */
-	void (*after_set_reg)(struct device *dev, int power_on, int vdd);
 	/* if we have special card, init it using this callback */
 	void (*init_card)(struct mmc_card *card);
 
-- 
cgit 


From c641e5b207ed7dfaa692820aeb5b6dde3de3e9b0 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 12 Jul 2017 17:55:29 +0200
Subject: ASoC: fix pcm-creation regression

This reverts commit 99b04f4c4051 ("ASoC: add Component level
pcm_new/pcm_free"), which started calling the pcm_new callback for every
component in a *card* when creating a new pcm, something which does not
seem to make any sense.

This specifically led to memory leaks in systems with more than one
platform component and where DMA memory is allocated in the
platform-driver callback. For example, when both mcasp devices are being
used on an am335x board, DMA memory would be allocated twice for every
DAI link during probe.

When CONFIG_SND_VERBOSE_PROCFS was set this fortunately also led to
warnings such as:

WARNING: CPU: 0 PID: 565 at ../fs/proc/generic.c:346 proc_register+0x110/0x154
proc_dir_entry 'sub0/prealloc' already registered

Since there seems to be no users of the new component callbacks, and the
current implementation introduced a regression, let's revert the
offending commit for now.

Fixes: 99b04f4c4051 ("ASoC: add Component level pcm_new/pcm_free")
Signed-off-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable <stable@vger.kernel.org>	# 4.10
---
 include/sound/soc.h  |  6 ------
 sound/soc/soc-core.c | 25 -------------------------
 sound/soc/soc-pcm.c  | 32 +++++++++-----------------------
 3 files changed, 9 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 9c94b97c17f8..c4a8b1947566 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -795,10 +795,6 @@ struct snd_soc_component_driver {
 	int (*suspend)(struct snd_soc_component *);
 	int (*resume)(struct snd_soc_component *);
 
-	/* pcm creation and destruction */
-	int (*pcm_new)(struct snd_soc_pcm_runtime *);
-	void (*pcm_free)(struct snd_pcm *);
-
 	/* DT */
 	int (*of_xlate_dai_name)(struct snd_soc_component *component,
 				 struct of_phandle_args *args,
@@ -874,8 +870,6 @@ struct snd_soc_component {
 	void (*remove)(struct snd_soc_component *);
 	int (*suspend)(struct snd_soc_component *);
 	int (*resume)(struct snd_soc_component *);
-	int (*pcm_new)(struct snd_soc_pcm_runtime *);
-	void (*pcm_free)(struct snd_pcm *);
 
 	/* machine specific init */
 	int (*init)(struct snd_soc_component *component);
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 921622a01944..c240e13ba057 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -3171,8 +3171,6 @@ static int snd_soc_component_initialize(struct snd_soc_component *component,
 	component->remove = component->driver->remove;
 	component->suspend = component->driver->suspend;
 	component->resume = component->driver->resume;
-	component->pcm_new = component->driver->pcm_new;
-	component->pcm_free = component->driver->pcm_free;
 
 	dapm = &component->dapm;
 	dapm->dev = dev;
@@ -3360,25 +3358,6 @@ static void snd_soc_platform_drv_remove(struct snd_soc_component *component)
 	platform->driver->remove(platform);
 }
 
-static int snd_soc_platform_drv_pcm_new(struct snd_soc_pcm_runtime *rtd)
-{
-	struct snd_soc_platform *platform = rtd->platform;
-
-	if (platform->driver->pcm_new)
-		return platform->driver->pcm_new(rtd);
-	else
-		return 0;
-}
-
-static void snd_soc_platform_drv_pcm_free(struct snd_pcm *pcm)
-{
-	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
-	struct snd_soc_platform *platform = rtd->platform;
-
-	if (platform->driver->pcm_free)
-		platform->driver->pcm_free(pcm);
-}
-
 /**
  * snd_soc_add_platform - Add a platform to the ASoC core
  * @dev: The parent device for the platform
@@ -3402,10 +3381,6 @@ int snd_soc_add_platform(struct device *dev, struct snd_soc_platform *platform,
 		platform->component.probe = snd_soc_platform_drv_probe;
 	if (platform_drv->remove)
 		platform->component.remove = snd_soc_platform_drv_remove;
-	if (platform_drv->pcm_new)
-		platform->component.pcm_new = snd_soc_platform_drv_pcm_new;
-	if (platform_drv->pcm_free)
-		platform->component.pcm_free = snd_soc_platform_drv_pcm_free;
 
 #ifdef CONFIG_DEBUG_FS
 	platform->component.debugfs_prefix = "platform";
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index dcc5ece08668..553f7a76743c 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2628,25 +2628,12 @@ static int dpcm_fe_dai_close(struct snd_pcm_substream *fe_substream)
 	return ret;
 }
 
-static void soc_pcm_free(struct snd_pcm *pcm)
-{
-	struct snd_soc_pcm_runtime *rtd = pcm->private_data;
-	struct snd_soc_component *component;
-
-	list_for_each_entry(component, &rtd->card->component_dev_list,
-			    card_list) {
-		if (component->pcm_free)
-			component->pcm_free(pcm);
-	}
-}
-
 /* create a new pcm */
 int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 {
 	struct snd_soc_platform *platform = rtd->platform;
 	struct snd_soc_dai *codec_dai;
 	struct snd_soc_dai *cpu_dai = rtd->cpu_dai;
-	struct snd_soc_component *component;
 	struct snd_pcm *pcm;
 	char new_name[64];
 	int ret = 0, playback = 0, capture = 0;
@@ -2756,18 +2743,17 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 	if (capture)
 		snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &rtd->ops);
 
-	list_for_each_entry(component, &rtd->card->component_dev_list, card_list) {
-		if (component->pcm_new) {
-			ret = component->pcm_new(rtd);
-			if (ret < 0) {
-				dev_err(component->dev,
-					"ASoC: pcm constructor failed: %d\n",
-					ret);
-				return ret;
-			}
+	if (platform->driver->pcm_new) {
+		ret = platform->driver->pcm_new(rtd);
+		if (ret < 0) {
+			dev_err(platform->dev,
+				"ASoC: pcm constructor failed: %d\n",
+				ret);
+			return ret;
 		}
 	}
-	pcm->private_free = soc_pcm_free;
+
+	pcm->private_free = platform->driver->pcm_free;
 out:
 	dev_info(rtd->card->dev, "%s <-> %s mapping ok\n",
 		 (rtd->num_codecs > 1) ? "multicodec" : rtd->codec_dai->name,
-- 
cgit 


From 43fc509c3efb5c973991ee24c449ab2a0d71dd1e Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Thu, 20 Jul 2017 11:19:58 +0100
Subject: dma-coherent: introduce interface for default DMA pool

Christoph noticed [1] that default DMA pool in current form overload
the DMA coherent infrastructure. In reply, Robin suggested [2] to
split the per-device vs. global pool interfaces, so allocation/release
from default DMA pool is driven by dma ops implementation.

This patch implements Robin's idea and provide interface to
allocate/release/mmap the default (aka global) DMA pool.

To make it clear that existing *_from_coherent routines work on
per-device pool rename them to *_from_dev_coherent.

[1] https://lkml.org/lkml/2017/7/7/370
[2] https://lkml.org/lkml/2017/7/7/431

Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Suggested-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Andras Szemzo <sza@esh.hu>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arc/mm/dma.c           |   2 +-
 arch/arm/mm/dma-mapping.c   |   2 +-
 arch/arm64/mm/dma-mapping.c |   4 +-
 arch/mips/mm/dma-default.c  |   2 +-
 drivers/base/dma-coherent.c | 164 ++++++++++++++++++++++++++++----------------
 drivers/base/dma-mapping.c  |   2 +-
 include/linux/dma-mapping.h |  40 ++++++++---
 7 files changed, 144 insertions(+), 72 deletions(-)

(limited to 'include')

diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 2a07e6ecafbd..71d3efff99d3 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -117,7 +117,7 @@ static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
 
 	if (off < count && user_count <= (count - off)) {
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index e7380bafbfa6..fcf1473d6fed 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -851,7 +851,7 @@ static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 	unsigned long pfn = dma_to_pfn(dev, dma_addr);
 	unsigned long off = vma->vm_pgoff;
 
-	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
 
 	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index e90cd1db42a8..f27d4dd04384 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -329,7 +329,7 @@ static int __swiotlb_mmap(struct device *dev,
 	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
 					     is_device_dma_coherent(dev));
 
-	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
 
 	return __swiotlb_mmap_pfn(vma, pfn, size);
@@ -706,7 +706,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
 					     is_device_dma_coherent(dev));
 
-	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
 
 	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index e08598c70b3e..8e78251eccc2 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -232,7 +232,7 @@ static int mips_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 	else
 		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
 
 	if (off < count && user_count <= (count - off)) {
diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
index 2ae24c28e70c..1c152aed6b82 100644
--- a/drivers/base/dma-coherent.c
+++ b/drivers/base/dma-coherent.c
@@ -25,7 +25,7 @@ static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *de
 {
 	if (dev && dev->dma_mem)
 		return dev->dma_mem;
-	return dma_coherent_default_memory;
+	return NULL;
 }
 
 static inline dma_addr_t dma_get_device_base(struct device *dev,
@@ -165,34 +165,15 @@ void *dma_mark_declared_memory_occupied(struct device *dev,
 }
 EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
 
-/**
- * dma_alloc_from_coherent() - try to allocate memory from the per-device coherent area
- *
- * @dev:	device from which we allocate memory
- * @size:	size of requested memory area
- * @dma_handle:	This will be filled with the correct dma handle
- * @ret:	This pointer will be filled with the virtual address
- *		to allocated area.
- *
- * This function should be only called from per-arch dma_alloc_coherent()
- * to support allocation from per-device coherent memory pools.
- *
- * Returns 0 if dma_alloc_coherent should continue with allocating from
- * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
- */
-int dma_alloc_from_coherent(struct device *dev, ssize_t size,
-				       dma_addr_t *dma_handle, void **ret)
+static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
+		ssize_t size, dma_addr_t *dma_handle)
 {
-	struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
 	int order = get_order(size);
 	unsigned long flags;
 	int pageno;
 	int dma_memory_map;
+	void *ret;
 
-	if (!mem)
-		return 0;
-
-	*ret = NULL;
 	spin_lock_irqsave(&mem->spinlock, flags);
 
 	if (unlikely(size > (mem->size << PAGE_SHIFT)))
@@ -203,21 +184,50 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
 		goto err;
 
 	/*
-	 * Memory was found in the per-device area.
+	 * Memory was found in the coherent area.
 	 */
-	*dma_handle = dma_get_device_base(dev, mem) + (pageno << PAGE_SHIFT);
-	*ret = mem->virt_base + (pageno << PAGE_SHIFT);
+	*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
+	ret = mem->virt_base + (pageno << PAGE_SHIFT);
 	dma_memory_map = (mem->flags & DMA_MEMORY_MAP);
 	spin_unlock_irqrestore(&mem->spinlock, flags);
 	if (dma_memory_map)
-		memset(*ret, 0, size);
+		memset(ret, 0, size);
 	else
-		memset_io(*ret, 0, size);
+		memset_io(ret, 0, size);
 
-	return 1;
+	return ret;
 
 err:
 	spin_unlock_irqrestore(&mem->spinlock, flags);
+	return NULL;
+}
+
+/**
+ * dma_alloc_from_dev_coherent() - allocate memory from device coherent pool
+ * @dev:	device from which we allocate memory
+ * @size:	size of requested memory area
+ * @dma_handle:	This will be filled with the correct dma handle
+ * @ret:	This pointer will be filled with the virtual address
+ *		to allocated area.
+ *
+ * This function should be only called from per-arch dma_alloc_coherent()
+ * to support allocation from per-device coherent memory pools.
+ *
+ * Returns 0 if dma_alloc_coherent should continue with allocating from
+ * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
+ */
+int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
+		dma_addr_t *dma_handle, void **ret)
+{
+	struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
+
+	if (!mem)
+		return 0;
+
+	*ret = __dma_alloc_from_coherent(mem, size, dma_handle);
+	if (*ret)
+		return 1;
+
 	/*
 	 * In the case where the allocation can not be satisfied from the
 	 * per-device area, try to fall back to generic memory if the
@@ -225,25 +235,20 @@ err:
 	 */
 	return mem->flags & DMA_MEMORY_EXCLUSIVE;
 }
-EXPORT_SYMBOL(dma_alloc_from_coherent);
+EXPORT_SYMBOL(dma_alloc_from_dev_coherent);
 
-/**
- * dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool
- * @dev:	device from which the memory was allocated
- * @order:	the order of pages allocated
- * @vaddr:	virtual address of allocated pages
- *
- * This checks whether the memory was allocated from the per-device
- * coherent memory pool and if so, releases that memory.
- *
- * Returns 1 if we correctly released the memory, or 0 if
- * dma_release_coherent() should proceed with releasing memory from
- * generic pools.
- */
-int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
+void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
 {
-	struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
+	if (!dma_coherent_default_memory)
+		return NULL;
+
+	return __dma_alloc_from_coherent(dma_coherent_default_memory, size,
+			dma_handle);
+}
 
+static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
+				       int order, void *vaddr)
+{
 	if (mem && vaddr >= mem->virt_base && vaddr <
 		   (mem->virt_base + (mem->size << PAGE_SHIFT))) {
 		int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
@@ -256,28 +261,39 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
 	}
 	return 0;
 }
-EXPORT_SYMBOL(dma_release_from_coherent);
 
 /**
- * dma_mmap_from_coherent() - try to mmap the memory allocated from
- * per-device coherent memory pool to userspace
+ * dma_release_from_dev_coherent() - free memory to device coherent memory pool
  * @dev:	device from which the memory was allocated
- * @vma:	vm_area for the userspace memory
- * @vaddr:	cpu address returned by dma_alloc_from_coherent
- * @size:	size of the memory buffer allocated by dma_alloc_from_coherent
- * @ret:	result from remap_pfn_range()
+ * @order:	the order of pages allocated
+ * @vaddr:	virtual address of allocated pages
  *
  * This checks whether the memory was allocated from the per-device
- * coherent memory pool and if so, maps that memory to the provided vma.
+ * coherent memory pool and if so, releases that memory.
  *
- * Returns 1 if we correctly mapped the memory, or 0 if the caller should
- * proceed with mapping memory from generic pools.
+ * Returns 1 if we correctly released the memory, or 0 if the caller should
+ * proceed with releasing memory from generic pools.
  */
-int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
-			   void *vaddr, size_t size, int *ret)
+int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr)
 {
 	struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
 
+	return __dma_release_from_coherent(mem, order, vaddr);
+}
+EXPORT_SYMBOL(dma_release_from_dev_coherent);
+
+int dma_release_from_global_coherent(int order, void *vaddr)
+{
+	if (!dma_coherent_default_memory)
+		return 0;
+
+	return __dma_release_from_coherent(dma_coherent_default_memory, order,
+			vaddr);
+}
+
+static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem,
+		struct vm_area_struct *vma, void *vaddr, size_t size, int *ret)
+{
 	if (mem && vaddr >= mem->virt_base && vaddr + size <=
 		   (mem->virt_base + (mem->size << PAGE_SHIFT))) {
 		unsigned long off = vma->vm_pgoff;
@@ -296,7 +312,39 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
 	}
 	return 0;
 }
-EXPORT_SYMBOL(dma_mmap_from_coherent);
+
+/**
+ * dma_mmap_from_dev_coherent() - mmap memory from the device coherent pool
+ * @dev:	device from which the memory was allocated
+ * @vma:	vm_area for the userspace memory
+ * @vaddr:	cpu address returned by dma_alloc_from_dev_coherent
+ * @size:	size of the memory buffer allocated
+ * @ret:	result from remap_pfn_range()
+ *
+ * This checks whether the memory was allocated from the per-device
+ * coherent memory pool and if so, maps that memory to the provided vma.
+ *
+ * Returns 1 if we correctly mapped the memory, or 0 if the caller should
+ * proceed with mapping memory from generic pools.
+ */
+int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
+			   void *vaddr, size_t size, int *ret)
+{
+	struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
+
+	return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret);
+}
+EXPORT_SYMBOL(dma_mmap_from_dev_coherent);
+
+int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr,
+				   size_t size, int *ret)
+{
+	if (!dma_coherent_default_memory)
+		return 0;
+
+	return __dma_mmap_from_coherent(dma_coherent_default_memory, vma,
+					vaddr, size, ret);
+}
 
 /*
  * Support for reserved memory regions defined in device tree
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
index 5096755d185e..b555ff9dd8fc 100644
--- a/drivers/base/dma-mapping.c
+++ b/drivers/base/dma-mapping.c
@@ -235,7 +235,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
+	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
 
 	if (off < count && user_count <= (count - off)) {
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 843ab866e0f4..03c0196a6f24 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -157,16 +157,40 @@ static inline int is_device_dma_capable(struct device *dev)
  * These three functions are only for dma allocator.
  * Don't use them in device drivers.
  */
-int dma_alloc_from_coherent(struct device *dev, ssize_t size,
+int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
 				       dma_addr_t *dma_handle, void **ret);
-int dma_release_from_coherent(struct device *dev, int order, void *vaddr);
+int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr);
 
-int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma,
+int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
 			    void *cpu_addr, size_t size, int *ret);
+
+void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle);
+int dma_release_from_global_coherent(int order, void *vaddr);
+int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr,
+				  size_t size, int *ret);
+
 #else
-#define dma_alloc_from_coherent(dev, size, handle, ret) (0)
-#define dma_release_from_coherent(dev, order, vaddr) (0)
-#define dma_mmap_from_coherent(dev, vma, vaddr, order, ret) (0)
+#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0)
+#define dma_release_from_dev_coherent(dev, order, vaddr) (0)
+#define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0)
+
+static inline void *dma_alloc_from_global_coherent(ssize_t size,
+						   dma_addr_t *dma_handle)
+{
+	return NULL;
+}
+
+static inline int dma_release_from_global_coherent(int order, void *vaddr)
+{
+	return 0;
+}
+
+static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma,
+						void *cpu_addr, size_t size,
+						int *ret)
+{
+	return 0;
+}
 #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */
 
 #ifdef CONFIG_HAS_DMA
@@ -481,7 +505,7 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size,
 
 	BUG_ON(!ops);
 
-	if (dma_alloc_from_coherent(dev, size, dma_handle, &cpu_addr))
+	if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
 		return cpu_addr;
 
 	if (!arch_dma_alloc_attrs(&dev, &flag))
@@ -503,7 +527,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size,
 	BUG_ON(!ops);
 	WARN_ON(irqs_disabled());
 
-	if (dma_release_from_coherent(dev, get_order(size), cpu_addr))
+	if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr))
 		return;
 
 	if (!ops->free || !cpu_addr)
-- 
cgit 


From 832e4c83abc5ec25af77db6c8a0f36d78f1cf825 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 11 May 2017 09:16:24 +0200
Subject: uuid: remove uuid_be

Everything uses uuid_t now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 include/linux/uuid.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'include')

diff --git a/include/linux/uuid.h b/include/linux/uuid.h
index 2251e1925ea4..33b0bdbb613c 100644
--- a/include/linux/uuid.h
+++ b/include/linux/uuid.h
@@ -84,26 +84,12 @@ int guid_parse(const char *uuid, guid_t *u);
 int uuid_parse(const char *uuid, uuid_t *u);
 
 /* backwards compatibility, don't use in new code */
-typedef uuid_t uuid_be;
-#define UUID_BE(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7) \
-	UUID_INIT(a, _b, c, d0, d1, d2, d3, d4, d5, d6, d7)
-#define NULL_UUID_BE 							\
-	UUID_BE(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00,	\
-	     0x00, 0x00, 0x00, 0x00)
-
 #define uuid_le_gen(u)		guid_gen(u)
-#define uuid_be_gen(u)		uuid_gen(u)
 #define uuid_le_to_bin(guid, u)	guid_parse(guid, u)
-#define uuid_be_to_bin(uuid, u)	uuid_parse(uuid, u)
 
 static inline int uuid_le_cmp(const guid_t u1, const guid_t u2)
 {
 	return memcmp(&u1, &u2, sizeof(guid_t));
 }
 
-static inline int uuid_be_cmp(const uuid_t u1, const uuid_t u2)
-{
-	return memcmp(&u1, &u2, sizeof(uuid_t));
-}
-
 #endif
-- 
cgit 


From a0c2d9c1de0f6be33fe817069b06663277153c20 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Fri, 21 Jul 2017 16:51:23 -0600
Subject: ACPI: NUMA: add missing include in acpi_numa.h

Right now if a file includes acpi_numa.h and they don't happen to include
linux/numa.h before it, they get the following warning:

./include/acpi/acpi_numa.h:9:5: warning: "MAX_NUMNODES" is not defined [-Wundef]
 #if MAX_NUMNODES > 256
     ^~~~~~~~~~~~

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpi_numa.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h
index d4b72944ccda..1e3a74f94131 100644
--- a/include/acpi/acpi_numa.h
+++ b/include/acpi/acpi_numa.h
@@ -3,6 +3,7 @@
 
 #ifdef CONFIG_ACPI_NUMA
 #include <linux/kernel.h>
+#include <linux/numa.h>
 
 /* Proximity bitmap length */
 #if MAX_NUMNODES > 256
-- 
cgit 


From 6c423f5751b9f68bfe7c7545519d4c7159f93e1b Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Mon, 24 Jul 2017 13:58:00 -0600
Subject: sched/wait: Clean up some documentation warnings

A couple of kerneldoc comments in <linux/wait.h> had incorrect names for
macro parameters, with this unsightly result:

  ./include/linux/wait.h:555: warning: No description found for parameter 'wq'
  ./include/linux/wait.h:555: warning: Excess function parameter 'wq_head' description in 'wait_event_interruptible_hrtimeout'
  ./include/linux/wait.h:759: warning: No description found for parameter 'wq_head'
  ./include/linux/wait.h:759: warning: Excess function parameter 'wq' description in 'wait_event_killable'

Correct the comments and kill the warnings.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-doc@vger.kernel.org
Link: http://lkml.kernel.org/r/20170724135800.769c4042@lwn.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/wait.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index b289c96151ee..5b74e36c0ca8 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -529,13 +529,13 @@ do {										\
 
 /**
  * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses
- * @wq_head: the waitqueue to wait on
+ * @wq: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  * @timeout: timeout, as a ktime_t
  *
  * The process is put to sleep (TASK_INTERRUPTIBLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq_head is woken up.
+ * The @condition is checked each time the waitqueue @wq is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
@@ -735,12 +735,12 @@ extern int do_wait_intr_irq(wait_queue_head_t *, wait_queue_entry_t *);
 
 /**
  * wait_event_killable - sleep until a condition gets true
- * @wq: the waitqueue to wait on
+ * @wq_head: the waitqueue to wait on
  * @condition: a C expression for the event to wait for
  *
  * The process is put to sleep (TASK_KILLABLE) until the
  * @condition evaluates to true or a signal is received.
- * The @condition is checked each time the waitqueue @wq is woken up.
+ * The @condition is checked each time the waitqueue @wq_head is woken up.
  *
  * wake_up() has to be called after changing any variable that could
  * change the result of the wait condition.
-- 
cgit 


From d9f89b4e9290e46cd9b273e9ad0bff0f93e86fae Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Sat, 1 Jul 2017 18:26:54 +0200
Subject: KVM: arm/arm64: PMU: Fix overflow interrupt injection

kvm_pmu_overflow_set() is called from perf's interrupt handler,
making the call of kvm_vgic_inject_irq() from it introduced with
"KVM: arm/arm64: PMU: remove request-less vcpu kick" a really bad
idea, as it's quite easy to try and retake a lock that the
interrupted context is already holding. The fix is to use a vcpu
kick, leaving the interrupt injection to kvm_pmu_sync_hwstate(),
like it was doing before the refactoring. We don't just revert,
though, because before the kick was request-less, leaving the vcpu
exposed to the request-less vcpu kick race, and also because the
kick was used unnecessarily from register access handlers.

Reviewed-by: Christoffer Dall <cdall@linaro.org>
Signed-off-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/sys_regs.c |  2 +-
 include/kvm/arm_pmu.h     |  2 --
 virt/kvm/arm/pmu.c        | 43 +++++++++++++++----------------------------
 3 files changed, 16 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 77862881ae86..2e070d3baf9f 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -764,7 +764,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	if (p->is_write) {
 		if (r->CRm & 0x2)
 			/* accessing PMOVSSET_EL0 */
-			kvm_pmu_overflow_set(vcpu, p->regval & mask);
+			vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
 		else
 			/* accessing PMOVSCLR_EL0 */
 			vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index f6e030617467..f87fe20fcb05 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -48,7 +48,6 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu);
 void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val);
 void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val);
-void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val);
 void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu);
 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu);
@@ -86,7 +85,6 @@ static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
 static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {}
 static inline void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) {}
 static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {}
-static inline void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) {}
 static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {}
 static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {}
 static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index fc8a723ff387..8a9c42366db7 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -203,11 +203,15 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
 	return reg;
 }
 
-static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu)
+static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	bool overflow = !!kvm_pmu_overflow_status(vcpu);
+	bool overflow;
+
+	if (!kvm_arm_pmu_v3_ready(vcpu))
+		return;
 
+	overflow = !!kvm_pmu_overflow_status(vcpu);
 	if (pmu->irq_level == overflow)
 		return;
 
@@ -215,33 +219,11 @@ static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu)
 
 	if (likely(irqchip_in_kernel(vcpu->kvm))) {
 		int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-					      pmu->irq_num, overflow,
-					      &vcpu->arch.pmu);
+					      pmu->irq_num, overflow, pmu);
 		WARN_ON(ret);
 	}
 }
 
-/**
- * kvm_pmu_overflow_set - set PMU overflow interrupt
- * @vcpu: The vcpu pointer
- * @val: the value guest writes to PMOVSSET register
- */
-void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val)
-{
-	if (val == 0)
-		return;
-
-	vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val;
-	kvm_pmu_check_overflow(vcpu);
-}
-
-static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
-{
-	if (!kvm_arm_pmu_v3_ready(vcpu))
-		return;
-	kvm_pmu_check_overflow(vcpu);
-}
-
 bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
@@ -303,7 +285,7 @@ static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
 }
 
 /**
- * When perf event overflows, call kvm_pmu_overflow_set to set overflow status.
+ * When the perf event overflows, set the overflow status and inform the vcpu.
  */
 static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
 				  struct perf_sample_data *data,
@@ -313,7 +295,12 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
 	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
 	int idx = pmc->idx;
 
-	kvm_pmu_overflow_set(vcpu, BIT(idx));
+	vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
+
+	if (kvm_pmu_overflow_status(vcpu)) {
+		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
 }
 
 /**
@@ -341,7 +328,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
 			reg = lower_32_bits(reg);
 			vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
 			if (!reg)
-				kvm_pmu_overflow_set(vcpu, BIT(i));
+				vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
 		}
 	}
 }
-- 
cgit 


From 2fd4167fadd1360ab015e4f0e88e51843e49556c Mon Sep 17 00:00:00 2001
From: Jon Derrick <jonathan.derrick@intel.com>
Date: Wed, 12 Jul 2017 10:58:19 -0600
Subject: nvme: fabrics commands should use the fctype field for data direction

Fabrics commands with opcode 0x7F use the fctype field to indicate data
direction.

Signed-off-by: Jon Derrick <jonathan.derrick@intel.com>
Reviewed-by: Sagi Grimberg <sai@grmberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Fixes: eb793e2c ("nvme.h: add NVMe over Fabrics definitions")
---
 include/linux/nvme.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index bc74da018bdc..25d8225dbd04 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1006,7 +1006,7 @@ static inline bool nvme_is_write(struct nvme_command *cmd)
 	 * Why can't we simply have a Fabrics In and Fabrics out command?
 	 */
 	if (unlikely(cmd->common.opcode == nvme_fabrics_command))
-		return cmd->fabrics.opcode & 1;
+		return cmd->fabrics.fctype & 1;
 	return cmd->common.opcode & 1;
 }
 
-- 
cgit 


From 9c5358e15ca12ed3dc3b1e51671dee5d155de8e0 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Mon, 17 Jul 2017 13:59:39 -0700
Subject: nvme-fc: revise TRADDR parsing

The FC-NVME spec hasn't locked down on the format string for TRADDR.
Currently the spec is lobbying for "nn-<16hexdigits>:pn-<16hexdigits>"
where the wwn's are hex values but not prefixed by 0x.

Most implementations so far expect a string format of
"nn-0x<16hexdigits>:pn-0x<16hexdigits>" to be used. The transport
uses the match_u64 parser which requires a leading 0x prefix to set
the base properly. If it's not there, a match will either fail or return
a base 10 value.

The resolution in T11 is pushing out. Therefore, to fix things now and
to cover any eventuality and any implementations already in the field,
this patch adds support for both formats.

The change consists of replacing the token matching routine with a
routine that validates the fixed string format, and then builds
a local copy of the hex name with a 0x prefix before calling
the system parser.

Note: the same parser routine exists in both the initiator and target
transports. Given this is about the only "shared" item, we chose to
replicate rather than create an interdendency on some shared code.

Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c   | 102 ++++++++++++++++++++++++-----------------------
 drivers/nvme/target/fc.c | 101 ++++++++++++++++++++++++----------------------
 include/linux/nvme-fc.h  |  19 +++++++++
 3 files changed, 125 insertions(+), 97 deletions(-)

(limited to 'include')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 5630ca46c3b5..5c2a08ef08ba 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2805,66 +2805,70 @@ out_fail:
 	return ERR_PTR(ret);
 }
 
-enum {
-	FCT_TRADDR_ERR		= 0,
-	FCT_TRADDR_WWNN		= 1 << 0,
-	FCT_TRADDR_WWPN		= 1 << 1,
-};
 
 struct nvmet_fc_traddr {
 	u64	nn;
 	u64	pn;
 };
 
-static const match_table_t traddr_opt_tokens = {
-	{ FCT_TRADDR_WWNN,	"nn-%s"		},
-	{ FCT_TRADDR_WWPN,	"pn-%s"		},
-	{ FCT_TRADDR_ERR,	NULL		}
-};
-
 static int
-nvme_fc_parse_address(struct nvmet_fc_traddr *traddr, char *buf)
+__nvme_fc_parse_u64(substring_t *sstr, u64 *val)
 {
-	substring_t args[MAX_OPT_ARGS];
-	char *options, *o, *p;
-	int token, ret = 0;
 	u64 token64;
 
-	options = o = kstrdup(buf, GFP_KERNEL);
-	if (!options)
-		return -ENOMEM;
+	if (match_u64(sstr, &token64))
+		return -EINVAL;
+	*val = token64;
 
-	while ((p = strsep(&o, ":\n")) != NULL) {
-		if (!*p)
-			continue;
+	return 0;
+}
 
-		token = match_token(p, traddr_opt_tokens, args);
-		switch (token) {
-		case FCT_TRADDR_WWNN:
-			if (match_u64(args, &token64)) {
-				ret = -EINVAL;
-				goto out;
-			}
-			traddr->nn = token64;
-			break;
-		case FCT_TRADDR_WWPN:
-			if (match_u64(args, &token64)) {
-				ret = -EINVAL;
-				goto out;
-			}
-			traddr->pn = token64;
-			break;
-		default:
-			pr_warn("unknown traddr token or missing value '%s'\n",
-					p);
-			ret = -EINVAL;
-			goto out;
-		}
-	}
+/*
+ * This routine validates and extracts the WWN's from the TRADDR string.
+ * As kernel parsers need the 0x to determine number base, universally
+ * build string to parse with 0x prefix before parsing name strings.
+ */
+static int
+nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen)
+{
+	char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1];
+	substring_t wwn = { name, &name[sizeof(name)-1] };
+	int nnoffset, pnoffset;
+
+	/* validate it string one of the 2 allowed formats */
+	if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
+			!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
+			!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
+				"pn-0x", NVME_FC_TRADDR_OXNNLEN)) {
+		nnoffset = NVME_FC_TRADDR_OXNNLEN;
+		pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET +
+						NVME_FC_TRADDR_OXNNLEN;
+	} else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH &&
+			!strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) &&
+			!strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET],
+				"pn-", NVME_FC_TRADDR_NNLEN))) {
+		nnoffset = NVME_FC_TRADDR_NNLEN;
+		pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN;
+	} else
+		goto out_einval;
 
-out:
-	kfree(options);
-	return ret;
+	name[0] = '0';
+	name[1] = 'x';
+	name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0;
+
+	memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN);
+	if (__nvme_fc_parse_u64(&wwn, &traddr->nn))
+		goto out_einval;
+
+	memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN);
+	if (__nvme_fc_parse_u64(&wwn, &traddr->pn))
+		goto out_einval;
+
+	return 0;
+
+out_einval:
+	pr_warn("%s: bad traddr string\n", __func__);
+	return -EINVAL;
 }
 
 static struct nvme_ctrl *
@@ -2878,11 +2882,11 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
 	unsigned long flags;
 	int ret;
 
-	ret = nvme_fc_parse_address(&raddr, opts->traddr);
+	ret = nvme_fc_parse_traddr(&raddr, opts->traddr, NVMF_TRADDR_SIZE);
 	if (ret || !raddr.nn || !raddr.pn)
 		return ERR_PTR(-EINVAL);
 
-	ret = nvme_fc_parse_address(&laddr, opts->host_traddr);
+	ret = nvme_fc_parse_traddr(&laddr, opts->host_traddr, NVMF_TRADDR_SIZE);
 	if (ret || !laddr.nn || !laddr.pn)
 		return ERR_PTR(-EINVAL);
 
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index d5801c150b1c..31ca55dfcb1d 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2293,66 +2293,70 @@ nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *target_port,
 }
 EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_abort);
 
-enum {
-	FCT_TRADDR_ERR		= 0,
-	FCT_TRADDR_WWNN		= 1 << 0,
-	FCT_TRADDR_WWPN		= 1 << 1,
-};
 
 struct nvmet_fc_traddr {
 	u64	nn;
 	u64	pn;
 };
 
-static const match_table_t traddr_opt_tokens = {
-	{ FCT_TRADDR_WWNN,	"nn-%s"		},
-	{ FCT_TRADDR_WWPN,	"pn-%s"		},
-	{ FCT_TRADDR_ERR,	NULL		}
-};
-
 static int
-nvmet_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf)
+__nvme_fc_parse_u64(substring_t *sstr, u64 *val)
 {
-	substring_t args[MAX_OPT_ARGS];
-	char *options, *o, *p;
-	int token, ret = 0;
 	u64 token64;
 
-	options = o = kstrdup(buf, GFP_KERNEL);
-	if (!options)
-		return -ENOMEM;
+	if (match_u64(sstr, &token64))
+		return -EINVAL;
+	*val = token64;
 
-	while ((p = strsep(&o, ":\n")) != NULL) {
-		if (!*p)
-			continue;
+	return 0;
+}
 
-		token = match_token(p, traddr_opt_tokens, args);
-		switch (token) {
-		case FCT_TRADDR_WWNN:
-			if (match_u64(args, &token64)) {
-				ret = -EINVAL;
-				goto out;
-			}
-			traddr->nn = token64;
-			break;
-		case FCT_TRADDR_WWPN:
-			if (match_u64(args, &token64)) {
-				ret = -EINVAL;
-				goto out;
-			}
-			traddr->pn = token64;
-			break;
-		default:
-			pr_warn("unknown traddr token or missing value '%s'\n",
-					p);
-			ret = -EINVAL;
-			goto out;
-		}
-	}
+/*
+ * This routine validates and extracts the WWN's from the TRADDR string.
+ * As kernel parsers need the 0x to determine number base, universally
+ * build string to parse with 0x prefix before parsing name strings.
+ */
+static int
+nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen)
+{
+	char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1];
+	substring_t wwn = { name, &name[sizeof(name)-1] };
+	int nnoffset, pnoffset;
+
+	/* validate it string one of the 2 allowed formats */
+	if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
+			!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
+			!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
+				"pn-0x", NVME_FC_TRADDR_OXNNLEN)) {
+		nnoffset = NVME_FC_TRADDR_OXNNLEN;
+		pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET +
+						NVME_FC_TRADDR_OXNNLEN;
+	} else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH &&
+			!strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) &&
+			!strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET],
+				"pn-", NVME_FC_TRADDR_NNLEN))) {
+		nnoffset = NVME_FC_TRADDR_NNLEN;
+		pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN;
+	} else
+		goto out_einval;
+
+	name[0] = '0';
+	name[1] = 'x';
+	name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0;
+
+	memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN);
+	if (__nvme_fc_parse_u64(&wwn, &traddr->nn))
+		goto out_einval;
+
+	memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN);
+	if (__nvme_fc_parse_u64(&wwn, &traddr->pn))
+		goto out_einval;
 
-out:
-	kfree(options);
-	return ret;
+	return 0;
+
+out_einval:
+	pr_warn("%s: bad traddr string\n", __func__);
+	return -EINVAL;
 }
 
 static int
@@ -2370,7 +2374,8 @@ nvmet_fc_add_port(struct nvmet_port *port)
 
 	/* map the traddr address info to a target port */
 
-	ret = nvmet_fc_parse_traddr(&traddr, port->disc_addr.traddr);
+	ret = nvme_fc_parse_traddr(&traddr, port->disc_addr.traddr,
+			sizeof(port->disc_addr.traddr));
 	if (ret)
 		return ret;
 
diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h
index 21c37e39e41a..36cca93a5ff2 100644
--- a/include/linux/nvme-fc.h
+++ b/include/linux/nvme-fc.h
@@ -334,5 +334,24 @@ struct fcnvme_ls_disconnect_acc {
 #define NVME_FC_LS_TIMEOUT_SEC		2		/* 2 seconds */
 #define NVME_FC_TGTOP_TIMEOUT_SEC	2		/* 2 seconds */
 
+/*
+ * TRADDR string must be of form "nn-<16hexdigits>:pn-<16hexdigits>"
+ * the string is allowed to be specified with or without a "0x" prefix
+ * infront of the <16hexdigits>.  Without is considered the "min" string
+ * and with is considered the "max" string. The hexdigits may be upper
+ * or lower case.
+ */
+#define NVME_FC_TRADDR_NNLEN		3	/* "?n-" */
+#define NVME_FC_TRADDR_OXNNLEN		5	/* "?n-0x" */
+#define NVME_FC_TRADDR_HEXNAMELEN	16
+#define NVME_FC_TRADDR_MINLENGTH	\
+		(2 * (NVME_FC_TRADDR_NNLEN + NVME_FC_TRADDR_HEXNAMELEN) + 1)
+#define NVME_FC_TRADDR_MAXLENGTH	\
+		(2 * (NVME_FC_TRADDR_OXNNLEN + NVME_FC_TRADDR_HEXNAMELEN) + 1)
+#define NVME_FC_TRADDR_MIN_PN_OFFSET	\
+		(NVME_FC_TRADDR_NNLEN + NVME_FC_TRADDR_HEXNAMELEN + 1)
+#define NVME_FC_TRADDR_MAX_PN_OFFSET	\
+		(NVME_FC_TRADDR_OXNNLEN + NVME_FC_TRADDR_HEXNAMELEN + 1)
+
 
 #endif /* _NVME_FC_H */
-- 
cgit 


From dce4551cb2adb1ac9a30f8ab5299d614392b3cff Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 25 Jul 2017 17:57:47 +0200
Subject: udp: preserve head state for IP_CMSG_PASSSEC

Paul Moore reported a SELinux/IP_PASSSEC regression
caused by missing skb->sp at recvmsg() time. We need to
preserve the skb head state to process the IP_CMSG_PASSSEC
cmsg.

With this commit we avoid releasing the skb head state in the
BH even if a secpath is attached to the current skb, and stores
the skb status (with/without head states) in the scratch area,
so that we can access it at skb deallocation time, without
incurring in cache-miss penalties.

This also avoids misusing the skb CB for ipv6 packets,
as introduced by the commit 0ddf3fb2c43d ("udp: preserve
skb->dst if required for IP options processing").

Clean a bit the scratch area helpers implementation, to
reduce the code differences between 32 and 64 bits build.

Reported-by: Paul Moore <paul@paul-moore.com>
Fixes: 0a463c78d25b ("udp: avoid a cache miss on dequeue")
Fixes: 0ddf3fb2c43d ("udp: preserve skb->dst if required for IP options processing")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Tested-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h | 33 ++++++++++++++++++++++-----------
 net/ipv4/udp.c    | 38 ++++++++++++++++++--------------------
 2 files changed, 40 insertions(+), 31 deletions(-)

(limited to 'include')

diff --git a/include/net/udp.h b/include/net/udp.h
index 972ce4baab6b..56ce2d2a612d 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -305,33 +305,44 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
 /* UDP uses skb->dev_scratch to cache as much information as possible and avoid
  * possibly multiple cache miss on dequeue()
  */
-#if BITS_PER_LONG == 64
-
-/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on
- * cold cache lines at recvmsg time.
- * skb->len can be stored on 16 bits since the udp header has been already
- * validated and pulled.
- */
 struct udp_dev_scratch {
-	u32 truesize;
+	/* skb->truesize and the stateless bit are embedded in a single field;
+	 * do not use a bitfield since the compiler emits better/smaller code
+	 * this way
+	 */
+	u32 _tsize_state;
+
+#if BITS_PER_LONG == 64
+	/* len and the bit needed to compute skb_csum_unnecessary
+	 * will be on cold cache lines at recvmsg time.
+	 * skb->len can be stored on 16 bits since the udp header has been
+	 * already validated and pulled.
+	 */
 	u16 len;
 	bool is_linear;
 	bool csum_unnecessary;
+#endif
 };
 
+static inline struct udp_dev_scratch *udp_skb_scratch(struct sk_buff *skb)
+{
+	return (struct udp_dev_scratch *)&skb->dev_scratch;
+}
+
+#if BITS_PER_LONG == 64
 static inline unsigned int udp_skb_len(struct sk_buff *skb)
 {
-	return ((struct udp_dev_scratch *)&skb->dev_scratch)->len;
+	return udp_skb_scratch(skb)->len;
 }
 
 static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb)
 {
-	return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary;
+	return udp_skb_scratch(skb)->csum_unnecessary;
 }
 
 static inline bool udp_skb_is_linear(struct sk_buff *skb)
 {
-	return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear;
+	return udp_skb_scratch(skb)->is_linear;
 }
 
 #else
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b057653ceca9..d243772f6efc 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1163,34 +1163,32 @@ out:
 	return ret;
 }
 
-#if BITS_PER_LONG == 64
+#define UDP_SKB_IS_STATELESS 0x80000000
+
 static void udp_set_dev_scratch(struct sk_buff *skb)
 {
-	struct udp_dev_scratch *scratch;
+	struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
 
 	BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long));
-	scratch = (struct udp_dev_scratch *)&skb->dev_scratch;
-	scratch->truesize = skb->truesize;
+	scratch->_tsize_state = skb->truesize;
+#if BITS_PER_LONG == 64
 	scratch->len = skb->len;
 	scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
 	scratch->is_linear = !skb_is_nonlinear(skb);
+#endif
+	if (likely(!skb->_skb_refdst))
+		scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
 }
 
 static int udp_skb_truesize(struct sk_buff *skb)
 {
-	return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize;
-}
-#else
-static void udp_set_dev_scratch(struct sk_buff *skb)
-{
-	skb->dev_scratch = skb->truesize;
+	return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
 }
 
-static int udp_skb_truesize(struct sk_buff *skb)
+static bool udp_skb_has_head_state(struct sk_buff *skb)
 {
-	return skb->dev_scratch;
+	return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS);
 }
-#endif
 
 /* fully reclaim rmem/fwd memory allocated for skb */
 static void udp_rmem_release(struct sock *sk, int size, int partial,
@@ -1388,10 +1386,10 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
 		unlock_sock_fast(sk, slow);
 	}
 
-	/* we cleared the head states previously only if the skb lacks any IP
-	 * options, see __udp_queue_rcv_skb().
+	/* In the more common cases we cleared the head states previously,
+	 * see __udp_queue_rcv_skb().
 	 */
-	if (unlikely(IPCB(skb)->opt.optlen > 0))
+	if (unlikely(udp_skb_has_head_state(skb)))
 		skb_release_head_state(skb);
 	consume_stateless_skb(skb);
 }
@@ -1784,11 +1782,11 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		sk_mark_napi_id_once(sk, skb);
 	}
 
-	/* At recvmsg() time we need skb->dst to process IP options-related
-	 * cmsg, elsewhere can we clear all pending head states while they are
-	 * hot in the cache
+	/* At recvmsg() time we may access skb->dst or skb->sp depending on
+	 * the IP options and the cmsg flags, elsewhere can we clear all
+	 * pending head states while they are hot in the cache
 	 */
-	if (likely(IPCB(skb)->opt.optlen == 0))
+	if (likely(IPCB(skb)->opt.optlen == 0 && !skb->sp))
 		skb_release_head_state(skb);
 
 	rc = __udp_enqueue_schedule_skb(sk, skb);
-- 
cgit 


From 0a94efb5acbb6980d7c9ab604372d93cd507e4d8 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sun, 23 Jul 2017 08:36:15 -0400
Subject: workqueue: implicit ordered attribute should be overridable

5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be
ordered") automatically enabled ordered attribute for unbound
workqueues w/ max_active == 1.  Because ordered workqueues reject
max_active and some attribute changes, this implicit ordered mode
broke cases where the user creates an unbound workqueue w/ max_active
== 1 and later explicitly changes the related attributes.

This patch distinguishes explicit and implicit ordered setting and
overrides from attribute changes if implict.

Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: 5c0338c68706 ("workqueue: restore WQ_UNBOUND/max_active==1 to be ordered")
---
 include/linux/workqueue.h |  4 +++-
 kernel/workqueue.c        | 13 +++++++++----
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index c102ef65cb64..db6dc9dc0482 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -323,6 +323,7 @@ enum {
 
 	__WQ_DRAINING		= 1 << 16, /* internal: workqueue is draining */
 	__WQ_ORDERED		= 1 << 17, /* internal: workqueue is ordered */
+	__WQ_ORDERED_EXPLICIT	= 1 << 18, /* internal: alloc_ordered_workqueue() */
 	__WQ_LEGACY		= 1 << 18, /* internal: create*_workqueue() */
 
 	WQ_MAX_ACTIVE		= 512,	  /* I like 512, better ideas? */
@@ -422,7 +423,8 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active,
  * Pointer to the allocated workqueue on success, %NULL on failure.
  */
 #define alloc_ordered_workqueue(fmt, flags, args...)			\
-	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args)
+	alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED |		\
+			__WQ_ORDERED_EXPLICIT | (flags), 1, ##args)
 
 #define create_workqueue(name)						\
 	alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name))
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index abe4a4971c24..7146ea70a62d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3744,8 +3744,12 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
 		return -EINVAL;
 
 	/* creating multiple pwqs breaks ordering guarantee */
-	if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
-		return -EINVAL;
+	if (!list_empty(&wq->pwqs)) {
+		if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
+			return -EINVAL;
+
+		wq->flags &= ~__WQ_ORDERED;
+	}
 
 	ctx = apply_wqattrs_prepare(wq, attrs);
 	if (!ctx)
@@ -4129,13 +4133,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
 	struct pool_workqueue *pwq;
 
 	/* disallow meddling with max_active for ordered workqueues */
-	if (WARN_ON(wq->flags & __WQ_ORDERED))
+	if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
 		return;
 
 	max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
 
 	mutex_lock(&wq->mutex);
 
+	wq->flags &= ~__WQ_ORDERED;
 	wq->saved_max_active = max_active;
 
 	for_each_pwq(pwq, wq)
@@ -5263,7 +5268,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
 	 * attributes breaks ordering guarantee.  Disallow exposing ordered
 	 * workqueues.
 	 */
-	if (WARN_ON(wq->flags & __WQ_ORDERED))
+	if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
 		return -EINVAL;
 
 	wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);
-- 
cgit 


From 2eaa38d9fcba5294182268b8d11770cf3fdc9bc9 Mon Sep 17 00:00:00 2001
From: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Date: Tue, 25 Jul 2017 11:08:15 +0200
Subject: net: phy: Remove trailing semicolon in macro definition

Commit e5a03bfd873c2 ("phy: Add an mdio_device structure")
introduced a spurious trailing semicolon. Remove it.

Signed-off-by: Marc Gonzalez <marc_gonzalez@sigmadesigns.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 2a9567bb8186..0bb5b212ab42 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -830,7 +830,7 @@ static inline int phy_read_status(struct phy_device *phydev)
 	dev_err(&_phydev->mdio.dev, format, ##args)
 
 #define phydev_dbg(_phydev, format, args...)	\
-	dev_dbg(&_phydev->mdio.dev, format, ##args);
+	dev_dbg(&_phydev->mdio.dev, format, ##args)
 
 static inline const char *phydev_name(const struct phy_device *phydev)
 {
-- 
cgit 


From 09ac4fcb3f255e9225967c75f5893325c116cdbe Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 13 Jul 2017 17:01:16 -0400
Subject: drm/ttm: Implement vm_operations_struct.access v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allows gdb to access contents of user mode mapped BOs. System memory
is handled by TTM using kmap. Other memory pools require a new driver
callback in ttm_bo_driver.

v2:
* kmap only one page at a time
* swap in BO if needed
* make driver callback more generic to handle private memory pools
* document callback return value
* WARN_ON -> WARN_ON_ONCE

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c | 79 ++++++++++++++++++++++++++++++++++++++++-
 include/drm/ttm/ttm_bo_driver.h | 17 +++++++++
 2 files changed, 95 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index b442d12f2f7d..a01e5c90fd87 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -294,10 +294,87 @@ static void ttm_bo_vm_close(struct vm_area_struct *vma)
 	vma->vm_private_data = NULL;
 }
 
+static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
+				 unsigned long offset,
+				 void *buf, int len, int write)
+{
+	unsigned long page = offset >> PAGE_SHIFT;
+	unsigned long bytes_left = len;
+	int ret;
+
+	/* Copy a page at a time, that way no extra virtual address
+	 * mapping is needed
+	 */
+	offset -= page << PAGE_SHIFT;
+	do {
+		unsigned long bytes = min(bytes_left, PAGE_SIZE - offset);
+		struct ttm_bo_kmap_obj map;
+		void *ptr;
+		bool is_iomem;
+
+		ret = ttm_bo_kmap(bo, page, 1, &map);
+		if (ret)
+			return ret;
+
+		ptr = (uint8_t *)ttm_kmap_obj_virtual(&map, &is_iomem) + offset;
+		WARN_ON_ONCE(is_iomem);
+		if (write)
+			memcpy(ptr, buf, bytes);
+		else
+			memcpy(buf, ptr, bytes);
+		ttm_bo_kunmap(&map);
+
+		page++;
+		bytes_left -= bytes;
+		offset = 0;
+	} while (bytes_left);
+
+	return len;
+}
+
+static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr,
+			    void *buf, int len, int write)
+{
+	unsigned long offset = (addr) - vma->vm_start;
+	struct ttm_buffer_object *bo = vma->vm_private_data;
+	int ret;
+
+	if (len < 1 || (offset + len) >> PAGE_SHIFT > bo->num_pages)
+		return -EIO;
+
+	ret = ttm_bo_reserve(bo, true, false, NULL);
+	if (ret)
+		return ret;
+
+	switch (bo->mem.mem_type) {
+	case TTM_PL_SYSTEM:
+		if (unlikely(bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) {
+			ret = ttm_tt_swapin(bo->ttm);
+			if (unlikely(ret != 0))
+				return ret;
+		}
+		/* fall through */
+	case TTM_PL_TT:
+		ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write);
+		break;
+	default:
+		if (bo->bdev->driver->access_memory)
+			ret = bo->bdev->driver->access_memory(
+				bo, offset, buf, len, write);
+		else
+			ret = -EIO;
+	}
+
+	ttm_bo_unreserve(bo);
+
+	return ret;
+}
+
 static const struct vm_operations_struct ttm_bo_vm_ops = {
 	.fault = ttm_bo_vm_fault,
 	.open = ttm_bo_vm_open,
-	.close = ttm_bo_vm_close
+	.close = ttm_bo_vm_close,
+	.access = ttm_bo_vm_access
 };
 
 static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 990d529f823c..d30850e07936 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -472,6 +472,23 @@ struct ttm_bo_driver {
 	 */
 	unsigned long (*io_mem_pfn)(struct ttm_buffer_object *bo,
 				    unsigned long page_offset);
+
+	/**
+	 * Read/write memory buffers for ptrace access
+	 *
+	 * @bo: the BO to access
+	 * @offset: the offset from the start of the BO
+	 * @buf: pointer to source/destination buffer
+	 * @len: number of bytes to copy
+	 * @write: whether to read (0) from or write (non-0) to BO
+	 *
+	 * If successful, this function should return the number of
+	 * bytes copied, -EIO otherwise. If the number of bytes
+	 * returned is < len, the function may be called again with
+	 * the remainder of the buffer to copy.
+	 */
+	int (*access_memory)(struct ttm_buffer_object *bo, unsigned long offset,
+			     void *buf, int len, int write);
 };
 
 /**
-- 
cgit 


From fc1ff45a07abf240aa0c6586c11465c86c8cab8d Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sat, 15 Jul 2017 09:32:56 -0300
Subject: media: cec-notifier: small improvements

Allow calling cec_notifier_set_phys_addr and
cec_notifier_set_phys_addr_from_edid with a NULL notifier, in which
case these functions do nothing.

Add a cec_notifier_phys_addr_invalidate helper function (the notifier
equivalent of cec_phys_addr_invalidate).

These changes simplify drm CEC driver support.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/cec/cec-notifier.c |  6 ++++++
 include/media/cec-notifier.h     | 15 +++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/drivers/media/cec/cec-notifier.c b/drivers/media/cec/cec-notifier.c
index 74dc1c32080e..08b619d0ea1e 100644
--- a/drivers/media/cec/cec-notifier.c
+++ b/drivers/media/cec/cec-notifier.c
@@ -87,6 +87,9 @@ EXPORT_SYMBOL_GPL(cec_notifier_put);
 
 void cec_notifier_set_phys_addr(struct cec_notifier *n, u16 pa)
 {
+	if (n == NULL)
+		return;
+
 	mutex_lock(&n->lock);
 	n->phys_addr = pa;
 	if (n->callback)
@@ -100,6 +103,9 @@ void cec_notifier_set_phys_addr_from_edid(struct cec_notifier *n,
 {
 	u16 pa = CEC_PHYS_ADDR_INVALID;
 
+	if (n == NULL)
+		return;
+
 	if (edid && edid->extensions)
 		pa = cec_get_edid_phys_addr((const u8 *)edid,
 				EDID_LENGTH * (edid->extensions + 1), NULL);
diff --git a/include/media/cec-notifier.h b/include/media/cec-notifier.h
index 298f996969df..a4f7429c4ae5 100644
--- a/include/media/cec-notifier.h
+++ b/include/media/cec-notifier.h
@@ -57,6 +57,7 @@ void cec_notifier_put(struct cec_notifier *n);
  * @pa: the CEC physical address
  *
  * Set a new CEC physical address.
+ * Does nothing if @n == NULL.
  */
 void cec_notifier_set_phys_addr(struct cec_notifier *n, u16 pa);
 
@@ -66,6 +67,7 @@ void cec_notifier_set_phys_addr(struct cec_notifier *n, u16 pa);
  * @edid: the struct edid pointer
  *
  * Parses the EDID to obtain the new CEC physical address and set it.
+ * Does nothing if @n == NULL.
  */
 void cec_notifier_set_phys_addr_from_edid(struct cec_notifier *n,
 					  const struct edid *edid);
@@ -118,4 +120,17 @@ static inline void cec_notifier_unregister(struct cec_notifier *n)
 
 #endif
 
+/**
+ * cec_notifier_phys_addr_invalidate() - set the physical address to INVALID
+ *
+ * @n: the CEC notifier
+ *
+ * This is a simple helper function to invalidate the physical
+ * address. Does nothing if @n == NULL.
+ */
+static inline void cec_notifier_phys_addr_invalidate(struct cec_notifier *n)
+{
+	cec_notifier_set_phys_addr(n, CEC_PHYS_ADDR_INVALID);
+}
+
 #endif
-- 
cgit 


From b25db383928cecba356835583b16fa7008f97b3a Mon Sep 17 00:00:00 2001
From: Prabhakar Lad <prabhakar.csengg@gmail.com>
Date: Thu, 20 Jul 2017 04:56:31 -0400
Subject: media: platform: davinci: drop VPFE_CMD_S_CCDC_RAW_PARAMS

drop VPFE_CMD_S_CCDC_RAW_PARAMS ioctl from dm355/dm644x following reasons:

- This ioctl was never in public api and was only defined in kernel header.
- The function set_params constantly mixes up pointers and phys_addr_t
  numbers.
- This is part of a 'VPFE_CMD_S_CCDC_RAW_PARAMS' ioctl command that is
  described as an 'experimental ioctl that will change in future kernels'.
- The code to allocate the table never gets called after we copy_from_user
  the user input over the kernel settings, and then compare them
  for inequality.
- We then go on to use an address provided by user space as both the
  __user pointer for input and pass it through phys_to_virt to come up
  with a kernel pointer to copy the data to. This looks like a trivially
  exploitable root hole.

Signed-off-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/media/platform/davinci/ccdc_hw_device.h |  10 --
 drivers/media/platform/davinci/dm355_ccdc.c     |  92 +--------------
 drivers/media/platform/davinci/dm644x_ccdc.c    | 151 +-----------------------
 drivers/media/platform/davinci/vpfe_capture.c   |  75 ------------
 include/media/davinci/dm644x_ccdc.h             |  12 --
 include/media/davinci/vpfe_capture.h            |  10 --
 6 files changed, 4 insertions(+), 346 deletions(-)

(limited to 'include')

diff --git a/drivers/media/platform/davinci/ccdc_hw_device.h b/drivers/media/platform/davinci/ccdc_hw_device.h
index 8f6688a7a111..f1b521045d64 100644
--- a/drivers/media/platform/davinci/ccdc_hw_device.h
+++ b/drivers/media/platform/davinci/ccdc_hw_device.h
@@ -42,16 +42,6 @@ struct ccdc_hw_ops {
 	int (*set_hw_if_params) (struct vpfe_hw_if_param *param);
 	/* get interface parameters */
 	int (*get_hw_if_params) (struct vpfe_hw_if_param *param);
-	/*
-	 * Pointer to function to set parameters. Used
-	 * for implementing VPFE_S_CCDC_PARAMS
-	 */
-	int (*set_params) (void *params);
-	/*
-	 * Pointer to function to get parameter. Used
-	 * for implementing VPFE_G_CCDC_PARAMS
-	 */
-	int (*get_params) (void *params);
 	/* Pointer to function to configure ccdc */
 	int (*configure) (void);
 
diff --git a/drivers/media/platform/davinci/dm355_ccdc.c b/drivers/media/platform/davinci/dm355_ccdc.c
index 73db166dc338..6d492dc4c3a9 100644
--- a/drivers/media/platform/davinci/dm355_ccdc.c
+++ b/drivers/media/platform/davinci/dm355_ccdc.c
@@ -17,12 +17,7 @@
  * This module is for configuring DM355 CCD controller of VPFE to capture
  * Raw yuv or Bayer RGB data from a decoder. CCDC has several modules
  * such as Defect Pixel Correction, Color Space Conversion etc to
- * pre-process the Bayer RGB data, before writing it to SDRAM. This
- * module also allows application to configure individual
- * module parameters through VPFE_CMD_S_CCDC_RAW_PARAMS IOCTL.
- * To do so, application include dm355_ccdc.h and vpfe_capture.h header
- * files. The setparams() API is called by vpfe_capture driver
- * to configure module parameters
+ * pre-process the Bayer RGB data, before writing it to SDRAM.
  *
  * TODO: 1) Raw bayer parameter settings and bayer capture
  * 	 2) Split module parameter structure to module specific ioctl structs
@@ -260,90 +255,6 @@ static void ccdc_setwin(struct v4l2_rect *image_win,
 	dev_dbg(ccdc_cfg.dev, "\nEnd of ccdc_setwin...");
 }
 
-static int validate_ccdc_param(struct ccdc_config_params_raw *ccdcparam)
-{
-	if (ccdcparam->datasft < CCDC_DATA_NO_SHIFT ||
-	    ccdcparam->datasft > CCDC_DATA_SHIFT_6BIT) {
-		dev_dbg(ccdc_cfg.dev, "Invalid value of data shift\n");
-		return -EINVAL;
-	}
-
-	if (ccdcparam->mfilt1 < CCDC_NO_MEDIAN_FILTER1 ||
-	    ccdcparam->mfilt1 > CCDC_MEDIAN_FILTER1) {
-		dev_dbg(ccdc_cfg.dev, "Invalid value of median filter1\n");
-		return -EINVAL;
-	}
-
-	if (ccdcparam->mfilt2 < CCDC_NO_MEDIAN_FILTER2 ||
-	    ccdcparam->mfilt2 > CCDC_MEDIAN_FILTER2) {
-		dev_dbg(ccdc_cfg.dev, "Invalid value of median filter2\n");
-		return -EINVAL;
-	}
-
-	if ((ccdcparam->med_filt_thres < 0) ||
-	   (ccdcparam->med_filt_thres > CCDC_MED_FILT_THRESH)) {
-		dev_dbg(ccdc_cfg.dev,
-			"Invalid value of median filter threshold\n");
-		return -EINVAL;
-	}
-
-	if (ccdcparam->data_sz < CCDC_DATA_16BITS ||
-	    ccdcparam->data_sz > CCDC_DATA_8BITS) {
-		dev_dbg(ccdc_cfg.dev, "Invalid value of data size\n");
-		return -EINVAL;
-	}
-
-	if (ccdcparam->alaw.enable) {
-		if (ccdcparam->alaw.gamma_wd < CCDC_GAMMA_BITS_13_4 ||
-		    ccdcparam->alaw.gamma_wd > CCDC_GAMMA_BITS_09_0) {
-			dev_dbg(ccdc_cfg.dev, "Invalid value of ALAW\n");
-			return -EINVAL;
-		}
-	}
-
-	if (ccdcparam->blk_clamp.b_clamp_enable) {
-		if (ccdcparam->blk_clamp.sample_pixel < CCDC_SAMPLE_1PIXELS ||
-		    ccdcparam->blk_clamp.sample_pixel > CCDC_SAMPLE_16PIXELS) {
-			dev_dbg(ccdc_cfg.dev,
-				"Invalid value of sample pixel\n");
-			return -EINVAL;
-		}
-		if (ccdcparam->blk_clamp.sample_ln < CCDC_SAMPLE_1LINES ||
-		    ccdcparam->blk_clamp.sample_ln > CCDC_SAMPLE_16LINES) {
-			dev_dbg(ccdc_cfg.dev,
-				"Invalid value of sample lines\n");
-			return -EINVAL;
-		}
-	}
-	return 0;
-}
-
-/* Parameter operations */
-static int ccdc_set_params(void __user *params)
-{
-	struct ccdc_config_params_raw ccdc_raw_params;
-	int x;
-
-	/* only raw module parameters can be set through the IOCTL */
-	if (ccdc_cfg.if_type != VPFE_RAW_BAYER)
-		return -EINVAL;
-
-	x = copy_from_user(&ccdc_raw_params, params, sizeof(ccdc_raw_params));
-	if (x) {
-		dev_dbg(ccdc_cfg.dev, "ccdc_set_params: error in copying ccdcparams, %d\n",
-			x);
-		return -EFAULT;
-	}
-
-	if (!validate_ccdc_param(&ccdc_raw_params)) {
-		memcpy(&ccdc_cfg.bayer.config_params,
-			&ccdc_raw_params,
-			sizeof(ccdc_raw_params));
-		return 0;
-	}
-	return -EINVAL;
-}
-
 /* This function will configure CCDC for YCbCr video capture */
 static void ccdc_config_ycbcr(void)
 {
@@ -939,7 +850,6 @@ static struct ccdc_hw_device ccdc_hw_dev = {
 		.enable = ccdc_enable,
 		.enable_out_to_sdram = ccdc_enable_output_to_sdram,
 		.set_hw_if_params = ccdc_set_hw_if_params,
-		.set_params = ccdc_set_params,
 		.configure = ccdc_configure,
 		.set_buftype = ccdc_set_buftype,
 		.get_buftype = ccdc_get_buftype,
diff --git a/drivers/media/platform/davinci/dm644x_ccdc.c b/drivers/media/platform/davinci/dm644x_ccdc.c
index 740fbc7a8c14..3b2d8a9317b8 100644
--- a/drivers/media/platform/davinci/dm644x_ccdc.c
+++ b/drivers/media/platform/davinci/dm644x_ccdc.c
@@ -17,13 +17,9 @@
  * This module is for configuring CCD controller of DM6446 VPFE to capture
  * Raw yuv or Bayer RGB data from a decoder. CCDC has several modules
  * such as Defect Pixel Correction, Color Space Conversion etc to
- * pre-process the Raw Bayer RGB data, before writing it to SDRAM. This
- * module also allows application to configure individual
- * module parameters through VPFE_CMD_S_CCDC_RAW_PARAMS IOCTL.
- * To do so, application includes dm644x_ccdc.h and vpfe_capture.h header
- * files.  The setparams() API is called by vpfe_capture driver
- * to configure module parameters. This file is named DM644x so that other
- * variants such DM6443 may be supported using the same module.
+ * pre-process the Raw Bayer RGB data, before writing it to SDRAM.
+ * This file is named DM644x so that other variants such DM6443
+ * may be supported using the same module.
  *
  * TODO: Test Raw bayer parameter settings and bayer capture
  * 	 Split module parameter structure to module specific ioctl structs
@@ -216,96 +212,8 @@ static void ccdc_readregs(void)
 	dev_notice(ccdc_cfg.dev, "\nReading 0x%x to VERT_LINES...\n", val);
 }
 
-static int validate_ccdc_param(struct ccdc_config_params_raw *ccdcparam)
-{
-	if (ccdcparam->alaw.enable) {
-		u8 max_gamma = ccdc_gamma_width_max_bit(ccdcparam->alaw.gamma_wd);
-		u8 max_data = ccdc_data_size_max_bit(ccdcparam->data_sz);
-
-		if ((ccdcparam->alaw.gamma_wd > CCDC_GAMMA_BITS_09_0) ||
-		    (ccdcparam->alaw.gamma_wd < CCDC_GAMMA_BITS_15_6) ||
-		    (max_gamma > max_data)) {
-			dev_dbg(ccdc_cfg.dev, "\nInvalid data line select");
-			return -1;
-		}
-	}
-	return 0;
-}
-
-static int ccdc_update_raw_params(struct ccdc_config_params_raw *raw_params)
-{
-	struct ccdc_config_params_raw *config_params =
-				&ccdc_cfg.bayer.config_params;
-	unsigned int *fpc_virtaddr = NULL;
-	unsigned int *fpc_physaddr = NULL;
-
-	memcpy(config_params, raw_params, sizeof(*raw_params));
-	/*
-	 * allocate memory for fault pixel table and copy the user
-	 * values to the table
-	 */
-	if (!config_params->fault_pxl.enable)
-		return 0;
-
-	fpc_physaddr = (unsigned int *)config_params->fault_pxl.fpc_table_addr;
-	fpc_virtaddr = (unsigned int *)phys_to_virt(
-				(unsigned long)fpc_physaddr);
-	/*
-	 * Allocate memory for FPC table if current
-	 * FPC table buffer is not big enough to
-	 * accommodate FPC Number requested
-	 */
-	if (raw_params->fault_pxl.fp_num != config_params->fault_pxl.fp_num) {
-		if (fpc_physaddr != NULL) {
-			free_pages((unsigned long)fpc_virtaddr,
-				   get_order
-				   (config_params->fault_pxl.fp_num *
-				   FP_NUM_BYTES));
-		}
-
-		/* Allocate memory for FPC table */
-		fpc_virtaddr =
-			(unsigned int *)__get_free_pages(GFP_KERNEL | GFP_DMA,
-							 get_order(raw_params->
-							 fault_pxl.fp_num *
-							 FP_NUM_BYTES));
-
-		if (fpc_virtaddr == NULL) {
-			dev_dbg(ccdc_cfg.dev,
-				"\nUnable to allocate memory for FPC");
-			return -EFAULT;
-		}
-		fpc_physaddr =
-		    (unsigned int *)virt_to_phys((void *)fpc_virtaddr);
-	}
-
-	/* Copy number of fault pixels and FPC table */
-	config_params->fault_pxl.fp_num = raw_params->fault_pxl.fp_num;
-	if (copy_from_user(fpc_virtaddr,
-			(void __user *)raw_params->fault_pxl.fpc_table_addr,
-			config_params->fault_pxl.fp_num * FP_NUM_BYTES)) {
-		dev_dbg(ccdc_cfg.dev, "\n copy_from_user failed");
-		return -EFAULT;
-	}
-	config_params->fault_pxl.fpc_table_addr = (unsigned long)fpc_physaddr;
-	return 0;
-}
-
 static int ccdc_close(struct device *dev)
 {
-	struct ccdc_config_params_raw *config_params =
-				&ccdc_cfg.bayer.config_params;
-	unsigned int *fpc_physaddr = NULL, *fpc_virtaddr = NULL;
-
-	fpc_physaddr = (unsigned int *)config_params->fault_pxl.fpc_table_addr;
-
-	if (fpc_physaddr != NULL) {
-		fpc_virtaddr = (unsigned int *)
-		    phys_to_virt((unsigned long)fpc_physaddr);
-		free_pages((unsigned long)fpc_virtaddr,
-			   get_order(config_params->fault_pxl.fp_num *
-			   FP_NUM_BYTES));
-	}
 	return 0;
 }
 
@@ -339,29 +247,6 @@ static void ccdc_sbl_reset(void)
 	vpss_clear_wbl_overflow(VPSS_PCR_CCDC_WBL_O);
 }
 
-/* Parameter operations */
-static int ccdc_set_params(void __user *params)
-{
-	struct ccdc_config_params_raw ccdc_raw_params;
-	int x;
-
-	if (ccdc_cfg.if_type != VPFE_RAW_BAYER)
-		return -EINVAL;
-
-	x = copy_from_user(&ccdc_raw_params, params, sizeof(ccdc_raw_params));
-	if (x) {
-		dev_dbg(ccdc_cfg.dev, "ccdc_set_params: error in copyingccdc params, %d\n",
-			x);
-		return -EFAULT;
-	}
-
-	if (!validate_ccdc_param(&ccdc_raw_params)) {
-		if (!ccdc_update_raw_params(&ccdc_raw_params))
-			return 0;
-	}
-	return -EINVAL;
-}
-
 /*
  * ccdc_config_ycbcr()
  * This function will configure CCDC for YCbCr video capture
@@ -489,32 +374,6 @@ static void ccdc_config_black_compense(struct ccdc_black_compensation *bcomp)
 	regw(val, CCDC_BLKCMP);
 }
 
-static void ccdc_config_fpc(struct ccdc_fault_pixel *fpc)
-{
-	u32 val;
-
-	/* Initially disable FPC */
-	val = CCDC_FPC_DISABLE;
-	regw(val, CCDC_FPC);
-
-	if (!fpc->enable)
-		return;
-
-	/* Configure Fault pixel if needed */
-	regw(fpc->fpc_table_addr, CCDC_FPC_ADDR);
-	dev_dbg(ccdc_cfg.dev, "\nWriting 0x%lx to FPC_ADDR...\n",
-		       (fpc->fpc_table_addr));
-	/* Write the FPC params with FPC disable */
-	val = fpc->fp_num & CCDC_FPC_FPC_NUM_MASK;
-	regw(val, CCDC_FPC);
-
-	dev_dbg(ccdc_cfg.dev, "\nWriting 0x%x to FPC...\n", val);
-	/* read the FPC register */
-	val = regr(CCDC_FPC) | CCDC_FPC_ENABLE;
-	regw(val, CCDC_FPC);
-	dev_dbg(ccdc_cfg.dev, "\nWriting 0x%x to FPC...\n", val);
-}
-
 /*
  * ccdc_config_raw()
  * This function will configure CCDC for Raw capture mode
@@ -569,9 +428,6 @@ static void ccdc_config_raw(void)
 	/* Configure Black level compensation */
 	ccdc_config_black_compense(&config_params->blk_comp);
 
-	/* Configure Fault Pixel Correction */
-	ccdc_config_fpc(&config_params->fault_pxl);
-
 	/* If data size is 8 bit then pack the data */
 	if ((config_params->data_sz == CCDC_DATA_8BITS) ||
 	     config_params->alaw.enable)
@@ -929,7 +785,6 @@ static struct ccdc_hw_device ccdc_hw_dev = {
 		.reset = ccdc_sbl_reset,
 		.enable = ccdc_enable,
 		.set_hw_if_params = ccdc_set_hw_if_params,
-		.set_params = ccdc_set_params,
 		.configure = ccdc_configure,
 		.set_buftype = ccdc_set_buftype,
 		.get_buftype = ccdc_get_buftype,
diff --git a/drivers/media/platform/davinci/vpfe_capture.c b/drivers/media/platform/davinci/vpfe_capture.c
index 1831bf5ccca5..b1bf4a7e8eb7 100644
--- a/drivers/media/platform/davinci/vpfe_capture.c
+++ b/drivers/media/platform/davinci/vpfe_capture.c
@@ -280,45 +280,6 @@ void vpfe_unregister_ccdc_device(struct ccdc_hw_device *dev)
 }
 EXPORT_SYMBOL(vpfe_unregister_ccdc_device);
 
-/*
- * vpfe_get_ccdc_image_format - Get image parameters based on CCDC settings
- */
-static int vpfe_get_ccdc_image_format(struct vpfe_device *vpfe_dev,
-				 struct v4l2_format *f)
-{
-	struct v4l2_rect image_win;
-	enum ccdc_buftype buf_type;
-	enum ccdc_frmfmt frm_fmt;
-
-	memset(f, 0, sizeof(*f));
-	f->type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
-	ccdc_dev->hw_ops.get_image_window(&image_win);
-	f->fmt.pix.width = image_win.width;
-	f->fmt.pix.height = image_win.height;
-	f->fmt.pix.bytesperline = ccdc_dev->hw_ops.get_line_length();
-	f->fmt.pix.sizeimage = f->fmt.pix.bytesperline *
-				f->fmt.pix.height;
-	buf_type = ccdc_dev->hw_ops.get_buftype();
-	f->fmt.pix.pixelformat = ccdc_dev->hw_ops.get_pixel_format();
-	frm_fmt = ccdc_dev->hw_ops.get_frame_format();
-	if (frm_fmt == CCDC_FRMFMT_PROGRESSIVE)
-		f->fmt.pix.field = V4L2_FIELD_NONE;
-	else if (frm_fmt == CCDC_FRMFMT_INTERLACED) {
-		if (buf_type == CCDC_BUFTYPE_FLD_INTERLEAVED)
-			f->fmt.pix.field = V4L2_FIELD_INTERLACED;
-		else if (buf_type == CCDC_BUFTYPE_FLD_SEPARATED)
-			f->fmt.pix.field = V4L2_FIELD_SEQ_TB;
-		else {
-			v4l2_err(&vpfe_dev->v4l2_dev, "Invalid buf_type\n");
-			return -EINVAL;
-		}
-	} else {
-		v4l2_err(&vpfe_dev->v4l2_dev, "Invalid frm_fmt\n");
-		return -EINVAL;
-	}
-	return 0;
-}
-
 /*
  * vpfe_config_ccdc_image_format()
  * For a pix format, configure ccdc to setup the capture
@@ -1697,41 +1658,6 @@ unlock_out:
 	return ret;
 }
 
-
-static long vpfe_param_handler(struct file *file, void *priv,
-		bool valid_prio, unsigned int cmd, void *param)
-{
-	struct vpfe_device *vpfe_dev = video_drvdata(file);
-	int ret;
-
-	v4l2_dbg(2, debug, &vpfe_dev->v4l2_dev, "vpfe_param_handler\n");
-
-	if (vpfe_dev->started) {
-		/* only allowed if streaming is not started */
-		v4l2_dbg(1, debug, &vpfe_dev->v4l2_dev,
-			"device already started\n");
-		return -EBUSY;
-	}
-
-	ret = mutex_lock_interruptible(&vpfe_dev->lock);
-	if (ret)
-		return ret;
-
-	switch (cmd) {
-	case VPFE_CMD_S_CCDC_RAW_PARAMS:
-		ret = -EINVAL;
-		v4l2_warn(&vpfe_dev->v4l2_dev,
-			"VPFE_CMD_S_CCDC_RAW_PARAMS not supported\n");
-		break;
-	default:
-		ret = -ENOTTY;
-	}
-unlock_out:
-	mutex_unlock(&vpfe_dev->lock);
-	return ret;
-}
-
-
 /* vpfe capture ioctl operations */
 static const struct v4l2_ioctl_ops vpfe_ioctl_ops = {
 	.vidioc_querycap	 = vpfe_querycap,
@@ -1754,7 +1680,6 @@ static const struct v4l2_ioctl_ops vpfe_ioctl_ops = {
 	.vidioc_cropcap		 = vpfe_cropcap,
 	.vidioc_g_selection	 = vpfe_g_selection,
 	.vidioc_s_selection	 = vpfe_s_selection,
-	.vidioc_default		 = vpfe_param_handler,
 };
 
 static struct vpfe_device *vpfe_initialize(void)
diff --git a/include/media/davinci/dm644x_ccdc.h b/include/media/davinci/dm644x_ccdc.h
index 7c909da29d43..6ea2ce241851 100644
--- a/include/media/davinci/dm644x_ccdc.h
+++ b/include/media/davinci/dm644x_ccdc.h
@@ -103,16 +103,6 @@ struct ccdc_black_compensation {
 	char gb;
 };
 
-/* structure for fault pixel correction */
-struct ccdc_fault_pixel {
-	/* Enable or Disable fault pixel correction */
-	unsigned char enable;
-	/* Number of fault pixel */
-	unsigned short fp_num;
-	/* Address of fault pixel table */
-	unsigned long fpc_table_addr;
-};
-
 /* Structure for CCDC configuration parameters for raw capture mode passed
  * by application
  */
@@ -125,8 +115,6 @@ struct ccdc_config_params_raw {
 	struct ccdc_black_clamp blk_clamp;
 	/* Structure for Black Compensation */
 	struct ccdc_black_compensation blk_comp;
-	/* Structure for Fault Pixel Module Configuration */
-	struct ccdc_fault_pixel fault_pxl;
 };
 
 
diff --git a/include/media/davinci/vpfe_capture.h b/include/media/davinci/vpfe_capture.h
index 8e1a4d88daa0..f003533602d0 100644
--- a/include/media/davinci/vpfe_capture.h
+++ b/include/media/davinci/vpfe_capture.h
@@ -183,14 +183,4 @@ struct vpfe_config_params {
 };
 
 #endif				/* End of __KERNEL__ */
-/**
- * VPFE_CMD_S_CCDC_RAW_PARAMS - EXPERIMENTAL IOCTL to set raw capture params
- * This can be used to configure modules such as defect pixel correction,
- * color space conversion, culling etc. This is an experimental ioctl that
- * will change in future kernels. So use this ioctl with care !
- * TODO: This is to be split into multiple ioctls and also explore the
- * possibility of extending the v4l2 api to include this
- **/
-#define VPFE_CMD_S_CCDC_RAW_PARAMS _IOW('V', BASE_VIDIOC_PRIVATE + 1, \
-					void *)
 #endif				/* _DAVINCI_VPFE_H */
-- 
cgit 


From fdeaf7e3eb37c6dbc4b4ac97dbe1945d239eb788 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Date: Mon, 24 Jul 2017 13:40:03 +0200
Subject: KVM: make pid available for uevents without debugfs

Simplify and improve the code so that the PID is always available in
the uevent even when debugfs is not available.

This adds a userspace_pid field to struct kvm, as per Radim's
suggestion, so that the PID can be retrieved on destruction too.

Acked-by: Janosch Frank <frankja@linux.vnet.ibm.com>
Fixes: 286de8f6ac9202 ("KVM: trigger uevents when creating or destroying a VM")
Signed-off-by: Claudio Imbrenda <imbrenda@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 35 ++++++++++++-----------------------
 2 files changed, 13 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 648b34cabb38..890b706d1943 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -445,6 +445,7 @@ struct kvm {
 	struct kvm_stat_data **debugfs_stat_data;
 	struct srcu_struct srcu;
 	struct srcu_struct irq_srcu;
+	pid_t userspace_pid;
 };
 
 #define kvm_err(fmt, ...) \
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 82987d457b8b..f3f74271f1a9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3883,7 +3883,6 @@ static const struct file_operations *stat_fops[] = {
 static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
 {
 	struct kobj_uevent_env *env;
-	char *tmp, *pathbuf = NULL;
 	unsigned long long created, active;
 
 	if (!kvm_dev.this_device || !kvm)
@@ -3907,38 +3906,28 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
 	add_uevent_var(env, "CREATED=%llu", created);
 	add_uevent_var(env, "COUNT=%llu", active);
 
-	if (type == KVM_EVENT_CREATE_VM)
+	if (type == KVM_EVENT_CREATE_VM) {
 		add_uevent_var(env, "EVENT=create");
-	else if (type == KVM_EVENT_DESTROY_VM)
+		kvm->userspace_pid = task_pid_nr(current);
+	} else if (type == KVM_EVENT_DESTROY_VM) {
 		add_uevent_var(env, "EVENT=destroy");
+	}
+	add_uevent_var(env, "PID=%d", kvm->userspace_pid);
 
 	if (kvm->debugfs_dentry) {
-		char p[ITOA_MAX_LEN];
-
-		snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name);
-		tmp = strchrnul(p + 1, '-');
-		*tmp = '\0';
-		add_uevent_var(env, "PID=%s", p);
-		pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
-		if (pathbuf) {
-			/* sizeof counts the final '\0' */
-			int len = sizeof("STATS_PATH=") - 1;
-			const char *pvar = "STATS_PATH=";
-
-			tmp = dentry_path_raw(kvm->debugfs_dentry,
-					      pathbuf + len,
-					      PATH_MAX - len);
-			if (!IS_ERR(tmp)) {
-				memcpy(tmp - len, pvar, len);
-				env->envp[env->envp_idx++] = tmp - len;
-			}
+		char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL);
+
+		if (p) {
+			tmp = dentry_path_raw(kvm->debugfs_dentry, p, PATH_MAX);
+			if (!IS_ERR(tmp))
+				add_uevent_var(env, "STATS_PATH=%s", tmp);
+			kfree(p);
 		}
 	}
 	/* no need for checks, since we are adding at most only 5 keys */
 	env->envp[env->envp_idx++] = NULL;
 	kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp);
 	kfree(env);
-	kfree(pathbuf);
 }
 
 static int kvm_init_debug(void)
-- 
cgit 


From bb67b496c338e15813f075f482067da930f52e39 Mon Sep 17 00:00:00 2001
From: Murilo Opsfelder Araujo <mopsfelder@gmail.com>
Date: Tue, 18 Jul 2017 14:22:20 -0300
Subject: include/linux/vfio.h: Guard powerpc-specific functions with
 CONFIG_VFIO_SPAPR_EEH

When CONFIG_EEH=y and CONFIG_VFIO_SPAPR_EEH=n, build fails with the
following:

    drivers/vfio/pci/vfio_pci.o: In function `.vfio_pci_release':
    vfio_pci.c:(.text+0xa98): undefined reference to `.vfio_spapr_pci_eeh_release'
    drivers/vfio/pci/vfio_pci.o: In function `.vfio_pci_open':
    vfio_pci.c:(.text+0x1420): undefined reference to `.vfio_spapr_pci_eeh_open'

In this case, vfio_pci.c should use the empty definitions of
vfio_spapr_pci_eeh_open and vfio_spapr_pci_eeh_release functions.

This patch fixes it by guarding these function definitions with
CONFIG_VFIO_SPAPR_EEH, the symbol that controls whether vfio_spapr_eeh.c is
built, which is where the non-empty versions of these functions are. We need to
make use of IS_ENABLED() macro because CONFIG_VFIO_SPAPR_EEH is a tristate
option.

This issue was found during a randconfig build. Logs are here:

    http://kisskb.ellerman.id.au/kisskb/buildresult/12982362/

Signed-off-by: Murilo Opsfelder Araujo <mopsfelder@gmail.com>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/vfio.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 586809abb273..a47b985341d1 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -152,7 +152,7 @@ extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
 					      size_t *data_size);
 
 struct pci_dev;
-#ifdef CONFIG_EEH
+#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH)
 extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
 extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
 extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
@@ -173,7 +173,7 @@ static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 {
 	return -ENOTTY;
 }
-#endif /* CONFIG_EEH */
+#endif /* CONFIG_VFIO_SPAPR_EEH */
 
 /*
  * IRQfd - generic
-- 
cgit 


From 273752c9ff03eb83856601b2a3458218bb949e46 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Wed, 26 Jul 2017 09:35:09 -0400
Subject: dm, dax: Make sure dm_dax_flush() is called if device supports it

Currently dm_dax_flush() is not being called, even if underlying dax
device supports write cache, because DAXDEV_WRITE_CACHE is not being
propagated up to the DM dax device.

If the underlying dax device supports write cache, set
DAXDEV_WRITE_CACHE on the DM dax device.  This will cause dm_dax_flush()
to be called.

Fixes: abebfbe2f7 ("dm: add ->flush() dax operation support")
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/dax/super.c   |  6 ++++++
 drivers/md/dm-table.c | 35 +++++++++++++++++++++++++++++++++++
 include/linux/dax.h   |  1 +
 3 files changed, 42 insertions(+)

(limited to 'include')

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index ce9e563e6e1d..938eb4868f7f 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -278,6 +278,12 @@ void dax_write_cache(struct dax_device *dax_dev, bool wc)
 }
 EXPORT_SYMBOL_GPL(dax_write_cache);
 
+bool dax_write_cache_enabled(struct dax_device *dax_dev)
+{
+	return test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
+}
+EXPORT_SYMBOL_GPL(dax_write_cache_enabled);
+
 bool dax_alive(struct dax_device *dax_dev)
 {
 	lockdep_assert_held(&dax_srcu);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index a39bcd9b982a..28a4071cdf85 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -20,6 +20,7 @@
 #include <linux/atomic.h>
 #include <linux/blk-mq.h>
 #include <linux/mount.h>
+#include <linux/dax.h>
 
 #define DM_MSG_PREFIX "table"
 
@@ -1630,6 +1631,37 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned long flush)
 	return false;
 }
 
+static int device_dax_write_cache_enabled(struct dm_target *ti,
+					  struct dm_dev *dev, sector_t start,
+					  sector_t len, void *data)
+{
+	struct dax_device *dax_dev = dev->dax_dev;
+
+	if (!dax_dev)
+		return false;
+
+	if (dax_write_cache_enabled(dax_dev))
+		return true;
+	return false;
+}
+
+static int dm_table_supports_dax_write_cache(struct dm_table *t)
+{
+	struct dm_target *ti;
+	unsigned i;
+
+	for (i = 0; i < dm_table_get_num_targets(t); i++) {
+		ti = dm_table_get_target(t, i);
+
+		if (ti->type->iterate_devices &&
+		    ti->type->iterate_devices(ti,
+				device_dax_write_cache_enabled, NULL))
+			return true;
+	}
+
+	return false;
+}
+
 static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev,
 			    sector_t start, sector_t len, void *data)
 {
@@ -1785,6 +1817,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	}
 	blk_queue_write_cache(q, wc, fua);
 
+	if (dm_table_supports_dax_write_cache(t))
+		dax_write_cache(t->md->dax_dev, true);
+
 	/* Ensure that all underlying devices are non-rotational. */
 	if (dm_table_all_devices_attribute(t, device_is_nonrot))
 		queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 794811875732..df97b7af7e2c 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -87,6 +87,7 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t size);
 void dax_write_cache(struct dax_device *dax_dev, bool wc);
+bool dax_write_cache_enabled(struct dax_device *dax_dev);
 
 /*
  * We use lowest available bit in exceptional entry for locking, one bit for
-- 
cgit 


From 1937f8a29f4a650bc27e0311b43b53509a34fd22 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 24 Jul 2017 12:52:59 +0200
Subject: scsi: bnx2fc: Simplify CPU hotplug code

The CPU hotplug related code of this driver can be simplified by:

1) Consolidating the callbacks into a single state. The CPU thread can be
   torn down on the CPU which goes offline. There is no point in delaying
   that to the CPU dead state

2) Let the core code invoke the online/offline callbacks and remove the
   extra for_each_online_cpu() loops.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 69 +++++++++------------------------------
 include/linux/cpuhotplug.h        |  1 -
 2 files changed, 15 insertions(+), 55 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index c4ebf8c5d884..6844ba361616 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
@@ -2624,12 +2624,11 @@ static struct fcoe_transport bnx2fc_transport = {
 };
 
 /**
- * bnx2fc_percpu_thread_create - Create a receive thread for an
- *				 online CPU
+ * bnx2fc_cpu_online - Create a receive thread for an  online CPU
  *
  * @cpu: cpu index for the online cpu
  */
-static void bnx2fc_percpu_thread_create(unsigned int cpu)
+static int bnx2fc_cpu_online(unsigned int cpu)
 {
 	struct bnx2fc_percpu_s *p;
 	struct task_struct *thread;
@@ -2639,15 +2638,17 @@ static void bnx2fc_percpu_thread_create(unsigned int cpu)
 	thread = kthread_create_on_node(bnx2fc_percpu_io_thread,
 					(void *)p, cpu_to_node(cpu),
 					"bnx2fc_thread/%d", cpu);
+	if (IS_ERR(thread))
+		return PTR_ERR(thread);
+
 	/* bind thread to the cpu */
-	if (likely(!IS_ERR(thread))) {
-		kthread_bind(thread, cpu);
-		p->iothread = thread;
-		wake_up_process(thread);
-	}
+	kthread_bind(thread, cpu);
+	p->iothread = thread;
+	wake_up_process(thread);
+	return 0;
 }
 
-static void bnx2fc_percpu_thread_destroy(unsigned int cpu)
+static int bnx2fc_cpu_offline(unsigned int cpu)
 {
 	struct bnx2fc_percpu_s *p;
 	struct task_struct *thread;
@@ -2661,7 +2662,6 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu)
 	thread = p->iothread;
 	p->iothread = NULL;
 
-
 	/* Free all work in the list */
 	list_for_each_entry_safe(work, tmp, &p->work_list, list) {
 		list_del_init(&work->list);
@@ -2673,20 +2673,6 @@ static void bnx2fc_percpu_thread_destroy(unsigned int cpu)
 
 	if (thread)
 		kthread_stop(thread);
-}
-
-
-static int bnx2fc_cpu_online(unsigned int cpu)
-{
-	printk(PFX "CPU %x online: Create Rx thread\n", cpu);
-	bnx2fc_percpu_thread_create(cpu);
-	return 0;
-}
-
-static int bnx2fc_cpu_dead(unsigned int cpu)
-{
-	printk(PFX "CPU %x offline: Remove Rx thread\n", cpu);
-	bnx2fc_percpu_thread_destroy(cpu);
 	return 0;
 }
 
@@ -2761,31 +2747,16 @@ static int __init bnx2fc_mod_init(void)
 		spin_lock_init(&p->fp_work_lock);
 	}
 
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		bnx2fc_percpu_thread_create(cpu);
-
-	rc = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
-						  "scsi/bnx2fc:online",
-						  bnx2fc_cpu_online, NULL);
+	rc = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/bnx2fc:online",
+			       bnx2fc_cpu_online, bnx2fc_cpu_offline);
 	if (rc < 0)
-		goto stop_threads;
+		goto stop_thread;
 	bnx2fc_online_state = rc;
 
-	cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD,
-					     "scsi/bnx2fc:dead",
-					     NULL, bnx2fc_cpu_dead);
-	put_online_cpus();
-
 	cnic_register_driver(CNIC_ULP_FCOE, &bnx2fc_cnic_cb);
-
 	return 0;
 
-stop_threads:
-	for_each_online_cpu(cpu)
-		bnx2fc_percpu_thread_destroy(cpu);
-	put_online_cpus();
+stop_thread:
 	kthread_stop(l2_thread);
 free_wq:
 	destroy_workqueue(bnx2fc_wq);
@@ -2804,7 +2775,6 @@ static void __exit bnx2fc_mod_exit(void)
 	struct fcoe_percpu_s *bg;
 	struct task_struct *l2_thread;
 	struct sk_buff *skb;
-	unsigned int cpu = 0;
 
 	/*
 	 * NOTE: Since cnic calls register_driver routine rtnl_lock,
@@ -2845,16 +2815,7 @@ static void __exit bnx2fc_mod_exit(void)
 	if (l2_thread)
 		kthread_stop(l2_thread);
 
-	get_online_cpus();
-	/* Destroy per cpu threads */
-	for_each_online_cpu(cpu) {
-		bnx2fc_percpu_thread_destroy(cpu);
-	}
-
-	cpuhp_remove_state_nocalls_cpuslocked(bnx2fc_online_state);
-	cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2FC_DEAD);
-
-	put_online_cpus();
+	cpuhp_remove_state(bnx2fc_online_state);
 
 	destroy_workqueue(bnx2fc_wq);
 	/*
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index b56573bf440d..2e7b1731ad12 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -39,7 +39,6 @@ enum cpuhp_state {
 	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_IOMMU_INTEL_DEAD,
 	CPUHP_LUSTRE_CFS_DEAD,
-	CPUHP_SCSI_BNX2FC_DEAD,
 	CPUHP_SCSI_BNX2I_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
-- 
cgit 


From f9f22a86912f9d36b50e9b3b383fabfb9f22dd46 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 24 Jul 2017 12:53:00 +0200
Subject: scsi: bnx2i: Simplify cpu hotplug code

The CPU hotplug related code of this driver can be simplified by:

1) Consolidating the callbacks into a single state. The CPU thread can be
   torn down on the CPU which goes offline. There is no point in delaying
   that to the CPU dead state

2) Let the core code invoke the online/offline callbacks and remove the
   extra for_each_online_cpu() loops.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Chad Dupuis <chad.dupuis@cavium.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bnx2i/bnx2i_init.c | 65 ++++++++++-------------------------------
 include/linux/cpuhotplug.h      |  1 -
 2 files changed, 15 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/bnx2i/bnx2i_init.c b/drivers/scsi/bnx2i/bnx2i_init.c
index 7487b653e799..4ebcda8d9500 100644
--- a/drivers/scsi/bnx2i/bnx2i_init.c
+++ b/drivers/scsi/bnx2i/bnx2i_init.c
@@ -404,12 +404,11 @@ int bnx2i_get_stats(void *handle)
 
 
 /**
- * bnx2i_percpu_thread_create - Create a receive thread for an
- *				online CPU
+ * bnx2i_cpu_online - Create a receive thread for an online CPU
  *
  * @cpu:	cpu index for the online cpu
  */
-static void bnx2i_percpu_thread_create(unsigned int cpu)
+static int bnx2i_cpu_online(unsigned int cpu)
 {
 	struct bnx2i_percpu_s *p;
 	struct task_struct *thread;
@@ -419,16 +418,17 @@ static void bnx2i_percpu_thread_create(unsigned int cpu)
 	thread = kthread_create_on_node(bnx2i_percpu_io_thread, (void *)p,
 					cpu_to_node(cpu),
 					"bnx2i_thread/%d", cpu);
+	if (IS_ERR(thread))
+		return PTR_ERR(thread);
+
 	/* bind thread to the cpu */
-	if (likely(!IS_ERR(thread))) {
-		kthread_bind(thread, cpu);
-		p->iothread = thread;
-		wake_up_process(thread);
-	}
+	kthread_bind(thread, cpu);
+	p->iothread = thread;
+	wake_up_process(thread);
+	return 0;
 }
 
-
-static void bnx2i_percpu_thread_destroy(unsigned int cpu)
+static int bnx2i_cpu_offline(unsigned int cpu)
 {
 	struct bnx2i_percpu_s *p;
 	struct task_struct *thread;
@@ -451,19 +451,6 @@ static void bnx2i_percpu_thread_destroy(unsigned int cpu)
 	spin_unlock_bh(&p->p_work_lock);
 	if (thread)
 		kthread_stop(thread);
-}
-
-static int bnx2i_cpu_online(unsigned int cpu)
-{
-	pr_info("bnx2i: CPU %x online: Create Rx thread\n", cpu);
-	bnx2i_percpu_thread_create(cpu);
-	return 0;
-}
-
-static int bnx2i_cpu_dead(unsigned int cpu)
-{
-	pr_info("CPU %x offline: Remove Rx thread\n", cpu);
-	bnx2i_percpu_thread_destroy(cpu);
 	return 0;
 }
 
@@ -511,28 +498,14 @@ static int __init bnx2i_mod_init(void)
 		p->iothread = NULL;
 	}
 
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		bnx2i_percpu_thread_create(cpu);
-
-	err = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
-						   "scsi/bnx2i:online",
-						   bnx2i_cpu_online, NULL);
+	err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "scsi/bnx2i:online",
+				bnx2i_cpu_online, bnx2i_cpu_offline);
 	if (err < 0)
-		goto remove_threads;
+		goto unreg_driver;
 	bnx2i_online_state = err;
-
-	cpuhp_setup_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD,
-					     "scsi/bnx2i:dead",
-					     NULL, bnx2i_cpu_dead);
-	put_online_cpus();
 	return 0;
 
-remove_threads:
-	for_each_online_cpu(cpu)
-		bnx2i_percpu_thread_destroy(cpu);
-	put_online_cpus();
+unreg_driver:
 	cnic_unregister_driver(CNIC_ULP_ISCSI);
 unreg_xport:
 	iscsi_unregister_transport(&bnx2i_iscsi_transport);
@@ -552,7 +525,6 @@ out:
 static void __exit bnx2i_mod_exit(void)
 {
 	struct bnx2i_hba *hba;
-	unsigned cpu = 0;
 
 	mutex_lock(&bnx2i_dev_lock);
 	while (!list_empty(&adapter_list)) {
@@ -570,14 +542,7 @@ static void __exit bnx2i_mod_exit(void)
 	}
 	mutex_unlock(&bnx2i_dev_lock);
 
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		bnx2i_percpu_thread_destroy(cpu);
-
-	cpuhp_remove_state_nocalls_cpuslocked(bnx2i_online_state);
-	cpuhp_remove_state_nocalls_cpuslocked(CPUHP_SCSI_BNX2I_DEAD);
-	put_online_cpus();
+	cpuhp_remove_state(bnx2i_online_state);
 
 	iscsi_unregister_transport(&bnx2i_iscsi_transport);
 	cnic_unregister_driver(CNIC_ULP_ISCSI);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 2e7b1731ad12..82b30e638430 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -39,7 +39,6 @@ enum cpuhp_state {
 	CPUHP_PCI_XGENE_DEAD,
 	CPUHP_IOMMU_INTEL_DEAD,
 	CPUHP_LUSTRE_CFS_DEAD,
-	CPUHP_SCSI_BNX2I_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
-- 
cgit 


From 6b84202c946cd3da3a8daa92c682510e9ed80321 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Wed, 26 Jul 2017 16:24:59 +0800
Subject: sctp: fix the check for _sctp_walk_params and _sctp_walk_errors

Commit b1f5bfc27a19 ("sctp: don't dereference ptr before leaving
_sctp_walk_{params, errors}()") tried to fix the issue that it
may overstep the chunk end for _sctp_walk_{params, errors} with
'chunk_end > offset(length) + sizeof(length)'.

But it introduced a side effect: When processing INIT, it verifies
the chunks with 'param.v == chunk_end' after iterating all params
by sctp_walk_params(). With the check 'chunk_end > offset(length)
+ sizeof(length)', it would return when the last param is not yet
accessed. Because the last param usually is fwdtsn supported param
whose size is 4 and 'chunk_end == offset(length) + sizeof(length)'

This is a badly issue even causing sctp couldn't process 4-shakes.
Client would always get abort when connecting to server, due to
the failure of INIT chunk verification on server.

The patch is to use 'chunk_end <= offset(length) + sizeof(length)'
instead of 'chunk_end < offset(length) + sizeof(length)' for both
_sctp_walk_params and _sctp_walk_errors.

Fixes: b1f5bfc27a19 ("sctp: don't dereference ptr before leaving _sctp_walk_{params, errors}()")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 980807d7506f..45fd4c6056b5 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -469,7 +469,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member)
 
 #define _sctp_walk_params(pos, chunk, end, member)\
 for (pos.v = chunk->member;\
-     (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <\
+     (pos.v + offsetof(struct sctp_paramhdr, length) + sizeof(pos.p->length) <=\
       (void *)chunk + end) &&\
      pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\
      ntohs(pos.p->length) >= sizeof(struct sctp_paramhdr);\
@@ -481,7 +481,7 @@ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length))
 #define _sctp_walk_errors(err, chunk_hdr, end)\
 for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \
 	    sizeof(struct sctp_chunkhdr));\
-     ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <\
+     ((void *)err + offsetof(sctp_errhdr_t, length) + sizeof(err->length) <=\
       (void *)chunk_hdr + end) &&\
      (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\
      ntohs(err->length) >= sizeof(sctp_errhdr_t); \
-- 
cgit 


From a3287c41ff405025bc57b165a0f6cd698bbbc1be Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 25 Jul 2017 16:30:34 +0100
Subject: drivers/perf: arm_pmu: Request PMU SPIs with IRQF_PER_CPU

Since the PMU register interface is banked per CPU, CPU PMU interrrupts
cannot be handled by a CPU other than the one with the PMU asserting the
interrupt. This means that migrating PMU SPIs, as we do during a CPU
hotplug operation doesn't make any sense and can lead to the IRQ being
disabled entirely if we route a spurious IRQ to the new affinity target.

This has been observed in practice on AMD Seattle, where CPUs on the
non-boot cluster appear to take a spurious PMU IRQ when coming online,
which is routed to CPU0 where it cannot be handled.

This patch passes IRQF_PERCPU for PMU SPIs and forcefully sets their
affinity prior to requesting them, ensuring that they cannot
be migrated during hotplug events. This interacts badly with the DB8500
erratum workaround that ping-pongs the interrupt affinity from the handler,
so we avoid passing IRQF_PERCPU in that case by allowing the IRQ flags
to be overridden in the platdata.

Fixes: 3cf7ee98b848 ("drivers/perf: arm_pmu: move irq request/free into probe")
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm/mach-ux500/cpu-db8500.c |  1 +
 drivers/perf/arm_pmu.c           | 41 ++++++++++++++++++++++++++--------------
 include/linux/perf/arm_pmu.h     |  4 ++++
 3 files changed, 32 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/arm/mach-ux500/cpu-db8500.c b/arch/arm/mach-ux500/cpu-db8500.c
index 28083ef72819..71a34e8c345a 100644
--- a/arch/arm/mach-ux500/cpu-db8500.c
+++ b/arch/arm/mach-ux500/cpu-db8500.c
@@ -133,6 +133,7 @@ static irqreturn_t db8500_pmu_handler(int irq, void *dev, irq_handler_t handler)
 
 static struct arm_pmu_platdata db8500_pmu_platdata = {
 	.handle_irq		= db8500_pmu_handler,
+	.irq_flags		= IRQF_NOBALANCING | IRQF_NO_THREAD,
 };
 
 static struct of_dev_auxdata u8500_auxdata_lookup[] __initdata = {
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index dc459eb1246b..1c5e0f333779 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -569,22 +569,41 @@ int armpmu_request_irq(struct arm_pmu *armpmu, int cpu)
 		if (irq != other_irq) {
 			pr_warn("mismatched PPIs detected.\n");
 			err = -EINVAL;
+			goto err_out;
 		}
 	} else {
-		err = request_irq(irq, handler,
-				  IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
+		struct arm_pmu_platdata *platdata = armpmu_get_platdata(armpmu);
+		unsigned long irq_flags;
+
+		err = irq_force_affinity(irq, cpumask_of(cpu));
+
+		if (err && num_possible_cpus() > 1) {
+			pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
+				irq, cpu);
+			goto err_out;
+		}
+
+		if (platdata && platdata->irq_flags) {
+			irq_flags = platdata->irq_flags;
+		} else {
+			irq_flags = IRQF_PERCPU |
+				    IRQF_NOBALANCING |
+				    IRQF_NO_THREAD;
+		}
+
+		err = request_irq(irq, handler, irq_flags, "arm-pmu",
 				  per_cpu_ptr(&hw_events->percpu_pmu, cpu));
 	}
 
-	if (err) {
-		pr_err("unable to request IRQ%d for ARM PMU counters\n",
-			irq);
-		return err;
-	}
+	if (err)
+		goto err_out;
 
 	cpumask_set_cpu(cpu, &armpmu->active_irqs);
-
 	return 0;
+
+err_out:
+	pr_err("unable to request IRQ%d for ARM PMU counters\n", irq);
+	return err;
 }
 
 int armpmu_request_irqs(struct arm_pmu *armpmu)
@@ -628,12 +647,6 @@ static int arm_perf_starting_cpu(unsigned int cpu, struct hlist_node *node)
 			enable_percpu_irq(irq, IRQ_TYPE_NONE);
 			return 0;
 		}
-
-		if (irq_force_affinity(irq, cpumask_of(cpu)) &&
-		    num_possible_cpus() > 1) {
-			pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
-				irq, cpu);
-		}
 	}
 
 	return 0;
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 1360dd6d5e61..af0f44effd44 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -24,10 +24,14 @@
  *	interrupt and passed the address of the low level handler,
  *	and can be used to implement any platform specific handling
  *	before or after calling it.
+ *
+ * @irq_flags: if non-zero, these flags will be passed to request_irq
+ *             when requesting interrupts for this PMU device.
  */
 struct arm_pmu_platdata {
 	irqreturn_t (*handle_irq)(int irq, void *dev,
 				  irq_handler_t pmu_handler);
+	unsigned long irq_flags;
 };
 
 #ifdef CONFIG_ARM_PMU
-- 
cgit 


From 8397913303abc9333f376a518a8368fa22ca5e6e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 27 Jul 2017 12:21:11 +0200
Subject: genirq/cpuhotplug: Revert "Set force affinity flag on hotplug
 migration"

That commit was part of the changes moving x86 to the generic CPU hotplug
interrupt migration code. The force flag was required on x86 before the
hierarchical irqdomain rework, but invoking set_affinity() with force=true
stayed and had no side effects.

At some point in the past, the force flag got repurposed to support the
exynos timer interrupt affinity setting to a not yet online CPU, so the
interrupt controller callback does not verify the supplied affinity mask
against cpu_online_mask.

Setting the flag in the CPU hotplug code causes the cpu online masking to
be blocked on these irq controllers and results in potentially affining an
interrupt to the CPU which is unplugged, i.e. instead of moving it away,
it's just reassigned to it.

As the force flags is not longer needed on x86, it's safe to revert that
patch so the ARM irqchips which use the force flag work again.

Add comments to that effect, so this won't happen again.

Note: The online mask handling should be done in the generic code and the
force flag and the masking in the irq chips removed all together, but
that's not a change possible for 4.13.

Fixes: 77f85e66aa8b ("genirq/cpuhotplug: Set force affinity flag on hotplug migration")
Reported-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: LAK <linux-arm-kernel@lists.infradead.org>
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1707271217590.3109@nanos
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h     | 7 ++++++-
 kernel/irq/cpuhotplug.c | 9 +++++++--
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index 00db35b61e9e..d2d543794093 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -388,7 +388,12 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
  * @irq_mask_ack:	ack and mask an interrupt source
  * @irq_unmask:		unmask an interrupt source
  * @irq_eoi:		end of interrupt
- * @irq_set_affinity:	set the CPU affinity on SMP machines
+ * @irq_set_affinity:	Set the CPU affinity on SMP machines. If the force
+ *			argument is true, it tells the driver to
+ *			unconditionally apply the affinity setting. Sanity
+ *			checks against the supplied affinity mask are not
+ *			required. This is used for CPU hotplug where the
+ *			target CPU is not yet set in the cpu_online_mask.
  * @irq_retrigger:	resend an IRQ to the CPU
  * @irq_set_type:	set the flow type (IRQ_TYPE_LEVEL/etc.) of an IRQ
  * @irq_set_wake:	enable/disable power-management wake-on of an IRQ
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index aee8f7ec40af..638eb9c83d9f 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -95,8 +95,13 @@ static bool migrate_one_irq(struct irq_desc *desc)
 		affinity = cpu_online_mask;
 		brokeaff = true;
 	}
-
-	err = irq_do_set_affinity(d, affinity, true);
+	/*
+	 * Do not set the force argument of irq_do_set_affinity() as this
+	 * disables the masking of offline CPUs from the supplied affinity
+	 * mask and therefore might keep/reassign the irq to the outgoing
+	 * CPU.
+	 */
+	err = irq_do_set_affinity(d, affinity, false);
 	if (err) {
 		pr_warn_ratelimited("IRQ%u: set affinity failed(%d).\n",
 				    d->irq, err);
-- 
cgit 


From 0b794ffae7afa7c4e5accac8791c4b78e8d080ce Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Thu, 25 May 2017 15:11:26 +0300
Subject: net/mlx5: Fix mlx5_ifc_mtpps_reg_bits structure size

Fix miscalculation in reserved_at_1a0 field.

Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 87869c04849a..fd98aef4545c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8175,7 +8175,7 @@ struct mlx5_ifc_mtpps_reg_bits {
 	u8         out_pulse_duration[0x10];
 	u8         out_periodic_adjustment[0x10];
 
-	u8         reserved_at_1a0[0x60];
+	u8         reserved_at_1a0[0x40];
 };
 
 struct mlx5_ifc_mtppse_reg_bits {
-- 
cgit 


From fa3676885e3b5be1edfa1b2cc775e20a45b34a19 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.com>
Date: Thu, 25 May 2017 16:09:34 +0300
Subject: net/mlx5e: Add field select to MTPPS register

In order to mark relevant fields while setting the MTPPS register
add field select. Otherwise it can cause a misconfiguration in
firmware.

Fixes: ee7f12205abc ('net/mlx5e: Implement 1PPS support')
Signed-off-by: Eugenia Emantayev <eugenia@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c | 29 +++++++++++++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |  2 +-
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    |  5 ++++
 include/linux/mlx5/mlx5_ifc.h                      | 10 +++++---
 4 files changed, 36 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index 66f432385dbb..ab07233e2faa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -53,6 +53,15 @@ enum {
 	MLX5E_EVENT_MODE_ONCE_TILL_ARM	= 0x2,
 };
 
+enum {
+	MLX5E_MTPPS_FS_ENABLE			= BIT(0x0),
+	MLX5E_MTPPS_FS_PATTERN			= BIT(0x2),
+	MLX5E_MTPPS_FS_PIN_MODE			= BIT(0x3),
+	MLX5E_MTPPS_FS_TIME_STAMP		= BIT(0x4),
+	MLX5E_MTPPS_FS_OUT_PULSE_DURATION	= BIT(0x5),
+	MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ		= BIT(0x7),
+};
+
 void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp,
 			struct skb_shared_hwtstamps *hwts)
 {
@@ -221,7 +230,10 @@ static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
 
 		/* For future use need to add a loop for finding all 1PPS out pins */
 		MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_OUT);
-		MLX5_SET(mtpps_reg, in, out_periodic_adjustment, delta & 0xFFFF);
+		MLX5_SET(mtpps_reg, in, enhanced_out_periodic_adjustment, delta);
+		MLX5_SET(mtpps_reg, in, field_select,
+			 MLX5E_MTPPS_FS_PIN_MODE |
+			 MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ);
 
 		mlx5_set_mtpps(priv->mdev, in, sizeof(in));
 	}
@@ -257,8 +269,7 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp,
 	int pin = -1;
 	int err = 0;
 
-	if (!MLX5_CAP_GEN(priv->mdev, pps) ||
-	    !MLX5_CAP_GEN(priv->mdev, pps_modify))
+	if (!MLX5_PPS_CAP(priv->mdev))
 		return -EOPNOTSUPP;
 
 	if (rq->extts.index >= tstamp->ptp_info.n_pins)
@@ -277,6 +288,9 @@ static int mlx5e_extts_configure(struct ptp_clock_info *ptp,
 	MLX5_SET(mtpps_reg, in, pin_mode, MLX5E_PIN_MODE_IN);
 	MLX5_SET(mtpps_reg, in, pattern, pattern);
 	MLX5_SET(mtpps_reg, in, enable, on);
+	MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
+					      MLX5E_MTPPS_FS_PATTERN |
+					      MLX5E_MTPPS_FS_ENABLE);
 
 	err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
 	if (err)
@@ -302,7 +316,7 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp,
 	int pin = -1;
 	s64 ns;
 
-	if (!MLX5_CAP_GEN(priv->mdev, pps_modify))
+	if (!MLX5_PPS_CAP(priv->mdev))
 		return -EOPNOTSUPP;
 
 	if (rq->perout.index >= tstamp->ptp_info.n_pins)
@@ -337,7 +351,10 @@ static int mlx5e_perout_configure(struct ptp_clock_info *ptp,
 	MLX5_SET(mtpps_reg, in, pattern, MLX5E_OUT_PATTERN_PERIODIC);
 	MLX5_SET(mtpps_reg, in, enable, on);
 	MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
-
+	MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_PIN_MODE |
+					      MLX5E_MTPPS_FS_PATTERN |
+					      MLX5E_MTPPS_FS_ENABLE |
+					      MLX5E_MTPPS_FS_TIME_STAMP);
 	return mlx5_set_mtpps(priv->mdev, in, sizeof(in));
 }
 
@@ -487,7 +504,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
 #define MAX_PIN_NUM	8
 	tstamp->pps_pin_caps = kzalloc(sizeof(u8) * MAX_PIN_NUM, GFP_KERNEL);
 	if (tstamp->pps_pin_caps) {
-		if (MLX5_CAP_GEN(priv->mdev, pps))
+		if (MLX5_PPS_CAP(priv->mdev))
 			mlx5e_get_pps_caps(priv, tstamp);
 		if (tstamp->ptp_info.n_pins)
 			mlx5e_init_pin_config(tstamp);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index af51a5d2b912..52b9a64cd3a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -698,7 +698,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	else
 		mlx5_core_dbg(dev, "port_module_event is not set\n");
 
-	if (MLX5_CAP_GEN(dev, pps))
+	if (MLX5_PPS_CAP(dev))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);
 
 	if (MLX5_CAP_GEN(dev, fpga))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 6a3d6bef7dd4..6a263e8d883a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -154,6 +154,11 @@ int mlx5_set_mtpps(struct mlx5_core_dev *mdev, u32 *mtpps, u32 mtpps_size);
 int mlx5_query_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 *arm, u8 *mode);
 int mlx5_set_mtppse(struct mlx5_core_dev *mdev, u8 pin, u8 arm, u8 mode);
 
+#define MLX5_PPS_CAP(mdev) (MLX5_CAP_GEN((mdev), pps) &&		\
+			    MLX5_CAP_GEN((mdev), pps_modify) &&		\
+			    MLX5_CAP_MCAM_FEATURE((mdev), mtpps_fs) &&	\
+			    MLX5_CAP_MCAM_FEATURE((mdev), mtpps_enh_out_per_adj))
+
 int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw);
 
 void mlx5e_init(void);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index fd98aef4545c..3030121b4746 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -7749,8 +7749,10 @@ struct mlx5_ifc_pcam_reg_bits {
 };
 
 struct mlx5_ifc_mcam_enhanced_features_bits {
-	u8         reserved_at_0[0x7f];
+	u8         reserved_at_0[0x7d];
 
+	u8         mtpps_enh_out_per_adj[0x1];
+	u8         mtpps_fs[0x1];
 	u8         pcie_performance_group[0x1];
 };
 
@@ -8159,7 +8161,8 @@ struct mlx5_ifc_mtpps_reg_bits {
 	u8         reserved_at_78[0x4];
 	u8         cap_pin_4_mode[0x4];
 
-	u8         reserved_at_80[0x80];
+	u8         field_select[0x20];
+	u8         reserved_at_a0[0x60];
 
 	u8         enable[0x1];
 	u8         reserved_at_101[0xb];
@@ -8174,8 +8177,9 @@ struct mlx5_ifc_mtpps_reg_bits {
 
 	u8         out_pulse_duration[0x10];
 	u8         out_periodic_adjustment[0x10];
+	u8         enhanced_out_periodic_adjustment[0x20];
 
-	u8         reserved_at_1a0[0x40];
+	u8         reserved_at_1c0[0x20];
 };
 
 struct mlx5_ifc_mtppse_reg_bits {
-- 
cgit 


From cebd8c532fb925311764098b382e1a1ee9ecda78 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Date: Thu, 25 May 2017 22:14:24 +0300
Subject: v4l: vsp1: Add pipe index argument to the VSP-DU API

In the H3 ES2.0 SoC the VSP2-DL instance has two connections to DU
channels that need to be configured independently. Extend the VSP-DU API
with a pipeline index to identify which pipeline the caller wants to
operate on.

Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Acked-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/gpu/drm/rcar-du/rcar_du_vsp.c  | 12 ++++++------
 drivers/media/platform/vsp1/vsp1_drm.c | 20 ++++++++++++++++----
 include/media/vsp1.h                   | 10 ++++++----
 3 files changed, 28 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
index f870445ebc8d..d46dce054442 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
@@ -81,22 +81,22 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc)
 	 */
 	crtc->group->need_restart = true;
 
-	vsp1_du_setup_lif(crtc->vsp->vsp, &cfg);
+	vsp1_du_setup_lif(crtc->vsp->vsp, 0, &cfg);
 }
 
 void rcar_du_vsp_disable(struct rcar_du_crtc *crtc)
 {
-	vsp1_du_setup_lif(crtc->vsp->vsp, NULL);
+	vsp1_du_setup_lif(crtc->vsp->vsp, 0, NULL);
 }
 
 void rcar_du_vsp_atomic_begin(struct rcar_du_crtc *crtc)
 {
-	vsp1_du_atomic_begin(crtc->vsp->vsp);
+	vsp1_du_atomic_begin(crtc->vsp->vsp, 0);
 }
 
 void rcar_du_vsp_atomic_flush(struct rcar_du_crtc *crtc)
 {
-	vsp1_du_atomic_flush(crtc->vsp->vsp);
+	vsp1_du_atomic_flush(crtc->vsp->vsp, 0);
 }
 
 /* Keep the two tables in sync. */
@@ -192,7 +192,7 @@ static void rcar_du_vsp_plane_setup(struct rcar_du_vsp_plane *plane)
 		}
 	}
 
-	vsp1_du_atomic_update(plane->vsp->vsp, plane->index, &cfg);
+	vsp1_du_atomic_update(plane->vsp->vsp, 0, plane->index, &cfg);
 }
 
 static int rcar_du_vsp_plane_prepare_fb(struct drm_plane *plane,
@@ -292,7 +292,7 @@ static void rcar_du_vsp_plane_atomic_update(struct drm_plane *plane,
 	if (plane->state->crtc)
 		rcar_du_vsp_plane_setup(rplane);
 	else
-		vsp1_du_atomic_update(rplane->vsp->vsp, rplane->index, NULL);
+		vsp1_du_atomic_update(rplane->vsp->vsp, 0, rplane->index, NULL);
 }
 
 static const struct drm_plane_helper_funcs rcar_du_vsp_plane_helper_funcs = {
diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c
index c72d021ff820..f40444a96ba6 100644
--- a/drivers/media/platform/vsp1/vsp1_drm.c
+++ b/drivers/media/platform/vsp1/vsp1_drm.c
@@ -58,12 +58,16 @@ EXPORT_SYMBOL_GPL(vsp1_du_init);
 /**
  * vsp1_du_setup_lif - Setup the output part of the VSP pipeline
  * @dev: the VSP device
+ * @pipe_index: the DRM pipeline index
  * @cfg: the LIF configuration
  *
  * Configure the output part of VSP DRM pipeline for the given frame @cfg.width
  * and @cfg.height. This sets up formats on the BRU source pad, the WPF0 sink
  * and source pads, and the LIF sink pad.
  *
+ * The @pipe_index argument selects which DRM pipeline to setup. The number of
+ * available pipelines depend on the VSP instance.
+ *
  * As the media bus code on the BRU source pad is conditioned by the
  * configuration of the BRU sink 0 pad, we also set up the formats on all BRU
  * sinks, even if the configuration will be overwritten later by
@@ -72,7 +76,8 @@ EXPORT_SYMBOL_GPL(vsp1_du_init);
  *
  * Return 0 on success or a negative error code on failure.
  */
-int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg)
+int vsp1_du_setup_lif(struct device *dev, unsigned int pipe_index,
+		      const struct vsp1_du_lif_config *cfg)
 {
 	struct vsp1_device *vsp1 = dev_get_drvdata(dev);
 	struct vsp1_pipeline *pipe = &vsp1->drm->pipe;
@@ -81,6 +86,9 @@ int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg)
 	unsigned int i;
 	int ret;
 
+	if (pipe_index > 0)
+		return -EINVAL;
+
 	if (!cfg) {
 		/*
 		 * NULL configuration means the CRTC is being disabled, stop
@@ -232,8 +240,9 @@ EXPORT_SYMBOL_GPL(vsp1_du_setup_lif);
 /**
  * vsp1_du_atomic_begin - Prepare for an atomic update
  * @dev: the VSP device
+ * @pipe_index: the DRM pipeline index
  */
-void vsp1_du_atomic_begin(struct device *dev)
+void vsp1_du_atomic_begin(struct device *dev, unsigned int pipe_index)
 {
 	struct vsp1_device *vsp1 = dev_get_drvdata(dev);
 	struct vsp1_pipeline *pipe = &vsp1->drm->pipe;
@@ -245,6 +254,7 @@ EXPORT_SYMBOL_GPL(vsp1_du_atomic_begin);
 /**
  * vsp1_du_atomic_update - Setup one RPF input of the VSP pipeline
  * @dev: the VSP device
+ * @pipe_index: the DRM pipeline index
  * @rpf_index: index of the RPF to setup (0-based)
  * @cfg: the RPF configuration
  *
@@ -271,7 +281,8 @@ EXPORT_SYMBOL_GPL(vsp1_du_atomic_begin);
  *
  * Return 0 on success or a negative error code on failure.
  */
-int vsp1_du_atomic_update(struct device *dev, unsigned int rpf_index,
+int vsp1_du_atomic_update(struct device *dev, unsigned int pipe_index,
+			  unsigned int rpf_index,
 			  const struct vsp1_du_atomic_config *cfg)
 {
 	struct vsp1_device *vsp1 = dev_get_drvdata(dev);
@@ -437,8 +448,9 @@ static unsigned int rpf_zpos(struct vsp1_device *vsp1, struct vsp1_rwpf *rpf)
 /**
  * vsp1_du_atomic_flush - Commit an atomic update
  * @dev: the VSP device
+ * @pipe_index: the DRM pipeline index
  */
-void vsp1_du_atomic_flush(struct device *dev)
+void vsp1_du_atomic_flush(struct device *dev, unsigned int pipe_index)
 {
 	struct vsp1_device *vsp1 = dev_get_drvdata(dev);
 	struct vsp1_pipeline *pipe = &vsp1->drm->pipe;
diff --git a/include/media/vsp1.h b/include/media/vsp1.h
index c837383b2013..c8fc868fb0f2 100644
--- a/include/media/vsp1.h
+++ b/include/media/vsp1.h
@@ -38,7 +38,8 @@ struct vsp1_du_lif_config {
 	void *callback_data;
 };
 
-int vsp1_du_setup_lif(struct device *dev, const struct vsp1_du_lif_config *cfg);
+int vsp1_du_setup_lif(struct device *dev, unsigned int pipe_index,
+		      const struct vsp1_du_lif_config *cfg);
 
 struct vsp1_du_atomic_config {
 	u32 pixelformat;
@@ -50,10 +51,11 @@ struct vsp1_du_atomic_config {
 	unsigned int zpos;
 };
 
-void vsp1_du_atomic_begin(struct device *dev);
-int vsp1_du_atomic_update(struct device *dev, unsigned int rpf,
+void vsp1_du_atomic_begin(struct device *dev, unsigned int pipe_index);
+int vsp1_du_atomic_update(struct device *dev, unsigned int pipe_index,
+			  unsigned int rpf,
 			  const struct vsp1_du_atomic_config *cfg);
-void vsp1_du_atomic_flush(struct device *dev);
+void vsp1_du_atomic_flush(struct device *dev, unsigned int pipe_index);
 int vsp1_du_map_sg(struct device *dev, struct sg_table *sgt);
 void vsp1_du_unmap_sg(struct device *dev, struct sg_table *sgt);
 
-- 
cgit 


From c9f2c1ae123a751d4e4f949144500219354d5ee1 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 27 Jul 2017 14:45:09 +0200
Subject: udp6: fix socket leak on early demux

When an early demuxed packet reaches __udp6_lib_lookup_skb(), the
sk reference is retrieved and used, but the relevant reference
count is leaked and the socket destructor is never called.
Beyond leaking the sk memory, if there are pending UDP packets
in the receive queue, even the related accounted memory is leaked.

In the long run, this will cause persistent forward allocation errors
and no UDP skbs (both ipv4 and ipv6) will be able to reach the
user-space.

Fix this by explicitly accessing the early demux reference before
the lookup, and properly decreasing the socket reference count
after usage.

Also drop the skb_steal_sock() in __udp6_lib_lookup_skb(), and
the now obsoleted comment about "socket cache".

The newly added code is derived from the current ipv4 code for the
similar path.

v1 -> v2:
  fixed the __udp6_lib_rcv() return code for resubmission,
  as suggested by Eric

Reported-by: Sam Edwards <CFSworks@gmail.com>
Reported-by: Marc Haber <mh+netdev@zugschlus.de>
Fixes: 5425077d73e0 ("net: ipv6: Add early demux handler for UDP unicast")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/udp.h |  1 +
 net/ipv4/udp.c    |  3 ++-
 net/ipv6/udp.c    | 27 ++++++++++++++++++---------
 3 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/udp.h b/include/net/udp.h
index 56ce2d2a612d..cc8036987dcb 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -260,6 +260,7 @@ static inline struct sk_buff *skb_recv_udp(struct sock *sk, unsigned int flags,
 }
 
 void udp_v4_early_demux(struct sk_buff *skb);
+void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst);
 int udp_get_port(struct sock *sk, unsigned short snum,
 		 int (*saddr_cmp)(const struct sock *,
 				  const struct sock *));
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fac7cb9e3b0f..e6276fa3750b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1928,7 +1928,7 @@ drop:
 /* For TCP sockets, sk_rx_dst is protected by socket lock
  * For UDP, we use xchg() to guard against concurrent changes.
  */
-static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
 {
 	struct dst_entry *old;
 
@@ -1937,6 +1937,7 @@ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
 		dst_release(old);
 	}
 }
+EXPORT_SYMBOL(udp_sk_rx_dst_set);
 
 /*
  *	Multicasts and broadcasts go to each listener.
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4a3e65626e8b..98fe4560e24c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -291,11 +291,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
 					  struct udp_table *udptable)
 {
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
-	struct sock *sk;
 
-	sk = skb_steal_sock(skb);
-	if (unlikely(sk))
-		return sk;
 	return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
 				 &iph->daddr, dport, inet6_iif(skb),
 				 udptable, skb);
@@ -804,6 +800,24 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	if (udp6_csum_init(skb, uh, proto))
 		goto csum_error;
 
+	/* Check if the socket is already available, e.g. due to early demux */
+	sk = skb_steal_sock(skb);
+	if (sk) {
+		struct dst_entry *dst = skb_dst(skb);
+		int ret;
+
+		if (unlikely(sk->sk_rx_dst != dst))
+			udp_sk_rx_dst_set(sk, dst);
+
+		ret = udpv6_queue_rcv_skb(sk, skb);
+		sock_put(sk);
+
+		/* a return value > 0 means to resubmit the input */
+		if (ret > 0)
+			return ret;
+		return 0;
+	}
+
 	/*
 	 *	Multicast receive code
 	 */
@@ -812,11 +826,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 				saddr, daddr, udptable, proto);
 
 	/* Unicast */
-
-	/*
-	 * check socket cache ... must talk to Alan about his plans
-	 * for sock caches... i'll skip this for now.
-	 */
 	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
 	if (sk) {
 		int ret;
-- 
cgit 


From 37ef38f3f83891a2f413fb872bae7d0f9bb95b27 Mon Sep 17 00:00:00 2001
From: Timur Tabi <timur@codeaurora.org>
Date: Thu, 27 Jul 2017 16:15:52 -0500
Subject: tty: pl011: fix initialization order of QDF2400 E44

The work-around for Qualcomm Technologies QDF2400 Erratum 44 hinges on a
global variable defined in the pl011 driver.  The ACPI SPCR parsing code
determines whether the work-around is needed, and if so, it changes the
console name from "pl011" to "qdf2400_e44".  The expectation is that
the pl011 driver will implement the work-around when it sees the console
name.  The global variable qdf2400_e44_present is set when that happens.

The problem is that work-around needs to be enabled when the pl011
driver probes, not when the console name is queried.  However, sbsa_probe()
is called before pl011_console_match().  The work-around appeared to work
previously because the default console on QDF2400 platforms was always
ttyAMA1.  The first time sbsa_probe() is called (for ttyAMA0),
qdf2400_e44_present is still false.  Then pl011_console_match() is called,
and it sets qdf2400_e44_present to true.  All subsequent calls to
sbsa_probe() enable the work-around.

The solution is to move the global variable into spcr.c and let the
pl011 driver query it during probe time.  This works because all QDF2400
platforms require SPCR, so parse_spcr() will always be called.
pl011_console_match still checks for the "qdf2400_e44" console name,
but it doesn't do anything else special.

Fixes: 5a0722b898f8 ("tty: pl011: use "qdf2400_e44" as the earlycon name for QDF2400 E44")
Tested-by: Jeffrey Hugo <jhugo@codeaurora.org>
Signed-off-by: Timur Tabi <timur@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/spcr.c             | 36 ++++++++++++++++++++++++++++++++++--
 drivers/tty/serial/amba-pl011.c | 37 +++++++++++++++++++------------------
 include/linux/acpi.h            |  1 +
 3 files changed, 54 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/spcr.c b/drivers/acpi/spcr.c
index 4ac3e06b41d8..98aa8c808a33 100644
--- a/drivers/acpi/spcr.c
+++ b/drivers/acpi/spcr.c
@@ -16,6 +16,16 @@
 #include <linux/kernel.h>
 #include <linux/serial_core.h>
 
+/*
+ * Erratum 44 for QDF2432v1 and QDF2400v1 SoCs describes the BUSY bit as
+ * occasionally getting stuck as 1. To avoid the potential for a hang, check
+ * TXFE == 0 instead of BUSY == 1. This may not be suitable for all UART
+ * implementations, so only do so if an affected platform is detected in
+ * parse_spcr().
+ */
+bool qdf2400_e44_present;
+EXPORT_SYMBOL(qdf2400_e44_present);
+
 /*
  * Some Qualcomm Datacenter Technologies SoCs have a defective UART BUSY bit.
  * Detect them by examining the OEM fields in the SPCR header, similiar to PCI
@@ -147,8 +157,30 @@ int __init parse_spcr(bool earlycon)
 		goto done;
 	}
 
-	if (qdf2400_erratum_44_present(&table->header))
-		uart = "qdf2400_e44";
+	/*
+	 * If the E44 erratum is required, then we need to tell the pl011
+	 * driver to implement the work-around.
+	 *
+	 * The global variable is used by the probe function when it
+	 * creates the UARTs, whether or not they're used as a console.
+	 *
+	 * If the user specifies "traditional" earlycon, the qdf2400_e44
+	 * console name matches the EARLYCON_DECLARE() statement, and
+	 * SPCR is not used.  Parameter "earlycon" is false.
+	 *
+	 * If the user specifies "SPCR" earlycon, then we need to update
+	 * the console name so that it also says "qdf2400_e44".  Parameter
+	 * "earlycon" is true.
+	 *
+	 * For consistency, if we change the console name, then we do it
+	 * for everyone, not just earlycon.
+	 */
+	if (qdf2400_erratum_44_present(&table->header)) {
+		qdf2400_e44_present = true;
+		if (earlycon)
+			uart = "qdf2400_e44";
+	}
+
 	if (xgene_8250_erratum_present(table))
 		iotype = "mmio32";
 
diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 8a857bb34fbb..1888d168a41c 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -142,15 +142,7 @@ static struct vendor_data vendor_sbsa = {
 	.fixed_options		= true,
 };
 
-/*
- * Erratum 44 for QDF2432v1 and QDF2400v1 SoCs describes the BUSY bit as
- * occasionally getting stuck as 1. To avoid the potential for a hang, check
- * TXFE == 0 instead of BUSY == 1. This may not be suitable for all UART
- * implementations, so only do so if an affected platform is detected in
- * parse_spcr().
- */
-static bool qdf2400_e44_present = false;
-
+#ifdef CONFIG_ACPI_SPCR_TABLE
 static struct vendor_data vendor_qdt_qdf2400_e44 = {
 	.reg_offset		= pl011_std_offsets,
 	.fr_busy		= UART011_FR_TXFE,
@@ -165,6 +157,7 @@ static struct vendor_data vendor_qdt_qdf2400_e44 = {
 	.always_enabled		= true,
 	.fixed_options		= true,
 };
+#endif
 
 static u16 pl011_st_offsets[REG_ARRAY_SIZE] = {
 	[REG_DR] = UART01x_DR,
@@ -2375,12 +2368,14 @@ static int __init pl011_console_match(struct console *co, char *name, int idx,
 	resource_size_t addr;
 	int i;
 
-	if (strcmp(name, "qdf2400_e44") == 0) {
-		pr_info_once("UART: Working around QDF2400 SoC erratum 44");
-		qdf2400_e44_present = true;
-	} else if (strcmp(name, "pl011") != 0) {
+	/*
+	 * Systems affected by the Qualcomm Technologies QDF2400 E44 erratum
+	 * have a distinct console name, so make sure we check for that.
+	 * The actual implementation of the erratum occurs in the probe
+	 * function.
+	 */
+	if ((strcmp(name, "qdf2400_e44") != 0) && (strcmp(name, "pl011") != 0))
 		return -ENODEV;
-	}
 
 	if (uart_parse_earlycon(options, &iotype, &addr, &options))
 		return -ENODEV;
@@ -2734,11 +2729,17 @@ static int sbsa_uart_probe(struct platform_device *pdev)
 	}
 	uap->port.irq	= ret;
 
-	uap->reg_offset	= vendor_sbsa.reg_offset;
-	uap->vendor	= qdf2400_e44_present ?
-					&vendor_qdt_qdf2400_e44 : &vendor_sbsa;
+#ifdef CONFIG_ACPI_SPCR_TABLE
+	if (qdf2400_e44_present) {
+		dev_info(&pdev->dev, "working around QDF2400 SoC erratum 44\n");
+		uap->vendor = &vendor_qdt_qdf2400_e44;
+	} else
+#endif
+		uap->vendor = &vendor_sbsa;
+
+	uap->reg_offset	= uap->vendor->reg_offset;
 	uap->fifosize	= 32;
-	uap->port.iotype = vendor_sbsa.access_32b ? UPIO_MEM32 : UPIO_MEM;
+	uap->port.iotype = uap->vendor->access_32b ? UPIO_MEM32 : UPIO_MEM;
 	uap->port.ops	= &sbsa_uart_pops;
 	uap->fixed_baud = baudrate;
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index c749eef1daa1..27b4b6615263 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1209,6 +1209,7 @@ static inline bool acpi_has_watchdog(void) { return false; }
 #endif
 
 #ifdef CONFIG_ACPI_SPCR_TABLE
+extern bool qdf2400_e44_present;
 int parse_spcr(bool earlycon);
 #else
 static inline int parse_spcr(bool earlycon) { return 0; }
-- 
cgit 


From a627b0a7c15ee4d2c87a86d5be5c8167382e8d0d Mon Sep 17 00:00:00 2001
From: Eric Whitney <enwlinux@gmail.com>
Date: Sun, 30 Jul 2017 22:30:11 -0400
Subject: ext4: remove unused metadata accounting variables

Two variables in ext4_inode_info, i_reserved_meta_blocks and
i_allocated_meta_blocks, are unused.  Removing them saves a little
memory per in-memory inode and cleans up clutter in several tracepoints.
Adjust tracepoint output from ext4_alloc_da_blocks() for consistency
and fix a typo and whitespace near these changes.

Signed-off-by: Eric Whitney <enwlinux@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/ext4.h              |  6 ++----
 fs/ext4/super.c             |  2 --
 include/trace/events/ext4.h | 35 ++++++++---------------------------
 3 files changed, 10 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 9ebde0cd632e..dcbcd9d444d1 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -961,7 +961,7 @@ struct ext4_inode_info {
 	/*
 	 * i_block_group is the number of the block group which contains
 	 * this file's inode.  Constant across the lifetime of the inode,
-	 * it is ued for making block allocation decisions - we try to
+	 * it is used for making block allocation decisions - we try to
 	 * place a file's data blocks near its inode block, and new inodes
 	 * near to their parent directory's inode.
 	 */
@@ -1049,10 +1049,8 @@ struct ext4_inode_info {
 	ext4_group_t	i_last_alloc_group;
 
 	/* allocation reservation info for delalloc */
-	/* In case of bigalloc, these refer to clusters rather than blocks */
+	/* In case of bigalloc, this refer to clusters rather than blocks */
 	unsigned int i_reserved_data_blocks;
-	unsigned int i_reserved_meta_blocks;
-	unsigned int i_allocated_meta_blocks;
 	ext4_lblk_t i_da_metadata_calc_last_lblock;
 	int i_da_metadata_calc_len;
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0886fe82e9c4..d61a70e2193a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -978,8 +978,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	ei->i_es_shk_nr = 0;
 	ei->i_es_shrink_lblk = 0;
 	ei->i_reserved_data_blocks = 0;
-	ei->i_reserved_meta_blocks = 0;
-	ei->i_allocated_meta_blocks = 0;
 	ei->i_da_metadata_calc_len = 0;
 	ei->i_da_metadata_calc_last_lblock = 0;
 	spin_lock_init(&(ei->i_block_reservation_lock));
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index dfae175ddebc..9c3bc3883d2f 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -937,21 +937,19 @@ TRACE_EVENT(ext4_alloc_da_blocks,
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
-		__field( unsigned int,	data_blocks	)
-		__field( unsigned int,	meta_blocks	)
+		__field( unsigned int,	data_blocks		)
 	),
 
 	TP_fast_assign(
 		__entry->dev	= inode->i_sb->s_dev;
 		__entry->ino	= inode->i_ino;
 		__entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
-		__entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
 	),
 
-	TP_printk("dev %d,%d ino %lu data_blocks %u meta_blocks %u",
+	TP_printk("dev %d,%d ino %lu reserved_data_blocks %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  __entry->data_blocks, __entry->meta_blocks)
+		  __entry->data_blocks)
 );
 
 TRACE_EVENT(ext4_mballoc_alloc,
@@ -1153,8 +1151,6 @@ TRACE_EVENT(ext4_da_update_reserve_space,
 		__field(	__u64,	i_blocks		)
 		__field(	int,	used_blocks		)
 		__field(	int,	reserved_data_blocks	)
-		__field(	int,	reserved_meta_blocks	)
-		__field(	int,	allocated_meta_blocks	)
 		__field(	int,	quota_claim		)
 		__field(	__u16,	mode			)
 	),
@@ -1166,22 +1162,16 @@ TRACE_EVENT(ext4_da_update_reserve_space,
 		__entry->used_blocks = used_blocks;
 		__entry->reserved_data_blocks =
 				EXT4_I(inode)->i_reserved_data_blocks;
-		__entry->reserved_meta_blocks =
-				EXT4_I(inode)->i_reserved_meta_blocks;
-		__entry->allocated_meta_blocks =
-				EXT4_I(inode)->i_allocated_meta_blocks;
 		__entry->quota_claim = quota_claim;
 		__entry->mode	= inode->i_mode;
 	),
 
 	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d "
-		  "reserved_data_blocks %d reserved_meta_blocks %d "
-		  "allocated_meta_blocks %d quota_claim %d",
+		  "reserved_data_blocks %d quota_claim %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->mode, __entry->i_blocks,
 		  __entry->used_blocks, __entry->reserved_data_blocks,
-		  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks,
 		  __entry->quota_claim)
 );
 
@@ -1195,7 +1185,6 @@ TRACE_EVENT(ext4_da_reserve_space,
 		__field(	ino_t,	ino			)
 		__field(	__u64,	i_blocks		)
 		__field(	int,	reserved_data_blocks	)
-		__field(	int,	reserved_meta_blocks	)
 		__field(	__u16,  mode			)
 	),
 
@@ -1204,17 +1193,15 @@ TRACE_EVENT(ext4_da_reserve_space,
 		__entry->ino	= inode->i_ino;
 		__entry->i_blocks = inode->i_blocks;
 		__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
-		__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
 		__entry->mode	= inode->i_mode;
 	),
 
 	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu "
-		  "reserved_data_blocks %d reserved_meta_blocks %d",
+		  "reserved_data_blocks %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->mode, __entry->i_blocks,
-		  __entry->reserved_data_blocks,
-		  __entry->reserved_meta_blocks)
+		  __entry->reserved_data_blocks)
 );
 
 TRACE_EVENT(ext4_da_release_space,
@@ -1228,8 +1215,6 @@ TRACE_EVENT(ext4_da_release_space,
 		__field(	__u64,	i_blocks		)
 		__field(	int,	freed_blocks		)
 		__field(	int,	reserved_data_blocks	)
-		__field(	int,	reserved_meta_blocks	)
-		__field(	int,	allocated_meta_blocks	)
 		__field(	__u16,  mode			)
 	),
 
@@ -1239,19 +1224,15 @@ TRACE_EVENT(ext4_da_release_space,
 		__entry->i_blocks = inode->i_blocks;
 		__entry->freed_blocks = freed_blocks;
 		__entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
-		__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
-		__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
 		__entry->mode	= inode->i_mode;
 	),
 
 	TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d "
-		  "reserved_data_blocks %d reserved_meta_blocks %d "
-		  "allocated_meta_blocks %d",
+		  "reserved_data_blocks %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->mode, __entry->i_blocks,
-		  __entry->freed_blocks, __entry->reserved_data_blocks,
-		  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
+		  __entry->freed_blocks, __entry->reserved_data_blocks)
 );
 
 DECLARE_EVENT_CLASS(ext4__bitmap_load,
-- 
cgit 


From 99f828436788f0155798145853607ca8f0e6de93 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Fri, 28 Jul 2017 22:29:51 +0100
Subject: dma-buf/sync_file: Allow multiple sync_files to wrap a single
 dma-fence

Up until recently sync_file were create to export a single dma-fence to
userspace, and so we could canabalise a bit insie dma-fence to mark
whether or not we had enable polling for the sync_file itself. However,
with the advent of syncobj, we do allow userspace to create multiple
sync_files for a single dma-fence. (Similarly, that the sw-sync
validation framework also started returning multiple sync-files wrapping
a single dma-fence for a syncpt also triggering the problem.)

This patch reverts my suggestion in commit e24165537312
("dma-buf/sync_file: only enable fence signalling on poll()") to use a
single bit in the shared dma-fence and restores the sync_file->flags for
tracking the bits individually.

Reported-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Fixes: f1e8c67123cf ("dma-buf/sw-sync: Use an rbtree to sort fences in the timeline")
Fixes: e9083420bbac ("drm: introduce sync objects (v4)")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Sean Paul <seanpaul@chromium.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
Cc: dri-devel@lists.freedesktop.org
Cc: <drm-intel-fixes@lists.freedesktop.org> # v4.13-rc1+
Signed-off-by: Gustavo Padovan <gustavo.padovan@collabora.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170728212951.7818-1-chris@chris-wilson.co.uk
(cherry picked from commit db1fc97ca0c0d3fdeeadf314d99a26188438940a)
---
 drivers/dma-buf/sync_file.c | 5 +++--
 include/linux/sync_file.h   | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index d7e219d2669d..66fb40d0ebdb 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -304,7 +304,7 @@ static int sync_file_release(struct inode *inode, struct file *file)
 {
 	struct sync_file *sync_file = file->private_data;
 
-	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
+	if (test_bit(POLL_ENABLED, &sync_file->flags))
 		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
 	dma_fence_put(sync_file->fence);
 	kfree(sync_file);
@@ -318,7 +318,8 @@ static unsigned int sync_file_poll(struct file *file, poll_table *wait)
 
 	poll_wait(file, &sync_file->wq, wait);
 
-	if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
+	if (list_empty(&sync_file->cb.node) &&
+	    !test_and_set_bit(POLL_ENABLED, &sync_file->flags)) {
 		if (dma_fence_add_callback(sync_file->fence, &sync_file->cb,
 					   fence_check_cb_func) < 0)
 			wake_up_all(&sync_file->wq);
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index 5726107963b2..0ad87c434ae6 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -43,12 +43,13 @@ struct sync_file {
 #endif
 
 	wait_queue_head_t	wq;
+	unsigned long		flags;
 
 	struct dma_fence	*fence;
 	struct dma_fence_cb cb;
 };
 
-#define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS
+#define POLL_ENABLED 0
 
 struct sync_file *sync_file_create(struct dma_fence *fence);
 struct dma_fence *sync_file_get_fence(int fd);
-- 
cgit 


From 9c80034921d1ece5c0e3241ba1645bce32387684 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa@the-dreams.de>
Date: Sat, 29 Jul 2017 14:11:43 +0200
Subject: i2c: rephrase explanation of I2C_CLASS_DEPRECATED

Hopefully making clear that it is not needed for new drivers.

Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/linux/i2c.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 00ca5b86a753..d501d3956f13 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -689,7 +689,8 @@ i2c_unlock_adapter(struct i2c_adapter *adapter)
 #define I2C_CLASS_HWMON		(1<<0)	/* lm_sensors, ... */
 #define I2C_CLASS_DDC		(1<<3)	/* DDC bus on graphics adapters */
 #define I2C_CLASS_SPD		(1<<7)	/* Memory modules */
-#define I2C_CLASS_DEPRECATED	(1<<8)	/* Warn users that adapter will stop using classes */
+/* Warn users that the adapter doesn't support classes anymore */
+#define I2C_CLASS_DEPRECATED	(1<<8)
 
 /* Internal numbers to terminate lists */
 #define I2C_CLIENT_END		0xfffeU
-- 
cgit 


From cb891fa6a1d5f52c5f5c07b6f7f4c6d65ea55fc0 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Mon, 31 Jul 2017 16:52:36 +0200
Subject: udp6: fix jumbogram reception

Since commit 67a51780aebb ("ipv6: udp: leverage scratch area
helpers") udp6_recvmsg() read the skb len from the scratch area,
to avoid a cache miss.
But the UDP6 rx path support RFC 2675 UDPv6 jumbograms, and their
length exceeds the 16 bits available in the scratch area. As a side
effect the length returned by recvmsg() is:
<ingress datagram len> % (1<<16)

This commit addresses the issue allocating one more bit in the
IP6CB flags field and setting it for incoming jumbograms.
Such field is still in the first cacheline, so at recvmsg()
time we can check it and fallback to access skb->len if
required, without a measurable overhead.

Fixes: 67a51780aebb ("ipv6: udp: leverage scratch area helpers")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h |  6 ++++++
 net/ipv6/exthdrs.c   |  1 +
 net/ipv6/udp.c       | 11 ++++++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index e1b442996f81..474d6bbc158c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -128,6 +128,7 @@ struct inet6_skb_parm {
 #define IP6SKB_FRAGMENTED      16
 #define IP6SKB_HOPBYHOP        32
 #define IP6SKB_L3SLAVE         64
+#define IP6SKB_JUMBOGRAM      128
 };
 
 #if defined(CONFIG_NET_L3_MASTER_DEV)
@@ -152,6 +153,11 @@ static inline int inet6_iif(const struct sk_buff *skb)
 	return l3_slave ? skb->skb_iif : IP6CB(skb)->iif;
 }
 
+static inline bool inet6_is_jumbogram(const struct sk_buff *skb)
+{
+	return !!(IP6CB(skb)->flags & IP6SKB_JUMBOGRAM);
+}
+
 /* can not be used in TCP layer after tcp_v6_fill_cb */
 static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb)
 {
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 4996d734f1d2..3cec529c6113 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -756,6 +756,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
 	if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
 		goto drop;
 
+	IP6CB(skb)->flags |= IP6SKB_JUMBOGRAM;
 	return true;
 
 drop:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 98fe4560e24c..578142b7ca3e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -328,6 +328,15 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
 EXPORT_SYMBOL_GPL(udp6_lib_lookup);
 #endif
 
+/* do not use the scratch area len for jumbogram: their length execeeds the
+ * scratch area space; note that the IP6CB flags is still in the first
+ * cacheline, so checking for jumbograms is cheap
+ */
+static int udp6_skb_len(struct sk_buff *skb)
+{
+	return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb);
+}
+
 /*
  *	This should be easy, if there is something there we
  *	return it, otherwise we block.
@@ -358,7 +367,7 @@ try_again:
 	if (!skb)
 		return err;
 
-	ulen = udp_skb_len(skb);
+	ulen = udp6_skb_len(skb);
 	copied = len;
 	if (copied > ulen - off)
 		copied = ulen - off;
-- 
cgit 


From e17e8969f5c59a10083af5e260bdad6026872203 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 24 Jul 2017 16:43:49 +0200
Subject: libceph: fallback for when there isn't a pool-specific choose_arg

There is now a fallback to a choose_arg index of -1 if there isn't
a pool-specific choose_arg set.  If you create a per-pool weight-set,
that works for that pool.  Otherwise we try the compat/default one.  If
that doesn't exist either, then we use the normal CRUSH weights.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/crush/crush.h |  2 +-
 net/ceph/osdmap.c           | 12 +++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 92e165d417a6..07eed95e10c7 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -193,7 +193,7 @@ struct crush_choose_arg {
 struct crush_choose_arg_map {
 #ifdef __KERNEL__
 	struct rb_node node;
-	u64 choose_args_index;
+	s64 choose_args_index;
 #endif
 	struct crush_choose_arg *args; /*!< replacement for each bucket
                                             in the crushmap */
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 64ae9f89773a..eb57a06373ca 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -2301,10 +2301,17 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
 	}
 }
 
+/*
+ * Magic value used for a "default" fallback choose_args, used if the
+ * crush_choose_arg_map passed to do_crush() does not exist.  If this
+ * also doesn't exist, fall back to canonical weights.
+ */
+#define CEPH_DEFAULT_CHOOSE_ARGS	-1
+
 static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
 		    int *result, int result_max,
 		    const __u32 *weight, int weight_max,
-		    u64 choose_args_index)
+		    s64 choose_args_index)
 {
 	struct crush_choose_arg_map *arg_map;
 	int r;
@@ -2313,6 +2320,9 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
 
 	arg_map = lookup_choose_arg_map(&map->crush->choose_args,
 					choose_args_index);
+	if (!arg_map)
+		arg_map = lookup_choose_arg_map(&map->crush->choose_args,
+						CEPH_DEFAULT_CHOOSE_ARGS);
 
 	mutex_lock(&map->crush_workspace_mutex);
 	r = crush_do_rule(map->crush, ruleno, x, result, result_max,
-- 
cgit 


From ae78dd8139ce93a528beb7f3914531b7a7be9e30 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 27 Jul 2017 17:59:14 +0200
Subject: libceph: make RECOVERY_DELETES feature create a new interval

This is needed so that the OSDs can regenerate the missing set at the
start of a new interval where support for recovery deletes changed.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 include/linux/ceph/osd_client.h | 1 +
 include/linux/ceph/osdmap.h     | 2 ++
 include/linux/ceph/rados.h      | 4 ++++
 net/ceph/osd_client.c           | 5 +++++
 net/ceph/osdmap.c               | 5 ++++-
 5 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index c6d96a5f46fd..adf670ecaf94 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -148,6 +148,7 @@ struct ceph_osd_request_target {
 	int size;
 	int min_size;
 	bool sort_bitwise;
+	bool recovery_deletes;
 
 	unsigned int flags;                /* CEPH_OSD_FLAG_* */
 	bool paused;
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index a0996cb9faed..af3444a5bfdd 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -272,6 +272,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
 			  u32 new_pg_num,
 			  bool old_sort_bitwise,
 			  bool new_sort_bitwise,
+			  bool old_recovery_deletes,
+			  bool new_recovery_deletes,
 			  const struct ceph_pg *pgid);
 bool ceph_osds_changed(const struct ceph_osds *old_acting,
 		       const struct ceph_osds *new_acting,
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 385db08bb8b2..b8281feda9c7 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -158,6 +158,10 @@ extern const char *ceph_osd_state_name(int s);
 #define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */
 #define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */
 #define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */
+#define CEPH_OSDMAP_REQUIRE_JEWEL    (1<<16) /* require jewel for booting osds */
+#define CEPH_OSDMAP_REQUIRE_KRAKEN   (1<<17) /* require kraken for booting osds */
+#define CEPH_OSDMAP_REQUIRE_LUMINOUS (1<<18) /* require l for booting osds */
+#define CEPH_OSDMAP_RECOVERY_DELETES (1<<19) /* deletes performed during recovery instead of peering */
 
 /*
  * The error code to return when an OSD can't handle a write
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index b5f016cb9569..dcfbdd74dfd1 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1337,6 +1337,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 	bool legacy_change;
 	bool split = false;
 	bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
+	bool recovery_deletes = ceph_osdmap_flag(osdc,
+						 CEPH_OSDMAP_RECOVERY_DELETES);
 	enum calc_target_result ct_res;
 	int ret;
 
@@ -1399,6 +1401,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 				 pi->pg_num,
 				 t->sort_bitwise,
 				 sort_bitwise,
+				 t->recovery_deletes,
+				 recovery_deletes,
 				 &last_pgid))
 		force_resend = true;
 
@@ -1421,6 +1425,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
 		t->pg_num = pi->pg_num;
 		t->pg_num_mask = pi->pg_num_mask;
 		t->sort_bitwise = sort_bitwise;
+		t->recovery_deletes = recovery_deletes;
 
 		t->osd = acting.primary;
 	}
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 0bec71fa712e..f358d0bfa76b 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -2082,6 +2082,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
 			  u32 new_pg_num,
 			  bool old_sort_bitwise,
 			  bool new_sort_bitwise,
+			  bool old_recovery_deletes,
+			  bool new_recovery_deletes,
 			  const struct ceph_pg *pgid)
 {
 	return !osds_equal(old_acting, new_acting) ||
@@ -2089,7 +2091,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
 	       old_size != new_size ||
 	       old_min_size != new_min_size ||
 	       ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
-	       old_sort_bitwise != new_sort_bitwise;
+	       old_sort_bitwise != new_sort_bitwise ||
+	       old_recovery_deletes != new_recovery_deletes;
 }
 
 static int calc_pg_rank(int osd, const struct ceph_osds *acting)
-- 
cgit 


From fd40559c8657418385e42f797e0b04bfc0add748 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Tue, 1 Aug 2017 16:02:47 -0400
Subject: NFSv4: Fix EXCHANGE_ID corrupt verifier issue

The verifier is allocated on the stack, but the EXCHANGE_ID RPC call was
changed to be asynchronous by commit 8d89bd70bc939. If we interrrupt
the call to rpc_wait_for_completion_task(), we can therefore end up
transmitting random stack contents in lieu of the verifier.

Fixes: 8d89bd70bc939 ("NFS setup async exchange_id")
Cc: stable@vger.kernel.org # v4.9+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c       | 11 ++++-------
 fs/nfs/nfs4xdr.c        |  2 +-
 include/linux/nfs_xdr.h |  2 +-
 3 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 93c1d7352238..c458a1e28b91 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -7460,7 +7460,7 @@ static void nfs4_exchange_id_done(struct rpc_task *task, void *data)
 			cdata->res.server_scope = NULL;
 		}
 		/* Save the EXCHANGE_ID verifier session trunk tests */
-		memcpy(clp->cl_confirm.data, cdata->args.verifier->data,
+		memcpy(clp->cl_confirm.data, cdata->args.verifier.data,
 		       sizeof(clp->cl_confirm.data));
 	}
 out:
@@ -7497,7 +7497,6 @@ static const struct rpc_call_ops nfs4_exchange_id_call_ops = {
 static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 			u32 sp4_how, struct rpc_xprt *xprt)
 {
-	nfs4_verifier verifier;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_EXCHANGE_ID],
 		.rpc_cred = cred,
@@ -7521,8 +7520,7 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 		return -ENOMEM;
 	}
 
-	if (!xprt)
-		nfs4_init_boot_verifier(clp, &verifier);
+	nfs4_init_boot_verifier(clp, &calldata->args.verifier);
 
 	status = nfs4_init_uniform_client_string(clp);
 	if (status)
@@ -7563,9 +7561,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
 		task_setup_data.rpc_xprt = xprt;
 		task_setup_data.flags =
 				RPC_TASK_SOFT|RPC_TASK_SOFTCONN|RPC_TASK_ASYNC;
-		calldata->args.verifier = &clp->cl_confirm;
-	} else {
-		calldata->args.verifier = &verifier;
+		memcpy(calldata->args.verifier.data, clp->cl_confirm.data,
+				sizeof(calldata->args.verifier.data));
 	}
 	calldata->args.client = clp;
 #ifdef CONFIG_NFS_V4_1_MIGRATION
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index fa3eb361d4f8..37c8af003275 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1785,7 +1785,7 @@ static void encode_exchange_id(struct xdr_stream *xdr,
 	int len = 0;
 
 	encode_op_hdr(xdr, OP_EXCHANGE_ID, decode_exchange_id_maxsz, hdr);
-	encode_nfs4_verifier(xdr, args->verifier);
+	encode_nfs4_verifier(xdr, &args->verifier);
 
 	encode_string(xdr, strlen(args->client->cl_owner_id),
 			args->client->cl_owner_id);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index ca3bcc4ed4e5..62cbcb842f99 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1235,7 +1235,7 @@ struct nfs41_state_protection {
 
 struct nfs41_exchange_id_args {
 	struct nfs_client		*client;
-	nfs4_verifier			*verifier;
+	nfs4_verifier			verifier;
 	u32				flags;
 	struct nfs41_state_protection	state_protect;
 };
-- 
cgit 


From d9535cb7b7603aeb549c697ecdf92024e4d0a650 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 28 Jul 2017 17:30:02 -0500
Subject: ptp: introduce ptp auxiliary worker

Many PTP drivers required to perform some asynchronous or periodic work,
like periodically handling PHC counter overflow or handle delayed timestamp
for RX/TX network packets. In most of the cases, such work is implemented
using workqueues. Unfortunately, Kernel workqueues might introduce
significant delay in work scheduling under high system load and on -RT,
which could cause misbehavior of PTP drivers due to internal counter
overflow, for example, and there is no way to tune its execution policy and
priority manuallly.

Hence, The kthread_worker can be used insted of workqueues, as it create
separte named kthread for each worker and its its execution policy and
priority can be configured using chrt tool.

This prblem was reported for two drivers TI CPSW CPTS and dp83640, so
instead of modifying each of these driver it was proposed to add PTP
auxiliary worker to the PHC subsystem.

The patch adds PTP auxiliary worker in PHC subsystem using kthread_worker
and kthread_delayed_work and introduces two new PHC subsystem APIs:

- long (*do_aux_work)(struct ptp_clock_info *ptp) callback in
ptp_clock_info structure, which driver should assign if it require to
perform asynchronous or periodic work. Driver should return the delay of
the PTP next auxiliary work scheduling time (>=0) or negative value in case
further scheduling is not required.

- int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay) which
allows schedule PTP auxiliary work.

The name of kthread_worker thread corresponds PTP PHC device name "ptp%d".

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_clock.c          | 42 ++++++++++++++++++++++++++++++++++++++++
 drivers/ptp/ptp_private.h        |  3 +++
 include/linux/ptp_clock_kernel.h | 20 +++++++++++++++++++
 3 files changed, 65 insertions(+)

(limited to 'include')

diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index b77435783ef3..7eacc1c4b3b1 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/syscalls.h>
 #include <linux/uaccess.h>
+#include <uapi/linux/sched/types.h>
 
 #include "ptp_private.h"
 
@@ -184,6 +185,19 @@ static void delete_ptp_clock(struct posix_clock *pc)
 	kfree(ptp);
 }
 
+static void ptp_aux_kworker(struct kthread_work *work)
+{
+	struct ptp_clock *ptp = container_of(work, struct ptp_clock,
+					     aux_work.work);
+	struct ptp_clock_info *info = ptp->info;
+	long delay;
+
+	delay = info->do_aux_work(info);
+
+	if (delay >= 0)
+		kthread_queue_delayed_work(ptp->kworker, &ptp->aux_work, delay);
+}
+
 /* public interface */
 
 struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
@@ -217,6 +231,20 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 	mutex_init(&ptp->pincfg_mux);
 	init_waitqueue_head(&ptp->tsev_wq);
 
+	if (ptp->info->do_aux_work) {
+		char *worker_name = kasprintf(GFP_KERNEL, "ptp%d", ptp->index);
+
+		kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker);
+		ptp->kworker = kthread_create_worker(0, worker_name ?
+						     worker_name : info->name);
+		kfree(worker_name);
+		if (IS_ERR(ptp->kworker)) {
+			err = PTR_ERR(ptp->kworker);
+			pr_err("failed to create ptp aux_worker %d\n", err);
+			goto kworker_err;
+		}
+	}
+
 	err = ptp_populate_pin_groups(ptp);
 	if (err)
 		goto no_pin_groups;
@@ -259,6 +287,9 @@ no_pps:
 no_device:
 	ptp_cleanup_pin_groups(ptp);
 no_pin_groups:
+	if (ptp->kworker)
+		kthread_destroy_worker(ptp->kworker);
+kworker_err:
 	mutex_destroy(&ptp->tsevq_mux);
 	mutex_destroy(&ptp->pincfg_mux);
 	ida_simple_remove(&ptp_clocks_map, index);
@@ -274,6 +305,11 @@ int ptp_clock_unregister(struct ptp_clock *ptp)
 	ptp->defunct = 1;
 	wake_up_interruptible(&ptp->tsev_wq);
 
+	if (ptp->kworker) {
+		kthread_cancel_delayed_work_sync(&ptp->aux_work);
+		kthread_destroy_worker(ptp->kworker);
+	}
+
 	/* Release the clock's resources. */
 	if (ptp->pps_source)
 		pps_unregister_source(ptp->pps_source);
@@ -339,6 +375,12 @@ int ptp_find_pin(struct ptp_clock *ptp,
 }
 EXPORT_SYMBOL(ptp_find_pin);
 
+int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay)
+{
+	return kthread_mod_delayed_work(ptp->kworker, &ptp->aux_work, delay);
+}
+EXPORT_SYMBOL(ptp_schedule_worker);
+
 /* module operations */
 
 static void __exit ptp_exit(void)
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
index d95888974d0c..b86f1bfecd6f 100644
--- a/drivers/ptp/ptp_private.h
+++ b/drivers/ptp/ptp_private.h
@@ -22,6 +22,7 @@
 
 #include <linux/cdev.h>
 #include <linux/device.h>
+#include <linux/kthread.h>
 #include <linux/mutex.h>
 #include <linux/posix-clock.h>
 #include <linux/ptp_clock.h>
@@ -56,6 +57,8 @@ struct ptp_clock {
 	struct attribute_group pin_attr_group;
 	/* 1st entry is a pointer to the real group, 2nd is NULL terminator */
 	const struct attribute_group *pin_attr_groups[2];
+	struct kthread_worker *kworker;
+	struct kthread_delayed_work aux_work;
 };
 
 /*
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index a026bfd089db..51349d124ee5 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -99,6 +99,11 @@ struct system_device_crosststamp;
  *            parameter func: the desired function to use.
  *            parameter chan: the function channel index to use.
  *
+ * @do_work:  Request driver to perform auxiliary (periodic) operations
+ *	      Driver should return delay of the next auxiliary work scheduling
+ *	      time (>=0) or negative value in case further scheduling
+ *	      is not required.
+ *
  * Drivers should embed their ptp_clock_info within a private
  * structure, obtaining a reference to it using container_of().
  *
@@ -126,6 +131,7 @@ struct ptp_clock_info {
 		      struct ptp_clock_request *request, int on);
 	int (*verify)(struct ptp_clock_info *ptp, unsigned int pin,
 		      enum ptp_pin_function func, unsigned int chan);
+	long (*do_aux_work)(struct ptp_clock_info *ptp);
 };
 
 struct ptp_clock;
@@ -211,6 +217,16 @@ extern int ptp_clock_index(struct ptp_clock *ptp);
 int ptp_find_pin(struct ptp_clock *ptp,
 		 enum ptp_pin_function func, unsigned int chan);
 
+/**
+ * ptp_schedule_worker() - schedule ptp auxiliary work
+ *
+ * @ptp:    The clock obtained from ptp_clock_register().
+ * @delay:  number of jiffies to wait before queuing
+ *          See kthread_queue_delayed_work() for more info.
+ */
+
+int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay);
+
 #else
 static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
 						   struct device *parent)
@@ -225,6 +241,10 @@ static inline int ptp_clock_index(struct ptp_clock *ptp)
 static inline int ptp_find_pin(struct ptp_clock *ptp,
 			       enum ptp_pin_function func, unsigned int chan)
 { return -1; }
+static inline int ptp_schedule_worker(struct ptp_clock *ptp,
+				      unsigned long delay)
+{ return -EOPNOTSUPP; }
+
 #endif
 
 #endif
-- 
cgit 


From cdbc78ba702650b02b9e3e957dbaa725423bdf1e Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jcrouse@codeaurora.org>
Date: Thu, 27 Jul 2017 10:42:35 -0600
Subject: drm/msm: Remove __user from __u64 data types

__user should be used to identify user pointers and not __u64
variables containing pointers.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
---
 include/uapi/drm/msm_drm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index 26c54f6d595d..ad4eb2863e70 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -171,7 +171,7 @@ struct drm_msm_gem_submit_cmd {
 	__u32 size;           /* in, cmdstream size */
 	__u32 pad;
 	__u32 nr_relocs;      /* in, number of submit_reloc's */
-	__u64 __user relocs;  /* in, ptr to array of submit_reloc's */
+	__u64 relocs;         /* in, ptr to array of submit_reloc's */
 };
 
 /* Each buffer referenced elsewhere in the cmdstream submit (ie. the
@@ -215,8 +215,8 @@ struct drm_msm_gem_submit {
 	__u32 fence;          /* out */
 	__u32 nr_bos;         /* in, number of submit_bo's */
 	__u32 nr_cmds;        /* in, number of submit_cmd's */
-	__u64 __user bos;     /* in, ptr to array of submit_bo's */
-	__u64 __user cmds;    /* in, ptr to array of submit_cmd's */
+	__u64 bos;            /* in, ptr to array of submit_bo's */
+	__u64 cmds;           /* in, ptr to array of submit_cmd's */
 	__s32 fence_fd;       /* in/out fence fd (see MSM_SUBMIT_FENCE_FD_IN/OUT) */
 };
 
-- 
cgit 


From a477b9cd37aa81a490dfa3265b7ff4f2c5a92463 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 1 Aug 2017 20:11:02 -0500
Subject: PCI: Add pci_reset_function_locked()

The implementation of PCI workarounds may require that the device is reset
from its probe function.  This implies that the PCI device lock is already
held, and makes calling pci_reset_function() impossible (since it will
itself try to take that lock).

Add pci_reset_function_locked(), which is the equivalent of
pci_reset_function(), except that it requires the PCI device lock to be
already held by the caller.

Tested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
[bhelgaas: folded in fix for conflict with 52354b9d1f46 ("PCI: Remove
__pci_dev_reset() and pci_dev_reset()")]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable@vger.kernel.org	# 4.11: 52354b9d1f46: PCI: Remove __pci_dev_reset() and pci_dev_reset()
Cc: stable@vger.kernel.org	# 4.11
---
 drivers/pci/pci.c   | 35 +++++++++++++++++++++++++++++++++++
 include/linux/pci.h |  1 +
 2 files changed, 36 insertions(+)

(limited to 'include')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index af0cc3456dc1..b4b7eab29400 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4259,6 +4259,41 @@ int pci_reset_function(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_reset_function);
 
+/**
+ * pci_reset_function_locked - quiesce and reset a PCI device function
+ * @dev: PCI device to reset
+ *
+ * Some devices allow an individual function to be reset without affecting
+ * other functions in the same device.  The PCI device must be responsive
+ * to PCI config space in order to use this function.
+ *
+ * This function does not just reset the PCI portion of a device, but
+ * clears all the state associated with the device.  This function differs
+ * from __pci_reset_function() in that it saves and restores device state
+ * over the reset.  It also differs from pci_reset_function() in that it
+ * requires the PCI device lock to be held.
+ *
+ * Returns 0 if the device function was successfully reset or negative if the
+ * device doesn't support resetting a single function.
+ */
+int pci_reset_function_locked(struct pci_dev *dev)
+{
+	int rc;
+
+	rc = pci_probe_reset_function(dev);
+	if (rc)
+		return rc;
+
+	pci_dev_save_and_disable(dev);
+
+	rc = __pci_reset_function_locked(dev);
+
+	pci_dev_restore(dev);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(pci_reset_function_locked);
+
 /**
  * pci_try_reset_function - quiesce and reset a PCI device function
  * @dev: PCI device to reset
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4869e66dd659..a75c13673852 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1067,6 +1067,7 @@ void pcie_flr(struct pci_dev *dev);
 int __pci_reset_function(struct pci_dev *dev);
 int __pci_reset_function_locked(struct pci_dev *dev);
 int pci_reset_function(struct pci_dev *dev);
+int pci_reset_function_locked(struct pci_dev *dev);
 int pci_try_reset_function(struct pci_dev *dev);
 int pci_probe_reset_slot(struct pci_slot *slot);
 int pci_reset_slot(struct pci_slot *slot);
-- 
cgit 


From 6d29231000bbe0fb9e4893a9c68151ffdd3b5469 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Mon, 31 Jul 2017 10:31:27 +0200
Subject: mtd: nand: Declare tBERS, tR and tPROG as u64 to avoid integer
 overflow

All timings in nand_sdr_timings are expressed in picoseconds but some
of them may not fit in an u32.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Fixes: 204e7ecd47e2 ("mtd: nand: Add a few more timings to nand_sdr_timings")
Reported-by: Alexander Dahl <ada@thorsis.com>
Cc: <stable@vger.kernel.org>
Reviewed-by: Alexander Dahl <ada@thorsis.com>
Tested-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
---
 drivers/mtd/nand/nand_timings.c | 6 +++---
 include/linux/mtd/nand.h        | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/drivers/mtd/nand/nand_timings.c b/drivers/mtd/nand/nand_timings.c
index f06312df3669..7e36d7d13c26 100644
--- a/drivers/mtd/nand/nand_timings.c
+++ b/drivers/mtd/nand/nand_timings.c
@@ -311,9 +311,9 @@ int onfi_init_data_interface(struct nand_chip *chip,
 		struct nand_sdr_timings *timings = &iface->timings.sdr;
 
 		/* microseconds -> picoseconds */
-		timings->tPROG_max = 1000000UL * le16_to_cpu(params->t_prog);
-		timings->tBERS_max = 1000000UL * le16_to_cpu(params->t_bers);
-		timings->tR_max = 1000000UL * le16_to_cpu(params->t_r);
+		timings->tPROG_max = 1000000ULL * le16_to_cpu(params->t_prog);
+		timings->tBERS_max = 1000000ULL * le16_to_cpu(params->t_bers);
+		timings->tR_max = 1000000ULL * le16_to_cpu(params->t_r);
 
 		/* nanoseconds -> picoseconds */
 		timings->tCCS_min = 1000UL * le16_to_cpu(params->t_ccs);
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 892148c448cc..5216d2eb2289 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -681,10 +681,10 @@ struct nand_buffers {
  * @tWW_min: WP# transition to WE# low
  */
 struct nand_sdr_timings {
-	u32 tBERS_max;
+	u64 tBERS_max;
 	u32 tCCS_min;
-	u32 tPROG_max;
-	u32 tR_max;
+	u64 tPROG_max;
+	u64 tR_max;
 	u32 tALH_min;
 	u32 tADL_min;
 	u32 tALS_min;
-- 
cgit 


From c994f778bb1cca8ebe7a4e528cefec233e93b5cc Mon Sep 17 00:00:00 2001
From: Inbar Karmy <inbark@mellanox.com>
Date: Tue, 1 Aug 2017 16:43:43 +0300
Subject: net/mlx4_en: Fix wrong indication of Wake-on-LAN (WoL) support

Currently when WoL is supported but disabled, ethtool reports:
"Supports Wake-on: d".
Fix the indication of Wol support, so that the indication
remains "g" all the time if the NIC supports WoL.

Tested:
As accepted, when NIC supports WoL- ethtool reports:
	Supports Wake-on: g
	Wake-on: d
when NIC doesn't support WoL- ethtool reports:
        Supports Wake-on: d
        Wake-on: d

Fixes: 14c07b1358ed ("mlx4: Wake on LAN support")
Signed-off-by: Inbar Karmy <inbark@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 15 ++++++++-------
 drivers/net/ethernet/mellanox/mlx4/fw.c         |  4 ++++
 drivers/net/ethernet/mellanox/mlx4/fw.h         |  1 +
 drivers/net/ethernet/mellanox/mlx4/main.c       |  2 ++
 include/linux/mlx4/device.h                     |  1 +
 5 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index c751a1d434ad..3d4e4a5d00d1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -223,6 +223,7 @@ static void mlx4_en_get_wol(struct net_device *netdev,
 			    struct ethtool_wolinfo *wol)
 {
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
+	struct mlx4_caps *caps = &priv->mdev->dev->caps;
 	int err = 0;
 	u64 config = 0;
 	u64 mask;
@@ -235,24 +236,24 @@ static void mlx4_en_get_wol(struct net_device *netdev,
 	mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 :
 		MLX4_DEV_CAP_FLAG_WOL_PORT2;
 
-	if (!(priv->mdev->dev->caps.flags & mask)) {
+	if (!(caps->flags & mask)) {
 		wol->supported = 0;
 		wol->wolopts = 0;
 		return;
 	}
 
+	if (caps->wol_port[priv->port])
+		wol->supported = WAKE_MAGIC;
+	else
+		wol->supported = 0;
+
 	err = mlx4_wol_read(priv->mdev->dev, &config, priv->port);
 	if (err) {
 		en_err(priv, "Failed to get WoL information\n");
 		return;
 	}
 
-	if (config & MLX4_EN_WOL_MAGIC)
-		wol->supported = WAKE_MAGIC;
-	else
-		wol->supported = 0;
-
-	if (config & MLX4_EN_WOL_ENABLED)
+	if ((config & MLX4_EN_WOL_ENABLED) && (config & MLX4_EN_WOL_MAGIC))
 		wol->wolopts = WAKE_MAGIC;
 	else
 		wol->wolopts = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 37e84a59e751..c165f16623a9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -764,6 +764,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_CQ_TS_SUPPORT_OFFSET	0x3e
 #define QUERY_DEV_CAP_MAX_PKEY_OFFSET		0x3f
 #define QUERY_DEV_CAP_EXT_FLAGS_OFFSET		0x40
+#define QUERY_DEV_CAP_WOL_OFFSET		0x43
 #define QUERY_DEV_CAP_FLAGS_OFFSET		0x44
 #define QUERY_DEV_CAP_RSVD_UAR_OFFSET		0x48
 #define QUERY_DEV_CAP_UAR_SZ_OFFSET		0x49
@@ -920,6 +921,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
 	MLX4_GET(flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET);
 	dev_cap->flags = flags | (u64)ext_flags << 32;
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_WOL_OFFSET);
+	dev_cap->wol_port[1] = !!(field & 0x20);
+	dev_cap->wol_port[2] = !!(field & 0x40);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_UAR_OFFSET);
 	dev_cap->reserved_uars = field >> 4;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_UAR_SZ_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 5343a0599253..b52ba01aa486 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -129,6 +129,7 @@ struct mlx4_dev_cap {
 	u32 dmfs_high_rate_qpn_range;
 	struct mlx4_rate_limit_caps rl_caps;
 	struct mlx4_port_cap port_cap[MLX4_MAX_PORTS + 1];
+	bool wol_port[MLX4_MAX_PORTS + 1];
 };
 
 struct mlx4_func_cap {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index a27c9c13a36e..09b9bc17bce9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -424,6 +424,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
 	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
 	dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
+	dev->caps.wol_port[1]          = dev_cap->wol_port[1];
+	dev->caps.wol_port[2]          = dev_cap->wol_port[2];
 
 	/* Save uar page shift */
 	if (!mlx4_is_slave(dev)) {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index aad5d81dfb44..b54517c05e9a 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -620,6 +620,7 @@ struct mlx4_caps {
 	u32			dmfs_high_rate_qpn_base;
 	u32			dmfs_high_rate_qpn_range;
 	u32			vf_caps;
+	bool			wol_port[MLX4_MAX_PORTS + 1];
 	struct mlx4_rate_limit_caps rl_caps;
 };
 
-- 
cgit 


From 3898da947bbaf9e7fd5816e825978d360028bba2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 2 Aug 2017 17:55:54 +0200
Subject: KVM: avoid using rcu_dereference_protected
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During teardown, accesses to memslots and buses are using
rcu_dereference_protected with an always-true condition because
these accesses are done outside the usual mutexes.  This
is because the last reference is gone and there cannot be any
concurrent modifications, but rcu_dereference_protected is
ugly and unobvious.

Instead, check the refcount in kvm_get_bus and __kvm_memslots.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 include/linux/kvm_host.h |  6 ++++--
 virt/kvm/kvm_main.c      | 11 ++++-------
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 890b706d1943..21a6fd6c44af 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -477,7 +477,8 @@ struct kvm {
 static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
 {
 	return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
-				      lockdep_is_held(&kvm->slots_lock));
+				      lockdep_is_held(&kvm->slots_lock) ||
+				      !refcount_read(&kvm->users_count));
 }
 
 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
@@ -570,7 +571,8 @@ void kvm_put_kvm(struct kvm *kvm);
 static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
 {
 	return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
-			lockdep_is_held(&kvm->slots_lock));
+			lockdep_is_held(&kvm->slots_lock) ||
+			!refcount_read(&kvm->users_count));
 }
 
 static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f3f74271f1a9..15252d723b54 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -717,10 +717,9 @@ out_err_no_srcu:
 	hardware_disable_all();
 out_err_no_disable:
 	for (i = 0; i < KVM_NR_BUSES; i++)
-		kfree(rcu_access_pointer(kvm->buses[i]));
+		kfree(kvm_get_bus(kvm, i));
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-		kvm_free_memslots(kvm,
-			rcu_dereference_protected(kvm->memslots[i], 1));
+		kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
 	kvm_arch_free_vm(kvm);
 	mmdrop(current->mm);
 	return ERR_PTR(r);
@@ -754,9 +753,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	spin_unlock(&kvm_lock);
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++) {
-		struct kvm_io_bus *bus;
+		struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
 
-		bus = rcu_dereference_protected(kvm->buses[i], 1);
 		if (bus)
 			kvm_io_bus_destroy(bus);
 		kvm->buses[i] = NULL;
@@ -770,8 +768,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 	kvm_arch_destroy_vm(kvm);
 	kvm_destroy_devices(kvm);
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-		kvm_free_memslots(kvm,
-			rcu_dereference_protected(kvm->memslots[i], 1));
+		kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
 	cleanup_srcu_struct(&kvm->irq_srcu);
 	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_free_vm(kvm);
-- 
cgit 


From 3ea277194daaeaa84ce75180ec7c7a2075027a68 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@suse.de>
Date: Wed, 2 Aug 2017 13:31:52 -0700
Subject: mm, mprotect: flush TLB if potentially racing with a parallel reclaim
 leaving stale TLB entries

Nadav Amit identified a theoritical race between page reclaim and
mprotect due to TLB flushes being batched outside of the PTL being held.

He described the race as follows:

        CPU0                            CPU1
        ----                            ----
                                        user accesses memory using RW PTE
                                        [PTE now cached in TLB]
        try_to_unmap_one()
        ==> ptep_get_and_clear()
        ==> set_tlb_ubc_flush_pending()
                                        mprotect(addr, PROT_READ)
                                        ==> change_pte_range()
                                        ==> [ PTE non-present - no flush ]

                                        user writes using cached RW PTE
        ...

        try_to_unmap_flush()

The same type of race exists for reads when protecting for PROT_NONE and
also exists for operations that can leave an old TLB entry behind such
as munmap, mremap and madvise.

For some operations like mprotect, it's not necessarily a data integrity
issue but it is a correctness issue as there is a window where an
mprotect that limits access still allows access.  For munmap, it's
potentially a data integrity issue although the race is massive as an
munmap, mmap and return to userspace must all complete between the
window when reclaim drops the PTL and flushes the TLB.  However, it's
theoritically possible so handle this issue by flushing the mm if
reclaim is potentially currently batching TLB flushes.

Other instances where a flush is required for a present pte should be ok
as either the page lock is held preventing parallel reclaim or a page
reference count is elevated preventing a parallel free leading to
corruption.  In the case of page_mkclean there isn't an obvious path
that userspace could take advantage of without using the operations that
are guarded by this patch.  Other users such as gup as a race with
reclaim looks just at PTEs.  huge page variants should be ok as they
don't race with reclaim.  mincore only looks at PTEs.  userfault also
should be ok as if a parallel reclaim takes place, it will either fault
the page back in or read some of the data before the flush occurs
triggering a fault.

Note that a variant of this patch was acked by Andy Lutomirski but this
was for the x86 parts on top of his PCID work which didn't make the 4.13
merge window as expected.  His ack is dropped from this version and
there will be a follow-on patch on top of PCID that will include his
ack.

[akpm@linux-foundation.org: tweak comments]
[akpm@linux-foundation.org: fix spello]
Link: http://lkml.kernel.org/r/20170717155523.emckq2esjro6hf3z@suse.de
Reported-by: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Mel Gorman <mgorman@suse.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: <stable@vger.kernel.org>	[v4.4+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  4 ++++
 mm/internal.h            |  5 ++++-
 mm/madvise.c             |  1 +
 mm/memory.c              |  1 +
 mm/mprotect.c            |  1 +
 mm/mremap.c              |  1 +
 mm/rmap.c                | 36 ++++++++++++++++++++++++++++++++++++
 7 files changed, 48 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ff151814a02d..7f384bb62d8e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -494,6 +494,10 @@ struct mm_struct {
 	 * PROT_NONE or PROT_NUMA mapped page.
 	 */
 	bool tlb_flush_pending;
+#endif
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+	/* See flush_tlb_batched_pending() */
+	bool tlb_flush_batched;
 #endif
 	struct uprobes_state uprobes_state;
 #ifdef CONFIG_HUGETLB_PAGE
diff --git a/mm/internal.h b/mm/internal.h
index 24d88f084705..4ef49fc55e58 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -498,6 +498,7 @@ extern struct workqueue_struct *mm_percpu_wq;
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 void try_to_unmap_flush(void);
 void try_to_unmap_flush_dirty(void);
+void flush_tlb_batched_pending(struct mm_struct *mm);
 #else
 static inline void try_to_unmap_flush(void)
 {
@@ -505,7 +506,9 @@ static inline void try_to_unmap_flush(void)
 static inline void try_to_unmap_flush_dirty(void)
 {
 }
-
+static inline void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+}
 #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
 
 extern const struct trace_print_flags pageflag_names[];
diff --git a/mm/madvise.c b/mm/madvise.c
index 9976852f1e1c..47d8d8a25eae 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -320,6 +320,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 
 	tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
 	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	flush_tlb_batched_pending(mm);
 	arch_enter_lazy_mmu_mode();
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		ptent = *pte;
diff --git a/mm/memory.c b/mm/memory.c
index 0e517be91a89..f65beaad319b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1197,6 +1197,7 @@ again:
 	init_rss_vec(rss);
 	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	pte = start_pte;
+	flush_tlb_batched_pending(mm);
 	arch_enter_lazy_mmu_mode();
 	do {
 		pte_t ptent = *pte;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 1a8c9ca83e48..4180ad8cc9c5 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -64,6 +64,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	    atomic_read(&vma->vm_mm->mm_users) == 1)
 		target_node = numa_node_id();
 
+	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 	do {
 		oldpte = *pte;
diff --git a/mm/mremap.c b/mm/mremap.c
index cd8a1b199ef9..6e3d857458de 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -152,6 +152,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	new_ptl = pte_lockptr(mm, new_pmd);
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 
 	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
diff --git a/mm/rmap.c b/mm/rmap.c
index ced14f1af6dc..c8993c63eb25 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -604,6 +604,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 	arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
 	tlb_ubc->flush_required = true;
 
+	/*
+	 * Ensure compiler does not re-order the setting of tlb_flush_batched
+	 * before the PTE is cleared.
+	 */
+	barrier();
+	mm->tlb_flush_batched = true;
+
 	/*
 	 * If the PTE was dirty then it's best to assume it's writable. The
 	 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
@@ -631,6 +638,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
 
 	return should_defer;
 }
+
+/*
+ * Reclaim unmaps pages under the PTL but do not flush the TLB prior to
+ * releasing the PTL if TLB flushes are batched. It's possible for a parallel
+ * operation such as mprotect or munmap to race between reclaim unmapping
+ * the page and flushing the page. If this race occurs, it potentially allows
+ * access to data via a stale TLB entry. Tracking all mm's that have TLB
+ * batching in flight would be expensive during reclaim so instead track
+ * whether TLB batching occurred in the past and if so then do a flush here
+ * if required. This will cost one additional flush per reclaim cycle paid
+ * by the first operation at risk such as mprotect and mumap.
+ *
+ * This must be called under the PTL so that an access to tlb_flush_batched
+ * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
+ * via the PTL.
+ */
+void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+	if (mm->tlb_flush_batched) {
+		flush_tlb_mm(mm);
+
+		/*
+		 * Do not allow the compiler to re-order the clearing of
+		 * tlb_flush_batched before the tlb is flushed.
+		 */
+		barrier();
+		mm->tlb_flush_batched = false;
+	}
+}
 #else
 static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
-- 
cgit 


From d16977f3a6cfbb5e9ce477f423a1bf343347c1ed Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Wed, 2 Aug 2017 13:32:01 -0700
Subject: kthread: fix documentation build warning

The kerneldoc comment for kthread_create() had an incorrect argument
name, leading to a warning in the docs build.

Correct it, and make one more small step toward a warning-free build.

Link: http://lkml.kernel.org/r/20170724135916.7f486c6f@lwn.net
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kthread.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 4fec8b775895..82e197eeac91 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -15,7 +15,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
  * @threadfn: the function to run in the thread
  * @data: data pointer for @threadfn()
  * @namefmt: printf-style format string for the thread name
- * @...: arguments for @namefmt.
+ * @arg...: arguments for @namefmt.
  *
  * This macro will create a kthread on the current node, leaving it in
  * the stopped state.  This is just a helper for kthread_create_on_node();
-- 
cgit 


From 89affbf5d9ebb15c6460596822e8857ea2f9e735 Mon Sep 17 00:00:00 2001
From: Dima Zavin <dmitriyz@waymo.com>
Date: Wed, 2 Aug 2017 13:32:18 -0700
Subject: cpuset: fix a deadlock due to incomplete patching of
 cpusets_enabled()

In codepaths that use the begin/retry interface for reading
mems_allowed_seq with irqs disabled, there exists a race condition that
stalls the patch process after only modifying a subset of the
static_branch call sites.

This problem manifested itself as a deadlock in the slub allocator,
inside get_any_partial.  The loop reads mems_allowed_seq value (via
read_mems_allowed_begin), performs the defrag operation, and then
verifies the consistency of mem_allowed via the read_mems_allowed_retry
and the cookie returned by xxx_begin.

The issue here is that both begin and retry first check if cpusets are
enabled via cpusets_enabled() static branch.  This branch can be
rewritted dynamically (via cpuset_inc) if a new cpuset is created.  The
x86 jump label code fully synchronizes across all CPUs for every entry
it rewrites.  If it rewrites only one of the callsites (specifically the
one in read_mems_allowed_retry) and then waits for the
smp_call_function(do_sync_core) to complete while a CPU is inside the
begin/retry section with IRQs off and the mems_allowed value is changed,
we can hang.

This is because begin() will always return 0 (since it wasn't patched
yet) while retry() will test the 0 against the actual value of the seq
counter.

The fix is to use two different static keys: one for begin
(pre_enable_key) and one for retry (enable_key).  In cpuset_inc(), we
first bump the pre_enable key to ensure that cpuset_mems_allowed_begin()
always return a valid seqcount if are enabling cpusets.  Similarly, when
disabling cpusets via cpuset_dec(), we first ensure that callers of
cpuset_mems_allowed_retry() will start ignoring the seqcount value
before we let cpuset_mems_allowed_begin() return 0.

The relevant stack traces of the two stuck threads:

  CPU: 1 PID: 1415 Comm: mkdir Tainted: G L  4.9.36-00104-g540c51286237 #4
  Hardware name: Default string Default string/Hardware, BIOS 4.29.1-20170526215256 05/26/2017
  task: ffff8817f9c28000 task.stack: ffffc9000ffa4000
  RIP: smp_call_function_many+0x1f9/0x260
  Call Trace:
    smp_call_function+0x3b/0x70
    on_each_cpu+0x2f/0x90
    text_poke_bp+0x87/0xd0
    arch_jump_label_transform+0x93/0x100
    __jump_label_update+0x77/0x90
    jump_label_update+0xaa/0xc0
    static_key_slow_inc+0x9e/0xb0
    cpuset_css_online+0x70/0x2e0
    online_css+0x2c/0xa0
    cgroup_apply_control_enable+0x27f/0x3d0
    cgroup_mkdir+0x2b7/0x420
    kernfs_iop_mkdir+0x5a/0x80
    vfs_mkdir+0xf6/0x1a0
    SyS_mkdir+0xb7/0xe0
    entry_SYSCALL_64_fastpath+0x18/0xad

  ...

  CPU: 2 PID: 1 Comm: init Tainted: G L  4.9.36-00104-g540c51286237 #4
  Hardware name: Default string Default string/Hardware, BIOS 4.29.1-20170526215256 05/26/2017
  task: ffff8818087c0000 task.stack: ffffc90000030000
  RIP: int3+0x39/0x70
  Call Trace:
    <#DB> ? ___slab_alloc+0x28b/0x5a0
    <EOE> ? copy_process.part.40+0xf7/0x1de0
    __slab_alloc.isra.80+0x54/0x90
    copy_process.part.40+0xf7/0x1de0
    copy_process.part.40+0xf7/0x1de0
    kmem_cache_alloc_node+0x8a/0x280
    copy_process.part.40+0xf7/0x1de0
    _do_fork+0xe7/0x6c0
    _raw_spin_unlock_irq+0x2d/0x60
    trace_hardirqs_on_caller+0x136/0x1d0
    entry_SYSCALL_64_fastpath+0x5/0xad
    do_syscall_64+0x27/0x350
    SyS_clone+0x19/0x20
    do_syscall_64+0x60/0x350
    entry_SYSCALL64_slow_path+0x25/0x25

Link: http://lkml.kernel.org/r/20170731040113.14197-1-dmitriyz@waymo.com
Fixes: 46e700abc44c ("mm, page_alloc: remove unnecessary taking of a seqlock when cpusets are disabled")
Signed-off-by: Dima Zavin <dmitriyz@waymo.com>
Reported-by: Cliff Spradlin <cspradlin@waymo.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Christopher Lameter <cl@linux.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpuset.h | 19 +++++++++++++++++--
 kernel/cgroup/cpuset.c |  1 +
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 119a3f9604b0..898cfe2eeb42 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -18,6 +18,19 @@
 
 #ifdef CONFIG_CPUSETS
 
+/*
+ * Static branch rewrites can happen in an arbitrary order for a given
+ * key. In code paths where we need to loop with read_mems_allowed_begin() and
+ * read_mems_allowed_retry() to get a consistent view of mems_allowed, we need
+ * to ensure that begin() always gets rewritten before retry() in the
+ * disabled -> enabled transition. If not, then if local irqs are disabled
+ * around the loop, we can deadlock since retry() would always be
+ * comparing the latest value of the mems_allowed seqcount against 0 as
+ * begin() still would see cpusets_enabled() as false. The enabled -> disabled
+ * transition should happen in reverse order for the same reasons (want to stop
+ * looking at real value of mems_allowed.sequence in retry() first).
+ */
+extern struct static_key_false cpusets_pre_enable_key;
 extern struct static_key_false cpusets_enabled_key;
 static inline bool cpusets_enabled(void)
 {
@@ -32,12 +45,14 @@ static inline int nr_cpusets(void)
 
 static inline void cpuset_inc(void)
 {
+	static_branch_inc(&cpusets_pre_enable_key);
 	static_branch_inc(&cpusets_enabled_key);
 }
 
 static inline void cpuset_dec(void)
 {
 	static_branch_dec(&cpusets_enabled_key);
+	static_branch_dec(&cpusets_pre_enable_key);
 }
 
 extern int cpuset_init(void);
@@ -115,7 +130,7 @@ extern void cpuset_print_current_mems_allowed(void);
  */
 static inline unsigned int read_mems_allowed_begin(void)
 {
-	if (!cpusets_enabled())
+	if (!static_branch_unlikely(&cpusets_pre_enable_key))
 		return 0;
 
 	return read_seqcount_begin(&current->mems_allowed_seq);
@@ -129,7 +144,7 @@ static inline unsigned int read_mems_allowed_begin(void)
  */
 static inline bool read_mems_allowed_retry(unsigned int seq)
 {
-	if (!cpusets_enabled())
+	if (!static_branch_unlikely(&cpusets_enabled_key))
 		return false;
 
 	return read_seqcount_retry(&current->mems_allowed_seq, seq);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index ca8376e5008c..8d5151688504 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -63,6 +63,7 @@
 #include <linux/cgroup.h>
 #include <linux/wait.h>
 
+DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
 DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
 
 /* See "Frequency meter" comments, below. */
-- 
cgit 


From 1ee1c3f5b5cff959e0ac95a125bd15eaf88cc638 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Wed, 2 Aug 2017 13:32:27 -0700
Subject: mm: allow page_cache_get_speculative in interrupt context

Kernel panic when calling the IRQ-safe __get_user_pages_fast in NMI
handler.

The bug was introduced by commit 2947ba054a4d ("x86/mm/gup: Switch GUP
to the generic get_user_page_fast() implementation").

The original x86 __get_user_page_fast used plain get_page() or
page_ref_add().  However, the generic __get_user_page_fast uses
page_cache_get_speculative(), which has VM_BUG_ON(in_interrupt()).

There is no reason to prevent page_cache_get_speculative from using in
interrupt context.  According to the author, putting a BUG_ON there is
just because the code is not verifying correctness of interrupt races.
I did some tests in interrupt context.  There is no issue found.

Removing VM_BUG_ON(in_interrupt()) for page_cache_get_speculative().

Link: http://lkml.kernel.org/r/1501609146-59730-1-git-send-email-kan.liang@intel.com
Fixes: 2947ba054a4d ("x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation")
Signed-off-by: Kan Liang <kan.liang@intel.com>
Cc: Jens Axboe <axboe@fb.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Ying Huang <ying.huang@intel.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pagemap.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index baa9344dcd10..79b36f57c3ba 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -163,8 +163,6 @@ void release_pages(struct page **pages, int nr, bool cold);
  */
 static inline int page_cache_get_speculative(struct page *page)
 {
-	VM_BUG_ON(in_interrupt());
-
 #ifdef CONFIG_TINY_RCU
 # ifdef CONFIG_PREEMPT_COUNT
 	VM_BUG_ON(!in_atomic() && !irqs_disabled());
-- 
cgit 


From 5e0594fd77e0d4dfd728898814da43a065094ae0 Mon Sep 17 00:00:00 2001
From: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Date: Fri, 30 Jun 2017 13:14:11 +0100
Subject: drm: rcar-du: Repair vblank for DRM page flips using the VSP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver recently switched from handling page flip completion in the
DU vertical blanking handler to the VSP frame end handler to fix a race
condition. This unfortunately resulted in incorrect timestamps in the
vertical blanking events sent to userspace as vertical blanking is now
handled after sending the event.

To fix this we must reverse the order of the two operations. The easiest
way is to handle vertical blanking in the VSP frame end handler before
sending the event. The VSP frame end interrupt occurs approximately 50µs
earlier than the DU frame end interrupt, but this should not cause any
undue harm.

As we need to handle vertical blanking even when page flip completion is
delayed, the VSP driver now needs to call the frame end completion
callback unconditionally, with a new argument to report whether page
flip has completed.

With this new scheme the DU vertical blanking interrupt isn't needed
anymore, so we can stop enabling it.

Fixes: d503a43ac06a ("drm: rcar-du: Register a completion callback with VSP1")
Signed-off-by: Kieran Bingham <kieran.bingham+renesas@ideasonboard.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart+renesas@ideasonboard.com>
Acked-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 drivers/gpu/drm/rcar-du/rcar_du_crtc.c   |  8 +++++---
 drivers/gpu/drm/rcar-du/rcar_du_crtc.h   |  2 ++
 drivers/gpu/drm/rcar-du/rcar_du_vsp.c    |  8 ++++++--
 drivers/media/platform/vsp1/vsp1_drm.c   |  5 +++--
 drivers/media/platform/vsp1/vsp1_drm.h   |  2 +-
 drivers/media/platform/vsp1/vsp1_pipe.c  | 20 ++++++++++----------
 drivers/media/platform/vsp1/vsp1_pipe.h  |  2 +-
 drivers/media/platform/vsp1/vsp1_video.c |  6 +++++-
 include/media/vsp1.h                     |  2 +-
 9 files changed, 34 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
index 6e5bd0b92dfa..301ea1a8018e 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
@@ -690,6 +690,7 @@ static int rcar_du_crtc_enable_vblank(struct drm_crtc *crtc)
 
 	rcar_du_crtc_write(rcrtc, DSRCR, DSRCR_VBCL);
 	rcar_du_crtc_set(rcrtc, DIER, DIER_VBE);
+	rcrtc->vblank_enable = true;
 
 	return 0;
 }
@@ -699,6 +700,7 @@ static void rcar_du_crtc_disable_vblank(struct drm_crtc *crtc)
 	struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
 
 	rcar_du_crtc_clr(rcrtc, DIER, DIER_VBE);
+	rcrtc->vblank_enable = false;
 }
 
 static const struct drm_crtc_funcs crtc_funcs = {
@@ -743,10 +745,10 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg)
 	spin_unlock(&rcrtc->vblank_lock);
 
 	if (status & DSSR_VBK) {
-		drm_crtc_handle_vblank(&rcrtc->crtc);
-
-		if (rcdu->info->gen < 3)
+		if (rcdu->info->gen < 3) {
+			drm_crtc_handle_vblank(&rcrtc->crtc);
 			rcar_du_crtc_finish_page_flip(rcrtc);
+		}
 
 		ret = IRQ_HANDLED;
 	}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
index 065b91f5b1d9..fdc2bf99bda1 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
@@ -32,6 +32,7 @@ struct rcar_du_vsp;
  * @mmio_offset: offset of the CRTC registers in the DU MMIO block
  * @index: CRTC software and hardware index
  * @initialized: whether the CRTC has been initialized and clocks enabled
+ * @vblank_enable: whether vblank events are enabled on this CRTC
  * @event: event to post when the pending page flip completes
  * @flip_wait: wait queue used to signal page flip completion
  * @vblank_lock: protects vblank_wait and vblank_count
@@ -51,6 +52,7 @@ struct rcar_du_crtc {
 	unsigned int index;
 	bool initialized;
 
+	bool vblank_enable;
 	struct drm_pending_vblank_event *event;
 	wait_queue_head_t flip_wait;
 
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
index e43b065e141a..6de6be3d9090 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
@@ -31,11 +31,15 @@
 #include "rcar_du_kms.h"
 #include "rcar_du_vsp.h"
 
-static void rcar_du_vsp_complete(void *private)
+static void rcar_du_vsp_complete(void *private, bool completed)
 {
 	struct rcar_du_crtc *crtc = private;
 
-	rcar_du_crtc_finish_page_flip(crtc);
+	if (crtc->vblank_enable)
+		drm_crtc_handle_vblank(&crtc->crtc);
+
+	if (completed)
+		rcar_du_crtc_finish_page_flip(crtc);
 }
 
 void rcar_du_vsp_enable(struct rcar_du_crtc *crtc)
diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c
index 7791d7b5a743..4dfbeac8f42c 100644
--- a/drivers/media/platform/vsp1/vsp1_drm.c
+++ b/drivers/media/platform/vsp1/vsp1_drm.c
@@ -32,12 +32,13 @@
  * Interrupt Handling
  */
 
-static void vsp1_du_pipeline_frame_end(struct vsp1_pipeline *pipe)
+static void vsp1_du_pipeline_frame_end(struct vsp1_pipeline *pipe,
+				       bool completed)
 {
 	struct vsp1_drm_pipeline *drm_pipe = to_vsp1_drm_pipeline(pipe);
 
 	if (drm_pipe->du_complete)
-		drm_pipe->du_complete(drm_pipe->du_private);
+		drm_pipe->du_complete(drm_pipe->du_private, completed);
 }
 
 /* -----------------------------------------------------------------------------
diff --git a/drivers/media/platform/vsp1/vsp1_drm.h b/drivers/media/platform/vsp1/vsp1_drm.h
index fca553ddd184..1cd9db785bf7 100644
--- a/drivers/media/platform/vsp1/vsp1_drm.h
+++ b/drivers/media/platform/vsp1/vsp1_drm.h
@@ -29,7 +29,7 @@ struct vsp1_drm_pipeline {
 	bool enabled;
 
 	/* Frame synchronisation */
-	void (*du_complete)(void *);
+	void (*du_complete)(void *, bool);
 	void *du_private;
 };
 
diff --git a/drivers/media/platform/vsp1/vsp1_pipe.c b/drivers/media/platform/vsp1/vsp1_pipe.c
index 9bb961298af2..4f4b732df84b 100644
--- a/drivers/media/platform/vsp1/vsp1_pipe.c
+++ b/drivers/media/platform/vsp1/vsp1_pipe.c
@@ -335,16 +335,12 @@ void vsp1_pipeline_frame_end(struct vsp1_pipeline *pipe)
 	if (pipe == NULL)
 		return;
 
+	/*
+	 * If the DL commit raced with the frame end interrupt, the commit ends
+	 * up being postponed by one frame. @completed represents whether the
+	 * active frame was finished or postponed.
+	 */
 	completed = vsp1_dlm_irq_frame_end(pipe->output->dlm);
-	if (!completed) {
-		/*
-		 * If the DL commit raced with the frame end interrupt, the
-		 * commit ends up being postponed by one frame. Return
-		 * immediately without calling the pipeline's frame end handler
-		 * or incrementing the sequence number.
-		 */
-		return;
-	}
 
 	if (pipe->hgo)
 		vsp1_hgo_frame_end(pipe->hgo);
@@ -352,8 +348,12 @@ void vsp1_pipeline_frame_end(struct vsp1_pipeline *pipe)
 	if (pipe->hgt)
 		vsp1_hgt_frame_end(pipe->hgt);
 
+	/*
+	 * Regardless of frame completion we still need to notify the pipe
+	 * frame_end to account for vblank events.
+	 */
 	if (pipe->frame_end)
-		pipe->frame_end(pipe);
+		pipe->frame_end(pipe, completed);
 
 	pipe->sequence++;
 }
diff --git a/drivers/media/platform/vsp1/vsp1_pipe.h b/drivers/media/platform/vsp1/vsp1_pipe.h
index 91a784a13422..c5d01a365370 100644
--- a/drivers/media/platform/vsp1/vsp1_pipe.h
+++ b/drivers/media/platform/vsp1/vsp1_pipe.h
@@ -91,7 +91,7 @@ struct vsp1_pipeline {
 	enum vsp1_pipeline_state state;
 	wait_queue_head_t wq;
 
-	void (*frame_end)(struct vsp1_pipeline *pipe);
+	void (*frame_end)(struct vsp1_pipeline *pipe, bool completed);
 
 	struct mutex lock;
 	struct kref kref;
diff --git a/drivers/media/platform/vsp1/vsp1_video.c b/drivers/media/platform/vsp1/vsp1_video.c
index 84139affb871..e9f5dcb8fae5 100644
--- a/drivers/media/platform/vsp1/vsp1_video.c
+++ b/drivers/media/platform/vsp1/vsp1_video.c
@@ -440,13 +440,17 @@ static void vsp1_video_pipeline_run(struct vsp1_pipeline *pipe)
 	vsp1_pipeline_run(pipe);
 }
 
-static void vsp1_video_pipeline_frame_end(struct vsp1_pipeline *pipe)
+static void vsp1_video_pipeline_frame_end(struct vsp1_pipeline *pipe,
+					  bool completed)
 {
 	struct vsp1_device *vsp1 = pipe->output->entity.vsp1;
 	enum vsp1_pipeline_state state;
 	unsigned long flags;
 	unsigned int i;
 
+	/* M2M Pipelines should never call here with an incomplete frame. */
+	WARN_ON_ONCE(!completed);
+
 	spin_lock_irqsave(&pipe->irqlock, flags);
 
 	/* Complete buffers on all video nodes. */
diff --git a/include/media/vsp1.h b/include/media/vsp1.h
index c8fc868fb0f2..68a8abe4fac5 100644
--- a/include/media/vsp1.h
+++ b/include/media/vsp1.h
@@ -34,7 +34,7 @@ struct vsp1_du_lif_config {
 	unsigned int width;
 	unsigned int height;
 
-	void (*callback)(void *);
+	void (*callback)(void *, bool);
 	void *callback_data;
 };
 
-- 
cgit 


From e1a10ef7fa876f8510aaec36ea5c0cf34baba410 Mon Sep 17 00:00:00 2001
From: Neal Cardwell <ncardwell@google.com>
Date: Thu, 3 Aug 2017 09:19:52 -0400
Subject: tcp: introduce tcp_rto_delta_us() helper for xmit timer fix

Pure refactor. This helper will be required in the xmit timer fix
later in the patch series. (Because the TLP logic will want to make
this calculation.)

Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)")
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    | 10 ++++++++++
 net/ipv4/tcp_input.c |  5 +----
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70483296157f..ada65e767b28 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1916,6 +1916,16 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
 			     u64 xmit_time);
 extern void tcp_rack_reo_timeout(struct sock *sk);
 
+/* At how many usecs into the future should the RTO fire? */
+static inline s64 tcp_rto_delta_us(const struct sock *sk)
+{
+	const struct sk_buff *skb = tcp_write_queue_head(sk);
+	u32 rto = inet_csk(sk)->icsk_rto;
+	u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
+
+	return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
+}
+
 /*
  * Save and compile IPv4 options, return a pointer to it
  */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index dad026fcfd09..b9ba8d55d8b8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3004,10 +3004,7 @@ void tcp_rearm_rto(struct sock *sk)
 		/* Offset the time elapsed after installing regular RTO */
 		if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 		    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
-			struct sk_buff *skb = tcp_write_queue_head(sk);
-			u64 rto_time_stamp = skb->skb_mstamp +
-					     jiffies_to_usecs(rto);
-			s64 delta_us = rto_time_stamp - tp->tcp_mstamp;
+			s64 delta_us = tcp_rto_delta_us(sk);
 			/* delta_us may not be positive if the socket is locked
 			 * when the retrans timer fires and is rescheduled.
 			 */
-- 
cgit 


From 931b3c1a832621b4bdcbaf783096fc267eb36fbe Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 1 Aug 2017 09:41:37 +0300
Subject: RDMA/mlx5: Fix existence check for extended address vector

The extended address vector is the highest bit in be32 variable,
but it was compared with the lowest. This patch fixes the endianness
of that check and removes already declared define.

Fixes: 17d2f88f92ce ("IB/mlx5: Add ODP atomics support")
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/odp.c | 2 +-
 include/linux/mlx5/qp.h          | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index ae0746754008..3d701c7a4c91 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 
 	if (qp->ibqp.qp_type != IB_QPT_RC) {
 		av = *wqe;
-		if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT))
+		if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
 			*wqe += sizeof(struct mlx5_av);
 		else
 			*wqe += sizeof(struct mlx5_base_av);
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 6f41270d80c0..f378dc0e7eaf 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -212,7 +212,6 @@ struct mlx5_wqe_ctrl_seg {
 #define MLX5_WQE_CTRL_OPCODE_MASK 0xff
 #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
-#define MLX5_WQE_AV_EXT 0x80000000
 
 enum {
 	MLX5_ETH_WQE_L3_INNER_CSUM      = 1 << 4,
-- 
cgit 


From 978d13d60c34818a41fc35962602bdfa5c03f214 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Fri, 4 Aug 2017 23:59:31 -0700
Subject: iscsi-target: Fix iscsi_np reset hung task during parallel delete

This patch fixes a bug associated with iscsit_reset_np_thread()
that can occur during parallel configfs rmdir of a single iscsi_np
used across multiple iscsi-target instances, that would result in
hung task(s) similar to below where configfs rmdir process context
was blocked indefinately waiting for iscsi_np->np_restart_comp
to finish:

[ 6726.112076] INFO: task dcp_proxy_node_:15550 blocked for more than 120 seconds.
[ 6726.119440]       Tainted: G        W  O     4.1.26-3321 #2
[ 6726.125045] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 6726.132927] dcp_proxy_node_ D ffff8803f202bc88     0 15550      1 0x00000000
[ 6726.140058]  ffff8803f202bc88 ffff88085c64d960 ffff88083b3b1ad0 ffff88087fffeb08
[ 6726.147593]  ffff8803f202c000 7fffffffffffffff ffff88083f459c28 ffff88083b3b1ad0
[ 6726.155132]  ffff88035373c100 ffff8803f202bca8 ffffffff8168ced2 ffff8803f202bcb8
[ 6726.162667] Call Trace:
[ 6726.165150]  [<ffffffff8168ced2>] schedule+0x32/0x80
[ 6726.170156]  [<ffffffff8168f5b4>] schedule_timeout+0x214/0x290
[ 6726.176030]  [<ffffffff810caef2>] ? __send_signal+0x52/0x4a0
[ 6726.181728]  [<ffffffff8168d7d6>] wait_for_completion+0x96/0x100
[ 6726.187774]  [<ffffffff810e7c80>] ? wake_up_state+0x10/0x10
[ 6726.193395]  [<ffffffffa035d6e2>] iscsit_reset_np_thread+0x62/0xe0 [iscsi_target_mod]
[ 6726.201278]  [<ffffffffa0355d86>] iscsit_tpg_disable_portal_group+0x96/0x190 [iscsi_target_mod]
[ 6726.210033]  [<ffffffffa0363f7f>] lio_target_tpg_store_enable+0x4f/0xc0 [iscsi_target_mod]
[ 6726.218351]  [<ffffffff81260c5a>] configfs_write_file+0xaa/0x110
[ 6726.224392]  [<ffffffff811ea364>] vfs_write+0xa4/0x1b0
[ 6726.229576]  [<ffffffff811eb111>] SyS_write+0x41/0xb0
[ 6726.234659]  [<ffffffff8169042e>] system_call_fastpath+0x12/0x71

It would happen because each iscsit_reset_np_thread() sets state
to ISCSI_NP_THREAD_RESET, sends SIGINT, and then blocks waiting
for completion on iscsi_np->np_restart_comp.

However, if iscsi_np was active processing a login request and
more than a single iscsit_reset_np_thread() caller to the same
iscsi_np was blocked on iscsi_np->np_restart_comp, iscsi_np
kthread process context in __iscsi_target_login_thread() would
flush pending signals and only perform a single completion of
np->np_restart_comp before going back to sleep within transport
specific iscsit_transport->iscsi_accept_np code.

To address this bug, add a iscsi_np->np_reset_count and update
__iscsi_target_login_thread() to keep completing np->np_restart_comp
until ->np_reset_count has reached zero.

Reported-by: Gary Guo <ghg@datera.io>
Tested-by: Gary Guo <ghg@datera.io>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: stable@vger.kernel.org # 3.10+
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 drivers/target/iscsi/iscsi_target.c       | 1 +
 drivers/target/iscsi/iscsi_target_login.c | 7 +++++--
 include/target/iscsi/iscsi_target_core.h  | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 12803de99400..5001261f5d69 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -418,6 +418,7 @@ int iscsit_reset_np_thread(
 		return 0;
 	}
 	np->np_thread_state = ISCSI_NP_THREAD_RESET;
+	atomic_inc(&np->np_reset_count);
 
 	if (np->np_thread) {
 		spin_unlock_bh(&np->np_thread_lock);
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index e9bdc8b86e7d..dc13afbd4c88 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -1243,9 +1243,11 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
 	flush_signals(current);
 
 	spin_lock_bh(&np->np_thread_lock);
-	if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+	if (atomic_dec_if_positive(&np->np_reset_count) >= 0) {
 		np->np_thread_state = ISCSI_NP_THREAD_ACTIVE;
+		spin_unlock_bh(&np->np_thread_lock);
 		complete(&np->np_restart_comp);
+		return 1;
 	} else if (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN) {
 		spin_unlock_bh(&np->np_thread_lock);
 		goto exit;
@@ -1278,7 +1280,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
 		goto exit;
 	} else if (rc < 0) {
 		spin_lock_bh(&np->np_thread_lock);
-		if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
+		if (atomic_dec_if_positive(&np->np_reset_count) >= 0) {
+			np->np_thread_state = ISCSI_NP_THREAD_ACTIVE;
 			spin_unlock_bh(&np->np_thread_lock);
 			complete(&np->np_restart_comp);
 			iscsit_put_transport(conn->conn_transport);
diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h
index 0ca1fb08805b..fb87d32f5e51 100644
--- a/include/target/iscsi/iscsi_target_core.h
+++ b/include/target/iscsi/iscsi_target_core.h
@@ -786,6 +786,7 @@ struct iscsi_np {
 	int			np_sock_type;
 	enum np_thread_state_table np_thread_state;
 	bool                    enabled;
+	atomic_t		np_reset_count;
 	enum iscsi_timer_flags_table np_login_timer_flags;
 	u32			np_exports;
 	enum np_flags_table	np_flags;
-- 
cgit 


From 0cca6c8920ade95e2741b2062cf1397dc546fb0f Mon Sep 17 00:00:00 2001
From: Ludovic Desroches <ludovic.desroches@o2linux.fr>
Date: Sun, 6 Aug 2017 16:00:05 +0200
Subject: pinctrl: generic: update references to Documentation/pinctrl.txt

Update deprecated references to Documentation/pinctrl.txt since it has been
moved to Documentation/driver-api/pinctl.rst.

Signed-off-by: Ludovic Desroches <ludovic.desroches@o2linux.fr>
Fixes: 5a9b73832e9e ("pinctrl.txt: move it to the driver-api book")
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/gpio/gpio-legacy.txt      | 2 +-
 MAINTAINERS                             | 2 +-
 include/linux/device.h                  | 2 +-
 include/linux/pinctrl/pinconf-generic.h | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/Documentation/gpio/gpio-legacy.txt b/Documentation/gpio/gpio-legacy.txt
index b34fd94f7089..5eacc147ea87 100644
--- a/Documentation/gpio/gpio-legacy.txt
+++ b/Documentation/gpio/gpio-legacy.txt
@@ -459,7 +459,7 @@ pin controller?
 
 This is done by registering "ranges" of pins, which are essentially
 cross-reference tables. These are described in
-Documentation/pinctrl.txt
+Documentation/driver-api/pinctl.rst
 
 While the pin allocation is totally managed by the pinctrl subsystem,
 gpio (under gpiolib) is still maintained by gpio drivers. It may happen
diff --git a/MAINTAINERS b/MAINTAINERS
index f66488dfdbc9..2c85558aec19 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10375,7 +10375,7 @@ L:	linux-gpio@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-pinctrl.git
 S:	Maintained
 F:	Documentation/devicetree/bindings/pinctrl/
-F:	Documentation/pinctrl.txt
+F:	Documentation/driver-api/pinctl.rst
 F:	drivers/pinctrl/
 F:	include/linux/pinctrl/
 
diff --git a/include/linux/device.h b/include/linux/device.h
index 723cd54b94da..beabdbc08420 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -843,7 +843,7 @@ struct dev_links_info {
  * 		hibernation, system resume and during runtime PM transitions
  * 		along with subsystem-level and driver-level callbacks.
  * @pins:	For device pin management.
- *		See Documentation/pinctrl.txt for details.
+ *		See Documentation/driver-api/pinctl.rst for details.
  * @msi_list:	Hosts MSI descriptors
  * @msi_domain: The generic MSI domain this device is using.
  * @numa_node:	NUMA node this device is close to.
diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h
index 231d3075815a..e91d1b6a260d 100644
--- a/include/linux/pinctrl/pinconf-generic.h
+++ b/include/linux/pinctrl/pinconf-generic.h
@@ -81,8 +81,8 @@
  * 	it.
  * @PIN_CONFIG_OUTPUT: this will configure the pin as an output and drive a
  * 	value on the line. Use argument 1 to indicate high level, argument 0 to
- * 	indicate low level. (Please see Documentation/pinctrl.txt, section
- * 	"GPIO mode pitfalls" for a discussion around this parameter.)
+ *	indicate low level. (Please see Documentation/driver-api/pinctl.rst,
+ *	section "GPIO mode pitfalls" for a discussion around this parameter.)
  * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power
  *	supplies, the argument to this parameter (on a custom format) tells
  *	the driver which alternative power source to use.
-- 
cgit 


From 0fb228d30b8d72bfee51f57e638d412324d44a11 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 1 Aug 2017 15:12:39 -0700
Subject: nvmet_fc: add defer_req callback for deferment of cmd buffer return

At queue creation, the transport allocates a local job struct
(struct nvmet_fc_fcp_iod) for each possible element of the queue.
When a new CMD is received from the wire, a jobs struct is allocated
from the queue and then used for the duration of the command.
The job struct contains buffer space for the wire command iu. Thus,
upon allocation of the job struct, the cmd iu buffer is copied to
the job struct and the LLDD may immediately free/reuse the CMD IU
buffer passed in the call.

However, in some circumstances, due to the packetized nature of FC
and the api of the FC LLDD which may issue a hw command to send the
wire response, but the LLDD may not get the hw completion for the
command and upcall the nvmet_fc layer before a new command may be
asynchronously received on the wire. In other words, its possible
for the initiator to get the response from the wire, thus believe a
command slot free, and send a new command iu. The new command iu
may be received by the LLDD and passed to the transport before the
LLDD had serviced the hw completion and made the teardown calls for
the original job struct. As such, there is no available job struct
available for the new io. E.g. it appears like the host sent more
queue elements than the queue size. It didn't based on it's
understanding.

Rather than treat this as a hard connection failure queue the new
request until the job struct does free up. As the buffer isn't
copied as there's no job struct, a special return value must be
returned to the LLDD to signify to hold off on recycling the cmd
iu buffer.  And later, when a job struct is allocated and the
buffer copied, a new LLDD callback is introduced to notify the
LLDD and allow it to recycle it's command iu buffer.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fc.c       | 212 +++++++++++++++++++++++++++++++++++------
 include/linux/nvme-fc-driver.h |   7 ++
 2 files changed, 191 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 31ca55dfcb1d..1b7f2520a20d 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -114,6 +114,11 @@ struct nvmet_fc_tgtport {
 	struct kref			ref;
 };
 
+struct nvmet_fc_defer_fcp_req {
+	struct list_head		req_list;
+	struct nvmefc_tgt_fcp_req	*fcp_req;
+};
+
 struct nvmet_fc_tgt_queue {
 	bool				ninetypercent;
 	u16				qid;
@@ -132,6 +137,8 @@ struct nvmet_fc_tgt_queue {
 	struct nvmet_fc_tgt_assoc	*assoc;
 	struct nvmet_fc_fcp_iod		*fod;		/* array of fcp_iods */
 	struct list_head		fod_list;
+	struct list_head		pending_cmd_list;
+	struct list_head		avail_defer_list;
 	struct workqueue_struct		*work_q;
 	struct kref			ref;
 } __aligned(sizeof(unsigned long long));
@@ -223,6 +230,8 @@ static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue);
 static int nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue);
 static void nvmet_fc_tgtport_put(struct nvmet_fc_tgtport *tgtport);
 static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
+static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
+					struct nvmet_fc_fcp_iod *fod);
 
 
 /* *********************** FC-NVME DMA Handling **************************** */
@@ -463,9 +472,9 @@ static struct nvmet_fc_fcp_iod *
 nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
 {
 	static struct nvmet_fc_fcp_iod *fod;
-	unsigned long flags;
 
-	spin_lock_irqsave(&queue->qlock, flags);
+	lockdep_assert_held(&queue->qlock);
+
 	fod = list_first_entry_or_null(&queue->fod_list,
 					struct nvmet_fc_fcp_iod, fcp_list);
 	if (fod) {
@@ -477,17 +486,37 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue)
 		 * will "inherit" that reference.
 		 */
 	}
-	spin_unlock_irqrestore(&queue->qlock, flags);
 	return fod;
 }
 
 
+static void
+nvmet_fc_queue_fcp_req(struct nvmet_fc_tgtport *tgtport,
+		       struct nvmet_fc_tgt_queue *queue,
+		       struct nvmefc_tgt_fcp_req *fcpreq)
+{
+	struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private;
+
+	/*
+	 * put all admin cmds on hw queue id 0. All io commands go to
+	 * the respective hw queue based on a modulo basis
+	 */
+	fcpreq->hwqid = queue->qid ?
+			((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
+
+	if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
+		queue_work_on(queue->cpu, queue->work_q, &fod->work);
+	else
+		nvmet_fc_handle_fcp_rqst(tgtport, fod);
+}
+
 static void
 nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 			struct nvmet_fc_fcp_iod *fod)
 {
 	struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq;
 	struct nvmet_fc_tgtport *tgtport = fod->tgtport;
+	struct nvmet_fc_defer_fcp_req *deferfcp;
 	unsigned long flags;
 
 	fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma,
@@ -495,21 +524,56 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 
 	fcpreq->nvmet_fc_private = NULL;
 
-	spin_lock_irqsave(&queue->qlock, flags);
-	list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
 	fod->active = false;
 	fod->abort = false;
 	fod->aborted = false;
 	fod->writedataactive = false;
 	fod->fcpreq = NULL;
+
+	tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq);
+
+	spin_lock_irqsave(&queue->qlock, flags);
+	deferfcp = list_first_entry_or_null(&queue->pending_cmd_list,
+				struct nvmet_fc_defer_fcp_req, req_list);
+	if (!deferfcp) {
+		list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
+		spin_unlock_irqrestore(&queue->qlock, flags);
+
+		/* Release reference taken at queue lookup and fod allocation */
+		nvmet_fc_tgt_q_put(queue);
+		return;
+	}
+
+	/* Re-use the fod for the next pending cmd that was deferred */
+	list_del(&deferfcp->req_list);
+
+	fcpreq = deferfcp->fcp_req;
+
+	/* deferfcp can be reused for another IO at a later date */
+	list_add_tail(&deferfcp->req_list, &queue->avail_defer_list);
+
 	spin_unlock_irqrestore(&queue->qlock, flags);
 
+	/* Save NVME CMD IO in fod */
+	memcpy(&fod->cmdiubuf, fcpreq->rspaddr, fcpreq->rsplen);
+
+	/* Setup new fcpreq to be processed */
+	fcpreq->rspaddr = NULL;
+	fcpreq->rsplen  = 0;
+	fcpreq->nvmet_fc_private = fod;
+	fod->fcpreq = fcpreq;
+	fod->active = true;
+
+	/* inform LLDD IO is now being processed */
+	tgtport->ops->defer_rcv(&tgtport->fc_target_port, fcpreq);
+
+	/* Submit deferred IO for processing */
+	nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
+
 	/*
-	 * release the reference taken at queue lookup and fod allocation
+	 * Leave the queue lookup get reference taken when
+	 * fod was originally allocated.
 	 */
-	nvmet_fc_tgt_q_put(queue);
-
-	tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq);
 }
 
 static int
@@ -569,6 +633,8 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 	queue->port = assoc->tgtport->port;
 	queue->cpu = nvmet_fc_queue_to_cpu(assoc->tgtport, qid);
 	INIT_LIST_HEAD(&queue->fod_list);
+	INIT_LIST_HEAD(&queue->avail_defer_list);
+	INIT_LIST_HEAD(&queue->pending_cmd_list);
 	atomic_set(&queue->connected, 0);
 	atomic_set(&queue->sqtail, 0);
 	atomic_set(&queue->rsn, 1);
@@ -638,6 +704,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 {
 	struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport;
 	struct nvmet_fc_fcp_iod *fod = queue->fod;
+	struct nvmet_fc_defer_fcp_req *deferfcp;
 	unsigned long flags;
 	int i, writedataactive;
 	bool disconnect;
@@ -666,6 +733,35 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 			}
 		}
 	}
+
+	/* Cleanup defer'ed IOs in queue */
+	list_for_each_entry(deferfcp, &queue->avail_defer_list, req_list) {
+		list_del(&deferfcp->req_list);
+		kfree(deferfcp);
+	}
+
+	for (;;) {
+		deferfcp = list_first_entry_or_null(&queue->pending_cmd_list,
+				struct nvmet_fc_defer_fcp_req, req_list);
+		if (!deferfcp)
+			break;
+
+		list_del(&deferfcp->req_list);
+		spin_unlock_irqrestore(&queue->qlock, flags);
+
+		tgtport->ops->defer_rcv(&tgtport->fc_target_port,
+				deferfcp->fcp_req);
+
+		tgtport->ops->fcp_abort(&tgtport->fc_target_port,
+				deferfcp->fcp_req);
+
+		tgtport->ops->fcp_req_release(&tgtport->fc_target_port,
+				deferfcp->fcp_req);
+
+		kfree(deferfcp);
+
+		spin_lock_irqsave(&queue->qlock, flags);
+	}
 	spin_unlock_irqrestore(&queue->qlock, flags);
 
 	flush_workqueue(queue->work_q);
@@ -2172,11 +2268,38 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work)
  * Pass a FC-NVME FCP CMD IU received from the FC link to the nvmet-fc
  * layer for processing.
  *
- * The nvmet-fc layer will copy cmd payload to an internal structure for
- * processing.  As such, upon completion of the routine, the LLDD may
- * immediately free/reuse the CMD IU buffer passed in the call.
+ * The nvmet_fc layer allocates a local job structure (struct
+ * nvmet_fc_fcp_iod) from the queue for the io and copies the
+ * CMD IU buffer to the job structure. As such, on a successful
+ * completion (returns 0), the LLDD may immediately free/reuse
+ * the CMD IU buffer passed in the call.
+ *
+ * However, in some circumstances, due to the packetized nature of FC
+ * and the api of the FC LLDD which may issue a hw command to send the
+ * response, but the LLDD may not get the hw completion for that command
+ * and upcall the nvmet_fc layer before a new command may be
+ * asynchronously received - its possible for a command to be received
+ * before the LLDD and nvmet_fc have recycled the job structure. It gives
+ * the appearance of more commands received than fits in the sq.
+ * To alleviate this scenario, a temporary queue is maintained in the
+ * transport for pending LLDD requests waiting for a queue job structure.
+ * In these "overrun" cases, a temporary queue element is allocated
+ * the LLDD request and CMD iu buffer information remembered, and the
+ * routine returns a -EOVERFLOW status. Subsequently, when a queue job
+ * structure is freed, it is immediately reallocated for anything on the
+ * pending request list. The LLDDs defer_rcv() callback is called,
+ * informing the LLDD that it may reuse the CMD IU buffer, and the io
+ * is then started normally with the transport.
  *
- * If this routine returns error, the lldd should abort the exchange.
+ * The LLDD, when receiving an -EOVERFLOW completion status, is to treat
+ * the completion as successful but must not reuse the CMD IU buffer
+ * until the LLDD's defer_rcv() callback has been called for the
+ * corresponding struct nvmefc_tgt_fcp_req pointer.
+ *
+ * If there is any other condition in which an error occurs, the
+ * transport will return a non-zero status indicating the error.
+ * In all cases other than -EOVERFLOW, the transport has not accepted the
+ * request and the LLDD should abort the exchange.
  *
  * @target_port: pointer to the (registered) target port the FCP CMD IU
  *              was received on.
@@ -2194,6 +2317,8 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 	struct nvme_fc_cmd_iu *cmdiu = cmdiubuf;
 	struct nvmet_fc_tgt_queue *queue;
 	struct nvmet_fc_fcp_iod *fod;
+	struct nvmet_fc_defer_fcp_req *deferfcp;
+	unsigned long flags;
 
 	/* validate iu, so the connection id can be used to find the queue */
 	if ((cmdiubuf_len != sizeof(*cmdiu)) ||
@@ -2214,29 +2339,60 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port,
 	 * when the fod is freed.
 	 */
 
+	spin_lock_irqsave(&queue->qlock, flags);
+
 	fod = nvmet_fc_alloc_fcp_iod(queue);
-	if (!fod) {
+	if (fod) {
+		spin_unlock_irqrestore(&queue->qlock, flags);
+
+		fcpreq->nvmet_fc_private = fod;
+		fod->fcpreq = fcpreq;
+
+		memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
+
+		nvmet_fc_queue_fcp_req(tgtport, queue, fcpreq);
+
+		return 0;
+	}
+
+	if (!tgtport->ops->defer_rcv) {
+		spin_unlock_irqrestore(&queue->qlock, flags);
 		/* release the queue lookup reference */
 		nvmet_fc_tgt_q_put(queue);
 		return -ENOENT;
 	}
 
-	fcpreq->nvmet_fc_private = fod;
-	fod->fcpreq = fcpreq;
-	/*
-	 * put all admin cmds on hw queue id 0. All io commands go to
-	 * the respective hw queue based on a modulo basis
-	 */
-	fcpreq->hwqid = queue->qid ?
-			((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
-	memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len);
+	deferfcp = list_first_entry_or_null(&queue->avail_defer_list,
+			struct nvmet_fc_defer_fcp_req, req_list);
+	if (deferfcp) {
+		/* Just re-use one that was previously allocated */
+		list_del(&deferfcp->req_list);
+	} else {
+		spin_unlock_irqrestore(&queue->qlock, flags);
 
-	if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR)
-		queue_work_on(queue->cpu, queue->work_q, &fod->work);
-	else
-		nvmet_fc_handle_fcp_rqst(tgtport, fod);
+		/* Now we need to dynamically allocate one */
+		deferfcp = kmalloc(sizeof(*deferfcp), GFP_KERNEL);
+		if (!deferfcp) {
+			/* release the queue lookup reference */
+			nvmet_fc_tgt_q_put(queue);
+			return -ENOMEM;
+		}
+		spin_lock_irqsave(&queue->qlock, flags);
+	}
 
-	return 0;
+	/* For now, use rspaddr / rsplen to save payload information */
+	fcpreq->rspaddr = cmdiubuf;
+	fcpreq->rsplen  = cmdiubuf_len;
+	deferfcp->fcp_req = fcpreq;
+
+	/* defer processing till a fod becomes available */
+	list_add_tail(&deferfcp->req_list, &queue->pending_cmd_list);
+
+	/* NOTE: the queue lookup reference is still valid */
+
+	spin_unlock_irqrestore(&queue->qlock, flags);
+
+	return -EOVERFLOW;
 }
 EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req);
 
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 6c8c5d8041b7..2591878c1d48 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -346,6 +346,11 @@ struct nvme_fc_remote_port {
  *       indicating an FC transport Aborted status.
  *       Entrypoint is Mandatory.
  *
+ * @defer_rcv:  Called by the transport to signal the LLLD that it has
+ *       begun processing of a previously received NVME CMD IU. The LLDD
+ *       is now free to re-use the rcv buffer associated with the
+ *       nvmefc_tgt_fcp_req.
+ *
  * @max_hw_queues:  indicates the maximum number of hw queues the LLDD
  *       supports for cpu affinitization.
  *       Value is Mandatory. Must be at least 1.
@@ -846,6 +851,8 @@ struct nvmet_fc_target_template {
 				struct nvmefc_tgt_fcp_req *fcpreq);
 	void (*fcp_req_release)(struct nvmet_fc_target_port *tgtport,
 				struct nvmefc_tgt_fcp_req *fcpreq);
+	void (*defer_rcv)(struct nvmet_fc_target_port *tgtport,
+				struct nvmefc_tgt_fcp_req *fcpreq);
 
 	u32	max_hw_queues;
 	u16	max_sgl_segments;
-- 
cgit 


From 16af97dc5a8975371a83d9e30a64038b48f40a2d Mon Sep 17 00:00:00 2001
From: Nadav Amit <nadav.amit@gmail.com>
Date: Thu, 10 Aug 2017 15:23:56 -0700
Subject: mm: migrate: prevent racy access to tlb_flush_pending

Patch series "fixes of TLB batching races", v6.

It turns out that Linux TLB batching mechanism suffers from various
races.  Races that are caused due to batching during reclamation were
recently handled by Mel and this patch-set deals with others.  The more
fundamental issue is that concurrent updates of the page-tables allow
for TLB flushes to be batched on one core, while another core changes
the page-tables.  This other core may assume a PTE change does not
require a flush based on the updated PTE value, while it is unaware that
TLB flushes are still pending.

This behavior affects KSM (which may result in memory corruption) and
MADV_FREE and MADV_DONTNEED (which may result in incorrect behavior).  A
proof-of-concept can easily produce the wrong behavior of MADV_DONTNEED.
Memory corruption in KSM is harder to produce in practice, but was
observed by hacking the kernel and adding a delay before flushing and
replacing the KSM page.

Finally, there is also one memory barrier missing, which may affect
architectures with weak memory model.

This patch (of 7):

Setting and clearing mm->tlb_flush_pending can be performed by multiple
threads, since mmap_sem may only be acquired for read in
task_numa_work().  If this happens, tlb_flush_pending might be cleared
while one of the threads still changes PTEs and batches TLB flushes.

This can lead to the same race between migration and
change_protection_range() that led to the introduction of
tlb_flush_pending.  The result of this race was data corruption, which
means that this patch also addresses a theoretically possible data
corruption.

An actual data corruption was not observed, yet the race was was
confirmed by adding assertion to check tlb_flush_pending is not set by
two threads, adding artificial latency in change_protection_range() and
using sysctl to reduce kernel.numa_balancing_scan_delay_ms.

Link: http://lkml.kernel.org/r/20170802000818.4760-2-namit@vmware.com
Fixes: 20841405940e ("mm: fix TLB flush race between migration, and
change_protection_range")
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 31 ++++++++++++++++++++++---------
 kernel/fork.c            |  2 +-
 mm/debug.c               |  2 +-
 mm/mprotect.c            |  4 ++--
 4 files changed, 26 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f384bb62d8e..f58f76ee1dfa 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -493,7 +493,7 @@ struct mm_struct {
 	 * can move process memory needs to flush the TLB when moving a
 	 * PROT_NONE or PROT_NUMA mapped page.
 	 */
-	bool tlb_flush_pending;
+	atomic_t tlb_flush_pending;
 #endif
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	/* See flush_tlb_batched_pending() */
@@ -532,33 +532,46 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 {
 	barrier();
-	return mm->tlb_flush_pending;
+	return atomic_read(&mm->tlb_flush_pending) > 0;
 }
-static inline void set_tlb_flush_pending(struct mm_struct *mm)
+
+static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
-	mm->tlb_flush_pending = true;
+	atomic_set(&mm->tlb_flush_pending, 0);
+}
+
+static inline void inc_tlb_flush_pending(struct mm_struct *mm)
+{
+	atomic_inc(&mm->tlb_flush_pending);
 
 	/*
-	 * Guarantee that the tlb_flush_pending store does not leak into the
+	 * Guarantee that the tlb_flush_pending increase does not leak into the
 	 * critical section updating the page tables
 	 */
 	smp_mb__before_spinlock();
 }
+
 /* Clearing is done after a TLB flush, which also provides a barrier. */
-static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 {
 	barrier();
-	mm->tlb_flush_pending = false;
+	atomic_dec(&mm->tlb_flush_pending);
 }
 #else
 static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 {
 	return false;
 }
-static inline void set_tlb_flush_pending(struct mm_struct *mm)
+
+static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
 }
-static inline void clear_tlb_flush_pending(struct mm_struct *mm)
+
+static inline void inc_tlb_flush_pending(struct mm_struct *mm)
+{
+}
+
+static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 {
 }
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 17921b0390b4..e075b7780421 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -807,7 +807,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
 	mmu_notifier_mm_init(mm);
-	clear_tlb_flush_pending(mm);
+	init_tlb_flush_pending(mm);
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS
 	mm->pmd_huge_pte = NULL;
 #endif
diff --git a/mm/debug.c b/mm/debug.c
index db1cd26d8752..d70103bb4731 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -159,7 +159,7 @@ void dump_mm(const struct mm_struct *mm)
 		mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq,
 #endif
 #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
-		mm->tlb_flush_pending,
+		atomic_read(&mm->tlb_flush_pending),
 #endif
 		mm->def_flags, &mm->def_flags
 	);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 4180ad8cc9c5..bd0f409922cb 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -244,7 +244,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
 	flush_cache_range(vma, addr, end);
-	set_tlb_flush_pending(mm);
+	inc_tlb_flush_pending(mm);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
@@ -256,7 +256,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 	/* Only flush the TLB if we actually modified any entries: */
 	if (pages)
 		flush_tlb_range(vma, start, end);
-	clear_tlb_flush_pending(mm);
+	dec_tlb_flush_pending(mm);
 
 	return pages;
 }
-- 
cgit 


From 0a2c40487f3e4215c6ab46e7f837036badfb542b Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Thu, 10 Aug 2017 15:23:59 -0700
Subject: mm: migrate: fix barriers around tlb_flush_pending

Reading tlb_flush_pending while the page-table lock is taken does not
require a barrier, since the lock/unlock already acts as a barrier.
Removing the barrier in mm_tlb_flush_pending() to address this issue.

However, migrate_misplaced_transhuge_page() calls mm_tlb_flush_pending()
while the page-table lock is already released, which may present a
problem on architectures with weak memory model (PPC).  To deal with
this case, a new parameter is added to mm_tlb_flush_pending() to
indicate if it is read without the page-table lock taken, and calling
smp_mb__after_unlock_lock() in this case.

Link: http://lkml.kernel.org/r/20170802000818.4760-3-namit@vmware.com
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f58f76ee1dfa..0e478ebd2706 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -526,12 +526,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 /*
  * Memory barriers to keep this state in sync are graciously provided by
  * the page table locks, outside of which no page table modifications happen.
- * The barriers below prevent the compiler from re-ordering the instructions
- * around the memory barriers that are already present in the code.
+ * The barriers are used to ensure the order between tlb_flush_pending updates,
+ * which happen while the lock is not taken, and the PTE updates, which happen
+ * while the lock is taken, are serialized.
  */
 static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 {
-	barrier();
 	return atomic_read(&mm->tlb_flush_pending) > 0;
 }
 
@@ -554,7 +554,13 @@ static inline void inc_tlb_flush_pending(struct mm_struct *mm)
 /* Clearing is done after a TLB flush, which also provides a barrier. */
 static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 {
-	barrier();
+	/*
+	 * Guarantee that the tlb_flush_pending does not not leak into the
+	 * critical section, since we must order the PTE change and changes to
+	 * the pending TLB flush indication. We could have relied on TLB flush
+	 * as a memory barrier, but this behavior is not clearly documented.
+	 */
+	smp_mb__before_atomic();
 	atomic_dec(&mm->tlb_flush_pending);
 }
 #else
-- 
cgit 


From 56236a59556cfd3bae7bffb7e5f438b5ef0af880 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 10 Aug 2017 15:24:05 -0700
Subject: mm: refactor TLB gathering API

This patch is a preparatory patch for solving race problems caused by
TLB batch.  For that, we will increase/decrease TLB flush pending count
of mm_struct whenever tlb_[gather|finish]_mmu is called.

Before making it simple, this patch separates architecture specific part
and rename it to arch_tlb_[gather|finish]_mmu and generic part just
calls it.

It shouldn't change any behavior.

Link: http://lkml.kernel.org/r/20170802000818.4760-5-namit@vmware.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/tlb.h  |  6 ++++--
 arch/ia64/include/asm/tlb.h |  6 ++++--
 arch/s390/include/asm/tlb.h | 12 ++++++------
 arch/sh/include/asm/tlb.h   |  6 ++++--
 arch/um/include/asm/tlb.h   |  8 +++++---
 include/asm-generic/tlb.h   |  7 ++++---
 include/linux/mm_types.h    |  6 ++++++
 mm/memory.c                 | 28 +++++++++++++++++++++-------
 8 files changed, 54 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 3f2eb76243e3..7f5b2a2d3861 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -148,7 +148,8 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->fullmm = !(start | (end+1));
@@ -166,7 +167,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
 }
 
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+			unsigned long start, unsigned long end)
 {
 	tlb_flush_mmu(tlb);
 
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index fced197b9626..93cadc04ac62 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -168,7 +168,8 @@ static inline void __tlb_alloc_page(struct mmu_gather *tlb)
 
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->max = ARRAY_SIZE(tlb->local);
@@ -185,7 +186,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
  * collected.
  */
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+			unsigned long start, unsigned long end)
 {
 	/*
 	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 7317b3108a88..d574d0820dc8 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -47,10 +47,9 @@ struct mmu_table_batch {
 extern void tlb_table_flush(struct mmu_gather *tlb);
 extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 
-static inline void tlb_gather_mmu(struct mmu_gather *tlb,
-				  struct mm_struct *mm,
-				  unsigned long start,
-				  unsigned long end)
+static inline void
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->start = start;
@@ -76,8 +75,9 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 	tlb_flush_mmu_free(tlb);
 }
 
-static inline void tlb_finish_mmu(struct mmu_gather *tlb,
-				  unsigned long start, unsigned long end)
+static inline void
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
 {
 	tlb_flush_mmu(tlb);
 }
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 46e0d635e36f..89786560dbd4 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -36,7 +36,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->start = start;
@@ -47,7 +48,8 @@ tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start
 }
 
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
 {
 	if (tlb->fullmm)
 		flush_tlb_mm(tlb->mm);
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 600a2e9bfee2..2a901eca7145 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -45,7 +45,8 @@ static inline void init_tlb_gather(struct mmu_gather *tlb)
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+		unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->start = start;
@@ -80,12 +81,13 @@ tlb_flush_mmu(struct mmu_gather *tlb)
 	tlb_flush_mmu_free(tlb);
 }
 
-/* tlb_finish_mmu
+/* arch_tlb_finish_mmu
  *	Called at the end of the shootdown operation to free up any resources
  *	that were required.
  */
 static inline void
-tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
 {
 	tlb_flush_mmu(tlb);
 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 8afa4335e5b2..8f71521e7a44 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -112,10 +112,11 @@ struct mmu_gather {
 
 #define HAVE_GENERIC_MMU_GATHER
 
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end);
+void arch_tlb_gather_mmu(struct mmu_gather *tlb,
+	struct mm_struct *mm, unsigned long start, unsigned long end);
 void tlb_flush_mmu(struct mmu_gather *tlb);
-void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
-							unsigned long end);
+void arch_tlb_finish_mmu(struct mmu_gather *tlb,
+			 unsigned long start, unsigned long end);
 extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
 				   int page_size);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0e478ebd2706..c605f2a3a68e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -522,6 +522,12 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
 	return mm->cpu_vm_mask_var;
 }
 
+struct mmu_gather;
+extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+				unsigned long start, unsigned long end);
+extern void tlb_finish_mmu(struct mmu_gather *tlb,
+				unsigned long start, unsigned long end);
+
 #if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 /*
  * Memory barriers to keep this state in sync are graciously provided by
diff --git a/mm/memory.c b/mm/memory.c
index f65beaad319b..34cba5113e06 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -215,12 +215,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
 	return true;
 }
 
-/* tlb_gather_mmu
- *	Called to initialize an (on-stack) mmu_gather structure for page-table
- *	tear-down from @mm. The @fullmm argument is used when @mm is without
- *	users and we're going to destroy the full address space (exit/execve).
- */
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
+void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+				unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 
@@ -275,7 +271,8 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
  *	Called at the end of the shootdown operation to free up any resources
  *	that were required.
  */
-void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
+void arch_tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
 {
 	struct mmu_gather_batch *batch, *next;
 
@@ -398,6 +395,23 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 
 #endif /* CONFIG_HAVE_RCU_TABLE_FREE */
 
+/* tlb_gather_mmu
+ *	Called to initialize an (on-stack) mmu_gather structure for page-table
+ *	tear-down from @mm. The @fullmm argument is used when @mm is without
+ *	users and we're going to destroy the full address space (exit/execve).
+ */
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
+			unsigned long start, unsigned long end)
+{
+	arch_tlb_gather_mmu(tlb, mm, start, end);
+}
+
+void tlb_finish_mmu(struct mmu_gather *tlb,
+		unsigned long start, unsigned long end)
+{
+	arch_tlb_finish_mmu(tlb, start, end);
+}
+
 /*
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
-- 
cgit 


From 0a2dd266dd6b7a31503b5bbe63af05961a6b446d Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 10 Aug 2017 15:24:09 -0700
Subject: mm: make tlb_flush_pending global

Currently, tlb_flush_pending is used only for CONFIG_[NUMA_BALANCING|
COMPACTION] but upcoming patches to solve subtle TLB flush batching
problem will use it regardless of compaction/NUMA so this patch doesn't
remove the dependency.

[akpm@linux-foundation.org: remove more ifdefs from world's ugliest printk statement]
Link: http://lkml.kernel.org/r/20170802000818.4760-6-namit@vmware.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 21 ---------------------
 mm/debug.c               |  4 ----
 2 files changed, 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index c605f2a3a68e..892a7b0196fd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -487,14 +487,12 @@ struct mm_struct {
 	/* numa_scan_seq prevents two threads setting pte_numa */
 	int numa_scan_seq;
 #endif
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 	/*
 	 * An operation with batched TLB flushing is going on. Anything that
 	 * can move process memory needs to flush the TLB when moving a
 	 * PROT_NONE or PROT_NUMA mapped page.
 	 */
 	atomic_t tlb_flush_pending;
-#endif
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	/* See flush_tlb_batched_pending() */
 	bool tlb_flush_batched;
@@ -528,7 +526,6 @@ extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 extern void tlb_finish_mmu(struct mmu_gather *tlb,
 				unsigned long start, unsigned long end);
 
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 /*
  * Memory barriers to keep this state in sync are graciously provided by
  * the page table locks, outside of which no page table modifications happen.
@@ -569,24 +566,6 @@ static inline void dec_tlb_flush_pending(struct mm_struct *mm)
 	smp_mb__before_atomic();
 	atomic_dec(&mm->tlb_flush_pending);
 }
-#else
-static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
-{
-	return false;
-}
-
-static inline void init_tlb_flush_pending(struct mm_struct *mm)
-{
-}
-
-static inline void inc_tlb_flush_pending(struct mm_struct *mm)
-{
-}
-
-static inline void dec_tlb_flush_pending(struct mm_struct *mm)
-{
-}
-#endif
 
 struct vm_fault;
 
diff --git a/mm/debug.c b/mm/debug.c
index d70103bb4731..5715448ab0b5 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -124,9 +124,7 @@ void dump_mm(const struct mm_struct *mm)
 #ifdef CONFIG_NUMA_BALANCING
 		"numa_next_scan %lu numa_scan_offset %lu numa_scan_seq %d\n"
 #endif
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 		"tlb_flush_pending %d\n"
-#endif
 		"def_flags: %#lx(%pGv)\n",
 
 		mm, mm->mmap, mm->vmacache_seqnum, mm->task_size,
@@ -158,9 +156,7 @@ void dump_mm(const struct mm_struct *mm)
 #ifdef CONFIG_NUMA_BALANCING
 		mm->numa_next_scan, mm->numa_scan_offset, mm->numa_scan_seq,
 #endif
-#if defined(CONFIG_NUMA_BALANCING) || defined(CONFIG_COMPACTION)
 		atomic_read(&mm->tlb_flush_pending),
-#endif
 		mm->def_flags, &mm->def_flags
 	);
 }
-- 
cgit 


From 99baac21e4585f4258f919502c6e23f1e5edc98c Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Thu, 10 Aug 2017 15:24:12 -0700
Subject: mm: fix MADV_[FREE|DONTNEED] TLB flush miss problem

Nadav reported parallel MADV_DONTNEED on same range has a stale TLB
problem and Mel fixed it[1] and found same problem on MADV_FREE[2].

Quote from Mel Gorman:
 "The race in question is CPU 0 running madv_free and updating some PTEs
  while CPU 1 is also running madv_free and looking at the same PTEs.
  CPU 1 may have writable TLB entries for a page but fail the pte_dirty
  check (because CPU 0 has updated it already) and potentially fail to
  flush.

  Hence, when madv_free on CPU 1 returns, there are still potentially
  writable TLB entries and the underlying PTE is still present so that a
  subsequent write does not necessarily propagate the dirty bit to the
  underlying PTE any more. Reclaim at some unknown time at the future
  may then see that the PTE is still clean and discard the page even
  though a write has happened in the meantime. I think this is possible
  but I could have missed some protection in madv_free that prevents it
  happening."

This patch aims for solving both problems all at once and is ready for
other problem with KSM, MADV_FREE and soft-dirty story[3].

TLB batch API(tlb_[gather|finish]_mmu] uses [inc|dec]_tlb_flush_pending
and mmu_tlb_flush_pending so that when tlb_finish_mmu is called, we can
catch there are parallel threads going on.  In that case, forcefully,
flush TLB to prevent for user to access memory via stale TLB entry
although it fail to gather page table entry.

I confirmed this patch works with [4] test program Nadav gave so this
patch supersedes "mm: Always flush VMA ranges affected by zap_page_range
v2" in current mmotm.

NOTE:

This patch modifies arch-specific TLB gathering interface(x86, ia64,
s390, sh, um).  It seems most of architecture are straightforward but
s390 need to be careful because tlb_flush_mmu works only if
mm->context.flush_mm is set to non-zero which happens only a pte entry
really is cleared by ptep_get_and_clear and friends.  However, this
problem never changes the pte entries but need to flush to prevent
memory access from stale tlb.

[1] http://lkml.kernel.org/r/20170725101230.5v7gvnjmcnkzzql3@techsingularity.net
[2] http://lkml.kernel.org/r/20170725100722.2dxnmgypmwnrfawp@suse.de
[3] http://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com
[4] https://patchwork.kernel.org/patch/9861621/

[minchan@kernel.org: decrease tlb flush pending count in tlb_finish_mmu]
  Link: http://lkml.kernel.org/r/20170808080821.GA31730@bbox
Link: http://lkml.kernel.org/r/20170802000818.4760-7-namit@vmware.com
Signed-off-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Reported-by: Nadav Amit <namit@vmware.com>
Reported-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/include/asm/tlb.h  |  7 ++++++-
 arch/ia64/include/asm/tlb.h |  4 +++-
 arch/s390/include/asm/tlb.h |  7 ++++++-
 arch/sh/include/asm/tlb.h   |  4 ++--
 arch/um/include/asm/tlb.h   |  7 ++++++-
 include/asm-generic/tlb.h   |  2 +-
 include/linux/mm_types.h    |  8 ++++++++
 mm/memory.c                 | 18 ++++++++++++++++--
 8 files changed, 48 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 7f5b2a2d3861..d5562f9ce600 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -168,8 +168,13 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-			unsigned long start, unsigned long end)
+			unsigned long start, unsigned long end, bool force)
 {
+	if (force) {
+		tlb->range_start = start;
+		tlb->range_end = end;
+	}
+
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 93cadc04ac62..cbe5ac3699bf 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -187,8 +187,10 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
  */
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-			unsigned long start, unsigned long end)
+			unsigned long start, unsigned long end, bool force)
 {
+	if (force)
+		tlb->need_flush = 1;
 	/*
 	 * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
 	 * tlb->end_addr.
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index d574d0820dc8..2eb8ff0d6fca 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -77,8 +77,13 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+		unsigned long start, unsigned long end, bool force)
 {
+	if (force) {
+		tlb->start = start;
+		tlb->end = end;
+	}
+
 	tlb_flush_mmu(tlb);
 }
 
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 89786560dbd4..51a8bc967e75 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -49,9 +49,9 @@ arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+		unsigned long start, unsigned long end, bool force)
 {
-	if (tlb->fullmm)
+	if (tlb->fullmm || force)
 		flush_tlb_mm(tlb->mm);
 
 	/* keep the page table cache within bounds */
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 2a901eca7145..344d95619d03 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -87,8 +87,13 @@ tlb_flush_mmu(struct mmu_gather *tlb)
  */
 static inline void
 arch_tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+		unsigned long start, unsigned long end, bool force)
 {
+	if (force) {
+		tlb->start = start;
+		tlb->end = end;
+		tlb->need_flush = 1;
+	}
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 8f71521e7a44..faddde44de8c 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -116,7 +116,7 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb,
 	struct mm_struct *mm, unsigned long start, unsigned long end);
 void tlb_flush_mmu(struct mmu_gather *tlb);
 void arch_tlb_finish_mmu(struct mmu_gather *tlb,
-			 unsigned long start, unsigned long end);
+			 unsigned long start, unsigned long end, bool force);
 extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
 				   int page_size);
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 892a7b0196fd..3cadee0a3508 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -538,6 +538,14 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
 	return atomic_read(&mm->tlb_flush_pending) > 0;
 }
 
+/*
+ * Returns true if there are two above TLB batching threads in parallel.
+ */
+static inline bool mm_tlb_flush_nested(struct mm_struct *mm)
+{
+	return atomic_read(&mm->tlb_flush_pending) > 1;
+}
+
 static inline void init_tlb_flush_pending(struct mm_struct *mm)
 {
 	atomic_set(&mm->tlb_flush_pending, 0);
diff --git a/mm/memory.c b/mm/memory.c
index 34cba5113e06..e158f7ac6730 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -272,10 +272,13 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
  *	that were required.
  */
 void arch_tlb_finish_mmu(struct mmu_gather *tlb,
-		unsigned long start, unsigned long end)
+		unsigned long start, unsigned long end, bool force)
 {
 	struct mmu_gather_batch *batch, *next;
 
+	if (force)
+		__tlb_adjust_range(tlb, start, end - start);
+
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
@@ -404,12 +407,23 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 			unsigned long start, unsigned long end)
 {
 	arch_tlb_gather_mmu(tlb, mm, start, end);
+	inc_tlb_flush_pending(tlb->mm);
 }
 
 void tlb_finish_mmu(struct mmu_gather *tlb,
 		unsigned long start, unsigned long end)
 {
-	arch_tlb_finish_mmu(tlb, start, end);
+	/*
+	 * If there are parallel threads are doing PTE changes on same range
+	 * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB
+	 * flush by batching, a thread has stable TLB entry can fail to flush
+	 * the TLB by observing pte_none|!pte_dirty, for example so flush TLB
+	 * forcefully if we detect parallel PTE batching threads.
+	 */
+	bool force = mm_tlb_flush_nested(tlb->mm);
+
+	arch_tlb_finish_mmu(tlb, start, end, force);
+	dec_tlb_flush_pending(tlb->mm);
 }
 
 /*
-- 
cgit 


From d20fa5a06d7bddb7823d14255982148fefb72950 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Fri, 11 Aug 2017 16:49:04 +0300
Subject: drm: omapdrm: hdmi: Pass HDMI core version as integer to HDMI audio

The HDMI audio driver only needs to know which generation of HDMI
transmitter it deals with, not the detailed SoC model. Pass the version
number as an integer to prepare for removal of the OMAP SoC version from
the omapdrm driver.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 drivers/gpu/drm/omapdrm/dss/hdmi4.c          | 2 +-
 drivers/gpu/drm/omapdrm/dss/hdmi5.c          | 2 +-
 drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c | 2 +-
 drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c | 2 +-
 include/sound/omap-hdmi-audio.h              | 2 +-
 sound/soc/omap/omap-hdmi-audio.c             | 9 +++------
 6 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
index 438f47d8ab69..4c131d710282 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
@@ -667,7 +667,7 @@ static int hdmi_audio_register(struct device *dev)
 {
 	struct omap_hdmi_audio_pdata pdata = {
 		.dev = dev,
-		.dss_version = omapdss_get_version(),
+		.version = 4,
 		.audio_dma_addr = hdmi_wp_get_audio_dma_addr(&hdmi.wp),
 		.ops = &hdmi_audio_ops,
 	};
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
index 8ab76b50662a..a6adddeee5bb 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
@@ -694,7 +694,7 @@ static int hdmi_audio_register(struct device *dev)
 {
 	struct omap_hdmi_audio_pdata pdata = {
 		.dev = dev,
-		.dss_version = omapdss_get_version(),
+		.version = 5,
 		.audio_dma_addr = hdmi_wp_get_audio_dma_addr(&hdmi.wp),
 		.ops = &hdmi_audio_ops,
 	};
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c b/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c
index 156a254705ea..ec78d61bc551 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/hdmi4.c
@@ -664,7 +664,7 @@ static int hdmi_audio_register(struct device *dev)
 {
 	struct omap_hdmi_audio_pdata pdata = {
 		.dev = dev,
-		.dss_version = omapdss_get_version(),
+		.version = 4,
 		.audio_dma_addr = hdmi_wp_get_audio_dma_addr(&hdmi.wp),
 		.ops = &hdmi_audio_ops,
 	};
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c b/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c
index 4da36bcab977..2e2fcc3d6d4f 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/hdmi5.c
@@ -695,7 +695,7 @@ static int hdmi_audio_register(struct device *dev)
 {
 	struct omap_hdmi_audio_pdata pdata = {
 		.dev = dev,
-		.dss_version = omapdss_get_version(),
+		.version = 5,
 		.audio_dma_addr = hdmi_wp_get_audio_dma_addr(&hdmi.wp),
 		.ops = &hdmi_audio_ops,
 	};
diff --git a/include/sound/omap-hdmi-audio.h b/include/sound/omap-hdmi-audio.h
index 1df2ff61a4dd..0e495ed8872e 100644
--- a/include/sound/omap-hdmi-audio.h
+++ b/include/sound/omap-hdmi-audio.h
@@ -39,7 +39,7 @@ struct omap_hdmi_audio_ops {
 /* HDMI audio initalization data */
 struct omap_hdmi_audio_pdata {
 	struct device *dev;
-	enum omapdss_version dss_version;
+	unsigned int version;
 	phys_addr_t audio_dma_addr;
 
 	const struct omap_hdmi_audio_ops *ops;
diff --git a/sound/soc/omap/omap-hdmi-audio.c b/sound/soc/omap/omap-hdmi-audio.c
index 888133f9e65d..3e9cc4842a1d 100644
--- a/sound/soc/omap/omap-hdmi-audio.c
+++ b/sound/soc/omap/omap-hdmi-audio.c
@@ -337,14 +337,11 @@ static int omap_hdmi_audio_probe(struct platform_device *pdev)
 	ad->dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
 	mutex_init(&ad->current_stream_lock);
 
-	switch (ha->dss_version) {
-	case OMAPDSS_VER_OMAP4430_ES1:
-	case OMAPDSS_VER_OMAP4430_ES2:
-	case OMAPDSS_VER_OMAP4:
+	switch (ha->version) {
+	case 4:
 		dai_drv = &omap4_hdmi_dai;
 		break;
-	case OMAPDSS_VER_OMAP5:
-	case OMAPDSS_VER_DRA7xx:
+	case 5:
 		dai_drv = &omap5_hdmi_dai;
 		break;
 	default:
-- 
cgit 


From 739a6d5d640a9811beef6a828253ee184dd431c5 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Fri, 11 Aug 2017 16:49:12 +0300
Subject: drm: omapdrm: Remove omapdrm platform data

The omapdrm platform data are not used anymore, remove them.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
Signed-off-by: Tomi Valkeinen <tomi.valkeinen@ti.com>
---
 include/linux/platform_data/omap_drm.h | 53 ----------------------------------
 1 file changed, 53 deletions(-)
 delete mode 100644 include/linux/platform_data/omap_drm.h

(limited to 'include')

diff --git a/include/linux/platform_data/omap_drm.h b/include/linux/platform_data/omap_drm.h
deleted file mode 100644
index f4e4a237ebd2..000000000000
--- a/include/linux/platform_data/omap_drm.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * DRM/KMS platform data for TI OMAP platforms
- *
- * Copyright (C) 2012 Texas Instruments
- * Author: Rob Clark <rob.clark@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __PLATFORM_DATA_OMAP_DRM_H__
-#define __PLATFORM_DATA_OMAP_DRM_H__
-
-/*
- * Optional platform data to configure the default configuration of which
- * pipes/overlays/CRTCs are used.. if this is not provided, then instead the
- * first CONFIG_DRM_OMAP_NUM_CRTCS are used, and they are each connected to
- * one manager, with priority given to managers that are connected to
- * detected devices.  Remaining overlays are used as video planes.  This
- * should be a good default behavior for most cases, but yet there still
- * might be times when you wish to do something different.
- */
-struct omap_kms_platform_data {
-	/* overlays to use as CRTCs: */
-	int ovl_cnt;
-	const int *ovl_ids;
-
-	/* overlays to use as video planes: */
-	int pln_cnt;
-	const int *pln_ids;
-
-	int mgr_cnt;
-	const int *mgr_ids;
-
-	int dev_cnt;
-	const char **dev_names;
-};
-
-struct omap_drm_platform_data {
-	uint32_t omaprev;
-	struct omap_kms_platform_data *kms_pdata;
-};
-
-#endif /* __PLATFORM_DATA_OMAP_DRM_H__ */
-- 
cgit