From 19d67694745c5c9ed085d0d8332fa02d835a07d0 Mon Sep 17 00:00:00 2001 From: Rodrigo Campos Date: Fri, 2 Jul 2021 17:19:27 +0200 Subject: Documentation: seccomp: Fix typo in user notification The close on exec flag is O_CLOEXEC, not O_EXEC. This patch just fixes the typo. Suggested-by: Christian Brauner Signed-off-by: Rodrigo Campos Acked-by: Christian Brauner Signed-off-by: Kees Cook Fixes: 0ae71c7720e3 ("seccomp: Support atomic "addfd + send reply"") Link: https://lore.kernel.org/r/20210702151927.263402-1-rodrigo@kinvolk.io --- Documentation/userspace-api/seccomp_filter.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index d61219889e49..539e9d4a4860 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -263,7 +263,7 @@ Userspace can also add file descriptors to the notifying process via ``ioctl(SECCOMP_IOCTL_NOTIF_ADDFD)``. The ``id`` member of ``struct seccomp_notif_addfd`` should be the same ``id`` as in ``struct seccomp_notif``. The ``newfd_flags`` flag may be used to set flags -like O_EXEC on the file descriptor in the notifying process. If the supervisor +like O_CLOEXEC on the file descriptor in the notifying process. If the supervisor wants to inject the file descriptor with a specific number, the ``SECCOMP_ADDFD_FLAG_SETFD`` flag can be used, and set the ``newfd`` member to the specific number to use. If that file descriptor is already open in the -- cgit From e9faf53c5a5d01f6f2a09ae28ec63a3bbd6f64fd Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Mon, 5 Jul 2021 21:13:20 +0800 Subject: ieee802154: hwsim: fix GPF in hwsim_set_edge_lqi Both MAC802154_HWSIM_ATTR_RADIO_ID and MAC802154_HWSIM_ATTR_RADIO_EDGE, MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID and MAC802154_HWSIM_EDGE_ATTR_LQI must be present to fix GPF. Fixes: f25da51fdc38 ("ieee802154: hwsim: add replacement for fakelb") Signed-off-by: Dongliang Mu Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20210705131321.217111-1-mudongliangabcd@gmail.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mac802154_hwsim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index ebc976b7fcc2..cae52bfb871e 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -528,14 +528,14 @@ static int hwsim_set_edge_lqi(struct sk_buff *msg, struct genl_info *info) u32 v0, v1; u8 lqi; - if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] && + if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; if (nla_parse_nested_deprecated(edge_attrs, MAC802154_HWSIM_EDGE_ATTR_MAX, info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE], hwsim_edge_policy, NULL)) return -EINVAL; - if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] && + if (!edge_attrs[MAC802154_HWSIM_EDGE_ATTR_ENDPOINT_ID] || !edge_attrs[MAC802154_HWSIM_EDGE_ATTR_LQI]) return -EINVAL; -- cgit From 2e29be2e491595407087ab36a5e5a159be693f7b Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 25 May 2021 10:24:26 -0700 Subject: fs/fuse: Remove unneeded kaddr parameter fuse_dax_mem_range_init() does not need the address or the pfn of the memory requested in dax_direct_access(). It is only calling direct access to get the number of pages. Remove the unused variables and stop requesting the kaddr and pfn from dax_direct_access(). Reviewed-by: Dan Williams Signed-off-by: Ira Weiny Reviewed-by: Vivek Goyal Link: https://lore.kernel.org/r/20210525172428.3634316-2-ira.weiny@intel.com Signed-off-by: Dan Williams --- fs/fuse/dax.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index ff99ab2a3c43..34f8a5635c7f 100644 --- a/fs/fuse/dax.c +++ b/fs/fuse/dax.c @@ -1234,8 +1234,6 @@ void fuse_dax_conn_free(struct fuse_conn *fc) static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) { long nr_pages, nr_ranges; - void *kaddr; - pfn_t pfn; struct fuse_dax_mapping *range; int ret, id; size_t dax_size = -1; @@ -1247,8 +1245,8 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker); id = dax_read_lock(); - nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr, - &pfn); + nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), NULL, + NULL); dax_read_unlock(id); if (nr_pages < 0) { pr_debug("dax_direct_access() returned %ld\n", nr_pages); -- cgit From 44788591c3cfb81d9315b8ee5c2076e51bfe8a39 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 25 May 2021 10:24:27 -0700 Subject: fs/dax: Clarify nr_pages to dax_direct_access() dax_direct_access() takes a number of pages. PHYS_PFN(PAGE_SIZE) is a very round about way to specify '1'. Change the nr_pages parameter to the explicit value of '1'. Reviewed-by: Dan Williams Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20210525172428.3634316-3-ira.weiny@intel.com Signed-off-by: Dan Williams --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 62352cbcf0f4..8ce3a7e147a8 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -721,7 +721,7 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d return rc; id = dax_read_lock(); - rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(PAGE_SIZE), &kaddr, NULL); + rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL); if (rc < 0) { dax_read_unlock(id); return rc; -- cgit From b05d4c576b697b9f462b9c532c997171d5c3b067 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Tue, 25 May 2021 10:24:28 -0700 Subject: dax: Ensure errno is returned from dax_direct_access If the caller specifies a negative nr_pages that is an invalid parameter. Return -EINVAL to ensure callers get an errno if they want to check it. Reviewed-by: Dan Williams Signed-off-by: Ira Weiny Link: https://lore.kernel.org/r/20210525172428.3634316-4-ira.weiny@intel.com Signed-off-by: Dan Williams --- drivers/dax/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 5fa6ae9dbc8b..44736cbd446e 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -313,7 +313,7 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, return -ENXIO; if (nr_pages < 0) - return nr_pages; + return -EINVAL; avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); -- cgit From 889d0e7dc68314a273627d89cbb60c09e1cc1c25 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Wed, 7 Jul 2021 23:56:32 +0800 Subject: ieee802154: hwsim: fix GPF in hwsim_new_edge_nl Both MAC802154_HWSIM_ATTR_RADIO_ID and MAC802154_HWSIM_ATTR_RADIO_EDGE must be present to fix GPF. Fixes: f25da51fdc38 ("ieee802154: hwsim: add replacement for fakelb") Signed-off-by: Dongliang Mu Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20210707155633.1486603-1-mudongliangabcd@gmail.com Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mac802154_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index cae52bfb871e..8caa61ec718f 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -418,7 +418,7 @@ static int hwsim_new_edge_nl(struct sk_buff *msg, struct genl_info *info) struct hwsim_edge *e; u32 v0, v1; - if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] && + if (!info->attrs[MAC802154_HWSIM_ATTR_RADIO_ID] || !info->attrs[MAC802154_HWSIM_ATTR_RADIO_EDGE]) return -EINVAL; -- cgit From 4377d9ab1f162e58e0e5ae89c9a5fd7b4d8a6bdb Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Fri, 9 Jul 2021 09:17:27 +0200 Subject: iio: accel: fxls8962af: fix potential use of uninitialized symbol Fix this warning from kernel test robot: smatch warnings: drivers/iio/accel/fxls8962af-core.c:640 fxls8962af_i2c_raw_read_errata3() error: uninitialized symbol 'ret'. Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Sean Nyekjaer Fixes: af959b7b96b8 ("iio: accel: fxls8962af: fix errata bug E3 - I2C burst reads") Link: https://lore.kernel.org/r/20210709071727.2453536-1-sean@geanix.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/fxls8962af-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c index 078d87865fde..0019f1ea7df2 100644 --- a/drivers/iio/accel/fxls8962af-core.c +++ b/drivers/iio/accel/fxls8962af-core.c @@ -637,7 +637,7 @@ static int fxls8962af_i2c_raw_read_errata3(struct fxls8962af_data *data, return ret; } - return ret; + return 0; } static int fxls8962af_fifo_transfer(struct fxls8962af_data *data, -- cgit From 2b6d2833cd1d8a43a837a45da65860ef086443dc Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Tue, 15 Jun 2021 17:39:05 +0800 Subject: mtd: mtd_blkdevs: Initialize rq.limits.discard_granularity Since commit b35fd7422c2f8("block: check queue's limits.discard_granularity in __blkdev_issue_discard()") checks rq.limits.discard_granularity in __blkdev_issue_discard(), we may get following warnings on formatted ftl: WARNING: CPU: 2 PID: 7313 at block/blk-lib.c:51 __blkdev_issue_discard+0x2a7/0x390 Reproducer: 1. ftl_format /dev/mtd0 2. modprobe ftl 3. mkfs.vfat /dev/ftla 4. mount -odiscard /dev/ftla temp 5. dd if=/dev/zero of=temp/tst bs=1M count=10 oflag=direct 6. dd if=/dev/zero of=temp/tst bs=1M count=10 oflag=direct Fix it by initializing rq.limits.discard_granularity if device supports discard operation. Signed-off-by: Zhihao Cheng Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210615093905.3473709-1-chengzhihao1@huawei.com --- drivers/mtd/mtd_blkdevs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 6ce4bc57f919..8f97a4fe6e93 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -419,6 +419,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new) if (tr->discard) { blk_queue_flag_set(QUEUE_FLAG_DISCARD, new->rq); blk_queue_max_discard_sectors(new->rq, UINT_MAX); + new->rq->limits.discard_granularity = tr->blksize; } gd->queue = new->rq; -- cgit From 14f97f0b8e2b9950c028d0cb7311ffe26a3cc1c0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 17 Jun 2021 16:37:25 +0300 Subject: mtd: rawnand: Add a check in of_get_nand_secure_regions() Check for whether of_property_count_elems_of_size() returns a negative error code. Fixes: 13b89768275d ("mtd: rawnand: Add support for secure regions in NAND memory") Signed-off-by: Dan Carpenter Reviewed-by: Manivannan Sadhasivam Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/YMtQFXE0F1w7mUh+@mwanda --- drivers/mtd/nand/raw/nand_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 57a583149cc0..cbba46432e39 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5231,8 +5231,8 @@ static int of_get_nand_secure_regions(struct nand_chip *chip) int nr_elem, i, j; nr_elem = of_property_count_elems_of_size(dn, "secure-regions", sizeof(u64)); - if (!nr_elem) - return 0; + if (nr_elem <= 0) + return nr_elem; chip->nr_secure_regions = nr_elem / 2; chip->secure_regions = kcalloc(chip->nr_secure_regions, sizeof(*chip->secure_regions), -- cgit From 962bf783ef65d15b0f8ca9c33342cf3b20bf0d2e Mon Sep 17 00:00:00 2001 From: Desmond Cheong Zhi Xi Date: Fri, 18 Jun 2021 00:09:04 +0800 Subject: mtd: break circular locks in register_mtd_blktrans Syzbot reported a circular locking dependency: https://syzkaller.appspot.com/bug?id=7bd106c28e846d1023d4ca915718b1a0905444cb This happens because of the following lock dependencies: 1. loop_ctl_mutex -> bdev->bd_mutex (when loop_control_ioctl calls loop_remove, which then calls del_gendisk; this also happens in loop_exit which eventually calls loop_remove) 2. bdev->bd_mutex -> mtd_table_mutex (when blkdev_get_by_dev calls __blkdev_get, which then calls blktrans_open) 3. mtd_table_mutex -> major_names_lock (when register_mtd_blktrans calls __register_blkdev) 4. major_names_lock -> loop_ctl_mutex (when blk_request_module calls loop_probe) Hence there's an overall dependency of: loop_ctl_mutex ----------> bdev->bd_mutex ^ | | | | v major_names_lock <--------- mtd_table_mutex We can break this circular dependency by holding mtd_table_mutex only for the required critical section in register_mtd_blktrans. This avoids the mtd_table_mutex -> major_names_lock dependency. Reported-and-tested-by: syzbot+6a8a0d93c91e8fbf2e80@syzkaller.appspotmail.com Co-developed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig Signed-off-by: Desmond Cheong Zhi Xi Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210617160904.570111-1-desmondcheongzx@gmail.com --- drivers/mtd/mtd_blkdevs.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 8f97a4fe6e93..31b208fb225e 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -526,14 +526,10 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) if (!blktrans_notifier.list.next) register_mtd_user(&blktrans_notifier); - - mutex_lock(&mtd_table_mutex); - ret = register_blkdev(tr->major, tr->name); if (ret < 0) { printk(KERN_WARNING "Unable to register %s block device on major %d: %d\n", tr->name, tr->major, ret); - mutex_unlock(&mtd_table_mutex); return ret; } @@ -543,12 +539,12 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) tr->blkshift = ffs(tr->blksize) - 1; INIT_LIST_HEAD(&tr->devs); - list_add(&tr->list, &blktrans_majors); + mutex_lock(&mtd_table_mutex); + list_add(&tr->list, &blktrans_majors); mtd_for_each_device(mtd) if (mtd->type != MTD_ABSENT) tr->add_mtd(tr, mtd); - mutex_unlock(&mtd_table_mutex); return 0; } -- cgit From e83862ee1b9b1668826683f432b041875ec0c819 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 18 Jun 2021 16:42:07 +0300 Subject: mtd: mchp48l640: silence some uninitialized variable warnings Smatch complains that zero length read/writes will lead to an uninitalized return value. I don't know if that's possible, but it's nicer to return a zero literal anyway so let's do that. Fixes: 88d125026753 ("mtd: devices: add support for microchip 48l640 EERAM") Signed-off-by: Dan Carpenter Reviewed-by: Fabio Estevam Reviewed-by: Heiko Schocher Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/YMyir961W28TX5dT@mwanda --- drivers/mtd/devices/mchp48l640.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/devices/mchp48l640.c b/drivers/mtd/devices/mchp48l640.c index efc2003bd13a..ad66b5aaf4e9 100644 --- a/drivers/mtd/devices/mchp48l640.c +++ b/drivers/mtd/devices/mchp48l640.c @@ -229,7 +229,7 @@ static int mchp48l640_write(struct mtd_info *mtd, loff_t to, size_t len, woff += ws; } - return ret; + return 0; } static int mchp48l640_read_page(struct mtd_info *mtd, loff_t from, size_t len, @@ -286,7 +286,7 @@ static int mchp48l640_read(struct mtd_info *mtd, loff_t from, size_t len, woff += ws; } - return ret; + return 0; }; static const struct mchp48_caps mchp48l640_caps = { -- cgit From 45bb1faa29effbd4ca4d581b32373f2eda309b95 Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Wed, 7 Jul 2021 15:53:59 +0200 Subject: mtd: core: handle flashes without OTP gracefully There are flash drivers which registers the OTP callbacks although the flash doesn't support OTP regions and return -ENODATA for these callbacks if there is no OTP. If this happens, the probe of the whole flash will fail. Fix it by handling the ENODATA return code and skip the OTP region nvmem setup. Fixes: 4b361cfa8624 ("mtd: core: add OTP nvmem provider support") Reported-by: Guenter Roeck Signed-off-by: Michael Walle Tested-by: Guenter Roeck Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210707135359.32398-1-michael@walle.cc --- drivers/mtd/mtdcore.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index b5ccd3037788..c8fd7f758938 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -806,7 +806,9 @@ static ssize_t mtd_otp_size(struct mtd_info *mtd, bool is_user) err: kfree(info); - return ret; + + /* ENODATA means there is no OTP region. */ + return ret == -ENODATA ? 0 : ret; } static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd, -- cgit From 2394e628738933aa014093d93093030f6232946d Mon Sep 17 00:00:00 2001 From: Andreas Persson Date: Mon, 12 Jul 2021 09:54:52 +0200 Subject: mtd: cfi_cmdset_0002: fix crash when erasing/writing AMD cards Erasing an AMD linear flash card (AM29F016D) crashes after the first sector has been erased. Likewise, writing to it crashes after two bytes have been written. The reason is a missing check for a null pointer - the cmdset_priv field is not set for this type of card. Fixes: 4844ef80305d ("mtd: cfi_cmdset_0002: Add support for polling status register") Signed-off-by: Andreas Persson Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/DB6P189MB05830B3530B8087476C5CFE4C1159@DB6P189MB0583.EURP189.PROD.OUTLOOK.COM --- drivers/mtd/chips/cfi_cmdset_0002.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 3097e93787f7..a761134fd3be 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -119,7 +119,7 @@ static int cfi_use_status_reg(struct cfi_private *cfi) struct cfi_pri_amdstd *extp = cfi->cmdset_priv; u8 poll_mask = CFI_POLL_STATUS_REG | CFI_POLL_DQ; - return extp->MinorVersion >= '5' && + return extp && extp->MinorVersion >= '5' && (extp->SoftwareFeatures & poll_mask) == CFI_POLL_STATUS_REG; } -- cgit From 5261cdf457ce3635bf18d393a3c1991dcfaf9d02 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Wed, 30 Jun 2021 12:32:52 +0200 Subject: crypto: drbg - select SHA512 With the swtich to use HMAC(SHA-512) as the default DRBG type, the configuration must now also select SHA-512. Fixes: 9b7b94683a9b "crypto: DRBG - switch to HMAC SHA512 DRBG as default DRBG" Reported-by: Sachin Sant Signed-off-by: Stephan Mueller Tested-by: Sachin Sant Signed-off-by: Herbert Xu --- crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index ca3b02dcbbfa..64b772c5d1c9 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1768,7 +1768,7 @@ config CRYPTO_DRBG_HMAC bool default y select CRYPTO_HMAC - select CRYPTO_SHA256 + select CRYPTO_SHA512 config CRYPTO_DRBG_HASH bool "Enable Hash DRBG" -- cgit From 9898cb24e454602beb6e17bacf9f97b26c85c955 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 9 Jul 2021 12:11:10 +0200 Subject: iio: adc: ti-ads7950: Ensure CS is deasserted after reading channels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ADS7950 requires that CS is deasserted after each SPI word. Before commit e2540da86ef8 ("iio: adc: ti-ads7950: use SPI_CS_WORD to reduce CPU usage") the driver used a message with one spi transfer per channel where each but the last one had .cs_change set to enforce a CS toggle. This was wrongly translated into a message with a single transfer and .cs_change set which results in a CS toggle after each word but the last which corrupts the first adc conversion of all readouts after the first readout. Fixes: e2540da86ef8 ("iio: adc: ti-ads7950: use SPI_CS_WORD to reduce CPU usage") Signed-off-by: Uwe Kleine-König Reviewed-by: David Lechner Tested-by: David Lechner Cc: Link: https://lore.kernel.org/r/20210709101110.1814294-1-u.kleine-koenig@pengutronix.de Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ti-ads7950.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c index 2383eacada87..a2b83f0bd526 100644 --- a/drivers/iio/adc/ti-ads7950.c +++ b/drivers/iio/adc/ti-ads7950.c @@ -568,7 +568,6 @@ static int ti_ads7950_probe(struct spi_device *spi) st->ring_xfer.tx_buf = &st->tx_buf[0]; st->ring_xfer.rx_buf = &st->rx_buf[0]; /* len will be set later */ - st->ring_xfer.cs_change = true; spi_message_add_tail(&st->ring_xfer, &st->ring_msg); -- cgit From 7e77ef8b8d600cf8448a2bbd32f682c28884551f Mon Sep 17 00:00:00 2001 From: Antti Keränen Date: Thu, 8 Jul 2021 12:54:29 +0300 Subject: iio: adis: set GPIO reset pin direction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set reset pin direction to output as the reset pin needs to be an active low output pin. Co-developed-by: Hannu Hartikainen Signed-off-by: Hannu Hartikainen Signed-off-by: Antti Keränen Reviewed-by: Nuno Sá Fixes: ecb010d44108 ("iio: imu: adis: Refactor adis_initial_startup") Link: https://lore.kernel.org/r/20210708095425.13295-1-detegr@rbx.email Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c index a5b421f42287..b9a06ca29bee 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c @@ -411,12 +411,11 @@ int __adis_initial_startup(struct adis *adis) int ret; /* check if the device has rst pin low */ - gpio = devm_gpiod_get_optional(&adis->spi->dev, "reset", GPIOD_ASIS); + gpio = devm_gpiod_get_optional(&adis->spi->dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(gpio)) return PTR_ERR(gpio); if (gpio) { - gpiod_set_value_cansleep(gpio, 1); msleep(10); /* bring device out of reset */ gpiod_set_value_cansleep(gpio, 0); -- cgit From 9a253bb42f190efd1a1c156939ad7298b3529dca Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 6 Jul 2021 09:35:23 +0100 Subject: arm64: dts: qcom: c630: fix correct powerdown pin for WSA881x WSA881x powerdown pin is connected to GPIO1, GPIO2 not GPIO2 and GPIO3, so correct this. This was working so far due to a shift bug in gpio driver, however once that is fixed this will stop working, so fix this! For some reason we forgot to add this dts change in last merge cycle so currently audio is broken in 5.13 as the gpio driver fix already landed in 5.13. Reported-by: Shawn Guo Fixes: 45021d35fcb2 ("arm64: dts: qcom: c630: Enable audio support") Signed-off-by: Srinivas Kandagatla Tested-by: Shawn Guo Link: https://lore.kernel.org/r/20210706083523.10601-1-srinivas.kandagatla@linaro.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index c2a709a384e9..d7591a4621a2 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -700,7 +700,7 @@ left_spkr: wsa8810-left{ compatible = "sdw10217211000"; reg = <0 3>; - powerdown-gpios = <&wcdgpio 2 GPIO_ACTIVE_HIGH>; + powerdown-gpios = <&wcdgpio 1 GPIO_ACTIVE_HIGH>; #thermal-sensor-cells = <0>; sound-name-prefix = "SpkrLeft"; #sound-dai-cells = <0>; @@ -708,7 +708,7 @@ right_spkr: wsa8810-right{ compatible = "sdw10217211000"; - powerdown-gpios = <&wcdgpio 3 GPIO_ACTIVE_HIGH>; + powerdown-gpios = <&wcdgpio 2 GPIO_ACTIVE_HIGH>; reg = <0 4>; #thermal-sensor-cells = <0>; sound-name-prefix = "SpkrRight"; -- cgit From 9d1fc2e4f5a94a492c7dd1ca577c66fdb7571c84 Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Tue, 13 Jul 2021 20:57:33 +0200 Subject: arm64: dts: qcom: msm8992-bullhead: Remove PSCI Bullhead firmware obviously doesn't support PSCI as it fails to boot with this definition. Fixes: 329e16d5f8fc ("arm64: dts: qcom: msm8992: Add PSCI support.") Signed-off-by: Petr Vorel Link: https://lore.kernel.org/r/20210713185734.380-2-petr.vorel@gmail.com Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts index 23cdcc9f7c72..5c6e17f11ee9 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts +++ b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2015, LGE Inc. All rights reserved. * Copyright (c) 2016, The Linux Foundation. All rights reserved. + * Copyright (c) 2021, Petr Vorel */ /dts-v1/; @@ -17,6 +18,9 @@ qcom,board-id = <0xb64 0>; qcom,pmic-id = <0x10009 0x1000A 0x0 0x0>; + /* Bullhead firmware doesn't support PSCI */ + /delete-node/ psci; + aliases { serial0 = &blsp1_uart2; }; -- cgit From 3cb6a271f4b04f11270111638c24fa5c0b846dec Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Tue, 13 Jul 2021 20:57:34 +0200 Subject: arm64: dts: qcom: msm8992-bullhead: Fix cont_splash_mem mapping cont_splash_mem has different memory mapping than generic from msm8994.dtsi: [ 0.000000] cma: Found cont_splash_mem@0, memory base 0x0000000003400000, size 12 MiB, limit 0xffffffffffffffff [ 0.000000] cma: CMA: reserved 12 MiB at 0x0000000003400000 for cont_splash_mem This fixes boot. Fixes: 976d321f32dc ("arm64: dts: qcom: msm8992: Make the DT an overlay on top of 8994") Signed-off-by: Petr Vorel Link: https://lore.kernel.org/r/20210713185734.380-3-petr.vorel@gmail.com Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts index 5c6e17f11ee9..1ccca83292ac 100644 --- a/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts +++ b/arch/arm64/boot/dts/qcom/msm8992-bullhead-rev-101.dts @@ -10,6 +10,9 @@ #include "pm8994.dtsi" #include "pmi8994.dtsi" +/* cont_splash_mem has different memory mapping */ +/delete-node/ &cont_splash_mem; + / { model = "LG Nexus 5X"; compatible = "lg,bullhead", "qcom,msm8992"; @@ -42,6 +45,11 @@ ftrace-size = <0x10000>; pmsg-size = <0x20000>; }; + + cont_splash_mem: memory@3400000 { + reg = <0 0x03400000 0 0x1200000>; + no-map; + }; }; }; -- cgit From 4152433c397697acc4b02c4a10d17d5859c2730d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 20 Jul 2021 21:14:05 +1000 Subject: arm64: efi: kaslr: Fix occasional random alloc (and boot) failure The EFI stub random allocator used for kaslr on arm64 has a subtle bug. In function get_entry_num_slots() which counts the number of possible allocation "slots" for the image in a given chunk of free EFI memory, "last_slot" can become negative if the chunk is smaller than the requested allocation size. The test "if (first_slot > last_slot)" doesn't catch it because both first_slot and last_slot are unsigned. I chose not to make them signed to avoid problems if this is ever used on architectures where there are meaningful addresses with the top bit set. Instead, fix it with an additional test against the allocation size. This can cause a boot failure in addition to a loss of randomisation due to another bug in the arm64 stub fixed separately. Signed-off-by: Benjamin Herrenschmidt Fixes: 2ddbfc81eac8 ("efi: stub: add implementation of efi_random_alloc()") Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/libstub/randomalloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index a408df474d83..724155b9e10d 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -30,6 +30,8 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, (u64)ULONG_MAX); + if (region_end < size) + return 0; first_slot = round_up(md->phys_addr, align); last_slot = round_down(region_end - size + 1, align); -- cgit From 4cbb02fa76de4bbada0af9409fcce3aa747880ea Mon Sep 17 00:00:00 2001 From: Sibi Sankar Date: Tue, 20 Jul 2021 22:39:13 +0530 Subject: arm64: dts: qcom: sc7280: Fixup cpufreq domain info for cpu7 The SC7280 SoC supports a 4-Silver/3-Gold/1-Gold+ configuration and hence the cpu7 node should point to cpufreq domain 2 instead. Fixes: 7dbd121a2c58 ("arm64: dts: qcom: sc7280: Add cpufreq hw node") Signed-off-by: Sibi Sankar Link: https://lore.kernel.org/r/1626800953-613-1-git-send-email-sibis@codeaurora.org Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sc7280.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index a8c274ad74c4..188c5768a55a 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -200,7 +200,7 @@ &BIG_CPU_SLEEP_1 &CLUSTER_SLEEP_0>; next-level-cache = <&L2_700>; - qcom,freq-domain = <&cpufreq_hw 1>; + qcom,freq-domain = <&cpufreq_hw 2>; #cooling-cells = <2>; L2_700: l2-cache { compatible = "cache"; -- cgit From 83f877a09516bcb82e34df621cc3a794509a11a3 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 21 Jul 2021 12:40:10 +0100 Subject: xen/events: remove redundant initialization of variable irq The variable irq is being initialized with a value that is never read, it is being updated later on. The assignment is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210721114010.108648-1-colin.king@canonical.com Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- drivers/xen/events/events_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index d7e361fb0548..154daddbdcb4 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1009,7 +1009,7 @@ static void __unbind_from_irq(unsigned int irq) int xen_bind_pirq_gsi_to_irq(unsigned gsi, unsigned pirq, int shareable, char *name) { - int irq = -1; + int irq; struct physdev_irq irq_op; int ret; -- cgit From e062233c0ed0a76b6dd4ec785550419a323f9380 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Mon, 12 Jul 2021 10:07:36 +0200 Subject: drm/mediatek: dpi: Fix NULL dereference in mtk_dpi_bridge_atomic_check bridge->driver_private is not set (NULL) so use bridge_to_dpi(bridge) like it's done in bridge_atomic_get_output_bus_fmts Fixes: ec8747c52434 ("drm/mediatek: dpi: Add bus format negotiation") Signed-off-by: Frank Wunderlich Tested-by: Hsin-Yi Wang Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index bced555648b0..a2eca1f66984 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -605,7 +605,7 @@ static int mtk_dpi_bridge_atomic_check(struct drm_bridge *bridge, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state) { - struct mtk_dpi *dpi = bridge->driver_private; + struct mtk_dpi *dpi = bridge_to_dpi(bridge); unsigned int out_bus_format; out_bus_format = bridge_state->output_bus_cfg.format; -- cgit From b9a4b57f423ff8ff9ab9b68a238bdc3e7678f723 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:04:45 +0200 Subject: ASoC: codecs: wcd938x: fix wcd module dependency With SND_SOC_ALL_CODECS=y and SND_SOC_WCD938X_SDW=m, there is a link error from a reverse dependency, since the built-in codec driver calls into the modular soundwire back-end: x86_64-linux-ld: sound/soc/codecs/wcd938x.o: in function `wcd938x_codec_free': wcd938x.c:(.text+0x2c0): undefined reference to `wcd938x_sdw_free' x86_64-linux-ld: sound/soc/codecs/wcd938x.o: in function `wcd938x_codec_hw_params': wcd938x.c:(.text+0x2f6): undefined reference to `wcd938x_sdw_hw_params' x86_64-linux-ld: sound/soc/codecs/wcd938x.o: in function `wcd938x_codec_set_sdw_stream': wcd938x.c:(.text+0x332): undefined reference to `wcd938x_sdw_set_sdw_stream' x86_64-linux-ld: sound/soc/codecs/wcd938x.o: in function `wcd938x_tx_swr_ctrl': wcd938x.c:(.text+0x23de): undefined reference to `wcd938x_swr_get_current_bank' x86_64-linux-ld: sound/soc/codecs/wcd938x.o: in function `wcd938x_bind': wcd938x.c:(.text+0x2579): undefined reference to `wcd938x_sdw_device_get' x86_64-linux-ld: wcd938x.c:(.text+0x25a1): undefined reference to `wcd938x_sdw_device_get' x86_64-linux-ld: wcd938x.c:(.text+0x262a): undefined reference to `__devm_regmap_init_sdw' Work around this using two small hacks: An added Kconfig dependency prevents the main driver from being built-in when soundwire support itself is a loadable module to allow calling devm_regmap_init_sdw(), and a Makefile trick links the wcd938x-sdw backend as built-in if needed to solve the dependency between the two modules. Fixes: 045442228868 ("ASoC: codecs: wcd938x: add audio routing and Kconfig") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210721150510.1837221-1-arnd@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/Kconfig | 1 + sound/soc/codecs/Makefile | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 032c87637f63..d24edb01c19b 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1559,6 +1559,7 @@ config SND_SOC_WCD934X config SND_SOC_WCD938X depends on SND_SOC_WCD938X_SDW tristate + depends on SOUNDWIRE || !SOUNDWIRE config SND_SOC_WCD938X_SDW tristate "WCD9380/WCD9385 Codec - SDW" diff --git a/sound/soc/codecs/Makefile b/sound/soc/codecs/Makefile index de8b83dd2c76..7bb38c370842 100644 --- a/sound/soc/codecs/Makefile +++ b/sound/soc/codecs/Makefile @@ -583,7 +583,10 @@ obj-$(CONFIG_SND_SOC_WCD_MBHC) += snd-soc-wcd-mbhc.o obj-$(CONFIG_SND_SOC_WCD9335) += snd-soc-wcd9335.o obj-$(CONFIG_SND_SOC_WCD934X) += snd-soc-wcd934x.o obj-$(CONFIG_SND_SOC_WCD938X) += snd-soc-wcd938x.o -obj-$(CONFIG_SND_SOC_WCD938X_SDW) += snd-soc-wcd938x-sdw.o +ifdef CONFIG_SND_SOC_WCD938X_SDW +# avoid link failure by forcing sdw code built-in when needed +obj-$(CONFIG_SND_SOC_WCD938X) += snd-soc-wcd938x-sdw.o +endif obj-$(CONFIG_SND_SOC_WL1273) += snd-soc-wl1273.o obj-$(CONFIG_SND_SOC_WM0010) += snd-soc-wm0010.o obj-$(CONFIG_SND_SOC_WM1250_EV1) += snd-soc-wm1250-ev1.o -- cgit From 6d20bf7c020f417fdef1810a22da17c126603472 Mon Sep 17 00:00:00 2001 From: Derek Fang Date: Wed, 21 Jul 2021 21:31:21 +0800 Subject: ASoC: rt5682: Adjust headset volume button threshold Adjust the threshold of headset button volume+ to fix the wrong button detection issue with some brand headsets. Signed-off-by: Derek Fang Link: https://lore.kernel.org/r/20210721133121.12333-1-derek.fang@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt5682.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index abcd6f483788..51ecaa2abcd1 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -44,6 +44,7 @@ static const struct reg_sequence patch_list[] = { {RT5682_I2C_CTRL, 0x000f}, {RT5682_PLL2_INTERNAL, 0x8266}, {RT5682_SAR_IL_CMD_3, 0x8365}, + {RT5682_SAR_IL_CMD_6, 0x0180}, }; void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev) -- cgit From d00f541a49406afc2c091aac121e29b3b61480a2 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 21 Jul 2021 13:36:03 -0500 Subject: ASoC: amd: renoir: Run hibernation callbacks The registers need to be re-initialized after hibernation or microphone may be non-functional. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213793 Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20210721183603.747-2-mario.limonciello@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/renoir/rn-pci-acp3x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/amd/renoir/rn-pci-acp3x.c b/sound/soc/amd/renoir/rn-pci-acp3x.c index 19438da5dfa5..7b8040e812a1 100644 --- a/sound/soc/amd/renoir/rn-pci-acp3x.c +++ b/sound/soc/amd/renoir/rn-pci-acp3x.c @@ -382,6 +382,8 @@ static const struct dev_pm_ops rn_acp_pm = { .runtime_resume = snd_rn_acp_resume, .suspend = snd_rn_acp_suspend, .resume = snd_rn_acp_resume, + .restore = snd_rn_acp_resume, + .poweroff = snd_rn_acp_suspend, }; static void snd_rn_acp_remove(struct pci_dev *pci) -- cgit From 090c57da5fd59fb59adc9d9341a77558c93b0abd Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 20 Jul 2021 22:03:48 +0200 Subject: ASoC: tlv320aic32x4: Fix TAS2505/TAS2521 processing block selection The TAS2505/TAS2521 does support only three processing block options, unlike TLV320AIC32x4 which supports 25. This is documented in TI slau472 2.5.1.2 Processing Blocks and Page 0 / Register 60: DAC Instruction Set - 0x00 / 0x3C. Limit the Processing Blocks maximum value to 3 on TAS2505/TAS2521 and select processing block PRB_P1 always, because for the configuration of teh codec implemented in this driver, this is the best quality option. Fixes: b4525b6196cd7 ("ASoC: tlv320aic32x4: add support for TAS2505") Signed-off-by: Marek Vasut Cc: Claudius Heine Cc: Mark Brown Link: https://lore.kernel.org/r/20210720200348.182139-1-marex@denx.de Signed-off-by: Mark Brown --- sound/soc/codecs/tlv320aic32x4.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index dcd8aeb45cb3..2e9175b37dc9 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -682,11 +682,20 @@ static int aic32x4_set_dosr(struct snd_soc_component *component, u16 dosr) static int aic32x4_set_processing_blocks(struct snd_soc_component *component, u8 r_block, u8 p_block) { - if (r_block > 18 || p_block > 25) - return -EINVAL; + struct aic32x4_priv *aic32x4 = snd_soc_component_get_drvdata(component); + + if (aic32x4->type == AIC32X4_TYPE_TAS2505) { + if (r_block || p_block > 3) + return -EINVAL; - snd_soc_component_write(component, AIC32X4_ADCSPB, r_block); - snd_soc_component_write(component, AIC32X4_DACSPB, p_block); + snd_soc_component_write(component, AIC32X4_DACSPB, p_block); + } else { /* AIC32x4 */ + if (r_block > 18 || p_block > 25) + return -EINVAL; + + snd_soc_component_write(component, AIC32X4_ADCSPB, r_block); + snd_soc_component_write(component, AIC32X4_DACSPB, p_block); + } return 0; } @@ -695,6 +704,7 @@ static int aic32x4_setup_clocks(struct snd_soc_component *component, unsigned int sample_rate, unsigned int channels, unsigned int bit_depth) { + struct aic32x4_priv *aic32x4 = snd_soc_component_get_drvdata(component); u8 aosr; u16 dosr; u8 adc_resource_class, dac_resource_class; @@ -721,19 +731,28 @@ static int aic32x4_setup_clocks(struct snd_soc_component *component, adc_resource_class = 6; dac_resource_class = 8; dosr_increment = 8; - aic32x4_set_processing_blocks(component, 1, 1); + if (aic32x4->type == AIC32X4_TYPE_TAS2505) + aic32x4_set_processing_blocks(component, 0, 1); + else + aic32x4_set_processing_blocks(component, 1, 1); } else if (sample_rate <= 96000) { aosr = 64; adc_resource_class = 6; dac_resource_class = 8; dosr_increment = 4; - aic32x4_set_processing_blocks(component, 1, 9); + if (aic32x4->type == AIC32X4_TYPE_TAS2505) + aic32x4_set_processing_blocks(component, 0, 1); + else + aic32x4_set_processing_blocks(component, 1, 9); } else if (sample_rate == 192000) { aosr = 32; adc_resource_class = 3; dac_resource_class = 4; dosr_increment = 2; - aic32x4_set_processing_blocks(component, 13, 19); + if (aic32x4->type == AIC32X4_TYPE_TAS2505) + aic32x4_set_processing_blocks(component, 0, 1); + else + aic32x4_set_processing_blocks(component, 13, 19); } else { dev_err(component->dev, "Sampling rate not supported\n"); return -EINVAL; -- cgit From 6b57ba3243c5774e5b2a0984e8ca0d34a126ac6b Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Mon, 12 Jul 2021 17:46:57 +0800 Subject: drm/mediatek: mtk-dpi: Set out_fmt from config if not the last bridge atomic_get_output_bus_fmts() is only called when the bridge is the last element in the bridge chain. If mtk-dpi is not the last bridge, the format of output_bus_cfg is MEDIA_BUS_FMT_FIXED, and mtk_dpi_dual_edge() will fail to write correct value to regs. Fixes: ec8747c52434 ("drm/mediatek: dpi: Add bus format negotiation") Signed-off-by: Hsin-Yi Wang Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_dpi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index a2eca1f66984..e94738fe4db8 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -610,6 +610,10 @@ static int mtk_dpi_bridge_atomic_check(struct drm_bridge *bridge, out_bus_format = bridge_state->output_bus_cfg.format; + if (out_bus_format == MEDIA_BUS_FMT_FIXED) + if (dpi->conf->num_output_fmts) + out_bus_format = dpi->conf->output_fmts[0]; + dev_dbg(dpi->dev, "input format 0x%04x, output format 0x%04x\n", bridge_state->input_bus_cfg.format, bridge_state->output_bus_cfg.format); -- cgit From 1a64a7aff8da352c9419de3d5c34343682916411 Mon Sep 17 00:00:00 2001 From: "jason-jh.lin" Date: Thu, 22 Jul 2021 09:47:43 +0800 Subject: drm/mediatek: Fix cursor plane no update The cursor plane should use the current plane state in atomic_async_update because it would not be the new plane state in the global atomic state since _swap_state happened when those hook are run. Fix cursor plane issue by below modification: 1. Remove plane_helper_funcs->atomic_update(plane, state) in mtk_drm_crtc_async_update. 2. Add mtk_drm_update_new_state in to mtk_plane_atomic_async_update to update the cursor plane by current plane state hook and update others plane by the new_state. Fixes: 37418bf14c13 ("drm: Use state helper instead of the plane state pointer") Signed-off-by: jason-jh.lin Tested-by: Enric Balletbo i Serra Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_crtc.c | 3 -- drivers/gpu/drm/mediatek/mtk_drm_plane.c | 60 ++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c index 474efb844249..735efe79f075 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c @@ -532,13 +532,10 @@ void mtk_drm_crtc_async_update(struct drm_crtc *crtc, struct drm_plane *plane, struct drm_atomic_state *state) { struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc); - const struct drm_plane_helper_funcs *plane_helper_funcs = - plane->helper_private; if (!mtk_crtc->enabled) return; - plane_helper_funcs->atomic_update(plane, state); mtk_drm_crtc_update_config(mtk_crtc, false); } diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c index b5582dcf564c..e6dcb34d3052 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c @@ -110,6 +110,35 @@ static int mtk_plane_atomic_async_check(struct drm_plane *plane, true, true); } +static void mtk_plane_update_new_state(struct drm_plane_state *new_state, + struct mtk_plane_state *mtk_plane_state) +{ + struct drm_framebuffer *fb = new_state->fb; + struct drm_gem_object *gem; + struct mtk_drm_gem_obj *mtk_gem; + unsigned int pitch, format; + dma_addr_t addr; + + gem = fb->obj[0]; + mtk_gem = to_mtk_gem_obj(gem); + addr = mtk_gem->dma_addr; + pitch = fb->pitches[0]; + format = fb->format->format; + + addr += (new_state->src.x1 >> 16) * fb->format->cpp[0]; + addr += (new_state->src.y1 >> 16) * pitch; + + mtk_plane_state->pending.enable = true; + mtk_plane_state->pending.pitch = pitch; + mtk_plane_state->pending.format = format; + mtk_plane_state->pending.addr = addr; + mtk_plane_state->pending.x = new_state->dst.x1; + mtk_plane_state->pending.y = new_state->dst.y1; + mtk_plane_state->pending.width = drm_rect_width(&new_state->dst); + mtk_plane_state->pending.height = drm_rect_height(&new_state->dst); + mtk_plane_state->pending.rotation = new_state->rotation; +} + static void mtk_plane_atomic_async_update(struct drm_plane *plane, struct drm_atomic_state *state) { @@ -126,8 +155,10 @@ static void mtk_plane_atomic_async_update(struct drm_plane *plane, plane->state->src_h = new_state->src_h; plane->state->src_w = new_state->src_w; swap(plane->state->fb, new_state->fb); - new_plane_state->pending.async_dirty = true; + mtk_plane_update_new_state(new_state, new_plane_state); + wmb(); /* Make sure the above parameters are set before update */ + new_plane_state->pending.async_dirty = true; mtk_drm_crtc_async_update(new_state->crtc, plane, state); } @@ -189,14 +220,8 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane); struct mtk_plane_state *mtk_plane_state = to_mtk_plane_state(new_state); - struct drm_crtc *crtc = new_state->crtc; - struct drm_framebuffer *fb = new_state->fb; - struct drm_gem_object *gem; - struct mtk_drm_gem_obj *mtk_gem; - unsigned int pitch, format; - dma_addr_t addr; - if (!crtc || WARN_ON(!fb)) + if (!new_state->crtc || WARN_ON(!new_state->fb)) return; if (!new_state->visible) { @@ -204,24 +229,7 @@ static void mtk_plane_atomic_update(struct drm_plane *plane, return; } - gem = fb->obj[0]; - mtk_gem = to_mtk_gem_obj(gem); - addr = mtk_gem->dma_addr; - pitch = fb->pitches[0]; - format = fb->format->format; - - addr += (new_state->src.x1 >> 16) * fb->format->cpp[0]; - addr += (new_state->src.y1 >> 16) * pitch; - - mtk_plane_state->pending.enable = true; - mtk_plane_state->pending.pitch = pitch; - mtk_plane_state->pending.format = format; - mtk_plane_state->pending.addr = addr; - mtk_plane_state->pending.x = new_state->dst.x1; - mtk_plane_state->pending.y = new_state->dst.y1; - mtk_plane_state->pending.width = drm_rect_width(&new_state->dst); - mtk_plane_state->pending.height = drm_rect_height(&new_state->dst); - mtk_plane_state->pending.rotation = new_state->rotation; + mtk_plane_update_new_state(new_state, mtk_plane_state); wmb(); /* Make sure the above parameters are set before update */ mtk_plane_state->pending.dirty = true; } -- cgit From 5434d0dc56bce4510109a431a7eb71ec5131ef0f Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Thu, 22 Jul 2021 18:33:15 +0530 Subject: ASoC: amd: enable stop_dma_first flag for cz_dai_7219_98357 dai link DMA driver stop sequence should be invoked first before invoking I2S controller driver stop sequence for Stoneyridge platform. Enable stop_dma_first flag for cz_dai_7219_98357 dai link structure. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/20210722130328.23796-1-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/acp-da7219-max98357a.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c index 9449fb40a956..3c60c5f96dcb 100644 --- a/sound/soc/amd/acp-da7219-max98357a.c +++ b/sound/soc/amd/acp-da7219-max98357a.c @@ -525,6 +525,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { | SND_SOC_DAIFMT_CBM_CFM, .init = cz_da7219_init, .dpcm_playback = 1, + .stop_dma_first = 1, .ops = &cz_da7219_play_ops, SND_SOC_DAILINK_REG(designware1, dlgs, platform), }, @@ -534,6 +535,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_da7219_cap_ops, SND_SOC_DAILINK_REG(designware2, dlgs, platform), }, @@ -543,6 +545,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_playback = 1, + .stop_dma_first = 1, .ops = &cz_max_play_ops, SND_SOC_DAILINK_REG(designware3, mx, platform), }, @@ -553,6 +556,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_dmic0_cap_ops, SND_SOC_DAILINK_REG(designware3, adau, platform), }, @@ -563,6 +567,7 @@ static struct snd_soc_dai_link cz_dai_7219_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_dmic1_cap_ops, SND_SOC_DAILINK_REG(designware2, adau, platform), }, -- cgit From 32ec3960175e58a914fc242b66dfe33e9059568f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 23 Jul 2021 11:13:52 +0200 Subject: pinctrl: qcom: fix GPIOLIB dependencies Enabling the PINCTRL_SM8350 symbol without GPIOLIB or SCM causes a build failure: WARNING: unmet direct dependencies detected for PINCTRL_MSM Depends on [m]: PINCTRL [=y] && (ARCH_QCOM [=y] || COMPILE_TEST [=y]) && GPIOLIB [=y] && (QCOM_SCM [=m] || !QCOM_SCM [=m]) Selected by [y]: - PINCTRL_SM8350 [=y] && PINCTRL [=y] && (ARCH_QCOM [=y] || COMPILE_TEST [=y]) && GPIOLIB [=y] && OF [=y] aarch64-linux-ld: drivers/pinctrl/qcom/pinctrl-msm.o: in function `msm_gpio_irq_set_type': pinctrl-msm.c:(.text.msm_gpio_irq_set_type+0x1c8): undefined reference to `qcom_scm_io_readl' The main problem here is the 'select PINCTRL_MSM', which needs to be a 'depends on' as it is for all the other front-ends. As the GPIOLIB dependency is now implied by that, symbol, remove the duplicate dependencies in the process. Fixes: d5d348a3271f ("pinctrl: qcom: Add SM8350 pinctrl driver") Fixes: 376f9e34c10f ("drivers: pinctrl: qcom: fix Kconfig dependency on GPIOLIB") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210723091400.1669716-1-arnd@kernel.org Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/Kconfig | 63 ++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 32 deletions(-) diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig index 2f51b4f99393..cad4e60df618 100644 --- a/drivers/pinctrl/qcom/Kconfig +++ b/drivers/pinctrl/qcom/Kconfig @@ -13,7 +13,7 @@ config PINCTRL_MSM config PINCTRL_APQ8064 tristate "Qualcomm APQ8064 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -21,7 +21,7 @@ config PINCTRL_APQ8064 config PINCTRL_APQ8084 tristate "Qualcomm APQ8084 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -29,7 +29,7 @@ config PINCTRL_APQ8084 config PINCTRL_IPQ4019 tristate "Qualcomm IPQ4019 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -37,7 +37,7 @@ config PINCTRL_IPQ4019 config PINCTRL_IPQ8064 tristate "Qualcomm IPQ8064 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -45,7 +45,7 @@ config PINCTRL_IPQ8064 config PINCTRL_IPQ8074 tristate "Qualcomm Technologies, Inc. IPQ8074 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for @@ -55,7 +55,7 @@ config PINCTRL_IPQ8074 config PINCTRL_IPQ6018 tristate "Qualcomm Technologies, Inc. IPQ6018 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for @@ -65,7 +65,7 @@ config PINCTRL_IPQ6018 config PINCTRL_MSM8226 tristate "Qualcomm 8226 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -74,7 +74,7 @@ config PINCTRL_MSM8226 config PINCTRL_MSM8660 tristate "Qualcomm 8660 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -82,7 +82,7 @@ config PINCTRL_MSM8660 config PINCTRL_MSM8960 tristate "Qualcomm 8960 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -90,7 +90,7 @@ config PINCTRL_MSM8960 config PINCTRL_MDM9615 tristate "Qualcomm 9615 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -98,7 +98,7 @@ config PINCTRL_MDM9615 config PINCTRL_MSM8X74 tristate "Qualcomm 8x74 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -106,7 +106,7 @@ config PINCTRL_MSM8X74 config PINCTRL_MSM8916 tristate "Qualcomm 8916 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -114,7 +114,7 @@ config PINCTRL_MSM8916 config PINCTRL_MSM8953 tristate "Qualcomm 8953 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -124,7 +124,7 @@ config PINCTRL_MSM8953 config PINCTRL_MSM8976 tristate "Qualcomm 8976 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -134,7 +134,7 @@ config PINCTRL_MSM8976 config PINCTRL_MSM8994 tristate "Qualcomm 8994 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -143,7 +143,7 @@ config PINCTRL_MSM8994 config PINCTRL_MSM8996 tristate "Qualcomm MSM8996 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -151,7 +151,7 @@ config PINCTRL_MSM8996 config PINCTRL_MSM8998 tristate "Qualcomm MSM8998 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -159,7 +159,7 @@ config PINCTRL_MSM8998 config PINCTRL_QCS404 tristate "Qualcomm QCS404 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -167,7 +167,7 @@ config PINCTRL_QCS404 config PINCTRL_QDF2XXX tristate "Qualcomm Technologies QDF2xxx pin controller driver" - depends on GPIOLIB && ACPI + depends on ACPI depends on PINCTRL_MSM help This is the GPIO driver for the TLMM block found on the @@ -175,7 +175,7 @@ config PINCTRL_QDF2XXX config PINCTRL_QCOM_SPMI_PMIC tristate "Qualcomm SPMI PMIC pin controller driver" - depends on GPIOLIB && OF && SPMI + depends on OF && SPMI select REGMAP_SPMI select PINMUX select PINCONF @@ -190,7 +190,7 @@ config PINCTRL_QCOM_SPMI_PMIC config PINCTRL_QCOM_SSBI_PMIC tristate "Qualcomm SSBI PMIC pin controller driver" - depends on GPIOLIB && OF + depends on OF select PINMUX select PINCONF select GENERIC_PINCONF @@ -204,7 +204,7 @@ config PINCTRL_QCOM_SSBI_PMIC config PINCTRL_SC7180 tristate "Qualcomm Technologies Inc SC7180 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -213,7 +213,7 @@ config PINCTRL_SC7180 config PINCTRL_SC7280 tristate "Qualcomm Technologies Inc SC7280 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -222,7 +222,7 @@ config PINCTRL_SC7280 config PINCTRL_SC8180X tristate "Qualcomm Technologies Inc SC8180x pin controller driver" - depends on GPIOLIB && (OF || ACPI) + depends on (OF || ACPI) depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -231,7 +231,7 @@ config PINCTRL_SC8180X config PINCTRL_SDM660 tristate "Qualcomm Technologies Inc SDM660 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -240,7 +240,7 @@ config PINCTRL_SDM660 config PINCTRL_SDM845 tristate "Qualcomm Technologies Inc SDM845 pin controller driver" - depends on GPIOLIB && (OF || ACPI) + depends on (OF || ACPI) depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -249,7 +249,7 @@ config PINCTRL_SDM845 config PINCTRL_SDX55 tristate "Qualcomm Technologies Inc SDX55 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -258,7 +258,7 @@ config PINCTRL_SDX55 config PINCTRL_SM6125 tristate "Qualcomm Technologies Inc SM6125 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -267,7 +267,7 @@ config PINCTRL_SM6125 config PINCTRL_SM8150 tristate "Qualcomm Technologies Inc SM8150 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -276,7 +276,7 @@ config PINCTRL_SM8150 config PINCTRL_SM8250 tristate "Qualcomm Technologies Inc SM8250 pin controller driver" - depends on GPIOLIB && OF + depends on OF depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the @@ -285,8 +285,7 @@ config PINCTRL_SM8250 config PINCTRL_SM8350 tristate "Qualcomm Technologies Inc SM8350 pin controller driver" - depends on GPIOLIB && OF - select PINCTRL_MSM + depends on PINCTRL_MSM help This is the pinctrl, pinmux, pinconf and gpiolib driver for the Qualcomm Technologies Inc TLMM block found on the Qualcomm -- cgit From 798a315fc359aa6dbe48e09d802aa59b7e158ffc Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Thu, 1 Jul 2021 16:09:55 +0800 Subject: pinctrl: mediatek: Fix fallback behavior for bias_set_combo Some pin doesn't support PUPD register, if it fails and fallbacks with bias_set_combo case, it will call mtk_pinconf_bias_set_pupd_r1_r0() to modify the PUPD pin again. Since the general bias set are either PU/PD or PULLSEL/PULLEN, try bias_set or bias_set_rev1 for the other fallback case. If the pin doesn't support neither PU/PD nor PULLSEL/PULLEN, it will return -ENOTSUPP. Fixes: 81bd1579b43e ("pinctrl: mediatek: Fix fallback call path") Signed-off-by: Hsin-Yi Wang Reviewed-by: Chen-Yu Tsai Reviewed-by: Zhiyong Tao Link: https://lore.kernel.org/r/20210701080955.2660294-1-hsinyi@chromium.org Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index 5b3b048725cc..45ebdeba985a 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -925,12 +925,10 @@ int mtk_pinconf_adv_pull_set(struct mtk_pinctrl *hw, err = hw->soc->bias_set(hw, desc, pullup); if (err) return err; - } else if (hw->soc->bias_set_combo) { - err = hw->soc->bias_set_combo(hw, desc, pullup, arg); - if (err) - return err; } else { - return -ENOTSUPP; + err = mtk_pinconf_bias_set_rev1(hw, desc, pullup); + if (err) + err = mtk_pinconf_bias_set(hw, desc, pullup); } } -- cgit From e2f55370b42205bda2f8b02c5933b9df2456bd53 Mon Sep 17 00:00:00 2001 From: Rahul Tanwar Date: Tue, 6 Jul 2021 16:20:59 +0800 Subject: MAINTAINERS: Add Rahul Tanwar as Intel LGM Gateway PCIe maintainer Add Rahul Tanwar as maintainer for PCIe RC controller driver for the Intel Lightning Mountain (LGM) Gateway SoC. Link: https://lore.kernel.org/r/b3249e08155e04ac08d820be3b8da29a913c472a.1625559158.git.rtanwar@maxlinear.com Signed-off-by: Rahul Tanwar Signed-off-by: Bjorn Helgaas --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a61f4f3b78a9..ad2520cef3cb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14422,6 +14422,13 @@ S: Maintained F: Documentation/devicetree/bindings/pci/hisilicon-histb-pcie.txt F: drivers/pci/controller/dwc/pcie-histb.c +PCIE DRIVER FOR INTEL LGM GW SOC +M: Rahul Tanwar +L: linux-pci@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/pci/intel-gw-pcie.yaml +F: drivers/pci/controller/dwc/pcie-intel-gw.c + PCIE DRIVER FOR MEDIATEK M: Ryder Lee M: Jianjun Wang -- cgit From 9f9decdb64c5cc05b66f7a6ede226dd90684570b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:13:20 +0200 Subject: iio: accel: fxls8962af: fix i2c dependency With CONFIG_SPI=y and CONFIG_I2C=m, building fxls8962af into vmlinux causes a link error against the I2C module: aarch64-linux-ld: drivers/iio/accel/fxls8962af-core.o: in function `fxls8962af_fifo_flush': fxls8962af-core.c:(.text+0x3a0): undefined reference to `i2c_verify_client' Work around it by adding a Kconfig dependency that forces the SPI driver to be a loadable module whenever I2C is a module. Fixes: af959b7b96b8 ("iio: accel: fxls8962af: fix errata bug E3 - I2C burst reads") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210721151330.2176653-1-arnd@kernel.org Signed-off-by: Jonathan Cameron --- drivers/iio/accel/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index 0e56ace61103..8d8b1ba42ff8 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -231,6 +231,7 @@ config DMARD10 config FXLS8962AF tristate + depends on I2C || !I2C # cannot be built-in for modular I2C config FXLS8962AF_I2C tristate "NXP FXLS8962AF/FXLS8964AF Accelerometer I2C Driver" @@ -247,6 +248,7 @@ config FXLS8962AF_I2C config FXLS8962AF_SPI tristate "NXP FXLS8962AF/FXLS8964AF Accelerometer SPI Driver" depends on SPI + depends on I2C || !I2C select FXLS8962AF select REGMAP_SPI help -- cgit From 14a30238ecb8dcf52a9e2be514414e3ec443b536 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Wed, 21 Jul 2021 16:03:45 +0200 Subject: dt-bindings: iio: st: Remove wrong items length check The original bindings was listing the length of the interrupts as either 1 or 2, depending on the setup. This is also what is enforced by the top level schema. However, that is further constrained with an if clause that require exactly two interrupts, even though it might not make sense on those devices or in some setups. Let's remove the clause entirely. Cc: Denis Ciocca Cc: Lars-Peter Clausen Fixes: 0cd71145803d ("iio: st-sensors: Update ST Sensor bindings") Signed-off-by: Maxime Ripard Reviewed-by: Rob Herring Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20210721140424.725744-16-maxime@cerno.tech Signed-off-by: Jonathan Cameron --- .../devicetree/bindings/iio/st,st-sensors.yaml | 41 ---------------------- 1 file changed, 41 deletions(-) diff --git a/Documentation/devicetree/bindings/iio/st,st-sensors.yaml b/Documentation/devicetree/bindings/iio/st,st-sensors.yaml index b2a1e42c56fa..71de5631ebae 100644 --- a/Documentation/devicetree/bindings/iio/st,st-sensors.yaml +++ b/Documentation/devicetree/bindings/iio/st,st-sensors.yaml @@ -152,47 +152,6 @@ allOf: maxItems: 1 st,drdy-int-pin: false - - if: - properties: - compatible: - enum: - # Two intertial interrupts i.e. accelerometer/gyro interrupts - - st,h3lis331dl-accel - - st,l3g4200d-gyro - - st,l3g4is-gyro - - st,l3gd20-gyro - - st,l3gd20h-gyro - - st,lis2de12 - - st,lis2dw12 - - st,lis2hh12 - - st,lis2dh12-accel - - st,lis331dl-accel - - st,lis331dlh-accel - - st,lis3de - - st,lis3dh-accel - - st,lis3dhh - - st,lis3mdl-magn - - st,lng2dm-accel - - st,lps331ap-press - - st,lsm303agr-accel - - st,lsm303dlh-accel - - st,lsm303dlhc-accel - - st,lsm303dlm-accel - - st,lsm330-accel - - st,lsm330-gyro - - st,lsm330d-accel - - st,lsm330d-gyro - - st,lsm330dl-accel - - st,lsm330dl-gyro - - st,lsm330dlc-accel - - st,lsm330dlc-gyro - - st,lsm9ds0-gyro - - st,lsm9ds1-magn - then: - properties: - interrupts: - maxItems: 2 - required: - compatible - reg -- cgit From 84edec86f449adea9ee0b4912a79ab8d9d65abb7 Mon Sep 17 00:00:00 2001 From: Chris Lesiak Date: Mon, 14 Jun 2021 09:18:20 -0500 Subject: iio: humidity: hdc100x: Add margin to the conversion time The datasheets have the following note for the conversion time specification: "This parameter is specified by design and/or characterization and it is not tested in production." Parts have been seen that require more time to do 14-bit conversions for the relative humidity channel. The result is ENXIO due to the address phase of a transfer not getting an ACK. Delay an additional 1 ms per conversion to allow for additional margin. Fixes: 4839367d99e3 ("iio: humidity: add HDC100x support") Signed-off-by: Chris Lesiak Acked-by: Matt Ranostay Link: https://lore.kernel.org/r/20210614141820.2034827-1-chris.lesiak@licor.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/humidity/hdc100x.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c index 2a957f19048e..9e0fce917ce4 100644 --- a/drivers/iio/humidity/hdc100x.c +++ b/drivers/iio/humidity/hdc100x.c @@ -25,6 +25,8 @@ #include #include +#include + #define HDC100X_REG_TEMP 0x00 #define HDC100X_REG_HUMIDITY 0x01 @@ -166,7 +168,7 @@ static int hdc100x_get_measurement(struct hdc100x_data *data, struct iio_chan_spec const *chan) { struct i2c_client *client = data->client; - int delay = data->adc_int_us[chan->address]; + int delay = data->adc_int_us[chan->address] + 1*USEC_PER_MSEC; int ret; __be16 val; @@ -316,7 +318,7 @@ static irqreturn_t hdc100x_trigger_handler(int irq, void *p) struct iio_dev *indio_dev = pf->indio_dev; struct hdc100x_data *data = iio_priv(indio_dev); struct i2c_client *client = data->client; - int delay = data->adc_int_us[0] + data->adc_int_us[1]; + int delay = data->adc_int_us[0] + data->adc_int_us[1] + 2*USEC_PER_MSEC; int ret; /* dual read starts at temp register */ -- cgit From d66cd5dea551e974580a64bf80b337b9a09ce63e Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sat, 24 Jul 2021 20:02:14 -0700 Subject: cpufreq: blacklist Qualcomm sc8180x in cpufreq-dt-platdev The Qualcomm SC8180x platform uses the qcom-cpufreq-hw driver, so it in the cpufreq-dt-platdev driver's blocklist. Signed-off-by: Bjorn Andersson Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index bef7528aecd3..9d5a38a91f10 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -139,6 +139,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,qcs404", }, { .compatible = "qcom,sc7180", }, { .compatible = "qcom,sc7280", }, + { .compatible = "qcom,sc8180x", }, { .compatible = "qcom,sdm845", }, { .compatible = "st,stih407", }, -- cgit From 2c39ca6885a2ec03e5c9e7c12a4da2aa8926605a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 23 Jul 2021 19:02:00 +0100 Subject: ASoC: tlv320aic31xx: Fix jack detection after suspend The tlv320aic31xx driver relies on regcache_sync() to restore the register contents after going to _BIAS_OFF, for example during system suspend. This does not work for the jack detection configuration since that is configured via the same register that status is read back from so the register is volatile and not cached. This can also cause issues during init if the jack detection ends up getting set up before the CODEC is initially brought out of _BIAS_OFF, we will reset the CODEC and resync the cache as part of that process. Fix this by explicitly reapplying the jack detection configuration after resyncing the register cache during power on. This issue was found by an engineer working off-list on a product kernel, I just wrote up the upstream fix. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210723180200.25105-1-broonie@kernel.org Cc: stable@vger.kernel.org --- sound/soc/codecs/tlv320aic31xx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index b504d63385b3..52d2c968b5c0 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -35,6 +35,9 @@ #include "tlv320aic31xx.h" +static int aic31xx_set_jack(struct snd_soc_component *component, + struct snd_soc_jack *jack, void *data); + static const struct reg_default aic31xx_reg_defaults[] = { { AIC31XX_CLKMUX, 0x00 }, { AIC31XX_PLLPR, 0x11 }, @@ -1256,6 +1259,13 @@ static int aic31xx_power_on(struct snd_soc_component *component) return ret; } + /* + * The jack detection configuration is in the same register + * that is used to report jack detect status so is volatile + * and not covered by the cache sync, restore it separately. + */ + aic31xx_set_jack(component, aic31xx->jack, NULL); + return 0; } -- cgit From 0fbea680540108b09db7b26d9f4d24236d58a6ad Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Thu, 22 Jul 2021 22:05:52 -0300 Subject: iommu/dma: Fix leak in non-contiguous API Currently, iommu_dma_alloc_noncontiguous() allocates a struct dma_sgt_handle object to hold some state needed for iommu_dma_free_noncontiguous(). However, the handle is neither freed nor returned explicitly by the ->alloc_noncontiguous method, and therefore seems leaked. This was found by code inspection, so please review carefully and test. As a side note, it appears the struct dma_sgt_handle type is exposed to users of the DMA-API by linux/dma-map-ops.h, but is has no users or functions returning the type explicitly. This may indicate it's a good idea to move the struct dma_sgt_handle type to drivers/iommu/dma-iommu.c. The decision is left to maintainers :-) Cc: stable@vger.kernel.org Fixes: e817ee5f2f95c ("dma-iommu: implement ->alloc_noncontiguous") Signed-off-by: Ezequiel Garcia Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210723010552.50969-1-ezequiel@collabora.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 98ba927aee1a..6f0df629353f 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -768,6 +768,7 @@ static void iommu_dma_free_noncontiguous(struct device *dev, size_t size, __iommu_dma_unmap(dev, sgt->sgl->dma_address, size); __iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT); sg_free_table(&sh->sgt); + kfree(sh); } #endif /* CONFIG_DMA_REMAP */ -- cgit From 0f32d9eb38c13c32895b5bf695eac639cee02d6c Mon Sep 17 00:00:00 2001 From: Brent Lu Date: Mon, 26 Jul 2021 17:45:25 +0800 Subject: ASoC: Intel: sof_da7219_mx98360a: fail to initialize soundcard The default codec for speaker amp's DAI Link is max98373 and will be overwritten in probe function if the board id is sof_da7219_mx98360a. However, the probe function does not do it because the board id is changed in earlier commit. Fixes: 1cc04d195dc2 ("ASoC: Intel: sof_da7219_max98373: shrink platform_id below 20 characters") Signed-off-by: Brent Lu Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210726094525.5748-1-brent.lu@intel.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/sof_da7219_max98373.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/boards/sof_da7219_max98373.c b/sound/soc/intel/boards/sof_da7219_max98373.c index 896251d742fe..b7b3b0bf994a 100644 --- a/sound/soc/intel/boards/sof_da7219_max98373.c +++ b/sound/soc/intel/boards/sof_da7219_max98373.c @@ -404,7 +404,7 @@ static int audio_probe(struct platform_device *pdev) return -ENOMEM; /* By default dais[0] is configured for max98373 */ - if (!strcmp(pdev->name, "sof_da7219_max98360a")) { + if (!strcmp(pdev->name, "sof_da7219_mx98360a")) { dais[0] = (struct snd_soc_dai_link) { .name = "SSP1-Codec", .id = 0, -- cgit From 2635c226036c1bf44b86575d0bc721505c8201e3 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Mon, 26 Jul 2021 13:21:42 -0500 Subject: ASoC: topology: Select SND_DYNAMIC_MINORS The indexes of the devices are described within the topology file, it is a possibility that the topology encodes invalid indexes when DYNAMIC_MINORS is not enabled in kernel: #define SNDRV_MINOR_COMPRESS 2 /* 2 - 3 */ #define SNDRV_MINOR_HWDEP 4 /* 4 - 7 */ #define SNDRV_MINOR_RAWMIDI 8 /* 8 - 15 */ #define SNDRV_MINOR_PCM_PLAYBACK 16 /* 16 - 23 */ #define SNDRV_MINOR_PCM_CAPTURE 24 /* 24 - 31 */ If the topology assigns an index greater than 7 for PLAYBACK/CAPTURE PCM then there will be minor number collision. As an example: card0 creates a capture PCM with index 10 -> minor = 34 card1 creates compress device with index 0 -> minor = 34 Card1 will fail to instantiate because the minor for the compress stream is already taken. To avoid seemingly mysterious issues with card creation, select the DYNAMIC_MINORS when the topology is enabled. The other option would be to try to do out of bound index checks in case of DYNAMIC_MINOR is not enabled and do not even attempt to create the device with failing the topology load. Signed-off-by: Peter Ujfalusi Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210726182142.179604-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/Kconfig b/sound/soc/Kconfig index 8a13462e1a63..5dcf77af07af 100644 --- a/sound/soc/Kconfig +++ b/sound/soc/Kconfig @@ -36,6 +36,7 @@ config SND_SOC_COMPRESS config SND_SOC_TOPOLOGY bool + select SND_DYNAMIC_MINORS config SND_SOC_TOPOLOGY_KUNIT_TEST tristate "KUnit tests for SoC topology" -- cgit From 61bef9e68dca4316e1fc5fdf5f0c270bdbd65657 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 26 Jul 2021 13:28:55 -0500 Subject: ASoC: SOF: Intel: hda: enforce exclusion between HDaudio and SoundWire On some platforms with an external HDaudio codec, the DSDT reports the presence of SoundWire devices. Pin-mux restrictions and board reworks usually prevent coexistence between the two types of links, let's prevent unnecessary operations from starting. In the case of a single iDISP codec being detected, we still start the links even if no SoundWire machine configuration was detected, so that we can double-check what the hardware is and add the missing configuration if applicable. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Link: https://lore.kernel.org/r/20210726182855.179943-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index e1e368ff2b12..891e6e1b9121 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -187,12 +187,16 @@ static int hda_sdw_probe(struct snd_sof_dev *sdev) int hda_sdw_startup(struct snd_sof_dev *sdev) { struct sof_intel_hda_dev *hdev; + struct snd_sof_pdata *pdata = sdev->pdata; hdev = sdev->pdata->hw_pdata; if (!hdev->sdw) return 0; + if (pdata->machine && !pdata->machine->mach_params.link_mask) + return 0; + return sdw_intel_startup(hdev->sdw); } @@ -1002,6 +1006,14 @@ static int hda_generic_machine_select(struct snd_sof_dev *sdev) hda_mach->mach_params.dmic_num = dmic_num; pdata->machine = hda_mach; pdata->tplg_filename = tplg_filename; + + if (codec_num == 2) { + /* + * Prevent SoundWire links from starting when an external + * HDaudio codec is used + */ + hda_mach->mach_params.link_mask = 0; + } } } -- cgit From c3df5fb57fe8756d67fd56ed29da65cdfde839f9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 27 Jul 2021 13:12:20 -1000 Subject: cgroup: rstat: fix A-A deadlock on 32bit around u64_stats_sync 0fa294fb1985 ("cgroup: Replace cgroup_rstat_mutex with a spinlock") added cgroup_rstat_flush_irqsafe() allowing flushing to happen from the irq context. However, rstat paths use u64_stats_sync to synchronize access to 64bit stat counters on 32bit machines. u64_stats_sync is implemented using seq_lock and trying to read from an irq context can lead to A-A deadlock if the irq happens to interrupt the stat update. Fix it by using the irqsafe variants - u64_stats_update_begin_irqsave() and u64_stats_update_end_irqrestore() - in the update paths. Note that none of this matters on 64bit machines. All these are just for 32bit SMP setups. Note that the interface was introduced way back, its first and currently only use was recently added by 2d146aa3aa84 ("mm: memcontrol: switch to rstat"). Stable tagging targets this commit. Signed-off-by: Tejun Heo Reported-by: Rik van Riel Fixes: 2d146aa3aa84 ("mm: memcontrol: switch to rstat") Cc: stable@vger.kernel.org # v5.13+ --- block/blk-cgroup.c | 14 ++++++++------ kernel/cgroup/rstat.c | 19 +++++++++++-------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 575d7a2e7203..31fe9be179d9 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -790,6 +790,7 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) struct blkcg_gq *parent = blkg->parent; struct blkg_iostat_set *bisc = per_cpu_ptr(blkg->iostat_cpu, cpu); struct blkg_iostat cur, delta; + unsigned long flags; unsigned int seq; /* fetch the current per-cpu values */ @@ -799,21 +800,21 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu) } while (u64_stats_fetch_retry(&bisc->sync, seq)); /* propagate percpu delta to global */ - u64_stats_update_begin(&blkg->iostat.sync); + flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); blkg_iostat_set(&delta, &cur); blkg_iostat_sub(&delta, &bisc->last); blkg_iostat_add(&blkg->iostat.cur, &delta); blkg_iostat_add(&bisc->last, &delta); - u64_stats_update_end(&blkg->iostat.sync); + u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); /* propagate global delta to parent (unless that's root) */ if (parent && parent->parent) { - u64_stats_update_begin(&parent->iostat.sync); + flags = u64_stats_update_begin_irqsave(&parent->iostat.sync); blkg_iostat_set(&delta, &blkg->iostat.cur); blkg_iostat_sub(&delta, &blkg->iostat.last); blkg_iostat_add(&parent->iostat.cur, &delta); blkg_iostat_add(&blkg->iostat.last, &delta); - u64_stats_update_end(&parent->iostat.sync); + u64_stats_update_end_irqrestore(&parent->iostat.sync, flags); } } @@ -848,6 +849,7 @@ static void blkcg_fill_root_iostats(void) memset(&tmp, 0, sizeof(tmp)); for_each_possible_cpu(cpu) { struct disk_stats *cpu_dkstats; + unsigned long flags; cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu); tmp.ios[BLKG_IOSTAT_READ] += @@ -864,9 +866,9 @@ static void blkcg_fill_root_iostats(void) tmp.bytes[BLKG_IOSTAT_DISCARD] += cpu_dkstats->sectors[STAT_DISCARD] << 9; - u64_stats_update_begin(&blkg->iostat.sync); + flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync); blkg_iostat_set(&blkg->iostat.cur, &tmp); - u64_stats_update_end(&blkg->iostat.sync); + u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags); } } } diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 7f0e58917432..b264ab5652ba 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -347,19 +347,20 @@ static void cgroup_base_stat_flush(struct cgroup *cgrp, int cpu) } static struct cgroup_rstat_cpu * -cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp) +cgroup_base_stat_cputime_account_begin(struct cgroup *cgrp, unsigned long *flags) { struct cgroup_rstat_cpu *rstatc; rstatc = get_cpu_ptr(cgrp->rstat_cpu); - u64_stats_update_begin(&rstatc->bsync); + *flags = u64_stats_update_begin_irqsave(&rstatc->bsync); return rstatc; } static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, - struct cgroup_rstat_cpu *rstatc) + struct cgroup_rstat_cpu *rstatc, + unsigned long flags) { - u64_stats_update_end(&rstatc->bsync); + u64_stats_update_end_irqrestore(&rstatc->bsync, flags); cgroup_rstat_updated(cgrp, smp_processor_id()); put_cpu_ptr(rstatc); } @@ -367,18 +368,20 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp, void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec) { struct cgroup_rstat_cpu *rstatc; + unsigned long flags; - rstatc = cgroup_base_stat_cputime_account_begin(cgrp); + rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); rstatc->bstat.cputime.sum_exec_runtime += delta_exec; - cgroup_base_stat_cputime_account_end(cgrp, rstatc); + cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags); } void __cgroup_account_cputime_field(struct cgroup *cgrp, enum cpu_usage_stat index, u64 delta_exec) { struct cgroup_rstat_cpu *rstatc; + unsigned long flags; - rstatc = cgroup_base_stat_cputime_account_begin(cgrp); + rstatc = cgroup_base_stat_cputime_account_begin(cgrp, &flags); switch (index) { case CPUTIME_USER: @@ -394,7 +397,7 @@ void __cgroup_account_cputime_field(struct cgroup *cgrp, break; } - cgroup_base_stat_cputime_account_end(cgrp, rstatc); + cgroup_base_stat_cputime_account_end(cgrp, rstatc, flags); } /* -- cgit From 0f673c16c850250db386537a422c11d248fb123c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Jul 2021 14:01:56 +0200 Subject: iwlwifi: pnvm: accept multiple HW-type TLVs Some products (So) may have two different types of products with different mac-type that are otherwise equivalent, and have the same PNVM data, so the PNVM file will contain two (or perhaps later more) HW-type TLVs. Accept the file and use the data section that contains any matching entry. Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210719140154.a6a86e903035.Ic0b1b75c45d386698859f251518e8a5144431938@changeid --- drivers/net/wireless/intel/iwlwifi/fw/pnvm.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c index 2403490cbc26..b4b1f75b9c2a 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c @@ -37,6 +37,7 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data, u32 sha1 = 0; u16 mac_type = 0, rf_id = 0; u8 *pnvm_data = NULL, *tmp; + bool hw_match = false; u32 size = 0; int ret; @@ -83,6 +84,9 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data, break; } + if (hw_match) + break; + mac_type = le16_to_cpup((__le16 *)data); rf_id = le16_to_cpup((__le16 *)(data + sizeof(__le16))); @@ -90,15 +94,9 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data, "Got IWL_UCODE_TLV_HW_TYPE mac_type 0x%0x rf_id 0x%0x\n", mac_type, rf_id); - if (mac_type != CSR_HW_REV_TYPE(trans->hw_rev) || - rf_id != CSR_HW_RFID_TYPE(trans->hw_rf_id)) { - IWL_DEBUG_FW(trans, - "HW mismatch, skipping PNVM section, mac_type 0x%0x, rf_id 0x%0x.\n", - CSR_HW_REV_TYPE(trans->hw_rev), trans->hw_rf_id); - ret = -ENOENT; - goto out; - } - + if (mac_type == CSR_HW_REV_TYPE(trans->hw_rev) && + rf_id == CSR_HW_RFID_TYPE(trans->hw_rf_id)) + hw_match = true; break; case IWL_UCODE_TLV_SEC_RT: { struct iwl_pnvm_section *section = (void *)data; @@ -149,6 +147,15 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data, } done: + if (!hw_match) { + IWL_DEBUG_FW(trans, + "HW mismatch, skipping PNVM section (need mac_type 0x%x rf_id 0x%x)\n", + CSR_HW_REV_TYPE(trans->hw_rev), + CSR_HW_RFID_TYPE(trans->hw_rf_id)); + ret = -ENOENT; + goto out; + } + if (!size) { IWL_DEBUG_FW(trans, "Empty PNVM, skipping.\n"); ret = -ENOENT; -- cgit From a5bf1d4434b93394fa37494d78fe9f3513557185 Mon Sep 17 00:00:00 2001 From: Yaara Baruch Date: Mon, 19 Jul 2021 14:45:38 +0200 Subject: iwlwifi: add new SoF with JF devices Add new SoF JF devices to the driver. Signed-off-by: Yaara Baruch Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210719144523.0545d8964ff2.I3498879d8c184e42b1578a64aa7b7c99a18b75fb@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 16baee3d52ae..e8693be51fb1 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1110,6 +1110,40 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_NO_CDB, iwl_cfg_bz_a0_mr_a0, iwl_ax211_name), +/* SoF with JF2 */ + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9560_160_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9560_name), + +/* SoF with JF */ + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9461_160_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9462_160_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9461_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SOF, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9462_name), + /* So with GF */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, -- cgit From 891332f697e14bfb2002f56e21d9bbd4800a7098 Mon Sep 17 00:00:00 2001 From: Yaara Baruch Date: Mon, 19 Jul 2021 14:45:39 +0200 Subject: iwlwifi: add new so-jf devices Add new so-jf devices to the driver. Signed-off-by: Yaara Baruch Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210719144523.1c9a59fd2760.If5aef1942007828210f0f2c4a17985f63050bb45@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 36 ++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index e8693be51fb1..0b8a0cd3b652 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -1149,7 +1149,41 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, IWL_CFG_RF_TYPE_GF, IWL_CFG_ANY, IWL_CFG_160, IWL_CFG_ANY, IWL_CFG_NO_CDB, - iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name) + iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_name), + +/* So with JF2 */ + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9560_160_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9560_name), + +/* So with JF */ + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9461_160_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, + IWL_CFG_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9462_160_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9461_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1_DIV, + IWL_CFG_NO_160, IWL_CFG_CORES_BT, IWL_CFG_NO_CDB, + iwlax210_2ax_cfg_so_jf_b0, iwl9462_name) #endif /* CONFIG_IWLMVM */ }; -- cgit From 31428c78748cafdd9352e1f622eb89bf453d9700 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 26 Jul 2021 20:41:23 +0100 Subject: ASoC: component: Remove misplaced prefix handling in pin control functions When the component level pin control functions were added they for some no longer obvious reason handled adding prefixing of widget names. This meant that when the lack of prefix handling in the DAPM level pin operations was fixed by ae4fc532244b3bb4d (ASoC: dapm: use component prefix when checking widget names) the one device using the component level API ended up with the prefix being applied twice, causing all lookups to fail. Fix this by removing the redundant prefixing from the component code, which has the nice side effect of also making that code much simpler. Reported-by: Richard Fitzgerald Signed-off-by: Mark Brown Tested-by: Lucas Tanure Link: https://lore.kernel.org/r/20210726194123.54585-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/soc-component.c | 63 ++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index 3a5e84e16a87..c8dfd0de30e4 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -148,86 +148,75 @@ int snd_soc_component_set_bias_level(struct snd_soc_component *component, return soc_component_ret(component, ret); } -static int soc_component_pin(struct snd_soc_component *component, - const char *pin, - int (*pin_func)(struct snd_soc_dapm_context *dapm, - const char *pin)) -{ - struct snd_soc_dapm_context *dapm = - snd_soc_component_get_dapm(component); - char *full_name; - int ret; - - if (!component->name_prefix) { - ret = pin_func(dapm, pin); - goto end; - } - - full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin); - if (!full_name) { - ret = -ENOMEM; - goto end; - } - - ret = pin_func(dapm, full_name); - kfree(full_name); -end: - return soc_component_ret(component, ret); -} - int snd_soc_component_enable_pin(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_enable_pin); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_enable_pin(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_enable_pin); int snd_soc_component_enable_pin_unlocked(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_enable_pin_unlocked); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_enable_pin_unlocked(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_enable_pin_unlocked); int snd_soc_component_disable_pin(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_disable_pin); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_disable_pin(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_disable_pin); int snd_soc_component_disable_pin_unlocked(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_disable_pin_unlocked); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_disable_pin_unlocked(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_disable_pin_unlocked); int snd_soc_component_nc_pin(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_nc_pin); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_nc_pin(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_nc_pin); int snd_soc_component_nc_pin_unlocked(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_nc_pin_unlocked); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_nc_pin_unlocked(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_nc_pin_unlocked); int snd_soc_component_get_pin_status(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_get_pin_status); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_get_pin_status(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_get_pin_status); int snd_soc_component_force_enable_pin(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_force_enable_pin); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_force_enable_pin(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin); @@ -235,7 +224,9 @@ int snd_soc_component_force_enable_pin_unlocked( struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_force_enable_pin_unlocked); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_force_enable_pin_unlocked(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin_unlocked); -- cgit From acbf58e530416e167c3b323111f4013d9f2b0a7d Mon Sep 17 00:00:00 2001 From: Lucas Tanure Date: Wed, 28 Jul 2021 11:44:16 +0100 Subject: ASoC: wm_adsp: Let soc_cleanup_component_debugfs remove debugfs soc_cleanup_component_debugfs will debugfs_remove_recursive the component->debugfs_root, so adsp doesn't need to also remove the same entry. By doing that adsp also creates a race with core component, which causes a NULL pointer dereference Signed-off-by: Lucas Tanure Link: https://lore.kernel.org/r/20210728104416.636591-1-tanureal@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/wm_adsp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 549d98241dae..fe15cbc7bcaf 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -747,7 +747,6 @@ static void wm_adsp2_init_debugfs(struct wm_adsp *dsp, static void wm_adsp2_cleanup_debugfs(struct wm_adsp *dsp) { wm_adsp_debugfs_clear(dsp); - debugfs_remove_recursive(dsp->debugfs_root); } #else static inline void wm_adsp2_init_debugfs(struct wm_adsp *dsp, -- cgit From 830b69f6c059bc46451e7c4be8b796d483acb0bd Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 27 Jul 2021 17:49:48 +0100 Subject: MAINTAINERS: Add sound devicetree bindings for Wolfson Micro devices Include all wm* sound bindings in the section for Wolfson Micro drivers. This section already includes the actual driver source files. Also update the existing entry to match all wlf,* sound bindings. Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20210727164948.4308-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 516a9fead7e3..225e6c885b66 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19744,7 +19744,8 @@ F: Documentation/devicetree/bindings/extcon/wlf,arizona.yaml F: Documentation/devicetree/bindings/mfd/wlf,arizona.yaml F: Documentation/devicetree/bindings/mfd/wm831x.txt F: Documentation/devicetree/bindings/regulator/wlf,arizona.yaml -F: Documentation/devicetree/bindings/sound/wlf,arizona.yaml +F: Documentation/devicetree/bindings/sound/wlf,*.yaml +F: Documentation/devicetree/bindings/sound/wm* F: Documentation/hwmon/wm83??.rst F: arch/arm/mach-s3c/mach-crag6410* F: drivers/clk/clk-wm83*.c -- cgit From 153cca9caa81ca8912a70528daca4b9a523c6898 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 29 Jul 2021 10:21:34 +0200 Subject: platform/x86: Add and use a dual_accel_detect() helper Various 360 degree hinges (yoga) style 2-in-1 devices use 2 accelerometers to allow the OS to determine the angle between the display and the base of the device. On Windows these are read by a special HingeAngleService process which calls undocumented ACPI methods, to let the firmware know if the 2-in-1 is in tablet- or laptop-mode. The firmware may use this to disable the kbd and touchpad to avoid spurious input in tablet-mode as well as to report SW_TABLET_MODE info to the OS. Since Linux does not call these undocumented methods, the SW_TABLET_MODE info reported by various pdx86 drivers is incorrect on these devices. Before this commit the intel-hid and thinkpad_acpi code already had 2 hardcoded checks for ACPI hardware-ids of dual-accel sensors to avoid reporting broken info. And now we also have a bug-report about the same problem in the intel-vbtn code. Since there are at least 3 different ACPI hardware-ids in play, add a new dual_accel_detect() helper which checks for all 3, rather then adding different hardware-ids to the drivers as bug-reports trickle in. Having shared code which checks all known hardware-ids is esp. important for the intel-hid and intel-vbtn drivers as these are generic drivers which are used on a lot of devices. The BOSC0200 hardware-id requires special handling, because often it is used for a single-accelerometer setup. Only in a few cases it refers to a dual-accel setup, in which case there will be 2 I2cSerialBus resources in the device's resource-list, so the helper checks for this. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209011 Reported-and-tested-by: Julius Lehmann Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210729082134.6683-1-hdegoede@redhat.com --- drivers/platform/x86/Kconfig | 3 ++ drivers/platform/x86/dual_accel_detect.h | 75 ++++++++++++++++++++++++++++++++ drivers/platform/x86/intel-hid.c | 21 +++------ drivers/platform/x86/intel-vbtn.c | 18 ++++++-- drivers/platform/x86/thinkpad_acpi.c | 3 +- 5 files changed, 101 insertions(+), 19 deletions(-) create mode 100644 drivers/platform/x86/dual_accel_detect.h diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 7d385c3b2239..d12db6c316ea 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -508,6 +508,7 @@ config THINKPAD_ACPI depends on RFKILL || RFKILL = n depends on ACPI_VIDEO || ACPI_VIDEO = n depends on BACKLIGHT_CLASS_DEVICE + depends on I2C select ACPI_PLATFORM_PROFILE select HWMON select NVRAM @@ -691,6 +692,7 @@ config INTEL_HID_EVENT tristate "INTEL HID Event" depends on ACPI depends on INPUT + depends on I2C select INPUT_SPARSEKMAP help This driver provides support for the Intel HID Event hotkey interface. @@ -742,6 +744,7 @@ config INTEL_VBTN tristate "INTEL VIRTUAL BUTTON" depends on ACPI depends on INPUT + depends on I2C select INPUT_SPARSEKMAP help This driver provides support for the Intel Virtual Button interface. diff --git a/drivers/platform/x86/dual_accel_detect.h b/drivers/platform/x86/dual_accel_detect.h new file mode 100644 index 000000000000..1a069159da91 --- /dev/null +++ b/drivers/platform/x86/dual_accel_detect.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Helper code to detect 360 degree hinges (yoga) style 2-in-1 devices using 2 accelerometers + * to allow the OS to determine the angle between the display and the base of the device. + * + * On Windows these are read by a special HingeAngleService process which calls undocumented + * ACPI methods, to let the firmware know if the 2-in-1 is in tablet- or laptop-mode. + * The firmware may use this to disable the kbd and touchpad to avoid spurious input in + * tablet-mode as well as to report SW_TABLET_MODE info to the OS. + * + * Since Linux does not call these undocumented methods, the SW_TABLET_MODE info reported + * by various drivers/platform/x86 drivers is incorrect. These drivers use the detection + * code in this file to disable SW_TABLET_MODE reporting to avoid reporting broken info + * (instead userspace can derive the status itself by directly reading the 2 accels). + */ + +#include +#include + +static int dual_accel_i2c_resource_count(struct acpi_resource *ares, void *data) +{ + struct acpi_resource_i2c_serialbus *sb; + int *count = data; + + if (i2c_acpi_get_i2c_resource(ares, &sb)) + *count = *count + 1; + + return 1; +} + +static int dual_accel_i2c_client_count(struct acpi_device *adev) +{ + int ret, count = 0; + LIST_HEAD(r); + + ret = acpi_dev_get_resources(adev, &r, dual_accel_i2c_resource_count, &count); + if (ret < 0) + return ret; + + acpi_dev_free_resource_list(&r); + return count; +} + +static bool dual_accel_detect_bosc0200(void) +{ + struct acpi_device *adev; + int count; + + adev = acpi_dev_get_first_match_dev("BOSC0200", NULL, -1); + if (!adev) + return false; + + count = dual_accel_i2c_client_count(adev); + + acpi_dev_put(adev); + + return count == 2; +} + +static bool dual_accel_detect(void) +{ + /* Systems which use a pair of accels with KIOX010A / KIOX020A ACPI ids */ + if (acpi_dev_present("KIOX010A", NULL, -1)) + return true; + + /* Systems which use a single DUAL250E ACPI device to model 2 accels */ + if (acpi_dev_present("DUAL250E", NULL, -1)) + return true; + + /* Systems which use a single BOSC0200 ACPI device to model 2 accels */ + if (dual_accel_detect_bosc0200()) + return true; + + return false; +} diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index e5fbe017f8e1..2e4e97a626a5 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -14,6 +14,7 @@ #include #include #include +#include "dual_accel_detect.h" /* When NOT in tablet mode, VGBS returns with the flag 0x40 */ #define TABLET_MODE_FLAG BIT(6) @@ -122,6 +123,7 @@ struct intel_hid_priv { struct input_dev *array; struct input_dev *switches; bool wakeup_mode; + bool dual_accel; }; #define HID_EVENT_FILTER_UUID "eeec56b3-4442-408f-a792-4edd4d758054" @@ -451,22 +453,9 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) * SW_TABLET_MODE report, in these cases we enable support when receiving * the first event instead of during driver setup. * - * Some 360 degree hinges (yoga) style 2-in-1 devices use 2 accelerometers - * to allow the OS to determine the angle between the display and the base - * of the device. On Windows these are read by a special HingeAngleService - * process which calls an ACPI DSM (Device Specific Method) on the - * ACPI KIOX010A device node for the sensor in the display, to let the - * firmware know if the 2-in-1 is in tablet- or laptop-mode so that it can - * disable the kbd and touchpad to avoid spurious input in tablet-mode. - * - * The linux kxcjk1013 driver calls the DSM for this once at probe time - * to ensure that the builtin kbd and touchpad work. On some devices this - * causes a "spurious" 0xcd event on the intel-hid ACPI dev. In this case - * there is not a functional tablet-mode switch, so we should not register - * the tablet-mode switch device. + * See dual_accel_detect.h for more info on the dual_accel check. */ - if (!priv->switches && (event == 0xcc || event == 0xcd) && - !acpi_dev_present("KIOX010A", NULL, -1)) { + if (!priv->switches && !priv->dual_accel && (event == 0xcc || event == 0xcd)) { dev_info(&device->dev, "switch event received, enable switches supports\n"); err = intel_hid_switches_setup(device); if (err) @@ -607,6 +596,8 @@ static int intel_hid_probe(struct platform_device *device) return -ENOMEM; dev_set_drvdata(&device->dev, priv); + priv->dual_accel = dual_accel_detect(); + err = intel_hid_input_setup(device); if (err) { pr_err("Failed to setup Intel HID hotkeys\n"); diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index 888a764efad1..309166431063 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -14,6 +14,7 @@ #include #include #include +#include "dual_accel_detect.h" /* Returned when NOT in tablet mode on some HP Stream x360 11 models */ #define VGBS_TABLET_MODE_FLAG_ALT 0x10 @@ -66,6 +67,7 @@ static const struct key_entry intel_vbtn_switchmap[] = { struct intel_vbtn_priv { struct input_dev *buttons_dev; struct input_dev *switches_dev; + bool dual_accel; bool has_buttons; bool has_switches; bool wakeup_mode; @@ -160,6 +162,10 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) input_dev = priv->buttons_dev; } else if ((ke = sparse_keymap_entry_from_scancode(priv->switches_dev, event))) { if (!priv->has_switches) { + /* See dual_accel_detect.h for more info */ + if (priv->dual_accel) + return; + dev_info(&device->dev, "Registering Intel Virtual Switches input-dev after receiving a switch event\n"); ret = input_register_device(priv->switches_dev); if (ret) @@ -248,11 +254,15 @@ static const struct dmi_system_id dmi_switches_allow_list[] = { {} /* Array terminator */ }; -static bool intel_vbtn_has_switches(acpi_handle handle) +static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel) { unsigned long long vgbs; acpi_status status; + /* See dual_accel_detect.h for more info */ + if (dual_accel) + return false; + if (!dmi_check_system(dmi_switches_allow_list)) return false; @@ -263,13 +273,14 @@ static bool intel_vbtn_has_switches(acpi_handle handle) static int intel_vbtn_probe(struct platform_device *device) { acpi_handle handle = ACPI_HANDLE(&device->dev); - bool has_buttons, has_switches; + bool dual_accel, has_buttons, has_switches; struct intel_vbtn_priv *priv; acpi_status status; int err; + dual_accel = dual_accel_detect(); has_buttons = acpi_has_method(handle, "VBDL"); - has_switches = intel_vbtn_has_switches(handle); + has_switches = intel_vbtn_has_switches(handle, dual_accel); if (!has_buttons && !has_switches) { dev_warn(&device->dev, "failed to read Intel Virtual Button driver\n"); @@ -281,6 +292,7 @@ static int intel_vbtn_probe(struct platform_device *device) return -ENOMEM; dev_set_drvdata(&device->dev, priv); + priv->dual_accel = dual_accel; priv->has_buttons = has_buttons; priv->has_switches = has_switches; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 603156a6e3ed..50ff04c84650 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -73,6 +73,7 @@ #include #include #include +#include "dual_accel_detect.h" /* ThinkPad CMOS commands */ #define TP_CMOS_VOLUME_DOWN 0 @@ -3232,7 +3233,7 @@ static int hotkey_init_tablet_mode(void) * the laptop/tent/tablet mode to the EC. The bmc150 iio driver * does not support this, so skip the hotkey on these models. */ - if (has_tablet_mode && !acpi_dev_present("BOSC0200", "1", -1)) + if (has_tablet_mode && !dual_accel_detect()) tp_features.hotkey_tablet = TP_HOTKEY_TABLET_USES_GMMS; type = "GMMS"; } else if (acpi_evalf(hkey_handle, &res, "MHKG", "qd")) { -- cgit From facee1be7689f8cf573b9ffee6a5c28ee193615e Mon Sep 17 00:00:00 2001 From: David Brazdil Date: Wed, 28 Jul 2021 15:32:31 +0000 Subject: KVM: arm64: Fix off-by-one in range_is_memory Hyp checks whether an address range only covers RAM by checking the start/endpoints against a list of memblock_region structs. However, the endpoint here is exclusive but internally is treated as inclusive. Fix the off-by-one error that caused valid address ranges to be rejected. Cc: Quentin Perret Fixes: 90134ac9cabb6 ("KVM: arm64: Protect the .hyp sections from the host") Signed-off-by: David Brazdil Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210728153232.1018911-2-dbrazdil@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index d938ce95d3bd..a6ce991b1467 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -193,7 +193,7 @@ static bool range_is_memory(u64 start, u64 end) { struct kvm_mem_range r1, r2; - if (!find_mem_range(start, &r1) || !find_mem_range(end, &r2)) + if (!find_mem_range(start, &r1) || !find_mem_range(end - 1, &r2)) return false; if (r1.start != r2.start) return false; -- cgit From c4d7c51845af9542d42cd18a25c570583abf2768 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Thu, 29 Jul 2021 17:00:36 +0100 Subject: KVM: arm64: Fix race when enabling KVM_ARM_CAP_MTE When enabling KVM_CAP_ARM_MTE the ioctl checks that there are no VCPUs created to ensure that the capability is enabled before the VM is running. However no locks are held at that point so it is (theoretically) possible for another thread in the VMM to create VCPUs between the check and actually setting mte_enabled. Close the race by taking kvm->lock. Reported-by: Alexandru Elisei Fixes: 673638f434ee ("KVM: arm64: Expose KVM_ARM_CAP_MTE") Signed-off-by: Steven Price Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210729160036.20433-1-steven.price@arm.com --- arch/arm64/kvm/arm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index e9a2b8f27792..0ca72f5cda41 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -94,10 +94,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, kvm->arch.return_nisv_io_abort_to_user = true; break; case KVM_CAP_ARM_MTE: - if (!system_supports_mte() || kvm->created_vcpus) - return -EINVAL; - r = 0; - kvm->arch.mte_enabled = true; + mutex_lock(&kvm->lock); + if (!system_supports_mte() || kvm->created_vcpus) { + r = -EINVAL; + } else { + r = 0; + kvm->arch.mte_enabled = true; + } + mutex_unlock(&kvm->lock); break; default: r = -EINVAL; -- cgit From ee86f680ff4c9b406d49d4e22ddf10805b8a2137 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 29 Jul 2021 18:09:27 +0100 Subject: ASoC: cs42l42: Correct definition of ADC Volume control The ADC volume is a signed 8-bit number with range -97 to +12, with -97 being mute. Use a SOC_SINGLE_S8_TLV() to define this and fix the DECLARE_TLV_DB_SCALE() to have the correct start and mute flag. Fixes: 2c394ca79604 ("ASoC: Add support for CS42L42 codec") Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20210729170929.6589-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index eff013f295be..2fd20511f246 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -405,7 +405,7 @@ static const struct regmap_config cs42l42_regmap = { .use_single_write = true, }; -static DECLARE_TLV_DB_SCALE(adc_tlv, -9600, 100, false); +static DECLARE_TLV_DB_SCALE(adc_tlv, -9700, 100, true); static DECLARE_TLV_DB_SCALE(mixer_tlv, -6300, 100, true); static const char * const cs42l42_hpf_freq_text[] = { @@ -444,8 +444,7 @@ static const struct snd_kcontrol_new cs42l42_snd_controls[] = { CS42L42_ADC_INV_SHIFT, true, false), SOC_SINGLE("ADC Boost Switch", CS42L42_ADC_CTL, CS42L42_ADC_DIG_BOOST_SHIFT, true, false), - SOC_SINGLE_SX_TLV("ADC Volume", CS42L42_ADC_VOLUME, - CS42L42_ADC_VOL_SHIFT, 0xA0, 0x6C, adc_tlv), + SOC_SINGLE_S8_TLV("ADC Volume", CS42L42_ADC_VOLUME, -97, 12, adc_tlv), SOC_SINGLE("ADC WNF Switch", CS42L42_ADC_WNF_HPF_CTL, CS42L42_ADC_WNF_EN_SHIFT, true, false), SOC_SINGLE("ADC HPF Switch", CS42L42_ADC_WNF_HPF_CTL, -- cgit From 64324bac750b84ca54711fb7d332132fcdb87293 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 29 Jul 2021 18:09:28 +0100 Subject: ASoC: cs42l42: Don't allow SND_SOC_DAIFMT_LEFT_J The driver has no support for left-justified protocol so it should not have been allowing this to be passed to cs42l42_set_dai_fmt(). Signed-off-by: Richard Fitzgerald Fixes: 2c394ca79604 ("ASoC: Add support for CS42L42 codec") Link: https://lore.kernel.org/r/20210729170929.6589-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 2fd20511f246..38e243a815b1 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -788,7 +788,6 @@ static int cs42l42_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) /* interface format */ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: - case SND_SOC_DAIFMT_LEFT_J: break; default: return -EINVAL; -- cgit From 926ef1a4c245c093acc07807e466ad2ef0ff6ccb Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 29 Jul 2021 18:09:29 +0100 Subject: ASoC: cs42l42: Fix bclk calculation for mono An I2S frame always has a left and right channel slot even if mono data is being sent. So if channels==1 the actual bitclock frequency is 2 * snd_soc_params_to_bclk(params). Signed-off-by: Richard Fitzgerald Fixes: 2cdba9b045c7 ("ASoC: cs42l42: Use bclk from hw_params if set_sysclk was not called") Link: https://lore.kernel.org/r/20210729170929.6589-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 38e243a815b1..08ca05bfbeb3 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -830,6 +830,10 @@ static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream, cs42l42->srate = params_rate(params); cs42l42->bclk = snd_soc_params_to_bclk(params); + /* I2S frame always has 2 channels even for mono audio */ + if (channels == 1) + cs42l42->bclk *= 2; + switch(substream->stream) { case SNDRV_PCM_STREAM_CAPTURE: if (channels == 2) { -- cgit From 2e6b836312a477d647a7920b56810a5a25f6c856 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 28 Jul 2021 13:23:50 +0200 Subject: ASoC: intel: atom: Fix reference to PCM buffer address PCM buffers might be allocated dynamically when the buffer preallocation failed or a larger buffer is requested, and it's not guaranteed that substream->dma_buffer points to the actually used buffer. The address should be retrieved from runtime->dma_addr, instead of substream->dma_buffer (and shouldn't use virt_to_phys). Also, remove the line overriding runtime->dma_area superfluously, which was already set up at the PCM buffer allocation. Cc: Cezary Rojewski Cc: Pierre-Louis Bossart Cc: Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20210728112353.6675-3-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/intel/atom/sst-mfld-platform-pcm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 4124aa2fc247..5db2f4865bbb 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -127,7 +127,7 @@ static void sst_fill_alloc_params(struct snd_pcm_substream *substream, snd_pcm_uframes_t period_size; ssize_t periodbytes; ssize_t buffer_bytes = snd_pcm_lib_buffer_bytes(substream); - u32 buffer_addr = virt_to_phys(substream->dma_buffer.area); + u32 buffer_addr = substream->runtime->dma_addr; channels = substream->runtime->channels; period_size = substream->runtime->period_size; @@ -233,7 +233,6 @@ static int sst_platform_alloc_stream(struct snd_pcm_substream *substream, /* set codec params and inform SST driver the same */ sst_fill_pcm_params(substream, ¶m); sst_fill_alloc_params(substream, &alloc_params); - substream->runtime->dma_area = substream->dma_buffer.area; str_params.sparams = param; str_params.aparams = alloc_params; str_params.codec = SST_CODEC_TYPE_PCM; -- cgit From 42bc62c9f1d3d4880bdc27acb5ab4784209bb0b0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 28 Jul 2021 13:23:51 +0200 Subject: ASoC: xilinx: Fix reference to PCM buffer address PCM buffers might be allocated dynamically when the buffer preallocation failed or a larger buffer is requested, and it's not guaranteed that substream->dma_buffer points to the actually used buffer. The driver needs to refer to substream->runtime->dma_addr instead for the buffer address. Cc: Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20210728112353.6675-4-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/xilinx/xlnx_formatter_pcm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/xilinx/xlnx_formatter_pcm.c b/sound/soc/xilinx/xlnx_formatter_pcm.c index 1d59fb668c77..91afea9d5de6 100644 --- a/sound/soc/xilinx/xlnx_formatter_pcm.c +++ b/sound/soc/xilinx/xlnx_formatter_pcm.c @@ -452,8 +452,8 @@ static int xlnx_formatter_pcm_hw_params(struct snd_soc_component *component, stream_data->buffer_size = size; - low = lower_32_bits(substream->dma_buffer.addr); - high = upper_32_bits(substream->dma_buffer.addr); + low = lower_32_bits(runtime->dma_addr); + high = upper_32_bits(runtime->dma_addr); writel(low, stream_data->mmio + XLNX_AUD_BUFF_ADDR_LSB); writel(high, stream_data->mmio + XLNX_AUD_BUFF_ADDR_MSB); -- cgit From 827f3164aaa579eee6fd50c6654861d54f282a11 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 28 Jul 2021 13:23:52 +0200 Subject: ASoC: uniphier: Fix reference to PCM buffer address Along with the transition to the managed PCM buffers, the driver now accepts the dynamically allocated buffer, while it still kept the reference to the old preallocated buffer address. This patch corrects to the right reference via runtime->dma_addr. (Although this might have been already buggy before the cleanup with the managed buffer, let's put Fixes tag to point that; it's a corner case, after all.) Fixes: d55894bc2763 ("ASoC: uniphier: Use managed buffer allocation") Cc: Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20210728112353.6675-5-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/uniphier/aio-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/uniphier/aio-dma.c b/sound/soc/uniphier/aio-dma.c index 3c1628a3a1ac..3d9736e7381f 100644 --- a/sound/soc/uniphier/aio-dma.c +++ b/sound/soc/uniphier/aio-dma.c @@ -198,7 +198,7 @@ static int uniphier_aiodma_mmap(struct snd_soc_component *component, vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); return remap_pfn_range(vma, vma->vm_start, - substream->dma_buffer.addr >> PAGE_SHIFT, + substream->runtime->dma_addr >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot); } -- cgit From bb6a40fc5a830cae45ddd5cd6cfa151b008522ed Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 28 Jul 2021 13:23:53 +0200 Subject: ASoC: kirkwood: Fix reference to PCM buffer address The transition to the managed PCM buffers allowed the dynamically buffer allocation, while the driver code still assumes the fixed preallocation buffer and sets up the DMA stuff at the open call. This needs to be moved to hw_params after the buffer allocation and setup. Also, the reference to the buffer address has to be corrected to runtime->dma_addr. Fixes: b3c0ae75f5d3 ("ASoC: kirkwood: Use managed DMA buffer allocation") Cc: Lars-Peter Clausen Cc: Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20210728112353.6675-6-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/kirkwood/kirkwood-dma.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/sound/soc/kirkwood/kirkwood-dma.c b/sound/soc/kirkwood/kirkwood-dma.c index c2a5933bfcfc..700a18561a94 100644 --- a/sound/soc/kirkwood/kirkwood-dma.c +++ b/sound/soc/kirkwood/kirkwood-dma.c @@ -104,8 +104,6 @@ static int kirkwood_dma_open(struct snd_soc_component *component, int err; struct snd_pcm_runtime *runtime = substream->runtime; struct kirkwood_dma_data *priv = kirkwood_priv(substream); - const struct mbus_dram_target_info *dram; - unsigned long addr; snd_soc_set_runtime_hwparams(substream, &kirkwood_dma_snd_hw); @@ -142,20 +140,14 @@ static int kirkwood_dma_open(struct snd_soc_component *component, writel((unsigned int)-1, priv->io + KIRKWOOD_ERR_MASK); } - dram = mv_mbus_dram_info(); - addr = substream->dma_buffer.addr; if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { if (priv->substream_play) return -EBUSY; priv->substream_play = substream; - kirkwood_dma_conf_mbus_windows(priv->io, - KIRKWOOD_PLAYBACK_WIN, addr, dram); } else { if (priv->substream_rec) return -EBUSY; priv->substream_rec = substream; - kirkwood_dma_conf_mbus_windows(priv->io, - KIRKWOOD_RECORD_WIN, addr, dram); } return 0; @@ -182,6 +174,23 @@ static int kirkwood_dma_close(struct snd_soc_component *component, return 0; } +static int kirkwood_dma_hw_params(struct snd_soc_component *component, + struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct kirkwood_dma_data *priv = kirkwood_priv(substream); + const struct mbus_dram_target_info *dram = mv_mbus_dram_info(); + unsigned long addr = substream->runtime->dma_addr; + + if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) + kirkwood_dma_conf_mbus_windows(priv->io, + KIRKWOOD_PLAYBACK_WIN, addr, dram); + else + kirkwood_dma_conf_mbus_windows(priv->io, + KIRKWOOD_RECORD_WIN, addr, dram); + return 0; +} + static int kirkwood_dma_prepare(struct snd_soc_component *component, struct snd_pcm_substream *substream) { @@ -246,6 +255,7 @@ const struct snd_soc_component_driver kirkwood_soc_component = { .name = DRV_NAME, .open = kirkwood_dma_open, .close = kirkwood_dma_close, + .hw_params = kirkwood_dma_hw_params, .prepare = kirkwood_dma_prepare, .pointer = kirkwood_dma_pointer, .pcm_construct = kirkwood_dma_new, -- cgit From 1d25684e22516f1cff77176d288b1da02fff57bb Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 29 Jul 2021 00:47:29 +0100 Subject: ASoC: nau8824: Fix open coded prefix handling As with the component layer code the nau8824 driver had been doing some open coded pin manipulation which will have been broken now the core is fixed to handle this properly, remove the open coding to avoid the issue. Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210728234729.10135-1-broonie@kernel.org Signed-off-by: Mark Brown --- sound/soc/codecs/nau8824.c | 42 ++++++------------------------------------ 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index 15bd8335f667..db88be48c998 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -828,36 +828,6 @@ static void nau8824_int_status_clear_all(struct regmap *regmap) } } -static void nau8824_dapm_disable_pin(struct nau8824 *nau8824, const char *pin) -{ - struct snd_soc_dapm_context *dapm = nau8824->dapm; - const char *prefix = dapm->component->name_prefix; - char prefixed_pin[80]; - - if (prefix) { - snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s", - prefix, pin); - snd_soc_dapm_disable_pin(dapm, prefixed_pin); - } else { - snd_soc_dapm_disable_pin(dapm, pin); - } -} - -static void nau8824_dapm_enable_pin(struct nau8824 *nau8824, const char *pin) -{ - struct snd_soc_dapm_context *dapm = nau8824->dapm; - const char *prefix = dapm->component->name_prefix; - char prefixed_pin[80]; - - if (prefix) { - snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s", - prefix, pin); - snd_soc_dapm_force_enable_pin(dapm, prefixed_pin); - } else { - snd_soc_dapm_force_enable_pin(dapm, pin); - } -} - static void nau8824_eject_jack(struct nau8824 *nau8824) { struct snd_soc_dapm_context *dapm = nau8824->dapm; @@ -866,8 +836,8 @@ static void nau8824_eject_jack(struct nau8824 *nau8824) /* Clear all interruption status */ nau8824_int_status_clear_all(regmap); - nau8824_dapm_disable_pin(nau8824, "SAR"); - nau8824_dapm_disable_pin(nau8824, "MICBIAS"); + snd_soc_dapm_disable_pin(dapm, "SAR"); + snd_soc_dapm_disable_pin(dapm, "MICBIAS"); snd_soc_dapm_sync(dapm); /* Enable the insertion interruption, disable the ejection @@ -897,8 +867,8 @@ static void nau8824_jdet_work(struct work_struct *work) struct regmap *regmap = nau8824->regmap; int adc_value, event = 0, event_mask = 0; - nau8824_dapm_enable_pin(nau8824, "MICBIAS"); - nau8824_dapm_enable_pin(nau8824, "SAR"); + snd_soc_dapm_enable_pin(dapm, "MICBIAS"); + snd_soc_dapm_enable_pin(dapm, "SAR"); snd_soc_dapm_sync(dapm); msleep(100); @@ -909,8 +879,8 @@ static void nau8824_jdet_work(struct work_struct *work) if (adc_value < HEADSET_SARADC_THD) { event |= SND_JACK_HEADPHONE; - nau8824_dapm_disable_pin(nau8824, "SAR"); - nau8824_dapm_disable_pin(nau8824, "MICBIAS"); + snd_soc_dapm_disable_pin(dapm, "SAR"); + snd_soc_dapm_disable_pin(dapm, "MICBIAS"); snd_soc_dapm_sync(dapm); } else { event |= SND_JACK_HEADSET; -- cgit From 567c39047dbee341244fe3bf79fea24ee0897ff9 Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Mon, 5 Jul 2021 08:09:21 +0300 Subject: selftests/sgx: Fix Q1 and Q2 calculation in sigstruct.c Q1 and Q2 are numbers with *maximum* length of 384 bytes. If the calculated length of Q1 and Q2 is less than 384 bytes, things will go wrong. E.g. if Q2 is 383 bytes, then 1. The bytes of q2 are copied to sigstruct->q2 in calc_q1q2(). 2. The entire sigstruct->q2 is reversed, which results it being 256 * Q2, given that the last byte of sigstruct->q2 is added to before the bytes given by calc_q1q2(). Either change in key or measurement can trigger the bug. E.g. an unmeasured heap could cause a devastating change in Q1 or Q2. Reverse exactly the bytes of Q1 and Q2 in calc_q1q2() before returning to the caller. Fixes: 2adcba79e69d ("selftests/x86: Add a selftest for SGX") Link: https://lore.kernel.org/linux-sgx/20210301051836.30738-1-tianjia.zhang@linux.alibaba.com/ Signed-off-by: Tianjia Zhang Signed-off-by: Jarkko Sakkinen Signed-off-by: Shuah Khan --- tools/testing/selftests/sgx/sigstruct.c | 41 +++++++++++++++++---------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c index dee7a3d6c5a5..92bbc5a15c39 100644 --- a/tools/testing/selftests/sgx/sigstruct.c +++ b/tools/testing/selftests/sgx/sigstruct.c @@ -55,10 +55,27 @@ static bool alloc_q1q2_ctx(const uint8_t *s, const uint8_t *m, return true; } +static void reverse_bytes(void *data, int length) +{ + int i = 0; + int j = length - 1; + uint8_t temp; + uint8_t *ptr = data; + + while (i < j) { + temp = ptr[i]; + ptr[i] = ptr[j]; + ptr[j] = temp; + i++; + j--; + } +} + static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1, uint8_t *q2) { struct q1q2_ctx ctx; + int len; if (!alloc_q1q2_ctx(s, m, &ctx)) { fprintf(stderr, "Not enough memory for Q1Q2 calculation\n"); @@ -89,8 +106,10 @@ static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1, goto out; } - BN_bn2bin(ctx.q1, q1); - BN_bn2bin(ctx.q2, q2); + len = BN_bn2bin(ctx.q1, q1); + reverse_bytes(q1, len); + len = BN_bn2bin(ctx.q2, q2); + reverse_bytes(q2, len); free_q1q2_ctx(&ctx); return true; @@ -152,22 +171,6 @@ static RSA *gen_sign_key(void) return key; } -static void reverse_bytes(void *data, int length) -{ - int i = 0; - int j = length - 1; - uint8_t temp; - uint8_t *ptr = data; - - while (i < j) { - temp = ptr[i]; - ptr[i] = ptr[j]; - ptr[j] = temp; - i++; - j--; - } -} - enum mrtags { MRECREATE = 0x0045544145524345, MREADD = 0x0000000044444145, @@ -367,8 +370,6 @@ bool encl_measure(struct encl *encl) /* BE -> LE */ reverse_bytes(sigstruct->signature, SGX_MODULUS_SIZE); reverse_bytes(sigstruct->modulus, SGX_MODULUS_SIZE); - reverse_bytes(sigstruct->q1, SGX_MODULUS_SIZE); - reverse_bytes(sigstruct->q2, SGX_MODULUS_SIZE); EVP_MD_CTX_destroy(ctx); RSA_free(key); -- cgit From 5afc1540f13804a31bb704b763308e17688369c5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 30 Jul 2021 08:16:51 +0100 Subject: iio: adc: Fix incorrect exit of for-loop Currently the for-loop that scans for the optimial adc_period iterates through all the possible adc_period levels because the exit logic in the loop is inverted. I believe the comparison should be swapped and the continue replaced with a break to exit the loop at the correct point. Addresses-Coverity: ("Continue has no effect") Fixes: e08e19c331fb ("iio:adc: add iio driver for Palmas (twl6035/7) gpadc") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210730071651.17394-1-colin.king@canonical.com Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/palmas_gpadc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/palmas_gpadc.c b/drivers/iio/adc/palmas_gpadc.c index 6ef09609be9f..f9c8385c72d3 100644 --- a/drivers/iio/adc/palmas_gpadc.c +++ b/drivers/iio/adc/palmas_gpadc.c @@ -664,8 +664,8 @@ static int palmas_adc_wakeup_configure(struct palmas_gpadc *adc) adc_period = adc->auto_conversion_period; for (i = 0; i < 16; ++i) { - if (((1000 * (1 << i)) / 32) < adc_period) - continue; + if (((1000 * (1 << i)) / 32) >= adc_period) + break; } if (i > 0) i--; -- cgit From 8b5d95313b6d30f642e4ed0125891984c446604e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 31 Jul 2021 10:43:31 +0200 Subject: ASoC: amd: Fix reference to PCM buffer address PCM buffers might be allocated dynamically when the buffer preallocation failed or a larger buffer is requested, and it's not guaranteed that substream->dma_buffer points to the actually used buffer. The driver needs to refer to substream->runtime->dma_addr instead for the buffer address. Cc: Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/r/20210731084331.32225-1-tiwai@suse.de Signed-off-by: Mark Brown --- sound/soc/amd/acp-pcm-dma.c | 2 +- sound/soc/amd/raven/acp3x-pcm-dma.c | 2 +- sound/soc/amd/renoir/acp3x-pdm-dma.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index 143155a840ac..cc1ce6f22caa 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -969,7 +969,7 @@ static int acp_dma_hw_params(struct snd_soc_component *component, acp_set_sram_bank_state(rtd->acp_mmio, 0, true); /* Save for runtime private data */ - rtd->dma_addr = substream->dma_buffer.addr; + rtd->dma_addr = runtime->dma_addr; rtd->order = get_order(size); /* Fill the page table entries in ACP SRAM */ diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c index 8148b0d22e88..597d7c4b2a6b 100644 --- a/sound/soc/amd/raven/acp3x-pcm-dma.c +++ b/sound/soc/amd/raven/acp3x-pcm-dma.c @@ -286,7 +286,7 @@ static int acp3x_dma_hw_params(struct snd_soc_component *component, pr_err("pinfo failed\n"); } size = params_buffer_bytes(params); - rtd->dma_addr = substream->dma_buffer.addr; + rtd->dma_addr = substream->runtime->dma_addr; rtd->num_pages = (PAGE_ALIGN(size) >> PAGE_SHIFT); config_acp3x_dma(rtd, substream->stream); return 0; diff --git a/sound/soc/amd/renoir/acp3x-pdm-dma.c b/sound/soc/amd/renoir/acp3x-pdm-dma.c index bd20622b0933..0391c28dd078 100644 --- a/sound/soc/amd/renoir/acp3x-pdm-dma.c +++ b/sound/soc/amd/renoir/acp3x-pdm-dma.c @@ -242,7 +242,7 @@ static int acp_pdm_dma_hw_params(struct snd_soc_component *component, return -EINVAL; size = params_buffer_bytes(params); period_bytes = params_period_bytes(params); - rtd->dma_addr = substream->dma_buffer.addr; + rtd->dma_addr = substream->runtime->dma_addr; rtd->num_pages = (PAGE_ALIGN(size) >> PAGE_SHIFT); config_acp_dma(rtd, substream->stream); init_pdm_ring_buffer(MEM_WINDOW_START, size, period_bytes, -- cgit From 5aa95d8834e07907e64937d792c12ffef7fb271f Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Sat, 31 Jul 2021 09:47:37 +0200 Subject: iommu: Check if group is NULL before remove device If probe_device is failing, iommu_group is not initialized because iommu_group_add_device is not reached, so freeing it will result in NULL pointer access. iommu_bus_init ->bus_iommu_probe ->probe_iommu_group in for each:/* return -22 in fail case */ ->iommu_probe_device ->__iommu_probe_device /* return -22 here.*/ -> ops->probe_device /* return -22 here.*/ -> iommu_group_get_for_dev -> ops->device_group -> iommu_group_add_device //good case ->remove_iommu_group //in fail case, it will remove group ->iommu_release_device ->iommu_group_remove_device // here we don't have group In my case ops->probe_device (mtk_iommu_probe_device from mtk_iommu_v1.c) is due to failing fwspec->ops mismatch. Fixes: d72e31c93746 ("iommu: IOMMU Groups") Signed-off-by: Frank Wunderlich Link: https://lore.kernel.org/r/20210731074737.4573-1-linux@fw-web.de Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 5419c4b9f27a..63f0af10c403 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -924,6 +924,9 @@ void iommu_group_remove_device(struct device *dev) struct iommu_group *group = dev->iommu_group; struct group_device *tmp_device, *device = NULL; + if (!group) + return; + dev_info(dev, "Removing from iommu group %d\n", group->id); /* Pre-notify listeners that a device is being removed. */ -- cgit From 6b994c554ebc4c065427f510db333081cbd7228d Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 2 Aug 2021 10:16:28 -0500 Subject: ASoC: SOF: Intel: Kconfig: fix SoundWire dependencies The previous Kconfig cleanup added simplifications but also introduced a new one by moving a boolean to a tristate. This leads to randconfig problems. This patch moves the select operations in the SOUNDWIRE_LINK_BASELINE option. The INTEL_SOUNDWIRE config remains a tristate for backwards compatibility with older configurations but is essentially an on/off switch. Fixes: cf5807f5f814f ('ASoC: SOF: Intel: SoundWire: simplify Kconfig') Reported-by: Arnd Bergmann Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Reviewed-by: Bard Liao Tested-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210802151628.15291-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig index 4bce89b5ea40..4447f515e8b1 100644 --- a/sound/soc/sof/intel/Kconfig +++ b/sound/soc/sof/intel/Kconfig @@ -278,6 +278,8 @@ config SND_SOC_SOF_HDA config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE tristate + select SOUNDWIRE_INTEL if SND_SOC_SOF_INTEL_SOUNDWIRE + select SND_INTEL_SOUNDWIRE_ACPI if SND_SOC_SOF_INTEL_SOUNDWIRE config SND_SOC_SOF_INTEL_SOUNDWIRE tristate "SOF support for SoundWire" @@ -285,8 +287,6 @@ config SND_SOC_SOF_INTEL_SOUNDWIRE depends on SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE depends on ACPI && SOUNDWIRE depends on !(SOUNDWIRE=m && SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE=y) - select SOUNDWIRE_INTEL - select SND_INTEL_SOUNDWIRE_ACPI help This adds support for SoundWire with Sound Open Firmware for Intel(R) platforms. -- cgit From 973b393fdf073a4ebd8d82ef6edea99fedc74af9 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Mon, 2 Aug 2021 10:17:49 -0500 Subject: ASoC: SOF: Intel: hda-ipc: fix reply size checking Checking that two values don't have common bits makes no sense, strict equality is meant. Fixes: f3b433e4699f ("ASoC: SOF: Implement Probe IPC API") Reviewed-by: Ranjani Sridharan Signed-off-by: Guennadi Liakhovetski Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210802151749.15417-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- sound/soc/sof/intel/hda-ipc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/sof/intel/hda-ipc.c b/sound/soc/sof/intel/hda-ipc.c index c91aa951df22..acfeca42604c 100644 --- a/sound/soc/sof/intel/hda-ipc.c +++ b/sound/soc/sof/intel/hda-ipc.c @@ -107,8 +107,8 @@ void hda_dsp_ipc_get_reply(struct snd_sof_dev *sdev) } else { /* reply correct size ? */ if (reply.hdr.size != msg->reply_size && - /* getter payload is never known upfront */ - !(reply.hdr.cmd & SOF_IPC_GLB_PROBE)) { + /* getter payload is never known upfront */ + ((reply.hdr.cmd & SOF_GLB_TYPE_MASK) != SOF_IPC_GLB_PROBE)) { dev_err(sdev->dev, "error: reply expected %zu got %u bytes\n", msg->reply_size, reply.hdr.size); ret = -EINVAL; -- cgit From 5b94046efb4706b3429c9c8e7377bd8d1621d588 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 26 Jul 2021 11:38:41 +0200 Subject: efi/libstub: arm64: Force Image reallocation if BSS was not reserved Distro versions of GRUB replace the usual LoadImage/StartImage calls used to load the kernel image with some local code that fails to honor the allocation requirements described in the PE/COFF header, as it does not account for the image's BSS section at all: it fails to allocate space for it, and fails to zero initialize it. Since the EFI stub itself is allocated in the .init segment, which is in the middle of the image, its BSS section is not impacted by this, and the main consequence of this omission is that the BSS section may overlap with memory regions that are already used by the firmware. So let's warn about this condition, and force image reallocation to occur in this case, which works around the problem. Fixes: 82046702e288 ("efi/libstub/arm64: Replace 'preferred' offset with alignment check") Signed-off-by: Ard Biesheuvel Tested-by: Benjamin Herrenschmidt --- drivers/firmware/efi/libstub/arm64-stub.c | 49 ++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 7bf0a7acae5e..3698c1ce2940 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -34,6 +34,51 @@ efi_status_t check_platform_features(void) return EFI_SUCCESS; } +/* + * Distro versions of GRUB may ignore the BSS allocation entirely (i.e., fail + * to provide space, and fail to zero it). Check for this condition by double + * checking that the first and the last byte of the image are covered by the + * same EFI memory map entry. + */ +static bool check_image_region(u64 base, u64 size) +{ + unsigned long map_size, desc_size, buff_size; + efi_memory_desc_t *memory_map; + struct efi_boot_memmap map; + efi_status_t status; + bool ret = false; + int map_offset; + + map.map = &memory_map; + map.map_size = &map_size; + map.desc_size = &desc_size; + map.desc_ver = NULL; + map.key_ptr = NULL; + map.buff_size = &buff_size; + + status = efi_get_memory_map(&map); + if (status != EFI_SUCCESS) + return false; + + for (map_offset = 0; map_offset < map_size; map_offset += desc_size) { + efi_memory_desc_t *md = (void *)memory_map + map_offset; + u64 end = md->phys_addr + md->num_pages * EFI_PAGE_SIZE; + + /* + * Find the region that covers base, and return whether + * it covers base+size bytes. + */ + if (base >= md->phys_addr && base < end) { + ret = (base + size) <= end; + break; + } + } + + efi_bs_call(free_pool, memory_map); + + return ret; +} + /* * Although relocatable kernels can fix up the misalignment with respect to * MIN_KIMG_ALIGN, the resulting virtual text addresses are subtly out of @@ -92,7 +137,9 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } if (status != EFI_SUCCESS) { - if (IS_ALIGNED((u64)_text, min_kimg_align())) { + if (!check_image_region((u64)_text, kernel_memsize)) { + efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n"); + } else if (IS_ALIGNED((u64)_text, min_kimg_align())) { /* * Just execute from wherever we were loaded by the * UEFI PE/COFF loader if the alignment is suitable. -- cgit From 3a262423755b83a5f85009ace415d6e7f572dfe8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 22 Jul 2021 12:10:31 +0200 Subject: efi/libstub: arm64: Relax 2M alignment again for relocatable kernels Commit 82046702e288 ("efi/libstub/arm64: Replace 'preferred' offset with alignment check") simplified the way the stub moves the kernel image around in memory before booting it, given that a relocatable image does not need to be copied to a 2M aligned offset if it was loaded on a 64k boundary by EFI. Commit d32de9130f6c ("efi/arm64: libstub: Deal gracefully with EFI_RNG_PROTOCOL failure") inadvertently defeated this logic by overriding the value of efi_nokaslr if EFI_RNG_PROTOCOL is not available, which was mistaken by the loader logic as an explicit request on the part of the user to disable KASLR and any associated relocation of an Image not loaded on a 2M boundary. So let's reinstate this functionality, by capturing the value of efi_nokaslr at function entry to choose the minimum alignment. Fixes: d32de9130f6c ("efi/arm64: libstub: Deal gracefully with EFI_RNG_PROTOCOL failure") Signed-off-by: Ard Biesheuvel Tested-by: Benjamin Herrenschmidt --- drivers/firmware/efi/libstub/arm64-stub.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 3698c1ce2940..6f214c9c303e 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -79,18 +79,6 @@ static bool check_image_region(u64 base, u64 size) return ret; } -/* - * Although relocatable kernels can fix up the misalignment with respect to - * MIN_KIMG_ALIGN, the resulting virtual text addresses are subtly out of - * sync with those recorded in the vmlinux when kaslr is disabled but the - * image required relocation anyway. Therefore retain 2M alignment unless - * KASLR is in use. - */ -static u64 min_kimg_align(void) -{ - return efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN; -} - efi_status_t handle_kernel_image(unsigned long *image_addr, unsigned long *image_size, unsigned long *reserve_addr, @@ -101,6 +89,16 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, unsigned long kernel_size, kernel_memsize = 0; u32 phys_seed = 0; + /* + * Although relocatable kernels can fix up the misalignment with + * respect to MIN_KIMG_ALIGN, the resulting virtual text addresses are + * subtly out of sync with those recorded in the vmlinux when kaslr is + * disabled but the image required relocation anyway. Therefore retain + * 2M alignment if KASLR was explicitly disabled, even if it was not + * going to be activated to begin with. + */ + u64 min_kimg_align = efi_nokaslr ? MIN_KIMG_ALIGN : EFI_KIMG_ALIGN; + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { if (!efi_nokaslr) { status = efi_get_random_bytes(sizeof(phys_seed), @@ -130,7 +128,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, * If KASLR is enabled, and we have some randomness available, * locate the kernel at a randomized offset in physical memory. */ - status = efi_random_alloc(*reserve_size, min_kimg_align(), + status = efi_random_alloc(*reserve_size, min_kimg_align, reserve_addr, phys_seed); } else { status = EFI_OUT_OF_RESOURCES; @@ -139,7 +137,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, if (status != EFI_SUCCESS) { if (!check_image_region((u64)_text, kernel_memsize)) { efi_err("FIRMWARE BUG: Image BSS overlaps adjacent EFI memory region\n"); - } else if (IS_ALIGNED((u64)_text, min_kimg_align())) { + } else if (IS_ALIGNED((u64)_text, min_kimg_align)) { /* * Just execute from wherever we were loaded by the * UEFI PE/COFF loader if the alignment is suitable. @@ -150,7 +148,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, } status = efi_allocate_pages_aligned(*reserve_size, reserve_addr, - ULONG_MAX, min_kimg_align()); + ULONG_MAX, min_kimg_align); if (status != EFI_SUCCESS) { efi_err("Failed to relocate kernel\n"); -- cgit From ff80ef5bf5bd59e5eab82d1d846acc613ebbf6c4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 26 Jul 2021 16:24:01 +0200 Subject: efi/libstub: arm64: Warn when efi_random_alloc() fails Randomization of the physical load address of the kernel image relies on efi_random_alloc() returning successfully, and currently, we ignore any failures and just carry on, using the ordinary, non-randomized page allocator routine. This means we never find out if a failure occurs, which could harm security, so let's at least warn about this condition. Signed-off-by: Ard Biesheuvel Tested-by: Benjamin Herrenschmidt --- drivers/firmware/efi/libstub/arm64-stub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 6f214c9c303e..010564f8bbc4 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -130,6 +130,8 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, */ status = efi_random_alloc(*reserve_size, min_kimg_align, reserve_addr, phys_seed); + if (status != EFI_SUCCESS) + efi_warn("efi_random_alloc() failed: 0x%lx\n", status); } else { status = EFI_OUT_OF_RESOURCES; } -- cgit From c32ac11da3f83bb42b986702a9b92f0a14ed4182 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 26 Jul 2021 16:31:44 +0200 Subject: efi/libstub: arm64: Double check image alignment at entry On arm64, the stub only moves the kernel image around in memory if needed, which is typically only for KASLR, given that relocatable kernels (which is the default) can run from any 64k aligned address, which is also the minimum alignment communicated to EFI via the PE/COFF header. Unfortunately, some loaders appear to ignore this header, and load the kernel at some arbitrary offset in memory. We can deal with this, but let's check for this condition anyway, so non-compliant code can be spotted and fixed. Cc: # v5.10+ Signed-off-by: Ard Biesheuvel Tested-by: Benjamin Herrenschmidt --- drivers/firmware/efi/libstub/arm64-stub.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 010564f8bbc4..2363fee9211c 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -119,6 +119,10 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, if (image->image_base != _text) efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n"); + if (!IS_ALIGNED((u64)_text, EFI_KIMG_ALIGN)) + efi_err("FIRMWARE BUG: kernel image not aligned on %ldk boundary\n", + EFI_KIMG_ALIGN >> 10); + kernel_size = _edata - _text; kernel_memsize = kernel_size + (_end - _edata); *reserve_size = kernel_memsize; -- cgit From 30615bd21b4cc3c3bb5ae8bd70e2a915cc5f75c7 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 3 Aug 2021 17:08:33 +0100 Subject: ASoC: cs42l42: Fix inversion of ADC Notch Switch control The underlying register field has inverted sense (0 = enabled) so the control definition must be marked as inverted. Signed-off-by: Richard Fitzgerald Fixes: 2c394ca79604 ("ASoC: Add support for CS42L42 codec") Link: https://lore.kernel.org/r/20210803160834.9005-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 08ca05bfbeb3..2db87e3a4dc3 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -437,7 +437,7 @@ static SOC_ENUM_SINGLE_DECL(cs42l42_wnf05_freq_enum, CS42L42_ADC_WNF_HPF_CTL, static const struct snd_kcontrol_new cs42l42_snd_controls[] = { /* ADC Volume and Filter Controls */ SOC_SINGLE("ADC Notch Switch", CS42L42_ADC_CTL, - CS42L42_ADC_NOTCH_DIS_SHIFT, true, false), + CS42L42_ADC_NOTCH_DIS_SHIFT, true, true), SOC_SINGLE("ADC Weak Force Switch", CS42L42_ADC_CTL, CS42L42_ADC_FORCE_WEAK_VCM_SHIFT, true, false), SOC_SINGLE("ADC Invert Switch", CS42L42_ADC_CTL, -- cgit From 8b353bbeae20e2214c9d9d88bcb2fda4ba145d83 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 3 Aug 2021 17:08:34 +0100 Subject: ASoC: cs42l42: Remove duplicate control for WNF filter frequency The driver was defining two ALSA controls that both change the same register field for the wind noise filter corner frequency. The filter response has two corners, at different frequencies, and the duplicate controls most likely were an attempt to be able to set the value using either of the frequencies. However, having two controls changing the same field can be problematic and it is unnecessary. Both frequencies are related to each other so setting one implies exactly what the other would be. Removing a control affects user-side code, but there is currently no known use of the removed control so it would be best to remove it now before it becomes a problem. Signed-off-by: Richard Fitzgerald Fixes: 2c394ca79604 ("ASoC: Add support for CS42L42 codec") Link: https://lore.kernel.org/r/20210803160834.9005-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 2db87e3a4dc3..a00dc3c65549 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -425,15 +425,6 @@ static SOC_ENUM_SINGLE_DECL(cs42l42_wnf3_freq_enum, CS42L42_ADC_WNF_HPF_CTL, CS42L42_ADC_WNF_CF_SHIFT, cs42l42_wnf3_freq_text); -static const char * const cs42l42_wnf05_freq_text[] = { - "280Hz", "315Hz", "350Hz", "385Hz", - "420Hz", "455Hz", "490Hz", "525Hz" -}; - -static SOC_ENUM_SINGLE_DECL(cs42l42_wnf05_freq_enum, CS42L42_ADC_WNF_HPF_CTL, - CS42L42_ADC_WNF_CF_SHIFT, - cs42l42_wnf05_freq_text); - static const struct snd_kcontrol_new cs42l42_snd_controls[] = { /* ADC Volume and Filter Controls */ SOC_SINGLE("ADC Notch Switch", CS42L42_ADC_CTL, @@ -451,7 +442,6 @@ static const struct snd_kcontrol_new cs42l42_snd_controls[] = { CS42L42_ADC_HPF_EN_SHIFT, true, false), SOC_ENUM("HPF Corner Freq", cs42l42_hpf_freq_enum), SOC_ENUM("WNF 3dB Freq", cs42l42_wnf3_freq_enum), - SOC_ENUM("WNF 05dB Freq", cs42l42_wnf05_freq_enum), /* DAC Volume and Filter Controls */ SOC_SINGLE("DACA Invert Switch", CS42L42_DAC_CTL1, -- cgit From d4067395519b40d4ee9b7c26347233e4ae59f900 Mon Sep 17 00:00:00 2001 From: Jinchao Wang Date: Sat, 26 Jun 2021 13:58:28 +0800 Subject: arc: Prefer unsigned int to bare use of unsigned Fix checkpatch warnings: WARNING: Prefer 'unsigned int' to bare use of 'unsigned' Signed-off-by: Jinchao Wang Signed-off-by: Vineet Gupta --- arch/arc/include/asm/checksum.h | 2 +- arch/arc/include/asm/perf_event.h | 2 +- arch/arc/kernel/unwind.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arc/include/asm/checksum.h b/arch/arc/include/asm/checksum.h index 69debd77cd04..0b485800a392 100644 --- a/arch/arc/include/asm/checksum.h +++ b/arch/arc/include/asm/checksum.h @@ -24,7 +24,7 @@ */ static inline __sum16 csum_fold(__wsum s) { - unsigned r = s << 16 | s >> 16; /* ror */ + unsigned int r = s << 16 | s >> 16; /* ror */ s = ~s; s -= r; return s >> 16; diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 30b9ae511ea9..e1971d34ef30 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -123,7 +123,7 @@ static const char * const arc_pmu_ev_hw_map[] = { #define C(_x) PERF_COUNT_HW_CACHE_##_x #define CACHE_OP_UNSUPPORTED 0xffff -static const unsigned arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { +static const unsigned int arc_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { [C(OP_READ)] = { [C(RESULT_ACCESS)] = PERF_COUNT_ARC_LDC, diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c index 47bab67f8649..9e28058cdba8 100644 --- a/arch/arc/kernel/unwind.c +++ b/arch/arc/kernel/unwind.c @@ -260,7 +260,7 @@ static void init_unwind_hdr(struct unwind_table *table, { const u8 *ptr; unsigned long tableSize = table->size, hdrSize; - unsigned n; + unsigned int n; const u32 *fde; struct { u8 version; @@ -462,7 +462,7 @@ static uleb128_t get_uleb128(const u8 **pcur, const u8 *end) { const u8 *cur = *pcur; uleb128_t value; - unsigned shift; + unsigned int shift; for (shift = 0, value = 0; cur < end; shift += 7) { if (shift + 7 > 8 * sizeof(value) @@ -483,7 +483,7 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) { const u8 *cur = *pcur; sleb128_t value; - unsigned shift; + unsigned int shift; for (shift = 0, value = 0; cur < end; shift += 7) { if (shift + 7 > 8 * sizeof(value) @@ -609,7 +609,7 @@ static unsigned long read_pointer(const u8 **pLoc, const void *end, static signed fde_pointer_type(const u32 *cie) { const u8 *ptr = (const u8 *)(cie + 2); - unsigned version = *ptr; + unsigned int version = *ptr; if (*++ptr) { const char *aug; @@ -904,7 +904,7 @@ int arc_unwind(struct unwind_frame_info *frame) const u8 *ptr = NULL, *end = NULL; unsigned long pc = UNW_PC(frame) - frame->call_frame; unsigned long startLoc = 0, endLoc = 0, cfa; - unsigned i; + unsigned int i; signed ptrType = -1; uleb128_t retAddrReg = 0; const struct unwind_table *table; -- cgit From 81e82fa58098b13af206c8ba4edb690bf5b7d5d5 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 4 Jul 2021 10:28:24 +0100 Subject: arc: Fix spelling mistake and grammar in Kconfig There is a spelling mistake and incorrect grammar in the Kconfig text. Fix them. Signed-off-by: Colin Ian King Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index d8f51eb8963b..b5bf68e74732 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -409,7 +409,7 @@ choice help Depending on the configuration, CPU can contain DSP registers (ACC0_GLO, ACC0_GHI, DSP_BFLY0, DSP_CTRL, DSP_FFT_CTRL). - Bellow is options describing how to handle these registers in + Below are options describing how to handle these registers in interrupt entry / exit and in context switch. config ARC_DSP_NONE -- cgit From bf79167fd86f3b97390fe2e70231d383526bd9cc Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 10 Jul 2021 07:50:33 -0700 Subject: ARC: Fix CONFIG_STACKDEPOT Enabling CONFIG_STACKDEPOT results in the following build error. arc-elf-ld: lib/stackdepot.o: in function `filter_irq_stacks': stackdepot.c:(.text+0x456): undefined reference to `__irqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x456): undefined reference to `__irqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x476): undefined reference to `__irqentry_text_end' arc-elf-ld: stackdepot.c:(.text+0x476): undefined reference to `__irqentry_text_end' arc-elf-ld: stackdepot.c:(.text+0x484): undefined reference to `__softirqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x484): undefined reference to `__softirqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x48c): undefined reference to `__softirqentry_text_end' arc-elf-ld: stackdepot.c:(.text+0x48c): undefined reference to `__softirqentry_text_end' Other architectures address this problem by adding IRQENTRY_TEXT and SOFTIRQENTRY_TEXT to the text segment, so do the same here. Signed-off-by: Guenter Roeck Signed-off-by: Vineet Gupta --- arch/arc/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index e2146a8da195..529ae50f9fe2 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -88,6 +88,8 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) } -- cgit From 3a715e80400f452b247caa55344f4f60250ffbcf Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 8 Jul 2021 17:13:48 -0700 Subject: ARC: fp: set FPU_STATUS.FWE to enable FPU_STATUS update on context switch FPU_STATUS register contains FP exception flags bits which are updated by core as side-effect of FP instructions but can also be manually wiggled such as by glibc C99 functions fe{raise,clear,test}except() etc. To effect the update, the programming model requires OR'ing FWE bit (31). This bit is write-only and RAZ, meaning it is effectively auto-cleared after write and thus needs to be set everytime: which is how glibc implements this. However there's another usecase of FPU_STATUS update, at the time of Linux task switch when incoming task value needs to be programmed into the register. This was added as part of f45ba2bd6da0dc ("ARCv2: fpu: preserve userspace fpu state") which missed OR'ing FWE bit, meaning the new value is effectively not being written at all. This patch remedies that. Interestingly, this snafu was not caught in interm glibc testing as the race window which relies on a specific exception bit to be set/clear is really small specially when it nvolves context switch. Fortunately this was caught by glibc's math/test-fenv-tls test which repeatedly set/clear exception flags in a big loop, concurrently in main program and also in a thread. Fixes: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/54 Fixes: f45ba2bd6da0dc ("ARCv2: fpu: preserve userspace fpu state") Cc: stable@vger.kernel.org #5.6+ Signed-off-by: Vineet Gupta --- arch/arc/kernel/fpu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/arc/kernel/fpu.c b/arch/arc/kernel/fpu.c index c67c0f0f5f77..ec640219d989 100644 --- a/arch/arc/kernel/fpu.c +++ b/arch/arc/kernel/fpu.c @@ -57,23 +57,26 @@ void fpu_save_restore(struct task_struct *prev, struct task_struct *next) void fpu_init_task(struct pt_regs *regs) { + const unsigned int fwe = 0x80000000; + /* default rounding mode */ write_aux_reg(ARC_REG_FPU_CTRL, 0x100); - /* set "Write enable" to allow explicit write to exception flags */ - write_aux_reg(ARC_REG_FPU_STATUS, 0x80000000); + /* Initialize to zero: setting requires FWE be set */ + write_aux_reg(ARC_REG_FPU_STATUS, fwe); } void fpu_save_restore(struct task_struct *prev, struct task_struct *next) { struct arc_fpu *save = &prev->thread.fpu; struct arc_fpu *restore = &next->thread.fpu; + const unsigned int fwe = 0x80000000; save->ctrl = read_aux_reg(ARC_REG_FPU_CTRL); save->status = read_aux_reg(ARC_REG_FPU_STATUS); write_aux_reg(ARC_REG_FPU_CTRL, restore->ctrl); - write_aux_reg(ARC_REG_FPU_STATUS, restore->status); + write_aux_reg(ARC_REG_FPU_STATUS, (fwe | restore->status)); } #endif -- cgit From 335ffab3ef864539e814b9a2903b0ae420c1c067 Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Mon, 26 Jul 2021 10:30:56 +0200 Subject: opp: remove WARN when no valid OPPs remain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This WARN can be triggered per-core and the stack trace is not useful. Replace it with plain dev_err(). Fix a comment while at it. Signed-off-by: Michał Mirosław Signed-off-by: Viresh Kumar --- drivers/opp/of.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index d298e38aaf7e..67f2e0710e79 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -964,8 +964,9 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) } } - /* There should be one of more OPP defined */ - if (WARN_ON(!count)) { + /* There should be one or more OPPs defined */ + if (!count) { + dev_err(dev, "%s: no supported OPPs", __func__); ret = -ENOENT; goto remove_static_opp; } -- cgit From f7d635883fb73414c7c4e2648b42adc296c5d40d Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Tue, 3 Aug 2021 10:07:44 +0100 Subject: cpufreq: arm_scmi: Fix error path when allocation failed Stop the initialization when cpumask allocation failed and return an error. Fixes: 80a064dbd556 ("scmi-cpufreq: Get opp_shared_cpus from opp-v2 for EM") Signed-off-by: Lukasz Luba Reviewed-by: Sudeep Holla Signed-off-by: Viresh Kumar --- drivers/cpufreq/scmi-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index ec9a87ca2dbb..75f818d04b48 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -134,7 +134,7 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) } if (!zalloc_cpumask_var(&opp_shared_cpus, GFP_KERNEL)) - ret = -ENOMEM; + return -ENOMEM; /* Obtain CPUs that share SCMI performance controls */ ret = scmi_get_sharing_cpus(cpu_dev, policy->cpus); -- cgit From 5f7b51bf09baca8e4f80cbe879536842bafb5f31 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 28 Jul 2021 17:01:15 +0200 Subject: netfilter: ipset: Limit the maximal range of consecutive elements to add/delete The range size of consecutive elements were not limited. Thus one could define a huge range which may result soft lockup errors due to the long execution time. Now the range size is limited to 2^20 entries. Reported-by: Brad Spengler Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 3 +++ net/netfilter/ipset/ip_set_hash_ip.c | 9 ++++++++- net/netfilter/ipset/ip_set_hash_ipmark.c | 10 +++++++++- net/netfilter/ipset/ip_set_hash_ipport.c | 3 +++ net/netfilter/ipset/ip_set_hash_ipportip.c | 3 +++ net/netfilter/ipset/ip_set_hash_ipportnet.c | 3 +++ net/netfilter/ipset/ip_set_hash_net.c | 11 ++++++++++- net/netfilter/ipset/ip_set_hash_netiface.c | 10 +++++++++- net/netfilter/ipset/ip_set_hash_netnet.c | 16 +++++++++++++++- net/netfilter/ipset/ip_set_hash_netport.c | 11 ++++++++++- net/netfilter/ipset/ip_set_hash_netportnet.c | 16 +++++++++++++++- 11 files changed, 88 insertions(+), 7 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 10279c4830ac..ada1296c87d5 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -196,6 +196,9 @@ struct ip_set_region { u32 elements; /* Number of elements vs timeout */ }; +/* Max range where every element is added/deleted in one step */ +#define IPSET_MAX_RANGE (1<<20) + /* The max revision number supported by any set type + 1 */ #define IPSET_REVISION_MAX 9 diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index d1bef23fd4f5..dd30c03d5a23 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -132,8 +132,11 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; - if (ip > ip_to) + if (ip > ip_to) { + if (ip_to == 0) + return -IPSET_ERR_HASH_ELEM; swap(ip, ip_to); + } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); @@ -144,6 +147,10 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); + /* 64bit division is not allowed on 32bit */ + if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) { ip = ntohl(h->next.ip); e.ip = htonl(ip); diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index 18346d18aa16..153de3457423 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -121,6 +121,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], e.mark = ntohl(nla_get_be32(tb[IPSET_ATTR_MARK])); e.mark &= h->markmask; + if (e.mark == 0 && e.ip == 0) + return -IPSET_ERR_HASH_ELEM; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR])) { @@ -133,8 +135,11 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; - if (ip > ip_to) + if (ip > ip_to) { + if (e.mark == 0 && ip_to == 0) + return -IPSET_ERR_HASH_ELEM; swap(ip, ip_to); + } } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); @@ -143,6 +148,9 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, cidr); } + if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index e1ca11196515..7303138e46be 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -173,6 +173,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } + if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index ab179e064597..334fb1ad0e86 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -180,6 +180,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } + if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 8f075b44cf64..7df94f437f60 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -253,6 +253,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } + if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; + ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index c1a11f041ac6..1422739d9aa2 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -140,7 +140,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0; + u32 ip = 0, ip_to = 0, ipn, n = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -188,6 +188,15 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); + n++; + } while (ipn++ < ip_to); + + if (n > IPSET_MAX_RANGE) + return -ERANGE; + if (retried) ip = ntohl(h->next.ip); do { diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index ddd51c2e1cb3..9810f5bf63f5 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0; + u32 ip = 0, ip_to = 0, ipn, n = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -256,6 +256,14 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); + n++; + } while (ipn++ < ip_to); + + if (n > IPSET_MAX_RANGE) + return -ERANGE; if (retried) ip = ntohl(h->next.ip); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 6532f0505e66..3d09eefe998a 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -168,7 +168,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0; - u32 ip2 = 0, ip2_from = 0, ip2_to = 0; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn; + u64 n = 0, m = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -244,6 +245,19 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); + n++; + } while (ipn++ < ip_to); + ipn = ip2_from; + do { + ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); + m++; + } while (ipn++ < ip2_to); + + if (n*m > IPSET_MAX_RANGE) + return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index ec1564a1cb5a..09cf72eb37f8 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -158,7 +158,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 port, port_to, p = 0, ip = 0, ip_to = 0; + u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn; + u64 n = 0; bool with_ports = false; u8 cidr; int ret; @@ -235,6 +236,14 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr + 1); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr); + n++; + } while (ipn++ < ip_to); + + if (n*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; if (retried) { ip = ntohl(h->next.ip); diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 0e91d1e82f1c..19bcdb3141f6 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -182,7 +182,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2; + u32 ip2_from = 0, ip2_to = 0, ip2, ipn; + u64 n = 0, m = 0; bool with_ports = false; int ret; @@ -284,6 +285,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } + ipn = ip; + do { + ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); + n++; + } while (ipn++ < ip_to); + ipn = ip2_from; + do { + ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); + m++; + } while (ipn++ < ip2_to); + + if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE) + return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); -- cgit From 38ea9def5b62f9193f6bad96c5d108e2830ecbde Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Thu, 29 Jul 2021 16:20:21 +0800 Subject: netfilter: nf_conntrack_bridge: Fix memory leak when error It should be added kfree_skb_list() when err is not equal to zero in nf_br_ip_fragment(). v2: keep this aligned with IPv6. v3: modify iter.frag_list to iter.frag. Fixes: 3c171f496ef5 ("netfilter: bridge: add connection tracking system") Signed-off-by: Yajun Deng Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/nf_conntrack_bridge.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 8d033a75a766..fdbed3158555 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -88,6 +88,12 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, skb = ip_fraglist_next(&iter); } + + if (!err) + return 0; + + kfree_skb_list(iter.frag); + return err; } slow_path: -- cgit From 25f8203b4be1937c4939bb98623e67dcfd7da4d1 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Wed, 30 Jun 2021 12:22:32 +0200 Subject: mmc: dw_mmc: Fix hang on data CRC error When a Data CRC interrupt is received, the driver disables the DMA, then sends the stop/abort command and then waits for Data Transfer Over. However, sometimes, when a data CRC error is received in the middle of a multi-block write transfer, the Data Transfer Over interrupt is never received, and the driver hangs and never completes the request. The driver sets the BMOD.SWR bit (SDMMC_IDMAC_SWRESET) when stopping the DMA, but according to the manual CMD.STOP_ABORT_CMD should be programmed "before assertion of SWR". Do these operations in the recommended order. With this change the Data Transfer Over is always received correctly in my tests. Signed-off-by: Vincent Whitchurch Reviewed-by: Jaehoon Chung Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210630102232.16011-1-vincent.whitchurch@axis.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index d333130d1531..c3229d8c7041 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -2018,8 +2018,8 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t) continue; } - dw_mci_stop_dma(host); send_stop_abort(host, data); + dw_mci_stop_dma(host); state = STATE_SENDING_STOP; break; } @@ -2043,10 +2043,10 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t) */ if (test_and_clear_bit(EVENT_DATA_ERROR, &host->pending_events)) { - dw_mci_stop_dma(host); if (!(host->data_status & (SDMMC_INT_DRTO | SDMMC_INT_EBE))) send_stop_abort(host, data); + dw_mci_stop_dma(host); state = STATE_DATA_ERROR; break; } @@ -2079,10 +2079,10 @@ static void dw_mci_tasklet_func(struct tasklet_struct *t) */ if (test_and_clear_bit(EVENT_DATA_ERROR, &host->pending_events)) { - dw_mci_stop_dma(host); if (!(host->data_status & (SDMMC_INT_DRTO | SDMMC_INT_EBE))) send_stop_abort(host, data); + dw_mci_stop_dma(host); state = STATE_DATA_ERROR; break; } -- cgit From d8e193f13b07e6c0ffaa1a999386f1989f2b4c5e Mon Sep 17 00:00:00 2001 From: Christophe Kerello Date: Thu, 1 Jul 2021 16:33:53 +0200 Subject: mmc: mmci: stm32: Check when the voltage switch procedure should be done If the card has not been power cycled, it may still be using 1.8V signaling. This situation is detected in mmc_sd_init_card function and should be handled in mmci stm32 variant. The host->pwr_reg variable is also correctly protected with spin locks. Fixes: 94b94a93e355 ("mmc: mmci_sdmmc: Implement signal voltage callbacks") Signed-off-by: Christophe Kerello Signed-off-by: Yann Gautier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210701143353.13188-1-yann.gautier@foss.st.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmci_stm32_sdmmc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index 51db30acf4dc..fdaa11f92fe6 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -479,8 +479,9 @@ static int sdmmc_post_sig_volt_switch(struct mmci_host *host, u32 status; int ret = 0; - if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180) { - spin_lock_irqsave(&host->lock, flags); + spin_lock_irqsave(&host->lock, flags); + if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180 && + host->pwr_reg & MCI_STM32_VSWITCHEN) { mmci_write_pwrreg(host, host->pwr_reg | MCI_STM32_VSWITCH); spin_unlock_irqrestore(&host->lock, flags); @@ -492,9 +493,11 @@ static int sdmmc_post_sig_volt_switch(struct mmci_host *host, writel_relaxed(MCI_STM32_VSWENDC | MCI_STM32_CKSTOPC, host->base + MMCICLEAR); + spin_lock_irqsave(&host->lock, flags); mmci_write_pwrreg(host, host->pwr_reg & ~(MCI_STM32_VSWITCHEN | MCI_STM32_VSWITCH)); } + spin_unlock_irqrestore(&host->lock, flags); return ret; } -- cgit From 67b13f3e221ed81b46a657e2b499bf8b20162476 Mon Sep 17 00:00:00 2001 From: Shaik Sajida Bhanu Date: Fri, 16 Jul 2021 17:16:14 +0530 Subject: mmc: sdhci-msm: Update the software timeout value for sdhc Whenever SDHC run at clock rate 50MHZ or below, the hardware data timeout value will be 21.47secs, which is approx. 22secs and we have a current software timeout value as 10secs. We have to set software timeout value more than the hardware data timeout value to avioid seeing the below register dumps. [ 332.953670] mmc2: Timeout waiting for hardware interrupt. [ 332.959608] mmc2: sdhci: ============ SDHCI REGISTER DUMP =========== [ 332.966450] mmc2: sdhci: Sys addr: 0x00000000 | Version: 0x00007202 [ 332.973256] mmc2: sdhci: Blk size: 0x00000200 | Blk cnt: 0x00000001 [ 332.980054] mmc2: sdhci: Argument: 0x00000000 | Trn mode: 0x00000027 [ 332.986864] mmc2: sdhci: Present: 0x01f801f6 | Host ctl: 0x0000001f [ 332.993671] mmc2: sdhci: Power: 0x00000001 | Blk gap: 0x00000000 [ 333.000583] mmc2: sdhci: Wake-up: 0x00000000 | Clock: 0x00000007 [ 333.007386] mmc2: sdhci: Timeout: 0x0000000e | Int stat: 0x00000000 [ 333.014182] mmc2: sdhci: Int enab: 0x03ff100b | Sig enab: 0x03ff100b [ 333.020976] mmc2: sdhci: ACmd stat: 0x00000000 | Slot int: 0x00000000 [ 333.027771] mmc2: sdhci: Caps: 0x322dc8b2 | Caps_1: 0x0000808f [ 333.034561] mmc2: sdhci: Cmd: 0x0000183a | Max curr: 0x00000000 [ 333.041359] mmc2: sdhci: Resp[0]: 0x00000900 | Resp[1]: 0x00000000 [ 333.048157] mmc2: sdhci: Resp[2]: 0x00000000 | Resp[3]: 0x00000000 [ 333.054945] mmc2: sdhci: Host ctl2: 0x00000000 [ 333.059657] mmc2: sdhci: ADMA Err: 0x00000000 | ADMA Ptr: 0x0000000ffffff218 [ 333.067178] mmc2: sdhci_msm: ----------- VENDOR REGISTER DUMP ----------- [ 333.074343] mmc2: sdhci_msm: DLL sts: 0x00000000 | DLL cfg: 0x6000642c | DLL cfg2: 0x0020a000 [ 333.083417] mmc2: sdhci_msm: DLL cfg3: 0x00000000 | DLL usr ctl: 0x00000000 | DDR cfg: 0x80040873 [ 333.092850] mmc2: sdhci_msm: Vndr func: 0x00008a9c | Vndr func2 : 0xf88218a8 Vndr func3: 0x02626040 [ 333.102371] mmc2: sdhci: ============================================ So, set software timeout value more than hardware timeout value. Signed-off-by: Shaik Sajida Bhanu Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1626435974-14462-1-git-send-email-sbhanu@codeaurora.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-msm.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index e44b7a66b73c..290a14cdc1cf 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -2089,6 +2089,23 @@ static void sdhci_msm_cqe_disable(struct mmc_host *mmc, bool recovery) sdhci_cqe_disable(mmc, recovery); } +static void sdhci_msm_set_timeout(struct sdhci_host *host, struct mmc_command *cmd) +{ + u32 count, start = 15; + + __sdhci_set_timeout(host, cmd); + count = sdhci_readb(host, SDHCI_TIMEOUT_CONTROL); + /* + * Update software timeout value if its value is less than hardware data + * timeout value. Qcom SoC hardware data timeout value was calculated + * using 4 * MCLK * 2^(count + 13). where MCLK = 1 / host->clock. + */ + if (cmd && cmd->data && host->clock > 400000 && + host->clock <= 50000000 && + ((1 << (count + start)) > (10 * host->clock))) + host->data_timeout = 22LL * NSEC_PER_SEC; +} + static const struct cqhci_host_ops sdhci_msm_cqhci_ops = { .enable = sdhci_msm_cqe_enable, .disable = sdhci_msm_cqe_disable, @@ -2438,6 +2455,7 @@ static const struct sdhci_ops sdhci_msm_ops = { .irq = sdhci_msm_cqe_irq, .dump_vendor_regs = sdhci_msm_dump_vendor_regs, .set_power = sdhci_set_power_noreg, + .set_timeout = sdhci_msm_set_timeout, }; static const struct sdhci_pltfm_data sdhci_msm_pdata = { -- cgit From 2f658f7a3953f6d70bab90e117aff8d0ad44e200 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Aug 2021 14:21:41 +0300 Subject: pinctrl: tigerlake: Fix GPIO mapping for newer version of software The software mapping for GPIO, which initially comes from Microsoft, is subject to change by respective Windows and firmware developers. Due to the above the driver had been written and published way ahead of the schedule, and thus the numbering schema used in it is outdated. Fix the numbering schema in accordance with the real products on market. Fixes: 653d96455e1e ("pinctrl: tigerlake: Add support for Tiger Lake-H") Reported-and-tested-by: Kai-Heng Feng Reported-by: Riccardo Mori Reported-and-tested-by: Lovesh BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213463 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213579 BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213857 Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg --- drivers/pinctrl/intel/pinctrl-tigerlake.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-tigerlake.c b/drivers/pinctrl/intel/pinctrl-tigerlake.c index 3e4ef2b87526..0bcd19597e4a 100644 --- a/drivers/pinctrl/intel/pinctrl-tigerlake.c +++ b/drivers/pinctrl/intel/pinctrl-tigerlake.c @@ -701,32 +701,32 @@ static const struct pinctrl_pin_desc tglh_pins[] = { static const struct intel_padgroup tglh_community0_gpps[] = { TGL_GPP(0, 0, 24, 0), /* GPP_A */ - TGL_GPP(1, 25, 44, 128), /* GPP_R */ - TGL_GPP(2, 45, 70, 32), /* GPP_B */ - TGL_GPP(3, 71, 78, INTEL_GPIO_BASE_NOMAP), /* vGPIO_0 */ + TGL_GPP(1, 25, 44, 32), /* GPP_R */ + TGL_GPP(2, 45, 70, 64), /* GPP_B */ + TGL_GPP(3, 71, 78, 96), /* vGPIO_0 */ }; static const struct intel_padgroup tglh_community1_gpps[] = { - TGL_GPP(0, 79, 104, 96), /* GPP_D */ - TGL_GPP(1, 105, 128, 64), /* GPP_C */ - TGL_GPP(2, 129, 136, 160), /* GPP_S */ - TGL_GPP(3, 137, 153, 192), /* GPP_G */ - TGL_GPP(4, 154, 180, 224), /* vGPIO */ + TGL_GPP(0, 79, 104, 128), /* GPP_D */ + TGL_GPP(1, 105, 128, 160), /* GPP_C */ + TGL_GPP(2, 129, 136, 192), /* GPP_S */ + TGL_GPP(3, 137, 153, 224), /* GPP_G */ + TGL_GPP(4, 154, 180, 256), /* vGPIO */ }; static const struct intel_padgroup tglh_community3_gpps[] = { - TGL_GPP(0, 181, 193, 256), /* GPP_E */ - TGL_GPP(1, 194, 217, 288), /* GPP_F */ + TGL_GPP(0, 181, 193, 288), /* GPP_E */ + TGL_GPP(1, 194, 217, 320), /* GPP_F */ }; static const struct intel_padgroup tglh_community4_gpps[] = { - TGL_GPP(0, 218, 241, 320), /* GPP_H */ + TGL_GPP(0, 218, 241, 352), /* GPP_H */ TGL_GPP(1, 242, 251, 384), /* GPP_J */ - TGL_GPP(2, 252, 266, 352), /* GPP_K */ + TGL_GPP(2, 252, 266, 416), /* GPP_K */ }; static const struct intel_padgroup tglh_community5_gpps[] = { - TGL_GPP(0, 267, 281, 416), /* GPP_I */ + TGL_GPP(0, 267, 281, 448), /* GPP_I */ TGL_GPP(1, 282, 290, INTEL_GPIO_BASE_NOMAP), /* JTAG */ }; -- cgit From bf2ba432213fade50dd39f2e348085b758c0726e Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Tue, 6 Jul 2021 14:52:41 +0100 Subject: ceph: reduce contention in ceph_check_delayed_caps() Function ceph_check_delayed_caps() is called from the mdsc->delayed_work workqueue and it can be kept looping for quite some time if caps keep being added back to the mdsc->cap_delay_list. This may result in the watchdog tainting the kernel with the softlockup flag. This patch breaks this loop if the caps have been recently (i.e. during the loop execution). Any new caps added to the list will be handled in the next run. Also, allow schedule_delayed() callers to explicitly set the delay value instead of defaulting to 5s, so we can ensure that it runs soon afterward if it looks like there is more work. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/46284 Signed-off-by: Luis Henriques Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/caps.c | 17 ++++++++++++++++- fs/ceph/mds_client.c | 25 ++++++++++++++++--------- fs/ceph/super.h | 2 +- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7bdefd0c789a..2a2900903f8c 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4150,11 +4150,19 @@ bad: /* * Delayed work handler to process end of delayed cap release LRU list. + * + * If new caps are added to the list while processing it, these won't get + * processed in this run. In this case, the ci->i_hold_caps_max will be + * returned so that the work can be scheduled accordingly. */ -void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) +unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc) { struct inode *inode; struct ceph_inode_info *ci; + struct ceph_mount_options *opt = mdsc->fsc->mount_options; + unsigned long delay_max = opt->caps_wanted_delay_max * HZ; + unsigned long loop_start = jiffies; + unsigned long delay = 0; dout("check_delayed_caps\n"); spin_lock(&mdsc->cap_delay_lock); @@ -4162,6 +4170,11 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) ci = list_first_entry(&mdsc->cap_delay_list, struct ceph_inode_info, i_cap_delay_list); + if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) { + dout("%s caps added recently. Exiting loop", __func__); + delay = ci->i_hold_caps_max; + break; + } if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 && time_before(jiffies, ci->i_hold_caps_max)) break; @@ -4177,6 +4190,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) } } spin_unlock(&mdsc->cap_delay_lock); + + return delay; } /* diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9db1b39df773..afdc20213876 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4490,22 +4490,29 @@ void inc_session_sequence(struct ceph_mds_session *s) } /* - * delayed work -- periodically trim expired leases, renew caps with mds + * delayed work -- periodically trim expired leases, renew caps with mds. If + * the @delay parameter is set to 0 or if it's more than 5 secs, the default + * workqueue delay value of 5 secs will be used. */ -static void schedule_delayed(struct ceph_mds_client *mdsc) +static void schedule_delayed(struct ceph_mds_client *mdsc, unsigned long delay) { - int delay = 5; - unsigned hz = round_jiffies_relative(HZ * delay); - schedule_delayed_work(&mdsc->delayed_work, hz); + unsigned long max_delay = HZ * 5; + + /* 5 secs default delay */ + if (!delay || (delay > max_delay)) + delay = max_delay; + schedule_delayed_work(&mdsc->delayed_work, + round_jiffies_relative(delay)); } static void delayed_work(struct work_struct *work) { - int i; struct ceph_mds_client *mdsc = container_of(work, struct ceph_mds_client, delayed_work.work); + unsigned long delay; int renew_interval; int renew_caps; + int i; dout("mdsc delayed_work\n"); @@ -4545,7 +4552,7 @@ static void delayed_work(struct work_struct *work) } mutex_unlock(&mdsc->mutex); - ceph_check_delayed_caps(mdsc); + delay = ceph_check_delayed_caps(mdsc); ceph_queue_cap_reclaim_work(mdsc); @@ -4553,7 +4560,7 @@ static void delayed_work(struct work_struct *work) maybe_recover_session(mdsc); - schedule_delayed(mdsc); + schedule_delayed(mdsc, delay); } int ceph_mdsc_init(struct ceph_fs_client *fsc) @@ -5030,7 +5037,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) mdsc->mdsmap->m_epoch); mutex_unlock(&mdsc->mutex); - schedule_delayed(mdsc); + schedule_delayed(mdsc, 0); return; bad_unlock: diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6b6332a5c113..9215a2f4535c 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1167,7 +1167,7 @@ extern void ceph_flush_snaps(struct ceph_inode_info *ci, extern bool __ceph_should_report_size(struct ceph_inode_info *ci); extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session); -extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); +extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc); extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc); extern int ceph_drop_caps_for_unlink(struct inode *inode); extern int ceph_encode_inode_release(void **p, struct inode *inode, -- cgit From 8434ffe71c874b9c4e184b88d25de98c2bf5fe3f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 3 Aug 2021 12:47:34 -0400 Subject: ceph: take snap_empty_lock atomically with snaprealm refcount change There is a race in ceph_put_snap_realm. The change to the nref and the spinlock acquisition are not done atomically, so you could decrement nref, and before you take the spinlock, the nref is incremented again. At that point, you end up putting it on the empty list when it shouldn't be there. Eventually __cleanup_empty_realms runs and frees it when it's still in-use. Fix this by protecting the 1->0 transition with atomic_dec_and_lock, and just drop the spinlock if we can get the rwsem. Because these objects can also undergo a 0->1 refcount transition, we must protect that change as well with the spinlock. Increment locklessly unless the value is at 0, in which case we take the spinlock, increment and then take it off the empty list if it did the 0->1 transition. With these changes, I'm removing the dout() messages from these functions, as well as in __put_snap_realm. They've always been racy, and it's better to not print values that may be misleading. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/46419 Reported-by: Mark Nelson Signed-off-by: Jeff Layton Reviewed-by: Luis Henriques Signed-off-by: Ilya Dryomov --- fs/ceph/snap.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 4ac0606dcbd4..4c6bd1042c94 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -67,19 +67,19 @@ void ceph_get_snap_realm(struct ceph_mds_client *mdsc, { lockdep_assert_held(&mdsc->snap_rwsem); - dout("get_realm %p %d -> %d\n", realm, - atomic_read(&realm->nref), atomic_read(&realm->nref)+1); /* - * since we _only_ increment realm refs or empty the empty - * list with snap_rwsem held, adjusting the empty list here is - * safe. we do need to protect against concurrent empty list - * additions, however. + * The 0->1 and 1->0 transitions must take the snap_empty_lock + * atomically with the refcount change. Go ahead and bump the + * nref here, unless it's 0, in which case we take the spinlock + * and then do the increment and remove it from the list. */ - if (atomic_inc_return(&realm->nref) == 1) { - spin_lock(&mdsc->snap_empty_lock); + if (atomic_inc_not_zero(&realm->nref)) + return; + + spin_lock(&mdsc->snap_empty_lock); + if (atomic_inc_return(&realm->nref) == 1) list_del_init(&realm->empty_item); - spin_unlock(&mdsc->snap_empty_lock); - } + spin_unlock(&mdsc->snap_empty_lock); } static void __insert_snap_realm(struct rb_root *root, @@ -208,28 +208,28 @@ static void __put_snap_realm(struct ceph_mds_client *mdsc, { lockdep_assert_held_write(&mdsc->snap_rwsem); - dout("__put_snap_realm %llx %p %d -> %d\n", realm->ino, realm, - atomic_read(&realm->nref), atomic_read(&realm->nref)-1); + /* + * We do not require the snap_empty_lock here, as any caller that + * increments the value must hold the snap_rwsem. + */ if (atomic_dec_and_test(&realm->nref)) __destroy_snap_realm(mdsc, realm); } /* - * caller needn't hold any locks + * See comments in ceph_get_snap_realm. Caller needn't hold any locks. */ void ceph_put_snap_realm(struct ceph_mds_client *mdsc, struct ceph_snap_realm *realm) { - dout("put_snap_realm %llx %p %d -> %d\n", realm->ino, realm, - atomic_read(&realm->nref), atomic_read(&realm->nref)-1); - if (!atomic_dec_and_test(&realm->nref)) + if (!atomic_dec_and_lock(&realm->nref, &mdsc->snap_empty_lock)) return; if (down_write_trylock(&mdsc->snap_rwsem)) { + spin_unlock(&mdsc->snap_empty_lock); __destroy_snap_realm(mdsc, realm); up_write(&mdsc->snap_rwsem); } else { - spin_lock(&mdsc->snap_empty_lock); list_add(&realm->empty_item, &mdsc->snap_empty); spin_unlock(&mdsc->snap_empty_lock); } -- cgit From 5d79e5ce5489b489cbc4c327305be9dfca0fc9ce Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Wed, 4 Aug 2021 16:34:20 -0400 Subject: cpufreq: blocklist Qualcomm sm8150 in cpufreq-dt-platdev The Qualcomm sm8150 platform uses the qcom-cpufreq-hw driver, so add it to the cpufreq-dt-platdev driver's blocklist. Signed-off-by: Thara Gopinath Reviewed-by: Bjorn Andersson Signed-off-by: Viresh Kumar --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 9d5a38a91f10..231e585f6ba2 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -141,6 +141,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sc7280", }, { .compatible = "qcom,sc8180x", }, { .compatible = "qcom,sdm845", }, + { .compatible = "qcom,sm8150", }, { .compatible = "st,stih407", }, { .compatible = "st,stih410", }, -- cgit From 0e5ded926f2a0f8b57dfa7f0d69a30767e1ea2ce Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Tue, 22 Jun 2021 21:10:19 +0200 Subject: arm64: dts: qcom: msm8994-angler: Disable cont_splash_mem As the default definition breaks booting angler: [ 1.862561] printk: console [ttyMSM0] enabled [ 1.872260] msm_serial: driver initialized D - 15524 - pm_driver_init, Delta cont_splash_mem was introduced in 74d6d0a145835, but the problem manifested after commit '86588296acbf ("fdt: Properly handle "no-map" field in the memory region")'. Disabling it because Angler's firmware does not report where the memory is allocated (dmesg from downstream kernel): [ 0.000000] cma: Found cont_splash_mem@0, memory base 0x0000000000000000, size 16 MiB, limit 0x0000000000000000 [ 0.000000] cma: CMA: reserved 16 MiB at 0x0000000000000000 for cont_splash_mem Similar issue might be on Google Nexus 5X (lg-bullhead). Other MSM8992/4 are known to report correct address. Fixes: 74d6d0a145835 ("arm64: dts: qcom: msm8994/8994-kitakami: Fix up the memory map") Suggested-by: Konrad Dybcio Signed-off-by: Petr Vorel Link: https://lore.kernel.org/r/20210622191019.23771-1-petr.vorel@gmail.com Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts index ffe1a9bd8f70..c096b7758aa0 100644 --- a/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts +++ b/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts @@ -1,12 +1,16 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2015, Huawei Inc. All rights reserved. * Copyright (c) 2016, The Linux Foundation. All rights reserved. + * Copyright (c) 2021, Petr Vorel */ /dts-v1/; #include "msm8994.dtsi" +/* Angler's firmware does not report where the memory is allocated */ +/delete-node/ &cont_splash_mem; + / { model = "Huawei Nexus 6P"; compatible = "huawei,angler", "qcom,msm8994"; -- cgit From d77c95bf9a64d8620662151b2b10efd8221f4bcc Mon Sep 17 00:00:00 2001 From: Caleb Connolly Date: Tue, 20 Jul 2021 15:33:38 +0000 Subject: arm64: dts: qcom: sdm845-oneplus: fix reserved-mem Fix the upper guard and the "removed_region", this fixes the random crashes which used to occur in memory intensive loads. I'm not sure WHY the upper guard being 0x2000 instead of 0x1000 doesn't fix this, but it HAS to be 0x1000. Fixes: e60fd5ac1f68 ("arm64: dts: qcom: sdm845-oneplus-common: guard rmtfs-mem") Signed-off-by: Caleb Connolly Link: https://lore.kernel.org/r/20210720153125.43389-2-caleb@connolly.tech Signed-off-by: Bjorn Andersson --- arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi index 4d052e39b348..eb6b1d15293d 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845-oneplus-common.dtsi @@ -69,7 +69,7 @@ }; rmtfs_upper_guard: memory@f5d01000 { no-map; - reg = <0 0xf5d01000 0 0x2000>; + reg = <0 0xf5d01000 0 0x1000>; }; /* @@ -78,7 +78,7 @@ */ removed_region: memory@88f00000 { no-map; - reg = <0 0x88f00000 0 0x200000>; + reg = <0 0x88f00000 0 0x1c00000>; }; ramoops: ramoops@ac300000 { -- cgit From 7d3fc01796fc895e5fcce45c994c5a8db8120a8d Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Wed, 4 Aug 2021 18:37:22 +0000 Subject: cifs: create sd context must be a multiple of 8 We used to follow the rule earlier that the create SD context always be a multiple of 8. However, with the change: cifs: refactor create_sd_buf() and and avoid corrupting the buffer ...we recompute the length, and we failed that rule. Fixing that with this change. Cc: # v5.10+ Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 781d14e5f2af..b6d2e3591927 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2426,7 +2426,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) memcpy(aclptr, &acl, sizeof(struct cifs_acl)); buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd); - *len = ptr - (__u8 *)buf; + *len = roundup(ptr - (__u8 *)buf, 8); return buf; } -- cgit From f1040e86f83b0f7d5f45724500a6a441731ff4b7 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 5 Aug 2021 17:11:04 +0100 Subject: ASoC: cs42l42: PLL must be running when changing MCLK_SRC_SEL Both SCLK and PLL clocks must be running to drive the glitch-free mux behind MCLK_SRC_SEL and complete the switchover. This patch moves the writing of MCLK_SRC_SEL to when the PLL is started and stopped, so that it only transitions while the PLL is running. The unconditional write MCLK_SRC_SEL=0 in cs42l42_mute_stream() is safe because if the PLL is not running MCLK_SRC_SEL is already 0. Signed-off-by: Richard Fitzgerald Fixes: 43fc357199f9 ("ASoC: cs42l42: Set clock source for both ways of stream") Link: https://lore.kernel.org/r/20210805161111.10410-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 25 ++++++++++++++++++------- sound/soc/codecs/cs42l42.h | 1 + 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index a00dc3c65549..c96549fe6ab2 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -619,6 +619,8 @@ static int cs42l42_pll_config(struct snd_soc_component *component) for (i = 0; i < ARRAY_SIZE(pll_ratio_table); i++) { if (pll_ratio_table[i].sclk == clk) { + cs42l42->pll_config = i; + /* Configure the internal sample rate */ snd_soc_component_update_bits(component, CS42L42_MCLK_CTL, CS42L42_INTERNAL_FS_MASK, @@ -627,14 +629,9 @@ static int cs42l42_pll_config(struct snd_soc_component *component) (pll_ratio_table[i].mclk_int != 24000000)) << CS42L42_INTERNAL_FS_SHIFT); - /* Set the MCLK src (PLL or SCLK) and the divide - * ratio - */ + snd_soc_component_update_bits(component, CS42L42_MCLK_SRC_SEL, - CS42L42_MCLK_SRC_SEL_MASK | CS42L42_MCLKDIV_MASK, - (pll_ratio_table[i].mclk_src_sel - << CS42L42_MCLK_SRC_SEL_SHIFT) | (pll_ratio_table[i].mclk_div << CS42L42_MCLKDIV_SHIFT)); /* Set up the LRCLK */ @@ -892,13 +889,21 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream) */ regmap_multi_reg_write(cs42l42->regmap, cs42l42_to_osc_seq, ARRAY_SIZE(cs42l42_to_osc_seq)); + + /* Must disconnect PLL before stopping it */ + snd_soc_component_update_bits(component, + CS42L42_MCLK_SRC_SEL, + CS42L42_MCLK_SRC_SEL_MASK, + 0); + usleep_range(100, 200); + snd_soc_component_update_bits(component, CS42L42_PLL_CTL1, CS42L42_PLL_START_MASK, 0); } } else { if (!cs42l42->stream_use) { /* SCLK must be running before codec unmute */ - if ((cs42l42->bclk < 11289600) && (cs42l42->sclk < 11289600)) { + if (pll_ratio_table[cs42l42->pll_config].mclk_src_sel) { snd_soc_component_update_bits(component, CS42L42_PLL_CTL1, CS42L42_PLL_START_MASK, 1); @@ -919,6 +924,12 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream) CS42L42_PLL_LOCK_TIMEOUT_US); if (ret < 0) dev_warn(component->dev, "PLL failed to lock: %d\n", ret); + + /* PLL must be running to drive glitchless switch logic */ + snd_soc_component_update_bits(component, + CS42L42_MCLK_SRC_SEL, + CS42L42_MCLK_SRC_SEL_MASK, + CS42L42_MCLK_SRC_SEL_MASK); } /* Mark SCLK as present, turn off internal oscillator */ diff --git a/sound/soc/codecs/cs42l42.h b/sound/soc/codecs/cs42l42.h index 7bf05ff05f74..3dfeaf6611d1 100644 --- a/sound/soc/codecs/cs42l42.h +++ b/sound/soc/codecs/cs42l42.h @@ -775,6 +775,7 @@ struct cs42l42_private { struct gpio_desc *reset_gpio; struct completion pdn_done; struct snd_soc_jack *jack; + int pll_config; int bclk; u32 sclk; u32 srate; -- cgit From 0c2f2ad4f16a58879463d0979a54293f8f296d6f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 5 Aug 2021 17:11:05 +0100 Subject: ASoC: cs42l42: Fix LRCLK frame start edge An I2S frame starts on the falling edge of LRCLK so ASP_STP must be 0. At the same time, move other format settings in the same register from cs42l42_pll_config() to cs42l42_set_dai_fmt() where you'd expect to find them, and merge into a single write. Signed-off-by: Richard Fitzgerald Fixes: 2c394ca79604 ("ASoC: Add support for CS42L42 codec") Link: https://lore.kernel.org/r/20210805161111.10410-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index c96549fe6ab2..02486329a570 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -667,15 +667,6 @@ static int cs42l42_pll_config(struct snd_soc_component *component) CS42L42_FSYNC_PULSE_WIDTH_MASK, CS42L42_FRAC1_VAL(fsync - 1) << CS42L42_FSYNC_PULSE_WIDTH_SHIFT); - snd_soc_component_update_bits(component, - CS42L42_ASP_FRM_CFG, - CS42L42_ASP_5050_MASK, - CS42L42_ASP_5050_MASK); - /* Set the frame delay to 1.0 SCLK clocks */ - snd_soc_component_update_bits(component, CS42L42_ASP_FRM_CFG, - CS42L42_ASP_FSD_MASK, - CS42L42_ASP_FSD_1_0 << - CS42L42_ASP_FSD_SHIFT); /* Set the sample rates (96k or lower) */ snd_soc_component_update_bits(component, CS42L42_FS_RATE_EN, CS42L42_FS_EN_MASK, @@ -775,6 +766,18 @@ static int cs42l42_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) /* interface format */ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: + /* + * 5050 mode, frame starts on falling edge of LRCLK, + * frame delayed by 1.0 SCLKs + */ + snd_soc_component_update_bits(component, + CS42L42_ASP_FRM_CFG, + CS42L42_ASP_STP_MASK | + CS42L42_ASP_5050_MASK | + CS42L42_ASP_FSD_MASK, + CS42L42_ASP_5050_MASK | + (CS42L42_ASP_FSD_1_0 << + CS42L42_ASP_FSD_SHIFT)); break; default: return -EINVAL; -- cgit From 3a5d89a9c6fe306d35dce4496abbb464c1454da0 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 5 Aug 2021 17:11:06 +0100 Subject: ASoC: cs42l42: Constrain sample rate to prevent illegal SCLK The lowest valid SCLK corresponds to 44.1 kHz at 16-bit. Sample rates less than this would produce SCLK below the minimum when using a normal I2S frame. A constraint must be applied to prevent this. The constraint is not applied if the machine driver sets SCLK, to allow setups where the host generates additional bits per LRCLK phase to increase the SCLK frequency. In these cases the machine driver would always have to inform this driver of the actual SCLK, and it must select a legal SCLK. Signed-off-by: Richard Fitzgerald Link: https://lore.kernel.org/r/20210805161111.10410-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 02486329a570..29e0c8dc8466 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -807,6 +807,25 @@ static int cs42l42_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) return 0; } +static int cs42l42_dai_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct cs42l42_private *cs42l42 = snd_soc_component_get_drvdata(component); + + /* + * Sample rates < 44.1 kHz would produce an out-of-range SCLK with + * a standard I2S frame. If the machine driver sets SCLK it must be + * legal. + */ + if (cs42l42->sclk) + return 0; + + /* Machine driver has not set a SCLK, limit bottom end to 44.1 kHz */ + return snd_pcm_hw_constraint_minmax(substream->runtime, + SNDRV_PCM_HW_PARAM_RATE, + 44100, 192000); +} + static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, struct snd_soc_dai *dai) @@ -966,8 +985,8 @@ static int cs42l42_mute_stream(struct snd_soc_dai *dai, int mute, int stream) SNDRV_PCM_FMTBIT_S24_LE |\ SNDRV_PCM_FMTBIT_S32_LE ) - static const struct snd_soc_dai_ops cs42l42_ops = { + .startup = cs42l42_dai_startup, .hw_params = cs42l42_pcm_hw_params, .set_fmt = cs42l42_set_dai_fmt, .set_sysclk = cs42l42_set_sysclk, -- cgit From e5ada3f6787a4d6234adc6f2f3ae35c6d5b71ba0 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 5 Aug 2021 17:11:07 +0100 Subject: ASoC: cs42l42: Fix mono playback I2S always has two LRCLK phases and both CH1 and CH2 of the RX must be enabled (corresponding to the low and high phases of LRCLK.) The selection of the valid data channels is done by setting the DAC CHA_SEL and CHB_SEL. CHA_SEL is always the first (left) channel, CHB_SEL depends on the number of active channels. Previously for mono ASP CH2 was not enabled, the result was playing mono data would not produce any audio output. Signed-off-by: Richard Fitzgerald Fixes: 621d65f3b868 ("ASoC: cs42l42: Provide finer control on playback path") Link: https://lore.kernel.org/r/20210805161111.10410-4-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 15 +++++++++++++-- sound/soc/codecs/cs42l42.h | 2 ++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 29e0c8dc8466..99c022be94a6 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -460,8 +460,8 @@ static const struct snd_soc_dapm_widget cs42l42_dapm_widgets[] = { SND_SOC_DAPM_OUTPUT("HP"), SND_SOC_DAPM_DAC("DAC", NULL, CS42L42_PWR_CTL1, CS42L42_HP_PDN_SHIFT, 1), SND_SOC_DAPM_MIXER("MIXER", CS42L42_PWR_CTL1, CS42L42_MIXER_PDN_SHIFT, 1, NULL, 0), - SND_SOC_DAPM_AIF_IN("SDIN1", NULL, 0, CS42L42_ASP_RX_DAI0_EN, CS42L42_ASP_RX0_CH1_SHIFT, 0), - SND_SOC_DAPM_AIF_IN("SDIN2", NULL, 1, CS42L42_ASP_RX_DAI0_EN, CS42L42_ASP_RX0_CH2_SHIFT, 0), + SND_SOC_DAPM_AIF_IN("SDIN1", NULL, 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("SDIN2", NULL, 1, SND_SOC_NOPM, 0, 0), /* Playback Requirements */ SND_SOC_DAPM_SUPPLY("ASP DAI0", CS42L42_PWR_CTL1, CS42L42_ASP_DAI_PDN_SHIFT, 1, NULL, 0), @@ -866,6 +866,17 @@ static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream, snd_soc_component_update_bits(component, CS42L42_ASP_RX_DAI0_CH2_AP_RES, CS42L42_ASP_RX_CH_AP_MASK | CS42L42_ASP_RX_CH_RES_MASK, val); + + /* Channel B comes from the last active channel */ + snd_soc_component_update_bits(component, CS42L42_SP_RX_CH_SEL, + CS42L42_SP_RX_CHB_SEL_MASK, + (channels - 1) << CS42L42_SP_RX_CHB_SEL_SHIFT); + + /* Both LRCLK slots must be enabled */ + snd_soc_component_update_bits(component, CS42L42_ASP_RX_DAI0_EN, + CS42L42_ASP_RX0_CH_EN_MASK, + BIT(CS42L42_ASP_RX0_CH1_SHIFT) | + BIT(CS42L42_ASP_RX0_CH2_SHIFT)); break; default: break; diff --git a/sound/soc/codecs/cs42l42.h b/sound/soc/codecs/cs42l42.h index 3dfeaf6611d1..112ffcf938c3 100644 --- a/sound/soc/codecs/cs42l42.h +++ b/sound/soc/codecs/cs42l42.h @@ -653,6 +653,8 @@ /* Page 0x25 Audio Port Registers */ #define CS42L42_SP_RX_CH_SEL (CS42L42_PAGE_25 + 0x01) +#define CS42L42_SP_RX_CHB_SEL_SHIFT 2 +#define CS42L42_SP_RX_CHB_SEL_MASK (3 << CS42L42_SP_RX_CHB_SEL_SHIFT) #define CS42L42_SP_RX_ISOC_CTL (CS42L42_PAGE_25 + 0x02) #define CS42L42_SP_RX_RSYNC_SHIFT 6 -- cgit From 283f1b9a0401859c53fdd6483ab66f1c4fadaea5 Mon Sep 17 00:00:00 2001 From: Dong Aisheng Date: Fri, 2 Jul 2021 16:54:38 +0800 Subject: clk: imx6q: fix uart earlycon unwork The earlycon depends on the bootloader setup UART clocks being retained. There're actually two uart clocks (ipg, per) on MX6QDL, but the 'Fixes' commit change to register only one which means another clock may be disabled during booting phase and result in the earlycon unwork. Cc: stable@vger.kernel.org # v5.10+ Fixes: 379c9a24cc23 ("clk: imx: Fix reparenting of UARTs not associated with stdout") Signed-off-by: Dong Aisheng Link: https://lore.kernel.org/r/20210702085438.1988087-1-aisheng.dong@nxp.com Reviewed-by: Abel Vesa Signed-off-by: Stephen Boyd --- drivers/clk/imx/clk-imx6q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c index 496900de0b0b..de36f58d551c 100644 --- a/drivers/clk/imx/clk-imx6q.c +++ b/drivers/clk/imx/clk-imx6q.c @@ -974,6 +974,6 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) hws[IMX6QDL_CLK_PLL3_USB_OTG]->clk); } - imx_register_uart_clocks(1); + imx_register_uart_clocks(2); } CLK_OF_DECLARE(imx6q, "fsl,imx6q-ccm", imx6q_clocks_init); -- cgit From 9711759a87a041705148161b937ec847048d882e Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Wed, 21 Jul 2021 15:40:56 -0700 Subject: clk: qcom: gdsc: Ensure regulator init state matches GDSC state As GDSCs are registered and found to be already enabled gdsc_init() ensures that 1) the kernel state matches the hardware state, and 2) votable GDSCs are properly enabled from this master as well. But as the (optional) supply regulator is enabled deep into gdsc_toggle_logic(), which is only executed for votable GDSCs, the kernel's state of the regulator might not match the hardware. The regulator might be automatically turned off if no other users are present or the next call to gdsc_disable() would cause an unbalanced regulator_disable(). Given that the votable case deals with an already enabled GDSC, most of gdsc_enable() and gdsc_toggle_logic() can be skipped. Reduce it to just clearing the SW_COLLAPSE_MASK and enabling hardware control to simply call regulator_enable() in both cases. The enablement of hardware control seems to be an independent property from the GDSC being enabled, so this is moved outside that conditional segment. Lastly, as the propagation of ALWAYS_ON to GENPD_FLAG_ALWAYS_ON needs to happen regardless of the initial state this is grouped together with the other sc->pd updates at the end of the function. Cc: stable@vger.kernel.org Fixes: 37416e554961 ("clk: qcom: gdsc: Handle GDSC regulator supplies") Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210721224056.3035016-1-bjorn.andersson@linaro.org [sboyd@kernel.org: Rephrase commit text] Signed-off-by: Stephen Boyd --- drivers/clk/qcom/gdsc.c | 54 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 51ed640e527b..4ece326ea233 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -357,27 +357,43 @@ static int gdsc_init(struct gdsc *sc) if (on < 0) return on; - /* - * Votable GDSCs can be ON due to Vote from other masters. - * If a Votable GDSC is ON, make sure we have a Vote. - */ - if ((sc->flags & VOTABLE) && on) - gdsc_enable(&sc->pd); + if (on) { + /* The regulator must be on, sync the kernel state */ + if (sc->rsupply) { + ret = regulator_enable(sc->rsupply); + if (ret < 0) + return ret; + } - /* - * Make sure the retain bit is set if the GDSC is already on, otherwise - * we end up turning off the GDSC and destroying all the register - * contents that we thought we were saving. - */ - if ((sc->flags & RETAIN_FF_ENABLE) && on) - gdsc_retain_ff_on(sc); + /* + * Votable GDSCs can be ON due to Vote from other masters. + * If a Votable GDSC is ON, make sure we have a Vote. + */ + if (sc->flags & VOTABLE) { + ret = regmap_update_bits(sc->regmap, sc->gdscr, + SW_COLLAPSE_MASK, val); + if (ret) + return ret; + } + + /* Turn on HW trigger mode if supported */ + if (sc->flags & HW_CTRL) { + ret = gdsc_hwctrl(sc, true); + if (ret < 0) + return ret; + } - /* If ALWAYS_ON GDSCs are not ON, turn them ON */ - if (sc->flags & ALWAYS_ON) { - if (!on) - gdsc_enable(&sc->pd); + /* + * Make sure the retain bit is set if the GDSC is already on, + * otherwise we end up turning off the GDSC and destroying all + * the register contents that we thought we were saving. + */ + if (sc->flags & RETAIN_FF_ENABLE) + gdsc_retain_ff_on(sc); + } else if (sc->flags & ALWAYS_ON) { + /* If ALWAYS_ON GDSCs are not ON, turn them ON */ + gdsc_enable(&sc->pd); on = true; - sc->pd.flags |= GENPD_FLAG_ALWAYS_ON; } if (on || (sc->pwrsts & PWRSTS_RET)) @@ -385,6 +401,8 @@ static int gdsc_init(struct gdsc *sc) else gdsc_clear_mem_on(sc); + if (sc->flags & ALWAYS_ON) + sc->pd.flags |= GENPD_FLAG_ALWAYS_ON; if (!sc->pd.power_off) sc->pd.power_off = gdsc_disable; if (!sc->pd.power_on) -- cgit From abf3d98dee7c4038152ce88833ddc2189f68cbd4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:06:56 +0200 Subject: mt76: fix enum type mismatch There is no 'NONE' version of 'enum mcu_cipher_type', and returning 'MT_CIPHER_NONE' causes a warning: drivers/net/wireless/mediatek/mt76/mt7921/mcu.c: In function 'mt7921_mcu_get_cipher': drivers/net/wireless/mediatek/mt76/mt7921/mcu.c:114:24: error: implicit conversion from 'enum mt76_cipher_type' to 'enum mcu_cipher_type' [-Werror=enum-conversion] 114 | return MT_CIPHER_NONE; | ^~~~~~~~~~~~~~ Add the missing MCU_CIPHER_NONE defintion that fits in here with the same value. Fixes: c368362c36d3 ("mt76: fix iv and CCMP header insertion") Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210721150745.1914829-1-arnd@kernel.org --- drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7915/mcu.h | 3 ++- drivers/net/wireless/mediatek/mt76/mt7921/mcu.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7921/mcu.h | 3 ++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index 863aa18b3024..43960770a9af 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -111,7 +111,7 @@ mt7915_mcu_get_cipher(int cipher) case WLAN_CIPHER_SUITE_SMS4: return MCU_CIPHER_WAPI; default: - return MT_CIPHER_NONE; + return MCU_CIPHER_NONE; } } diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h index edd3ba3a0c2d..e68a562cc5b4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h @@ -1073,7 +1073,8 @@ enum { }; enum mcu_cipher_type { - MCU_CIPHER_WEP40 = 1, + MCU_CIPHER_NONE = 0, + MCU_CIPHER_WEP40, MCU_CIPHER_WEP104, MCU_CIPHER_WEP128, MCU_CIPHER_TKIP, diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index cd690c64f65b..9fbaacc67cfa 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -111,7 +111,7 @@ mt7921_mcu_get_cipher(int cipher) case WLAN_CIPHER_SUITE_SMS4: return MCU_CIPHER_WAPI; default: - return MT_CIPHER_NONE; + return MCU_CIPHER_NONE; } } diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h index d76cf8f8dfdf..de3c091f6736 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.h @@ -199,7 +199,8 @@ struct sta_rec_sec { } __packed; enum mcu_cipher_type { - MCU_CIPHER_WEP40 = 1, + MCU_CIPHER_NONE = 0, + MCU_CIPHER_WEP40, MCU_CIPHER_WEP104, MCU_CIPHER_WEP128, MCU_CIPHER_TKIP, -- cgit From aff51c5da3208bd164381e1488998667269c6cf4 Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Fri, 6 Aug 2021 12:05:27 +0800 Subject: net: dsa: mt7530: add the missing RxUnicast MIB counter Add the missing RxUnicast counter. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Signed-off-by: DENG Qingfang Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 69f21b71614c..632f0fcc5aa7 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -47,6 +47,7 @@ static const struct mt7530_mib_desc mt7530_mib[] = { MIB_DESC(2, 0x48, "TxBytes"), MIB_DESC(1, 0x60, "RxDrop"), MIB_DESC(1, 0x64, "RxFiltering"), + MIB_DESC(1, 0x68, "RxUnicast"), MIB_DESC(1, 0x6c, "RxMulticast"), MIB_DESC(1, 0x70, "RxBroadcast"), MIB_DESC(1, 0x74, "RxAlignErr"), -- cgit From 704e624f7b3e8a4fc1ce43fb564746d1d07b20c0 Mon Sep 17 00:00:00 2001 From: John Hubbard Date: Thu, 5 Aug 2021 23:53:30 -0700 Subject: net: mvvp2: fix short frame size on s390 On s390, the following build warning occurs: drivers/net/ethernet/marvell/mvpp2/mvpp2.h:844:2: warning: overflow in conversion from 'long unsigned int' to 'int' changes value from '18446744073709551584' to '-32' [-Woverflow] 844 | ((total_size) - MVPP2_SKB_HEADROOM - MVPP2_SKB_SHINFO_SIZE) This happens because MVPP2_SKB_SHINFO_SIZE, which is 320 bytes (which is already 64-byte aligned) on some architectures, actually gets ALIGN'd up to 512 bytes in the s390 case. So then, when this is invoked: MVPP2_RX_MAX_PKT_SIZE(MVPP2_BM_SHORT_FRAME_SIZE) ...that turns into: 704 - 224 - 512 == -32 ...which is not a good frame size to end up with! The warning above is a bit lucky: it notices a signed/unsigned bad behavior here, which leads to the real problem of a frame that is too short for its contents. Increase MVPP2_BM_SHORT_FRAME_SIZE by 32 (from 704 to 736), which is just exactly big enough. (The other values can't readily be changed without causing a lot of other problems.) Fixes: 07dd0a7aae7f ("mvpp2: add basic XDP support") Cc: Sven Auhagen Cc: Matteo Croce Cc: David S. Miller Signed-off-by: John Hubbard Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2/mvpp2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h index b9fbc9f000f2..cf8acabb90ac 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h @@ -938,7 +938,7 @@ enum mvpp22_ptp_packet_format { #define MVPP2_BM_COOKIE_POOL_OFFS 8 #define MVPP2_BM_COOKIE_CPU_OFFS 24 -#define MVPP2_BM_SHORT_FRAME_SIZE 704 /* frame size 128 */ +#define MVPP2_BM_SHORT_FRAME_SIZE 736 /* frame size 128 */ #define MVPP2_BM_LONG_FRAME_SIZE 2240 /* frame size 1664 */ #define MVPP2_BM_JUMBO_FRAME_SIZE 10432 /* frame size 9856 */ /* BM short pool packet size -- cgit From 085fc31f81765e061c78cdcab0e5516fd672bff7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Aug 2021 16:10:00 +0200 Subject: platform/x86: Make dual_accel_detect() KIOX010A + KIOX020A detect more robust 360 degree hinges devices with dual KIOX010A + KIOX020A accelerometers always have both a KIOX010A and a KIOX020A ACPI device (one for each accel). Theoretical some vendor may re-use some DSDT for a non-convertible stripping out just the KIOX020A ACPI device from the DSDT. Check that both ACPI devices are present to make the check more robust. Fixes: 153cca9caa81 ("platform/x86: Add and use a dual_accel_detect() helper") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210802141000.978035-1-hdegoede@redhat.com --- drivers/platform/x86/dual_accel_detect.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/dual_accel_detect.h b/drivers/platform/x86/dual_accel_detect.h index 1a069159da91..a9eae17cc43d 100644 --- a/drivers/platform/x86/dual_accel_detect.h +++ b/drivers/platform/x86/dual_accel_detect.h @@ -60,7 +60,8 @@ static bool dual_accel_detect_bosc0200(void) static bool dual_accel_detect(void) { /* Systems which use a pair of accels with KIOX010A / KIOX020A ACPI ids */ - if (acpi_dev_present("KIOX010A", NULL, -1)) + if (acpi_dev_present("KIOX010A", NULL, -1) && + acpi_dev_present("KIOX020A", NULL, -1)) return true; /* Systems which use a single DUAL250E ACPI device to model 2 accels */ -- cgit From 9d7b132e62e41b7d49bf157aeaf9147c27492e0f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 6 Aug 2021 13:55:15 +0200 Subject: platform/x86: pcengines-apuv2: Add missing terminating entries to gpio-lookup tables The gpiod_lookup_table.table passed to gpiod_add_lookup_table() must be terminated with an empty entry, add this. Note we have likely been getting away with this not being present because the GPIO lookup code first matches on the dev_id, causing most lookups to skip checking the table and the lookups which do check the table will find a matching entry before reaching the end. With that said, terminating these tables properly still is obviously the correct thing to do. Fixes: f8eb0235f659 ("x86: pcengines apuv2 gpio/leds/keys platform driver") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210806115515.12184-1-hdegoede@redhat.com --- drivers/platform/x86/pcengines-apuv2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/platform/x86/pcengines-apuv2.c b/drivers/platform/x86/pcengines-apuv2.c index c37349f97bb8..d063d91db9bc 100644 --- a/drivers/platform/x86/pcengines-apuv2.c +++ b/drivers/platform/x86/pcengines-apuv2.c @@ -94,6 +94,7 @@ static struct gpiod_lookup_table gpios_led_table = { NULL, 1, GPIO_ACTIVE_LOW), GPIO_LOOKUP_IDX(AMD_FCH_GPIO_DRIVER_NAME, APU2_GPIO_LINE_LED3, NULL, 2, GPIO_ACTIVE_LOW), + {} /* Terminating entry */ } }; @@ -123,6 +124,7 @@ static struct gpiod_lookup_table gpios_key_table = { .table = { GPIO_LOOKUP_IDX(AMD_FCH_GPIO_DRIVER_NAME, APU2_GPIO_LINE_MODESW, NULL, 0, GPIO_ACTIVE_LOW), + {} /* Terminating entry */ } }; -- cgit From 4608fdfc07e116f9fc0895beb40abad7cdb5ee3d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Jul 2021 00:29:19 +0200 Subject: netfilter: conntrack: collect all entries in one cycle Michal Kubecek reports that conntrack gc is responsible for frequent wakeups (every 125ms) on idle systems. On busy systems, timed out entries are evicted during lookup. The gc worker is only needed to remove entries after system becomes idle after a busy period. To resolve this, always scan the entire table. If the scan is taking too long, reschedule so other work_structs can run and resume from next bucket. After a completed scan, wait for 2 minutes before the next cycle. Heuristics for faster re-schedule are removed. GC_SCAN_INTERVAL could be exposed as a sysctl in the future to allow tuning this as-needed or even turn the gc worker off. Reported-by: Michal Kubecek Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 71 ++++++++++++--------------------------- 1 file changed, 22 insertions(+), 49 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5c03e5106751..d31dbccbe7bd 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -66,22 +66,17 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash); struct conntrack_gc_work { struct delayed_work dwork; - u32 last_bucket; + u32 next_bucket; bool exiting; bool early_drop; - long next_gc_run; }; static __read_mostly struct kmem_cache *nf_conntrack_cachep; static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; -/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ -#define GC_MAX_BUCKETS_DIV 128u -/* upper bound of full table scan */ -#define GC_MAX_SCAN_JIFFIES (16u * HZ) -/* desired ratio of entries found to be expired */ -#define GC_EVICT_RATIO 50u +#define GC_SCAN_INTERVAL (120u * HZ) +#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) static struct conntrack_gc_work conntrack_gc_work; @@ -1363,17 +1358,13 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) static void gc_worker(struct work_struct *work) { - unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); - unsigned int i, goal, buckets = 0, expired_count = 0; - unsigned int nf_conntrack_max95 = 0; + unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION; + unsigned int i, hashsz, nf_conntrack_max95 = 0; + unsigned long next_run = GC_SCAN_INTERVAL; struct conntrack_gc_work *gc_work; - unsigned int ratio, scanned = 0; - unsigned long next_run; - gc_work = container_of(work, struct conntrack_gc_work, dwork.work); - goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; - i = gc_work->last_bucket; + i = gc_work->next_bucket; if (gc_work->early_drop) nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; @@ -1381,15 +1372,15 @@ static void gc_worker(struct work_struct *work) struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; - unsigned int hashsz; struct nf_conn *tmp; - i++; rcu_read_lock(); nf_conntrack_get_ht(&ct_hash, &hashsz); - if (i >= hashsz) - i = 0; + if (i >= hashsz) { + rcu_read_unlock(); + break; + } hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { struct nf_conntrack_net *cnet; @@ -1397,7 +1388,6 @@ static void gc_worker(struct work_struct *work) tmp = nf_ct_tuplehash_to_ctrack(h); - scanned++; if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { nf_ct_offload_timeout(tmp); continue; @@ -1405,7 +1395,6 @@ static void gc_worker(struct work_struct *work) if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); - expired_count++; continue; } @@ -1438,7 +1427,14 @@ static void gc_worker(struct work_struct *work) */ rcu_read_unlock(); cond_resched(); - } while (++buckets < goal); + i++; + + if (time_after(jiffies, end_time) && i < hashsz) { + gc_work->next_bucket = i; + next_run = 0; + break; + } + } while (i < hashsz); if (gc_work->exiting) return; @@ -1449,40 +1445,17 @@ static void gc_worker(struct work_struct *work) * * This worker is only here to reap expired entries when system went * idle after a busy period. - * - * The heuristics below are supposed to balance conflicting goals: - * - * 1. Minimize time until we notice a stale entry - * 2. Maximize scan intervals to not waste cycles - * - * Normally, expire ratio will be close to 0. - * - * As soon as a sizeable fraction of the entries have expired - * increase scan frequency. */ - ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio > GC_EVICT_RATIO) { - gc_work->next_gc_run = min_interval; - } else { - unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV; - - BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0); - - gc_work->next_gc_run += min_interval; - if (gc_work->next_gc_run > max) - gc_work->next_gc_run = max; + if (next_run) { + gc_work->early_drop = false; + gc_work->next_bucket = 0; } - - next_run = gc_work->next_gc_run; - gc_work->last_bucket = i; - gc_work->early_drop = false; queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); - gc_work->next_gc_run = HZ; gc_work->exiting = false; } -- cgit From 61e0c2bc555a194ada2632fadac73f2bdb5df9cb Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Aug 2021 00:15:53 +0200 Subject: netfilter: nfnetlink_hook: strip off module name from hookfn NFNLA_HOOK_FUNCTION_NAME should include the hook function name only, the module name is already provided by NFNLA_HOOK_MODULE_NAME. Fixes: e2cf17d3774c ("netfilter: add new hook nfnl subsystem") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_hook.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 202f57d17bab..ca453c61dbdf 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -135,6 +135,7 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb, if (module_name) { char *end; + *module_name = '\0'; module_name += 2; end = strchr(module_name, ']'); if (end) { -- cgit From a6e57c4af12bbacf927d7321c3aa894948653688 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Aug 2021 00:15:54 +0200 Subject: netfilter: nfnetlink_hook: missing chain family The family is relevant for pseudo-families like NFPROTO_INET otherwise the user needs to rely on the hook function name to differentiate it from NFPROTO_IPV4 and NFPROTO_IPV6 names. Add nfnl_hook_chain_desc_attributes instead of using the existing NFTA_CHAIN_* attributes, since these do not provide a family number. Fixes: e2cf17d3774c ("netfilter: add new hook nfnl subsystem") Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_hook.h | 9 +++++++++ net/netfilter/nfnetlink_hook.c | 8 ++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/netfilter/nfnetlink_hook.h b/include/uapi/linux/netfilter/nfnetlink_hook.h index 912ec60b26b0..bbcd285b22e1 100644 --- a/include/uapi/linux/netfilter/nfnetlink_hook.h +++ b/include/uapi/linux/netfilter/nfnetlink_hook.h @@ -43,6 +43,15 @@ enum nfnl_hook_chain_info_attributes { }; #define NFNLA_HOOK_INFO_MAX (__NFNLA_HOOK_INFO_MAX - 1) +enum nfnl_hook_chain_desc_attributes { + NFNLA_CHAIN_UNSPEC, + NFNLA_CHAIN_TABLE, + NFNLA_CHAIN_FAMILY, + NFNLA_CHAIN_NAME, + __NFNLA_CHAIN_MAX, +}; +#define NFNLA_CHAIN_MAX (__NFNLA_CHAIN_MAX - 1) + /** * enum nfnl_hook_chaintype - chain type * diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index ca453c61dbdf..e0ff2973fd14 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -89,11 +89,15 @@ static int nfnl_hook_put_nft_chain_info(struct sk_buff *nlskb, if (!nest2) goto cancel_nest; - ret = nla_put_string(nlskb, NFTA_CHAIN_TABLE, chain->table->name); + ret = nla_put_string(nlskb, NFNLA_CHAIN_TABLE, chain->table->name); if (ret) goto cancel_nest; - ret = nla_put_string(nlskb, NFTA_CHAIN_NAME, chain->name); + ret = nla_put_string(nlskb, NFNLA_CHAIN_NAME, chain->name); + if (ret) + goto cancel_nest; + + ret = nla_put_u8(nlskb, NFNLA_CHAIN_FAMILY, chain->table->family); if (ret) goto cancel_nest; -- cgit From 3d9bbaf6c5416bfc50f014ce5879c8c440aaa511 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 4 Aug 2021 01:27:19 +0200 Subject: netfilter: nfnetlink_hook: use the sequence number of the request message The sequence number allows to correlate the netlink reply message (as part of the dump) with the original request message. The cb->seq field is internally used to detect an interference (update) of the hook list during the netlink dump, do not use it as sequence number in the netlink dump header. Fixes: e2cf17d3774c ("netfilter: add new hook nfnl subsystem") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_hook.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index e0ff2973fd14..7b0d4a317457 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -264,7 +264,8 @@ static int nfnl_hook_dump(struct sk_buff *nlskb, ops = nf_hook_entries_get_hook_ops(e); for (; i < e->num_hook_entries; i++) { - err = nfnl_hook_dump_one(nlskb, ctx, ops[i], cb->seq); + err = nfnl_hook_dump_one(nlskb, ctx, ops[i], + cb->nlh->nlmsg_seq); if (err) break; } -- cgit From 69311e7c997451dd40942b6b27b522cc3b659cef Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 4 Aug 2021 01:27:20 +0200 Subject: netfilter: nfnetlink_hook: Use same family as request message Use the same family as the request message, for consistency. The netlink payload provides sufficient information to describe the hook object, including the family. This makes it easier to userspace to correlate the hooks are that visited by the packets for a certain family. Fixes: e2cf17d3774c ("netfilter: add new hook nfnl subsystem") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_hook.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 7b0d4a317457..32eea785ae25 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -113,7 +113,7 @@ cancel_nest: static int nfnl_hook_dump_one(struct sk_buff *nlskb, const struct nfnl_dump_hook_data *ctx, const struct nf_hook_ops *ops, - unsigned int seq) + int family, unsigned int seq) { u16 event = nfnl_msg_type(NFNL_SUBSYS_HOOK, NFNL_MSG_HOOK_GET); unsigned int portid = NETLINK_CB(nlskb).portid; @@ -124,7 +124,7 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb, char *module_name; #endif nlh = nfnl_msg_put(nlskb, portid, seq, event, - NLM_F_MULTI, ops->pf, NFNETLINK_V0, 0); + NLM_F_MULTI, family, NFNETLINK_V0, 0); if (!nlh) goto nla_put_failure; @@ -264,7 +264,7 @@ static int nfnl_hook_dump(struct sk_buff *nlskb, ops = nf_hook_entries_get_hook_ops(e); for (; i < e->num_hook_entries; i++) { - err = nfnl_hook_dump_one(nlskb, ctx, ops[i], + err = nfnl_hook_dump_one(nlskb, ctx, ops[i], family, cb->nlh->nlmsg_seq); if (err) break; -- cgit From 4592ee7f525c4683ec9e290381601fdee50ae110 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Aug 2021 15:02:15 +0200 Subject: netfilter: conntrack: remove offload_pickup sysctl again These two sysctls were added because the hardcoded defaults (2 minutes, tcp, 30 seconds, udp) turned out to be too low for some setups. They appeared in 5.14-rc1 so it should be fine to remove it again. Marcelo convinced me that there should be no difference between a flow that was offloaded vs. a flow that was not wrt. timeout handling. Thus the default is changed to those for TCP established and UDP stream, 5 days and 120 seconds, respectively. Marcelo also suggested to account for the timeout value used for the offloading, this avoids increase beyond the value in the conntrack-sysctl and will also instantly expire the conntrack entry with altered sysctls. Example: nf_conntrack_udp_timeout_stream=60 nf_flowtable_udp_timeout=60 This will remove offloaded udp flows after one minute, rather than two. An earlier version of this patch also cleared the ASSURED bit to allow nf_conntrack to evict the entry via early_drop (i.e., table full). However, it looks like we can safely assume that connection timed out via HW is still in established state, so this isn't needed. Quoting Oz: [..] the hardware sends all packets with a set FIN flags to sw. [..] Connections that are aged in hardware are expected to be in the established state. In case it turns out that back-to-sw-path transition can occur for 'dodgy' connections too (e.g., one side disappeared while software-path would have been in RETRANS timeout), we can adjust this later. Cc: Oz Shlomo Cc: Paul Blakey Suggested-by: Marcelo Ricardo Leitner Signed-off-by: Florian Westphal Reviewed-by: Marcelo Ricardo Leitner Reviewed-by: Oz Shlomo Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.rst | 10 ---------- include/net/netns/conntrack.h | 2 -- net/netfilter/nf_conntrack_proto_tcp.c | 1 - net/netfilter/nf_conntrack_proto_udp.c | 1 - net/netfilter/nf_conntrack_standalone.c | 16 ---------------- net/netfilter/nf_flow_table_core.c | 11 ++++++++--- 6 files changed, 8 insertions(+), 33 deletions(-) diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index d31ed6c1cb0d..024d784157c8 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -191,19 +191,9 @@ nf_flowtable_tcp_timeout - INTEGER (seconds) TCP connections may be offloaded from nf conntrack to nf flow table. Once aged, the connection is returned to nf conntrack with tcp pickup timeout. -nf_flowtable_tcp_pickup - INTEGER (seconds) - default 120 - - TCP connection timeout after being aged from nf flow table offload. - nf_flowtable_udp_timeout - INTEGER (seconds) default 30 Control offload timeout for udp connections. UDP connections may be offloaded from nf conntrack to nf flow table. Once aged, the connection is returned to nf conntrack with udp pickup timeout. - -nf_flowtable_udp_pickup - INTEGER (seconds) - default 30 - - UDP connection timeout after being aged from nf flow table offload. diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 37e5300c7e5a..fefd38db95b3 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -30,7 +30,6 @@ struct nf_tcp_net { u8 tcp_ignore_invalid_rst; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) unsigned int offload_timeout; - unsigned int offload_pickup; #endif }; @@ -44,7 +43,6 @@ struct nf_udp_net { unsigned int timeouts[UDP_CT_MAX]; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) unsigned int offload_timeout; - unsigned int offload_pickup; #endif }; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 3259416f2ea4..af5115e127cf 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1478,7 +1478,6 @@ void nf_conntrack_tcp_init_net(struct net *net) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) tn->offload_timeout = 30 * HZ; - tn->offload_pickup = 120 * HZ; #endif } diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 698fee49e732..f8e3c0d2602f 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -271,7 +271,6 @@ void nf_conntrack_udp_init_net(struct net *net) #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) un->offload_timeout = 30 * HZ; - un->offload_pickup = 30 * HZ; #endif } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 214d9f9e499b..e84b499b7bfa 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -575,7 +575,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD, - NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP, #endif NF_SYSCTL_CT_PROTO_TCP_LOOSE, NF_SYSCTL_CT_PROTO_TCP_LIBERAL, @@ -585,7 +584,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD, - NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP, #endif NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP, NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6, @@ -776,12 +774,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = { - .procname = "nf_flowtable_tcp_pickup", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, #endif [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = { .procname = "nf_conntrack_tcp_loose", @@ -832,12 +824,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = { - .procname = "nf_flowtable_udp_pickup", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, #endif [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = { .procname = "nf_conntrack_icmp_timeout", @@ -1018,7 +1004,6 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net, #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout; - table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup; #endif } @@ -1111,7 +1096,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED]; #if IS_ENABLED(CONFIG_NF_FLOW_TABLE) table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout; - table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup; #endif nf_conntrack_standalone_init_tcp_sysctl(net, table); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 551976e4284c..8788b519255e 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -183,7 +183,7 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) const struct nf_conntrack_l4proto *l4proto; struct net *net = nf_ct_net(ct); int l4num = nf_ct_protonum(ct); - unsigned int timeout; + s32 timeout; l4proto = nf_ct_l4proto_find(l4num); if (!l4proto) @@ -192,15 +192,20 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) if (l4num == IPPROTO_TCP) { struct nf_tcp_net *tn = nf_tcp_pernet(net); - timeout = tn->offload_pickup; + timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; + timeout -= tn->offload_timeout; } else if (l4num == IPPROTO_UDP) { struct nf_udp_net *tn = nf_udp_pernet(net); - timeout = tn->offload_pickup; + timeout = tn->timeouts[UDP_CT_REPLIED]; + timeout -= tn->offload_timeout; } else { return; } + if (timeout < 0) + timeout = 0; + if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) ct->timeout = nfct_time_stamp + timeout; } -- cgit From 269fc69533de73a9065c0b7971bcd109880290b3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 6 Aug 2021 12:33:57 +0200 Subject: netfilter: nfnetlink_hook: translate inet ingress to netdev The NFPROTO_INET pseudofamily is not exposed through this new netlink interface. The netlink dump either shows NFPROTO_IPV4 or NFPROTO_IPV6 for NFPROTO_INET prerouting/input/forward/output/postrouting hooks. The NFNLA_CHAIN_FAMILY attribute provides the family chain, which specifies if this hook applies to inet traffic only (either IPv4 or IPv6). Translate the inet/ingress hook to netdev/ingress to fully hide the NFPROTO_INET implementation details. Fixes: e2cf17d3774c ("netfilter: add new hook nfnl subsystem") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_hook.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 32eea785ae25..f554e2ea32ee 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -119,6 +119,7 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb, unsigned int portid = NETLINK_CB(nlskb).portid; struct nlmsghdr *nlh; int ret = -EMSGSIZE; + u32 hooknum; #ifdef CONFIG_KALLSYMS char sym[KSYM_SYMBOL_LEN]; char *module_name; @@ -156,7 +157,12 @@ static int nfnl_hook_dump_one(struct sk_buff *nlskb, goto nla_put_failure; #endif - ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(ops->hooknum)); + if (ops->pf == NFPROTO_INET && ops->hooknum == NF_INET_INGRESS) + hooknum = NF_NETDEV_INGRESS; + else + hooknum = ops->hooknum; + + ret = nla_put_be32(nlskb, NFNLA_HOOK_HOOKNUM, htonl(hooknum)); if (ret) goto nla_put_failure; -- cgit From ad89c9aa24603a6a26464316fab94d285792c942 Mon Sep 17 00:00:00 2001 From: Xiaomeng Hou Date: Mon, 2 Aug 2021 16:25:25 +0800 Subject: drm/amd/pm: update smu v13.0.1 firmware header Update smu v13.0.1 firmware header for yellow carp. Signed-off-by: Xiaomeng Hou Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h index 5627de734246..c5e26d619bf0 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v13_0_1_pmfw.h @@ -111,7 +111,9 @@ typedef struct { uint32_t InWhisperMode : 1; uint32_t spare0 : 1; uint32_t ZstateStatus : 4; - uint32_t spare1 :12; + uint32_t spare1 : 4; + uint32_t DstateFun : 4; + uint32_t DstateDev : 4; // MP1_EXT_SCRATCH2 uint32_t P2JobHandler :24; uint32_t RsmuPmiP2FinishedCnt : 8; -- cgit From 39932ef75897bfcb8ba1120e7b09d615d74762fd Mon Sep 17 00:00:00 2001 From: John Clements Date: Wed, 4 Aug 2021 17:11:40 +0800 Subject: drm/amdgpu: set RAS EEPROM address from VBIOS update to latest atombios fw table [Backport to 5.14 - Alex] Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1670 Signed-off-by: John Clements Reviewed-by: Hawking Zhang . Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 40 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 4 +++ drivers/gpu/drm/amd/include/atomfirmware.h | 2 +- 4 files changed, 46 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 3b5d13189073..8f53837d4d3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -468,6 +468,46 @@ bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *ade return (fw_cap & ATOM_FIRMWARE_CAP_DYNAMIC_BOOT_CFG_ENABLE) ? true : false; } +/* + * Helper function to query RAS EEPROM address + * + * @adev: amdgpu_device pointer + * + * Return true if vbios supports ras rom address reporting + */ +bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + int index; + u16 data_offset, size; + union firmware_info *firmware_info; + u8 frev, crev; + + if (i2c_address == NULL) + return false; + + *i2c_address = 0; + + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + + if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, + index, &size, &frev, &crev, &data_offset)) { + /* support firmware_info 3.4 + */ + if ((frev == 3 && crev >=4) || (frev > 3)) { + firmware_info = (union firmware_info *) + (mode_info->atom_context->bios + data_offset); + *i2c_address = firmware_info->v34.ras_rom_i2c_slave_addr; + } + } + + if (*i2c_address != 0) + return true; + + return false; +} + + union smu_info { struct atom_smu_info_v3_1 v31; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 1bbbb195015d..751248b253de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -36,6 +36,7 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev); +bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address); bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index f40c871da0c6..38222de921d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -26,6 +26,7 @@ #include "amdgpu_ras.h" #include #include "atom.h" +#include "amdgpu_atomfirmware.h" #define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 @@ -96,6 +97,9 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, if (!i2c_addr) return false; + if (amdgpu_atomfirmware_ras_rom_addr(adev, (uint8_t*)i2c_addr)) + return true; + switch (adev->asic_type) { case CHIP_VEGA20: *i2c_addr = EEPROM_I2C_TARGET_ADDR_VEGA20; diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 3811e58dd857..44955458fe38 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -590,7 +590,7 @@ struct atom_firmware_info_v3_4 { uint8_t board_i2c_feature_id; // enum of atom_board_i2c_feature_id_def uint8_t board_i2c_feature_gpio_id; // i2c id find in gpio_lut data table gpio_id uint8_t board_i2c_feature_slave_addr; - uint8_t reserved3; + uint8_t ras_rom_i2c_slave_addr; uint16_t bootup_mvddq_mv; uint16_t bootup_mvpp_mv; uint32_t zfbstartaddrin16mb; -- cgit From 202ead5a3c589b0594a75cb99f080174f6851fed Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 2 Aug 2021 10:03:30 -0400 Subject: drm/amdgpu: don't enable baco on boco platforms in runpm If the platform uses BOCO, don't use BACO in runtime suspend. We could end up executing the BACO path if the platform supports both. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1669 Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5ed8381ae0f5..971c5b8e75dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1571,6 +1571,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; + } else if (amdgpu_device_supports_boco(drm_dev)) { + /* nothing to do */ } else if (amdgpu_device_supports_baco(drm_dev)) { amdgpu_device_baco_enter(drm_dev); } -- cgit From 5126da7d99cf6396c929f3b577ba3aed1e74acd7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 5 Aug 2021 20:37:14 +0200 Subject: drm/amd/pm: Fix a memory leak in an error handling path in 'vangogh_tables_init()' 'watermarks_table' must be freed instead 'clocks_table', because 'clocks_table' is known to be NULL at this point and 'watermarks_table' is never freed if the last kzalloc fails. Fixes: c98ee89736b8 ("drm/amd/pm: add the fine grain tuning function for vangogh") Signed-off-by: Christophe JAILLET Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 18681dc458da..bcaaa086fc2f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -256,7 +256,7 @@ static int vangogh_tables_init(struct smu_context *smu) return 0; err3_out: - kfree(smu_table->clocks_table); + kfree(smu_table->watermarks_table); err2_out: kfree(smu_table->gpu_metrics_table); err1_out: -- cgit From 99dc4ad992bf156692b088fab4d98deab7cbd3e6 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 12 Jul 2021 15:52:14 +0100 Subject: mtd: devices: mchp48l640: Fix memory leak on cmd The allocation for cmd is not being kfree'd on the return leading to a memory leak. Fix this by kfree'ing it. Addresses-Coverity: ("Resource leak") Fixes: 88d125026753 ("mtd: devices: add support for microchip 48l640 EERAM") Signed-off-by: Colin Ian King Reviewed-by: Heiko Schocher Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210712145214.101377-1-colin.king@canonical.com --- drivers/mtd/devices/mchp48l640.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/devices/mchp48l640.c b/drivers/mtd/devices/mchp48l640.c index ad66b5aaf4e9..99400d0fb8c1 100644 --- a/drivers/mtd/devices/mchp48l640.c +++ b/drivers/mtd/devices/mchp48l640.c @@ -255,6 +255,7 @@ static int mchp48l640_read_page(struct mtd_info *mtd, loff_t from, size_t len, if (!ret) *retlen += len; + kfree(cmd); return ret; fail: -- cgit From b7abb051682263e51866bc78762fd0083d64c5ed Mon Sep 17 00:00:00 2001 From: Desmond Cheong Zhi Xi Date: Sat, 17 Jul 2021 18:07:19 +0800 Subject: mtd: fix lock hierarchy in deregister_mtd_blktrans There is a lock hierarchy of major_names_lock --> mtd_table_mutex. One existing chain is as follows: 1. major_names_lock --> loop_ctl_mutex (when blk_request_module calls loop_probe) 2. loop_ctl_mutex --> bdev->bd_mutex (when loop_control_ioctl calls loop_remove, which then calls del_gendisk) 3. bdev->bd_mutex --> mtd_table_mutex (when blkdev_get_by_dev calls __blkdev_get, which then calls blktrans_open) Since unregister_blkdev grabs the major_names_lock, we need to call it outside the critical section for mtd_table_mutex, otherwise we invert the lock hierarchy. Reported-by: Hillf Danton Signed-off-by: Desmond Cheong Zhi Xi Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210717100719.728829-1-desmondcheongzx@gmail.com --- drivers/mtd/mtd_blkdevs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 31b208fb225e..44bea3f65060 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -561,8 +561,8 @@ int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr) list_for_each_entry_safe(dev, next, &tr->devs, list) tr->remove_dev(dev); - unregister_blkdev(tr->major, tr->name); mutex_unlock(&mtd_table_mutex); + unregister_blkdev(tr->major, tr->name); BUG_ON(!list_empty(&tr->devs)); return 0; -- cgit From b48027083a78b13356695555a05b0e085e378687 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 27 Jul 2021 11:58:13 +0530 Subject: mtd: rawnand: Fix probe failure due to of_get_nand_secure_regions() Due to 14f97f0b8e2b, the rawnand platforms without "secure-regions" property defined in DT fails to probe. The issue is, of_get_nand_secure_regions() errors out if of_property_count_elems_of_size() returns a negative error code. If the "secure-regions" property is not present in DT, then also we'll get -EINVAL from of_property_count_elems_of_size() but it should not be treated as an error for platforms not declaring "secure-regions" in DT. So fix this behaviour by checking for the existence of that property in DT and return 0 if it is not present. Fixes: 14f97f0b8e2b ("mtd: rawnand: Add a check in of_get_nand_secure_regions()") Reported-by: Martin Kaiser Signed-off-by: Manivannan Sadhasivam Reviewed-by: Martin Kaiser Tested-by: Martin Kaiser Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210727062813.32619-1-manivannan.sadhasivam@linaro.org --- drivers/mtd/nand/raw/nand_base.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index cbba46432e39..3d6c6e880520 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5228,8 +5228,14 @@ static bool of_get_nand_on_flash_bbt(struct device_node *np) static int of_get_nand_secure_regions(struct nand_chip *chip) { struct device_node *dn = nand_get_flash_node(chip); + struct property *prop; int nr_elem, i, j; + /* Only proceed if the "secure-regions" property is present in DT */ + prop = of_find_property(dn, "secure-regions", NULL); + if (!prop) + return 0; + nr_elem = of_property_count_elems_of_size(dn, "secure-regions", sizeof(u64)); if (nr_elem <= 0) return nr_elem; -- cgit From 46dd2965bdd1c5a4f6499c73ff32e636fa8f9769 Mon Sep 17 00:00:00 2001 From: Solomon Chiu Date: Wed, 28 Jul 2021 00:20:30 +0800 Subject: drm/amdgpu: Add preferred mode in modeset when freesync video mode's enabled. [Why] With kernel module parameter "freesync_video" is enabled, if the mode is changed to preferred mode(the mode with highest rate), then Freesync fails because the preferred mode is treated as one of freesync video mode, and then be configurated as freesync video mode(fixed refresh rate). [How] Skip freesync fixed rate configurating when modeset to preferred mode. Signed-off-by: Solomon Chiu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c0ae73b0691c..afa96c8f721b 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9605,7 +9605,12 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, } else if (amdgpu_freesync_vid_mode && aconnector && is_freesync_video_mode(&new_crtc_state->mode, aconnector)) { - set_freesync_fixed_config(dm_new_crtc_state); + struct drm_display_mode *high_mode; + + high_mode = get_highest_refresh_rate_mode(aconnector, false); + if (!drm_mode_equal(&new_crtc_state->mode, high_mode)) { + set_freesync_fixed_config(dm_new_crtc_state); + } } ret = dm_atomic_get_state(state, &dm_state); -- cgit From 62376365048878f770d8b7d11b89b8b3e18018f1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 3 Aug 2021 15:14:27 +0000 Subject: powerpc/32s: Fix napping restore in data storage interrupt (DSI) When a DSI (Data Storage Interrupt) is taken while in NAP mode, r11 doesn't survive the call to power_save_ppc32_restore(). So use r1 instead of r11 as they both contain the virtual stack pointer at that point. Fixes: 4c0104a83fc3 ("powerpc/32: Dismantle EXC_XFER_STD/LITE/TEMPLATE") Cc: stable@vger.kernel.org # v5.13+ Reported-by: Finn Thain Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/731694e0885271f6ee9ffc179eb4bcee78313682.1628003562.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_book3s_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 764edd860ed4..68e5c0a7e99d 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -300,7 +300,7 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE) EXCEPTION_PROLOG_1 EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1 prepare_transfer_to_handler - lwz r5, _DSISR(r11) + lwz r5, _DSISR(r1) andis. r0, r5, DSISR_DABRMATCH@h bne- 1f bl do_page_fault -- cgit From b5cfc9cd7b0426e94ffd9e9ed79d1b00ace7780a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 7 Jul 2021 05:55:07 +0000 Subject: powerpc/32: Fix critical and debug interrupts on BOOKE 32 bits BOOKE have special interrupts for debug and other critical events. When handling those interrupts, dedicated registers are saved in the stack frame in addition to the standard registers, leading to a shift of the pt_regs struct. Since commit db297c3b07af ("powerpc/32: Don't save thread.regs on interrupt entry"), the pt_regs struct is expected to be at the same place all the time. Instead of handling a special struct in addition to pt_regs, just add those special registers to struct pt_regs. Fixes: db297c3b07af ("powerpc/32: Don't save thread.regs on interrupt entry") Cc: stable@vger.kernel.org Reported-by: Radu Rendec Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/028d5483b4851b01ea4334d0751e7f260419092b.1625637264.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/ptrace.h | 16 ++++++++++++++++ arch/powerpc/kernel/asm-offsets.c | 31 ++++++++++++++----------------- arch/powerpc/kernel/head_booke.h | 27 +++------------------------ 3 files changed, 33 insertions(+), 41 deletions(-) diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 3e5d470a6155..14422e851494 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -70,6 +70,22 @@ struct pt_regs unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */ }; #endif +#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE) + struct { /* Must be a multiple of 16 bytes */ + unsigned long mas0; + unsigned long mas1; + unsigned long mas2; + unsigned long mas3; + unsigned long mas6; + unsigned long mas7; + unsigned long srr0; + unsigned long srr1; + unsigned long csrr0; + unsigned long csrr1; + unsigned long dsrr0; + unsigned long dsrr1; + }; +#endif }; #endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index a47eefa09bcb..5bee245d832b 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -309,24 +309,21 @@ int main(void) STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr); #endif -#if defined(CONFIG_PPC32) -#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) - DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE); - DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); +#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE) + STACK_PT_REGS_OFFSET(MAS0, mas0); /* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */ - DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0)); - DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1)); - DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2)); - DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3)); - DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6)); - DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7)); - DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0)); - DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1)); - DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0)); - DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1)); - DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0)); - DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1)); -#endif + STACK_PT_REGS_OFFSET(MMUCR, mas0); + STACK_PT_REGS_OFFSET(MAS1, mas1); + STACK_PT_REGS_OFFSET(MAS2, mas2); + STACK_PT_REGS_OFFSET(MAS3, mas3); + STACK_PT_REGS_OFFSET(MAS6, mas6); + STACK_PT_REGS_OFFSET(MAS7, mas7); + STACK_PT_REGS_OFFSET(_SRR0, srr0); + STACK_PT_REGS_OFFSET(_SRR1, srr1); + STACK_PT_REGS_OFFSET(_CSRR0, csrr0); + STACK_PT_REGS_OFFSET(_CSRR1, csrr1); + STACK_PT_REGS_OFFSET(_DSRR0, dsrr0); + STACK_PT_REGS_OFFSET(_DSRR1, dsrr1); #endif /* About the CPU features table */ diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 87b806e8eded..e5503420b6c6 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -168,20 +168,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) /* only on e500mc */ #define DBG_STACK_BASE dbgirq_ctx -#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE) - #ifdef CONFIG_SMP #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ mfspr r8,SPRN_PIR; \ slwi r8,r8,2; \ addis r8,r8,level##_STACK_BASE@ha; \ lwz r8,level##_STACK_BASE@l(r8); \ - addi r8,r8,EXC_LVL_FRAME_OVERHEAD; + addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE; #else #define BOOKE_LOAD_EXC_LEVEL_STACK(level) \ lis r8,level##_STACK_BASE@ha; \ lwz r8,level##_STACK_BASE@l(r8); \ - addi r8,r8,EXC_LVL_FRAME_OVERHEAD; + addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE; #endif /* @@ -208,7 +206,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) mtmsr r11; \ mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\ lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\ - addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\ + addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE; /* allocate stack frame */\ beq 1f; \ /* COMING FROM USER MODE */ \ stw r9,_CCR(r11); /* save CR */\ @@ -516,24 +514,5 @@ label: bl kernel_fp_unavailable_exception; \ b interrupt_return -#else /* __ASSEMBLY__ */ -struct exception_regs { - unsigned long mas0; - unsigned long mas1; - unsigned long mas2; - unsigned long mas3; - unsigned long mas6; - unsigned long mas7; - unsigned long srr0; - unsigned long srr1; - unsigned long csrr0; - unsigned long csrr1; - unsigned long dsrr0; - unsigned long dsrr1; -}; - -/* ensure this structure is always sized to a multiple of the stack alignment */ -#define STACK_EXC_LVL_FRAME_SIZE ALIGN(sizeof (struct exception_regs), 16) - #endif /* __ASSEMBLY__ */ #endif /* __HEAD_BOOKE_H__ */ -- cgit From 8241461536f21bbe51308a6916d1c9fb2e6b75a7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 4 Aug 2021 18:24:10 +0000 Subject: powerpc/smp: Fix OOPS in topology_init() Running an SMP kernel on an UP platform not prepared for it, I encountered the following OOPS: BUG: Kernel NULL pointer dereference on read at 0x00000034 Faulting instruction address: 0xc0a04110 Oops: Kernel access of bad area, sig: 11 [#1] BE PAGE_SIZE=4K SMP NR_CPUS=2 CMPCPRO Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.13.0-pmac-00001-g230fedfaad21 #5234 NIP: c0a04110 LR: c0a040d8 CTR: c0a04084 REGS: e100dda0 TRAP: 0300 Not tainted (5.13.0-pmac-00001-g230fedfaad21) MSR: 00009032 CR: 84000284 XER: 00000000 DAR: 00000034 DSISR: 20000000 GPR00: c0006bd4 e100de60 c1033320 00000000 00000000 c0942274 00000000 00000000 GPR08: 00000000 00000000 00000001 00000063 00000007 00000000 c0006f30 00000000 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000005 GPR24: c0c67d74 c0c67f1c c0c60000 c0c67d70 c0c0c558 1efdf000 c0c00020 00000000 NIP [c0a04110] topology_init+0x8c/0x138 LR [c0a040d8] topology_init+0x54/0x138 Call Trace: [e100de60] [80808080] 0x80808080 (unreliable) [e100de90] [c0006bd4] do_one_initcall+0x48/0x1bc [e100def0] [c0a0150c] kernel_init_freeable+0x1c8/0x278 [e100df20] [c0006f44] kernel_init+0x14/0x10c [e100df30] [c00190fc] ret_from_kernel_thread+0x14/0x1c Instruction dump: 7c692e70 7d290194 7c035040 7c7f1b78 5529103a 546706fe 5468103a 39400001 7c641b78 40800054 80c690b4 7fb9402e <81060034> 7fbeea14 2c080000 7fa3eb78 ---[ end trace b246ffbc6bbbb6fb ]--- Fix it by checking smp_ops before using it, as already done in several other places in the arch/powerpc/kernel/smp.c Fixes: 39f87561454d ("powerpc/smp: Move ppc_md.cpu_die() to smp_ops.cpu_offline_self()") Cc: stable@vger.kernel.org Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/75287841cbb8740edd44880fe60be66d489160d9.1628097995.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 5ff0e55d0db1..defecb3b1b15 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -1167,7 +1167,7 @@ static int __init topology_init(void) * CPU. For instance, the boot cpu might never be valid * for hotplugging. */ - if (smp_ops->cpu_offline_self) + if (smp_ops && smp_ops->cpu_offline_self) c->hotpluggable = 1; #endif -- cgit From c18956e6e0b95f78dad2773ecc8c61a9e41f6405 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Thu, 5 Aug 2021 17:23:08 +0200 Subject: powerpc/pseries: Fix update of LPAR security flavor after LPM After LPM, when migrating from a system with security mitigation enabled to a system with mitigation disabled, the security flavor exposed in /proc is not correctly set back to 0. Do not assume the value of the security flavor is set to 0 when entering init_cpu_char_feature_flags(), so when called after a LPM, the value is set correctly even if the mitigation are not turned off. Fixes: 6ce56e1ac380 ("powerpc/pseries: export LPAR security flavor in lparcfg") Cc: stable@vger.kernel.org # v5.13+ Signed-off-by: Laurent Dufour Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210805152308.33988-1-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/setup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 6b0886668465..0dfaa6ab44cc 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -539,9 +539,10 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) * H_CPU_BEHAV_FAVOUR_SECURITY_H could be set only if * H_CPU_BEHAV_FAVOUR_SECURITY is. */ - if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) + if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) { security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); - else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) + pseries_security_flavor = 0; + } else if (result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY_H) pseries_security_flavor = 1; else pseries_security_flavor = 2; -- cgit From 78d14bda861dd2729f15bb438fe355b48514bfe0 Mon Sep 17 00:00:00 2001 From: Robin Gögge Date: Thu, 29 Jul 2021 00:58:25 +0200 Subject: libbpf: Fix probe for BPF_PROG_TYPE_CGROUP_SOCKOPT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the probe for BPF_PROG_TYPE_CGROUP_SOCKOPT, so the probe reports accurate results when used by e.g. bpftool. Fixes: 4cdbfb59c44a ("libbpf: support sockopt hooks") Signed-off-by: Robin Gögge Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20210728225825.2357586-1-r.goegge@gmail.com --- tools/lib/bpf/libbpf_probes.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index ecaae2927ab8..cd8c703dde71 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -75,6 +75,9 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; break; + case BPF_PROG_TYPE_CGROUP_SOCKOPT: + xattr.expected_attach_type = BPF_CGROUP_GETSOCKOPT; + break; case BPF_PROG_TYPE_SK_LOOKUP: xattr.expected_attach_type = BPF_SK_LOOKUP; break; @@ -104,7 +107,6 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: case BPF_PROG_TYPE_CGROUP_SYSCTL: - case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_TRACING: case BPF_PROG_TYPE_STRUCT_OPS: case BPF_PROG_TYPE_EXT: -- cgit From c34c338a40e4f3b6f80889cd17fd9281784d1c32 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 28 Jul 2021 16:09:21 -0700 Subject: libbpf: Do not close un-owned FD 0 on errors Before this patch, btf_new() was liable to close an arbitrary FD 0 if BTF parsing failed. This was because: * btf->fd was initialized to 0 through the calloc() * btf__free() (in the `done` label) closed any FDs >= 0 * btf->fd is left at 0 if parsing fails This issue was discovered on a system using libbpf v0.3 (without BTF_KIND_FLOAT support) but with a kernel that had BTF_KIND_FLOAT types in BTF. Thus, parsing fails. While this patch technically doesn't fix any issues b/c upstream libbpf has BTF_KIND_FLOAT support, it'll help prevent issues in the future if more BTF types are added. It also allow the fix to be backported to older libbpf's. Fixes: 3289959b97ca ("libbpf: Support BTF loading and raw data output in both endianness") Signed-off-by: Daniel Xu Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/5969bb991adedb03c6ae93e051fd2a00d293cf25.1627513670.git.dxu@dxuuu.xyz --- tools/lib/bpf/btf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index b46760b93bb4..7ff3d5ce44f9 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -804,6 +804,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) btf->nr_types = 0; btf->start_id = 1; btf->start_str_off = 0; + btf->fd = -1; if (base_btf) { btf->base_btf = base_btf; @@ -832,8 +833,6 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) if (err) goto done; - btf->fd = -1; - done: if (err) { btf__free(btf); -- cgit From 7c4a22339e7ce7b6ed473a8e682da622c3a774ee Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 1 Aug 2021 18:50:37 -0700 Subject: libbpf, doc: Eliminate warnings in libbpf_naming_convention Use "code-block: none" instead of "c" for non-C-language code blocks. Removes these warnings: lnx-514-rc4/Documentation/bpf/libbpf/libbpf_naming_convention.rst:111: WARNING: Could not lex literal_block as "c". Highlighting skipped. lnx-514-rc4/Documentation/bpf/libbpf/libbpf_naming_convention.rst:124: WARNING: Could not lex literal_block as "c". Highlighting skipped. Fixes: f42cfb469f9b ("bpf: Add documentation for libbpf including API autogen") Signed-off-by: Randy Dunlap Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210802015037.787-1-rdunlap@infradead.org --- Documentation/bpf/libbpf/libbpf_naming_convention.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/bpf/libbpf/libbpf_naming_convention.rst b/Documentation/bpf/libbpf/libbpf_naming_convention.rst index 3de1d51e41da..6bf9c5ac7576 100644 --- a/Documentation/bpf/libbpf/libbpf_naming_convention.rst +++ b/Documentation/bpf/libbpf/libbpf_naming_convention.rst @@ -108,7 +108,7 @@ This bump in ABI version is at most once per kernel development cycle. For example, if current state of ``libbpf.map`` is: -.. code-block:: c +.. code-block:: none LIBBPF_0.0.1 { global: @@ -121,7 +121,7 @@ For example, if current state of ``libbpf.map`` is: , and a new symbol ``bpf_func_c`` is being introduced, then ``libbpf.map`` should be changed like this: -.. code-block:: c +.. code-block:: none LIBBPF_0.0.1 { global: -- cgit From c4eb1f403243fc7bbb7de644db8587c03de36da6 Mon Sep 17 00:00:00 2001 From: Tatsuhiko Yasumatsu Date: Sat, 7 Aug 2021 00:04:18 +0900 Subject: bpf: Fix integer overflow involving bucket_size In __htab_map_lookup_and_delete_batch(), hash buckets are iterated over to count the number of elements in each bucket (bucket_size). If bucket_size is large enough, the multiplication to calculate kvmalloc() size could overflow, resulting in out-of-bounds write as reported by KASAN: [...] [ 104.986052] BUG: KASAN: vmalloc-out-of-bounds in __htab_map_lookup_and_delete_batch+0x5ce/0xb60 [ 104.986489] Write of size 4194224 at addr ffffc9010503be70 by task crash/112 [ 104.986889] [ 104.987193] CPU: 0 PID: 112 Comm: crash Not tainted 5.14.0-rc4 #13 [ 104.987552] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 [ 104.988104] Call Trace: [ 104.988410] dump_stack_lvl+0x34/0x44 [ 104.988706] print_address_description.constprop.0+0x21/0x140 [ 104.988991] ? __htab_map_lookup_and_delete_batch+0x5ce/0xb60 [ 104.989327] ? __htab_map_lookup_and_delete_batch+0x5ce/0xb60 [ 104.989622] kasan_report.cold+0x7f/0x11b [ 104.989881] ? __htab_map_lookup_and_delete_batch+0x5ce/0xb60 [ 104.990239] kasan_check_range+0x17c/0x1e0 [ 104.990467] memcpy+0x39/0x60 [ 104.990670] __htab_map_lookup_and_delete_batch+0x5ce/0xb60 [ 104.990982] ? __wake_up_common+0x4d/0x230 [ 104.991256] ? htab_of_map_free+0x130/0x130 [ 104.991541] bpf_map_do_batch+0x1fb/0x220 [...] In hashtable, if the elements' keys have the same jhash() value, the elements will be put into the same bucket. By putting a lot of elements into a single bucket, the value of bucket_size can be increased to trigger the integer overflow. Triggering the overflow is possible for both callers with CAP_SYS_ADMIN and callers without CAP_SYS_ADMIN. It will be trivial for a caller with CAP_SYS_ADMIN to intentionally reach this overflow by enabling BPF_F_ZERO_SEED. As this flag will set the random seed passed to jhash() to 0, it will be easy for the caller to prepare keys which will be hashed into the same value, and thus put all the elements into the same bucket. If the caller does not have CAP_SYS_ADMIN, BPF_F_ZERO_SEED cannot be used. However, it will be still technically possible to trigger the overflow, by guessing the random seed value passed to jhash() (32bit) and repeating the attempt to trigger the overflow. In this case, the probability to trigger the overflow will be low and will take a very long time. Fix the integer overflow by calling kvmalloc_array() instead of kvmalloc() to allocate memory. Fixes: 057996380a42 ("bpf: Add batch ops to all htab bpf map") Signed-off-by: Tatsuhiko Yasumatsu Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210806150419.109658-1-th.yasumatsu@gmail.com --- kernel/bpf/hashtab.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 72c58cc516a3..9c011f3a2687 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1565,8 +1565,8 @@ alloc: /* We cannot do copy_from_user or copy_to_user inside * the rcu_read_lock. Allocate enough space here. */ - keys = kvmalloc(key_size * bucket_size, GFP_USER | __GFP_NOWARN); - values = kvmalloc(value_size * bucket_size, GFP_USER | __GFP_NOWARN); + keys = kvmalloc_array(key_size, bucket_size, GFP_USER | __GFP_NOWARN); + values = kvmalloc_array(value_size, bucket_size, GFP_USER | __GFP_NOWARN); if (!keys || !values) { ret = -ENOMEM; goto after_loop; -- cgit From be7ecbd240b2f9ec544d3ce6fccf4cec3cd15dca Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Tue, 3 Aug 2021 14:35:38 +0300 Subject: soc: fsl: qe: convert QE interrupt controller to platform_device Since 5.13 QE's ucc nodes can't get interrupts from devicetree: ucc@2000 { cell-index = <1>; reg = <0x2000 0x200>; interrupts = <32>; interrupt-parent = <&qeic>; }; Now fw_devlink expects driver to create and probe a struct device for interrupt controller. So lets convert this driver to simple platform_device with probe(). Also use platform_get_ and devm_ family function to get/allocate resources and drop unused .compatible = "qeic". [1] - https://lore.kernel.org/lkml/CAGETcx9PiX==mLxB9PO8Myyk6u2vhPVwTMsA5NkD-ywH5xhusw@mail.gmail.com Fixes: e590474768f1 ("driver core: Set fw_devlink=on by default") Fixes: ea718c699055 ("Revert "Revert "driver core: Set fw_devlink=on by default""") Signed-off-by: Maxim Kochetkov Reported-by: kernel test robot Reported-by: Dan Carpenter Acked-by: Saravana Kannan Signed-off-by: Li Yang --- drivers/soc/fsl/qe/qe_ic.c | 75 +++++++++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/drivers/soc/fsl/qe/qe_ic.c b/drivers/soc/fsl/qe/qe_ic.c index 3f711c1a0996..e710d554425d 100644 --- a/drivers/soc/fsl/qe/qe_ic.c +++ b/drivers/soc/fsl/qe/qe_ic.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -404,41 +405,40 @@ static void qe_ic_cascade_muxed_mpic(struct irq_desc *desc) chip->irq_eoi(&desc->irq_data); } -static void __init qe_ic_init(struct device_node *node) +static int qe_ic_init(struct platform_device *pdev) { + struct device *dev = &pdev->dev; void (*low_handler)(struct irq_desc *desc); void (*high_handler)(struct irq_desc *desc); struct qe_ic *qe_ic; - struct resource res; - u32 ret; + struct resource *res; + struct device_node *node = pdev->dev.of_node; - ret = of_address_to_resource(node, 0, &res); - if (ret) - return; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res == NULL) { + dev_err(dev, "no memory resource defined\n"); + return -ENODEV; + } - qe_ic = kzalloc(sizeof(*qe_ic), GFP_KERNEL); + qe_ic = devm_kzalloc(dev, sizeof(*qe_ic), GFP_KERNEL); if (qe_ic == NULL) - return; + return -ENOMEM; - qe_ic->irqhost = irq_domain_add_linear(node, NR_QE_IC_INTS, - &qe_ic_host_ops, qe_ic); - if (qe_ic->irqhost == NULL) { - kfree(qe_ic); - return; + qe_ic->regs = devm_ioremap(dev, res->start, resource_size(res)); + if (qe_ic->regs == NULL) { + dev_err(dev, "failed to ioremap() registers\n"); + return -ENODEV; } - qe_ic->regs = ioremap(res.start, resource_size(&res)); - qe_ic->hc_irq = qe_ic_irq_chip; - qe_ic->virq_high = irq_of_parse_and_map(node, 0); - qe_ic->virq_low = irq_of_parse_and_map(node, 1); + qe_ic->virq_high = platform_get_irq(pdev, 0); + qe_ic->virq_low = platform_get_irq(pdev, 1); - if (!qe_ic->virq_low) { - printk(KERN_ERR "Failed to map QE_IC low IRQ\n"); - kfree(qe_ic); - return; + if (qe_ic->virq_low < 0) { + return -ENODEV; } + if (qe_ic->virq_high != qe_ic->virq_low) { low_handler = qe_ic_cascade_low; high_handler = qe_ic_cascade_high; @@ -447,6 +447,13 @@ static void __init qe_ic_init(struct device_node *node) high_handler = NULL; } + qe_ic->irqhost = irq_domain_add_linear(node, NR_QE_IC_INTS, + &qe_ic_host_ops, qe_ic); + if (qe_ic->irqhost == NULL) { + dev_err(dev, "failed to add irq domain\n"); + return -ENODEV; + } + qe_ic_write(qe_ic->regs, QEIC_CICR, 0); irq_set_handler_data(qe_ic->virq_low, qe_ic); @@ -456,20 +463,26 @@ static void __init qe_ic_init(struct device_node *node) irq_set_handler_data(qe_ic->virq_high, qe_ic); irq_set_chained_handler(qe_ic->virq_high, high_handler); } + return 0; } +static const struct of_device_id qe_ic_ids[] = { + { .compatible = "fsl,qe-ic"}, + { .type = "qeic"}, + {}, +}; -static int __init qe_ic_of_init(void) +static struct platform_driver qe_ic_driver = { - struct device_node *np; + .driver = { + .name = "qe-ic", + .of_match_table = qe_ic_ids, + }, + .probe = qe_ic_init, +}; - np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic"); - if (!np) { - np = of_find_node_by_type(NULL, "qeic"); - if (!np) - return -ENODEV; - } - qe_ic_init(np); - of_node_put(np); +static int __init qe_ic_of_init(void) +{ + platform_driver_register(&qe_ic_driver); return 0; } subsys_initcall(qe_ic_of_init); -- cgit From 739d0959fbed23838a96c48fbce01dd2f6fb2c5f Mon Sep 17 00:00:00 2001 From: Luke D Jones Date: Sat, 7 Aug 2021 14:58:05 +1200 Subject: ALSA: hda: Add quirk for ASUS Flow x13 The ASUS GV301QH sound appears to work well with the quirk for ALC294_FIXUP_ASUS_DUAL_SPK. Signed-off-by: Luke D Jones Cc: Link: https://lore.kernel.org/r/20210807025805.27321-1-luke@ljones.dev Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 21c521596c9d..4043a2362f27 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8465,6 +8465,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x1740, "ASUS UX430UA", ALC295_FIXUP_ASUS_DACS), SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK), + SND_PCI_QUIRK(0x1043, 0x1662, "ASUS GV301QH", ALC294_FIXUP_ASUS_DUAL_SPK), SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS), SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC), -- cgit From 2115d3d482656ea702f7cf308c0ded3500282903 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Fri, 6 Aug 2021 17:15:55 +0800 Subject: Revert "r8169: avoid link-up interrupt issue on RTL8106e if user enables ASPM" This reverts commit 1ee8856de82faec9bc8bd0f2308a7f27e30ba207. This is used to re-enable ASPM on RTL8106e, if it is possible. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index c7af5bc3b8af..731631511dcf 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -3508,6 +3508,7 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp) rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000); rtl_pcie_state_l2l3_disable(tp); + rtl_hw_aspm_clkreq_enable(tp, true); } DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond) -- cgit From 9c40186488145b57f800de120f0872168772adfe Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Fri, 6 Aug 2021 17:15:56 +0800 Subject: r8169: change the L0/L1 entrance latencies for RTL8106e The original L0 and L1 entrance latencies of RTL8106e are 4us. And they cause the delay of link-up interrupt when enabling ASPM. Change the L0 entrance latency to 7us and L1 entrance latency to 32us. Then, they could avoid the issue. Tested-by: Koba Ko Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 731631511dcf..4d8e337f5085 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -3502,6 +3502,9 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp) RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET); RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN); + /* The default value is 0x13. Change it to 0x2f */ + rtl_csi_access_enable(tp, 0x2f); + rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000); /* disable EEE */ -- cgit From 47fac45600aafc5939d9620055c3c46f7135d316 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 6 Aug 2021 11:47:23 +0200 Subject: net: dsa: qca: ar9331: make proper initial port defaults Make sure that all external port are actually isolated from each other, so no packets are leaked. Fixes: ec6698c272de ("net: dsa: add support for Atheros AR9331 built-in switch") Signed-off-by: Oleksij Rempel Signed-off-by: David S. Miller --- drivers/net/dsa/qca/ar9331.c | 73 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index 6686192e1883..563d8a279030 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -101,6 +101,23 @@ AR9331_SW_PORT_STATUS_RX_FLOW_EN | AR9331_SW_PORT_STATUS_TX_FLOW_EN | \ AR9331_SW_PORT_STATUS_SPEED_M) +#define AR9331_SW_REG_PORT_CTRL(_port) (0x104 + (_port) * 0x100) +#define AR9331_SW_PORT_CTRL_HEAD_EN BIT(11) +#define AR9331_SW_PORT_CTRL_PORT_STATE GENMASK(2, 0) +#define AR9331_SW_PORT_CTRL_PORT_STATE_DISABLED 0 +#define AR9331_SW_PORT_CTRL_PORT_STATE_BLOCKING 1 +#define AR9331_SW_PORT_CTRL_PORT_STATE_LISTENING 2 +#define AR9331_SW_PORT_CTRL_PORT_STATE_LEARNING 3 +#define AR9331_SW_PORT_CTRL_PORT_STATE_FORWARD 4 + +#define AR9331_SW_REG_PORT_VLAN(_port) (0x108 + (_port) * 0x100) +#define AR9331_SW_PORT_VLAN_8021Q_MODE GENMASK(31, 30) +#define AR9331_SW_8021Q_MODE_SECURE 3 +#define AR9331_SW_8021Q_MODE_CHECK 2 +#define AR9331_SW_8021Q_MODE_FALLBACK 1 +#define AR9331_SW_8021Q_MODE_NONE 0 +#define AR9331_SW_PORT_VLAN_PORT_VID_MEMBER GENMASK(25, 16) + /* MIB registers */ #define AR9331_MIB_COUNTER(x) (0x20000 + ((x) * 0x100)) @@ -371,12 +388,60 @@ static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv) return 0; } -static int ar9331_sw_setup(struct dsa_switch *ds) +static int ar9331_sw_setup_port(struct dsa_switch *ds, int port) { struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; struct regmap *regmap = priv->regmap; + u32 port_mask, port_ctrl, val; int ret; + /* Generate default port settings */ + port_ctrl = FIELD_PREP(AR9331_SW_PORT_CTRL_PORT_STATE, + AR9331_SW_PORT_CTRL_PORT_STATE_FORWARD); + + if (dsa_is_cpu_port(ds, port)) { + /* CPU port should be allowed to communicate with all user + * ports. + */ + port_mask = dsa_user_ports(ds); + /* Enable Atheros header on CPU port. This will allow us + * communicate with each port separately + */ + port_ctrl |= AR9331_SW_PORT_CTRL_HEAD_EN; + } else if (dsa_is_user_port(ds, port)) { + /* User ports should communicate only with the CPU port. + */ + port_mask = BIT(dsa_upstream_port(ds, port)); + } else { + /* Other ports do not need to communicate at all */ + port_mask = 0; + } + + val = FIELD_PREP(AR9331_SW_PORT_VLAN_8021Q_MODE, + AR9331_SW_8021Q_MODE_NONE) | + FIELD_PREP(AR9331_SW_PORT_VLAN_PORT_VID_MEMBER, port_mask); + + ret = regmap_write(regmap, AR9331_SW_REG_PORT_VLAN(port), val); + if (ret) + goto error; + + ret = regmap_write(regmap, AR9331_SW_REG_PORT_CTRL(port), port_ctrl); + if (ret) + goto error; + + return 0; +error: + dev_err(priv->dev, "%s: error: %i\n", __func__, ret); + + return ret; +} + +static int ar9331_sw_setup(struct dsa_switch *ds) +{ + struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv; + struct regmap *regmap = priv->regmap; + int ret, i; + ret = ar9331_sw_reset(priv); if (ret) return ret; @@ -402,6 +467,12 @@ static int ar9331_sw_setup(struct dsa_switch *ds) if (ret) goto error; + for (i = 0; i < ds->num_ports; i++) { + ret = ar9331_sw_setup_port(ds, i); + if (ret) + goto error; + } + ds->configure_vlan_while_not_filtering = false; return 0; -- cgit From 34737e1320db6d51f0d140d5c684b9eb32f0da76 Mon Sep 17 00:00:00 2001 From: Loic Poulain Date: Fri, 6 Aug 2021 12:35:09 +0200 Subject: net: wwan: mhi_wwan_ctrl: Fix possible deadlock Lockdep detected possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&mhiwwan->rx_lock); local_irq_disable(); lock(&mhi_cntrl->pm_lock); lock(&mhiwwan->rx_lock); lock(&mhi_cntrl->pm_lock); *** DEADLOCK *** To prevent this we need to disable the soft-interrupts when taking the rx_lock. Cc: stable@vger.kernel.org Fixes: fa588eba632d ("net: Add Qcom WWAN control driver") Reported-by: Thomas Perrot Signed-off-by: Loic Poulain Reviewed-by: Sergey Ryazanov Signed-off-by: David S. Miller --- drivers/net/wwan/mhi_wwan_ctrl.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c index 1e18420ce404..d0a98f34c54d 100644 --- a/drivers/net/wwan/mhi_wwan_ctrl.c +++ b/drivers/net/wwan/mhi_wwan_ctrl.c @@ -41,14 +41,14 @@ struct mhi_wwan_dev { /* Increment RX budget and schedule RX refill if necessary */ static void mhi_wwan_rx_budget_inc(struct mhi_wwan_dev *mhiwwan) { - spin_lock(&mhiwwan->rx_lock); + spin_lock_bh(&mhiwwan->rx_lock); mhiwwan->rx_budget++; if (test_bit(MHI_WWAN_RX_REFILL, &mhiwwan->flags)) schedule_work(&mhiwwan->rx_refill); - spin_unlock(&mhiwwan->rx_lock); + spin_unlock_bh(&mhiwwan->rx_lock); } /* Decrement RX budget if non-zero and return true on success */ @@ -56,7 +56,7 @@ static bool mhi_wwan_rx_budget_dec(struct mhi_wwan_dev *mhiwwan) { bool ret = false; - spin_lock(&mhiwwan->rx_lock); + spin_lock_bh(&mhiwwan->rx_lock); if (mhiwwan->rx_budget) { mhiwwan->rx_budget--; @@ -64,7 +64,7 @@ static bool mhi_wwan_rx_budget_dec(struct mhi_wwan_dev *mhiwwan) ret = true; } - spin_unlock(&mhiwwan->rx_lock); + spin_unlock_bh(&mhiwwan->rx_lock); return ret; } @@ -130,9 +130,9 @@ static void mhi_wwan_ctrl_stop(struct wwan_port *port) { struct mhi_wwan_dev *mhiwwan = wwan_port_get_drvdata(port); - spin_lock(&mhiwwan->rx_lock); + spin_lock_bh(&mhiwwan->rx_lock); clear_bit(MHI_WWAN_RX_REFILL, &mhiwwan->flags); - spin_unlock(&mhiwwan->rx_lock); + spin_unlock_bh(&mhiwwan->rx_lock); cancel_work_sync(&mhiwwan->rx_refill); -- cgit From 2383cb9497d113360137a2be308b390faa80632d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 7 Aug 2021 02:06:18 +0200 Subject: net: phy: micrel: Fix link detection on ksz87xx switch" Commit a5e63c7d38d5 "net: phy: micrel: Fix detection of ksz87xx switch" broke link detection on the external ports of the KSZ8795. The previously unused phy_driver structure for these devices specifies config_aneg and read_status functions that appear to be designed for a fixed link and do not work with the embedded PHYs in the KSZ8795. Delete the use of these functions in favour of the generic PHY implementations which were used previously. Fixes: a5e63c7d38d5 ("net: phy: micrel: Fix detection of ksz87xx switch") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/phy/micrel.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 53bdd673ae56..5c928f827173 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1760,8 +1760,6 @@ static struct phy_driver ksphy_driver[] = { .name = "Micrel KSZ87XX Switch", /* PHY_BASIC_FEATURES */ .config_init = kszphy_config_init, - .config_aneg = ksz8873mll_config_aneg, - .read_status = ksz8873mll_read_status, .match_phy_device = ksz8795_match_phy_device, .suspend = genphy_suspend, .resume = genphy_resume, -- cgit From d329e41a08f37c478159d5c3379a17b9c07befa3 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Fri, 6 Aug 2021 18:15:46 -0700 Subject: ptp: Fix possible memory leak caused by invalid cast Fixes possible leak of PTP virtual clocks. The number of PTP virtual clocks to be unregistered is passed as 'u32', but the function that unregister the devices handles that as 'u8'. Fixes: 73f37068d540 ("ptp: support ptp physical/virtual clocks conversion") Signed-off-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- drivers/ptp/ptp_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_sysfs.c b/drivers/ptp/ptp_sysfs.c index b3d96b747292..41b92dc2f011 100644 --- a/drivers/ptp/ptp_sysfs.c +++ b/drivers/ptp/ptp_sysfs.c @@ -154,7 +154,7 @@ static int unregister_vclock(struct device *dev, void *data) struct ptp_clock *ptp = dev_get_drvdata(dev); struct ptp_clock_info *info = ptp->info; struct ptp_vclock *vclock; - u8 *num = data; + u32 *num = data; vclock = info_to_vclock(info); dev_info(dev->parent, "delete virtual clock ptp%d\n", -- cgit From 1027b96ec9d34f9abab69bc1a4dc5b1ad8ab1349 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 6 Aug 2021 16:21:24 +0800 Subject: once: Fix panic when module unload DO_ONCE DEFINE_STATIC_KEY_TRUE(___once_key); __do_once_done once_disable_jump(once_key); INIT_WORK(&w->work, once_deferred); struct once_work *w; w->key = key; schedule_work(&w->work); module unload //*the key is destroy* process_one_work once_deferred BUG_ON(!static_key_enabled(work->key)); static_key_count((struct static_key *)x) //*access key, crash* When module uses DO_ONCE mechanism, it could crash due to the above concurrency problem, we could reproduce it with link[1]. Fix it by add/put module refcount in the once work process. [1] https://lore.kernel.org/netdev/eaa6c371-465e-57eb-6be9-f4b16b9d7cbf@huawei.com/ Cc: Hannes Frederic Sowa Cc: Daniel Borkmann Cc: David S. Miller Cc: Eric Dumazet Reported-by: Minmin chen Signed-off-by: Kefeng Wang Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/once.h | 4 ++-- lib/once.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/linux/once.h b/include/linux/once.h index 9225ee6d96c7..ae6f4eb41cbe 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -7,7 +7,7 @@ bool __do_once_start(bool *done, unsigned long *flags); void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags); + unsigned long *flags, struct module *mod); /* Call a function exactly once. The idea of DO_ONCE() is to perform * a function call such as initialization of random seeds, etc, only @@ -46,7 +46,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key, if (unlikely(___ret)) { \ func(__VA_ARGS__); \ __do_once_done(&___done, &___once_key, \ - &___flags); \ + &___flags, THIS_MODULE); \ } \ } \ ___ret; \ diff --git a/lib/once.c b/lib/once.c index 8b7d6235217e..59149bf3bfb4 100644 --- a/lib/once.c +++ b/lib/once.c @@ -3,10 +3,12 @@ #include #include #include +#include struct once_work { struct work_struct work; struct static_key_true *key; + struct module *module; }; static void once_deferred(struct work_struct *w) @@ -16,10 +18,11 @@ static void once_deferred(struct work_struct *w) work = container_of(w, struct once_work, work); BUG_ON(!static_key_enabled(work->key)); static_branch_disable(work->key); + module_put(work->module); kfree(work); } -static void once_disable_jump(struct static_key_true *key) +static void once_disable_jump(struct static_key_true *key, struct module *mod) { struct once_work *w; @@ -29,6 +32,8 @@ static void once_disable_jump(struct static_key_true *key) INIT_WORK(&w->work, once_deferred); w->key = key; + w->module = mod; + __module_get(mod); schedule_work(&w->work); } @@ -53,11 +58,11 @@ bool __do_once_start(bool *done, unsigned long *flags) EXPORT_SYMBOL(__do_once_start); void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags) + unsigned long *flags, struct module *mod) __releases(once_lock) { *done = true; spin_unlock_irqrestore(&once_lock, *flags); - once_disable_jump(once_key); + once_disable_jump(once_key, mod); } EXPORT_SYMBOL(__do_once_done); -- cgit From fbfee25796e2688004d58ad4d0673279366b97dd Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 7 Aug 2021 15:03:13 -0400 Subject: bnxt_en: Update firmware interface to 1.10.2.52 The key change is the firmware call to retrieve the PTP TX timestamp. The header offset for the PTP sequence number field is now added. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 76 +++++++++++++++++++-------- 1 file changed, 55 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 3fc6781c5b98..94d07a9f7034 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -368,6 +368,7 @@ struct cmd_nums { #define HWRM_FUNC_PTP_TS_QUERY 0x19fUL #define HWRM_FUNC_PTP_EXT_CFG 0x1a0UL #define HWRM_FUNC_PTP_EXT_QCFG 0x1a1UL + #define HWRM_FUNC_KEY_CTX_ALLOC 0x1a2UL #define HWRM_SELFTEST_QLIST 0x200UL #define HWRM_SELFTEST_EXEC 0x201UL #define HWRM_SELFTEST_IRQ 0x202UL @@ -531,8 +532,8 @@ struct hwrm_err_output { #define HWRM_VERSION_MAJOR 1 #define HWRM_VERSION_MINOR 10 #define HWRM_VERSION_UPDATE 2 -#define HWRM_VERSION_RSVD 47 -#define HWRM_VERSION_STR "1.10.2.47" +#define HWRM_VERSION_RSVD 52 +#define HWRM_VERSION_STR "1.10.2.52" /* hwrm_ver_get_input (size:192b/24B) */ struct hwrm_ver_get_input { @@ -585,6 +586,7 @@ struct hwrm_ver_get_output { #define VER_GET_RESP_DEV_CAPS_CFG_CFA_ADV_FLOW_MGNT_SUPPORTED 0x1000UL #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TFLIB_SUPPORTED 0x2000UL #define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED 0x4000UL + #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_BOOT_CAPABLE 0x8000UL u8 roce_fw_maj_8b; u8 roce_fw_min_8b; u8 roce_fw_bld_8b; @@ -886,7 +888,8 @@ struct hwrm_async_event_cmpl_reset_notify { #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL (0x2UL << 8) #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL (0x3UL << 8) #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET (0x4UL << 8) - #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FAST_RESET + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_ACTIVATION (0x5UL << 8) + #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_ACTIVATION #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_MASK 0xffff0000UL #define ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT 16 }; @@ -1236,13 +1239,14 @@ struct hwrm_async_event_cmpl_error_report_base { u8 timestamp_lo; __le16 timestamp_hi; __le32 event_data1; - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0 - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL - #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_MASK 0xffUL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_SFT 0 + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_RESERVED 0x0UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_PAUSE_STORM 0x1UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_INVALID_SIGNAL 0x2UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_NVM 0x3UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD 0x4UL + #define ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_LAST ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DOORBELL_DROP_THRESHOLD }; /* hwrm_async_event_cmpl_error_report_pause_storm (size:128b/16B) */ @@ -1446,6 +1450,8 @@ struct hwrm_func_vf_cfg_input { #define FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS 0x200UL #define FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS 0x400UL #define FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS 0x800UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_TX_KEY_CTXS 0x1000UL + #define FUNC_VF_CFG_REQ_ENABLES_NUM_RX_KEY_CTXS 0x2000UL __le16 mtu; __le16 guest_vlan; __le16 async_event_cr; @@ -1469,7 +1475,8 @@ struct hwrm_func_vf_cfg_input { __le16 num_vnics; __le16 num_stat_ctxs; __le16 num_hw_ring_grps; - u8 unused_0[4]; + __le16 num_tx_key_ctxs; + __le16 num_rx_key_ctxs; }; /* hwrm_func_vf_cfg_output (size:128b/16B) */ @@ -1493,7 +1500,7 @@ struct hwrm_func_qcaps_input { u8 unused_0[6]; }; -/* hwrm_func_qcaps_output (size:704b/88B) */ +/* hwrm_func_qcaps_output (size:768b/96B) */ struct hwrm_func_qcaps_output { __le16 error_code; __le16 req_type; @@ -1587,7 +1594,8 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_TE_CFA 0x4UL #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_RE_CFA 0x8UL #define FUNC_QCAPS_RESP_MPC_CHNLS_CAP_PRIMATE 0x10UL - u8 unused_1; + __le16 max_key_ctxs_alloc; + u8 unused_1[7]; u8 valid; }; @@ -1602,7 +1610,7 @@ struct hwrm_func_qcfg_input { u8 unused_0[6]; }; -/* hwrm_func_qcfg_output (size:832b/104B) */ +/* hwrm_func_qcfg_output (size:896b/112B) */ struct hwrm_func_qcfg_output { __le16 error_code; __le16 req_type; @@ -1749,11 +1757,13 @@ struct hwrm_func_qcfg_output { #define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) #define FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 __le16 host_mtu; - u8 unused_3; + __le16 alloc_tx_key_ctxs; + __le16 alloc_rx_key_ctxs; + u8 unused_3[5]; u8 valid; }; -/* hwrm_func_cfg_input (size:832b/104B) */ +/* hwrm_func_cfg_input (size:896b/112B) */ struct hwrm_func_cfg_input { __le16 req_type; __le16 cmpl_ring; @@ -1820,6 +1830,8 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_ENABLES_PARTITION_MAX_BW 0x8000000UL #define FUNC_CFG_REQ_ENABLES_TPID 0x10000000UL #define FUNC_CFG_REQ_ENABLES_HOST_MTU 0x20000000UL + #define FUNC_CFG_REQ_ENABLES_TX_KEY_CTXS 0x40000000UL + #define FUNC_CFG_REQ_ENABLES_RX_KEY_CTXS 0x80000000UL __le16 admin_mtu; __le16 mru; __le16 num_rsscos_ctxs; @@ -1929,6 +1941,9 @@ struct hwrm_func_cfg_input { #define FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_PARTITION_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 __be16 tpid; __le16 host_mtu; + __le16 num_tx_key_ctxs; + __le16 num_rx_key_ctxs; + u8 unused_0[4]; }; /* hwrm_func_cfg_output (size:128b/16B) */ @@ -2099,6 +2114,7 @@ struct hwrm_func_drv_rgtr_input { #define FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT 0x40UL #define FUNC_DRV_RGTR_REQ_FLAGS_FAST_RESET_SUPPORT 0x80UL #define FUNC_DRV_RGTR_REQ_FLAGS_RSS_STRICT_HASH_TYPE_SUPPORT 0x100UL + #define FUNC_DRV_RGTR_REQ_FLAGS_NPAR_1_2_SUPPORT 0x200UL __le32 enables; #define FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE 0x1UL #define FUNC_DRV_RGTR_REQ_ENABLES_VER 0x2UL @@ -2268,7 +2284,7 @@ struct hwrm_func_resource_qcaps_input { u8 unused_0[6]; }; -/* hwrm_func_resource_qcaps_output (size:448b/56B) */ +/* hwrm_func_resource_qcaps_output (size:512b/64B) */ struct hwrm_func_resource_qcaps_output { __le16 error_code; __le16 req_type; @@ -2300,11 +2316,15 @@ struct hwrm_func_resource_qcaps_output { __le16 max_tx_scheduler_inputs; __le16 flags; #define FUNC_RESOURCE_QCAPS_RESP_FLAGS_MIN_GUARANTEED 0x1UL + __le16 min_tx_key_ctxs; + __le16 max_tx_key_ctxs; + __le16 min_rx_key_ctxs; + __le16 max_rx_key_ctxs; u8 unused_0[5]; u8 valid; }; -/* hwrm_func_vf_resource_cfg_input (size:448b/56B) */ +/* hwrm_func_vf_resource_cfg_input (size:512b/64B) */ struct hwrm_func_vf_resource_cfg_input { __le16 req_type; __le16 cmpl_ring; @@ -2331,6 +2351,10 @@ struct hwrm_func_vf_resource_cfg_input { __le16 max_hw_ring_grps; __le16 flags; #define FUNC_VF_RESOURCE_CFG_REQ_FLAGS_MIN_GUARANTEED 0x1UL + __le16 min_tx_key_ctxs; + __le16 max_tx_key_ctxs; + __le16 min_rx_key_ctxs; + __le16 max_rx_key_ctxs; u8 unused_0[2]; }; @@ -2348,7 +2372,9 @@ struct hwrm_func_vf_resource_cfg_output { __le16 reserved_vnics; __le16 reserved_stat_ctx; __le16 reserved_hw_ring_grps; - u8 unused_0[7]; + __le16 reserved_tx_key_ctxs; + __le16 reserved_rx_key_ctxs; + u8 unused_0[3]; u8 valid; }; @@ -4220,7 +4246,7 @@ struct hwrm_port_lpbk_clr_stats_output { u8 valid; }; -/* hwrm_port_ts_query_input (size:256b/32B) */ +/* hwrm_port_ts_query_input (size:320b/40B) */ struct hwrm_port_ts_query_input { __le16 req_type; __le16 cmpl_ring; @@ -4238,8 +4264,11 @@ struct hwrm_port_ts_query_input { __le16 enables; #define PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT 0x1UL #define PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID 0x2UL + #define PORT_TS_QUERY_REQ_ENABLES_PTP_HDR_OFFSET 0x4UL __le16 ts_req_timeout; __le32 ptp_seq_id; + __le16 ptp_hdr_offset; + u8 unused_1[6]; }; /* hwrm_port_ts_query_output (size:192b/24B) */ @@ -8172,6 +8201,7 @@ struct hwrm_fw_reset_input { u8 host_idx; u8 flags; #define FW_RESET_REQ_FLAGS_RESET_GRACEFUL 0x1UL + #define FW_RESET_REQ_FLAGS_FW_ACTIVATION 0x2UL u8 unused_0[4]; }; @@ -8952,7 +8982,7 @@ struct hwrm_nvm_get_dir_info_output { u8 valid; }; -/* hwrm_nvm_write_input (size:384b/48B) */ +/* hwrm_nvm_write_input (size:448b/56B) */ struct hwrm_nvm_write_input { __le16 req_type; __le16 cmpl_ring; @@ -8968,7 +8998,11 @@ struct hwrm_nvm_write_input { __le16 option; __le16 flags; #define NVM_WRITE_REQ_FLAGS_KEEP_ORIG_ACTIVE_IMG 0x1UL + #define NVM_WRITE_REQ_FLAGS_BATCH_MODE 0x2UL + #define NVM_WRITE_REQ_FLAGS_BATCH_LAST 0x4UL __le32 dir_item_length; + __le32 offset; + __le32 len; __le32 unused_0; }; -- cgit From 9e26680733d5c6538ba2e7a111fb49c9ac2dc16a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 7 Aug 2021 15:03:14 -0400 Subject: bnxt_en: Update firmware call to retrieve TX PTP timestamp New firmware interface requires the PTP sequence ID header offset to be passed to the firmware to properly find the matching timestamp for all protocols. Fixes: 83bb623c968e ("bnxt_en: Transmit and retrieve packet timestamps") Reviewed-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 5 ++++- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 4 +++- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 6 ++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 89606587b156..2fe743503949 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -426,7 +426,10 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) if (ptp && ptp->tx_tstamp_en && !skb_is_gso(skb) && atomic_dec_if_positive(&ptp->tx_avail) >= 0) { - if (!bnxt_ptp_parse(skb, &ptp->tx_seqid)) { + if (!bnxt_ptp_parse(skb, &ptp->tx_seqid, + &ptp->tx_hdr_off)) { + if (vlan_tag_flags) + ptp->tx_hdr_off += VLAN_HLEN; lflags |= cpu_to_le32(TX_BD_FLAGS_STAMP); skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; } else { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index ec381c2423b8..81f40ab748f1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -20,7 +20,7 @@ #include "bnxt.h" #include "bnxt_ptp.h" -int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id) +int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off) { unsigned int ptp_class; struct ptp_header *hdr; @@ -34,6 +34,7 @@ int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id) if (!hdr) return -EINVAL; + *hdr_off = (u8 *)hdr - skb->data; *seq_id = ntohs(hdr->sequence_id); return 0; default: @@ -91,6 +92,7 @@ static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts) PORT_TS_QUERY_REQ_FLAGS_PATH_TX) { req.enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES); req.ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid); + req.ptp_hdr_offset = cpu_to_le16(bp->ptp_cfg->tx_hdr_off); req.ts_req_timeout = cpu_to_le16(BNXT_PTP_QTS_TIMEOUT); } mutex_lock(&bp->hwrm_cmd_lock); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 254ba7bc0f99..57a8b9243a31 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -19,7 +19,8 @@ #define BNXT_PTP_QTS_TIMEOUT 1000 #define BNXT_PTP_QTS_TX_ENABLES (PORT_TS_QUERY_REQ_ENABLES_PTP_SEQ_ID | \ - PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT) + PORT_TS_QUERY_REQ_ENABLES_TS_REQ_TIMEOUT | \ + PORT_TS_QUERY_REQ_ENABLES_PTP_HDR_OFFSET) struct bnxt_ptp_cfg { struct ptp_clock_info ptp_info; @@ -37,6 +38,7 @@ struct bnxt_ptp_cfg { #define BNXT_PHC_OVERFLOW_PERIOD (19 * 3600 * HZ) u16 tx_seqid; + u16 tx_hdr_off; struct bnxt *bp; atomic_t tx_avail; #define BNXT_MAX_TX_TS 1 @@ -74,7 +76,7 @@ do { \ ((dst) = READ_ONCE(src)) #endif -int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id); +int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off); int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb); -- cgit From 92529df76db5ab184b82674cf7a4eef4b665b40e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sat, 7 Aug 2021 15:03:15 -0400 Subject: bnxt_en: Use register window 6 instead of 5 to read the PHC Some older Broadcom debug tools use window 5 and may conflict, so switch to use window 6 instead. Fixes: 118612d519d8 ("bnxt_en: Add PTP clock APIs, ioctls, and ethtool methods") Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 57a8b9243a31..524f1c272054 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -10,8 +10,8 @@ #ifndef BNXT_PTP_H #define BNXT_PTP_H -#define BNXT_PTP_GRC_WIN 5 -#define BNXT_PTP_GRC_WIN_BASE 0x5000 +#define BNXT_PTP_GRC_WIN 6 +#define BNXT_PTP_GRC_WIN_BASE 0x6000 #define BNXT_MAX_PHC_DRIFT 31000000 #define BNXT_LO_TIMER_MASK 0x0000ffffffffUL -- cgit From 2459dcb96bcba94c08d6861f8a050185ff301672 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Sat, 7 Aug 2021 15:27:03 +0200 Subject: ppp: Fix generating ifname when empty IFLA_IFNAME is specified MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IFLA_IFNAME is nul-term string which means that IFLA_IFNAME buffer can be larger than length of string which contains. Function __rtnl_newlink() generates new own ifname if either IFLA_IFNAME was not specified at all or userspace passed empty nul-term string. It is expected that if userspace does not specify ifname for new ppp netdev then kernel generates one in format "ppp" where id matches to the ppp unit id which can be later obtained by PPPIOCGUNIT ioctl. And it works in this way if IFLA_IFNAME is not specified at all. But it does not work when IFLA_IFNAME is specified with empty string. So fix this logic also for empty IFLA_IFNAME in ppp_nl_newlink() function and correctly generates ifname based on ppp unit identifier if userspace did not provided preferred ifname. Without this patch when IFLA_IFNAME was specified with empty string then kernel created a new ppp interface in format "ppp" but id did not match ppp unit id returned by PPPIOCGUNIT ioctl. In this case id was some number generated by __rtnl_newlink() function. Signed-off-by: Pali Rohár Fixes: bb8082f69138 ("ppp: build ifname using unit identifier for rtnl based devices") Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 930e49ef15f6..b2b35ec37702 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1306,7 +1306,7 @@ static int ppp_nl_newlink(struct net *src_net, struct net_device *dev, * the PPP unit identifer as suffix (i.e. ppp). This allows * userspace to infer the device name using to the PPPIOCGUNIT ioctl. */ - if (!tb[IFLA_IFNAME]) + if (!tb[IFLA_IFNAME] || !nla_len(tb[IFLA_IFNAME]) || !*(char *)nla_data(tb[IFLA_IFNAME])) conf.ifname_is_set = false; err = ppp_dev_configure(src_net, dev, &conf); -- cgit From 3125f26c514826077f2a4490b75e9b1c7a644c42 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Sat, 7 Aug 2021 18:00:50 +0200 Subject: ppp: Fix generating ppp unit id when ifname is not specified MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When registering new ppp interface via PPPIOCNEWUNIT ioctl then kernel has to choose interface name as this ioctl API does not support specifying it. Kernel in this case register new interface with name "ppp" where is the ppp unit id, which can be obtained via PPPIOCGUNIT ioctl. This applies also in the case when registering new ppp interface via rtnl without supplying IFLA_IFNAME. PPPIOCNEWUNIT ioctl allows to specify own ppp unit id which will kernel assign to ppp interface, in case this ppp id is not already used by other ppp interface. In case user does not specify ppp unit id then kernel choose the first free ppp unit id. This applies also for case when creating ppp interface via rtnl method as it does not provide a way for specifying own ppp unit id. If some network interface (does not have to be ppp) has name "ppp" with this first free ppp id then PPPIOCNEWUNIT ioctl or rtnl call fails. And registering new ppp interface is not possible anymore, until interface which holds conflicting name is renamed. Or when using rtnl method with custom interface name in IFLA_IFNAME. As list of allocated / used ppp unit ids is not possible to retrieve from kernel to userspace, userspace has no idea what happens nor which interface is doing this conflict. So change the algorithm how ppp unit id is generated. And choose the first number which is not neither used as ppp unit id nor in some network interface with pattern "ppp". This issue can be simply reproduced by following pppd call when there is no ppp interface registered and also no interface with name pattern "ppp": pppd ifname ppp1 +ipv6 noip noauth nolock local nodetach pty "pppd +ipv6 noip noauth nolock local nodetach notty" Or by creating the one ppp interface (which gets assigned ppp unit id 0), renaming it to "ppp1" and then trying to create a new ppp interface (which will always fails as next free ppp unit id is 1, but network interface with name "ppp1" exists). This patch fixes above described issue by generating new and new ppp unit id until some non-conflicting id with network interfaces is generated. Signed-off-by: Pali Rohár Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ppp/ppp_generic.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index b2b35ec37702..7a099c37527f 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -284,7 +284,7 @@ static struct channel *ppp_find_channel(struct ppp_net *pn, int unit); static int ppp_connect_channel(struct channel *pch, int unit); static int ppp_disconnect_channel(struct channel *pch); static void ppp_destroy_channel(struct channel *pch); -static int unit_get(struct idr *p, void *ptr); +static int unit_get(struct idr *p, void *ptr, int min); static int unit_set(struct idr *p, void *ptr, int n); static void unit_put(struct idr *p, int n); static void *unit_find(struct idr *p, int n); @@ -1155,9 +1155,20 @@ static int ppp_unit_register(struct ppp *ppp, int unit, bool ifname_is_set) mutex_lock(&pn->all_ppp_mutex); if (unit < 0) { - ret = unit_get(&pn->units_idr, ppp); + ret = unit_get(&pn->units_idr, ppp, 0); if (ret < 0) goto err; + if (!ifname_is_set) { + while (1) { + snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ret); + if (!__dev_get_by_name(ppp->ppp_net, ppp->dev->name)) + break; + unit_put(&pn->units_idr, ret); + ret = unit_get(&pn->units_idr, ppp, ret + 1); + if (ret < 0) + goto err; + } + } } else { /* Caller asked for a specific unit number. Fail with -EEXIST * if unavailable. For backward compatibility, return -EEXIST @@ -3552,9 +3563,9 @@ static int unit_set(struct idr *p, void *ptr, int n) } /* get new free unit number and associate pointer with it */ -static int unit_get(struct idr *p, void *ptr) +static int unit_get(struct idr *p, void *ptr, int min) { - return idr_alloc(p, ptr, 0, 0, GFP_KERNEL); + return idr_alloc(p, ptr, min, 0, GFP_KERNEL); } /* put unit number back to a pool */ -- cgit From ef98eb0409c31c39ab55ff46b2721c3b4f84c122 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sat, 7 Aug 2021 17:13:41 -0700 Subject: io_uring: clear TIF_NOTIFY_SIGNAL when running task work When using SQPOLL, the submission queue polling thread calls task_work_run() to run queued work. However, when work is added with TWA_SIGNAL - as done by io_uring itself - the TIF_NOTIFY_SIGNAL remains set afterwards and is never cleared. Consequently, when the submission queue polling thread checks whether signal_pending(), it may always find a pending signal, if task_work_add() was ever called before. The impact of this bug might be different on different kernel versions. It appears that on 5.14 it would only cause unnecessary calculation and prevent the polling thread from sleeping. On 5.13, where the bug was found, it stops the polling thread from finding newly submitted work. Instead of task_work_run(), use tracehook_notify_signal() that clears TIF_NOTIFY_SIGNAL. Test for TIF_NOTIFY_SIGNAL in addition to current->task_works to avoid a race in which task_works is cleared but the TIF_NOTIFY_SIGNAL is set. Fixes: 685fe7feedb96 ("io-wq: eliminate the need for a manager thread") Cc: Jens Axboe Cc: Pavel Begunkov Signed-off-by: Nadav Amit Link: https://lore.kernel.org/r/20210808001342.964634-2-namit@vmware.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bf548af0426c..1093df3977b8 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -78,6 +78,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -2222,9 +2223,9 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req) static inline bool io_run_task_work(void) { - if (current->task_works) { + if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) { __set_current_state(TASK_RUNNING); - task_work_run(); + tracehook_notify_signal(); return true; } -- cgit From 20c0b380f971e7d48f5d978bc27d827f7eabb21a Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sat, 7 Aug 2021 17:13:42 -0700 Subject: io_uring: Use WRITE_ONCE() when writing to sq_flags The compiler should be forbidden from any strange optimization for async writes to user visible data-structures. Without proper protection, the compiler can cause write-tearing or invent writes that would confuse the userspace. However, there are writes to sq_flags which are not protected by WRITE_ONCE(). Use WRITE_ONCE() for these writes. This is purely a theoretical issue. Presumably, any compiler is very unlikely to do such optimizations. Fixes: 75b28affdd6a ("io_uring: allocate the two rings together") Cc: Jens Axboe Cc: Pavel Begunkov Signed-off-by: Nadav Amit Link: https://lore.kernel.org/r/20210808001342.964634-3-namit@vmware.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1093df3977b8..ca064486cb41 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1500,7 +1500,8 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) all_flushed = list_empty(&ctx->cq_overflow_list); if (all_flushed) { clear_bit(0, &ctx->check_cq_overflow); - ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW; + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW); } if (posted) @@ -1579,7 +1580,9 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, } if (list_empty(&ctx->cq_overflow_list)) { set_bit(0, &ctx->check_cq_overflow); - ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW; + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW); + } ocqe->cqe.user_data = user_data; ocqe->cqe.res = res; @@ -6804,14 +6807,16 @@ static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx) { /* Tell userspace we may need a wakeup call */ spin_lock_irq(&ctx->completion_lock); - ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP; + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP); spin_unlock_irq(&ctx->completion_lock); } static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx) { spin_lock_irq(&ctx->completion_lock); - ctx->rings->sq_flags &= ~IORING_SQ_NEED_WAKEUP; + WRITE_ONCE(ctx->rings->sq_flags, + ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP); spin_unlock_irq(&ctx->completion_lock); } -- cgit From 484f2b7c61b9ae58cc00c5127bcbcd9177af8dfe Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Thu, 1 Jul 2021 00:56:01 +0200 Subject: cpufreq: armada-37xx: forbid cpufreq for 1.2 GHz variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 1.2 GHz variant of the Armada 3720 SOC is unstable with DVFS: when the SOC boots, the WTMI firmware sets clocks and AVS values that work correctly with 1.2 GHz CPU frequency, but random crashes occur once cpufreq driver starts scaling. We do not know currently what is the reason: - it may be that the voltage value for L0 for 1.2 GHz variant provided by the vendor in the OTP is simply incorrect when scaling is used, - it may be that some delay is needed somewhere, - it may be something else. The most sane solution now seems to be to simply forbid the cpufreq driver on 1.2 GHz variant. Signed-off-by: Marek Behún Fixes: 92ce45fb875d ("cpufreq: Add DVFS support for Armada 37xx") Signed-off-by: Viresh Kumar --- drivers/cpufreq/armada-37xx-cpufreq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/armada-37xx-cpufreq.c b/drivers/cpufreq/armada-37xx-cpufreq.c index 3fc98a3ffd91..c10fc33b29b1 100644 --- a/drivers/cpufreq/armada-37xx-cpufreq.c +++ b/drivers/cpufreq/armada-37xx-cpufreq.c @@ -104,7 +104,11 @@ struct armada_37xx_dvfs { }; static struct armada_37xx_dvfs armada_37xx_dvfs[] = { - {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} }, + /* + * The cpufreq scaling for 1.2 GHz variant of the SOC is currently + * unstable because we do not know how to configure it properly. + */ + /* {.cpu_freq_max = 1200*1000*1000, .divider = {1, 2, 4, 6} }, */ {.cpu_freq_max = 1000*1000*1000, .divider = {1, 2, 4, 5} }, {.cpu_freq_max = 800*1000*1000, .divider = {1, 2, 3, 4} }, {.cpu_freq_max = 600*1000*1000, .divider = {2, 4, 5, 6} }, -- cgit From dc0dc8a73e8e4dc33fba93dfe23356cc5a500c57 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 8 Aug 2021 09:01:16 +0200 Subject: ALSA: pcm: Fix mmap breakage without explicit buffer setup The recent fix c4824ae7db41 ("ALSA: pcm: Fix mmap capability check") restricts the mmap capability only to the drivers that properly set up the buffers, but it caused a regression for a few drivers that manage the buffer on its own way. For those with UNKNOWN buffer type (i.e. the uninitialized / unused substream->dma_buffer), just assume that the driver handles the mmap properly and blindly trust the hardware info bit. Fixes: c4824ae7db41 ("ALSA: pcm: Fix mmap capability check") Reported-and-tested-by: Jeff Woods Cc: Link: https://lore.kernel.org/r/s5him0gpghv.wl-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 09c0e2a6489c..71323d807dbf 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -251,7 +251,10 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream) switch (substream->dma_buffer.dev.type) { case SNDRV_DMA_TYPE_UNKNOWN: - return false; + /* we can't know the device, so just assume that the driver does + * everything right + */ + return true; case SNDRV_DMA_TYPE_CONTINUOUS: case SNDRV_DMA_TYPE_VMALLOC: return true; -- cgit From 43e8f76006592cb1573a959aa287c45421066f9c Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Mon, 9 Aug 2021 10:36:58 +0800 Subject: powerpc/kprobes: Fix kprobe Oops happens in booke When using kprobe on powerpc booke series processor, Oops happens as show bellow: / # echo "p:myprobe do_nanosleep" > /sys/kernel/debug/tracing/kprobe_events / # echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable / # sleep 1 [ 50.076730] Oops: Exception in kernel mode, sig: 5 [#1] [ 50.077017] BE PAGE_SIZE=4K SMP NR_CPUS=24 QEMU e500 [ 50.077221] Modules linked in: [ 50.077462] CPU: 0 PID: 77 Comm: sleep Not tainted 5.14.0-rc4-00022-g251a1524293d #21 [ 50.077887] NIP: c0b9c4e0 LR: c00ebecc CTR: 00000000 [ 50.078067] REGS: c3883de0 TRAP: 0700 Not tainted (5.14.0-rc4-00022-g251a1524293d) [ 50.078349] MSR: 00029000 CR: 24000228 XER: 20000000 [ 50.078675] [ 50.078675] GPR00: c00ebdf0 c3883e90 c313e300 c3883ea0 00000001 00000000 c3883ecc 00000001 [ 50.078675] GPR08: c100598c c00ea250 00000004 00000000 24000222 102490c2 bff4180c 101e60d4 [ 50.078675] GPR16: 00000000 102454ac 00000040 10240000 10241100 102410f8 10240000 00500000 [ 50.078675] GPR24: 00000002 00000000 c3883ea0 00000001 00000000 0000c350 3b9b8d50 00000000 [ 50.080151] NIP [c0b9c4e0] do_nanosleep+0x0/0x190 [ 50.080352] LR [c00ebecc] hrtimer_nanosleep+0x14c/0x1e0 [ 50.080638] Call Trace: [ 50.080801] [c3883e90] [c00ebdf0] hrtimer_nanosleep+0x70/0x1e0 (unreliable) [ 50.081110] [c3883f00] [c00ec004] sys_nanosleep_time32+0xa4/0x110 [ 50.081336] [c3883f40] [c001509c] ret_from_syscall+0x0/0x28 [ 50.081541] --- interrupt: c00 at 0x100a4d08 [ 50.081749] NIP: 100a4d08 LR: 101b5234 CTR: 00000003 [ 50.081931] REGS: c3883f50 TRAP: 0c00 Not tainted (5.14.0-rc4-00022-g251a1524293d) [ 50.082183] MSR: 0002f902 CR: 24000222 XER: 00000000 [ 50.082457] [ 50.082457] GPR00: 000000a2 bf980040 1024b4d0 bf980084 bf980084 64000000 00555345 fefefeff [ 50.082457] GPR08: 7f7f7f7f 101e0000 00000069 00000003 28000422 102490c2 bff4180c 101e60d4 [ 50.082457] GPR16: 00000000 102454ac 00000040 10240000 10241100 102410f8 10240000 00500000 [ 50.082457] GPR24: 00000002 bf9803f4 10240000 00000000 00000000 100039e0 00000000 102444e8 [ 50.083789] NIP [100a4d08] 0x100a4d08 [ 50.083917] LR [101b5234] 0x101b5234 [ 50.084042] --- interrupt: c00 [ 50.084238] Instruction dump: [ 50.084483] 4bfffc40 60000000 60000000 60000000 9421fff0 39400402 914200c0 38210010 [ 50.084841] 4bfffc20 00000000 00000000 00000000 <7fe00008> 7c0802a6 7c892378 93c10048 [ 50.085487] ---[ end trace f6fffe98e2fa8f3e ]--- [ 50.085678] Trace/breakpoint trap There is no real mode for booke arch and the MMU translation is always on. The corresponding MSR_IS/MSR_DS bit in booke is used to switch the address space, but not for real mode judgment. Fixes: 21f8b2fa3ca5 ("powerpc/kprobes: Ignore traps that happened in real mode") Signed-off-by: Pu Lehui Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210809023658.218915-1-pulehui@huawei.com --- arch/powerpc/kernel/kprobes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index cbc28d1a2e1b..7a7cd6bda53e 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -292,7 +292,8 @@ int kprobe_handler(struct pt_regs *regs) if (user_mode(regs)) return 0; - if (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)) + if (!IS_ENABLED(CONFIG_BOOKE) && + (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR))) return 0; /* -- cgit From 699aa57b35672c3b2f230e2b7e5d0ab8c2bde80a Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 6 Aug 2021 12:40:56 +0800 Subject: drm/i915/gvt: Fix cached atomics setting for Windows VM We've seen recent regression with host and windows VM running simultaneously that cause gpu hang or even crash. Finally bisect to commit 58586680ffad ("drm/i915: Disable atomics in L3 for gen9"), which seems cached atomics behavior difference caused regression issue. This tries to add new scratch register handler and add those in mmio save/restore list for context switch. No gpu hang produced with this one. Cc: stable@vger.kernel.org # 5.12+ Cc: "Xu, Terrence" Cc: "Bloomfield, Jon" Cc: "Ekstrand, Jason" Reviewed-by: Colin Xu Fixes: 58586680ffad ("drm/i915: Disable atomics in L3 for gen9") Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20210806044056.648016-1-zhenyuw@linux.intel.com --- drivers/gpu/drm/i915/gvt/handlers.c | 1 + drivers/gpu/drm/i915/gvt/mmio_context.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 2358c92733b0..55611c7dbcec 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -3149,6 +3149,7 @@ static int init_bdw_mmio_info(struct intel_gvt *gvt) MMIO_DFH(_MMIO(0xb100), D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_MMIO(0xb10c), D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(_MMIO(0xb110), D_BDW); + MMIO_D(GEN9_SCRATCH_LNCF1, D_BDW_PLUS); MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS | F_CMD_WRITE_PATCH, 0, 0, D_BDW_PLUS, NULL, force_nonpriv_write); diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index c9589e26af93..ac24adc320d3 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -105,6 +105,8 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { {RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */ {RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */ {RCS0, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */ + {RCS0, GEN9_SCRATCH1, 0, false}, /* 0xb11c */ + {RCS0, GEN9_SCRATCH_LNCF1, 0, false}, /* 0xb008 */ {RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */ {RCS0, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */ {RCS0, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ -- cgit From 86aab09a4870bb8346c9579864588c3d7f555299 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Aug 2021 16:04:40 -0700 Subject: dccp: add do-while-0 stubs for dccp_pr_debug macros GCC complains about empty macros in an 'if' statement, so convert them to 'do {} while (0)' macros. Fixes these build warnings: net/dccp/output.c: In function 'dccp_xmit_packet': ../net/dccp/output.c:283:71: warning: suggest braces around empty body in an 'if' statement [-Wempty-body] 283 | dccp_pr_debug("transmit_skb() returned err=%d\n", err); net/dccp/ackvec.c: In function 'dccp_ackvec_update_old': ../net/dccp/ackvec.c:163:80: warning: suggest braces around empty body in an 'else' statement [-Wempty-body] 163 | (unsigned long long)seqno, state); Fixes: dc841e30eaea ("dccp: Extend CCID packet dequeueing interface") Fixes: 380240864451 ("dccp ccid-2: Update code for the Ack Vector input/registration routine") Signed-off-by: Randy Dunlap Cc: dccp@vger.kernel.org Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/dccp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 9cc9d1ee6cdb..c5c1d2b8045e 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -41,9 +41,9 @@ extern bool dccp_debug; #define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) #define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a) #else -#define dccp_pr_debug(format, a...) -#define dccp_pr_debug_cat(format, a...) -#define dccp_debug(format, a...) +#define dccp_pr_debug(format, a...) do {} while (0) +#define dccp_pr_debug_cat(format, a...) do {} while (0) +#define dccp_debug(format, a...) do {} while (0) #endif extern struct inet_hashinfo dccp_hashinfo; -- cgit From 0fa32ca438b42fadfb293d72690e117ab3d67489 Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 6 Aug 2021 09:39:07 +0800 Subject: page_pool: mask the page->signature before the checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As mentioned in commit c07aea3ef4d4 ("mm: add a signature in struct page"): "The page->signature field is aliased to page->lru.next and page->compound_head." And as the comment in page_is_pfmemalloc(): "lru.next has bit 1 set if the page is allocated from the pfmemalloc reserves. Callers may simply overwrite it if they do not need to preserve that information." The page->signature is OR’ed with PP_SIGNATURE when a page is allocated in page pool, see __page_pool_alloc_pages_slow(), and page->signature is checked directly with PP_SIGNATURE in page_pool_return_skb_page(), which might cause resoure leaking problem for a page from page pool if bit 1 of lru.next is set for a pfmemalloc page. What happens here is that the original pp->signature is OR'ed with PP_SIGNATURE after the allocation in order to preserve any existing bits(such as the bit 1, used to indicate a pfmemalloc page), so when those bits are present, those page is not considered to be from page pool and the DMA mapping of those pages will be left stale. As bit 0 is for page->compound_head, So mask both bit 0/1 before the checking in page_pool_return_skb_page(). And we will return those pfmemalloc pages back to the page allocator after cleaning up the DMA mapping. Fixes: 6a5bcd84e886 ("page_pool: Allow drivers to hint on SKB recycling") Reviewed-by: Ilias Apalodimas Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- net/core/page_pool.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5e4eb45b139c..8ab7b402244c 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -634,7 +634,15 @@ bool page_pool_return_skb_page(struct page *page) struct page_pool *pp; page = compound_head(page); - if (unlikely(page->pp_magic != PP_SIGNATURE)) + + /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation + * in order to preserve any existing bits, such as bit 0 for the + * head page of compound page and bit 1 for pfmemalloc page, so + * mask those bits for freeing side when doing below checking, + * and page_is_pfmemalloc() is checked in __page_pool_put_page() + * to avoid recycling the pfmemalloc page. + */ + if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE)) return false; pp = page->pp; -- cgit From acc68b8d2a1196c4db806947606f162dbeed2274 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Thu, 5 Aug 2021 17:55:11 +0300 Subject: net: ethernet: ti: cpsw: fix min eth packet size for non-switch use-cases The CPSW switchdev driver inherited fix from commit 9421c9015047 ("net: ethernet: ti: cpsw: fix min eth packet size") which changes min TX packet size to 64bytes (VLAN_ETH_ZLEN, excluding ETH_FCS). It was done to fix HW packed drop issue when packets are sent from Host to the port with PVID and un-tagging enabled. Unfortunately this breaks some other non-switch specific use-cases, like: - [1] CPSW port as DSA CPU port with DSA-tag applied at the end of the packet - [2] Some industrial protocols, which expects min TX packet size 60Bytes (excluding FCS). Fix it by configuring min TX packet size depending on driver mode - 60Bytes (ETH_ZLEN) for multi mac (dual-mac) mode - 64Bytes (VLAN_ETH_ZLEN) for switch mode and update it during driver mode change and annotate with READ_ONCE()/WRITE_ONCE() as it can be read by napi while writing. [1] https://lore.kernel.org/netdev/20210531124051.GA15218@cephalopod/ [2] https://e2e.ti.com/support/arm/sitara_arm/f/791/t/701669 Cc: stable@vger.kernel.org Fixes: ed3525eda4c4 ("net: ethernet: ti: introduce cpsw switchdev based driver part 1 - dual-emac") Reported-by: Ben Hutchings Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw_new.c | 7 +++++-- drivers/net/ethernet/ti/cpsw_priv.h | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index 57d279fdcc9f..d1d02001cef6 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -920,7 +920,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, struct cpdma_chan *txch; int ret, q_idx; - if (skb_padto(skb, CPSW_MIN_PACKET_SIZE)) { + if (skb_put_padto(skb, READ_ONCE(priv->tx_packet_min))) { cpsw_err(priv, tx_err, "packet pad failed\n"); ndev->stats.tx_dropped++; return NET_XMIT_DROP; @@ -1100,7 +1100,7 @@ static int cpsw_ndo_xdp_xmit(struct net_device *ndev, int n, for (i = 0; i < n; i++) { xdpf = frames[i]; - if (xdpf->len < CPSW_MIN_PACKET_SIZE) + if (xdpf->len < READ_ONCE(priv->tx_packet_min)) break; if (cpsw_xdp_tx_frame(priv, xdpf, NULL, priv->emac_port)) @@ -1389,6 +1389,7 @@ static int cpsw_create_ports(struct cpsw_common *cpsw) priv->dev = dev; priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); priv->emac_port = i + 1; + priv->tx_packet_min = CPSW_MIN_PACKET_SIZE; if (is_valid_ether_addr(slave_data->mac_addr)) { ether_addr_copy(priv->mac_addr, slave_data->mac_addr); @@ -1686,6 +1687,7 @@ static int cpsw_dl_switch_mode_set(struct devlink *dl, u32 id, priv = netdev_priv(sl_ndev); slave->port_vlan = vlan; + WRITE_ONCE(priv->tx_packet_min, CPSW_MIN_PACKET_SIZE_VLAN); if (netif_running(sl_ndev)) cpsw_port_add_switch_def_ale_entries(priv, slave); @@ -1714,6 +1716,7 @@ static int cpsw_dl_switch_mode_set(struct devlink *dl, u32 id, priv = netdev_priv(slave->ndev); slave->port_vlan = slave->data->dual_emac_res_vlan; + WRITE_ONCE(priv->tx_packet_min, CPSW_MIN_PACKET_SIZE); cpsw_port_add_dual_emac_def_ale_entries(priv, slave); } diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index a323bea54faa..2951fb7b9dae 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -89,7 +89,8 @@ do { \ #define CPSW_POLL_WEIGHT 64 #define CPSW_RX_VLAN_ENCAP_HDR_SIZE 4 -#define CPSW_MIN_PACKET_SIZE (VLAN_ETH_ZLEN) +#define CPSW_MIN_PACKET_SIZE_VLAN (VLAN_ETH_ZLEN) +#define CPSW_MIN_PACKET_SIZE (ETH_ZLEN) #define CPSW_MAX_PACKET_SIZE (VLAN_ETH_FRAME_LEN +\ ETH_FCS_LEN +\ CPSW_RX_VLAN_ENCAP_HDR_SIZE) @@ -380,6 +381,7 @@ struct cpsw_priv { u32 emac_port; struct cpsw_common *cpsw; int offload_fwd_mark; + u32 tx_packet_min; }; #define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw) -- cgit From 8f3d65c166797746455553f4eaf74a5f89f996d4 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Mon, 9 Aug 2021 11:05:56 +0200 Subject: net/smc: fix wait on already cleared link There can be a race between the waiters for a tx work request buffer and the link down processing that finally clears the link. Although all waiters are woken up before the link is cleared there might be waiters which did not yet get back control and are still waiting. This results in an access to a cleared wait queue head. Fix this by introducing atomic reference counting around the wait calls, and wait with the link clear processing until all waiters have finished. Move the work request layer related calls into smc_wr.c and set the link state to INACTIVE before calling smcr_link_clear() in smc_llc_srv_add_link(). Fixes: 15e1b99aadfb ("net/smc: no WR buffer wait for terminating link group") Signed-off-by: Karsten Graul Signed-off-by: Guvenc Gulce Signed-off-by: David S. Miller --- net/smc/smc_core.h | 2 ++ net/smc/smc_llc.c | 10 ++++------ net/smc/smc_tx.c | 18 +++++++++++++++++- net/smc/smc_wr.c | 10 ++++++++++ 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 6d6fd1397c87..64d86298e4df 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -97,6 +97,7 @@ struct smc_link { unsigned long *wr_tx_mask; /* bit mask of used indexes */ u32 wr_tx_cnt; /* number of WR send buffers */ wait_queue_head_t wr_tx_wait; /* wait for free WR send buf */ + atomic_t wr_tx_refcnt; /* tx refs to link */ struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */ struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */ @@ -109,6 +110,7 @@ struct smc_link { struct ib_reg_wr wr_reg; /* WR register memory region */ wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ + atomic_t wr_reg_refcnt; /* reg refs to link */ enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ u8 gid[SMC_GID_SIZE];/* gid matching used vlan id*/ diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 273eaf1bfe49..2e7560eba981 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -888,6 +888,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) if (!rc) goto out; out_clear_lnk: + lnk_new->state = SMC_LNK_INACTIVE; smcr_link_clear(lnk_new, false); out_reject: smc_llc_cli_add_link_reject(qentry); @@ -1184,6 +1185,7 @@ int smc_llc_srv_add_link(struct smc_link *link) goto out_err; return 0; out_err: + link_new->state = SMC_LNK_INACTIVE; smcr_link_clear(link_new, false); return rc; } @@ -1286,10 +1288,8 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr) del_llc->reason = 0; smc_llc_send_message(lnk, &qentry->msg); /* response */ - if (smc_link_downing(&lnk_del->state)) { - if (smc_switch_conns(lgr, lnk_del, false)) - smc_wr_tx_wait_no_pending_sends(lnk_del); - } + if (smc_link_downing(&lnk_del->state)) + smc_switch_conns(lgr, lnk_del, false); smcr_link_clear(lnk_del, true); active_links = smc_llc_active_link_count(lgr); @@ -1805,8 +1805,6 @@ void smc_llc_link_clear(struct smc_link *link, bool log) link->smcibdev->ibdev->name, link->ibport); complete(&link->llc_testlink_resp); cancel_delayed_work_sync(&link->llc_testlink_wrk); - smc_wr_wakeup_reg_wait(link); - smc_wr_wakeup_tx_wait(link); } /* register a new rtoken at the remote peer (for all links) */ diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 289025cd545a..c79361dfcdfb 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -496,7 +496,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn, /* Wakeup sndbuf consumers from any context (IRQ or process) * since there is more data to transmit; usable snd_wnd as max transmit */ -static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) +static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; struct smc_link *link = conn->lnk; @@ -550,6 +550,22 @@ out_unlock: return rc; } +static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) +{ + struct smc_link *link = conn->lnk; + int rc = -ENOLINK; + + if (!link) + return rc; + + atomic_inc(&link->wr_tx_refcnt); + if (smc_link_usable(link)) + rc = _smcr_tx_sndbuf_nonempty(conn); + if (atomic_dec_and_test(&link->wr_tx_refcnt)) + wake_up_all(&link->wr_tx_wait); + return rc; +} + static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index cbc73a7e4d59..a419e9af36b9 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -322,9 +322,12 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) if (rc) return rc; + atomic_inc(&link->wr_reg_refcnt); rc = wait_event_interruptible_timeout(link->wr_reg_wait, (link->wr_reg_state != POSTED), SMC_WR_REG_MR_WAIT_TIME); + if (atomic_dec_and_test(&link->wr_reg_refcnt)) + wake_up_all(&link->wr_reg_wait); if (!rc) { /* timeout - terminate link */ smcr_link_down_cond_sched(link); @@ -566,10 +569,15 @@ void smc_wr_free_link(struct smc_link *lnk) return; ibdev = lnk->smcibdev->ibdev; + smc_wr_wakeup_reg_wait(lnk); + smc_wr_wakeup_tx_wait(lnk); + if (smc_wr_tx_wait_no_pending_sends(lnk)) memset(lnk->wr_tx_mask, 0, BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); + wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); + wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); if (lnk->wr_rx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, @@ -728,7 +736,9 @@ int smc_wr_create_link(struct smc_link *lnk) memset(lnk->wr_tx_mask, 0, BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); init_waitqueue_head(&lnk->wr_tx_wait); + atomic_set(&lnk->wr_tx_refcnt, 0); init_waitqueue_head(&lnk->wr_reg_wait); + atomic_set(&lnk->wr_reg_refcnt, 0); return rc; dma_unmap: -- cgit From 64513d269e8971aabb7e787955a1b320e3031306 Mon Sep 17 00:00:00 2001 From: Guvenc Gulce Date: Mon, 9 Aug 2021 11:05:57 +0200 Subject: net/smc: Correct smc link connection counter in case of smc client SMC clients may be assigned to a different link after the initial connection between two peers was established. In such a case, the connection counter was not correctly set. Update the connection counter correctly when a smc client connection is assigned to a different smc link. Fixes: 07d51580ff65 ("net/smc: Add connection counters for links") Signed-off-by: Guvenc Gulce Tested-by: Karsten Graul Signed-off-by: David S. Miller --- net/smc/af_smc.c | 2 +- net/smc/smc_core.c | 4 ++-- net/smc/smc_core.h | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 898389611ae8..c038efc23ce3 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -795,7 +795,7 @@ static int smc_connect_rdma(struct smc_sock *smc, reason_code = SMC_CLC_DECL_NOSRVLINK; goto connect_abort; } - smc->conn.lnk = link; + smc_switch_link_and_count(&smc->conn, link); } /* create send buffer and rmb */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index cd0d7c908b2a..c160ff50c053 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -917,8 +917,8 @@ static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend, return rc; } -static void smc_switch_link_and_count(struct smc_connection *conn, - struct smc_link *to_lnk) +void smc_switch_link_and_count(struct smc_connection *conn, + struct smc_link *to_lnk) { atomic_dec(&conn->lnk->conn_cnt); conn->lnk = to_lnk; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 64d86298e4df..c043ecdca5c4 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -446,6 +446,8 @@ void smc_core_exit(void); int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, u8 link_idx, struct smc_init_info *ini); void smcr_link_clear(struct smc_link *lnk, bool log); +void smc_switch_link_and_count(struct smc_connection *conn, + struct smc_link *to_lnk); int smcr_buf_map_lgr(struct smc_link *lnk); int smcr_buf_reg_lgr(struct smc_link *lnk); void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type); -- cgit From d09c548dbf3b31cb07bba562e0f452edfa01efe3 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Mon, 9 Aug 2021 15:04:55 +0800 Subject: net: sched: act_mirred: Reset ct info when mirror/redirect skb When mirror/redirect a skb to a different port, the ct info should be reset for reclassification. Or the pkts will match unexpected rules. For example, with following topology and commands: ----------- | veth0 -+------- | veth1 -+------- | ------------ tc qdisc add dev veth0 clsact # The same with "action mirred egress mirror dev veth1" or "action mirred ingress redirect dev veth1" tc filter add dev veth0 egress chain 1 protocol ip flower ct_state +trk action mirred ingress mirror dev veth1 tc filter add dev veth0 egress chain 0 protocol ip flower ct_state -inv action ct commit action goto chain 1 tc qdisc add dev veth1 clsact tc filter add dev veth1 ingress chain 0 protocol ip flower ct_state +trk action drop ping & tc -s filter show dev veth1 ingress With command 'tc -s filter show', we can find the pkts were dropped on veth1. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: Roi Dayan Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 7153c67f641e..2ef4cd2c848b 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -273,6 +273,9 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, goto out; } + /* All mirred/redirected skbs should clear previous ct info */ + nf_reset_ct(skb2); + want_ingress = tcf_mirred_act_wants_ingress(m_eaction); expects_nh = want_ingress || !m_mac_header_xmit; -- cgit From 769f52676756b8c5feb302d2d95af59577fc69ec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 4 Aug 2021 21:35:01 -0700 Subject: configfs: restore the kernel v5.13 text attribute write behavior Instead of appending new text attribute data at the offset specified by the write() system call, only pass the newly written data to the .store() callback. Reported-by: Bodo Stroesser Tested-by: Bodo Stroesser Signed-off-by: Bart Van Assche Signed-off-by: Christoph Hellwig --- fs/configfs/file.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 5a0be9985bae..0ad32150611e 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -177,28 +177,22 @@ out: return retval; } -/* Fill [buffer, buffer + pos) with data coming from @from. */ -static int fill_write_buffer(struct configfs_buffer *buffer, loff_t pos, +/* Fill @buffer with data coming from @from. */ +static int fill_write_buffer(struct configfs_buffer *buffer, struct iov_iter *from) { - loff_t to_copy; int copied; - u8 *to; if (!buffer->page) buffer->page = (char *)__get_free_pages(GFP_KERNEL, 0); if (!buffer->page) return -ENOMEM; - to_copy = SIMPLE_ATTR_SIZE - 1 - pos; - if (to_copy <= 0) - return 0; - to = buffer->page + pos; - copied = copy_from_iter(to, to_copy, from); + copied = copy_from_iter(buffer->page, SIMPLE_ATTR_SIZE - 1, from); buffer->needs_read_fill = 1; /* if buf is assumed to contain a string, terminate it by \0, * so e.g. sscanf() can scan the string easily */ - to[copied] = 0; + buffer->page[copied] = 0; return copied ? : -EFAULT; } @@ -227,10 +221,10 @@ static ssize_t configfs_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct configfs_buffer *buffer = file->private_data; - ssize_t len; + int len; mutex_lock(&buffer->mutex); - len = fill_write_buffer(buffer, iocb->ki_pos, from); + len = fill_write_buffer(buffer, from); if (len > 0) len = flush_write_buffer(file, buffer, len); if (len > 0) -- cgit From 50ac7479846053ca8054be833c1594e64de496bb Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Wed, 28 Jul 2021 12:39:10 -0700 Subject: ice: Prevent probing virtual functions The userspace utility "driverctl" can be used to change/override the system's default driver choices. This is useful in some situations (buggy driver, old driver missing a device ID, trying a workaround, etc.) where the user needs to load a different driver. However, this is also prone to user error, where a driver is mapped to a device it's not designed to drive. For example, if the ice driver is mapped to driver iavf devices, the ice driver crashes. Add a check to return an error if the ice driver is being used to probe a virtual function. Fixes: 837f08fdecbe ("ice: Add basic driver framework for Intel(R) E800 Series") Signed-off-by: Anirudh Venkataramanan Tested-by: Gurucharan G Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ef8d1815af56..a9506041eb42 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4194,6 +4194,11 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) struct ice_hw *hw; int i, err; + if (pdev->is_virtfn) { + dev_err(dev, "can't probe a virtual function\n"); + return -EINVAL; + } + /* this driver uses devres, see * Documentation/driver-api/driver-model/devres.rst */ -- cgit From c503e63200c679e362afca7aca9d3dc63a0f45ed Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Wed, 4 Aug 2021 12:12:42 -0700 Subject: ice: Stop processing VF messages during teardown When VFs are setup and torn down in quick succession, it is possible that a VF is torn down by the PF while the VF's virtchnl requests are still in the PF's mailbox ring. Processing the VF's virtchnl request when the VF itself doesn't exist results in undefined behavior. Fix this by adding a check to stop processing virtchnl requests when VF teardown is in progress. Fixes: ddf30f7ff840 ("ice: Add handler to configure SR-IOV") Signed-off-by: Anirudh Venkataramanan Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 1 + drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index a450343fbb92..eadcb9958346 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -234,6 +234,7 @@ enum ice_pf_state { ICE_VFLR_EVENT_PENDING, ICE_FLTR_OVERFLOW_PROMISC, ICE_VF_DIS, + ICE_VF_DEINIT_IN_PROGRESS, ICE_CFG_BUSY, ICE_SERVICE_SCHED, ICE_SERVICE_DIS, diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index 2826570dab51..e93430ab37f1 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -615,6 +615,8 @@ void ice_free_vfs(struct ice_pf *pf) struct ice_hw *hw = &pf->hw; unsigned int tmp, i; + set_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state); + if (!pf->vf) return; @@ -680,6 +682,7 @@ void ice_free_vfs(struct ice_pf *pf) i); clear_bit(ICE_VF_DIS, pf->state); + clear_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state); clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags); } @@ -4415,6 +4418,10 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) struct device *dev; int err = 0; + /* if de-init is underway, don't process messages from VF */ + if (test_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state)) + return; + dev = ice_pf_to_dev(pf); if (ice_validate_vf_id(pf, vf_id)) { err = -EINVAL; -- cgit From 3ba7f53f8bf1fb862e36c7f74434ac3aceb60158 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Fri, 6 Aug 2021 09:51:27 -0700 Subject: ice: don't remove netdev->dev_addr from uc sync list In some circumstances, such as with bridging, it's possible that the stack will add the device's own MAC address to its unicast address list. If, later, the stack deletes this address, the driver will receive a request to remove this address. The driver stores its current MAC address as part of the VSI MAC filter list instead of separately. So, this causes a problem when the device's MAC address is deleted unexpectedly, which results in traffic failure in some cases. The following configuration steps will reproduce the previously mentioned problem: > ip link set eth0 up > ip link add dev br0 type bridge > ip link set br0 up > ip addr flush dev eth0 > ip link set eth0 master br0 > echo 1 > /sys/class/net/br0/bridge/vlan_filtering > modprobe -r veth > modprobe -r bridge > ip addr add 192.168.1.100/24 dev eth0 The following ping command fails due to the netdev->dev_addr being deleted when removing the bridge module. > ping Fix this by making sure to not delete the netdev->dev_addr during MAC address sync. After fixing this issue it was noticed that the netdev_warn() in .set_mac was overly verbose, so make it at netdev_dbg(). Also, there is a possibility of a race condition between .set_mac and .set_rx_mode. Fix this by calling netif_addr_lock_bh() and netif_addr_unlock_bh() on the device's netdev when the netdev->dev_addr is going to be updated in .set_mac. Fixes: e94d44786693 ("ice: Implement filter sync, NDO operations and bump version") Signed-off-by: Brett Creeley Tested-by: Liang Li Tested-by: Gurucharan G Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a9506041eb42..fe2ded775f25 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -191,6 +191,14 @@ static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + /* Under some circumstances, we might receive a request to delete our + * own device address from our uc list. Because we store the device + * address in the VSI's MAC filter list, we need to ignore such + * requests and not delete our device address from this list. + */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; + if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr, ICE_FWD_TO_VSI)) return -EINVAL; @@ -5124,7 +5132,7 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) return -EADDRNOTAVAIL; if (ether_addr_equal(netdev->dev_addr, mac)) { - netdev_warn(netdev, "already using mac %pM\n", mac); + netdev_dbg(netdev, "already using mac %pM\n", mac); return 0; } @@ -5135,6 +5143,7 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) return -EBUSY; } + netif_addr_lock_bh(netdev); /* Clean up old MAC filter. Not an error if old filter doesn't exist */ status = ice_fltr_remove_mac(vsi, netdev->dev_addr, ICE_FWD_TO_VSI); if (status && status != ICE_ERR_DOES_NOT_EXIST) { @@ -5144,30 +5153,28 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) /* Add filter for new MAC. If filter exists, return success */ status = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI); - if (status == ICE_ERR_ALREADY_EXISTS) { + if (status == ICE_ERR_ALREADY_EXISTS) /* Although this MAC filter is already present in hardware it's * possible in some cases (e.g. bonding) that dev_addr was * modified outside of the driver and needs to be restored back * to this value. */ - memcpy(netdev->dev_addr, mac, netdev->addr_len); netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac); - return 0; - } - - /* error if the new filter addition failed */ - if (status) + else if (status) + /* error if the new filter addition failed */ err = -EADDRNOTAVAIL; err_update_filters: if (err) { netdev_err(netdev, "can't set MAC %pM. filter update failed\n", mac); + netif_addr_unlock_bh(netdev); return err; } /* change the netdev's MAC address */ memcpy(netdev->dev_addr, mac, netdev->addr_len); + netif_addr_unlock_bh(netdev); netdev_dbg(vsi->netdev, "updated MAC address to %pM\n", netdev->dev_addr); -- cgit From a7550f8b1c9712894f9e98d6caf5f49451ebd058 Mon Sep 17 00:00:00 2001 From: Md Fahad Iqbal Polash Date: Fri, 4 Jun 2021 09:53:33 -0700 Subject: iavf: Set RSS LUT and key in reset handle path iavf driver should set RSS LUT and key unconditionally in reset path. Currently, the driver does not do that. This patch fixes this issue. Fixes: 2c86ac3c7079 ("i40evf: create a generic config RSS function") Signed-off-by: Md Fahad Iqbal Polash Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 44bafedd09f2..244ec74ceca7 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1506,11 +1506,6 @@ static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter) set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); iavf_map_rings_to_vectors(adapter); - - if (RSS_AQ(adapter)) - adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS; - else - err = iavf_init_rss(adapter); err: return err; } @@ -2200,6 +2195,14 @@ continue_reset: goto reset_err; } + if (RSS_AQ(adapter)) { + adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_RSS; + } else { + err = iavf_init_rss(adapter); + if (err) + goto reset_err; + } + adapter->aq_required |= IAVF_FLAG_AQ_GET_CONFIG; adapter->aq_required |= IAVF_FLAG_AQ_MAP_VECTORS; -- cgit From 71330842ff93ae67a066c1fa68d75672527312fa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 9 Aug 2021 21:45:32 +0200 Subject: bpf: Add _kernel suffix to internal lockdown_bpf_read Rename LOCKDOWN_BPF_READ into LOCKDOWN_BPF_READ_KERNEL so we have naming more consistent with a LOCKDOWN_BPF_WRITE_USER option that we are adding. Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko --- include/linux/security.h | 2 +- kernel/bpf/helpers.c | 4 ++-- kernel/trace/bpf_trace.c | 8 ++++---- security/security.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/linux/security.h b/include/linux/security.h index 24eda04221e9..724d7a4a0c91 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -123,7 +123,7 @@ enum lockdown_reason { LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, - LOCKDOWN_BPF_READ, + LOCKDOWN_BPF_READ_KERNEL, LOCKDOWN_PERF, LOCKDOWN_TRACEFS, LOCKDOWN_XMON_RW, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 62cf00383910..0b04553e8c44 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1070,12 +1070,12 @@ bpf_base_func_proto(enum bpf_func_id func_id) case BPF_FUNC_probe_read_user: return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_kernel_proto; case BPF_FUNC_probe_read_user_str: return &bpf_probe_read_user_str_proto; case BPF_FUNC_probe_read_kernel_str: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_kernel_str_proto; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b4916ef388ad..1836591197a5 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -999,19 +999,19 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_probe_read_user: return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_kernel_proto; case BPF_FUNC_probe_read_user_str: return &bpf_probe_read_user_str_proto; case BPF_FUNC_probe_read_kernel_str: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_kernel_str_proto; #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE case BPF_FUNC_probe_read: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_compat_proto; case BPF_FUNC_probe_read_str: - return security_locked_down(LOCKDOWN_BPF_READ) < 0 ? + return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ? NULL : &bpf_probe_read_compat_str_proto; #endif #ifdef CONFIG_CGROUPS diff --git a/security/security.c b/security/security.c index 09533cbb7221..6b83ab4e9d66 100644 --- a/security/security.c +++ b/security/security.c @@ -61,7 +61,7 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = { [LOCKDOWN_INTEGRITY_MAX] = "integrity", [LOCKDOWN_KCORE] = "/proc/kcore access", [LOCKDOWN_KPROBES] = "use of kprobes", - [LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM", + [LOCKDOWN_BPF_READ_KERNEL] = "use of bpf to read kernel RAM", [LOCKDOWN_PERF] = "unsafe use of perf", [LOCKDOWN_TRACEFS] = "use of tracefs", [LOCKDOWN_XMON_RW] = "xmon read and write access", -- cgit From f153c2246783ba210493054d99c66353f56423c9 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Fri, 30 Jul 2021 08:28:54 +0200 Subject: ucounts: add missing data type changes commit f9c82a4ea89c3 ("Increase size of ucounts to atomic_long_t") changed the data type of ucounts/ucounts_max to long, but missed to adjust a few other places. This is noticeable on big endian platforms from user space because the /proc/sys/user/max_*_names files all contain 0. v4 - Made the min and max constants long so the sysctl values are actually settable on little endian machines. -- EWB Fixes: f9c82a4ea89c ("Increase size of ucounts to atomic_long_t") Signed-off-by: Sven Schnelle Tested-by: Nathan Chancellor Tested-by: Linux Kernel Functional Testing Acked-by: Alexey Gladkov v1: https://lkml.kernel.org/r/20210721115800.910778-1-svens@linux.ibm.com v2: https://lkml.kernel.org/r/20210721125233.1041429-1-svens@linux.ibm.com v3: https://lkml.kernel.org/r/20210730062854.3601635-1-svens@linux.ibm.com Link: https://lkml.kernel.org/r/8735rijqlv.fsf_-_@disp2133 Signed-off-by: Eric W. Biederman --- fs/notify/fanotify/fanotify_user.c | 17 +++++++++++------ fs/notify/inotify/inotify_user.c | 17 +++++++++++------ kernel/ucount.c | 19 +++++++++++-------- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 64864fb40b40..28b67cb9458d 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -54,22 +54,27 @@ static int fanotify_max_queued_events __read_mostly; #include +static long ft_zero = 0; +static long ft_int_max = INT_MAX; + struct ctl_table fanotify_table[] = { { .procname = "max_user_groups", .data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &ft_zero, + .extra2 = &ft_int_max, }, { .procname = "max_user_marks", .data = &init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &ft_zero, + .extra2 = &ft_int_max, }, { .procname = "max_queued_events", diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 98f61b31745a..62051247f6d2 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -55,22 +55,27 @@ struct kmem_cache *inotify_inode_mark_cachep __read_mostly; #include +static long it_zero = 0; +static long it_int_max = INT_MAX; + struct ctl_table inotify_table[] = { { .procname = "max_user_instances", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &it_zero, + .extra2 = &it_int_max, }, { .procname = "max_user_watches", .data = &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES], - .maxlen = sizeof(int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &it_zero, + .extra2 = &it_int_max, }, { .procname = "max_queued_events", diff --git a/kernel/ucount.c b/kernel/ucount.c index 77be3bbe3cc4..bb51849e6375 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -58,14 +58,17 @@ static struct ctl_table_root set_root = { .permissions = set_permissions, }; -#define UCOUNT_ENTRY(name) \ - { \ - .procname = name, \ - .maxlen = sizeof(int), \ - .mode = 0644, \ - .proc_handler = proc_dointvec_minmax, \ - .extra1 = SYSCTL_ZERO, \ - .extra2 = SYSCTL_INT_MAX, \ +static long ue_zero = 0; +static long ue_int_max = INT_MAX; + +#define UCOUNT_ENTRY(name) \ + { \ + .procname = name, \ + .maxlen = sizeof(long), \ + .mode = 0644, \ + .proc_handler = proc_doulongvec_minmax, \ + .extra1 = &ue_zero, \ + .extra2 = &ue_int_max, \ } static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_user_namespaces"), -- cgit From 669d94219d91a2ba950bb12ece69cf0ada53ad4f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 9 Aug 2021 13:04:10 -0700 Subject: MAINTAINERS: update Vineet's email address I'll be leaving Synopsys shortly, but will continue to handle maintenance for the transition period. Signed-off-by: Vineet Gupta --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c9467d2839f5..bbaecde94aa0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17815,7 +17815,7 @@ F: include/linux/sync_file.h F: include/uapi/linux/sync_file.h SYNOPSYS ARC ARCHITECTURE -M: Vineet Gupta +M: Vineet Gupta L: linux-snps-arc@lists.infradead.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git -- cgit From beb7f2de5728b0bd2140a652fa51f6ad85d159f7 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 8 Aug 2021 09:52:42 +0300 Subject: psample: Add a fwd declaration for skbuff Without this there is a warning if source files include psample.h before skbuff.h or doesn't include it at all. Fixes: 6ae0a6286171 ("net: Introduce psample, a new genetlink channel for packet sampling") Signed-off-by: Roi Dayan Link: https://lore.kernel.org/r/20210808065242.1522535-1-roid@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/psample.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/net/psample.h b/include/net/psample.h index e328c5127757..0509d2d6be67 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -31,6 +31,8 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num); void psample_group_take(struct psample_group *group); void psample_group_put(struct psample_group *group); +struct sk_buff; + #if IS_ENABLED(CONFIG_PSAMPLE) void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, -- cgit From d6e712aa7e6a3d5a9633f4bcbe2237f3edc292bd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Aug 2021 12:08:34 -0700 Subject: net: openvswitch: fix kernel-doc warnings in flow.c Repair kernel-doc notation in a few places to make it conform to the expected format. Fixes the following kernel-doc warnings: flow.c:296: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Parse vlan tag from vlan header. flow.c:296: warning: missing initial short description on line: * Parse vlan tag from vlan header. flow.c:537: warning: No description found for return value of 'key_extract_l3l4' flow.c:769: warning: No description found for return value of 'key_extract' Signed-off-by: Randy Dunlap Cc: Pravin B Shelar Cc: dev@openvswitch.org Link: https://lore.kernel.org/r/20210808190834.23362-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- net/openvswitch/flow.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index e586424d8b04..9713035b89e3 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -293,14 +293,14 @@ static bool icmp6hdr_ok(struct sk_buff *skb) } /** - * Parse vlan tag from vlan header. + * parse_vlan_tag - Parse vlan tag from vlan header. * @skb: skb containing frame to parse * @key_vh: pointer to parsed vlan tag * @untag_vlan: should the vlan header be removed from the frame * - * Returns ERROR on memory error. - * Returns 0 if it encounters a non-vlan or incomplete packet. - * Returns 1 after successfully parsing vlan tag. + * Return: ERROR on memory error. + * %0 if it encounters a non-vlan or incomplete packet. + * %1 after successfully parsing vlan tag. */ static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh, bool untag_vlan) @@ -532,6 +532,7 @@ static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key) * L3 header * @key: output flow key * + * Return: %0 if successful, otherwise a negative errno value. */ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) { @@ -748,8 +749,6 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) * * The caller must ensure that skb->len >= ETH_HLEN. * - * Returns 0 if successful, otherwise a negative errno value. - * * Initializes @skb header fields as follows: * * - skb->mac_header: the L2 header. @@ -764,6 +763,8 @@ static int key_extract_l3l4(struct sk_buff *skb, struct sw_flow_key *key) * * - skb->protocol: the type of the data starting at skb->network_header. * Equals to key->eth.type. + * + * Return: %0 if successful, otherwise a negative errno value. */ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) { -- cgit From 143a8526ab5fd4f8a0c4fe2a9cb28c181dc5a95f Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 6 Aug 2021 17:52:06 +0200 Subject: bareudp: Fix invalid read beyond skb's linear data Data beyond the UDP header might not be part of the skb's linear data. Use skb_copy_bits() instead of direct access to skb->data+X, so that we read the correct bytes even on a fragmented skb. Fixes: 4b5f67232d95 ("net: Special handling for IP & MPLS.") Signed-off-by: Guillaume Nault Link: https://lore.kernel.org/r/7741c46545c6ef02e70c80a9b32814b22d9616b3.1628264975.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index a7ee0af1af90..54e321a695ce 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -71,12 +71,18 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) family = AF_INET6; if (bareudp->ethertype == htons(ETH_P_IP)) { - struct iphdr *iphdr; + __u8 ipversion; - iphdr = (struct iphdr *)(skb->data + BAREUDP_BASE_HLEN); - if (iphdr->version == 4) { - proto = bareudp->ethertype; - } else if (bareudp->multi_proto_mode && (iphdr->version == 6)) { + if (skb_copy_bits(skb, BAREUDP_BASE_HLEN, &ipversion, + sizeof(ipversion))) { + bareudp->dev->stats.rx_dropped++; + goto drop; + } + ipversion >>= 4; + + if (ipversion == 4) { + proto = htons(ETH_P_IP); + } else if (ipversion == 6 && bareudp->multi_proto_mode) { proto = htons(ETH_P_IPV6); } else { bareudp->dev->stats.rx_dropped++; -- cgit From 4956b9eaad456a88b0d56947bef036e086250beb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Aug 2021 07:49:41 -0600 Subject: io_uring: rsrc ref lock needs to be IRQ safe Nadav reports running into the below splat on re-enabling softirqs: WARNING: CPU: 2 PID: 1777 at kernel/softirq.c:364 __local_bh_enable_ip+0xaa/0xe0 Modules linked in: CPU: 2 PID: 1777 Comm: umem Not tainted 5.13.1+ #161 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/22/2020 RIP: 0010:__local_bh_enable_ip+0xaa/0xe0 Code: a9 00 ff ff 00 74 38 65 ff 0d a2 21 8c 7a e8 ed 1a 20 00 fb 66 0f 1f 44 00 00 5b 41 5c 5d c3 65 8b 05 e6 2d 8c 7a 85 c0 75 9a <0f> 0b eb 96 e8 2d 1f 20 00 eb a5 4c 89 e7 e8 73 4f 0c 00 eb ae 65 RSP: 0018:ffff88812e58fcc8 EFLAGS: 00010046 RAX: 0000000000000000 RBX: 0000000000000201 RCX: dffffc0000000000 RDX: 0000000000000007 RSI: 0000000000000201 RDI: ffffffff8898c5ac RBP: ffff88812e58fcd8 R08: ffffffff8575dbbf R09: ffffed1028ef14f9 R10: ffff88814778a7c3 R11: ffffed1028ef14f8 R12: ffffffff85c9e9ae R13: ffff88814778a000 R14: ffff88814778a7b0 R15: ffff8881086db890 FS: 00007fbcfee17700(0000) GS:ffff8881e0300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000c0402a5008 CR3: 000000011c1ac003 CR4: 00000000003706e0 Call Trace: _raw_spin_unlock_bh+0x31/0x40 io_rsrc_node_ref_zero+0x13e/0x190 io_dismantle_req+0x215/0x220 io_req_complete_post+0x1b8/0x720 __io_complete_rw.isra.0+0x16b/0x1f0 io_complete_rw+0x10/0x20 where it's clear we end up calling the percpu count release directly from the completion path, as it's in atomic mode and we drop the last ref. For file/block IO, this can be from IRQ context already, and the softirq locking for rsrc isn't enough. Just make the lock fully IRQ safe, and ensure we correctly safe state from the release path as we don't know the full context there. Reported-by: Nadav Amit Tested-by: Nadav Amit Link: https://lore.kernel.org/io-uring/C187C836-E78B-4A31-B24C-D16919ACA093@gmail.com/ Signed-off-by: Jens Axboe --- fs/io_uring.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ca064486cb41..6a8257233061 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7138,16 +7138,6 @@ static void **io_alloc_page_table(size_t size) return table; } -static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx) -{ - spin_lock_bh(&ctx->rsrc_ref_lock); -} - -static inline void io_rsrc_ref_unlock(struct io_ring_ctx *ctx) -{ - spin_unlock_bh(&ctx->rsrc_ref_lock); -} - static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node) { percpu_ref_exit(&ref_node->refs); @@ -7164,9 +7154,9 @@ static void io_rsrc_node_switch(struct io_ring_ctx *ctx, struct io_rsrc_node *rsrc_node = ctx->rsrc_node; rsrc_node->rsrc_data = data_to_kill; - io_rsrc_ref_lock(ctx); + spin_lock_irq(&ctx->rsrc_ref_lock); list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list); - io_rsrc_ref_unlock(ctx); + spin_unlock_irq(&ctx->rsrc_ref_lock); atomic_inc(&data_to_kill->refs); percpu_ref_kill(&rsrc_node->refs); @@ -7674,9 +7664,10 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref) { struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs); struct io_ring_ctx *ctx = node->rsrc_data->ctx; + unsigned long flags; bool first_add = false; - io_rsrc_ref_lock(ctx); + spin_lock_irqsave(&ctx->rsrc_ref_lock, flags); node->done = true; while (!list_empty(&ctx->rsrc_ref_list)) { @@ -7688,7 +7679,7 @@ static void io_rsrc_node_ref_zero(struct percpu_ref *ref) list_del(&node->node); first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist); } - io_rsrc_ref_unlock(ctx); + spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); if (first_add) mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ); -- cgit From 49e7f0c789add1330b111af0b7caeb0e87df063e Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Sun, 8 Aug 2021 21:54:33 +0800 Subject: io-wq: fix bug of creating io-wokers unconditionally The former patch to add check between nr_workers and max_workers has a bug, which will cause unconditionally creating io-workers. That's because the result of the check doesn't affect the call of create_io_worker(), fix it by bringing in a boolean value for it. Fixes: 21698274da5b ("io-wq: fix lack of acct->nr_workers < acct->max_workers judgement") Signed-off-by: Hao Xu Link: https://lore.kernel.org/r/20210808135434.68667-2-haoxu@linux.alibaba.com [axboe: drop hunk that isn't strictly needed] Signed-off-by: Jens Axboe --- fs/io-wq.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 12fc19353bb0..ac8604a35169 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -282,16 +282,24 @@ static void create_worker_cb(struct callback_head *cb) struct io_wq *wq; struct io_wqe *wqe; struct io_wqe_acct *acct; + bool do_create = false; cwd = container_of(cb, struct create_worker_data, work); wqe = cwd->wqe; wq = wqe->wq; acct = &wqe->acct[cwd->index]; raw_spin_lock_irq(&wqe->lock); - if (acct->nr_workers < acct->max_workers) + if (acct->nr_workers < acct->max_workers) { acct->nr_workers++; + do_create = true; + } raw_spin_unlock_irq(&wqe->lock); - create_io_worker(wq, cwd->wqe, cwd->index); + if (do_create) { + create_io_worker(wq, cwd->wqe, cwd->index); + } else { + atomic_dec(&acct->nr_running); + io_worker_ref_put(wq); + } kfree(cwd); } -- cgit From 47cae0c71f7a126903f930191e6e9f103674aca1 Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Sun, 8 Aug 2021 21:54:34 +0800 Subject: io-wq: fix IO_WORKER_F_FIXED issue in create_io_worker() There may be cases like: A B spin_lock(wqe->lock) nr_workers is 0 nr_workers++ spin_unlock(wqe->lock) spin_lock(wqe->lock) nr_wokers is 1 nr_workers++ spin_unlock(wqe->lock) create_io_worker() acct->worker is 1 create_io_worker() acct->worker is 1 There should be one worker marked IO_WORKER_F_FIXED, but no one is. Fix this by introduce a new agrument for create_io_worker() to indicate if it is the first worker. Fixes: 3d4e4face9c1 ("io-wq: fix no lock protection of acct->nr_worker") Signed-off-by: Hao Xu Link: https://lore.kernel.org/r/20210808135434.68667-3-haoxu@linux.alibaba.com Signed-off-by: Jens Axboe --- fs/io-wq.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index ac8604a35169..7d2ed8c7dd31 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -129,7 +129,7 @@ struct io_cb_cancel_data { bool cancel_all; }; -static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index); +static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first); static void io_wqe_dec_running(struct io_worker *worker); static bool io_worker_get(struct io_worker *worker) @@ -248,18 +248,20 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) rcu_read_unlock(); if (!ret) { - bool do_create = false; + bool do_create = false, first = false; raw_spin_lock_irq(&wqe->lock); if (acct->nr_workers < acct->max_workers) { atomic_inc(&acct->nr_running); atomic_inc(&wqe->wq->worker_refs); + if (!acct->nr_workers) + first = true; acct->nr_workers++; do_create = true; } raw_spin_unlock_irq(&wqe->lock); if (do_create) - create_io_worker(wqe->wq, wqe, acct->index); + create_io_worker(wqe->wq, wqe, acct->index, first); } } @@ -282,7 +284,7 @@ static void create_worker_cb(struct callback_head *cb) struct io_wq *wq; struct io_wqe *wqe; struct io_wqe_acct *acct; - bool do_create = false; + bool do_create = false, first = false; cwd = container_of(cb, struct create_worker_data, work); wqe = cwd->wqe; @@ -290,12 +292,14 @@ static void create_worker_cb(struct callback_head *cb) acct = &wqe->acct[cwd->index]; raw_spin_lock_irq(&wqe->lock); if (acct->nr_workers < acct->max_workers) { + if (!acct->nr_workers) + first = true; acct->nr_workers++; do_create = true; } raw_spin_unlock_irq(&wqe->lock); if (do_create) { - create_io_worker(wq, cwd->wqe, cwd->index); + create_io_worker(wq, wqe, cwd->index, first); } else { atomic_dec(&acct->nr_running); io_worker_ref_put(wq); @@ -637,7 +641,7 @@ void io_wq_worker_sleeping(struct task_struct *tsk) raw_spin_unlock_irq(&worker->wqe->lock); } -static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) +static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first) { struct io_wqe_acct *acct = &wqe->acct[index]; struct io_worker *worker; @@ -678,7 +682,7 @@ fail: worker->flags |= IO_WORKER_F_FREE; if (index == IO_WQ_ACCT_BOUND) worker->flags |= IO_WORKER_F_BOUND; - if ((acct->nr_workers == 1) && (worker->flags & IO_WORKER_F_BOUND)) + if (first && (worker->flags & IO_WORKER_F_BOUND)) worker->flags |= IO_WORKER_F_FIXED; raw_spin_unlock_irq(&wqe->lock); wake_up_new_task(tsk); -- cgit From c018db4a57f3e31a9cb24d528e9f094eda89a499 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Aug 2021 08:15:50 -0600 Subject: io_uring: drop ctx->uring_lock before flushing work item Ammar reports that he's seeing a lockdep splat on running test/rsrc_tags from the regression suite: ====================================================== WARNING: possible circular locking dependency detected 5.14.0-rc3-bluetea-test-00249-gc7d102232649 #5 Tainted: G OE ------------------------------------------------------ kworker/2:4/2684 is trying to acquire lock: ffff88814bb1c0a8 (&ctx->uring_lock){+.+.}-{3:3}, at: io_rsrc_put_work+0x13d/0x1a0 but task is already holding lock: ffffc90001c6be70 ((work_completion)(&(&ctx->rsrc_put_work)->work)){+.+.}-{0:0}, at: process_one_work+0x1bc/0x530 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 ((work_completion)(&(&ctx->rsrc_put_work)->work)){+.+.}-{0:0}: __flush_work+0x31b/0x490 io_rsrc_ref_quiesce.part.0.constprop.0+0x35/0xb0 __do_sys_io_uring_register+0x45b/0x1060 do_syscall_64+0x35/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae -> #0 (&ctx->uring_lock){+.+.}-{3:3}: __lock_acquire+0x119a/0x1e10 lock_acquire+0xc8/0x2f0 __mutex_lock+0x86/0x740 io_rsrc_put_work+0x13d/0x1a0 process_one_work+0x236/0x530 worker_thread+0x52/0x3b0 kthread+0x135/0x160 ret_from_fork+0x1f/0x30 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock((work_completion)(&(&ctx->rsrc_put_work)->work)); lock(&ctx->uring_lock); lock((work_completion)(&(&ctx->rsrc_put_work)->work)); lock(&ctx->uring_lock); *** DEADLOCK *** 2 locks held by kworker/2:4/2684: #0: ffff88810004d938 ((wq_completion)events){+.+.}-{0:0}, at: process_one_work+0x1bc/0x530 #1: ffffc90001c6be70 ((work_completion)(&(&ctx->rsrc_put_work)->work)){+.+.}-{0:0}, at: process_one_work+0x1bc/0x530 stack backtrace: CPU: 2 PID: 2684 Comm: kworker/2:4 Tainted: G OE 5.14.0-rc3-bluetea-test-00249-gc7d102232649 #5 Hardware name: Acer Aspire ES1-421/OLVIA_BE, BIOS V1.05 07/02/2015 Workqueue: events io_rsrc_put_work Call Trace: dump_stack_lvl+0x6a/0x9a check_noncircular+0xfe/0x110 __lock_acquire+0x119a/0x1e10 lock_acquire+0xc8/0x2f0 ? io_rsrc_put_work+0x13d/0x1a0 __mutex_lock+0x86/0x740 ? io_rsrc_put_work+0x13d/0x1a0 ? io_rsrc_put_work+0x13d/0x1a0 ? io_rsrc_put_work+0x13d/0x1a0 ? process_one_work+0x1ce/0x530 io_rsrc_put_work+0x13d/0x1a0 process_one_work+0x236/0x530 worker_thread+0x52/0x3b0 ? process_one_work+0x530/0x530 kthread+0x135/0x160 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x1f/0x30 which is due to holding the ctx->uring_lock when flushing existing pending work, while the pending work flushing may need to grab the uring lock if we're using IOPOLL. Fix this by dropping the uring_lock a bit earlier as part of the flush. Cc: stable@vger.kernel.org Link: https://github.com/axboe/liburing/issues/404 Tested-by: Ammar Faizi Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6a8257233061..0c3e90f974e9 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7195,17 +7195,19 @@ static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ct /* kill initial ref, already quiesced if zero */ if (atomic_dec_and_test(&data->refs)) break; + mutex_unlock(&ctx->uring_lock); flush_delayed_work(&ctx->rsrc_put_work); ret = wait_for_completion_interruptible(&data->done); - if (!ret) + if (!ret) { + mutex_lock(&ctx->uring_lock); break; + } atomic_inc(&data->refs); /* wait for all works potentially completing data->done */ flush_delayed_work(&ctx->rsrc_put_work); reinit_completion(&data->done); - mutex_unlock(&ctx->uring_lock); ret = io_run_task_work_sig(); mutex_lock(&ctx->uring_lock); } while (ret >= 0); -- cgit From 43597aac1f87230cb565ab354d331682f13d3c7a Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 10 Aug 2021 02:44:23 +0100 Subject: io_uring: fix ctx-exit io_rsrc_put_work() deadlock __io_rsrc_put_work() might need ->uring_lock, so nobody should wait for rsrc nodes holding the mutex. However, that's exactly what io_ring_ctx_free() does with io_wait_rsrc_data(). Split it into rsrc wait + dealloc, and move the first one out of the lock. Cc: stable@vger.kernel.org Fixes: b60c8dce33895 ("io_uring: preparation for rsrc tagging") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0130c5c2693468173ec1afab714e0885d2c9c363.1628559783.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0c3e90f974e9..04c6d059ea94 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8652,13 +8652,10 @@ static void io_req_caches_free(struct io_ring_ctx *ctx) mutex_unlock(&ctx->uring_lock); } -static bool io_wait_rsrc_data(struct io_rsrc_data *data) +static void io_wait_rsrc_data(struct io_rsrc_data *data) { - if (!data) - return false; - if (!atomic_dec_and_test(&data->refs)) + if (data && !atomic_dec_and_test(&data->refs)) wait_for_completion(&data->done); - return true; } static void io_ring_ctx_free(struct io_ring_ctx *ctx) @@ -8670,10 +8667,14 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) ctx->mm_account = NULL; } + /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */ + io_wait_rsrc_data(ctx->buf_data); + io_wait_rsrc_data(ctx->file_data); + mutex_lock(&ctx->uring_lock); - if (io_wait_rsrc_data(ctx->buf_data)) + if (ctx->buf_data) __io_sqe_buffers_unregister(ctx); - if (io_wait_rsrc_data(ctx->file_data)) + if (ctx->file_data) __io_sqe_files_unregister(ctx); if (ctx->rings) __io_cqring_overflow_flush(ctx, true); -- cgit From 11431e26c9c43fa26f6b33ee1a90989f57b86024 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 3 Aug 2021 15:06:08 +0800 Subject: blk-iocost: fix lockdep warning on blkcg->lock blkcg->lock depends on q->queue_lock which may depend on another driver lock required in irq context, one example is dm-thin: Chain exists of: &pool->lock#3 --> &q->queue_lock --> &blkcg->lock Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&blkcg->lock); local_irq_disable(); lock(&pool->lock#3); lock(&q->queue_lock); lock(&pool->lock#3); Fix the issue by using spin_lock_irq(&blkcg->lock) in ioc_weight_write(). Cc: Tejun Heo Reported-by: Bruno Goncalves Link: https://lore.kernel.org/linux-block/CA+QYu4rzz6079ighEanS3Qq_Dmnczcf45ZoJoHKVLVATTo1e4Q@mail.gmail.com/T/#u Signed-off-by: Ming Lei Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210803070608.1766400-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-iocost.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 5fac3757e6e0..0e56557cacf2 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3061,19 +3061,19 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf, if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX) return -EINVAL; - spin_lock(&blkcg->lock); + spin_lock_irq(&blkcg->lock); iocc->dfl_weight = v * WEIGHT_ONE; hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct ioc_gq *iocg = blkg_to_iocg(blkg); if (iocg) { - spin_lock_irq(&iocg->ioc->lock); + spin_lock(&iocg->ioc->lock); ioc_now(iocg->ioc, &now); weight_updated(iocg, &now); - spin_unlock_irq(&iocg->ioc->lock); + spin_unlock(&iocg->ioc->lock); } } - spin_unlock(&blkcg->lock); + spin_unlock_irq(&blkcg->lock); return nbytes; } -- cgit From 9977d880f7a3c233db9165a75a3a14defc2a4aee Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Mon, 9 Aug 2021 11:09:47 -0400 Subject: scsi: lpfc: Move initialization of phba->poll_list earlier to avoid crash The phba->poll_list is traversed in case of an error in lpfc_sli4_hba_setup(), so it must be initialized earlier in case the error path is taken. [ 490.030738] lpfc 0000:65:00.0: 0:1413 Failed to init iocb list. [ 490.036661] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 490.044485] PGD 0 P4D 0 [ 490.047027] Oops: 0000 [#1] SMP PTI [ 490.050518] CPU: 0 PID: 7 Comm: kworker/0:1 Kdump: loaded Tainted: G I --------- - - 4.18. [ 490.060511] Hardware name: Dell Inc. PowerEdge R440/0WKGTH, BIOS 1.4.8 05/22/2018 [ 490.067994] Workqueue: events work_for_cpu_fn [ 490.072371] RIP: 0010:lpfc_sli4_cleanup_poll_list+0x20/0xb0 [lpfc] [ 490.078546] Code: cf e9 04 f7 fe ff 0f 1f 40 00 0f 1f 44 00 00 41 57 49 89 ff 41 56 41 55 41 54 4d 8d a79 [ 490.097291] RSP: 0018:ffffbd1a463dbcc8 EFLAGS: 00010246 [ 490.102518] RAX: 0000000000008200 RBX: ffff945cdb8c0000 RCX: 0000000000000000 [ 490.109649] RDX: 0000000000018200 RSI: ffff9468d0e16818 RDI: 0000000000000000 [ 490.116783] RBP: ffff945cdb8c1740 R08: 00000000000015c5 R09: 0000000000000042 [ 490.123915] R10: 0000000000000000 R11: ffffbd1a463dbab0 R12: ffff945cdb8c25c0 [ 490.131049] R13: 00000000fffffff4 R14: 0000000000001800 R15: ffff945cdb8c0000 [ 490.138182] FS: 0000000000000000(0000) GS:ffff9468d0e00000(0000) knlGS:0000000000000000 [ 490.146267] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 490.152013] CR2: 0000000000000000 CR3: 000000042ca10002 CR4: 00000000007706f0 [ 490.159146] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 490.166277] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 490.173409] PKRU: 55555554 [ 490.176123] Call Trace: [ 490.178598] lpfc_sli4_queue_destroy+0x7f/0x3c0 [lpfc] [ 490.183745] lpfc_sli4_hba_setup+0x1bc7/0x23e0 [lpfc] [ 490.188797] ? kernfs_activate+0x63/0x80 [ 490.192721] ? kernfs_add_one+0xe7/0x130 [ 490.196647] ? __kernfs_create_file+0x80/0xb0 [ 490.201020] ? lpfc_pci_probe_one_s4.isra.48+0x46f/0x9e0 [lpfc] [ 490.206944] lpfc_pci_probe_one_s4.isra.48+0x46f/0x9e0 [lpfc] [ 490.212697] lpfc_pci_probe_one+0x179/0xb70 [lpfc] [ 490.217492] local_pci_probe+0x41/0x90 [ 490.221246] work_for_cpu_fn+0x16/0x20 [ 490.224994] process_one_work+0x1a7/0x360 [ 490.229009] ? create_worker+0x1a0/0x1a0 [ 490.232933] worker_thread+0x1cf/0x390 [ 490.236687] ? create_worker+0x1a0/0x1a0 [ 490.240612] kthread+0x116/0x130 [ 490.243846] ? kthread_flush_work_fn+0x10/0x10 [ 490.248293] ret_from_fork+0x35/0x40 [ 490.251869] Modules linked in: lpfc(+) xt_CHECKSUM ipt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4i [ 490.332609] CR2: 0000000000000000 Link: https://lore.kernel.org/r/20210809150947.18104-1-emilne@redhat.com Fixes: 93a4d6f40198 ("scsi: lpfc: Add registration for CPU Offline/Online events") Cc: stable@vger.kernel.org Reviewed-by: James Smart Signed-off-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 5983e05b648f..e29523a1b530 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -13193,6 +13193,8 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) if (!phba) return -ENOMEM; + INIT_LIST_HEAD(&phba->poll_list); + /* Perform generic PCI device enabling operation */ error = lpfc_enable_pci_dev(phba); if (error) @@ -13327,7 +13329,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) /* Enable RAS FW log support */ lpfc_sli4_ras_setup(phba); - INIT_LIST_HEAD(&phba->poll_list); timer_setup(&phba->cpuhp_poll_timer, lpfc_sli4_poll_hbtimer, 0); cpuhp_state_add_instance_nocalls(lpfc_cpuhp_state, &phba->cpuhp); -- cgit From dbe7633c394be4a500b887fe8f9ad486dcba9d77 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Fri, 6 Aug 2021 10:12:50 -0700 Subject: scsi: storvsc: Log TEST_UNIT_READY errors as warnings Commit 08f76547f08d ("scsi: storvsc: Update error logging") added more robust logging of errors, particularly those reported as Hyper-V errors. But this change produces extra logging noise in that TEST_UNIT_READY may report errors during the normal course of detecting device adds and removes. Fix this by logging TEST_UNIT_READY errors as warnings, so that log lines are produced only if the storvsc log level is changed to WARN level on the kernel boot line. Link: https://lore.kernel.org/r/1628269970-87876-1-git-send-email-mikelley@microsoft.com Fixes: 08f76547f08d ("scsi: storvsc: Update error logging") Signed-off-by: Michael Kelley Signed-off-by: Martin K. Petersen --- drivers/scsi/storvsc_drv.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 328bb961c281..37506b3fe5a9 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1199,14 +1199,24 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device, vstor_packet->vm_srb.sense_info_length); if (vstor_packet->vm_srb.scsi_status != 0 || - vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS) - storvsc_log(device, STORVSC_LOGGING_ERROR, + vstor_packet->vm_srb.srb_status != SRB_STATUS_SUCCESS) { + + /* + * Log TEST_UNIT_READY errors only as warnings. Hyper-V can + * return errors when detecting devices using TEST_UNIT_READY, + * and logging these as errors produces unhelpful noise. + */ + int loglevel = (stor_pkt->vm_srb.cdb[0] == TEST_UNIT_READY) ? + STORVSC_LOGGING_WARN : STORVSC_LOGGING_ERROR; + + storvsc_log(device, loglevel, "tag#%d cmd 0x%x status: scsi 0x%x srb 0x%x hv 0x%x\n", request->cmd->request->tag, stor_pkt->vm_srb.cdb[0], vstor_packet->vm_srb.scsi_status, vstor_packet->vm_srb.srb_status, vstor_packet->status); + } if (vstor_packet->vm_srb.scsi_status == SAM_STAT_CHECK_CONDITION && (vstor_packet->vm_srb.srb_status & SRB_STATUS_AUTOSENSE_VALID)) -- cgit From 40d32727931cee82cdc5aaca25ce725d1f3ac864 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 4 Aug 2021 14:49:40 +0100 Subject: scsi: mpt3sas: Fix incorrectly assigned error return and check Currently the call to _base_static_config_pages() is assigning the error return to variable 'rc' but checking the error return in error 'r'. Fix this by assigning the error return to variable 'r' instead of 'rc'. Link: https://lore.kernel.org/r/20210804134940.114011-1-colin.king@canonical.com Fixes: 19a622c39a9d ("scsi: mpt3sas: Handle firmware faults during first half of IOC init") Signed-off-by: Colin Ian King Signed-off-by: Martin K. Petersen Addresses-Coverity: ("Unused value") --- drivers/scsi/mpt3sas/mpt3sas_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 19b1c0cf5f2a..cf4a3a2c22ad 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7851,7 +7851,7 @@ _base_make_ioc_operational(struct MPT3SAS_ADAPTER *ioc) return r; } - rc = _base_static_config_pages(ioc); + r = _base_static_config_pages(ioc); if (r) return r; -- cgit From c633e799641cf13960bd83189b4d5b1b2adb0d4e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 2 Aug 2021 16:39:54 +0300 Subject: net/mlx5: Don't skip subfunction cleanup in case of error in module init Clean SF resources if mlx5 eth failed to initialize. Fixes: 1958fc2f0712 ("net/mlx5: SF, Add auxiliary device driver") Signed-off-by: Leon Romanovsky Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 12 ++++-------- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 5 +++++ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index eb1b316560a8..c84ad87c99bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1784,16 +1784,14 @@ static int __init init(void) if (err) goto err_sf; -#ifdef CONFIG_MLX5_CORE_EN err = mlx5e_init(); - if (err) { - pci_unregister_driver(&mlx5_core_driver); - goto err_debug; - } -#endif + if (err) + goto err_en; return 0; +err_en: + mlx5_sf_driver_unregister(); err_sf: pci_unregister_driver(&mlx5_core_driver); err_debug: @@ -1803,9 +1801,7 @@ err_debug: static void __exit cleanup(void) { -#ifdef CONFIG_MLX5_CORE_EN mlx5e_cleanup(); -#endif mlx5_sf_driver_unregister(); pci_unregister_driver(&mlx5_core_driver); mlx5_unregister_debugfs(); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 343807ac2036..da365b8f0141 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -206,8 +206,13 @@ int mlx5_firmware_flash(struct mlx5_core_dev *dev, const struct firmware *fw, int mlx5_fw_version_query(struct mlx5_core_dev *dev, u32 *running_ver, u32 *stored_ver); +#ifdef CONFIG_MLX5_CORE_EN int mlx5e_init(void); void mlx5e_cleanup(void); +#else +static inline int mlx5e_init(void){ return 0; } +static inline void mlx5e_cleanup(void){} +#endif static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) { -- cgit From d3875924dae632d5edd908d285fffc5f07c835a3 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Tue, 22 Jun 2021 16:51:58 +0300 Subject: net/mlx5: DR, Add fail on error check on decap While processing encapsulated packet on RX, one of the fields that is checked is the inner packet length. If the length as specified in the header doesn't match the actual inner packet length, the packet is invalid and should be dropped. However, such packet caused the NIC to hang. This patch turns on a 'fail_on_error' HW bit which allows HW to drop such an invalid packet while processing RX packet and trying to decap it. Fixes: ad17dc8cf910 ("net/mlx5: DR, Move STEv0 action apply logic") Signed-off-by: Alex Vesker Signed-off-by: Yevgeny Kliteynik Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c index f1950e4968da..e4dd4eed5aee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c @@ -352,6 +352,7 @@ static void dr_ste_v0_set_rx_decap(u8 *hw_ste_p) { MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, DR_STE_TUNL_ACTION_DECAP); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1); } static void dr_ste_v0_set_rx_pop_vlan(u8 *hw_ste_p) @@ -365,6 +366,7 @@ static void dr_ste_v0_set_rx_decap_l3(u8 *hw_ste_p, bool vlan) MLX5_SET(ste_rx_steering_mult, hw_ste_p, tunneling_action, DR_STE_TUNL_ACTION_L3_DECAP); MLX5_SET(ste_modify_packet, hw_ste_p, action_description, vlan ? 1 : 0); + MLX5_SET(ste_rx_steering_mult, hw_ste_p, fail_on_error, 1); } static void dr_ste_v0_set_rewrite_actions(u8 *hw_ste_p, u16 num_of_actions, -- cgit From c623c95afa56bf4bf64e4f58742dc94616ef83db Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Sun, 1 Aug 2021 15:47:46 +0300 Subject: net/mlx5e: Avoid creating tunnel headers for local route It could be local and remote are on the same machine and the route result will be a local route which will result in creating encap id with src/dst mac address of 0. Fixes: a54e20b4fcae ("net/mlx5e: Add basic TC tunnel set action for SRIOV offloads") Signed-off-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 8f79f04eccd6..1e2d117082d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -124,6 +124,11 @@ static int mlx5e_route_lookup_ipv4_get(struct mlx5e_priv *priv, if (IS_ERR(rt)) return PTR_ERR(rt); + if (rt->rt_type != RTN_UNICAST) { + ret = -ENETUNREACH; + goto err_rt_release; + } + if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) { ret = -ENETUNREACH; goto err_rt_release; -- cgit From 6d8680da2e98410a25fe49e0a53f28c004be6d6d Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 4 Aug 2021 18:33:26 +0300 Subject: net/mlx5: Bridge, fix ageing time Ageing time is not converted from clock_t to jiffies which results incorrect ageing timeout calculation in workqueue update task. Fix it by applying clock_t_to_jiffies() to provided value. Fixes: c636a0f0f3f0 ("net/mlx5: Bridge, dynamic entry ageing") Signed-off-by: Vlad Buslov Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index a6e1d4f78268..f3f56f32e435 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -579,7 +579,7 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, xa_init(&bridge->vports); bridge->ifindex = ifindex; bridge->refcnt = 1; - bridge->ageing_time = BR_DEFAULT_AGEING_TIME; + bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME); list_add(&bridge->list, &br_offloads->bridges); return bridge; @@ -1006,7 +1006,7 @@ int mlx5_esw_bridge_ageing_time_set(unsigned long ageing_time, struct mlx5_eswit if (!vport->bridge) return -EINVAL; - vport->bridge->ageing_time = ageing_time; + vport->bridge->ageing_time = clock_t_to_jiffies(ageing_time); return 0; } -- cgit From 8ba3e4c85825c8801a2c298dcadac650a40d7137 Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Mon, 24 May 2021 16:15:22 +0300 Subject: net/mlx5e: Destroy page pool after XDP SQ to fix use-after-free mlx5e_close_xdpsq does the cleanup: it calls mlx5e_free_xdpsq_descs to free the outstanding descriptors, which relies on mlx5e_page_release_dynamic and page_pool_release_page. However, page_pool_destroy is already called by this point, because mlx5e_close_rq runs before mlx5e_close_xdpsq. This commit fixes the use-after-free by swapping mlx5e_close_xdpsq and mlx5e_close_rq. The commit cited below started calling page_pool_destroy directly from the driver. Previously, the page pool was destroyed under a call_rcu from xdp_rxq_info_unreg_mem_model, which would defer the deallocation until after the XDPSQ is cleaned up. Fixes: 1da4bbeffe41 ("net: core: page_pool: add user refcnt and reintroduce page_pool_destroy") Signed-off-by: Maxim Mikityanskiy Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 37c440837945..fd250f7bcd88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1891,30 +1891,30 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, if (err) goto err_close_icosq; + err = mlx5e_open_rxq_rq(c, params, &cparam->rq); + if (err) + goto err_close_sqs; + if (c->xdp) { err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->rq_xdpsq, false); if (err) - goto err_close_sqs; + goto err_close_rq; } - err = mlx5e_open_rxq_rq(c, params, &cparam->rq); - if (err) - goto err_close_xdp_sq; - err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true); if (err) - goto err_close_rq; + goto err_close_xdp_sq; return 0; -err_close_rq: - mlx5e_close_rq(&c->rq); - err_close_xdp_sq: if (c->xdp) mlx5e_close_xdpsq(&c->rq_xdpsq); +err_close_rq: + mlx5e_close_rq(&c->rq); + err_close_sqs: mlx5e_close_sqs(c); @@ -1949,9 +1949,9 @@ err_close_async_icosq_cq: static void mlx5e_close_queues(struct mlx5e_channel *c) { mlx5e_close_xdpsq(&c->xdpsq); - mlx5e_close_rq(&c->rq); if (c->xdp) mlx5e_close_xdpsq(&c->rq_xdpsq); + mlx5e_close_rq(&c->rq); mlx5e_close_sqs(c); mlx5e_close_icosq(&c->icosq); mlx5e_close_icosq(&c->async_icosq); -- cgit From c85a6b8feb16c0cdbbc8d9f581c7861c4a9ac351 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 28 Jul 2021 18:18:59 +0300 Subject: net/mlx5: Block switchdev mode while devlink traps are active Since switchdev mode can't support devlink traps, verify there are no active devlink traps before moving eswitch to switchdev mode. If there are active traps, prevent the switchdev mode configuration. Fixes: eb3862a0525d ("net/mlx5e: Enable traps according to link state") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 011e766e4f67..3bb71a186004 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -48,6 +48,7 @@ #include "lib/fs_chains.h" #include "en_tc.h" #include "en/mapping.h" +#include "devlink.h" #define mlx5_esw_for_each_rep(esw, i, rep) \ xa_for_each(&((esw)->offloads.vport_reps), i, rep) @@ -3001,12 +3002,19 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (cur_mlx5_mode == mlx5_mode) goto unlock; - if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) { + if (mlx5_devlink_trap_get_num_active(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change mode while devlink traps are active"); + err = -EOPNOTSUPP; + goto unlock; + } err = esw_offloads_start(esw, extack); - else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) + } else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) { err = esw_offloads_stop(esw, extack); - else + } else { err = -EINVAL; + } unlock: mlx5_esw_unlock(esw); -- cgit From 3c8946e0e2841aa7cbdabf6acaac6559fa8d1a49 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 16 Jun 2021 18:54:05 +0300 Subject: net/mlx5: Fix order of functions in mlx5_irq_detach_nb() Change order of functions in mlx5_irq_detach_nb() so it will be a mirror of mlx5_irq_attach_nb. Fixes: 71e084e26414 ("net/mlx5: Allocating a pool of MSI-X vectors for SFs") Signed-off-by: Shay Drory Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index b25f764daa08..95e60da33f03 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -251,8 +251,11 @@ int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb) int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb) { + int err = 0; + + err = atomic_notifier_chain_unregister(&irq->nh, nb); irq_put(irq); - return atomic_notifier_chain_unregister(&irq->nh, nb); + return err; } struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq) -- cgit From 5957cc557dc5d52c3448be15c2474f33224b89b6 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 16 Jun 2021 18:59:05 +0300 Subject: net/mlx5: Set all field of mlx5_irq before inserting it to the xarray Currently irq->pool is set after the irq is insert to the xarray. Set irq->pool before the irq is inserted to the xarray. Fixes: 71e084e26414 ("net/mlx5: Allocating a pool of MSI-X vectors for SFs") Signed-off-by: Shay Drory Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 95e60da33f03..7b923f6b5462 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -214,6 +214,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) err = -ENOMEM; goto err_cpumask; } + irq->pool = pool; kref_init(&irq->kref); irq->index = i; err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL)); @@ -222,7 +223,6 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i) irq->index, err); goto err_xa; } - irq->pool = pool; return irq; err_xa: free_cpumask_var(irq->mask); -- cgit From ba317e832d457bc8fcecf6a6ed289732544b87e9 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 17 Jun 2021 01:51:00 +0300 Subject: net/mlx5: Destroy pool->mutex Destroy pool->mutex when we destroy the pool. Fixes: c36326d38d93 ("net/mlx5: Round-Robin EQs over IRQs") Signed-off-by: Shay Drory Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 7b923f6b5462..3465b363fc2f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -440,6 +440,7 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, if (!pool) return ERR_PTR(-ENOMEM); pool->dev = dev; + mutex_init(&pool->lock); xa_init_flags(&pool->irqs, XA_FLAGS_ALLOC); pool->xa_num_irqs.min = start; pool->xa_num_irqs.max = start + size - 1; @@ -448,7 +449,6 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, name); pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ; pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ; - mutex_init(&pool->lock); mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d", name, size, start); return pool; @@ -462,6 +462,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool) xa_for_each(&pool->irqs, index, irq) irq_release(&irq->kref); xa_destroy(&pool->irqs); + mutex_destroy(&pool->lock); kvfree(pool); } -- cgit From 88bbd7b2369aca4598eb8f38c5f16be98c3bb5d4 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Thu, 6 May 2021 11:40:34 +0800 Subject: net/mlx5e: TC, Fix error handling memory leak Free the offload sample action on error. Fixes: f94d6389f6a8 ("net/mlx5e: TC, Add support to offload sample action") Signed-off-by: Chris Mi Reviewed-by: Oz Shlomo Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c index 794012c5c476..d3ad78aa9d45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c @@ -501,6 +501,7 @@ err_sampler: err_offload_rule: mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr); err_default_tbl: + kfree(sample_flow); return ERR_PTR(err); } -- cgit From 563476ae0c5e48a028cbfa38fa9d2fc0418eb88f Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 11 Apr 2021 15:32:55 +0300 Subject: net/mlx5: Synchronize correct IRQ when destroying CQ The CQ destroy is performed based on the IRQ number that is stored in cq->irqn. That number wasn't set explicitly during CQ creation and as expected some of the API users of mlx5_core_create_cq() forgot to update it. This caused to wrong synchronization call of the wrong IRQ with a number 0 instead of the real one. As a fix, set the IRQ number directly in the mlx5_core_create_cq() and update all users accordingly. Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Fixes: ef1659ade359 ("IB/mlx5: Add DEVX support for CQ events") Signed-off-by: Shay Drory Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/cq.c | 4 +--- drivers/infiniband/hw/mlx5/devx.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ++----------- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 20 ++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c | 4 +--- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 2 ++ .../ethernet/mellanox/mlx5/core/steering/dr_send.c | 4 +--- drivers/vdpa/mlx5/net/mlx5_vnet.c | 3 +-- include/linux/mlx5/driver.h | 3 +-- 10 files changed, 27 insertions(+), 30 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 7abeb576b3c5..b8e5e371bb19 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -945,7 +945,6 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, u32 *cqb = NULL; void *cqc; int cqe_size; - unsigned int irqn; int eqn; int err; @@ -984,7 +983,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } - err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); + err = mlx5_vector2eqn(dev->mdev, vector, &eqn); if (err) goto err_cqb; @@ -1007,7 +1006,6 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_cqb; mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); - cq->mcq.irqn = irqn; if (udata) cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; else diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index eb9b0a2707f8..c869b2a91a28 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -975,7 +975,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( struct mlx5_ib_dev *dev; int user_vector; int dev_eqn; - unsigned int irqn; int err; if (uverbs_copy_from(&user_vector, attrs, @@ -987,7 +986,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); - err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn); + err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn); if (err < 0) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index df3e4938ecdd..360e093874d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -134,6 +134,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->cqn); cq->uar = dev->priv.uar; + cq->irqn = eq->core.irqn; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index fd250f7bcd88..24f919ef9b8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1535,15 +1535,9 @@ static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv, { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_cq *mcq = &cq->mcq; - int eqn_not_used; - unsigned int irqn; int err; u32 i; - err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); - if (err) - return err; - err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) @@ -1557,7 +1551,6 @@ static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv, mcq->vector = param->eq_ix; mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; - mcq->irqn = irqn; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); @@ -1605,11 +1598,10 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) void *in; void *cqc; int inlen; - unsigned int irqn_not_used; int eqn; int err; - err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn); if (err) return err; @@ -1983,9 +1975,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel *c; unsigned int irq; int err; - int eqn; - err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + err = mlx5_vector2irqn(priv->mdev, ix, &irq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 6e074cc457de..605c8ecc3610 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -855,8 +855,8 @@ clean: return err; } -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn) +static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn, + unsigned int *irqn) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; @@ -865,8 +865,10 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { if (i++ == vector) { - *eqn = eq->core.eqn; - *irqn = eq->core.irqn; + if (irqn) + *irqn = eq->core.irqn; + if (eqn) + *eqn = eq->core.eqn; err = 0; break; } @@ -874,8 +876,18 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, return err; } + +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn) +{ + return vector2eqnirqn(dev, vector, eqn, NULL); +} EXPORT_SYMBOL(mlx5_vector2eqn); +int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn) +{ + return vector2eqnirqn(dev, vector, NULL, irqn); +} + unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { return dev->priv.eq_table->num_comp_eqs; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index bd66ab2af5b5..d5da4ab65766 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -417,7 +417,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) struct mlx5_wq_param wqp; struct mlx5_cqe64 *cqe; int inlen, err, eqn; - unsigned int irqn; void *cqc, *in; __be64 *pas; u32 i; @@ -446,7 +445,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) goto err_cqwq; } - err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); + err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn); if (err) { kvfree(in); goto err_cqwq; @@ -476,7 +475,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) *conn->cq.mcq.arm_db = 0; conn->cq.mcq.vector = 0; conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; - conn->cq.mcq.irqn = irqn; conn->cq.mcq.uar = fdev->conn_res.uar; tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 624cedebb510..d3d628b862f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -104,4 +104,6 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); #endif +int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 12cf323a5943..9df0e73d1c35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -749,7 +749,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, struct mlx5_cqe64 *cqe; struct mlx5dr_cq *cq; int inlen, err, eqn; - unsigned int irqn; void *cqc, *in; __be64 *pas; int vector; @@ -782,7 +781,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, goto err_cqwq; vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); - err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn); + err = mlx5_vector2eqn(mdev, vector, &eqn); if (err) { kvfree(in); goto err_cqwq; @@ -818,7 +817,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, *cq->mcq.arm_db = cpu_to_be32(2 << 28); cq->mcq.vector = 0; - cq->mcq.irqn = irqn; cq->mcq.uar = uar; return cq; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 2a31467f7ac5..379a19144a25 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -526,7 +526,6 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) void __iomem *uar_page = ndev->mvdev.res.uar->map; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_vdpa_cq *vcq = &mvq->cq; - unsigned int irqn; __be64 *pas; int inlen; void *cqc; @@ -566,7 +565,7 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) /* Use vector 0 by default. Consider adding code to choose least used * vector. */ - err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn); + err = mlx5_vector2eqn(mdev, 0, &eqn); if (err) goto err_vec; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1efe37466969..25a8be58d289 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1044,8 +1044,7 @@ void mlx5_unregister_debugfs(void); void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn); +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); -- cgit From bd37c2888ccaa5ceb9895718f6909b247cc372e0 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 8 Jun 2021 16:38:30 +0300 Subject: net/mlx5: Fix return value from tracer initialization Check return value of mlx5_fw_tracer_start(), set error path and fix return value of mlx5_fw_tracer_init() accordingly. Fixes: c71ad41ccb0c ("net/mlx5: FW tracer, events handling") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 01a1d02dcf15..3f8a98093f8c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -1019,12 +1019,19 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) MLX5_NB_INIT(&tracer->nb, fw_tracer_event, DEVICE_TRACER); mlx5_eq_notifier_register(dev, &tracer->nb); - mlx5_fw_tracer_start(tracer); - + err = mlx5_fw_tracer_start(tracer); + if (err) { + mlx5_core_warn(dev, "FWTracer: Failed to start tracer %d\n", err); + goto err_notifier_unregister; + } return 0; +err_notifier_unregister: + mlx5_eq_notifier_unregister(dev, &tracer->nb); + mlx5_core_destroy_mkey(dev, &tracer->buff.mkey); err_dealloc_pd: mlx5_core_dealloc_pd(dev, tracer->buff.pdn); + cancel_work_sync(&tracer->read_fw_strings_work); return err; } -- cgit From 7b637cd52f02c6d7ff0580143a438940978fc719 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Mon, 2 Aug 2021 06:59:41 +0300 Subject: MAINTAINERS: fix Microchip CAN BUS Analyzer Tool entry typo This patch fixes the abbreviated name of the Microchip CAN BUS Analyzer Tool. Fixes: 8a7b46fa7902 ("MAINTAINERS: add Yasushi SHOJI as reviewer for the Microchip CAN BUS Analyzer Tool driver") Link: https://lore.kernel.org/r/cc4831cb1c8759c15fb32c21fd326e831183733d.1627876781.git.baruch@tkos.co.il Signed-off-by: Baruch Siach Signed-off-by: Marc Kleine-Budde --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index c9467d2839f5..25dc566a67c1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11327,7 +11327,7 @@ W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git F: drivers/media/radio/radio-maxiradio* -MCAB MICROCHIP CAN BUS ANALYZER TOOL DRIVER +MCBA MICROCHIP CAN BUS ANALYZER TOOL DRIVER R: Yasushi SHOJI L: linux-can@vger.kernel.org S: Maintained -- cgit From aae32b784ebdbda6f6055a8021c9fb8a0ab5bcba Mon Sep 17 00:00:00 2001 From: Hussein Alasadi Date: Mon, 9 Aug 2021 17:36:52 +0000 Subject: can: m_can: m_can_set_bittiming(): fix setting M_CAN_DBTP register This patch fixes the setting of the M_CAN_DBTP register contents: - use DBTP_ (the data bitrate macros) instead of NBTP_ which area used for the nominal bitrate - do not overwrite possibly-existing DBTP_TDC flag by ORing reg_btp instead of overwriting Link: https://lore.kernel.org/r/FRYP281MB06140984ABD9994C0AAF7433D1F69@FRYP281MB0614.DEUP281.PROD.OUTLOOK.COM Fixes: 20779943a080 ("can: m_can: use bits.h macros for all regmasks") Cc: Torin Cooper-Bennun Cc: Chandrasekar Ramakrishnan Signed-off-by: Hussein Alasadi [mkl: update patch description, update indention] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index bba2a449ac70..43bca315a66c 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1164,10 +1164,10 @@ static int m_can_set_bittiming(struct net_device *dev) FIELD_PREP(TDCR_TDCO_MASK, tdco)); } - reg_btp = FIELD_PREP(NBTP_NBRP_MASK, brp) | - FIELD_PREP(NBTP_NSJW_MASK, sjw) | - FIELD_PREP(NBTP_NTSEG1_MASK, tseg1) | - FIELD_PREP(NBTP_NTSEG2_MASK, tseg2); + reg_btp |= FIELD_PREP(DBTP_DBRP_MASK, brp) | + FIELD_PREP(DBTP_DSJW_MASK, sjw) | + FIELD_PREP(DBTP_DTSEG1_MASK, tseg1) | + FIELD_PREP(DBTP_DTSEG2_MASK, tseg2); m_can_write(cdev, M_CAN_DBTP, reg_btp); } -- cgit From 07d25971b220e477eb019fcb520a9f2e3ac966af Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 31 Jul 2021 20:30:11 +0800 Subject: locking/rtmutex: Use the correct rtmutex debugging config option It's CONFIG_DEBUG_RT_MUTEXES not CONFIG_DEBUG_RT_MUTEX. Fixes: f7efc4799f81 ("locking/rtmutex: Inline chainwalk depth check") Signed-off-by: Zhen Lei Signed-off-by: Thomas Gleixner Acked-by: Will Deacon Acked-by: Boqun Feng Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210731123011.4555-1-thunder.leizhen@huawei.com --- kernel/locking/rtmutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index b5d9bb5202c6..ad0db322ed3b 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -343,7 +343,7 @@ static __always_inline bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, enum rtmutex_chainwalk chwalk) { - if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEX)) + if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES)) return waiter != NULL; return chwalk == RT_MUTEX_FULL_CHAINWALK; } -- cgit From 664cc971fb259007e49cc8a3ac43b0787d89443f Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 10 Aug 2021 09:10:15 +0200 Subject: Revert "usb: dwc3: gadget: Use list_replace_init() before traversing lists" This reverts commit d25d85061bd856d6be221626605319154f9b5043 as it is reported to cause problems on many different types of boards. Reported-by: Thinh Nguyen Reported-by: John Stultz Cc: Ray Chi Link: https://lore.kernel.org/r/CANcMJZCEVxVLyFgLwK98hqBEdc0_n4P0x_K6Gih8zNH3ouzbJQ@mail.gmail.com Fixes: d25d85061bd8 ("usb: dwc3: gadget: Use list_replace_init() before traversing lists") Cc: stable Cc: Felipe Balbi Cc: Wesley Cheng Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index b8d4b2d327b2..84fe57ef5a49 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1741,13 +1741,9 @@ static void dwc3_gadget_ep_cleanup_cancelled_requests(struct dwc3_ep *dep) { struct dwc3_request *req; struct dwc3_request *tmp; - struct list_head local; struct dwc3 *dwc = dep->dwc; -restart: - list_replace_init(&dep->cancelled_list, &local); - - list_for_each_entry_safe(req, tmp, &local, list) { + list_for_each_entry_safe(req, tmp, &dep->cancelled_list, list) { dwc3_gadget_ep_skip_trbs(dep, req); switch (req->status) { case DWC3_REQUEST_STATUS_DISCONNECTED: @@ -1765,9 +1761,6 @@ restart: break; } } - - if (!list_empty(&dep->cancelled_list)) - goto restart; } static int dwc3_gadget_ep_dequeue(struct usb_ep *ep, @@ -2976,12 +2969,8 @@ static void dwc3_gadget_ep_cleanup_completed_requests(struct dwc3_ep *dep, { struct dwc3_request *req; struct dwc3_request *tmp; - struct list_head local; -restart: - list_replace_init(&dep->started_list, &local); - - list_for_each_entry_safe(req, tmp, &local, list) { + list_for_each_entry_safe(req, tmp, &dep->started_list, list) { int ret; ret = dwc3_gadget_ep_cleanup_completed_request(dep, event, @@ -2989,9 +2978,6 @@ restart: if (ret) break; } - - if (!list_empty(&dep->started_list)) - goto restart; } static bool dwc3_gadget_ep_should_continue(struct dwc3_ep *dep) -- cgit From bf33677a3c394bb8fddd48d3bbc97adf0262e045 Mon Sep 17 00:00:00 2001 From: Christian Hewitt Date: Fri, 6 Aug 2021 09:40:05 +0000 Subject: drm/meson: fix colour distortion from HDR set during vendor u-boot Add support for the OSD1 HDR registers so meson DRM can handle the HDR properties set by Amlogic u-boot on G12A and newer devices which result in blue/green/pink colour distortion to display output. This takes the original patch submissions from Mathias [0] and [1] with corrections for formatting and the missing description and attribution needed for merge. [0] https://lore.kernel.org/linux-amlogic/59dfd7e6-fc91-3d61-04c4-94e078a3188c@baylibre.com/T/ [1] https://lore.kernel.org/linux-amlogic/CAOKfEHBx_fboUqkENEMd-OC-NSrf46nto+vDLgvgttzPe99kXg@mail.gmail.com/T/#u Fixes: 728883948b0d ("drm/meson: Add G12A Support for VIU setup") Suggested-by: Mathias Steiger Signed-off-by: Christian Hewitt Tested-by: Neil Armstrong Tested-by: Philip Milev [narmsrong: adding missing space on second tested-by tag] Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20210806094005.7136-1-christianshewitt@gmail.com --- drivers/gpu/drm/meson/meson_registers.h | 5 +++++ drivers/gpu/drm/meson/meson_viu.c | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/meson/meson_registers.h b/drivers/gpu/drm/meson/meson_registers.h index 446e7961da48..0f3cafab8860 100644 --- a/drivers/gpu/drm/meson/meson_registers.h +++ b/drivers/gpu/drm/meson/meson_registers.h @@ -634,6 +634,11 @@ #define VPP_WRAP_OSD3_MATRIX_PRE_OFFSET2 0x3dbc #define VPP_WRAP_OSD3_MATRIX_EN_CTRL 0x3dbd +/* osd1 HDR */ +#define OSD1_HDR2_CTRL 0x38a0 +#define OSD1_HDR2_CTRL_VDIN0_HDR2_TOP_EN BIT(13) +#define OSD1_HDR2_CTRL_REG_ONLY_MAT BIT(16) + /* osd2 scaler */ #define OSD2_VSC_PHASE_STEP 0x3d00 #define OSD2_VSC_INI_PHASE 0x3d01 diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c index aede0c67a57f..259f3e6bec90 100644 --- a/drivers/gpu/drm/meson/meson_viu.c +++ b/drivers/gpu/drm/meson/meson_viu.c @@ -425,9 +425,14 @@ void meson_viu_init(struct meson_drm *priv) if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXM) || meson_vpu_is_compatible(priv, VPU_COMPATIBLE_GXL)) meson_viu_load_matrix(priv); - else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) + else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) { meson_viu_set_g12a_osd1_matrix(priv, RGB709_to_YUV709l_coeff, true); + /* fix green/pink color distortion from vendor u-boot */ + writel_bits_relaxed(OSD1_HDR2_CTRL_REG_ONLY_MAT | + OSD1_HDR2_CTRL_VDIN0_HDR2_TOP_EN, 0, + priv->io_base + _REG(OSD1_HDR2_CTRL)); + } /* Initialize OSD1 fifo control register */ reg = VIU_OSD_DDR_PRIORITY_URGENT | -- cgit From 51e1bb9eeaf7868db56e58f47848e364ab4c4129 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 9 Aug 2021 12:43:17 +0200 Subject: bpf: Add lockdown check for probe_write_user helper Back then, commit 96ae52279594 ("bpf: Add bpf_probe_write_user BPF helper to be called in tracers") added the bpf_probe_write_user() helper in order to allow to override user space memory. Its original goal was to have a facility to "debug, divert, and manipulate execution of semi-cooperative processes" under CAP_SYS_ADMIN. Write to kernel was explicitly disallowed since it would otherwise tamper with its integrity. One use case was shown in cf9b1199de27 ("samples/bpf: Add test/example of using bpf_probe_write_user bpf helper") where the program DNATs traffic at the time of connect(2) syscall, meaning, it rewrites the arguments to a syscall while they're still in userspace, and before the syscall has a chance to copy the argument into kernel space. These days we have better mechanisms in BPF for achieving the same (e.g. for load-balancers), but without having to write to userspace memory. Of course the bpf_probe_write_user() helper can also be used to abuse many other things for both good or bad purpose. Outside of BPF, there is a similar mechanism for ptrace(2) such as PTRACE_PEEK{TEXT,DATA} and PTRACE_POKE{TEXT,DATA}, but would likely require some more effort. Commit 96ae52279594 explicitly dedicated the helper for experimentation purpose only. Thus, move the helper's availability behind a newly added LOCKDOWN_BPF_WRITE_USER lockdown knob so that the helper is disabled under the "integrity" mode. More fine-grained control can be implemented also from LSM side with this change. Fixes: 96ae52279594 ("bpf: Add bpf_probe_write_user BPF helper to be called in tracers") Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko --- include/linux/security.h | 1 + kernel/trace/bpf_trace.c | 5 +++-- security/security.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/linux/security.h b/include/linux/security.h index 724d7a4a0c91..5b7288521300 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -120,6 +120,7 @@ enum lockdown_reason { LOCKDOWN_MMIOTRACE, LOCKDOWN_DEBUGFS, LOCKDOWN_XMON_WR, + LOCKDOWN_BPF_WRITE_USER, LOCKDOWN_INTEGRITY_MAX, LOCKDOWN_KCORE, LOCKDOWN_KPROBES, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 1836591197a5..fdd14072fc3b 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -990,12 +990,13 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_numa_node_id_proto; case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; - case BPF_FUNC_probe_write_user: - return bpf_get_probe_write_proto(); case BPF_FUNC_current_task_under_cgroup: return &bpf_current_task_under_cgroup_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; + case BPF_FUNC_probe_write_user: + return security_locked_down(LOCKDOWN_BPF_WRITE_USER) < 0 ? + NULL : bpf_get_probe_write_proto(); case BPF_FUNC_probe_read_user: return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: diff --git a/security/security.c b/security/security.c index 6b83ab4e9d66..9ffa9e9c5c55 100644 --- a/security/security.c +++ b/security/security.c @@ -58,6 +58,7 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = { [LOCKDOWN_MMIOTRACE] = "unsafe mmio", [LOCKDOWN_DEBUGFS] = "debugfs access", [LOCKDOWN_XMON_WR] = "xmon write access", + [LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM", [LOCKDOWN_INTEGRITY_MAX] = "integrity", [LOCKDOWN_KCORE] = "/proc/kcore access", [LOCKDOWN_KPROBES] = "use of kprobes", -- cgit From 87b7b5335e6995a6d64fca98fc67b92b29caac9c Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 9 Aug 2021 16:51:51 -0700 Subject: bpf: Add missing bpf_read_[un]lock_trace() for syscall program Commit 79a7f8bdb159d ("bpf: Introduce bpf_sys_bpf() helper and program type.") added support for syscall program, which is a sleepable program. But the program run missed bpf_read_lock_trace()/bpf_read_unlock_trace(), which is needed to ensure proper rcu callback invocations. This patch adds bpf_read_[un]lock_trace() properly. Fixes: 79a7f8bdb159d ("bpf: Introduce bpf_sys_bpf() helper and program type.") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210809235151.1663680-1-yhs@fb.com --- net/bpf/test_run.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 1cc75c811e24..caa16bf30fb5 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -951,7 +952,10 @@ int bpf_prog_test_run_syscall(struct bpf_prog *prog, goto out; } } + + rcu_read_lock_trace(); retval = bpf_prog_run_pin_on_cpu(prog, ctx); + rcu_read_unlock_trace(); if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) { err = -EFAULT; -- cgit From 9011c2791e63fc05721b545c41ad025d8073566e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 26 Apr 2021 18:20:21 +0300 Subject: ovl: skip stale entries in merge dir cache iteration On the first getdents call, ovl_iterate() populates the readdir cache with a list of entries, but for upper entries with origin lower inode, p->ino remains zero. Following getdents calls traverse the readdir cache list and call ovl_cache_update_ino() for entries with zero p->ino to lookup the entry in the overlay and return d_ino that is consistent with st_ino. If the upper file was unlinked between the first getdents call and the getdents call that lists the file entry, ovl_cache_update_ino() will not find the entry and fall back to setting d_ino to the upper real st_ino, which is inconsistent with how this object was presented to users. Instead of listing a stale entry with inconsistent d_ino, simply skip the stale entry, which is better for users. xfstest overlay/077 is failing without this patch. Signed-off-by: Amir Goldstein Link: https://lore.kernel.org/fstests/CAOQ4uxgR_cLnC_vdU5=seP3fwqVkuZM_-WfD6maFTMbMYq=a9w@mail.gmail.com/ Signed-off-by: Miklos Szeredi --- fs/overlayfs/readdir.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index e8ad2c2c77dd..150fdf3bc68d 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -481,6 +481,8 @@ static int ovl_cache_update_ino(struct path *path, struct ovl_cache_entry *p) } this = lookup_one_len(p->name, dir, p->len); if (IS_ERR_OR_NULL(this) || !this->d_inode) { + /* Mark a stale entry */ + p->is_whiteout = true; if (IS_ERR(this)) { err = PTR_ERR(this); this = NULL; @@ -776,6 +778,9 @@ static int ovl_iterate(struct file *file, struct dir_context *ctx) if (err) goto out; } + } + /* ovl_cache_update_ino() sets is_whiteout on stale entry */ + if (!p->is_whiteout) { if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) break; } -- cgit From 9b91b6b019fda817eb52f728eb9c79b3579760bc Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 28 Jul 2021 10:38:43 +0200 Subject: ovl: fix deadlock in splice write There's possibility of an ABBA deadlock in case of a splice write to an overlayfs file and a concurrent splice write to a corresponding real file. The call chain for splice to an overlay file: -> do_splice [takes sb_writers on overlay file] -> do_splice_from -> iter_file_splice_write [takes pipe->mutex] -> vfs_iter_write ... -> ovl_write_iter [takes sb_writers on real file] And the call chain for splice to a real file: -> do_splice [takes sb_writers on real file] -> do_splice_from -> iter_file_splice_write [takes pipe->mutex] Syzbot successfully bisected this to commit 82a763e61e2b ("ovl: simplify file splice"). Fix by reverting the write part of the above commit and by adding missing bits from ovl_write_iter() into ovl_splice_write(). Fixes: 82a763e61e2b ("ovl: simplify file splice") Reported-and-tested-by: syzbot+579885d1a9a833336209@syzkaller.appspotmail.com Signed-off-by: Miklos Szeredi --- fs/overlayfs/file.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 4d53d3b7e5fe..d081faa55e83 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -392,6 +392,51 @@ out_unlock: return ret; } +/* + * Calling iter_file_splice_write() directly from overlay's f_op may deadlock + * due to lock order inversion between pipe->mutex in iter_file_splice_write() + * and file_start_write(real.file) in ovl_write_iter(). + * + * So do everything ovl_write_iter() does and call iter_file_splice_write() on + * the real file. + */ +static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) +{ + struct fd real; + const struct cred *old_cred; + struct inode *inode = file_inode(out); + struct inode *realinode = ovl_inode_real(inode); + ssize_t ret; + + inode_lock(inode); + /* Update mode */ + ovl_copyattr(realinode, inode); + ret = file_remove_privs(out); + if (ret) + goto out_unlock; + + ret = ovl_real_fdget(out, &real); + if (ret) + goto out_unlock; + + old_cred = ovl_override_creds(inode->i_sb); + file_start_write(real.file); + + ret = iter_file_splice_write(pipe, real.file, ppos, len, flags); + + file_end_write(real.file); + /* Update size */ + ovl_copyattr(realinode, inode); + revert_creds(old_cred); + fdput(real); + +out_unlock: + inode_unlock(inode); + + return ret; +} + static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct fd real; @@ -603,7 +648,7 @@ const struct file_operations ovl_file_operations = { .fadvise = ovl_fadvise, .flush = ovl_flush, .splice_read = generic_file_splice_read, - .splice_write = iter_file_splice_write, + .splice_write = ovl_splice_write, .copy_file_range = ovl_copy_file_range, .remap_file_range = ovl_remap_file_range, -- cgit From 580c610429b3994e8db24418927747cf28443cde Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 6 Aug 2021 10:03:12 +0200 Subject: ovl: fix uninitialized pointer read in ovl_lookup_real_one() One error path can result in release_dentry_name_snapshot() being called before "name" was initialized by take_dentry_name_snapshot(). Fix by moving the release_dentry_name_snapshot() to immediately after the only use. Reported-by: Colin Ian King Signed-off-by: Miklos Szeredi --- fs/overlayfs/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 41ebf52f1bbc..ebde05c9cf62 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -392,6 +392,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, */ take_dentry_name_snapshot(&name, real); this = lookup_one_len(name.name.name, connected, name.name.len); + release_dentry_name_snapshot(&name); err = PTR_ERR(this); if (IS_ERR(this)) { goto fail; @@ -406,7 +407,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, } out: - release_dentry_name_snapshot(&name); dput(parent); inode_unlock(dir); return this; -- cgit From 427215d85e8d1476da1a86b8d67aceb485eb3631 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 9 Aug 2021 10:19:47 +0200 Subject: ovl: prevent private clone if bind mount is not allowed Add the following checks from __do_loopback() to clone_private_mount() as well: - verify that the mount is in the current namespace - verify that there are no locked children Reported-by: Alois Wohlschlager Fixes: c771d683a62e ("vfs: introduce clone_private_mount()") Cc: # v3.18 Signed-off-by: Miklos Szeredi --- fs/namespace.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index ab4174a3c802..f79d9471cb76 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1938,6 +1938,20 @@ void drop_collected_mounts(struct vfsmount *mnt) namespace_unlock(); } +static bool has_locked_children(struct mount *mnt, struct dentry *dentry) +{ + struct mount *child; + + list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { + if (!is_subdir(child->mnt_mountpoint, dentry)) + continue; + + if (child->mnt.mnt_flags & MNT_LOCKED) + return true; + } + return false; +} + /** * clone_private_mount - create a private clone of a path * @path: path to clone @@ -1953,10 +1967,19 @@ struct vfsmount *clone_private_mount(const struct path *path) struct mount *old_mnt = real_mount(path->mnt); struct mount *new_mnt; + down_read(&namespace_sem); if (IS_MNT_UNBINDABLE(old_mnt)) - return ERR_PTR(-EINVAL); + goto invalid; + + if (!check_mnt(old_mnt)) + goto invalid; + + if (has_locked_children(old_mnt, path->dentry)) + goto invalid; new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); + up_read(&namespace_sem); + if (IS_ERR(new_mnt)) return ERR_CAST(new_mnt); @@ -1964,6 +1987,10 @@ struct vfsmount *clone_private_mount(const struct path *path) new_mnt->mnt_ns = MNT_NS_INTERNAL; return &new_mnt->mnt; + +invalid: + up_read(&namespace_sem); + return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(clone_private_mount); @@ -2315,19 +2342,6 @@ static int do_change_type(struct path *path, int ms_flags) return err; } -static bool has_locked_children(struct mount *mnt, struct dentry *dentry) -{ - struct mount *child; - list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { - if (!is_subdir(child->mnt_mountpoint, dentry)) - continue; - - if (child->mnt.mnt_flags & MNT_LOCKED) - return true; - } - return false; -} - static struct mount *__do_loopback(struct path *old_path, int recurse) { struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt); -- cgit From a2baf4e8bb0f306fbed7b5e6197c02896a638ab5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 9 Aug 2021 18:04:13 -0700 Subject: bpf: Fix potentially incorrect results with bpf_get_local_storage() Commit b910eaaaa4b8 ("bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper") fixed a bug for bpf_get_local_storage() helper so different tasks won't mess up with each other's percpu local storage. The percpu data contains 8 slots so it can hold up to 8 contexts (same or different tasks), for 8 different program runs, at the same time. This in general is sufficient. But our internal testing showed the following warning multiple times: [...] warning: WARNING: CPU: 13 PID: 41661 at include/linux/bpf-cgroup.h:193 __cgroup_bpf_run_filter_sock_ops+0x13e/0x180 RIP: 0010:__cgroup_bpf_run_filter_sock_ops+0x13e/0x180 tcp_call_bpf.constprop.99+0x93/0xc0 tcp_conn_request+0x41e/0xa50 ? tcp_rcv_state_process+0x203/0xe00 tcp_rcv_state_process+0x203/0xe00 ? sk_filter_trim_cap+0xbc/0x210 ? tcp_v6_inbound_md5_hash.constprop.41+0x44/0x160 tcp_v6_do_rcv+0x181/0x3e0 tcp_v6_rcv+0xc65/0xcb0 ip6_protocol_deliver_rcu+0xbd/0x450 ip6_input_finish+0x11/0x20 ip6_input+0xb5/0xc0 ip6_sublist_rcv_finish+0x37/0x50 ip6_sublist_rcv+0x1dc/0x270 ipv6_list_rcv+0x113/0x140 __netif_receive_skb_list_core+0x1a0/0x210 netif_receive_skb_list_internal+0x186/0x2a0 gro_normal_list.part.170+0x19/0x40 napi_complete_done+0x65/0x150 mlx5e_napi_poll+0x1ae/0x680 __napi_poll+0x25/0x120 net_rx_action+0x11e/0x280 __do_softirq+0xbb/0x271 irq_exit_rcu+0x97/0xa0 common_interrupt+0x7f/0xa0 asm_common_interrupt+0x1e/0x40 RIP: 0010:bpf_prog_1835a9241238291a_tw_egress+0x5/0xbac ? __cgroup_bpf_run_filter_skb+0x378/0x4e0 ? do_softirq+0x34/0x70 ? ip6_finish_output2+0x266/0x590 ? ip6_finish_output+0x66/0xa0 ? ip6_output+0x6c/0x130 ? ip6_xmit+0x279/0x550 ? ip6_dst_check+0x61/0xd0 [...] Using drgn [0] to dump the percpu buffer contents showed that on this CPU slot 0 is still available, but slots 1-7 are occupied and those tasks in slots 1-7 mostly don't exist any more. So we might have issues in bpf_cgroup_storage_unset(). Further debugging confirmed that there is a bug in bpf_cgroup_storage_unset(). Currently, it tries to unset "current" slot with searching from the start. So the following sequence is possible: 1. A task is running and claims slot 0 2. Running BPF program is done, and it checked slot 0 has the "task" and ready to reset it to NULL (not yet). 3. An interrupt happens, another BPF program runs and it claims slot 1 with the *same* task. 4. The unset() in interrupt context releases slot 0 since it matches "task". 5. Interrupt is done, the task in process context reset slot 0. At the end, slot 1 is not reset and the same process can continue to occupy slots 2-7 and finally, when the above step 1-5 is repeated again, step 3 BPF program won't be able to claim an empty slot and a warning will be issued. To fix the issue, for unset() function, we should traverse from the last slot to the first. This way, the above issue can be avoided. The same reverse traversal should also be done in bpf_get_local_storage() helper itself. Otherwise, incorrect local storage may be returned to BPF program. [0] https://github.com/osandov/drgn Fixes: b910eaaaa4b8 ("bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210810010413.1976277-1-yhs@fb.com --- include/linux/bpf-cgroup.h | 4 ++-- kernel/bpf/helpers.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8b77d08d4b47..6c9b10d82c80 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -201,8 +201,8 @@ static inline void bpf_cgroup_storage_unset(void) { int i; - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) + for (i = BPF_CGROUP_STORAGE_NEST_MAX - 1; i >= 0; i--) { + if (likely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) continue; this_cpu_write(bpf_cgroup_storage_info[i].task, NULL); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 0b04553e8c44..7a97b2f4747d 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -397,8 +397,8 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) void *ptr; int i; - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) + for (i = BPF_CGROUP_STORAGE_NEST_MAX - 1; i >= 0; i--) { + if (likely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) continue; storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]); -- cgit From c34f674c8875235725c3ef86147a627f165d23b4 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 10 Aug 2021 00:59:12 +0200 Subject: net: dsa: microchip: Fix ksz_read64() ksz_read64() currently does some dubious byte-swapping on the two halves of a 64-bit register, and then only returns the high bits. Replace this with a straightforward expression. Fixes: e66f840c08a2 ("net: dsa: ksz: Add Microchip KSZ8795 DSA driver") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz_common.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 2e6bfd333f50..6afbb41ad39e 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -205,12 +205,8 @@ static inline int ksz_read64(struct ksz_device *dev, u32 reg, u64 *val) int ret; ret = regmap_bulk_read(dev->regmap[2], reg, value, 2); - if (!ret) { - /* Ick! ToDo: Add 64bit R/W to regmap on 32bit systems */ - value[0] = swab32(value[0]); - value[1] = swab32(value[1]); - *val = swab64((u64)*value); - } + if (!ret) + *val = (u64)value[0] << 32 | value[1]; return ret; } -- cgit From ef3b02a1d79b691f9a354c4903cf1e6917e315f9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 10 Aug 2021 00:59:28 +0200 Subject: net: dsa: microchip: ksz8795: Fix PVID tag insertion ksz8795 has never actually enabled PVID tag insertion, and it also programmed the PVID incorrectly. To fix this: * Allow tag insertion to be controlled per ingress port. On most chips, set bit 2 in Global Control 19. On KSZ88x3 this control flag doesn't exist. * When adding a PVID: - Set the appropriate register bits to enable tag insertion on egress at every other port if this was the packet's ingress port. - Mask *out* the VID from the default tag, before or-ing in the new PVID. * When removing a PVID: - Clear the same control bits to disable tag insertion. - Don't update the default tag. This wasn't doing anything useful. Fixes: e66f840c08a2 ("net: dsa: ksz: Add Microchip KSZ8795 DSA driver") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795.c | 26 +++++++++++++++++++------- drivers/net/dsa/microchip/ksz8795_reg.h | 4 ++++ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 560f6843bb65..b788db829d07 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1124,6 +1124,16 @@ static int ksz8_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag, return 0; } +static void ksz8_port_enable_pvid(struct ksz_device *dev, int port, bool state) +{ + if (ksz_is_ksz88x3(dev)) { + ksz_cfg(dev, REG_SW_INSERT_SRC_PVID, + 0x03 << (4 - 2 * port), state); + } else { + ksz_pwrite8(dev, port, REG_PORT_CTRL_12, state ? 0x0f : 0x00); + } +} + static int ksz8_port_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack) @@ -1160,9 +1170,11 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port, u16 vid; ksz_pread16(dev, port, REG_PORT_CTRL_VID, &vid); - vid &= 0xfff; + vid &= ~VLAN_VID_MASK; vid |= new_pvid; ksz_pwrite16(dev, port, REG_PORT_CTRL_VID, vid); + + ksz8_port_enable_pvid(dev, port, true); } return 0; @@ -1173,7 +1185,7 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port, { bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct ksz_device *dev = ds->priv; - u16 data, pvid, new_pvid = 0; + u16 data, pvid; u8 fid, member, valid; if (ksz_is_ksz88x3(dev)) @@ -1195,14 +1207,11 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port, valid = 0; } - if (pvid == vlan->vid) - new_pvid = 1; - ksz8_to_vlan(dev, fid, member, valid, &data); ksz8_w_vlan_table(dev, vlan->vid, data); - if (new_pvid != pvid) - ksz_pwrite16(dev, port, REG_PORT_CTRL_VID, pvid); + if (pvid == vlan->vid) + ksz8_port_enable_pvid(dev, port, false); return 0; } @@ -1435,6 +1444,9 @@ static int ksz8_setup(struct dsa_switch *ds) ksz_cfg(dev, S_MIRROR_CTRL, SW_MIRROR_RX_TX, false); + if (!ksz_is_ksz88x3(dev)) + ksz_cfg(dev, REG_SW_CTRL_19, SW_INS_TAG_ENABLE, true); + /* set broadcast storm protection 10% rate */ regmap_update_bits(dev->regmap[1], S_REPLACE_VID_CTRL, BROADCAST_STORM_RATE, diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h index a32355624f31..6b40bc25f7ff 100644 --- a/drivers/net/dsa/microchip/ksz8795_reg.h +++ b/drivers/net/dsa/microchip/ksz8795_reg.h @@ -631,6 +631,10 @@ #define REG_PORT_4_OUT_RATE_3 0xEE #define REG_PORT_5_OUT_RATE_3 0xFE +/* 88x3 specific */ + +#define REG_SW_INSERT_SRC_PVID 0xC2 + /* PME */ #define SW_PME_OUTPUT_ENABLE BIT(1) -- cgit From 8f4f58f88fe0d9bd591f21f53de7dbd42baeb3fa Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 10 Aug 2021 00:59:37 +0200 Subject: net: dsa: microchip: ksz8795: Reject unsupported VLAN configuration The switches supported by ksz8795 only have a per-port flag for Tag Removal. This means it is not possible to support both tagged and untagged VLANs on the same port. Reject attempts to add a VLAN that requires the flag to be changed, unless there are no VLANs currently configured. VID 0 is excluded from this check since it is untagged regardless of the state of the flag. On the CPU port we could support tagged and untagged VLANs at the same time. This will be enabled by a later patch. Fixes: e66f840c08a2 ("net: dsa: ksz: Add Microchip KSZ8795 DSA driver") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795.c | 27 ++++++++++++++++++++++++++- drivers/net/dsa/microchip/ksz_common.h | 1 + 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index b788db829d07..29a18058d1ee 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1140,13 +1140,38 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port, { bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct ksz_device *dev = ds->priv; + struct ksz_port *p = &dev->ports[port]; u16 data, new_pvid = 0; u8 fid, member, valid; if (ksz_is_ksz88x3(dev)) return -ENOTSUPP; - ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged); + /* If a VLAN is added with untagged flag different from the + * port's Remove Tag flag, we need to change the latter. + * Ignore VID 0, which is always untagged. + */ + if (untagged != p->remove_tag && vlan->vid != 0) { + unsigned int vid; + + /* Reject attempts to add a VLAN that requires the + * Remove Tag flag to be changed, unless there are no + * other VLANs currently configured. + */ + for (vid = 1; vid < dev->num_vlans; ++vid) { + /* Skip the VID we are going to add or reconfigure */ + if (vid == vlan->vid) + continue; + + ksz8_from_vlan(dev, dev->vlan_cache[vid].table[0], + &fid, &member, &valid); + if (valid && (member & BIT(port))) + return -EINVAL; + } + + ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged); + p->remove_tag = untagged; + } ksz8_r_vlan_table(dev, vlan->vid, &data); ksz8_from_vlan(dev, data, &fid, &member, &valid); diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 6afbb41ad39e..1597c63988b4 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -27,6 +27,7 @@ struct ksz_port_mib { struct ksz_port { u16 member; u16 vid_member; + bool remove_tag; /* Remove Tag flag set, for ksz8795 only */ int stp_state; struct phy_device phydev; -- cgit From af01754f9e3c553a2ee63b4693c79a3956e230ab Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 10 Aug 2021 00:59:47 +0200 Subject: net: dsa: microchip: ksz8795: Fix VLAN untagged flag change on deletion When a VLAN is deleted from a port, the flags in struct switchdev_obj_port_vlan are always 0. ksz8_port_vlan_del() copies the BRIDGE_VLAN_INFO_UNTAGGED flag to the port's Tag Removal flag, and therefore always clears it. In case there are multiple VLANs configured as untagged on this port - which seems useless, but is allowed - deleting one of them changes the remaining VLANs to be tagged. It's only ever necessary to change this flag when a VLAN is added to the port, so leave it unchanged in ksz8_port_vlan_del(). Fixes: e66f840c08a2 ("net: dsa: ksz: Add Microchip KSZ8795 DSA driver") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 29a18058d1ee..af3744d8b6cf 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1208,7 +1208,6 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port, static int ksz8_port_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { - bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; struct ksz_device *dev = ds->priv; u16 data, pvid; u8 fid, member, valid; @@ -1219,8 +1218,6 @@ static int ksz8_port_vlan_del(struct dsa_switch *ds, int port, ksz_pread16(dev, port, REG_PORT_CTRL_VID, &pvid); pvid = pvid & 0xFFF; - ksz_port_cfg(dev, port, P_TAG_CTRL, PORT_REMOVE_TAG, untagged); - ksz8_r_vlan_table(dev, vlan->vid, &data); ksz8_from_vlan(dev, data, &fid, &member, &valid); -- cgit From 9130c2d30c17846287b803a9803106318cbe5266 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 10 Aug 2021 00:59:57 +0200 Subject: net: dsa: microchip: ksz8795: Use software untagging on CPU port On the CPU port, we can support both tagged and untagged VLANs at the same time by doing any necessary untagging in software rather than hardware. To enable that, keep the CPU port's Remove Tag flag cleared and set the dsa_switch::untag_bridge_pvid flag. Fixes: e66f840c08a2 ("net: dsa: ksz: Add Microchip KSZ8795 DSA driver") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index af3744d8b6cf..8bcef4b2dd6f 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1150,8 +1150,10 @@ static int ksz8_port_vlan_add(struct dsa_switch *ds, int port, /* If a VLAN is added with untagged flag different from the * port's Remove Tag flag, we need to change the latter. * Ignore VID 0, which is always untagged. + * Ignore CPU port, which will always be tagged. */ - if (untagged != p->remove_tag && vlan->vid != 0) { + if (untagged != p->remove_tag && vlan->vid != 0 && + port != dev->cpu_port) { unsigned int vid; /* Reject attempts to add a VLAN that requires the @@ -1751,6 +1753,11 @@ static int ksz8_switch_init(struct ksz_device *dev) /* set the real number of ports */ dev->ds->num_ports = dev->port_cnt; + /* We rely on software untagging on the CPU port, so that we + * can support both tagged and untagged VLANs + */ + dev->ds->untag_bridge_pvid = true; + return 0; } -- cgit From 164844135a3f215d3018ee9d6875336beb942413 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 10 Aug 2021 01:00:06 +0200 Subject: net: dsa: microchip: ksz8795: Fix VLAN filtering Currently ksz8_port_vlan_filtering() sets or clears the VLAN Enable hardware flag. That controls discarding of packets with a VID that has not been enabled for any port on the switch. Since it is a global flag, set the dsa_switch::vlan_filtering_is_global flag so that the DSA core understands this can't be controlled per port. When VLAN filtering is enabled, the switch should also discard packets with a VID that's not enabled on the ingress port. Set or clear each external port's VLAN Ingress Filter flag in ksz8_port_vlan_filtering() to make that happen. Fixes: e66f840c08a2 ("net: dsa: ksz: Add Microchip KSZ8795 DSA driver") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index 8bcef4b2dd6f..b1a3763c97de 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -1119,8 +1119,14 @@ static int ksz8_port_vlan_filtering(struct dsa_switch *ds, int port, bool flag, if (ksz_is_ksz88x3(dev)) return -ENOTSUPP; + /* Discard packets with VID not enabled on the switch */ ksz_cfg(dev, S_MIRROR_CTRL, SW_VLAN_ENABLE, flag); + /* Discard packets with VID not enabled on the ingress port */ + for (port = 0; port < dev->phy_port_cnt; ++port) + ksz_port_cfg(dev, port, REG_PORT_CTRL_2, PORT_INGRESS_FILTER, + flag); + return 0; } @@ -1758,6 +1764,11 @@ static int ksz8_switch_init(struct ksz_device *dev) */ dev->ds->untag_bridge_pvid = true; + /* VLAN filtering is partly controlled by the global VLAN + * Enable flag + */ + dev->ds->vlan_filtering_is_global = true; + return 0; } -- cgit From 411d466d94a6b16a20c8b552e403b7e8ce2397a2 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 10 Aug 2021 01:00:15 +0200 Subject: net: dsa: microchip: ksz8795: Don't use phy_port_cnt in VLAN table lookup The magic number 4 in VLAN table lookup was the number of entries we can read and write at once. Using phy_port_cnt here doesn't make sense and presumably broke VLAN filtering for 3-port switches. Change it back to 4. Fixes: 4ce2a984abd8 ("net: dsa: microchip: ksz8795: use phy_port_cnt ...") Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz8795.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index b1a3763c97de..c5142f86a3c7 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -687,8 +687,8 @@ static void ksz8_r_vlan_entries(struct ksz_device *dev, u16 addr) shifts = ksz8->shifts; ksz8_r_table(dev, TABLE_VLAN, addr, &data); - addr *= dev->phy_port_cnt; - for (i = 0; i < dev->phy_port_cnt; i++) { + addr *= 4; + for (i = 0; i < 4; i++) { dev->vlan_cache[addr + i].table[0] = (u16)data; data >>= shifts[VLAN_TABLE]; } @@ -702,7 +702,7 @@ static void ksz8_r_vlan_table(struct ksz_device *dev, u16 vid, u16 *vlan) u64 buf; data = (u16 *)&buf; - addr = vid / dev->phy_port_cnt; + addr = vid / 4; index = vid & 3; ksz8_r_table(dev, TABLE_VLAN, addr, &buf); *vlan = data[index]; @@ -716,7 +716,7 @@ static void ksz8_w_vlan_table(struct ksz_device *dev, u16 vid, u16 vlan) u64 buf; data = (u16 *)&buf; - addr = vid / dev->phy_port_cnt; + addr = vid / 4; index = vid & 3; ksz8_r_table(dev, TABLE_VLAN, addr, &buf); data[index] = vlan; -- cgit From 438553958ba19296663c6d6583d208dfb6792830 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jul 2021 23:51:40 +0200 Subject: PCI/MSI: Enable and mask MSI-X early The ordering of MSI-X enable in hardware is dysfunctional: 1) MSI-X is disabled in the control register 2) Various setup functions 3) pci_msi_setup_msi_irqs() is invoked which ends up accessing the MSI-X table entries 4) MSI-X is enabled and masked in the control register with the comment that enabling is required for some hardware to access the MSI-X table Step #4 obviously contradicts #3. The history of this is an issue with the NIU hardware. When #4 was introduced the table access actually happened in msix_program_entries() which was invoked after enabling and masking MSI-X. This was changed in commit d71d6432e105 ("PCI/MSI: Kill redundant call of irq_set_msi_desc() for MSI-X interrupts") which removed the table write from msix_program_entries(). Interestingly enough nobody noticed and either NIU still works or it did not get any testing with a kernel 3.19 or later. Nevertheless this is inconsistent and there is no reason why MSI-X can't be enabled and masked in the control register early on, i.e. move step #4 above to step #1. This preserves the NIU workaround and has no side effects on other hardware. Fixes: d71d6432e105 ("PCI/MSI: Kill redundant call of irq_set_msi_desc() for MSI-X interrupts") Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Reviewed-by: Ashok Raj Reviewed-by: Marc Zyngier Acked-by: Bjorn Helgaas Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210729222542.344136412@linutronix.de --- drivers/pci/msi.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 9232255c8515..5d39ed828085 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -772,18 +772,25 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, u16 control; void __iomem *base; - /* Ensure MSI-X is disabled while it is set up */ - pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); + /* + * Some devices require MSI-X to be enabled before the MSI-X + * registers can be accessed. Mask all the vectors to prevent + * interrupts coming in before they're fully set up. + */ + pci_msix_clear_and_set_ctrl(dev, 0, PCI_MSIX_FLAGS_MASKALL | + PCI_MSIX_FLAGS_ENABLE); pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); /* Request & Map MSI-X table region */ base = msix_map_region(dev, msix_table_size(control)); - if (!base) - return -ENOMEM; + if (!base) { + ret = -ENOMEM; + goto out_disable; + } ret = msix_setup_entries(dev, base, entries, nvec, affd); if (ret) - return ret; + goto out_disable; ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX); if (ret) @@ -794,14 +801,6 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, if (ret) goto out_free; - /* - * Some devices require MSI-X to be enabled before we can touch the - * MSI-X registers. We need to mask all the vectors to prevent - * interrupts coming in before they're fully set up. - */ - pci_msix_clear_and_set_ctrl(dev, 0, - PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE); - msix_program_entries(dev, entries); ret = populate_msi_sysfs(dev); @@ -836,6 +835,9 @@ out_avail: out_free: free_msi_irqs(dev); +out_disable: + pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); + return ret; } -- cgit From 7d5ec3d3612396dc6d4b76366d20ab9fc06f399f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jul 2021 23:51:41 +0200 Subject: PCI/MSI: Mask all unused MSI-X entries When MSI-X is enabled the ordering of calls is: msix_map_region(); msix_setup_entries(); pci_msi_setup_msi_irqs(); msix_program_entries(); This has a few interesting issues: 1) msix_setup_entries() allocates the MSI descriptors and initializes them except for the msi_desc:masked member which is left zero initialized. 2) pci_msi_setup_msi_irqs() allocates the interrupt descriptors and sets up the MSI interrupts which ends up in pci_write_msi_msg() unless the interrupt chip provides its own irq_write_msi_msg() function. 3) msix_program_entries() does not do what the name suggests. It solely updates the entries array (if not NULL) and initializes the masked member for each MSI descriptor by reading the hardware state and then masks the entry. Obviously this has some issues: 1) The uninitialized masked member of msi_desc prevents the enforcement of masking the entry in pci_write_msi_msg() depending on the cached masked bit. Aside of that half initialized data is a NONO in general 2) msix_program_entries() only ensures that the actually allocated entries are masked. This is wrong as experimentation with crash testing and crash kernel kexec has shown. This limited testing unearthed that when the production kernel had more entries in use and unmasked when it crashed and the crash kernel allocated a smaller amount of entries, then a full scan of all entries found unmasked entries which were in use in the production kernel. This is obviously a device or emulation issue as the device reset should mask all MSI-X table entries, but obviously that's just part of the paper specification. Cure this by: 1) Masking all table entries in hardware 2) Initializing msi_desc::masked in msix_setup_entries() 3) Removing the mask dance in msix_program_entries() 4) Renaming msix_program_entries() to msix_update_entries() to reflect the purpose of that function. As the masking of unused entries has never been done the Fixes tag refers to a commit in: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Fixes: f036d4ea5fa7 ("[PATCH] ia32 Message Signalled Interrupt support") Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Reviewed-by: Marc Zyngier Acked-by: Bjorn Helgaas Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210729222542.403833459@linutronix.de --- drivers/pci/msi.c | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 5d39ed828085..57c9ec9b976b 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -691,6 +691,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, { struct irq_affinity_desc *curmsk, *masks = NULL; struct msi_desc *entry; + void __iomem *addr; int ret, i; int vec_count = pci_msix_vec_count(dev); @@ -711,6 +712,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, entry->msi_attrib.is_msix = 1; entry->msi_attrib.is_64 = 1; + if (entries) entry->msi_attrib.entry_nr = entries[i].entry; else @@ -722,6 +724,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, entry->msi_attrib.default_irq = dev->irq; entry->mask_base = base; + addr = pci_msix_desc_addr(entry); + if (addr) + entry->masked = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL); + list_add_tail(&entry->list, dev_to_msi_list(&dev->dev)); if (masks) curmsk++; @@ -732,26 +738,25 @@ out: return ret; } -static void msix_program_entries(struct pci_dev *dev, - struct msix_entry *entries) +static void msix_update_entries(struct pci_dev *dev, struct msix_entry *entries) { struct msi_desc *entry; - int i = 0; - void __iomem *desc_addr; for_each_pci_msi_entry(entry, dev) { - if (entries) - entries[i++].vector = entry->irq; + if (entries) { + entries->vector = entry->irq; + entries++; + } + } +} - desc_addr = pci_msix_desc_addr(entry); - if (desc_addr) - entry->masked = readl(desc_addr + - PCI_MSIX_ENTRY_VECTOR_CTRL); - else - entry->masked = 0; +static void msix_mask_all(void __iomem *base, int tsize) +{ + u32 ctrl = PCI_MSIX_ENTRY_CTRL_MASKBIT; + int i; - msix_mask_irq(entry, 1); - } + for (i = 0; i < tsize; i++, base += PCI_MSIX_ENTRY_SIZE) + writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL); } /** @@ -768,9 +773,9 @@ static void msix_program_entries(struct pci_dev *dev, static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, int nvec, struct irq_affinity *affd) { - int ret; - u16 control; void __iomem *base; + int ret, tsize; + u16 control; /* * Some devices require MSI-X to be enabled before the MSI-X @@ -782,12 +787,16 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); /* Request & Map MSI-X table region */ - base = msix_map_region(dev, msix_table_size(control)); + tsize = msix_table_size(control); + base = msix_map_region(dev, tsize); if (!base) { ret = -ENOMEM; goto out_disable; } + /* Ensure that all table entries are masked. */ + msix_mask_all(base, tsize); + ret = msix_setup_entries(dev, base, entries, nvec, affd); if (ret) goto out_disable; @@ -801,7 +810,7 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, if (ret) goto out_free; - msix_program_entries(dev, entries); + msix_update_entries(dev, entries); ret = populate_msi_sysfs(dev); if (ret) -- cgit From da181dc974ad667579baece33c2c8d2d1e4558d5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jul 2021 23:51:42 +0200 Subject: PCI/MSI: Enforce that MSI-X table entry is masked for update The specification (PCIe r5.0, sec 6.1.4.5) states: For MSI-X, a function is permitted to cache Address and Data values from unmasked MSI-X Table entries. However, anytime software unmasks a currently masked MSI-X Table entry either by clearing its Mask bit or by clearing the Function Mask bit, the function must update any Address or Data values that it cached from that entry. If software changes the Address or Data value of an entry while the entry is unmasked, the result is undefined. The Linux kernel's MSI-X support never enforced that the entry is masked before the entry is modified hence the Fixes tag refers to a commit in: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Enforce the entry to be masked across the update. There is no point in enforcing this to be handled at all possible call sites as this is just pointless code duplication and the common update function is the obvious place to enforce this. Fixes: f036d4ea5fa7 ("[PATCH] ia32 Message Signalled Interrupt support") Reported-by: Kevin Tian Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Reviewed-by: Marc Zyngier Acked-by: Bjorn Helgaas Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210729222542.462096385@linutronix.de --- drivers/pci/msi.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 57c9ec9b976b..7ee1ac47caa7 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -289,13 +289,28 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) /* Don't touch the hardware now */ } else if (entry->msi_attrib.is_msix) { void __iomem *base = pci_msix_desc_addr(entry); + bool unmasked = !(entry->masked & PCI_MSIX_ENTRY_CTRL_MASKBIT); if (!base) goto skip; + /* + * The specification mandates that the entry is masked + * when the message is modified: + * + * "If software changes the Address or Data value of an + * entry while the entry is unmasked, the result is + * undefined." + */ + if (unmasked) + __pci_msix_desc_mask_irq(entry, PCI_MSIX_ENTRY_CTRL_MASKBIT); + writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); writel(msg->data, base + PCI_MSIX_ENTRY_DATA); + + if (unmasked) + __pci_msix_desc_mask_irq(entry, 0); } else { int pos = dev->msi_cap; u16 msgctl; -- cgit From b9255a7cb51754e8d2645b65dd31805e282b4f3e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jul 2021 23:51:43 +0200 Subject: PCI/MSI: Enforce MSI[X] entry updates to be visible Nothing enforces the posted writes to be visible when the function returns. Flush them even if the flush might be redundant when the entry is masked already as the unmask will flush as well. This is either setup or a rare affinity change event so the extra flush is not the end of the world. While this is more a theoretical issue especially the logic in the X86 specific msi_set_affinity() function relies on the assumption that the update has reached the hardware when the function returns. Again, as this never has been enforced the Fixes tag refers to a commit in: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Fixes: f036d4ea5fa7 ("[PATCH] ia32 Message Signalled Interrupt support") Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Reviewed-by: Marc Zyngier Acked-by: Bjorn Helgaas Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210729222542.515188147@linutronix.de --- drivers/pci/msi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 7ee1ac47caa7..434c7041b176 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -311,6 +311,9 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) if (unmasked) __pci_msix_desc_mask_irq(entry, 0); + + /* Ensure that the writes are visible in the device */ + readl(base + PCI_MSIX_ENTRY_DATA); } else { int pos = dev->msi_cap; u16 msgctl; @@ -331,6 +334,8 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) pci_write_config_word(dev, pos + PCI_MSI_DATA_32, msg->data); } + /* Ensure that the writes are visible in the device */ + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); } skip: -- cgit From 361fd37397f77578735907341579397d5bed0a2d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jul 2021 23:51:44 +0200 Subject: PCI/MSI: Do not set invalid bits in MSI mask msi_mask_irq() takes a mask and a flags argument. The mask argument is used to mask out bits from the cached mask and the flags argument to set bits. Some places invoke it with a flags argument which sets bits which are not used by the device, i.e. when the device supports up to 8 vectors a full unmask in some places sets the mask to 0xFFFFFF00. While devices probably do not care, it's still bad practice. Fixes: 7ba1930db02f ("PCI MSI: Unmask MSI if setup failed") Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210729222542.568173099@linutronix.de --- drivers/pci/msi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 434c7041b176..e27ac6b5b8d5 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -656,21 +656,21 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, /* Configure MSI capability structure */ ret = pci_msi_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI); if (ret) { - msi_mask_irq(entry, mask, ~mask); + msi_mask_irq(entry, mask, 0); free_msi_irqs(dev); return ret; } ret = msi_verify_entries(dev); if (ret) { - msi_mask_irq(entry, mask, ~mask); + msi_mask_irq(entry, mask, 0); free_msi_irqs(dev); return ret; } ret = populate_msi_sysfs(dev); if (ret) { - msi_mask_irq(entry, mask, ~mask); + msi_mask_irq(entry, mask, 0); free_msi_irqs(dev); return ret; } @@ -962,7 +962,7 @@ static void pci_msi_shutdown(struct pci_dev *dev) /* Return the device with MSI unmasked as initial states */ mask = msi_mask(desc->msi_attrib.multi_cap); /* Keep cached state to be restored */ - __pci_msi_desc_mask_irq(desc, mask, ~mask); + __pci_msi_desc_mask_irq(desc, mask, 0); /* Restore dev->irq to its default pin-assertion IRQ */ dev->irq = desc->msi_attrib.default_irq; -- cgit From 689e6b5351573c38ccf92a0dd8b3e2c2241e4aff Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jul 2021 23:51:45 +0200 Subject: PCI/MSI: Correct misleading comments The comments about preserving the cached state in pci_msi[x]_shutdown() are misleading as the MSI descriptors are freed right after those functions return. So there is nothing to restore. Preparatory change. Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210729222542.621609423@linutronix.de --- drivers/pci/msi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index e27ac6b5b8d5..b3f58074e8d4 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -961,7 +961,6 @@ static void pci_msi_shutdown(struct pci_dev *dev) /* Return the device with MSI unmasked as initial states */ mask = msi_mask(desc->msi_attrib.multi_cap); - /* Keep cached state to be restored */ __pci_msi_desc_mask_irq(desc, mask, 0); /* Restore dev->irq to its default pin-assertion IRQ */ @@ -1047,10 +1046,8 @@ static void pci_msix_shutdown(struct pci_dev *dev) } /* Return the device with MSI-X masked as initial states */ - for_each_pci_msi_entry(entry, dev) { - /* Keep cached states to be restored */ + for_each_pci_msi_entry(entry, dev) __pci_msix_desc_mask_irq(entry, 1); - } pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); pci_intx_for_msi(dev, 1); -- cgit From d28d4ad2a1aef27458b3383725bb179beb8d015c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jul 2021 23:51:46 +0200 Subject: PCI/MSI: Use msi_mask_irq() in pci_msi_shutdown() No point in using the raw write function from shutdown. Preparatory change to introduce proper serialization for the msi_desc::masked cache. Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210729222542.674391354@linutronix.de --- drivers/pci/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index b3f58074e8d4..f0f7026b7ac0 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -961,7 +961,7 @@ static void pci_msi_shutdown(struct pci_dev *dev) /* Return the device with MSI unmasked as initial states */ mask = msi_mask(desc->msi_attrib.multi_cap); - __pci_msi_desc_mask_irq(desc, mask, 0); + msi_mask_irq(desc, mask, 0); /* Restore dev->irq to its default pin-assertion IRQ */ dev->irq = desc->msi_attrib.default_irq; -- cgit From 77e89afc25f30abd56e76a809ee2884d7c1b63ce Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jul 2021 23:51:47 +0200 Subject: PCI/MSI: Protect msi_desc::masked for multi-MSI Multi-MSI uses a single MSI descriptor and there is a single mask register when the device supports per vector masking. To avoid reading back the mask register the value is cached in the MSI descriptor and updates are done by clearing and setting bits in the cache and writing it to the device. But nothing protects msi_desc::masked and the mask register from being modified concurrently on two different CPUs for two different Linux interrupts which belong to the same multi-MSI descriptor. Add a lock to struct device and protect any operation on the mask and the mask register with it. This makes the update of msi_desc::masked unconditional, but there is no place which requires a modification of the hardware register without updating the masked cache. msi_mask_irq() is now an empty wrapper which will be cleaned up in follow up changes. The problem goes way back to the initial support of multi-MSI, but picking the commit which introduced the mask cache is a valid cut off point (2.6.30). Fixes: f2440d9acbe8 ("PCI MSI: Refactor interrupt masking code") Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210729222542.726833414@linutronix.de --- drivers/base/core.c | 1 + drivers/pci/msi.c | 19 ++++++++++--------- include/linux/device.h | 1 + include/linux/msi.h | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index f6360490a4a3..6c0ef9d55a34 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2837,6 +2837,7 @@ void device_initialize(struct device *dev) device_pm_init(dev); set_dev_node(dev, -1); #ifdef CONFIG_GENERIC_MSI_IRQ + raw_spin_lock_init(&dev->msi_lock); INIT_LIST_HEAD(&dev->msi_list); #endif INIT_LIST_HEAD(&dev->links.consumers); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index f0f7026b7ac0..e5e75331b415 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -143,24 +143,25 @@ static inline __attribute_const__ u32 msi_mask(unsigned x) * reliably as devices without an INTx disable bit will then generate a * level IRQ which will never be cleared. */ -u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) +void __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) { - u32 mask_bits = desc->masked; + raw_spinlock_t *lock = &desc->dev->msi_lock; + unsigned long flags; if (pci_msi_ignore_mask || !desc->msi_attrib.maskbit) - return 0; + return; - mask_bits &= ~mask; - mask_bits |= flag; + raw_spin_lock_irqsave(lock, flags); + desc->masked &= ~mask; + desc->masked |= flag; pci_write_config_dword(msi_desc_to_pci_dev(desc), desc->mask_pos, - mask_bits); - - return mask_bits; + desc->masked); + raw_spin_unlock_irqrestore(lock, flags); } static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) { - desc->masked = __pci_msi_desc_mask_irq(desc, mask, flag); + __pci_msi_desc_mask_irq(desc, mask, flag); } static void __iomem *pci_msix_desc_addr(struct msi_desc *desc) diff --git a/include/linux/device.h b/include/linux/device.h index 59940f1744c1..e53aa5065f58 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -506,6 +506,7 @@ struct device { struct dev_pin_info *pins; #endif #ifdef CONFIG_GENERIC_MSI_IRQ + raw_spinlock_t msi_lock; struct list_head msi_list; #endif #ifdef CONFIG_DMA_OPS diff --git a/include/linux/msi.h b/include/linux/msi.h index 6aff469e511d..e8bdcb83172b 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -233,7 +233,7 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag); -u32 __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag); +void __pci_msi_desc_mask_irq(struct msi_desc *desc, u32 mask, u32 flag); void pci_msi_mask_irq(struct irq_data *data); void pci_msi_unmask_irq(struct irq_data *data); -- cgit From 826da771291fc25a428e871f9e7fb465e390f852 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jul 2021 23:51:48 +0200 Subject: genirq: Provide IRQCHIP_AFFINITY_PRE_STARTUP X86 IO/APIC and MSI interrupts (when used without interrupts remapping) require that the affinity setup on startup is done before the interrupt is enabled for the first time as the non-remapped operation mode cannot safely migrate enabled interrupts from arbitrary contexts. Provide a new irq chip flag which allows affected hardware to request this. This has to be opt-in because there have been reports in the past that some interrupt chips cannot handle affinity setting before startup. Fixes: 18404756765c ("genirq: Expose default irq affinity mask (take 3)") Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210729222542.779791738@linutronix.de --- include/linux/irq.h | 2 ++ kernel/irq/chip.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index 8e9a9ae471a6..c8293c817646 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -569,6 +569,7 @@ struct irq_chip { * IRQCHIP_SUPPORTS_NMI: Chip can deliver NMIs, only for root irqchips * IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND: Invokes __enable_irq()/__disable_irq() for wake irqs * in the suspend path if they are in disabled state + * IRQCHIP_AFFINITY_PRE_STARTUP: Default affinity update before startup */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), @@ -581,6 +582,7 @@ enum { IRQCHIP_SUPPORTS_LEVEL_MSI = (1 << 7), IRQCHIP_SUPPORTS_NMI = (1 << 8), IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND = (1 << 9), + IRQCHIP_AFFINITY_PRE_STARTUP = (1 << 10), }; #include diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 7f04c7d8296e..a98bcfc4be7b 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -265,8 +265,11 @@ int irq_startup(struct irq_desc *desc, bool resend, bool force) } else { switch (__irq_startup_managed(desc, aff, force)) { case IRQ_STARTUP_NORMAL: + if (d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP) + irq_setup_affinity(desc); ret = __irq_startup(desc); - irq_setup_affinity(desc); + if (!(d->chip->flags & IRQCHIP_AFFINITY_PRE_STARTUP)) + irq_setup_affinity(desc); break; case IRQ_STARTUP_MANAGED: irq_do_set_affinity(d, aff, false); -- cgit From 0c0e37dc11671384e53ba6ede53a4d91162a2cc5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jul 2021 23:51:49 +0200 Subject: x86/ioapic: Force affinity setup before startup The IO/APIC cannot handle interrupt affinity changes safely after startup other than from an interrupt handler. The startup sequence in the generic interrupt code violates that assumption. Mark the irq chip with the new IRQCHIP_AFFINITY_PRE_STARTUP flag so that the default interrupt setting happens before the interrupt is started up for the first time. Fixes: 18404756765c ("genirq: Expose default irq affinity mask (take 3)") Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210729222542.832143400@linutronix.de --- arch/x86/kernel/apic/io_apic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d5c691a3208b..39224e035e47 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1986,7 +1986,8 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static struct irq_chip ioapic_ir_chip __read_mostly = { @@ -1999,7 +2000,8 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_set_affinity = ioapic_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_get_irqchip_state = ioapic_irq_get_chip_state, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static inline void init_IO_APIC_traps(void) -- cgit From ff363f480e5997051dd1de949121ffda3b753741 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 29 Jul 2021 23:51:50 +0200 Subject: x86/msi: Force affinity setup before startup The X86 MSI mechanism cannot handle interrupt affinity changes safely after startup other than from an interrupt handler, unless interrupt remapping is enabled. The startup sequence in the generic interrupt code violates that assumption. Mark the irq chips with the new IRQCHIP_AFFINITY_PRE_STARTUP flag so that the default interrupt setting happens before the interrupt is started up for the first time. While the interrupt remapping MSI chip does not require this, there is no point in treating it differently as this might spare an interrupt to a CPU which is not in the default affinity mask. For the non-remapping case go to the direct write path when the interrupt is not yet started similar to the not yet activated case. Fixes: 18404756765c ("genirq: Expose default irq affinity mask (take 3)") Signed-off-by: Thomas Gleixner Tested-by: Marc Zyngier Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210729222542.886722080@linutronix.de --- arch/x86/kernel/apic/msi.c | 11 ++++++++--- arch/x86/kernel/hpet.c | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 44ebe25e7703..dbacb9ec8843 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -58,11 +58,13 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) * The quirk bit is not set in this case. * - The new vector is the same as the old vector * - The old vector is MANAGED_IRQ_SHUTDOWN_VECTOR (interrupt starts up) + * - The interrupt is not yet started up * - The new destination CPU is the same as the old destination CPU */ if (!irqd_msi_nomask_quirk(irqd) || cfg->vector == old_cfg.vector || old_cfg.vector == MANAGED_IRQ_SHUTDOWN_VECTOR || + !irqd_is_started(irqd) || cfg->dest_apicid == old_cfg.dest_apicid) { irq_msi_update_msg(irqd, cfg); return ret; @@ -150,7 +152,8 @@ static struct irq_chip pci_msi_controller = { .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_affinity = msi_set_affinity, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, @@ -219,7 +222,8 @@ static struct irq_chip pci_msi_ir_controller = { .irq_mask = pci_msi_mask_irq, .irq_ack = irq_chip_ack_parent, .irq_retrigger = irq_chip_retrigger_hierarchy, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static struct msi_domain_info pci_msi_ir_domain_info = { @@ -273,7 +277,8 @@ static struct irq_chip dmar_msi_controller = { .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_compose_msi_msg = dmar_msi_compose_msg, .irq_write_msi_msg = dmar_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static int dmar_msi_init(struct irq_domain *domain, diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 08651a4e6aa0..42fc41dd0e1f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -508,7 +508,7 @@ static struct irq_chip hpet_msi_controller __ro_after_init = { .irq_set_affinity = msi_domain_set_affinity, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_write_msi_msg = hpet_msi_write_msg, - .flags = IRQCHIP_SKIP_SET_WAKE, + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_AFFINITY_PRE_STARTUP, }; static int hpet_msi_init(struct irq_domain *domain, -- cgit From 1090340f7ee53e824fd4eef66a4855d548110c5b Mon Sep 17 00:00:00 2001 From: Takeshi Misawa Date: Thu, 5 Aug 2021 16:54:14 +0900 Subject: net: Fix memory leak in ieee802154_raw_deliver If IEEE-802.15.4-RAW is closed before receive skb, skb is leaked. Fix this, by freeing sk_receive_queue in sk->sk_destruct(). syzbot report: BUG: memory leak unreferenced object 0xffff88810f644600 (size 232): comm "softirq", pid 0, jiffies 4294967032 (age 81.270s) hex dump (first 32 bytes): 10 7d 4b 12 81 88 ff ff 10 7d 4b 12 81 88 ff ff .}K......}K..... 00 00 00 00 00 00 00 00 40 7c 4b 12 81 88 ff ff ........@|K..... backtrace: [] skb_clone+0xaa/0x2b0 net/core/skbuff.c:1496 [] ieee802154_raw_deliver net/ieee802154/socket.c:369 [inline] [] ieee802154_rcv+0x100/0x340 net/ieee802154/socket.c:1070 [] __netif_receive_skb_one_core+0x6a/0xa0 net/core/dev.c:5384 [] __netif_receive_skb+0x27/0xa0 net/core/dev.c:5498 [] netif_receive_skb_internal net/core/dev.c:5603 [inline] [] netif_receive_skb+0x59/0x260 net/core/dev.c:5662 [] ieee802154_deliver_skb net/mac802154/rx.c:29 [inline] [] ieee802154_subif_frame net/mac802154/rx.c:102 [inline] [] __ieee802154_rx_handle_packet net/mac802154/rx.c:212 [inline] [] ieee802154_rx+0x612/0x620 net/mac802154/rx.c:284 [] ieee802154_tasklet_handler+0x86/0xa0 net/mac802154/main.c:35 [] tasklet_action_common.constprop.0+0x5b/0x100 kernel/softirq.c:557 [] __do_softirq+0xbf/0x2ab kernel/softirq.c:345 [] do_softirq kernel/softirq.c:248 [inline] [] do_softirq+0x5c/0x80 kernel/softirq.c:235 [] __local_bh_enable_ip+0x51/0x60 kernel/softirq.c:198 [] local_bh_enable include/linux/bottom_half.h:32 [inline] [] rcu_read_unlock_bh include/linux/rcupdate.h:745 [inline] [] __dev_queue_xmit+0x7f4/0xf60 net/core/dev.c:4221 [] raw_sendmsg+0x1f4/0x2b0 net/ieee802154/socket.c:295 [] sock_sendmsg_nosec net/socket.c:654 [inline] [] sock_sendmsg+0x56/0x80 net/socket.c:674 [] __sys_sendto+0x15c/0x200 net/socket.c:1977 [] __do_sys_sendto net/socket.c:1989 [inline] [] __se_sys_sendto net/socket.c:1985 [inline] [] __x64_sys_sendto+0x26/0x30 net/socket.c:1985 Fixes: 9ec767160357 ("net: add IEEE 802.15.4 socket family implementation") Reported-and-tested-by: syzbot+1f68113fa907bf0695a8@syzkaller.appspotmail.com Signed-off-by: Takeshi Misawa Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20210805075414.GA15796@DESKTOP Signed-off-by: Stefan Schmidt --- net/ieee802154/socket.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index a45a0401adc5..c25f7617770c 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -984,6 +984,11 @@ static const struct proto_ops ieee802154_dgram_ops = { .sendpage = sock_no_sendpage, }; +static void ieee802154_sock_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); +} + /* Create a socket. Initialise the socket, blank the addresses * set the state. */ @@ -1024,7 +1029,7 @@ static int ieee802154_create(struct net *net, struct socket *sock, sock->ops = ops; sock_init_data(sock, sk); - /* FIXME: sk->sk_destruct */ + sk->sk_destruct = ieee802154_sock_destruct; sk->sk_family = PF_IEEE802154; /* Checksums on by default */ -- cgit From 4a2b285e7e103d4d6c6ed3e5052a0ff74a5d7f15 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Aug 2021 02:45:47 -0700 Subject: net: igmp: fix data-race in igmp_ifc_timer_expire() Fix the data-race reported by syzbot [1] Issue here is that igmp_ifc_timer_expire() can update in_dev->mr_ifc_count while another change just occured from another context. in_dev->mr_ifc_count is only 8bit wide, so the race had little consequences. [1] BUG: KCSAN: data-race in igmp_ifc_event / igmp_ifc_timer_expire write to 0xffff8881051e3062 of 1 bytes by task 12547 on cpu 0: igmp_ifc_event+0x1d5/0x290 net/ipv4/igmp.c:821 igmp_group_added+0x462/0x490 net/ipv4/igmp.c:1356 ____ip_mc_inc_group+0x3ff/0x500 net/ipv4/igmp.c:1461 __ip_mc_join_group+0x24d/0x2c0 net/ipv4/igmp.c:2199 ip_mc_join_group_ssm+0x20/0x30 net/ipv4/igmp.c:2218 do_ip_setsockopt net/ipv4/ip_sockglue.c:1285 [inline] ip_setsockopt+0x1827/0x2a80 net/ipv4/ip_sockglue.c:1423 tcp_setsockopt+0x8c/0xa0 net/ipv4/tcp.c:3657 sock_common_setsockopt+0x5d/0x70 net/core/sock.c:3362 __sys_setsockopt+0x18f/0x200 net/socket.c:2159 __do_sys_setsockopt net/socket.c:2170 [inline] __se_sys_setsockopt net/socket.c:2167 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2167 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff8881051e3062 of 1 bytes by interrupt on cpu 1: igmp_ifc_timer_expire+0x706/0xa30 net/ipv4/igmp.c:808 call_timer_fn+0x2e/0x1d0 kernel/time/timer.c:1419 expire_timers+0x135/0x250 kernel/time/timer.c:1464 __run_timers+0x358/0x420 kernel/time/timer.c:1732 run_timer_softirq+0x19/0x30 kernel/time/timer.c:1745 __do_softirq+0x12c/0x26e kernel/softirq.c:558 invoke_softirq kernel/softirq.c:432 [inline] __irq_exit_rcu+0x9a/0xb0 kernel/softirq.c:636 sysvec_apic_timer_interrupt+0x69/0x80 arch/x86/kernel/apic/apic.c:1100 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638 console_unlock+0x8e8/0xb30 kernel/printk/printk.c:2646 vprintk_emit+0x125/0x3d0 kernel/printk/printk.c:2174 vprintk_default+0x22/0x30 kernel/printk/printk.c:2185 vprintk+0x15a/0x170 kernel/printk/printk_safe.c:392 printk+0x62/0x87 kernel/printk/printk.c:2216 selinux_netlink_send+0x399/0x400 security/selinux/hooks.c:6041 security_netlink_send+0x42/0x90 security/security.c:2070 netlink_sendmsg+0x59e/0x7c0 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:703 [inline] sock_sendmsg net/socket.c:723 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2392 ___sys_sendmsg net/socket.c:2446 [inline] __sys_sendmsg+0x1ed/0x270 net/socket.c:2475 __do_sys_sendmsg net/socket.c:2484 [inline] __se_sys_sendmsg net/socket.c:2482 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2482 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x01 -> 0x02 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 12539 Comm: syz-executor.1 Not tainted 5.14.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6b3c558a4f23..a51360087b19 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -803,10 +803,17 @@ static void igmp_gq_timer_expire(struct timer_list *t) static void igmp_ifc_timer_expire(struct timer_list *t) { struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer); + u8 mr_ifc_count; igmpv3_send_cr(in_dev); - if (in_dev->mr_ifc_count) { - in_dev->mr_ifc_count--; +restart: + mr_ifc_count = READ_ONCE(in_dev->mr_ifc_count); + + if (mr_ifc_count) { + if (cmpxchg(&in_dev->mr_ifc_count, + mr_ifc_count, + mr_ifc_count - 1) != mr_ifc_count) + goto restart; igmp_ifc_start_timer(in_dev, unsolicited_report_interval(in_dev)); } @@ -818,7 +825,7 @@ static void igmp_ifc_event(struct in_device *in_dev) struct net *net = dev_net(in_dev->dev); if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; + WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv); igmp_ifc_start_timer(in_dev, 1); } @@ -957,7 +964,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, in_dev->mr_qri; } /* cancel the interface change timer */ - in_dev->mr_ifc_count = 0; + WRITE_ONCE(in_dev->mr_ifc_count, 0); if (del_timer(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); /* clear deleted report items */ @@ -1724,7 +1731,7 @@ void ip_mc_down(struct in_device *in_dev) igmp_group_dropped(pmc); #ifdef CONFIG_IP_MULTICAST - in_dev->mr_ifc_count = 0; + WRITE_ONCE(in_dev->mr_ifc_count, 0); if (del_timer(&in_dev->mr_ifc_timer)) __in_dev_put(in_dev); in_dev->mr_gq_running = 0; @@ -1941,7 +1948,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; - in_dev->mr_ifc_count = pmc->crcount; + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(pmc->interface); @@ -2120,7 +2127,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* else no filters; keep old mode for reports */ pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv; - in_dev->mr_ifc_count = pmc->crcount; + WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount); for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; igmp_ifc_event(in_dev); -- cgit From 019d0454c61707879cf9853c894e0a191f6b9774 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 9 Aug 2021 14:52:29 -0700 Subject: bpf, core: Fix kernel-doc notation Fix kernel-doc warnings in kernel/bpf/core.c (found by scripts/kernel-doc and W=1 builds). That is, correct a function name in a comment and add return descriptions for 2 functions. Fixes these kernel-doc warnings: kernel/bpf/core.c:1372: warning: expecting prototype for __bpf_prog_run(). Prototype was for ___bpf_prog_run() instead kernel/bpf/core.c:1372: warning: No description found for return value of '___bpf_prog_run' kernel/bpf/core.c:1883: warning: No description found for return value of 'bpf_prog_select_runtime' Signed-off-by: Randy Dunlap Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210809215229.7556-1-rdunlap@infradead.org --- kernel/bpf/core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b1a5fc04492b..0a28a8095d3e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1362,11 +1362,13 @@ u64 __weak bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) } /** - * __bpf_prog_run - run eBPF program on a given context + * ___bpf_prog_run - run eBPF program on a given context * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers * @insn: is the array of eBPF instructions * * Decode and execute eBPF instructions. + * + * Return: whatever value is in %BPF_R0 at program exit */ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) { @@ -1878,6 +1880,9 @@ static void bpf_prog_select_func(struct bpf_prog *fp) * * Try to JIT eBPF program, if JIT is not available, use interpreter. * The BPF program will be executed via BPF_PROG_RUN() macro. + * + * Return: the &fp argument along with &err set to 0 for success or + * a negative errno code on failure */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { -- cgit From cd391280bf4693ceddca8f19042cff42f98c1a89 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 10 Aug 2021 14:19:53 +0300 Subject: net: dsa: hellcreek: fix broken backpressure in .port_fdb_dump rtnl_fdb_dump() has logic to split a dump of PF_BRIDGE neighbors into multiple netlink skbs if the buffer provided by user space is too small (one buffer will typically handle a few hundred FDB entries). When the current buffer becomes full, nlmsg_put() in dsa_slave_port_fdb_do_dump() returns -EMSGSIZE and DSA saves the index of the last dumped FDB entry, returns to rtnl_fdb_dump() up to that point, and then the dump resumes on the same port with a new skb, and FDB entries up to the saved index are simply skipped. Since dsa_slave_port_fdb_do_dump() is pointed to by the "cb" passed to drivers, then drivers must check for the -EMSGSIZE error code returned by it. Otherwise, when a netlink skb becomes full, DSA will no longer save newly dumped FDB entries to it, but the driver will continue dumping. So FDB entries will be missing from the dump. Fix the broken backpressure by propagating the "cb" return code and allow rtnl_fdb_dump() to restart the FDB dump with a new skb. Fixes: e4b27ebc780f ("net: dsa: Add DSA driver for Hirschmann Hellcreek switches") Signed-off-by: Vladimir Oltean Acked-by: Kurt Kanzenbach Signed-off-by: David S. Miller --- drivers/net/dsa/hirschmann/hellcreek.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/hirschmann/hellcreek.c b/drivers/net/dsa/hirschmann/hellcreek.c index 9fdcc4bde480..5c54ae1be62c 100644 --- a/drivers/net/dsa/hirschmann/hellcreek.c +++ b/drivers/net/dsa/hirschmann/hellcreek.c @@ -912,6 +912,7 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port, { struct hellcreek *hellcreek = ds->priv; u16 entries; + int ret = 0; size_t i; mutex_lock(&hellcreek->reg_lock); @@ -943,12 +944,14 @@ static int hellcreek_fdb_dump(struct dsa_switch *ds, int port, if (!(entry.portmask & BIT(port))) continue; - cb(entry.mac, 0, entry.is_static, data); + ret = cb(entry.mac, 0, entry.is_static, data); + if (ret) + break; } mutex_unlock(&hellcreek->reg_lock); - return 0; + return ret; } static int hellcreek_vlan_filtering(struct dsa_switch *ds, int port, -- cgit From ada2fee185d8145afb89056558bb59545b9dbdd0 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 10 Aug 2021 14:19:54 +0300 Subject: net: dsa: lan9303: fix broken backpressure in .port_fdb_dump rtnl_fdb_dump() has logic to split a dump of PF_BRIDGE neighbors into multiple netlink skbs if the buffer provided by user space is too small (one buffer will typically handle a few hundred FDB entries). When the current buffer becomes full, nlmsg_put() in dsa_slave_port_fdb_do_dump() returns -EMSGSIZE and DSA saves the index of the last dumped FDB entry, returns to rtnl_fdb_dump() up to that point, and then the dump resumes on the same port with a new skb, and FDB entries up to the saved index are simply skipped. Since dsa_slave_port_fdb_do_dump() is pointed to by the "cb" passed to drivers, then drivers must check for the -EMSGSIZE error code returned by it. Otherwise, when a netlink skb becomes full, DSA will no longer save newly dumped FDB entries to it, but the driver will continue dumping. So FDB entries will be missing from the dump. Fix the broken backpressure by propagating the "cb" return code and allow rtnl_fdb_dump() to restart the FDB dump with a new skb. Fixes: ab335349b852 ("net: dsa: lan9303: Add port_fast_age and port_fdb_dump methods") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/lan9303-core.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index 344374025426..d7ce281570b5 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -557,12 +557,12 @@ static int lan9303_alr_make_entry_raw(struct lan9303 *chip, u32 dat0, u32 dat1) return 0; } -typedef void alr_loop_cb_t(struct lan9303 *chip, u32 dat0, u32 dat1, - int portmap, void *ctx); +typedef int alr_loop_cb_t(struct lan9303 *chip, u32 dat0, u32 dat1, + int portmap, void *ctx); -static void lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx) +static int lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx) { - int i; + int ret = 0, i; mutex_lock(&chip->alr_mutex); lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, @@ -582,13 +582,17 @@ static void lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx) LAN9303_ALR_DAT1_PORT_BITOFFS; portmap = alrport_2_portmap[alrport]; - cb(chip, dat0, dat1, portmap, ctx); + ret = cb(chip, dat0, dat1, portmap, ctx); + if (ret) + break; lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, LAN9303_ALR_CMD_GET_NEXT); lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, 0); } mutex_unlock(&chip->alr_mutex); + + return ret; } static void alr_reg_to_mac(u32 dat0, u32 dat1, u8 mac[6]) @@ -606,18 +610,20 @@ struct del_port_learned_ctx { }; /* Clear learned (non-static) entry on given port */ -static void alr_loop_cb_del_port_learned(struct lan9303 *chip, u32 dat0, - u32 dat1, int portmap, void *ctx) +static int alr_loop_cb_del_port_learned(struct lan9303 *chip, u32 dat0, + u32 dat1, int portmap, void *ctx) { struct del_port_learned_ctx *del_ctx = ctx; int port = del_ctx->port; if (((BIT(port) & portmap) == 0) || (dat1 & LAN9303_ALR_DAT1_STATIC)) - return; + return 0; /* learned entries has only one port, we can just delete */ dat1 &= ~LAN9303_ALR_DAT1_VALID; /* delete entry */ lan9303_alr_make_entry_raw(chip, dat0, dat1); + + return 0; } struct port_fdb_dump_ctx { @@ -626,19 +632,19 @@ struct port_fdb_dump_ctx { dsa_fdb_dump_cb_t *cb; }; -static void alr_loop_cb_fdb_port_dump(struct lan9303 *chip, u32 dat0, - u32 dat1, int portmap, void *ctx) +static int alr_loop_cb_fdb_port_dump(struct lan9303 *chip, u32 dat0, + u32 dat1, int portmap, void *ctx) { struct port_fdb_dump_ctx *dump_ctx = ctx; u8 mac[ETH_ALEN]; bool is_static; if ((BIT(dump_ctx->port) & portmap) == 0) - return; + return 0; alr_reg_to_mac(dat0, dat1, mac); is_static = !!(dat1 & LAN9303_ALR_DAT1_STATIC); - dump_ctx->cb(mac, 0, is_static, dump_ctx->data); + return dump_ctx->cb(mac, 0, is_static, dump_ctx->data); } /* Set a static ALR entry. Delete entry if port_map is zero */ @@ -1210,9 +1216,7 @@ static int lan9303_port_fdb_dump(struct dsa_switch *ds, int port, }; dev_dbg(chip->dev, "%s(%d)\n", __func__, port); - lan9303_alr_loop(chip, alr_loop_cb_fdb_port_dump, &dump_ctx); - - return 0; + return lan9303_alr_loop(chip, alr_loop_cb_fdb_port_dump, &dump_ctx); } static int lan9303_port_mdb_prepare(struct dsa_switch *ds, int port, -- cgit From 871a73a1c8f55da0a3db234e9dd816ea4fd546f2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 10 Aug 2021 14:19:55 +0300 Subject: net: dsa: lantiq: fix broken backpressure in .port_fdb_dump rtnl_fdb_dump() has logic to split a dump of PF_BRIDGE neighbors into multiple netlink skbs if the buffer provided by user space is too small (one buffer will typically handle a few hundred FDB entries). When the current buffer becomes full, nlmsg_put() in dsa_slave_port_fdb_do_dump() returns -EMSGSIZE and DSA saves the index of the last dumped FDB entry, returns to rtnl_fdb_dump() up to that point, and then the dump resumes on the same port with a new skb, and FDB entries up to the saved index are simply skipped. Since dsa_slave_port_fdb_do_dump() is pointed to by the "cb" passed to drivers, then drivers must check for the -EMSGSIZE error code returned by it. Otherwise, when a netlink skb becomes full, DSA will no longer save newly dumped FDB entries to it, but the driver will continue dumping. So FDB entries will be missing from the dump. Fix the broken backpressure by propagating the "cb" return code and allow rtnl_fdb_dump() to restart the FDB dump with a new skb. Fixes: 58c59ef9e930 ("net: dsa: lantiq: Add Forwarding Database access") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/lantiq_gswip.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 314ae78bbdd6..e78026ef6d8c 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -1404,11 +1404,17 @@ static int gswip_port_fdb_dump(struct dsa_switch *ds, int port, addr[1] = mac_bridge.key[2] & 0xff; addr[0] = (mac_bridge.key[2] >> 8) & 0xff; if (mac_bridge.val[1] & GSWIP_TABLE_MAC_BRIDGE_STATIC) { - if (mac_bridge.val[0] & BIT(port)) - cb(addr, 0, true, data); + if (mac_bridge.val[0] & BIT(port)) { + err = cb(addr, 0, true, data); + if (err) + return err; + } } else { - if (((mac_bridge.val[0] & GENMASK(7, 4)) >> 4) == port) - cb(addr, 0, false, data); + if (((mac_bridge.val[0] & GENMASK(7, 4)) >> 4) == port) { + err = cb(addr, 0, false, data); + if (err) + return err; + } } } return 0; -- cgit From 21b52fed928e96d2f75d2f6aa9eac7a4b0b55d22 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 10 Aug 2021 14:19:56 +0300 Subject: net: dsa: sja1105: fix broken backpressure in .port_fdb_dump rtnl_fdb_dump() has logic to split a dump of PF_BRIDGE neighbors into multiple netlink skbs if the buffer provided by user space is too small (one buffer will typically handle a few hundred FDB entries). When the current buffer becomes full, nlmsg_put() in dsa_slave_port_fdb_do_dump() returns -EMSGSIZE and DSA saves the index of the last dumped FDB entry, returns to rtnl_fdb_dump() up to that point, and then the dump resumes on the same port with a new skb, and FDB entries up to the saved index are simply skipped. Since dsa_slave_port_fdb_do_dump() is pointed to by the "cb" passed to drivers, then drivers must check for the -EMSGSIZE error code returned by it. Otherwise, when a netlink skb becomes full, DSA will no longer save newly dumped FDB entries to it, but the driver will continue dumping. So FDB entries will be missing from the dump. Fix the broken backpressure by propagating the "cb" return code and allow rtnl_fdb_dump() to restart the FDB dump with a new skb. Fixes: 291d1e72b756 ("net: dsa: sja1105: Add support for FDB and MDB management") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 8667c9754330..b188a80eeec6 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1635,7 +1635,9 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port, /* We need to hide the dsa_8021q VLANs from the user. */ if (priv->vlan_state == SJA1105_VLAN_UNAWARE) l2_lookup.vlanid = 0; - cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data); + rc = cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data); + if (rc) + return rc; } return 0; } -- cgit From d07149aba2ef423eae94a9cc2a6365d0cdf6fd51 Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Tue, 10 Aug 2021 18:08:45 +0800 Subject: ALSA: hda/realtek: fix mute/micmute LEDs for HP ProBook 650 G8 Notebook PC The HP ProBook 650 G8 Notebook PC is using ALC236 codec which is using 0x02 to control mute LED and 0x01 to control micmute LED. Therefore, add a quirk to make it works. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20210810100846.65844-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 4043a2362f27..a065260d0d20 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8431,6 +8431,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), + SND_PCI_QUIRK(0x103c, 0x8805, "HP ProBook 650 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x880d, "HP EliteBook 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8847, "HP EliteBook x360 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), -- cgit From d1dee814168538eba166ae4150b37f0d88257884 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 22 Jul 2021 14:25:48 +0100 Subject: pinctrl: sunxi: Don't underestimate number of functions When we are building all the various pinctrl structures for the Allwinner pinctrl devices, we do some estimation about the maximum number of distinct function (names) that we will need. So far we take the number of pins as an upper bound, even though we can actually have up to four special functions per pin. This wasn't a problem until now, since we indeed have typically far more pins than functions, and most pins share common functions. However the H616 "-r" pin controller has only two pins, but four functions, so we run over the end of the array when we are looking for a matching function name in sunxi_pinctrl_add_function - there is no NULL sentinel left that would terminate the loop: [ 8.200648] Unable to handle kernel paging request at virtual address fffdff7efbefaff5 [ 8.209179] Mem abort info: .... [ 8.368456] Call trace: [ 8.370925] __pi_strcmp+0x90/0xf0 [ 8.374559] sun50i_h616_r_pinctrl_probe+0x1c/0x28 [ 8.379557] platform_probe+0x68/0xd8 Do an actual worst case allocation (4 functions per pin, three common functions and the sentinel) for the initial array allocation. This is now heavily overestimating the number of functions in the common case, but we will reallocate this array later with the actual number of functions, so it's only temporarily. Fixes: 561c1cf17c46 ("pinctrl: sunxi: Add support for the Allwinner H616-R pin controller") Signed-off-by: Andre Przywara Acked-by: Maxime Ripard Link: https://lore.kernel.org/r/20210722132548.22121-1-andre.przywara@arm.com Signed-off-by: Linus Walleij --- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index dc8d39ae045b..9c7679c06dca 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -1219,10 +1219,12 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) } /* - * We suppose that we won't have any more functions than pins, - * we'll reallocate that later anyway + * Find an upper bound for the maximum number of functions: in + * the worst case we have gpio_in, gpio_out, irq and up to four + * special functions per pin, plus one entry for the sentinel. + * We'll reallocate that later anyway. */ - pctl->functions = kcalloc(pctl->ngroups, + pctl->functions = kcalloc(4 * pctl->ngroups + 4, sizeof(*pctl->functions), GFP_KERNEL); if (!pctl->functions) -- cgit From b9cc7d8a4656a6e815852c27ab50365009cb69c1 Mon Sep 17 00:00:00 2001 From: Ben Dai Date: Sun, 25 Apr 2021 23:09:03 +0800 Subject: genirq/timings: Prevent potential array overflow in __irq_timings_store() When the interrupt interval is greater than 2 ^ PREDICTION_BUFFER_SIZE * PREDICTION_FACTOR us and less than 1s, the calculated index will be greater than the length of irqs->ema_time[]. Check the calculated index before using it to prevent array overflow. Fixes: 23aa3b9a6b7d ("genirq/timings: Encapsulate storing function") Signed-off-by: Ben Dai Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210425150903.25456-1-ben.dai9703@gmail.com --- kernel/irq/timings.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index d309d6fbf5bd..4d2a702d7aa9 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -453,6 +453,11 @@ static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs, */ index = irq_timings_interval_index(interval); + if (index > PREDICTION_BUFFER_SIZE - 1) { + irqs->count = 0; + return; + } + /* * Store the index as an element of the pattern in another * circular array. -- cgit From dbbc93576e03fbe24b365fab0e901eb442237a8a Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Tue, 18 May 2021 11:31:17 +0800 Subject: genirq/msi: Ensure deactivation on teardown msi_domain_alloc_irqs() invokes irq_domain_activate_irq(), but msi_domain_free_irqs() does not enforce deactivation before tearing down the interrupts. This happens when PCI/MSI interrupts are set up and never used before being torn down again, e.g. in error handling pathes. The only place which cleans that up is the error handling path in msi_domain_alloc_irqs(). Move the cleanup from msi_domain_alloc_irqs() into msi_domain_free_irqs() to cure that. Fixes: f3b0946d629c ("genirq/msi: Make sure PCI MSIs are activated early") Signed-off-by: Bixuan Cui Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210518033117.78104-1-cuibixuan@huawei.com --- kernel/irq/msi.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index c41965e348b5..85df3ca03efe 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -476,11 +476,6 @@ skip_activate: return 0; cleanup: - for_each_msi_vector(desc, i, dev) { - irq_data = irq_domain_get_irq_data(domain, i); - if (irqd_is_activated(irq_data)) - irq_domain_deactivate_irq(irq_data); - } msi_domain_free_irqs(domain, dev); return ret; } @@ -505,7 +500,15 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) { + struct irq_data *irq_data; struct msi_desc *desc; + int i; + + for_each_msi_vector(desc, i, dev) { + irq_data = irq_domain_get_irq_data(domain, i); + if (irqd_is_activated(irq_data)) + irq_domain_deactivate_irq(irq_data); + } for_each_msi_entry(desc, dev) { /* -- cgit From 7cbe08a930a132d84b4cf79953b00b074ec7a2a7 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 9 Aug 2021 11:22:20 -0400 Subject: drm/amdgpu: handle VCN instances when harvesting (v2) There may be multiple instances and only one is harvested. v2: fix typo in commit message Fixes: 83a0b8639185 ("drm/amdgpu: add judgement when add ip blocks (v2)") Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1673 Reviewed-by: Guchun Chen Reviewed-by: James Zhu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 43e7b61d1c5c..ada7bc19118a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -299,6 +299,9 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) ip->major, ip->minor, ip->revision); + if (le16_to_cpu(ip->hw_id) == VCN_HWID) + adev->vcn.num_vcn_inst++; + for (k = 0; k < num_base_address; k++) { /* * convert the endianness of base addresses in place, @@ -385,7 +388,7 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) { struct binary_header *bhdr; struct harvest_table *harvest_info; - int i; + int i, vcn_harvest_count = 0; bhdr = (struct binary_header *)adev->mman.discovery_bin; harvest_info = (struct harvest_table *)(adev->mman.discovery_bin + @@ -397,8 +400,7 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) switch (le32_to_cpu(harvest_info->list[i].hw_id)) { case VCN_HWID: - adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; - adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; + vcn_harvest_count++; break; case DMU_HWID: adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; @@ -407,6 +409,10 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) break; } } + if (vcn_harvest_count == adev->vcn.num_vcn_inst) { + adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; + adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; + } } int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) -- cgit From 3042f80c6cb9340354dc56ecb06473be57adc432 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 6 Aug 2021 10:28:04 +0800 Subject: drm/amd/pm: bug fix for the runtime pm BACO In some systems only MACO is supported. This is to fix the problem that runtime pm is enabled but BACO is not supported. MACO will be handled seperately. Signed-off-by: Kenneth Feng Reviewed-by: Jack Gui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index c751f717a0da..d92dd2c7448e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -353,8 +353,7 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; uint32_t val; - if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_BACO || - powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_MACO) { + if (powerplay_table->platform_caps & SMU_11_0_7_PP_PLATFORM_CAP_BACO) { val = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP0); smu_baco->platform_support = (val & RCC_BIF_STRAP0__STRAP_PX_CAPABLE_MASK) ? true : -- cgit From c90f6263f58a28c3d97b83679d6fd693b33dfd4e Mon Sep 17 00:00:00 2001 From: Eric Bernstein Date: Mon, 26 Jul 2021 15:53:18 -0400 Subject: drm/amd/display: Remove invalid assert for ODM + MPC case Reviewed-by: Dmytro Laktyushkin Acked-by: Anson Jacob Signed-off-by: Eric Bernstein Cc: stable@vger.kernel.org Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 253654d605c2..28e15ebf2f43 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1788,7 +1788,6 @@ static bool dcn30_split_stream_for_mpc_or_odm( } pri_pipe->next_odm_pipe = sec_pipe; sec_pipe->prev_odm_pipe = pri_pipe; - ASSERT(sec_pipe->top_pipe == NULL); if (!sec_pipe->top_pipe) sec_pipe->stream_res.opp = pool->opps[pipe_idx]; -- cgit From 0cde63a8fc4d9f9f580c297211fd05f91c0fd66d Mon Sep 17 00:00:00 2001 From: Anson Jacob Date: Fri, 30 Jul 2021 19:46:20 -0400 Subject: drm/amd/display: use GFP_ATOMIC in amdgpu_dm_irq_schedule_work Replace GFP_KERNEL with GFP_ATOMIC as amdgpu_dm_irq_schedule_work can't sleep. BUG: sleeping function called from invalid context at include/linux/sched/mm.h:196 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 253, name: kworker/6:1H CPU: 6 PID: 253 Comm: kworker/6:1H Tainted: G W OE 5.11.0-promotion_2021_06_07-18_36_28_prelim_revert_retrain #8 Hardware name: System manufacturer System Product Name/PRIME X570-PRO, BIOS 3405 02/01/2021 Workqueue: events_highpri dm_irq_work_func [amdgpu] Call Trace: dump_stack+0x5e/0x74 ___might_sleep.cold+0x87/0x98 __might_sleep+0x4b/0x80 kmem_cache_alloc_trace+0x390/0x4f0 amdgpu_dm_irq_handler+0x171/0x230 [amdgpu] amdgpu_irq_dispatch+0xc0/0x1e0 [amdgpu] amdgpu_ih_process+0x81/0x100 [amdgpu] amdgpu_irq_handler+0x26/0xa0 [amdgpu] __handle_irq_event_percpu+0x49/0x190 ? __hrtimer_get_next_event+0x4d/0x80 handle_irq_event_percpu+0x33/0x80 handle_irq_event+0x33/0x60 handle_edge_irq+0x82/0x190 asm_call_irq_on_stack+0x12/0x20 common_interrupt+0xbb/0x140 asm_common_interrupt+0x1e/0x40 RIP: 0010:amdgpu_device_rreg.part.0+0x44/0xf0 [amdgpu] Code: 53 48 89 fb 4c 3b af c8 08 00 00 73 6d 83 e2 02 75 0d f6 87 40 62 01 00 10 0f 85 83 00 00 00 4c 03 ab d0 08 00 00 45 8b 6d 00 <8b> 05 3e b6 52 00 85 c0 7e 62 48 8b 43 08 0f b7 70 3e 65 8b 05 e3 RSP: 0018:ffffae7740fff9e8 EFLAGS: 00000286 RAX: ffffffffc05ee610 RBX: ffff8aaf8f620000 RCX: 0000000000000006 RDX: 0000000000000000 RSI: 0000000000005430 RDI: ffff8aaf8f620000 RBP: ffffae7740fffa08 R08: 0000000000000001 R09: 000000000000000a R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000005430 R13: 0000000071000000 R14: 0000000000000001 R15: 0000000000005430 ? amdgpu_cgs_write_register+0x20/0x20 [amdgpu] amdgpu_device_rreg+0x17/0x20 [amdgpu] amdgpu_cgs_read_register+0x14/0x20 [amdgpu] dm_read_reg_func+0x38/0xb0 [amdgpu] generic_reg_wait+0x80/0x160 [amdgpu] dce_aux_transfer_raw+0x324/0x7c0 [amdgpu] dc_link_aux_transfer_raw+0x43/0x50 [amdgpu] dm_dp_aux_transfer+0x87/0x110 [amdgpu] drm_dp_dpcd_access+0x72/0x110 [drm_kms_helper] drm_dp_dpcd_read+0xb7/0xf0 [drm_kms_helper] drm_dp_get_one_sb_msg+0x349/0x480 [drm_kms_helper] drm_dp_mst_hpd_irq+0xc5/0xe40 [drm_kms_helper] ? drm_dp_mst_hpd_irq+0xc5/0xe40 [drm_kms_helper] dm_handle_hpd_rx_irq+0x184/0x1a0 [amdgpu] ? dm_handle_hpd_rx_irq+0x184/0x1a0 [amdgpu] handle_hpd_rx_irq+0x195/0x240 [amdgpu] ? __switch_to_asm+0x42/0x70 ? __switch_to+0x131/0x450 dm_irq_work_func+0x19/0x20 [amdgpu] process_one_work+0x209/0x400 worker_thread+0x4d/0x3e0 ? cancel_delayed_work+0xa0/0xa0 kthread+0x124/0x160 ? kthread_park+0x90/0x90 ret_from_fork+0x22/0x30 Reviewed-by: Aurabindo Jayamohanan Pillai Acked-by: Anson Jacob Signed-off-by: Anson Jacob Cc: stable@vger.kernel.org Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 40f617bbb86f..4aba0e8c84f8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -584,7 +584,7 @@ static void amdgpu_dm_irq_schedule_work(struct amdgpu_device *adev, handler_data = container_of(handler_list->next, struct amdgpu_dm_irq_handler_data, list); /*allocate a new amdgpu_dm_irq_handler_data*/ - handler_data_add = kzalloc(sizeof(*handler_data), GFP_KERNEL); + handler_data_add = kzalloc(sizeof(*handler_data), GFP_ATOMIC); if (!handler_data_add) { DRM_ERROR("DM_IRQ: failed to allocate irq handler!\n"); return; -- cgit From 981567bd965329df7e64b13e92a54da816c1e0a4 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Tue, 10 Aug 2021 16:33:55 +1000 Subject: cifs: use the correct max-length for dentry_path_raw() RHBZ: 1972502 PATH_MAX is 4096 but PAGE_SIZE can be >4096 on some architectures such as ppc and would thus write beyond the end of the actual object. Cc: Reported-by: Xiaoli Feng Suggested-by: Brian foster Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French --- fs/cifs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 79402ca0ddfa..5f8a302ffcb2 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -100,7 +100,7 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) pplen = cifs_sb->prepath ? strlen(cifs_sb->prepath) + 1 : 0; - s = dentry_path_raw(direntry, page, PAGE_SIZE); + s = dentry_path_raw(direntry, page, PATH_MAX); if (IS_ERR(s)) return s; if (!s[1]) // for root we want "", not "/" -- cgit From 60f0779862e4ab943810187752c462e85f5fa371 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 21 Jul 2021 17:26:45 +0300 Subject: virtio: Improve vq->broken access to avoid any compiler optimization Currently vq->broken field is read by virtqueue_is_broken() in busy loop in one context by virtnet_send_command(). vq->broken is set to true in other process context by virtio_break_device(). Reader and writer are accessing it without any synchronization. This may lead to a compiler optimization which may result to optimize reading vq->broken only once. Hence, force reading vq->broken on each invocation of virtqueue_is_broken() and also force writing it so that such update is visible to the readers. It is a theoretical fix that isn't yet encountered in the field. Signed-off-by: Parav Pandit Link: https://lore.kernel.org/r/20210721142648.1525924-2-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 89bfe46a8a7f..e179c7c7622c 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2373,7 +2373,7 @@ bool virtqueue_is_broken(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - return vq->broken; + return READ_ONCE(vq->broken); } EXPORT_SYMBOL_GPL(virtqueue_is_broken); @@ -2387,7 +2387,9 @@ void virtio_break_device(struct virtio_device *dev) list_for_each_entry(_vq, &dev->vqs, list) { struct vring_virtqueue *vq = to_vvq(_vq); - vq->broken = true; + + /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ + WRITE_ONCE(vq->broken, true); } } EXPORT_SYMBOL_GPL(virtio_break_device); -- cgit From 249f255476328e597a598ccdbd4414e51a5b6d6e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 21 Jul 2021 17:26:46 +0300 Subject: virtio: Keep vring_del_virtqueue() mirror of VQ create Keep the vring_del_virtqueue() mirror of the create routines. i.e. to delete list entry first as it is added last during the create routine. Signed-off-by: Parav Pandit Link: https://lore.kernel.org/r/20210721142648.1525924-3-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index e179c7c7622c..d5934c2e5a89 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2291,6 +2291,8 @@ void vring_del_virtqueue(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + list_del(&_vq->list); + if (vq->we_own_ring) { if (vq->packed_ring) { vring_free_queue(vq->vq.vdev, @@ -2321,7 +2323,6 @@ void vring_del_virtqueue(struct virtqueue *_vq) kfree(vq->split.desc_state); kfree(vq->split.desc_extra); } - list_del(&_vq->list); kfree(vq); } EXPORT_SYMBOL_GPL(vring_del_virtqueue); -- cgit From 0e566c8f0f2e8325e35f6f97e13cde5356b41814 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 21 Jul 2021 17:26:47 +0300 Subject: virtio: Protect vqs list access VQs may be accessed to mark the device broken while they are created/destroyed. Hence protect the access to the vqs list. Fixes: e2dcdfe95c0b ("virtio: virtio_break_device() to mark all virtqueues broken.") Signed-off-by: Parav Pandit Link: https://lore.kernel.org/r/20210721142648.1525924-4-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio.c | 1 + drivers/virtio/virtio_ring.c | 8 ++++++++ include/linux/virtio.h | 1 + 3 files changed, 10 insertions(+) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 4b15c00c0a0a..49984d2cba24 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -355,6 +355,7 @@ int register_virtio_device(struct virtio_device *dev) virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); INIT_LIST_HEAD(&dev->vqs); + spin_lock_init(&dev->vqs_list_lock); /* * device_add() causes the bus infrastructure to look for a matching diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index d5934c2e5a89..c2aaa0eff6df 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1755,7 +1755,9 @@ static struct virtqueue *vring_create_virtqueue_packed( cpu_to_le16(vq->packed.event_flags_shadow); } + spin_lock(&vdev->vqs_list_lock); list_add_tail(&vq->vq.list, &vdev->vqs); + spin_unlock(&vdev->vqs_list_lock); return &vq->vq; err_desc_extra: @@ -2229,7 +2231,9 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, memset(vq->split.desc_state, 0, vring.num * sizeof(struct vring_desc_state_split)); + spin_lock(&vdev->vqs_list_lock); list_add_tail(&vq->vq.list, &vdev->vqs); + spin_unlock(&vdev->vqs_list_lock); return &vq->vq; err_extra: @@ -2291,7 +2295,9 @@ void vring_del_virtqueue(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + spin_lock(&vq->vq.vdev->vqs_list_lock); list_del(&_vq->list); + spin_unlock(&vq->vq.vdev->vqs_list_lock); if (vq->we_own_ring) { if (vq->packed_ring) { @@ -2386,12 +2392,14 @@ void virtio_break_device(struct virtio_device *dev) { struct virtqueue *_vq; + spin_lock(&dev->vqs_list_lock); list_for_each_entry(_vq, &dev->vqs, list) { struct vring_virtqueue *vq = to_vvq(_vq); /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ WRITE_ONCE(vq->broken, true); } + spin_unlock(&dev->vqs_list_lock); } EXPORT_SYMBOL_GPL(virtio_break_device); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index b1894e0323fa..41edbc01ffa4 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -110,6 +110,7 @@ struct virtio_device { bool config_enabled; bool config_change_pending; spinlock_t config_lock; + spinlock_t vqs_list_lock; /* Protects VQs list access */ struct device dev; struct virtio_device_id id; const struct virtio_config_ops *config; -- cgit From 43bb40c5b92659966bdf4bfe584fde0a3575a049 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 21 Jul 2021 17:26:48 +0300 Subject: virtio_pci: Support surprise removal of virtio pci device When a virtio pci device undergo surprise removal (aka async removal in PCIe spec), mark the device as broken so that any upper layer drivers can abort any outstanding operation. When a virtio net pci device undergo surprise removal which is used by a NetworkManager, a below call trace was observed. kernel:watchdog: BUG: soft lockup - CPU#1 stuck for 26s! [kworker/1:1:27059] watchdog: BUG: soft lockup - CPU#1 stuck for 52s! [kworker/1:1:27059] CPU: 1 PID: 27059 Comm: kworker/1:1 Tainted: G S W I L 5.13.0-hotplug+ #8 Hardware name: Dell Inc. PowerEdge R640/0H28RR, BIOS 2.9.4 11/06/2020 Workqueue: events linkwatch_event RIP: 0010:virtnet_send_command+0xfc/0x150 [virtio_net] Call Trace: virtnet_set_rx_mode+0xcf/0x2a7 [virtio_net] ? __hw_addr_create_ex+0x85/0xc0 __dev_mc_add+0x72/0x80 igmp6_group_added+0xa7/0xd0 ipv6_mc_up+0x3c/0x60 ipv6_find_idev+0x36/0x80 addrconf_add_dev+0x1e/0xa0 addrconf_dev_config+0x71/0x130 addrconf_notify+0x1f5/0xb40 ? rtnl_is_locked+0x11/0x20 ? __switch_to_asm+0x42/0x70 ? finish_task_switch+0xaf/0x2c0 ? raw_notifier_call_chain+0x3e/0x50 raw_notifier_call_chain+0x3e/0x50 netdev_state_change+0x67/0x90 linkwatch_do_dev+0x3c/0x50 __linkwatch_run_queue+0xd2/0x220 linkwatch_event+0x21/0x30 process_one_work+0x1c8/0x370 worker_thread+0x30/0x380 ? process_one_work+0x370/0x370 kthread+0x118/0x140 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x1f/0x30 Hence, add the ability to abort the command on surprise removal which prevents infinite loop and system lockup. Signed-off-by: Parav Pandit Link: https://lore.kernel.org/r/20210721142648.1525924-5-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 222d630c41fc..b35bb2d57f62 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -576,6 +576,13 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct device *dev = get_device(&vp_dev->vdev.dev); + /* + * Device is marked broken on surprise removal so that virtio upper + * layers can abort any ongoing operation. + */ + if (!pci_device_is_present(pci_dev)) + virtio_break_device(&vp_dev->vdev); + pci_disable_sriov(pci_dev); unregister_virtio_device(&vp_dev->vdev); -- cgit From 0e398290cff997610b66e73573faaee70c9a700e Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 28 Jul 2021 21:07:55 +0800 Subject: vhost-vdpa: Fix integer overflow in vhost_vdpa_process_iotlb_update() The "msg->iova + msg->size" addition can have an integer overflow if the iotlb message is from a malicious user space application. So let's fix it. Fixes: 1b48dc03e575 ("vhost: vdpa: report iova range") Reported-by: Dan Carpenter Signed-off-by: Xie Yongji Acked-by: Jason Wang Link: https://lore.kernel.org/r/20210728130756.97-1-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 210ab35a7ebf..9479f7f79217 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -614,7 +614,8 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, long pinned; int ret = 0; - if (msg->iova < v->range.first || + if (msg->iova < v->range.first || !msg->size || + msg->iova > U64_MAX - msg->size + 1 || msg->iova + msg->size - 1 > v->range.last) return -EINVAL; -- cgit From 7b9cae027ba3aaac295ae23a62f47876ed97da73 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 10 Aug 2021 10:19:49 -0700 Subject: KVM: VMX: Use current VMCS to query WAITPKG support for MSR emulation Use the secondary_exec_controls_get() accessor in vmx_has_waitpkg() to effectively get the controls for the current VMCS, as opposed to using vmx->secondary_exec_controls, which is the cached value of KVM's desired controls for vmcs01 and truly not reflective of any particular VMCS. While the waitpkg control is not dynamic, i.e. vmcs01 will always hold the same waitpkg configuration as vmx->secondary_exec_controls, the same does not hold true for vmcs02 if the L1 VMM hides the feature from L2. If L1 hides the feature _and_ does not intercept MSR_IA32_UMWAIT_CONTROL, L2 could incorrectly read/write L1's virtual MSR instead of taking a #GP. Fixes: 6e3ba4abcea5 ("KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210810171952.2758100-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index db88ed4f2121..17a1cb4b059d 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -522,7 +522,7 @@ static inline struct vmcs *alloc_vmcs(bool shadow) static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) { - return vmx->secondary_exec_control & + return secondary_exec_controls_get(vmx) & SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE; } -- cgit From 45a687879b31caae4032abd1c2402e289d2b8083 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 10 Aug 2021 14:00:10 +0300 Subject: net: bridge: fix flags interpretation for extern learn fdb entries Ignore fdb flags when adding port extern learn entries and always set BR_FDB_LOCAL flag when adding bridge extern learn entries. This is closest to the behaviour we had before and avoids breaking any use cases which were allowed. This patch fixes iproute2 calls which assume NUD_PERMANENT and were allowed before, example: $ bridge fdb add 00:11:22:33:44:55 dev swp1 extern_learn Extern learn entries are allowed to roam, but do not expire, so static or dynamic flags make no sense for them. Also add a comment for future reference. Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space") Fixes: 0541a6293298 ("net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry") Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Signed-off-by: Nikolay Aleksandrov Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210810110010.43859-1-razor@blackwall.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/neighbour.h | 7 +++++-- net/bridge/br.c | 3 +-- net/bridge/br_fdb.c | 11 ++++------- net/bridge/br_private.h | 2 +- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index dc8b72201f6c..00a60695fa53 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -66,8 +66,11 @@ enum { #define NUD_NONE 0x00 /* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change - and make no address resolution or NUD. - NUD_PERMANENT also cannot be deleted by garbage collectors. + * and make no address resolution or NUD. + * NUD_PERMANENT also cannot be deleted by garbage collectors. + * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry + * states don't make sense and thus are ignored. Such entries don't age and + * can roam. */ struct nda_cacheinfo { diff --git a/net/bridge/br.c b/net/bridge/br.c index bbab9984f24e..ef743f94254d 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -166,8 +166,7 @@ static int br_switchdev_event(struct notifier_block *unused, case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_add(br, p, fdb_info->addr, - fdb_info->vid, - fdb_info->is_local, false); + fdb_info->vid, false); if (err) { err = notifier_from_errno(err); break; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 835cec1e5a03..5dee30966ed3 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1044,10 +1044,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, "FDB entry towards bridge must be permanent"); return -EINVAL; } - - err = br_fdb_external_learn_add(br, p, addr, vid, - ndm->ndm_state & NUD_PERMANENT, - true); + err = br_fdb_external_learn_add(br, p, addr, vid, true); } else { spin_lock_bh(&br->hash_lock); err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb); @@ -1275,7 +1272,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) } int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid, bool is_local, + const unsigned char *addr, u16 vid, bool swdev_notify) { struct net_bridge_fdb_entry *fdb; @@ -1293,7 +1290,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (swdev_notify) flags |= BIT(BR_FDB_ADDED_BY_USER); - if (is_local) + if (!p) flags |= BIT(BR_FDB_LOCAL); fdb = fdb_create(br, p, addr, vid, flags); @@ -1322,7 +1319,7 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (swdev_notify) set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); - if (is_local) + if (!p) set_bit(BR_FDB_LOCAL, &fdb->flags); if (modified) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aa64d8d63ca3..2b48b204205e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -711,7 +711,7 @@ int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid, bool is_local, + const unsigned char *addr, u16 vid, bool swdev_notify); int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, -- cgit From c35b57ceff906856dd85af2d6709dab18fbca81f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 10 Aug 2021 14:50:24 +0300 Subject: net: switchdev: zero-initialize struct switchdev_notifier_fdb_info emitted by drivers towards the bridge The blamed commit added a new field to struct switchdev_notifier_fdb_info, but did not make sure that all call paths set it to something valid. For example, a switchdev driver may emit a SWITCHDEV_FDB_ADD_TO_BRIDGE notifier, and since the 'is_local' flag is not set, it contains junk from the stack, so the bridge might interpret those notifications as being for local FDB entries when that was not intended. To avoid that now and in the future, zero-initialize all switchdev_notifier_fdb_info structures created by drivers such that all newly added fields to not need to touch drivers again. Fixes: 2c4eca3ef716 ("net: bridge: switchdev: include local flag in FDB notifications") Reported-by: Ido Schimmel Signed-off-by: Vladimir Oltean Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Reviewed-by: Leon Romanovsky Reviewed-by: Karsten Graul Link: https://lore.kernel.org/r/20210810115024.1629983-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/prestera/prestera_switchdev.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 4 ++-- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 +- drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c | 2 +- drivers/net/ethernet/rocker/rocker_main.c | 2 +- drivers/net/ethernet/rocker/rocker_ofdpa.c | 2 +- drivers/net/ethernet/ti/am65-cpsw-switchdev.c | 2 +- drivers/net/ethernet/ti/cpsw_switchdev.c | 2 +- drivers/s390/net/qeth_l2_main.c | 4 ++-- net/dsa/slave.c | 2 +- 11 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index 0b3e8f2db294..9a309169dbae 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -748,7 +748,7 @@ static void prestera_fdb_offload_notify(struct prestera_port *port, struct switchdev_notifier_fdb_info *info) { - struct switchdev_notifier_fdb_info send_info; + struct switchdev_notifier_fdb_info send_info = {}; send_info.addr = info->addr; send_info.vid = info->vid; @@ -1123,7 +1123,7 @@ static int prestera_switchdev_blk_event(struct notifier_block *unused, static void prestera_fdb_event(struct prestera_switch *sw, struct prestera_event *evt, void *arg) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; struct net_device *dev = NULL; struct prestera_port *port; struct prestera_lag *lag; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index f3f56f32e435..69a3630818d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -69,7 +69,7 @@ static void mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *addr, u16 vid, unsigned long val) { - struct switchdev_notifier_fdb_info send_info; + struct switchdev_notifier_fdb_info send_info = {}; send_info.addr = addr; send_info.vid = vid; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 7e221ef01437..f69cbb3852d5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -9079,7 +9079,7 @@ mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif, static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; struct net_device *dev; dev = br_fdb_find_port(rif->dev, mac, 0); @@ -9127,8 +9127,8 @@ mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif, static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac) { + struct switchdev_notifier_fdb_info info = {}; u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); - struct switchdev_notifier_fdb_info info; struct net_device *br_dev; struct net_device *dev; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index c5ef9aa64efe..8f90cd323d5f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2508,7 +2508,7 @@ mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type, const char *mac, u16 vid, struct net_device *dev, bool offloaded) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; info.addr = mac; info.vid = vid; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c index 0443f66b5550..9a8e4f201eb1 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_mactable.c @@ -277,7 +277,7 @@ static void sparx5_fdb_call_notifiers(enum switchdev_notifier_type type, const char *mac, u16 vid, struct net_device *dev, bool offloaded) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; info.addr = mac; info.vid = vid; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index a46633606cae..1f06b92ee5bb 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2715,7 +2715,7 @@ static void rocker_fdb_offload_notify(struct rocker_port *rocker_port, struct switchdev_notifier_fdb_info *recv_info) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; info.addr = recv_info->addr; info.vid = recv_info->vid; diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 967a634ee9ac..e33a9d283a4e 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -1822,7 +1822,7 @@ static void ofdpa_port_fdb_learn_work(struct work_struct *work) container_of(work, struct ofdpa_fdb_learn_work, work); bool removing = (lw->flags & OFDPA_OP_FLAG_REMOVE); bool learned = (lw->flags & OFDPA_OP_FLAG_LEARNED); - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; info.addr = lw->addr; info.vid = lw->vid; diff --git a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c index 9c29b363e9ae..599708a3e81d 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-switchdev.c +++ b/drivers/net/ethernet/ti/am65-cpsw-switchdev.c @@ -358,7 +358,7 @@ static int am65_cpsw_port_obj_del(struct net_device *ndev, const void *ctx, static void am65_cpsw_fdb_offload_notify(struct net_device *ndev, struct switchdev_notifier_fdb_info *rcv) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; info.addr = rcv->addr; info.vid = rcv->vid; diff --git a/drivers/net/ethernet/ti/cpsw_switchdev.c b/drivers/net/ethernet/ti/cpsw_switchdev.c index f7fb6e17dadd..a7d97d429e06 100644 --- a/drivers/net/ethernet/ti/cpsw_switchdev.c +++ b/drivers/net/ethernet/ti/cpsw_switchdev.c @@ -368,7 +368,7 @@ static int cpsw_port_obj_del(struct net_device *ndev, const void *ctx, static void cpsw_fdb_offload_notify(struct net_device *ndev, struct switchdev_notifier_fdb_info *rcv) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; info.addr = rcv->addr; info.vid = rcv->vid; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2abf86c104d5..d7cdd9cfe485 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -279,7 +279,7 @@ static void qeth_l2_set_pnso_mode(struct qeth_card *card, static void qeth_l2_dev2br_fdb_flush(struct qeth_card *card) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; QETH_CARD_TEXT(card, 2, "fdbflush"); @@ -679,7 +679,7 @@ static void qeth_l2_dev2br_fdb_notify(struct qeth_card *card, u8 code, struct net_if_token *token, struct mac_addr_lnid *addr_lnid) { - struct switchdev_notifier_fdb_info info; + struct switchdev_notifier_fdb_info info = {}; u8 ntfy_mac[ETH_ALEN]; ether_addr_copy(ntfy_mac, addr_lnid->mac); diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 532085da8d8f..23be8e01026b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -2291,8 +2291,8 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, static void dsa_fdb_offload_notify(struct dsa_switchdev_event_work *switchdev_work) { + struct switchdev_notifier_fdb_info info = {}; struct dsa_switch *ds = switchdev_work->ds; - struct switchdev_notifier_fdb_info info; struct dsa_port *dp; if (!dsa_is_user_port(ds, switchdev_work->port)) -- cgit From 519133debcc19f5c834e7e28480b60bdc234fe02 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 9 Aug 2021 21:20:23 +0800 Subject: net: bridge: fix memleak in br_add_if() I got a memleak report: BUG: memory leak unreferenced object 0x607ee521a658 (size 240): comm "syz-executor.0", pid 955, jiffies 4294780569 (age 16.449s) hex dump (first 32 bytes, cpu 1): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000d830ea5a>] br_multicast_add_port+0x1c2/0x300 net/bridge/br_multicast.c:1693 [<00000000274d9a71>] new_nbp net/bridge/br_if.c:435 [inline] [<00000000274d9a71>] br_add_if+0x670/0x1740 net/bridge/br_if.c:611 [<0000000012ce888e>] do_set_master net/core/rtnetlink.c:2513 [inline] [<0000000012ce888e>] do_set_master+0x1aa/0x210 net/core/rtnetlink.c:2487 [<0000000099d1cafc>] __rtnl_newlink+0x1095/0x13e0 net/core/rtnetlink.c:3457 [<00000000a01facc0>] rtnl_newlink+0x64/0xa0 net/core/rtnetlink.c:3488 [<00000000acc9186c>] rtnetlink_rcv_msg+0x369/0xa10 net/core/rtnetlink.c:5550 [<00000000d4aabb9c>] netlink_rcv_skb+0x134/0x3d0 net/netlink/af_netlink.c:2504 [<00000000bc2e12a3>] netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] [<00000000bc2e12a3>] netlink_unicast+0x4a0/0x6a0 net/netlink/af_netlink.c:1340 [<00000000e4dc2d0e>] netlink_sendmsg+0x789/0xc70 net/netlink/af_netlink.c:1929 [<000000000d22c8b3>] sock_sendmsg_nosec net/socket.c:654 [inline] [<000000000d22c8b3>] sock_sendmsg+0x139/0x170 net/socket.c:674 [<00000000e281417a>] ____sys_sendmsg+0x658/0x7d0 net/socket.c:2350 [<00000000237aa2ab>] ___sys_sendmsg+0xf8/0x170 net/socket.c:2404 [<000000004f2dc381>] __sys_sendmsg+0xd3/0x190 net/socket.c:2433 [<0000000005feca6c>] do_syscall_64+0x37/0x90 arch/x86/entry/common.c:47 [<000000007304477d>] entry_SYSCALL_64_after_hwframe+0x44/0xae On error path of br_add_if(), p->mcast_stats allocated in new_nbp() need be freed, or it will be leaked. Fixes: 1080ab95e3c7 ("net: bridge: add support for IGMP/MLD stats and export them via netlink") Reported-by: Hulk Robot Signed-off-by: Yang Yingliang Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/r/20210809132023.978546-1-yangyingliang@huawei.com Signed-off-by: Jakub Kicinski --- net/bridge/br_if.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 6e4a32354a13..14cd6ef96111 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -616,6 +616,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, err = dev_set_allmulti(dev, 1); if (err) { + br_multicast_del_port(p); kfree(p); /* kobject not yet init'd, manually free */ goto err1; } @@ -729,6 +730,7 @@ err4: err3: sysfs_remove_link(br->ifobj, p->dev->name); err2: + br_multicast_del_port(p); kobject_put(&p->kobj); dev_set_allmulti(dev, -1); err1: -- cgit From bba676cc0b6122a74fa2e246f38a6b05c6f95b36 Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Thu, 5 Aug 2021 14:49:05 -0700 Subject: i2c: iproc: fix race between client unreg and tasklet Similar NULL deref was originally fixed by graceful teardown sequence - https://lore.kernel.org/linux-i2c/1597106560-79693-1-git-send-email-dphadke@linux.microsoft.com After this, a tasklet was added to take care of FIFO full condition for large i2c transaction. https://lore.kernel.org/linux-arm-kernel/20201102035433.6774-1-rayagonda.kokatanur@broadcom.com/ This introduced regression, a new race condition between tasklet enabling interrupts and client unreg teardown sequence. Kill tasklet before unreg_slave() masks bits in IE_OFFSET. Updated teardown sequence - (1) disable_irq() (2) Kill tasklet (3) Mask event enable bits in control reg (4) Erase slave address (avoid further writes to rx fifo) (5) Flush tx and rx FIFOs (6) Clear pending event (interrupt) bits in status reg (7) Set client pointer to NULL (8) enable_irq() -- Unable to handle kernel read from unreadable memory at virtual address 0000000000000320 Mem abort info: ESR = 0x96000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=000000009212a000 [0000000000000320] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 96000004 [#1] SMP CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O Hardware name: Overlake (DT) pstate: 40400085 (nZcv daIf +PAN -UAO -TCO BTYPE=--) pc : bcm_iproc_i2c_slave_isr+0x2b8/0x8e4 lr : bcm_iproc_i2c_slave_isr+0x1c8/0x8e4 sp : ffff800010003e70 x29: ffff800010003e80 x28: ffffda017acdc000 x27: ffffda017b0ae000 x26: ffff800010004000 x25: ffff800010000000 x24: ffffda017af4a168 x23: 0000000000000073 x22: 0000000000000000 x21: 0000000001400000 x20: 0000000001000000 x19: ffff06f09583f880 x18: 00000000fa83b2da x17: 000000000000b67e x16: 0000000002edb2f3 x15: 00000000000002c7 x14: 00000000000002c7 x13: 0000000000000006 x12: 0000000000000033 x11: 0000000000000000 x10: 0000000001000000 x9 : 0000000003289312 x8 : 0000000003289311 x7 : 02d0cd03a303adbc x6 : 02d18e7f0a4dfc6c x5 : 02edb2f33f76ea68 x4 : 00000000fa83b2da x3 : ffffda017af43cd0 x2 : ffff800010003e74 x1 : 0000000001400000 x0 : 0000000000000000 Call trace: bcm_iproc_i2c_slave_isr+0x2b8/0x8e4 bcm_iproc_i2c_isr+0x178/0x290 __handle_irq_event_percpu+0xd0/0x200 handle_irq_event+0x60/0x1a0 handle_fasteoi_irq+0x130/0x220 __handle_domain_irq+0x8c/0xcc gic_handle_irq+0xc0/0x120 el1_irq+0xcc/0x180 finish_task_switch+0x100/0x1d8 __schedule+0x61c/0x7a0 schedule_idle+0x28/0x44 do_idle+0x254/0x28c cpu_startup_entry+0x28/0x2c rest_init+0xc4/0xd0 arch_call_rest_init+0x14/0x1c start_kernel+0x33c/0x3b8 Code: f9423260 910013e2 11000509 b9047a69 (f9419009) ---[ end trace 4781455b2a7bec15 ]--- Fixes: 4d658451c9d6 ("i2c: iproc: handle rx fifo full interrupt") Signed-off-by: Dhananjay Phadke Acked-by: Ray Jui Acked-by: Rayagonda Kokatanur Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-bcm-iproc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index cceaf69279a9..6304d1dd2dd6 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -1224,14 +1224,14 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave) disable_irq(iproc_i2c->irq); + tasklet_kill(&iproc_i2c->slave_rx_tasklet); + /* disable all slave interrupts */ tmp = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); tmp &= ~(IE_S_ALL_INTERRUPT_MASK << IE_S_ALL_INTERRUPT_SHIFT); iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, tmp); - tasklet_kill(&iproc_i2c->slave_rx_tasklet); - /* Erase the slave address programmed */ tmp = iproc_i2c_rd_reg(iproc_i2c, S_CFG_SMBUS_ADDR_OFFSET); tmp &= ~BIT(S_CFG_EN_NIC_SMB_ADDR3_SHIFT); -- cgit From 86ff25ed6cd8240d18df58930bd8848b19fce308 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 29 Jul 2021 16:35:32 +0200 Subject: i2c: dev: zero out array used for i2c reads from userspace If an i2c driver happens to not provide the full amount of data that a user asks for, it is possible that some uninitialized data could be sent to userspace. While all in-kernel drivers look to be safe, just be sure by initializing the buffer to zero before it is passed to the i2c driver so that any future drivers will not have this issue. Also properly copy the amount of data recvieved to the userspace buffer, as pointed out by Dan Carpenter. Reported-by: Eric Dumazet Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index cb64fe649390..77f576e51652 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -141,7 +141,7 @@ static ssize_t i2cdev_read(struct file *file, char __user *buf, size_t count, if (count > 8192) count = 8192; - tmp = kmalloc(count, GFP_KERNEL); + tmp = kzalloc(count, GFP_KERNEL); if (tmp == NULL) return -ENOMEM; @@ -150,7 +150,8 @@ static ssize_t i2cdev_read(struct file *file, char __user *buf, size_t count, ret = i2c_master_recv(client, tmp, count); if (ret >= 0) - ret = copy_to_user(buf, tmp, count) ? -EFAULT : ret; + if (copy_to_user(buf, tmp, ret)) + ret = -EFAULT; kfree(tmp); return ret; } -- cgit From 3f12cc4bb0a4d7b542af43b6f1b7175f13015629 Mon Sep 17 00:00:00 2001 From: Hu Haowen Date: Wed, 28 Jul 2021 23:53:46 +0800 Subject: Documentation: i2c: add i2c-sysfs into index Append i2c-sysfs to toctree in order to get rid of building warnings. Fixes: 31df7195b100 ("Documentation: i2c: Add doc for I2C sysfs") Signed-off-by: Hu Haowen Signed-off-by: Wolfram Sang --- Documentation/i2c/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/i2c/index.rst b/Documentation/i2c/index.rst index 8b76217e370a..6270f1fd7d4e 100644 --- a/Documentation/i2c/index.rst +++ b/Documentation/i2c/index.rst @@ -17,6 +17,7 @@ Introduction busses/index i2c-topology muxes/i2c-mux-gpio + i2c-sysfs Writing device drivers ====================== -- cgit From 06a089ef644934372a3062528244fca3417d3430 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 11 Aug 2021 08:34:46 +0300 Subject: bus: ti-sysc: Fix error handling for sysc_check_active_timer() We have changed the return type for sysc_check_active_timer() from -EBUSY to -ENXIO, but the gpt12 system timer fix still checks for -EBUSY. We are also not returning on other errors like we did earlier as noted by Pavel Machek . Commit 3ff340e24c9d ("bus: ti-sysc: Fix gpt12 system timer issue with reserved status") should have been updated for commit 65fb73676112 ("bus: ti-sysc: suppress err msg for timers used as clockevent/source"). Let's fix the issue by checking for -ENXIO and returning on any other errors as suggested by Pavel Machek . Fixes: 3ff340e24c9d ("bus: ti-sysc: Fix gpt12 system timer issue with reserved status") Depends-on: 65fb73676112 ("bus: ti-sysc: suppress err msg for timers used as clockevent/source") Reported-by: Pavel Machek Reviewed-by: Pavel Machek (CIP) Cc: Grygorii Strashko Cc: Jarkko Nikula Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 0ef98e3ba341..148a4dd8cb9a 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -3097,8 +3097,10 @@ static int sysc_probe(struct platform_device *pdev) return error; error = sysc_check_active_timer(ddata); - if (error == -EBUSY) + if (error == -ENXIO) ddata->reserved = true; + else if (error) + return error; error = sysc_get_clocks(ddata); if (error) -- cgit From 017f5fb9ce793e3558db94ee72068622bc0b79db Mon Sep 17 00:00:00 2001 From: Andrew Delgadillo Date: Tue, 10 Aug 2021 23:17:55 +0000 Subject: arm64: clean vdso & vdso32 files commit a5b8ca97fbf8 ("arm64: do not descend to vdso directories twice") changes the cleaning behavior of arm64's vdso files, in that vdso.lds, vdso.so, and vdso.so.dbg are not removed upon a 'make clean/mrproper': $ make defconfig ARCH=arm64 $ make ARCH=arm64 $ make mrproper ARCH=arm64 $ git clean -nxdf Would remove arch/arm64/kernel/vdso/vdso.lds Would remove arch/arm64/kernel/vdso/vdso.so Would remove arch/arm64/kernel/vdso/vdso.so.dbg To remedy this, manually descend into arch/arm64/kernel/vdso upon cleaning. After this commit: $ make defconfig ARCH=arm64 $ make ARCH=arm64 $ make mrproper ARCH=arm64 $ git clean -nxdf Similar results are obtained for the vdso32 equivalent. Signed-off-by: Andrew Delgadillo Cc: stable@vger.kernel.org Fixes: a5b8ca97fbf8 ("arm64: do not descend to vdso directories twice") Link: https://lore.kernel.org/r/20210810231755.1743524-1-adelg@google.com Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7b668db43261..1110d386f3b4 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -183,6 +183,8 @@ endif # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) + $(Q)$(MAKE) $(clean)=arch/arm64/kernel/vdso + $(Q)$(MAKE) $(clean)=arch/arm64/kernel/vdso32 ifeq ($(KBUILD_EXTMOD),) # We need to generate vdso-offsets.h before compiling certain files in kernel/. -- cgit From f7ad318ea0ad58ebe0e595e59aed270bb643b29b Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Wed, 28 Jul 2021 21:07:56 +0800 Subject: vhost: Fix the calculation in vhost_overflow() This fixes the incorrect calculation for integer overflow when the last address of iova range is 0xffffffff. Fixes: ec33d031a14b ("vhost: detect 32 bit integer wrap around") Reported-by: Jason Wang Signed-off-by: Xie Yongji Acked-by: Jason Wang Link: https://lore.kernel.org/r/20210728130756.97-2-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index b9e853e6094d..59edb5a1ffe2 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -735,10 +735,16 @@ static bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz) (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); } +/* Make sure 64 bit math will not overflow. */ static bool vhost_overflow(u64 uaddr, u64 size) { - /* Make sure 64 bit math will not overflow. */ - return uaddr > ULONG_MAX || size > ULONG_MAX || uaddr > ULONG_MAX - size; + if (uaddr > ULONG_MAX || size > ULONG_MAX) + return true; + + if (!size) + return false; + + return uaddr > ULONG_MAX - size + 1; } /* Caller should have vq mutex and device mutex. */ -- cgit From 2b847f21145d84e2e1dde99d3e2c00a5468f02e4 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Thu, 15 Jul 2021 16:00:23 +0800 Subject: vdpa_sim: Fix return value check for vdpa_alloc_device() The vdpa_alloc_device() returns an error pointer upon failure, not NULL. To handle the failure correctly, this replaces NULL check with IS_ERR() check and propagate the error upwards. Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator") Reported-by: Dan Carpenter Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20210715080026.242-1-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 14e024de5cbf..c621cf7feec0 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -251,8 +251,10 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, dev_attr->name); - if (!vdpasim) + if (IS_ERR(vdpasim)) { + ret = PTR_ERR(vdpasim); goto err_alloc; + } vdpasim->dev_attr = *dev_attr; INIT_WORK(&vdpasim->work, dev_attr->work_fn); -- cgit From 9632e78e82648aa98340df78eab9106f63da151e Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Thu, 15 Jul 2021 16:00:24 +0800 Subject: vp_vdpa: Fix return value check for vdpa_alloc_device() The vdpa_alloc_device() returns an error pointer upon failure, not NULL. To handle the failure correctly, this replaces NULL check with IS_ERR() check and propagate the error upwards. Fixes: 64b9f64f80a6 ("vdpa: introduce virtio pci driver") Reported-by: Dan Carpenter Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20210715080026.242-2-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- drivers/vdpa/virtio_pci/vp_vdpa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 7b4a6396c553..fe0527329857 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -436,9 +436,9 @@ static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa, dev, &vp_vdpa_ops, NULL); - if (vp_vdpa == NULL) { + if (IS_ERR(vp_vdpa)) { dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n"); - return -ENOMEM; + return PTR_ERR(vp_vdpa); } mdev = &vp_vdpa->mdev; -- cgit From 1057afa0121db8bd3ca4718c8e0ca12388ab7759 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Thu, 15 Jul 2021 16:00:25 +0800 Subject: vDPA/ifcvf: Fix return value check for vdpa_alloc_device() The vdpa_alloc_device() returns an error pointer upon failure, not NULL. To handle the failure correctly, this replaces NULL check with IS_ERR() check and propagate the error upwards. Fixes: 5a2414bc454e ("virtio: Intel IFC VF driver for VDPA") Reported-by: Dan Carpenter Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20210715080026.242-3-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- drivers/vdpa/ifcvf/ifcvf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 21b78f1cd521..351c6cfb24c3 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -493,9 +493,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa, dev, &ifc_vdpa_ops, NULL); - if (adapter == NULL) { + if (IS_ERR(adapter)) { IFCVF_ERR(pdev, "Failed to allocate vDPA structure"); - return -ENOMEM; + return PTR_ERR(adapter); } pci_set_master(pdev); -- cgit From c8d182bd387a09a8b95303c8086238e8bf61fcfc Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Thu, 15 Jul 2021 16:00:26 +0800 Subject: vdpa: Add documentation for vdpa_alloc_device() macro The return value of vdpa_alloc_device() macro is not very clear, so that most of callers did the wrong check. Let's add some comments to better document it. Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20210715080026.242-4-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- include/linux/vdpa.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 3357ac98878d..8cfe49d201dd 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -277,6 +277,17 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, const struct vdpa_config_ops *config, size_t size, const char *name); +/** + * vdpa_alloc_device - allocate and initilaize a vDPA device + * + * @dev_struct: the type of the parent structure + * @member: the name of struct vdpa_device within the @dev_struct + * @parent: the parent device + * @config: the bus operations that is supported by this device + * @name: name of the vdpa device + * + * Return allocated data structure or ERR_PTR upon error + */ #define vdpa_alloc_device(dev_struct, member, parent, config, name) \ container_of(__vdpa_alloc_device( \ parent, config, \ -- cgit From cb5d2c1f6cc0e5769099a7d44b9d08cf58cae206 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Thu, 1 Jul 2021 13:46:52 +0200 Subject: virtio_vdpa: reject invalid vq indices Do not call vDPA drivers' callbacks with vq indicies larger than what the drivers indicate that they support. vDPA drivers do not bounds check the indices. Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20210701114652.21956-1-vincent.whitchurch@axis.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella --- drivers/virtio/virtio_vdpa.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index e1a141135992..72eaef2caeb1 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -151,6 +151,9 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, if (!name) return NULL; + if (index >= vdpa->nvqs) + return ERR_PTR(-ENOENT); + /* Queue shouldn't already be set up. */ if (ops->get_vq_ready(vdpa, index)) return ERR_PTR(-ENOENT); -- cgit From e74cfa91f42c50f7f649b0eca46aa049754ccdbd Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 25 Jun 2021 08:55:02 +0530 Subject: vringh: Use wiov->used to check for read/write desc order As __vringh_iov() traverses a descriptor chain, it populates each descriptor entry into either read or write vring iov and increments that iov's ->used member. So, as we iterate over a descriptor chain, at any point, (riov/wriov)->used value gives the number of descriptor enteries available, which are to be read or written by the device. As all read iovs must precede the write iovs, wiov->used should be zero when we are traversing a read descriptor. Current code checks for wiov->i, to figure out whether any previous entry in the current descriptor chain was a write descriptor. However, iov->i is only incremented, when these vring iovs are consumed, at a later point, and remain 0 in __vringh_iov(). So, correct the check for read and write descriptor order, to use wiov->used. Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Signed-off-by: Neeraj Upadhyay Link: https://lore.kernel.org/r/1624591502-4827-1-git-send-email-neeraju@codeaurora.org Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vringh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 4af8fa259d65..14e2043d7685 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -359,7 +359,7 @@ __vringh_iov(struct vringh *vrh, u16 i, iov = wiov; else { iov = riov; - if (unlikely(wiov && wiov->i)) { + if (unlikely(wiov && wiov->used)) { vringh_bad("Readable desc %p after writable", &descs[i]); err = -EINVAL; -- cgit From 82e89ea077b93b3c131fa175b0df3acb5b1d5cdf Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Mon, 9 Aug 2021 18:16:09 +0800 Subject: virtio-blk: Add validation for block size in config space An untrusted device might presents an invalid block size in configuration space. This tries to add validation for it in the validate callback and clear the VIRTIO_BLK_F_BLK_SIZE feature bit if the value is out of the supported range. And we also double check the value in virtblk_probe() in case that it's changed after the validation. Signed-off-by: Xie Yongji Link: https://lore.kernel.org/r/20210809101609.148-1-xieyongji@bytedance.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/block/virtio_blk.c | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 4b49df2dfd23..afb37aac09e8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -692,6 +692,28 @@ static const struct blk_mq_ops virtio_mq_ops = { static unsigned int virtblk_queue_depth; module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); +static int virtblk_validate(struct virtio_device *vdev) +{ + u32 blk_size; + + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + + if (!virtio_has_feature(vdev, VIRTIO_BLK_F_BLK_SIZE)) + return 0; + + blk_size = virtio_cread32(vdev, + offsetof(struct virtio_blk_config, blk_size)); + + if (blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE) + __virtio_clear_bit(vdev, VIRTIO_BLK_F_BLK_SIZE); + + return 0; +} + static int virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -703,12 +725,6 @@ static int virtblk_probe(struct virtio_device *vdev) u8 physical_block_exp, alignment_offset; unsigned int queue_depth; - if (!vdev->config->get) { - dev_err(&vdev->dev, "%s failure: config access disabled\n", - __func__); - return -EINVAL; - } - err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS), GFP_KERNEL); if (err < 0) @@ -823,6 +839,14 @@ static int virtblk_probe(struct virtio_device *vdev) else blk_size = queue_logical_block_size(q); + if (unlikely(blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)) { + dev_err(&vdev->dev, + "block size is changed unexpectedly, now is %u\n", + blk_size); + err = -EINVAL; + goto err_cleanup_disk; + } + /* Use topology information if available */ err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, struct virtio_blk_config, physical_block_exp, @@ -881,6 +905,8 @@ static int virtblk_probe(struct virtio_device *vdev) device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); return 0; +err_cleanup_disk: + blk_cleanup_disk(vblk->disk); out_free_tags: blk_mq_free_tag_set(&vblk->tag_set); out_free_vq: @@ -983,6 +1009,7 @@ static struct virtio_driver virtio_blk = { .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, + .validate = virtblk_validate, .probe = virtblk_probe, .remove = virtblk_remove, .config_changed = virtblk_config_changed, -- cgit From ea2f6af16532511eb1cd8eb62845c37861f24ce8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 10 Aug 2021 12:25:05 -0400 Subject: vringh: pull in spinlock header we use a spinlock now pull in the correct header to make vring.h self sufficient. Signed-off-by: Michael S. Tsirkin --- include/linux/vringh.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 84db7b8f912f..212892cf9822 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -14,6 +14,7 @@ #include #include #include +#include #if IS_REACHABLE(CONFIG_VHOST_IOTLB) #include #include -- cgit From f8ce72632fa7ed286cc9a62c35e279330a14d3e0 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 10 Aug 2021 12:26:05 -0400 Subject: virtio_ring: pull in spinlock header we use a spinlock now pull in the correct header to make virtio_ring.c self sufficient. Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index c2aaa0eff6df..dd95dfd85e98 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #ifdef DEBUG -- cgit From a24ce06c70fe7df795a846ad713ccaa9b56a7666 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 10 Aug 2021 12:26:05 -0400 Subject: tools/virtio: fix build We use a spinlock now so add a stub. Ignore bogus uninitialized variable warnings. Signed-off-by: Michael S. Tsirkin --- tools/virtio/Makefile | 3 ++- tools/virtio/linux/spinlock.h | 56 +++++++++++++++++++++++++++++++++++++++++++ tools/virtio/linux/virtio.h | 2 ++ 3 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 tools/virtio/linux/spinlock.h diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index b587b9a7a124..0d7bbe49359d 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -4,7 +4,8 @@ test: virtio_test vringh_test virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o -CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h +CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h +LDFLAGS += -lpthread vpath %.c ../../drivers/virtio ../../drivers/vhost mod: ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V} diff --git a/tools/virtio/linux/spinlock.h b/tools/virtio/linux/spinlock.h new file mode 100644 index 000000000000..028e3cdcc5d3 --- /dev/null +++ b/tools/virtio/linux/spinlock.h @@ -0,0 +1,56 @@ +#ifndef SPINLOCK_H_STUB +#define SPINLOCK_H_STUB + +#include + +typedef pthread_spinlock_t spinlock_t; + +static inline void spin_lock_init(spinlock_t *lock) +{ + int r = pthread_spin_init(lock, 0); + assert(!r); +} + +static inline void spin_lock(spinlock_t *lock) +{ + int ret = pthread_spin_lock(lock); + assert(!ret); +} + +static inline void spin_unlock(spinlock_t *lock) +{ + int ret = pthread_spin_unlock(lock); + assert(!ret); +} + +static inline void spin_lock_bh(spinlock_t *lock) +{ + spin_lock(lock); +} + +static inline void spin_unlock_bh(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static inline void spin_lock_irq(spinlock_t *lock) +{ + spin_lock(lock); +} + +static inline void spin_unlock_irq(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static inline void spin_lock_irqsave(spinlock_t *lock, unsigned long f) +{ + spin_lock(lock); +} + +static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long f) +{ + spin_unlock(lock); +} + +#endif diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 5d90254ddae4..363b98228301 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -3,6 +3,7 @@ #define LINUX_VIRTIO_H #include #include +#include struct device { void *parent; @@ -12,6 +13,7 @@ struct virtio_device { struct device dev; u64 features; struct list_head vqs; + spinlock_t vqs_list_lock; }; struct virtqueue { -- cgit From 08dbd5660232bede7916d8568003012c1182cc9a Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 11 Aug 2021 08:37:13 +0300 Subject: vdpa/mlx5: Avoid destroying MR on empty iotlb The current code treats an empty iotlb provdied in set_map() as a special case and destroy the memory region object. This must not be done since the virtqueue objects reference this MR. Doing so will cause the driver unload to emit errors and log timeouts caused by the firmware complaining on busy resources. This patch treats an empty iotlb as any other change of mapping. In this case, mlx5_vdpa_create_mr() will fail and the entire set_map() call to fail. This issue has not been encountered before but was seen to occur in a non-official version of qemu. Since qemu is a userspace program, the driver must protect against such case. Fixes: 94abbccdf291 ("vdpa/mlx5: Add shared memory registration code") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20210811053713.66658-1-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/core/mr.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index dcee6039e966..e59135fa867e 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -512,11 +512,6 @@ out: mutex_unlock(&mr->mkey_mtx); } -static bool map_empty(struct vhost_iotlb *iotlb) -{ - return !vhost_iotlb_itree_first(iotlb, 0, U64_MAX); -} - int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, bool *change_map) { @@ -524,10 +519,6 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io int err = 0; *change_map = false; - if (map_empty(iotlb)) { - mlx5_vdpa_destroy_mr(mvdev); - return 0; - } mutex_lock(&mr->mkey_mtx); if (mr->initialized) { mlx5_vdpa_info(mvdev, "memory map update\n"); -- cgit From 879753c816dbbdb2a9a395aa4448d29feee92d1a Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 11 Aug 2021 08:37:59 +0300 Subject: vdpa/mlx5: Fix queue type selection logic get_queue_type() comments that splict virtqueue is preferred, however, the actual logic preferred packed virtqueues. Since firmware has not supported packed virtqueues we ended up using split virtqueues as was desired. Since we do not advertise support for packed virtqueues, we add a check to verify split virtqueues are indeed supported. Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20210811053759.66752-1-elic@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 14 ++++++++++---- include/linux/mlx5/mlx5_ifc_vdpa.h | 10 ++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 2a31467f7ac5..b1230fa2f5d1 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -753,12 +753,12 @@ static int get_queue_type(struct mlx5_vdpa_net *ndev) type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type); /* prefer split queue */ - if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED) - return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; + if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT) + return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; - WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)); + WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED)); - return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT; + return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED; } static bool vq_is_tx(u16 idx) @@ -2030,6 +2030,12 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) return -ENOSPC; mdev = mgtdev->madev->mdev; + if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) & + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) { + dev_warn(mdev->device, "missing support for split virtqueues\n"); + return -EOPNOTSUPP; + } + /* we save one virtqueue for control virtqueue should we require it */ max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues); max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS); diff --git a/include/linux/mlx5/mlx5_ifc_vdpa.h b/include/linux/mlx5/mlx5_ifc_vdpa.h index 98b56b75c625..1a9c9d94cb59 100644 --- a/include/linux/mlx5/mlx5_ifc_vdpa.h +++ b/include/linux/mlx5/mlx5_ifc_vdpa.h @@ -11,13 +11,15 @@ enum { }; enum { - MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = 0x1, // do I check this caps? - MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = 0x2, + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0, + MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1, }; enum { - MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT = 0, - MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED = 1, + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT = + BIT(MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT), + MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED = + BIT(MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED), }; struct mlx5_ifc_virtio_q_bits { -- cgit From 31697ef7f3f45293bba3da87bcc710953e97fc3e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 6 Aug 2021 09:43:11 +0900 Subject: pinctrl: k210: Fix k210_fpioa_probe() In k210_fpioa_probe(), add missing calls to clk_disable_unprepare() in case of error after cenabling the clk and pclk clocks. Also add missing error handling when enabling pclk. Reported-by: kernel test robot Reported-by: Dan Carpenter Fixes: d4c34d09ab03 ("pinctrl: Add RISC-V Canaan Kendryte K210 FPIOA driver") Cc: Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210806004311.52859-1-damien.lemoal@wdc.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-k210.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/pinctrl-k210.c b/drivers/pinctrl/pinctrl-k210.c index f831526d06ff..49e32684dbb2 100644 --- a/drivers/pinctrl/pinctrl-k210.c +++ b/drivers/pinctrl/pinctrl-k210.c @@ -950,23 +950,37 @@ static int k210_fpioa_probe(struct platform_device *pdev) return ret; pdata->pclk = devm_clk_get_optional(dev, "pclk"); - if (!IS_ERR(pdata->pclk)) - clk_prepare_enable(pdata->pclk); + if (!IS_ERR(pdata->pclk)) { + ret = clk_prepare_enable(pdata->pclk); + if (ret) + goto disable_clk; + } pdata->sysctl_map = syscon_regmap_lookup_by_phandle_args(np, "canaan,k210-sysctl-power", 1, &pdata->power_offset); - if (IS_ERR(pdata->sysctl_map)) - return PTR_ERR(pdata->sysctl_map); + if (IS_ERR(pdata->sysctl_map)) { + ret = PTR_ERR(pdata->sysctl_map); + goto disable_pclk; + } k210_fpioa_init_ties(pdata); pdata->pctl = pinctrl_register(&k210_pinctrl_desc, dev, (void *)pdata); - if (IS_ERR(pdata->pctl)) - return PTR_ERR(pdata->pctl); + if (IS_ERR(pdata->pctl)) { + ret = PTR_ERR(pdata->pctl); + goto disable_pclk; + } return 0; + +disable_pclk: + clk_disable_unprepare(pdata->pclk); +disable_clk: + clk_disable_unprepare(pdata->clk); + + return ret; } static const struct of_device_id k210_fpioa_dt_ids[] = { -- cgit From 2d3a1e3615c5449a4583010f41a6f824a4ffa03e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 10 Aug 2021 16:05:37 -0700 Subject: bpf: Add rcu_read_lock in bpf_get_current_[ancestor_]cgroup_id() helpers Currently, if bpf_get_current_cgroup_id() or bpf_get_current_ancestor_cgroup_id() helper is called with sleepable programs e.g., sleepable fentry/fmod_ret/fexit/lsm programs, a rcu warning may appear. For example, if I added the following hack to test_progs/test_lsm sleepable fentry program test_sys_setdomainname: --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -168,6 +168,10 @@ int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs) int buf = 0; long ret; + __u64 cg_id = bpf_get_current_cgroup_id(); + if (cg_id == 1000) + copy_test++; + ret = bpf_copy_from_user(&buf, sizeof(buf), ptr); if (len == -2 && ret == 0 && buf == 1234) copy_test++; I will hit the following rcu warning: include/linux/cgroup.h:481 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by test_progs/260: #0: ffffffffa5173360 (rcu_read_lock_trace){....}-{0:0}, at: __bpf_prog_enter_sleepable+0x0/0xa0 stack backtrace: CPU: 1 PID: 260 Comm: test_progs Tainted: G O 5.14.0-rc2+ #176 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x56/0x7b bpf_get_current_cgroup_id+0x9c/0xb1 bpf_prog_a29888d1c6706e09_test_sys_setdomainname+0x3e/0x89c bpf_trampoline_6442469132_0+0x2d/0x1000 __x64_sys_setdomainname+0x5/0x110 do_syscall_64+0x3a/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae I can get similar warning using bpf_get_current_ancestor_cgroup_id() helper. syzbot reported a similar issue in [1] for syscall program. Helper bpf_get_current_cgroup_id() or bpf_get_current_ancestor_cgroup_id() has the following callchain: task_dfl_cgroup task_css_set task_css_set_check and we have #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ lockdep_is_held(&cgroup_mutex) || \ lockdep_is_held(&css_set_lock) || \ ((task)->flags & PF_EXITING) || (__c)) Since cgroup_mutex/css_set_lock is not held and the task is not existing and rcu read_lock is not held, a warning will be issued. Note that bpf sleepable program is protected by rcu_read_lock_trace(). The above sleepable bpf programs are already protected by migrate_disable(). Adding rcu_read_lock() in these two helpers will silence the above warning. I marked the patch fixing 95b861a7935b ("bpf: Allow bpf_get_current_ancestor_cgroup_id for tracing") which added bpf_get_current_ancestor_cgroup_id() to tracing programs in 5.14. I think backporting 5.14 is probably good enough as sleepable progrems are not widely used. This patch should fix [1] as well since syscall program is a sleepable program protected with migrate_disable(). [1] https://lore.kernel.org/bpf/0000000000006d5cab05c7d9bb87@google.com/ Fixes: 95b861a7935b ("bpf: Allow bpf_get_current_ancestor_cgroup_id for tracing") Reported-by: syzbot+7ee5c2c09c284495371f@syzkaller.appspotmail.com Signed-off-by: Yonghong Song Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210810230537.2864668-1-yhs@fb.com --- kernel/bpf/helpers.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 7a97b2f4747d..55f83ea09dae 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -353,9 +353,15 @@ const struct bpf_func_proto bpf_jiffies64_proto = { #ifdef CONFIG_CGROUPS BPF_CALL_0(bpf_get_current_cgroup_id) { - struct cgroup *cgrp = task_dfl_cgroup(current); + struct cgroup *cgrp; + u64 cgrp_id; - return cgroup_id(cgrp); + rcu_read_lock(); + cgrp = task_dfl_cgroup(current); + cgrp_id = cgroup_id(cgrp); + rcu_read_unlock(); + + return cgrp_id; } const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { @@ -366,13 +372,17 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = { BPF_CALL_1(bpf_get_current_ancestor_cgroup_id, int, ancestor_level) { - struct cgroup *cgrp = task_dfl_cgroup(current); + struct cgroup *cgrp; struct cgroup *ancestor; + u64 cgrp_id; + rcu_read_lock(); + cgrp = task_dfl_cgroup(current); ancestor = cgroup_ancestor(cgrp, ancestor_level); - if (!ancestor) - return 0; - return cgroup_id(ancestor); + cgrp_id = ancestor ? cgroup_id(ancestor) : 0; + rcu_read_unlock(); + + return cgrp_id; } const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { -- cgit From b4d8a58f8dcfcc890f296696cadb76e77be44b5f Mon Sep 17 00:00:00 2001 From: Hsuan-Chi Kuo Date: Thu, 4 Mar 2021 17:37:08 -0600 Subject: seccomp: Fix setting loaded filter count during TSYNC The desired behavior is to set the caller's filter count to thread's. This value is reported via /proc, so this fixes the inaccurate count exposed to userspace; it is not used for reference counting, etc. Signed-off-by: Hsuan-Chi Kuo Link: https://lore.kernel.org/r/20210304233708.420597-1-hsuanchikuo@gmail.com Co-developed-by: Wiktor Garbacz Signed-off-by: Wiktor Garbacz Link: https://lore.kernel.org/lkml/20210810125158.329849-1-wiktorg@google.com Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Fixes: c818c03b661c ("seccomp: Report number of loaded filters in /proc/$pid/status") --- kernel/seccomp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 057e17f3215d..6469eca8078c 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -602,7 +602,7 @@ static inline void seccomp_sync_threads(unsigned long flags) smp_store_release(&thread->seccomp.filter, caller->seccomp.filter); atomic_set(&thread->seccomp.filter_count, - atomic_read(&thread->seccomp.filter_count)); + atomic_read(&caller->seccomp.filter_count)); /* * Don't let an unprivileged task work around -- cgit From b93dfa6bda4d4e88e5386490f2b277a26958f9d3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 11 Aug 2021 11:53:37 -0700 Subject: ACPI: NFIT: Fix support for virtual SPA ranges Fix the NFIT parsing code to treat a 0 index in a SPA Range Structure as a special case and not match Region Mapping Structures that use 0 to indicate that they are not mapped. Without this fix some platform BIOS descriptions of "virtual disk" ranges do not result in the pmem driver attaching to the range. Details: In addition to typical persistent memory ranges, the ACPI NFIT may also convey "virtual" ranges. These ranges are indicated by a UUID in the SPA Range Structure of UUID_VOLATILE_VIRTUAL_DISK, UUID_VOLATILE_VIRTUAL_CD, UUID_PERSISTENT_VIRTUAL_DISK, or UUID_PERSISTENT_VIRTUAL_CD. The critical difference between virtual ranges and UUID_PERSISTENT_MEMORY, is that virtual do not support associations with Region Mapping Structures. For this reason the "index" value of virtual SPA Range Structures is allowed to be 0. If a platform BIOS decides to represent NVDIMMs with disconnected "Region Mapping Structures" (range-index == 0), the kernel may falsely associate them with standalone ranges where the "SPA Range Structure Index" is also zero. When this happens the driver may falsely require labels where "virtual disks" are expected to be label-less. I.e. "label-less" is where the namespace-range == region-range and the pmem driver attaches with no user action to create a namespace. Cc: Jacek Zloch Cc: Lukasz Sobieraj Cc: "Lee, Chun-Yi" Cc: Fixes: c2f32acdf848 ("acpi, nfit: treat virtual ramdisk SPA as pmem region") Reported-by: Krzysztof Rusocki Reported-by: Damian Bassa Reviewed-by: Jeff Moyer Link: https://lore.kernel.org/r/162870796589.2521182.1240403310175570220.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 23d9a09d7060..a3ef6cce644c 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -3021,6 +3021,9 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev; struct nd_mapping_desc *mapping; + /* range index 0 == unmapped in SPA or invalid-SPA */ + if (memdev->range_index == 0 || spa->range_index == 0) + continue; if (memdev->range_index != spa->range_index) continue; if (count >= ND_MAX_MAPPINGS) { -- cgit From d9cee9f85b22fab88d2b76d2e92b18e3d0e6aa8c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 30 Jul 2021 09:46:04 -0700 Subject: libnvdimm/region: Fix label activation vs errors There are a few scenarios where init_active_labels() can return without registering deactivate_labels() to run when the region is disabled. In particular label error injection creates scenarios where a DIMM is disabled, but labels on other DIMMs in the region become activated. Arrange for init_active_labels() to always register deactivate_labels(). Reported-by: Krzysztof Kensicki Cc: Fixes: bf9bccc14c05 ("libnvdimm: pmem label sets and namespace instantiation.") Reviewed-by: Jeff Moyer Link: https://lore.kernel.org/r/162766356450.3223041.1183118139023841447.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 2403b71b601e..745478213ff2 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2527,7 +2527,7 @@ static void deactivate_labels(void *region) static int init_active_labels(struct nd_region *nd_region) { - int i; + int i, rc = 0; for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; @@ -2546,13 +2546,14 @@ static int init_active_labels(struct nd_region *nd_region) else if (test_bit(NDD_LABELING, &nvdimm->flags)) /* fail, labels needed to disambiguate dpa */; else - return 0; + continue; dev_err(&nd_region->dev, "%s: is %s, failing probe\n", dev_name(&nd_mapping->nvdimm->dev), test_bit(NDD_LOCKED, &nvdimm->flags) ? "locked" : "disabled"); - return -ENXIO; + rc = -ENXIO; + goto out; } nd_mapping->ndd = ndd; atomic_inc(&nvdimm->busy); @@ -2586,13 +2587,17 @@ static int init_active_labels(struct nd_region *nd_region) break; } - if (i < nd_region->ndr_mappings) { + if (i < nd_region->ndr_mappings) + rc = -ENOMEM; + +out: + if (rc) { deactivate_labels(nd_region); - return -ENOMEM; + return rc; } return devm_add_action_or_reset(&nd_region->dev, deactivate_labels, - nd_region); + nd_region); } int nd_region_register_namespaces(struct nd_region *nd_region, int *err) -- cgit From f21453b0ff6e307bfd59e7a126d9848cea25315c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 30 Jul 2021 13:00:20 -0700 Subject: tools/testing/nvdimm: Fix missing 'fallthrough' warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use "fallthrough;" to address: tools/testing/nvdimm/test/nfit.c: In function ‘nd_intel_test_finish_query’: tools/testing/nvdimm/test/nfit.c:436:37: warning: this statement may fall through [-Wimplicit-fallthrough=] 436 | fw->missed_activate = false; | ~~~~~~~~~~~~~~~~~~~~^~~~~~~ tools/testing/nvdimm/test/nfit.c:438:9: note: here 438 | case FW_STATE_UPDATED: | ^~~~ Reviewed-by: Jeff Moyer Link: https://lore.kernel.org/r/162767522046.3313209.14767278726893995797.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 54f367cbadae..b1bff5fb0f65 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -434,7 +434,7 @@ static int nd_intel_test_finish_query(struct nfit_test *t, dev_dbg(dev, "%s: transition out verify\n", __func__); fw->state = FW_STATE_UPDATED; fw->missed_activate = false; - /* fall through */ + fallthrough; case FW_STATE_UPDATED: nd_cmd->status = 0; /* bogus test version */ -- cgit From 848378812e40152abe9b9baf58ce2004f76fb988 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 30 Jul 2021 19:31:08 -0700 Subject: vmlinux.lds.h: Handle clang's module.{c,d}tor sections A recent change in LLVM causes module_{c,d}tor sections to appear when CONFIG_K{A,C}SAN are enabled, which results in orphan section warnings because these are not handled anywhere: ld.lld: warning: arch/x86/pci/built-in.a(legacy.o):(.text.asan.module_ctor) is being placed in '.text.asan.module_ctor' ld.lld: warning: arch/x86/pci/built-in.a(legacy.o):(.text.asan.module_dtor) is being placed in '.text.asan.module_dtor' ld.lld: warning: arch/x86/pci/built-in.a(legacy.o):(.text.tsan.module_ctor) is being placed in '.text.tsan.module_ctor' Fangrui explains: "the function asan.module_ctor has the SHF_GNU_RETAIN flag, so it is in a separate section even with -fno-function-sections (default)". Place them in the TEXT_TEXT section so that these technologies continue to work with the newer compiler versions. All of the KASAN and KCSAN KUnit tests continue to pass after this change. Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/1432 Link: https://github.com/llvm/llvm-project/commit/7b789562244ee941b7bf2cefeb3fc08a59a01865 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Reviewed-by: Fangrui Song Acked-by: Marco Elver Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210731023107.1932981-1-nathan@kernel.org --- include/asm-generic/vmlinux.lds.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 17325416e2de..62669b36a772 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -586,6 +586,7 @@ NOINSTR_TEXT \ *(.text..refcount) \ *(.ref.text) \ + *(.text.asan.* .text.tsan.*) \ TEXT_CFI_JT \ MEM_KEEP(init.text*) \ MEM_KEEP(exit.text*) \ -- cgit From 0f78399551146bfbed357759e2ad5abb8d39e50a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 11 Aug 2021 07:41:45 -1000 Subject: Revert "block/mq-deadline: Add cgroup support" This reverts commit 08a9ad8bf607 ("block/mq-deadline: Add cgroup support") and a follow-up commit c06bc5a3fb42 ("block/mq-deadline: Remove a WARN_ON_ONCE() call"). The added cgroup support has the following issues: * It breaks cgroup interface file format rule by adding custom elements to a nested key-value file. * It registers mq-deadline as a cgroup-aware policy even though all it's doing is collecting per-cgroup stats. Even if we need these stats, this isn't the right way to add them. * It hasn't been reviewed from cgroup side. Cc: Bart Van Assche Cc: Jens Axboe Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/Kconfig.iosched | 6 - block/Makefile | 2 - block/mq-deadline-cgroup.c | 126 ----- block/mq-deadline-cgroup.h | 114 ----- block/mq-deadline-main.c | 1175 -------------------------------------------- block/mq-deadline.c | 1130 ++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 1130 insertions(+), 1423 deletions(-) delete mode 100644 block/mq-deadline-cgroup.c delete mode 100644 block/mq-deadline-cgroup.h delete mode 100644 block/mq-deadline-main.c create mode 100644 block/mq-deadline.c diff --git a/block/Kconfig.iosched b/block/Kconfig.iosched index 64053d67a97b..2f2158e05a91 100644 --- a/block/Kconfig.iosched +++ b/block/Kconfig.iosched @@ -9,12 +9,6 @@ config MQ_IOSCHED_DEADLINE help MQ version of the deadline IO scheduler. -config MQ_IOSCHED_DEADLINE_CGROUP - tristate - default y - depends on MQ_IOSCHED_DEADLINE - depends on BLK_CGROUP - config MQ_IOSCHED_KYBER tristate "Kyber I/O scheduler" default y diff --git a/block/Makefile b/block/Makefile index bfbe4e13ca1e..1e1afa10f869 100644 --- a/block/Makefile +++ b/block/Makefile @@ -22,8 +22,6 @@ obj-$(CONFIG_BLK_CGROUP_IOPRIO) += blk-ioprio.o obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o -mq-deadline-y += mq-deadline-main.o -mq-deadline-$(CONFIG_MQ_IOSCHED_DEADLINE_CGROUP)+= mq-deadline-cgroup.o obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o obj-$(CONFIG_IOSCHED_BFQ) += bfq.o diff --git a/block/mq-deadline-cgroup.c b/block/mq-deadline-cgroup.c deleted file mode 100644 index 3b4bfddec39f..000000000000 --- a/block/mq-deadline-cgroup.c +++ /dev/null @@ -1,126 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include -#include - -#include "mq-deadline-cgroup.h" - -static struct blkcg_policy dd_blkcg_policy; - -static struct blkcg_policy_data *dd_cpd_alloc(gfp_t gfp) -{ - struct dd_blkcg *pd; - - pd = kzalloc(sizeof(*pd), gfp); - if (!pd) - return NULL; - pd->stats = alloc_percpu_gfp(typeof(*pd->stats), - GFP_KERNEL | __GFP_ZERO); - if (!pd->stats) { - kfree(pd); - return NULL; - } - return &pd->cpd; -} - -static void dd_cpd_free(struct blkcg_policy_data *cpd) -{ - struct dd_blkcg *dd_blkcg = container_of(cpd, typeof(*dd_blkcg), cpd); - - free_percpu(dd_blkcg->stats); - kfree(dd_blkcg); -} - -static struct dd_blkcg *dd_blkcg_from_pd(struct blkg_policy_data *pd) -{ - return container_of(blkcg_to_cpd(pd->blkg->blkcg, &dd_blkcg_policy), - struct dd_blkcg, cpd); -} - -/* - * Convert an association between a block cgroup and a request queue into a - * pointer to the mq-deadline information associated with a (blkcg, queue) pair. - */ -struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio) -{ - struct blkg_policy_data *pd; - - pd = blkg_to_pd(bio->bi_blkg, &dd_blkcg_policy); - if (!pd) - return NULL; - - return dd_blkcg_from_pd(pd); -} - -static size_t dd_pd_stat(struct blkg_policy_data *pd, char *buf, size_t size) -{ - static const char *const prio_class_name[] = { - [IOPRIO_CLASS_NONE] = "NONE", - [IOPRIO_CLASS_RT] = "RT", - [IOPRIO_CLASS_BE] = "BE", - [IOPRIO_CLASS_IDLE] = "IDLE", - }; - struct dd_blkcg *blkcg = dd_blkcg_from_pd(pd); - int res = 0; - u8 prio; - - for (prio = 0; prio < ARRAY_SIZE(blkcg->stats->stats); prio++) - res += scnprintf(buf + res, size - res, - " [%s] dispatched=%u inserted=%u merged=%u", - prio_class_name[prio], - ddcg_sum(blkcg, dispatched, prio) + - ddcg_sum(blkcg, merged, prio) - - ddcg_sum(blkcg, completed, prio), - ddcg_sum(blkcg, inserted, prio) - - ddcg_sum(blkcg, completed, prio), - ddcg_sum(blkcg, merged, prio)); - - return res; -} - -static struct blkg_policy_data *dd_pd_alloc(gfp_t gfp, struct request_queue *q, - struct blkcg *blkcg) -{ - struct dd_blkg *pd; - - pd = kzalloc(sizeof(*pd), gfp); - if (!pd) - return NULL; - return &pd->pd; -} - -static void dd_pd_free(struct blkg_policy_data *pd) -{ - struct dd_blkg *dd_blkg = container_of(pd, typeof(*dd_blkg), pd); - - kfree(dd_blkg); -} - -static struct blkcg_policy dd_blkcg_policy = { - .cpd_alloc_fn = dd_cpd_alloc, - .cpd_free_fn = dd_cpd_free, - - .pd_alloc_fn = dd_pd_alloc, - .pd_free_fn = dd_pd_free, - .pd_stat_fn = dd_pd_stat, -}; - -int dd_activate_policy(struct request_queue *q) -{ - return blkcg_activate_policy(q, &dd_blkcg_policy); -} - -void dd_deactivate_policy(struct request_queue *q) -{ - blkcg_deactivate_policy(q, &dd_blkcg_policy); -} - -int __init dd_blkcg_init(void) -{ - return blkcg_policy_register(&dd_blkcg_policy); -} - -void __exit dd_blkcg_exit(void) -{ - blkcg_policy_unregister(&dd_blkcg_policy); -} diff --git a/block/mq-deadline-cgroup.h b/block/mq-deadline-cgroup.h deleted file mode 100644 index 0143fd74f3ce..000000000000 --- a/block/mq-deadline-cgroup.h +++ /dev/null @@ -1,114 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -#if !defined(_MQ_DEADLINE_CGROUP_H_) -#define _MQ_DEADLINE_CGROUP_H_ - -#include - -struct request_queue; - -/** - * struct io_stats_per_prio - I/O statistics per I/O priority class. - * @inserted: Number of inserted requests. - * @merged: Number of merged requests. - * @dispatched: Number of dispatched requests. - * @completed: Number of I/O completions. - */ -struct io_stats_per_prio { - local_t inserted; - local_t merged; - local_t dispatched; - local_t completed; -}; - -/* I/O statistics per I/O cgroup per I/O priority class (IOPRIO_CLASS_*). */ -struct blkcg_io_stats { - struct io_stats_per_prio stats[4]; -}; - -/** - * struct dd_blkcg - Per cgroup data. - * @cpd: blkcg_policy_data structure. - * @stats: I/O statistics. - */ -struct dd_blkcg { - struct blkcg_policy_data cpd; /* must be the first member */ - struct blkcg_io_stats __percpu *stats; -}; - -/* - * Count one event of type 'event_type' and with I/O priority class - * 'prio_class'. - */ -#define ddcg_count(ddcg, event_type, prio_class) do { \ -if (ddcg) { \ - struct blkcg_io_stats *io_stats = get_cpu_ptr((ddcg)->stats); \ - \ - BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *)); \ - BUILD_BUG_ON(!__same_type((prio_class), u8)); \ - local_inc(&io_stats->stats[(prio_class)].event_type); \ - put_cpu_ptr(io_stats); \ -} \ -} while (0) - -/* - * Returns the total number of ddcg_count(ddcg, event_type, prio_class) calls - * across all CPUs. No locking or barriers since it is fine if the returned - * sum is slightly outdated. - */ -#define ddcg_sum(ddcg, event_type, prio) ({ \ - unsigned int cpu; \ - u32 sum = 0; \ - \ - BUILD_BUG_ON(!__same_type((ddcg), struct dd_blkcg *)); \ - BUILD_BUG_ON(!__same_type((prio), u8)); \ - for_each_present_cpu(cpu) \ - sum += local_read(&per_cpu_ptr((ddcg)->stats, cpu)-> \ - stats[(prio)].event_type); \ - sum; \ -}) - -#ifdef CONFIG_BLK_CGROUP - -/** - * struct dd_blkg - Per (cgroup, request queue) data. - * @pd: blkg_policy_data structure. - */ -struct dd_blkg { - struct blkg_policy_data pd; /* must be the first member */ -}; - -struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio); -int dd_activate_policy(struct request_queue *q); -void dd_deactivate_policy(struct request_queue *q); -int __init dd_blkcg_init(void); -void __exit dd_blkcg_exit(void); - -#else /* CONFIG_BLK_CGROUP */ - -static inline struct dd_blkcg *dd_blkcg_from_bio(struct bio *bio) -{ - return NULL; -} - -static inline int dd_activate_policy(struct request_queue *q) -{ - return 0; -} - -static inline void dd_deactivate_policy(struct request_queue *q) -{ -} - -static inline int dd_blkcg_init(void) -{ - return 0; -} - -static inline void dd_blkcg_exit(void) -{ -} - -#endif /* CONFIG_BLK_CGROUP */ - -#endif /* _MQ_DEADLINE_CGROUP_H_ */ diff --git a/block/mq-deadline-main.c b/block/mq-deadline-main.c deleted file mode 100644 index 6f612e6dc82b..000000000000 --- a/block/mq-deadline-main.c +++ /dev/null @@ -1,1175 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler, - * for the blk-mq scheduling framework - * - * Copyright (C) 2016 Jens Axboe - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "blk.h" -#include "blk-mq.h" -#include "blk-mq-debugfs.h" -#include "blk-mq-tag.h" -#include "blk-mq-sched.h" -#include "mq-deadline-cgroup.h" - -/* - * See Documentation/block/deadline-iosched.rst - */ -static const int read_expire = HZ / 2; /* max time before a read is submitted. */ -static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ -/* - * Time after which to dispatch lower priority requests even if higher - * priority requests are pending. - */ -static const int aging_expire = 10 * HZ; -static const int writes_starved = 2; /* max times reads can starve a write */ -static const int fifo_batch = 16; /* # of sequential requests treated as one - by the above parameters. For throughput. */ - -enum dd_data_dir { - DD_READ = READ, - DD_WRITE = WRITE, -}; - -enum { DD_DIR_COUNT = 2 }; - -enum dd_prio { - DD_RT_PRIO = 0, - DD_BE_PRIO = 1, - DD_IDLE_PRIO = 2, - DD_PRIO_MAX = 2, -}; - -enum { DD_PRIO_COUNT = 3 }; - -/* I/O statistics for all I/O priorities (enum dd_prio). */ -struct io_stats { - struct io_stats_per_prio stats[DD_PRIO_COUNT]; -}; - -/* - * Deadline scheduler data per I/O priority (enum dd_prio). Requests are - * present on both sort_list[] and fifo_list[]. - */ -struct dd_per_prio { - struct list_head dispatch; - struct rb_root sort_list[DD_DIR_COUNT]; - struct list_head fifo_list[DD_DIR_COUNT]; - /* Next request in FIFO order. Read, write or both are NULL. */ - struct request *next_rq[DD_DIR_COUNT]; -}; - -struct deadline_data { - /* - * run time data - */ - - /* Request queue that owns this data structure. */ - struct request_queue *queue; - - struct dd_per_prio per_prio[DD_PRIO_COUNT]; - - /* Data direction of latest dispatched request. */ - enum dd_data_dir last_dir; - unsigned int batching; /* number of sequential requests made */ - unsigned int starved; /* times reads have starved writes */ - - struct io_stats __percpu *stats; - - /* - * settings that change how the i/o scheduler behaves - */ - int fifo_expire[DD_DIR_COUNT]; - int fifo_batch; - int writes_starved; - int front_merges; - u32 async_depth; - int aging_expire; - - spinlock_t lock; - spinlock_t zone_lock; -}; - -/* Count one event of type 'event_type' and with I/O priority 'prio' */ -#define dd_count(dd, event_type, prio) do { \ - struct io_stats *io_stats = get_cpu_ptr((dd)->stats); \ - \ - BUILD_BUG_ON(!__same_type((dd), struct deadline_data *)); \ - BUILD_BUG_ON(!__same_type((prio), enum dd_prio)); \ - local_inc(&io_stats->stats[(prio)].event_type); \ - put_cpu_ptr(io_stats); \ -} while (0) - -/* - * Returns the total number of dd_count(dd, event_type, prio) calls across all - * CPUs. No locking or barriers since it is fine if the returned sum is slightly - * outdated. - */ -#define dd_sum(dd, event_type, prio) ({ \ - unsigned int cpu; \ - u32 sum = 0; \ - \ - BUILD_BUG_ON(!__same_type((dd), struct deadline_data *)); \ - BUILD_BUG_ON(!__same_type((prio), enum dd_prio)); \ - for_each_present_cpu(cpu) \ - sum += local_read(&per_cpu_ptr((dd)->stats, cpu)-> \ - stats[(prio)].event_type); \ - sum; \ -}) - -/* Maps an I/O priority class to a deadline scheduler priority. */ -static const enum dd_prio ioprio_class_to_prio[] = { - [IOPRIO_CLASS_NONE] = DD_BE_PRIO, - [IOPRIO_CLASS_RT] = DD_RT_PRIO, - [IOPRIO_CLASS_BE] = DD_BE_PRIO, - [IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO, -}; - -static inline struct rb_root * -deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq) -{ - return &per_prio->sort_list[rq_data_dir(rq)]; -} - -/* - * Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to a - * request. - */ -static u8 dd_rq_ioclass(struct request *rq) -{ - return IOPRIO_PRIO_CLASS(req_get_ioprio(rq)); -} - -/* - * get the request after `rq' in sector-sorted order - */ -static inline struct request * -deadline_latter_request(struct request *rq) -{ - struct rb_node *node = rb_next(&rq->rb_node); - - if (node) - return rb_entry_rq(node); - - return NULL; -} - -static void -deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq) -{ - struct rb_root *root = deadline_rb_root(per_prio, rq); - - elv_rb_add(root, rq); -} - -static inline void -deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq) -{ - const enum dd_data_dir data_dir = rq_data_dir(rq); - - if (per_prio->next_rq[data_dir] == rq) - per_prio->next_rq[data_dir] = deadline_latter_request(rq); - - elv_rb_del(deadline_rb_root(per_prio, rq), rq); -} - -/* - * remove rq from rbtree and fifo. - */ -static void deadline_remove_request(struct request_queue *q, - struct dd_per_prio *per_prio, - struct request *rq) -{ - list_del_init(&rq->queuelist); - - /* - * We might not be on the rbtree, if we are doing an insert merge - */ - if (!RB_EMPTY_NODE(&rq->rb_node)) - deadline_del_rq_rb(per_prio, rq); - - elv_rqhash_del(q, rq); - if (q->last_merge == rq) - q->last_merge = NULL; -} - -static void dd_request_merged(struct request_queue *q, struct request *req, - enum elv_merge type) -{ - struct deadline_data *dd = q->elevator->elevator_data; - const u8 ioprio_class = dd_rq_ioclass(req); - const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; - struct dd_per_prio *per_prio = &dd->per_prio[prio]; - - /* - * if the merge was a front merge, we need to reposition request - */ - if (type == ELEVATOR_FRONT_MERGE) { - elv_rb_del(deadline_rb_root(per_prio, req), req); - deadline_add_rq_rb(per_prio, req); - } -} - -/* - * Callback function that is invoked after @next has been merged into @req. - */ -static void dd_merged_requests(struct request_queue *q, struct request *req, - struct request *next) -{ - struct deadline_data *dd = q->elevator->elevator_data; - const u8 ioprio_class = dd_rq_ioclass(next); - const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; - struct dd_blkcg *blkcg = next->elv.priv[0]; - - dd_count(dd, merged, prio); - ddcg_count(blkcg, merged, ioprio_class); - - /* - * if next expires before rq, assign its expire time to rq - * and move into next position (next will be deleted) in fifo - */ - if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { - if (time_before((unsigned long)next->fifo_time, - (unsigned long)req->fifo_time)) { - list_move(&req->queuelist, &next->queuelist); - req->fifo_time = next->fifo_time; - } - } - - /* - * kill knowledge of next, this one is a goner - */ - deadline_remove_request(q, &dd->per_prio[prio], next); -} - -/* - * move an entry to dispatch queue - */ -static void -deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio, - struct request *rq) -{ - const enum dd_data_dir data_dir = rq_data_dir(rq); - - per_prio->next_rq[data_dir] = deadline_latter_request(rq); - - /* - * take it off the sort and fifo list - */ - deadline_remove_request(rq->q, per_prio, rq); -} - -/* Number of requests queued for a given priority level. */ -static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio) -{ - return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio); -} - -/* - * deadline_check_fifo returns 0 if there are no expired requests on the fifo, - * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) - */ -static inline int deadline_check_fifo(struct dd_per_prio *per_prio, - enum dd_data_dir data_dir) -{ - struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next); - - /* - * rq is expired! - */ - if (time_after_eq(jiffies, (unsigned long)rq->fifo_time)) - return 1; - - return 0; -} - -/* - * For the specified data direction, return the next request to - * dispatch using arrival ordered lists. - */ -static struct request * -deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio, - enum dd_data_dir data_dir) -{ - struct request *rq; - unsigned long flags; - - if (list_empty(&per_prio->fifo_list[data_dir])) - return NULL; - - rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next); - if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q)) - return rq; - - /* - * Look for a write request that can be dispatched, that is one with - * an unlocked target zone. - */ - spin_lock_irqsave(&dd->zone_lock, flags); - list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) { - if (blk_req_can_dispatch_to_zone(rq)) - goto out; - } - rq = NULL; -out: - spin_unlock_irqrestore(&dd->zone_lock, flags); - - return rq; -} - -/* - * For the specified data direction, return the next request to - * dispatch using sector position sorted lists. - */ -static struct request * -deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio, - enum dd_data_dir data_dir) -{ - struct request *rq; - unsigned long flags; - - rq = per_prio->next_rq[data_dir]; - if (!rq) - return NULL; - - if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q)) - return rq; - - /* - * Look for a write request that can be dispatched, that is one with - * an unlocked target zone. - */ - spin_lock_irqsave(&dd->zone_lock, flags); - while (rq) { - if (blk_req_can_dispatch_to_zone(rq)) - break; - rq = deadline_latter_request(rq); - } - spin_unlock_irqrestore(&dd->zone_lock, flags); - - return rq; -} - -/* - * deadline_dispatch_requests selects the best request according to - * read/write expire, fifo_batch, etc and with a start time <= @latest. - */ -static struct request *__dd_dispatch_request(struct deadline_data *dd, - struct dd_per_prio *per_prio, - u64 latest_start_ns) -{ - struct request *rq, *next_rq; - enum dd_data_dir data_dir; - struct dd_blkcg *blkcg; - enum dd_prio prio; - u8 ioprio_class; - - lockdep_assert_held(&dd->lock); - - if (!list_empty(&per_prio->dispatch)) { - rq = list_first_entry(&per_prio->dispatch, struct request, - queuelist); - if (rq->start_time_ns > latest_start_ns) - return NULL; - list_del_init(&rq->queuelist); - goto done; - } - - /* - * batches are currently reads XOR writes - */ - rq = deadline_next_request(dd, per_prio, dd->last_dir); - if (rq && dd->batching < dd->fifo_batch) - /* we have a next request are still entitled to batch */ - goto dispatch_request; - - /* - * at this point we are not running a batch. select the appropriate - * data direction (read / write) - */ - - if (!list_empty(&per_prio->fifo_list[DD_READ])) { - BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_READ])); - - if (deadline_fifo_request(dd, per_prio, DD_WRITE) && - (dd->starved++ >= dd->writes_starved)) - goto dispatch_writes; - - data_dir = DD_READ; - - goto dispatch_find_request; - } - - /* - * there are either no reads or writes have been starved - */ - - if (!list_empty(&per_prio->fifo_list[DD_WRITE])) { -dispatch_writes: - BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_WRITE])); - - dd->starved = 0; - - data_dir = DD_WRITE; - - goto dispatch_find_request; - } - - return NULL; - -dispatch_find_request: - /* - * we are not running a batch, find best request for selected data_dir - */ - next_rq = deadline_next_request(dd, per_prio, data_dir); - if (deadline_check_fifo(per_prio, data_dir) || !next_rq) { - /* - * A deadline has expired, the last request was in the other - * direction, or we have run out of higher-sectored requests. - * Start again from the request with the earliest expiry time. - */ - rq = deadline_fifo_request(dd, per_prio, data_dir); - } else { - /* - * The last req was the same dir and we have a next request in - * sort order. No expired requests so continue on from here. - */ - rq = next_rq; - } - - /* - * For a zoned block device, if we only have writes queued and none of - * them can be dispatched, rq will be NULL. - */ - if (!rq) - return NULL; - - dd->last_dir = data_dir; - dd->batching = 0; - -dispatch_request: - if (rq->start_time_ns > latest_start_ns) - return NULL; - /* - * rq is the selected appropriate request. - */ - dd->batching++; - deadline_move_request(dd, per_prio, rq); -done: - ioprio_class = dd_rq_ioclass(rq); - prio = ioprio_class_to_prio[ioprio_class]; - dd_count(dd, dispatched, prio); - blkcg = rq->elv.priv[0]; - ddcg_count(blkcg, dispatched, ioprio_class); - /* - * If the request needs its target zone locked, do it. - */ - blk_req_zone_write_lock(rq); - rq->rq_flags |= RQF_STARTED; - return rq; -} - -/* - * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests(). - * - * One confusing aspect here is that we get called for a specific - * hardware queue, but we may return a request that is for a - * different hardware queue. This is because mq-deadline has shared - * state for all hardware queues, in terms of sorting, FIFOs, etc. - */ -static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) -{ - struct deadline_data *dd = hctx->queue->elevator->elevator_data; - const u64 now_ns = ktime_get_ns(); - struct request *rq = NULL; - enum dd_prio prio; - - spin_lock(&dd->lock); - /* - * Start with dispatching requests whose deadline expired more than - * aging_expire jiffies ago. - */ - for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) { - rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now_ns - - jiffies_to_nsecs(dd->aging_expire)); - if (rq) - goto unlock; - } - /* - * Next, dispatch requests in priority order. Ignore lower priority - * requests if any higher priority requests are pending. - */ - for (prio = 0; prio <= DD_PRIO_MAX; prio++) { - rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now_ns); - if (rq || dd_queued(dd, prio)) - break; - } - -unlock: - spin_unlock(&dd->lock); - - return rq; -} - -/* - * Called by __blk_mq_alloc_request(). The shallow_depth value set by this - * function is used by __blk_mq_get_tag(). - */ -static void dd_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) -{ - struct deadline_data *dd = data->q->elevator->elevator_data; - - /* Do not throttle synchronous reads. */ - if (op_is_sync(op) && !op_is_write(op)) - return; - - /* - * Throttle asynchronous requests and writes such that these requests - * do not block the allocation of synchronous requests. - */ - data->shallow_depth = dd->async_depth; -} - -/* Called by blk_mq_update_nr_requests(). */ -static void dd_depth_updated(struct blk_mq_hw_ctx *hctx) -{ - struct request_queue *q = hctx->queue; - struct deadline_data *dd = q->elevator->elevator_data; - struct blk_mq_tags *tags = hctx->sched_tags; - - dd->async_depth = max(1UL, 3 * q->nr_requests / 4); - - sbitmap_queue_min_shallow_depth(tags->bitmap_tags, dd->async_depth); -} - -/* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */ -static int dd_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) -{ - dd_depth_updated(hctx); - return 0; -} - -static void dd_exit_sched(struct elevator_queue *e) -{ - struct deadline_data *dd = e->elevator_data; - enum dd_prio prio; - - dd_deactivate_policy(dd->queue); - - for (prio = 0; prio <= DD_PRIO_MAX; prio++) { - struct dd_per_prio *per_prio = &dd->per_prio[prio]; - - WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ])); - WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE])); - } - - free_percpu(dd->stats); - - kfree(dd); -} - -/* - * Initialize elevator private data (deadline_data) and associate with blkcg. - */ -static int dd_init_sched(struct request_queue *q, struct elevator_type *e) -{ - struct deadline_data *dd; - struct elevator_queue *eq; - enum dd_prio prio; - int ret = -ENOMEM; - - /* - * Initialization would be very tricky if the queue is not frozen, - * hence the warning statement below. - */ - WARN_ON_ONCE(!percpu_ref_is_zero(&q->q_usage_counter)); - - eq = elevator_alloc(q, e); - if (!eq) - return ret; - - dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node); - if (!dd) - goto put_eq; - - eq->elevator_data = dd; - - dd->stats = alloc_percpu_gfp(typeof(*dd->stats), - GFP_KERNEL | __GFP_ZERO); - if (!dd->stats) - goto free_dd; - - dd->queue = q; - - for (prio = 0; prio <= DD_PRIO_MAX; prio++) { - struct dd_per_prio *per_prio = &dd->per_prio[prio]; - - INIT_LIST_HEAD(&per_prio->dispatch); - INIT_LIST_HEAD(&per_prio->fifo_list[DD_READ]); - INIT_LIST_HEAD(&per_prio->fifo_list[DD_WRITE]); - per_prio->sort_list[DD_READ] = RB_ROOT; - per_prio->sort_list[DD_WRITE] = RB_ROOT; - } - dd->fifo_expire[DD_READ] = read_expire; - dd->fifo_expire[DD_WRITE] = write_expire; - dd->writes_starved = writes_starved; - dd->front_merges = 1; - dd->last_dir = DD_WRITE; - dd->fifo_batch = fifo_batch; - dd->aging_expire = aging_expire; - spin_lock_init(&dd->lock); - spin_lock_init(&dd->zone_lock); - - ret = dd_activate_policy(q); - if (ret) - goto free_stats; - - ret = 0; - q->elevator = eq; - return 0; - -free_stats: - free_percpu(dd->stats); - -free_dd: - kfree(dd); - -put_eq: - kobject_put(&eq->kobj); - return ret; -} - -/* - * Try to merge @bio into an existing request. If @bio has been merged into - * an existing request, store the pointer to that request into *@rq. - */ -static int dd_request_merge(struct request_queue *q, struct request **rq, - struct bio *bio) -{ - struct deadline_data *dd = q->elevator->elevator_data; - const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio); - const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; - struct dd_per_prio *per_prio = &dd->per_prio[prio]; - sector_t sector = bio_end_sector(bio); - struct request *__rq; - - if (!dd->front_merges) - return ELEVATOR_NO_MERGE; - - __rq = elv_rb_find(&per_prio->sort_list[bio_data_dir(bio)], sector); - if (__rq) { - BUG_ON(sector != blk_rq_pos(__rq)); - - if (elv_bio_merge_ok(__rq, bio)) { - *rq = __rq; - return ELEVATOR_FRONT_MERGE; - } - } - - return ELEVATOR_NO_MERGE; -} - -/* - * Attempt to merge a bio into an existing request. This function is called - * before @bio is associated with a request. - */ -static bool dd_bio_merge(struct request_queue *q, struct bio *bio, - unsigned int nr_segs) -{ - struct deadline_data *dd = q->elevator->elevator_data; - struct request *free = NULL; - bool ret; - - spin_lock(&dd->lock); - ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); - spin_unlock(&dd->lock); - - if (free) - blk_mq_free_request(free); - - return ret; -} - -/* - * add rq to rbtree and fifo - */ -static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, - bool at_head) -{ - struct request_queue *q = hctx->queue; - struct deadline_data *dd = q->elevator->elevator_data; - const enum dd_data_dir data_dir = rq_data_dir(rq); - u16 ioprio = req_get_ioprio(rq); - u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio); - struct dd_per_prio *per_prio; - enum dd_prio prio; - struct dd_blkcg *blkcg; - LIST_HEAD(free); - - lockdep_assert_held(&dd->lock); - - /* - * This may be a requeue of a write request that has locked its - * target zone. If it is the case, this releases the zone lock. - */ - blk_req_zone_write_unlock(rq); - - /* - * If a block cgroup has been associated with the submitter and if an - * I/O priority has been set in the associated block cgroup, use the - * lowest of the cgroup priority and the request priority for the - * request. If no priority has been set in the request, use the cgroup - * priority. - */ - prio = ioprio_class_to_prio[ioprio_class]; - dd_count(dd, inserted, prio); - blkcg = dd_blkcg_from_bio(rq->bio); - ddcg_count(blkcg, inserted, ioprio_class); - rq->elv.priv[0] = blkcg; - - if (blk_mq_sched_try_insert_merge(q, rq, &free)) { - blk_mq_free_requests(&free); - return; - } - - trace_block_rq_insert(rq); - - per_prio = &dd->per_prio[prio]; - if (at_head) { - list_add(&rq->queuelist, &per_prio->dispatch); - } else { - deadline_add_rq_rb(per_prio, rq); - - if (rq_mergeable(rq)) { - elv_rqhash_add(q, rq); - if (!q->last_merge) - q->last_merge = rq; - } - - /* - * set expire time and add to fifo list - */ - rq->fifo_time = jiffies + dd->fifo_expire[data_dir]; - list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]); - } -} - -/* - * Called from blk_mq_sched_insert_request() or blk_mq_sched_insert_requests(). - */ -static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, - struct list_head *list, bool at_head) -{ - struct request_queue *q = hctx->queue; - struct deadline_data *dd = q->elevator->elevator_data; - - spin_lock(&dd->lock); - while (!list_empty(list)) { - struct request *rq; - - rq = list_first_entry(list, struct request, queuelist); - list_del_init(&rq->queuelist); - dd_insert_request(hctx, rq, at_head); - } - spin_unlock(&dd->lock); -} - -/* Callback from inside blk_mq_rq_ctx_init(). */ -static void dd_prepare_request(struct request *rq) -{ - rq->elv.priv[0] = NULL; -} - -/* - * Callback from inside blk_mq_free_request(). - * - * For zoned block devices, write unlock the target zone of - * completed write requests. Do this while holding the zone lock - * spinlock so that the zone is never unlocked while deadline_fifo_request() - * or deadline_next_request() are executing. This function is called for - * all requests, whether or not these requests complete successfully. - * - * For a zoned block device, __dd_dispatch_request() may have stopped - * dispatching requests if all the queued requests are write requests directed - * at zones that are already locked due to on-going write requests. To ensure - * write request dispatch progress in this case, mark the queue as needing a - * restart to ensure that the queue is run again after completion of the - * request and zones being unlocked. - */ -static void dd_finish_request(struct request *rq) -{ - struct request_queue *q = rq->q; - struct deadline_data *dd = q->elevator->elevator_data; - struct dd_blkcg *blkcg = rq->elv.priv[0]; - const u8 ioprio_class = dd_rq_ioclass(rq); - const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; - struct dd_per_prio *per_prio = &dd->per_prio[prio]; - - dd_count(dd, completed, prio); - ddcg_count(blkcg, completed, ioprio_class); - - if (blk_queue_is_zoned(q)) { - unsigned long flags; - - spin_lock_irqsave(&dd->zone_lock, flags); - blk_req_zone_write_unlock(rq); - if (!list_empty(&per_prio->fifo_list[DD_WRITE])) - blk_mq_sched_mark_restart_hctx(rq->mq_hctx); - spin_unlock_irqrestore(&dd->zone_lock, flags); - } -} - -static bool dd_has_work_for_prio(struct dd_per_prio *per_prio) -{ - return !list_empty_careful(&per_prio->dispatch) || - !list_empty_careful(&per_prio->fifo_list[DD_READ]) || - !list_empty_careful(&per_prio->fifo_list[DD_WRITE]); -} - -static bool dd_has_work(struct blk_mq_hw_ctx *hctx) -{ - struct deadline_data *dd = hctx->queue->elevator->elevator_data; - enum dd_prio prio; - - for (prio = 0; prio <= DD_PRIO_MAX; prio++) - if (dd_has_work_for_prio(&dd->per_prio[prio])) - return true; - - return false; -} - -/* - * sysfs parts below - */ -#define SHOW_INT(__FUNC, __VAR) \ -static ssize_t __FUNC(struct elevator_queue *e, char *page) \ -{ \ - struct deadline_data *dd = e->elevator_data; \ - \ - return sysfs_emit(page, "%d\n", __VAR); \ -} -#define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR)) -SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]); -SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]); -SHOW_JIFFIES(deadline_aging_expire_show, dd->aging_expire); -SHOW_INT(deadline_writes_starved_show, dd->writes_starved); -SHOW_INT(deadline_front_merges_show, dd->front_merges); -SHOW_INT(deadline_async_depth_show, dd->front_merges); -SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch); -#undef SHOW_INT -#undef SHOW_JIFFIES - -#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ -static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ -{ \ - struct deadline_data *dd = e->elevator_data; \ - int __data, __ret; \ - \ - __ret = kstrtoint(page, 0, &__data); \ - if (__ret < 0) \ - return __ret; \ - if (__data < (MIN)) \ - __data = (MIN); \ - else if (__data > (MAX)) \ - __data = (MAX); \ - *(__PTR) = __CONV(__data); \ - return count; \ -} -#define STORE_INT(__FUNC, __PTR, MIN, MAX) \ - STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, ) -#define STORE_JIFFIES(__FUNC, __PTR, MIN, MAX) \ - STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies) -STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX); -STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX); -STORE_JIFFIES(deadline_aging_expire_store, &dd->aging_expire, 0, INT_MAX); -STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX); -STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1); -STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX); -STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX); -#undef STORE_FUNCTION -#undef STORE_INT -#undef STORE_JIFFIES - -#define DD_ATTR(name) \ - __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store) - -static struct elv_fs_entry deadline_attrs[] = { - DD_ATTR(read_expire), - DD_ATTR(write_expire), - DD_ATTR(writes_starved), - DD_ATTR(front_merges), - DD_ATTR(async_depth), - DD_ATTR(fifo_batch), - DD_ATTR(aging_expire), - __ATTR_NULL -}; - -#ifdef CONFIG_BLK_DEBUG_FS -#define DEADLINE_DEBUGFS_DDIR_ATTRS(prio, data_dir, name) \ -static void *deadline_##name##_fifo_start(struct seq_file *m, \ - loff_t *pos) \ - __acquires(&dd->lock) \ -{ \ - struct request_queue *q = m->private; \ - struct deadline_data *dd = q->elevator->elevator_data; \ - struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ - \ - spin_lock(&dd->lock); \ - return seq_list_start(&per_prio->fifo_list[data_dir], *pos); \ -} \ - \ -static void *deadline_##name##_fifo_next(struct seq_file *m, void *v, \ - loff_t *pos) \ -{ \ - struct request_queue *q = m->private; \ - struct deadline_data *dd = q->elevator->elevator_data; \ - struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ - \ - return seq_list_next(v, &per_prio->fifo_list[data_dir], pos); \ -} \ - \ -static void deadline_##name##_fifo_stop(struct seq_file *m, void *v) \ - __releases(&dd->lock) \ -{ \ - struct request_queue *q = m->private; \ - struct deadline_data *dd = q->elevator->elevator_data; \ - \ - spin_unlock(&dd->lock); \ -} \ - \ -static const struct seq_operations deadline_##name##_fifo_seq_ops = { \ - .start = deadline_##name##_fifo_start, \ - .next = deadline_##name##_fifo_next, \ - .stop = deadline_##name##_fifo_stop, \ - .show = blk_mq_debugfs_rq_show, \ -}; \ - \ -static int deadline_##name##_next_rq_show(void *data, \ - struct seq_file *m) \ -{ \ - struct request_queue *q = data; \ - struct deadline_data *dd = q->elevator->elevator_data; \ - struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ - struct request *rq = per_prio->next_rq[data_dir]; \ - \ - if (rq) \ - __blk_mq_debugfs_rq_show(m, rq); \ - return 0; \ -} - -DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_READ, read0); -DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_WRITE, write0); -DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_READ, read1); -DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_WRITE, write1); -DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_READ, read2); -DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_WRITE, write2); -#undef DEADLINE_DEBUGFS_DDIR_ATTRS - -static int deadline_batching_show(void *data, struct seq_file *m) -{ - struct request_queue *q = data; - struct deadline_data *dd = q->elevator->elevator_data; - - seq_printf(m, "%u\n", dd->batching); - return 0; -} - -static int deadline_starved_show(void *data, struct seq_file *m) -{ - struct request_queue *q = data; - struct deadline_data *dd = q->elevator->elevator_data; - - seq_printf(m, "%u\n", dd->starved); - return 0; -} - -static int dd_async_depth_show(void *data, struct seq_file *m) -{ - struct request_queue *q = data; - struct deadline_data *dd = q->elevator->elevator_data; - - seq_printf(m, "%u\n", dd->async_depth); - return 0; -} - -static int dd_queued_show(void *data, struct seq_file *m) -{ - struct request_queue *q = data; - struct deadline_data *dd = q->elevator->elevator_data; - - seq_printf(m, "%u %u %u\n", dd_queued(dd, DD_RT_PRIO), - dd_queued(dd, DD_BE_PRIO), - dd_queued(dd, DD_IDLE_PRIO)); - return 0; -} - -/* Number of requests owned by the block driver for a given priority. */ -static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio) -{ - return dd_sum(dd, dispatched, prio) + dd_sum(dd, merged, prio) - - dd_sum(dd, completed, prio); -} - -static int dd_owned_by_driver_show(void *data, struct seq_file *m) -{ - struct request_queue *q = data; - struct deadline_data *dd = q->elevator->elevator_data; - - seq_printf(m, "%u %u %u\n", dd_owned_by_driver(dd, DD_RT_PRIO), - dd_owned_by_driver(dd, DD_BE_PRIO), - dd_owned_by_driver(dd, DD_IDLE_PRIO)); - return 0; -} - -#define DEADLINE_DISPATCH_ATTR(prio) \ -static void *deadline_dispatch##prio##_start(struct seq_file *m, \ - loff_t *pos) \ - __acquires(&dd->lock) \ -{ \ - struct request_queue *q = m->private; \ - struct deadline_data *dd = q->elevator->elevator_data; \ - struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ - \ - spin_lock(&dd->lock); \ - return seq_list_start(&per_prio->dispatch, *pos); \ -} \ - \ -static void *deadline_dispatch##prio##_next(struct seq_file *m, \ - void *v, loff_t *pos) \ -{ \ - struct request_queue *q = m->private; \ - struct deadline_data *dd = q->elevator->elevator_data; \ - struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ - \ - return seq_list_next(v, &per_prio->dispatch, pos); \ -} \ - \ -static void deadline_dispatch##prio##_stop(struct seq_file *m, void *v) \ - __releases(&dd->lock) \ -{ \ - struct request_queue *q = m->private; \ - struct deadline_data *dd = q->elevator->elevator_data; \ - \ - spin_unlock(&dd->lock); \ -} \ - \ -static const struct seq_operations deadline_dispatch##prio##_seq_ops = { \ - .start = deadline_dispatch##prio##_start, \ - .next = deadline_dispatch##prio##_next, \ - .stop = deadline_dispatch##prio##_stop, \ - .show = blk_mq_debugfs_rq_show, \ -} - -DEADLINE_DISPATCH_ATTR(0); -DEADLINE_DISPATCH_ATTR(1); -DEADLINE_DISPATCH_ATTR(2); -#undef DEADLINE_DISPATCH_ATTR - -#define DEADLINE_QUEUE_DDIR_ATTRS(name) \ - {#name "_fifo_list", 0400, \ - .seq_ops = &deadline_##name##_fifo_seq_ops} -#define DEADLINE_NEXT_RQ_ATTR(name) \ - {#name "_next_rq", 0400, deadline_##name##_next_rq_show} -static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = { - DEADLINE_QUEUE_DDIR_ATTRS(read0), - DEADLINE_QUEUE_DDIR_ATTRS(write0), - DEADLINE_QUEUE_DDIR_ATTRS(read1), - DEADLINE_QUEUE_DDIR_ATTRS(write1), - DEADLINE_QUEUE_DDIR_ATTRS(read2), - DEADLINE_QUEUE_DDIR_ATTRS(write2), - DEADLINE_NEXT_RQ_ATTR(read0), - DEADLINE_NEXT_RQ_ATTR(write0), - DEADLINE_NEXT_RQ_ATTR(read1), - DEADLINE_NEXT_RQ_ATTR(write1), - DEADLINE_NEXT_RQ_ATTR(read2), - DEADLINE_NEXT_RQ_ATTR(write2), - {"batching", 0400, deadline_batching_show}, - {"starved", 0400, deadline_starved_show}, - {"async_depth", 0400, dd_async_depth_show}, - {"dispatch0", 0400, .seq_ops = &deadline_dispatch0_seq_ops}, - {"dispatch1", 0400, .seq_ops = &deadline_dispatch1_seq_ops}, - {"dispatch2", 0400, .seq_ops = &deadline_dispatch2_seq_ops}, - {"owned_by_driver", 0400, dd_owned_by_driver_show}, - {"queued", 0400, dd_queued_show}, - {}, -}; -#undef DEADLINE_QUEUE_DDIR_ATTRS -#endif - -static struct elevator_type mq_deadline = { - .ops = { - .depth_updated = dd_depth_updated, - .limit_depth = dd_limit_depth, - .insert_requests = dd_insert_requests, - .dispatch_request = dd_dispatch_request, - .prepare_request = dd_prepare_request, - .finish_request = dd_finish_request, - .next_request = elv_rb_latter_request, - .former_request = elv_rb_former_request, - .bio_merge = dd_bio_merge, - .request_merge = dd_request_merge, - .requests_merged = dd_merged_requests, - .request_merged = dd_request_merged, - .has_work = dd_has_work, - .init_sched = dd_init_sched, - .exit_sched = dd_exit_sched, - .init_hctx = dd_init_hctx, - }, - -#ifdef CONFIG_BLK_DEBUG_FS - .queue_debugfs_attrs = deadline_queue_debugfs_attrs, -#endif - .elevator_attrs = deadline_attrs, - .elevator_name = "mq-deadline", - .elevator_alias = "deadline", - .elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE, - .elevator_owner = THIS_MODULE, -}; -MODULE_ALIAS("mq-deadline-iosched"); - -static int __init deadline_init(void) -{ - int ret; - - ret = elv_register(&mq_deadline); - if (ret) - goto out; - ret = dd_blkcg_init(); - if (ret) - goto unreg; - -out: - return ret; - -unreg: - elv_unregister(&mq_deadline); - goto out; -} - -static void __exit deadline_exit(void) -{ - dd_blkcg_exit(); - elv_unregister(&mq_deadline); -} - -module_init(deadline_init); -module_exit(deadline_exit); - -MODULE_AUTHOR("Jens Axboe, Damien Le Moal and Bart Van Assche"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("MQ deadline IO scheduler"); diff --git a/block/mq-deadline.c b/block/mq-deadline.c new file mode 100644 index 000000000000..a09761cbdf12 --- /dev/null +++ b/block/mq-deadline.c @@ -0,0 +1,1130 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * MQ Deadline i/o scheduler - adaptation of the legacy deadline scheduler, + * for the blk-mq scheduling framework + * + * Copyright (C) 2016 Jens Axboe + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "blk.h" +#include "blk-mq.h" +#include "blk-mq-debugfs.h" +#include "blk-mq-tag.h" +#include "blk-mq-sched.h" + +/* + * See Documentation/block/deadline-iosched.rst + */ +static const int read_expire = HZ / 2; /* max time before a read is submitted. */ +static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ +/* + * Time after which to dispatch lower priority requests even if higher + * priority requests are pending. + */ +static const int aging_expire = 10 * HZ; +static const int writes_starved = 2; /* max times reads can starve a write */ +static const int fifo_batch = 16; /* # of sequential requests treated as one + by the above parameters. For throughput. */ + +enum dd_data_dir { + DD_READ = READ, + DD_WRITE = WRITE, +}; + +enum { DD_DIR_COUNT = 2 }; + +enum dd_prio { + DD_RT_PRIO = 0, + DD_BE_PRIO = 1, + DD_IDLE_PRIO = 2, + DD_PRIO_MAX = 2, +}; + +enum { DD_PRIO_COUNT = 3 }; + +/* I/O statistics per I/O priority. */ +struct io_stats_per_prio { + local_t inserted; + local_t merged; + local_t dispatched; + local_t completed; +}; + +/* I/O statistics for all I/O priorities (enum dd_prio). */ +struct io_stats { + struct io_stats_per_prio stats[DD_PRIO_COUNT]; +}; + +/* + * Deadline scheduler data per I/O priority (enum dd_prio). Requests are + * present on both sort_list[] and fifo_list[]. + */ +struct dd_per_prio { + struct list_head dispatch; + struct rb_root sort_list[DD_DIR_COUNT]; + struct list_head fifo_list[DD_DIR_COUNT]; + /* Next request in FIFO order. Read, write or both are NULL. */ + struct request *next_rq[DD_DIR_COUNT]; +}; + +struct deadline_data { + /* + * run time data + */ + + struct dd_per_prio per_prio[DD_PRIO_COUNT]; + + /* Data direction of latest dispatched request. */ + enum dd_data_dir last_dir; + unsigned int batching; /* number of sequential requests made */ + unsigned int starved; /* times reads have starved writes */ + + struct io_stats __percpu *stats; + + /* + * settings that change how the i/o scheduler behaves + */ + int fifo_expire[DD_DIR_COUNT]; + int fifo_batch; + int writes_starved; + int front_merges; + u32 async_depth; + int aging_expire; + + spinlock_t lock; + spinlock_t zone_lock; +}; + +/* Count one event of type 'event_type' and with I/O priority 'prio' */ +#define dd_count(dd, event_type, prio) do { \ + struct io_stats *io_stats = get_cpu_ptr((dd)->stats); \ + \ + BUILD_BUG_ON(!__same_type((dd), struct deadline_data *)); \ + BUILD_BUG_ON(!__same_type((prio), enum dd_prio)); \ + local_inc(&io_stats->stats[(prio)].event_type); \ + put_cpu_ptr(io_stats); \ +} while (0) + +/* + * Returns the total number of dd_count(dd, event_type, prio) calls across all + * CPUs. No locking or barriers since it is fine if the returned sum is slightly + * outdated. + */ +#define dd_sum(dd, event_type, prio) ({ \ + unsigned int cpu; \ + u32 sum = 0; \ + \ + BUILD_BUG_ON(!__same_type((dd), struct deadline_data *)); \ + BUILD_BUG_ON(!__same_type((prio), enum dd_prio)); \ + for_each_present_cpu(cpu) \ + sum += local_read(&per_cpu_ptr((dd)->stats, cpu)-> \ + stats[(prio)].event_type); \ + sum; \ +}) + +/* Maps an I/O priority class to a deadline scheduler priority. */ +static const enum dd_prio ioprio_class_to_prio[] = { + [IOPRIO_CLASS_NONE] = DD_BE_PRIO, + [IOPRIO_CLASS_RT] = DD_RT_PRIO, + [IOPRIO_CLASS_BE] = DD_BE_PRIO, + [IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO, +}; + +static inline struct rb_root * +deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq) +{ + return &per_prio->sort_list[rq_data_dir(rq)]; +} + +/* + * Returns the I/O priority class (IOPRIO_CLASS_*) that has been assigned to a + * request. + */ +static u8 dd_rq_ioclass(struct request *rq) +{ + return IOPRIO_PRIO_CLASS(req_get_ioprio(rq)); +} + +/* + * get the request after `rq' in sector-sorted order + */ +static inline struct request * +deadline_latter_request(struct request *rq) +{ + struct rb_node *node = rb_next(&rq->rb_node); + + if (node) + return rb_entry_rq(node); + + return NULL; +} + +static void +deadline_add_rq_rb(struct dd_per_prio *per_prio, struct request *rq) +{ + struct rb_root *root = deadline_rb_root(per_prio, rq); + + elv_rb_add(root, rq); +} + +static inline void +deadline_del_rq_rb(struct dd_per_prio *per_prio, struct request *rq) +{ + const enum dd_data_dir data_dir = rq_data_dir(rq); + + if (per_prio->next_rq[data_dir] == rq) + per_prio->next_rq[data_dir] = deadline_latter_request(rq); + + elv_rb_del(deadline_rb_root(per_prio, rq), rq); +} + +/* + * remove rq from rbtree and fifo. + */ +static void deadline_remove_request(struct request_queue *q, + struct dd_per_prio *per_prio, + struct request *rq) +{ + list_del_init(&rq->queuelist); + + /* + * We might not be on the rbtree, if we are doing an insert merge + */ + if (!RB_EMPTY_NODE(&rq->rb_node)) + deadline_del_rq_rb(per_prio, rq); + + elv_rqhash_del(q, rq); + if (q->last_merge == rq) + q->last_merge = NULL; +} + +static void dd_request_merged(struct request_queue *q, struct request *req, + enum elv_merge type) +{ + struct deadline_data *dd = q->elevator->elevator_data; + const u8 ioprio_class = dd_rq_ioclass(req); + const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; + struct dd_per_prio *per_prio = &dd->per_prio[prio]; + + /* + * if the merge was a front merge, we need to reposition request + */ + if (type == ELEVATOR_FRONT_MERGE) { + elv_rb_del(deadline_rb_root(per_prio, req), req); + deadline_add_rq_rb(per_prio, req); + } +} + +/* + * Callback function that is invoked after @next has been merged into @req. + */ +static void dd_merged_requests(struct request_queue *q, struct request *req, + struct request *next) +{ + struct deadline_data *dd = q->elevator->elevator_data; + const u8 ioprio_class = dd_rq_ioclass(next); + const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; + + dd_count(dd, merged, prio); + + /* + * if next expires before rq, assign its expire time to rq + * and move into next position (next will be deleted) in fifo + */ + if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { + if (time_before((unsigned long)next->fifo_time, + (unsigned long)req->fifo_time)) { + list_move(&req->queuelist, &next->queuelist); + req->fifo_time = next->fifo_time; + } + } + + /* + * kill knowledge of next, this one is a goner + */ + deadline_remove_request(q, &dd->per_prio[prio], next); +} + +/* + * move an entry to dispatch queue + */ +static void +deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio, + struct request *rq) +{ + const enum dd_data_dir data_dir = rq_data_dir(rq); + + per_prio->next_rq[data_dir] = deadline_latter_request(rq); + + /* + * take it off the sort and fifo list + */ + deadline_remove_request(rq->q, per_prio, rq); +} + +/* Number of requests queued for a given priority level. */ +static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio) +{ + return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio); +} + +/* + * deadline_check_fifo returns 0 if there are no expired requests on the fifo, + * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) + */ +static inline int deadline_check_fifo(struct dd_per_prio *per_prio, + enum dd_data_dir data_dir) +{ + struct request *rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next); + + /* + * rq is expired! + */ + if (time_after_eq(jiffies, (unsigned long)rq->fifo_time)) + return 1; + + return 0; +} + +/* + * For the specified data direction, return the next request to + * dispatch using arrival ordered lists. + */ +static struct request * +deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio, + enum dd_data_dir data_dir) +{ + struct request *rq; + unsigned long flags; + + if (list_empty(&per_prio->fifo_list[data_dir])) + return NULL; + + rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next); + if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q)) + return rq; + + /* + * Look for a write request that can be dispatched, that is one with + * an unlocked target zone. + */ + spin_lock_irqsave(&dd->zone_lock, flags); + list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) { + if (blk_req_can_dispatch_to_zone(rq)) + goto out; + } + rq = NULL; +out: + spin_unlock_irqrestore(&dd->zone_lock, flags); + + return rq; +} + +/* + * For the specified data direction, return the next request to + * dispatch using sector position sorted lists. + */ +static struct request * +deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio, + enum dd_data_dir data_dir) +{ + struct request *rq; + unsigned long flags; + + rq = per_prio->next_rq[data_dir]; + if (!rq) + return NULL; + + if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q)) + return rq; + + /* + * Look for a write request that can be dispatched, that is one with + * an unlocked target zone. + */ + spin_lock_irqsave(&dd->zone_lock, flags); + while (rq) { + if (blk_req_can_dispatch_to_zone(rq)) + break; + rq = deadline_latter_request(rq); + } + spin_unlock_irqrestore(&dd->zone_lock, flags); + + return rq; +} + +/* + * deadline_dispatch_requests selects the best request according to + * read/write expire, fifo_batch, etc and with a start time <= @latest. + */ +static struct request *__dd_dispatch_request(struct deadline_data *dd, + struct dd_per_prio *per_prio, + u64 latest_start_ns) +{ + struct request *rq, *next_rq; + enum dd_data_dir data_dir; + enum dd_prio prio; + u8 ioprio_class; + + lockdep_assert_held(&dd->lock); + + if (!list_empty(&per_prio->dispatch)) { + rq = list_first_entry(&per_prio->dispatch, struct request, + queuelist); + if (rq->start_time_ns > latest_start_ns) + return NULL; + list_del_init(&rq->queuelist); + goto done; + } + + /* + * batches are currently reads XOR writes + */ + rq = deadline_next_request(dd, per_prio, dd->last_dir); + if (rq && dd->batching < dd->fifo_batch) + /* we have a next request are still entitled to batch */ + goto dispatch_request; + + /* + * at this point we are not running a batch. select the appropriate + * data direction (read / write) + */ + + if (!list_empty(&per_prio->fifo_list[DD_READ])) { + BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_READ])); + + if (deadline_fifo_request(dd, per_prio, DD_WRITE) && + (dd->starved++ >= dd->writes_starved)) + goto dispatch_writes; + + data_dir = DD_READ; + + goto dispatch_find_request; + } + + /* + * there are either no reads or writes have been starved + */ + + if (!list_empty(&per_prio->fifo_list[DD_WRITE])) { +dispatch_writes: + BUG_ON(RB_EMPTY_ROOT(&per_prio->sort_list[DD_WRITE])); + + dd->starved = 0; + + data_dir = DD_WRITE; + + goto dispatch_find_request; + } + + return NULL; + +dispatch_find_request: + /* + * we are not running a batch, find best request for selected data_dir + */ + next_rq = deadline_next_request(dd, per_prio, data_dir); + if (deadline_check_fifo(per_prio, data_dir) || !next_rq) { + /* + * A deadline has expired, the last request was in the other + * direction, or we have run out of higher-sectored requests. + * Start again from the request with the earliest expiry time. + */ + rq = deadline_fifo_request(dd, per_prio, data_dir); + } else { + /* + * The last req was the same dir and we have a next request in + * sort order. No expired requests so continue on from here. + */ + rq = next_rq; + } + + /* + * For a zoned block device, if we only have writes queued and none of + * them can be dispatched, rq will be NULL. + */ + if (!rq) + return NULL; + + dd->last_dir = data_dir; + dd->batching = 0; + +dispatch_request: + if (rq->start_time_ns > latest_start_ns) + return NULL; + /* + * rq is the selected appropriate request. + */ + dd->batching++; + deadline_move_request(dd, per_prio, rq); +done: + ioprio_class = dd_rq_ioclass(rq); + prio = ioprio_class_to_prio[ioprio_class]; + dd_count(dd, dispatched, prio); + /* + * If the request needs its target zone locked, do it. + */ + blk_req_zone_write_lock(rq); + rq->rq_flags |= RQF_STARTED; + return rq; +} + +/* + * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests(). + * + * One confusing aspect here is that we get called for a specific + * hardware queue, but we may return a request that is for a + * different hardware queue. This is because mq-deadline has shared + * state for all hardware queues, in terms of sorting, FIFOs, etc. + */ +static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) +{ + struct deadline_data *dd = hctx->queue->elevator->elevator_data; + const u64 now_ns = ktime_get_ns(); + struct request *rq = NULL; + enum dd_prio prio; + + spin_lock(&dd->lock); + /* + * Start with dispatching requests whose deadline expired more than + * aging_expire jiffies ago. + */ + for (prio = DD_BE_PRIO; prio <= DD_PRIO_MAX; prio++) { + rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now_ns - + jiffies_to_nsecs(dd->aging_expire)); + if (rq) + goto unlock; + } + /* + * Next, dispatch requests in priority order. Ignore lower priority + * requests if any higher priority requests are pending. + */ + for (prio = 0; prio <= DD_PRIO_MAX; prio++) { + rq = __dd_dispatch_request(dd, &dd->per_prio[prio], now_ns); + if (rq || dd_queued(dd, prio)) + break; + } + +unlock: + spin_unlock(&dd->lock); + + return rq; +} + +/* + * Called by __blk_mq_alloc_request(). The shallow_depth value set by this + * function is used by __blk_mq_get_tag(). + */ +static void dd_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) +{ + struct deadline_data *dd = data->q->elevator->elevator_data; + + /* Do not throttle synchronous reads. */ + if (op_is_sync(op) && !op_is_write(op)) + return; + + /* + * Throttle asynchronous requests and writes such that these requests + * do not block the allocation of synchronous requests. + */ + data->shallow_depth = dd->async_depth; +} + +/* Called by blk_mq_update_nr_requests(). */ +static void dd_depth_updated(struct blk_mq_hw_ctx *hctx) +{ + struct request_queue *q = hctx->queue; + struct deadline_data *dd = q->elevator->elevator_data; + struct blk_mq_tags *tags = hctx->sched_tags; + + dd->async_depth = max(1UL, 3 * q->nr_requests / 4); + + sbitmap_queue_min_shallow_depth(tags->bitmap_tags, dd->async_depth); +} + +/* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */ +static int dd_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) +{ + dd_depth_updated(hctx); + return 0; +} + +static void dd_exit_sched(struct elevator_queue *e) +{ + struct deadline_data *dd = e->elevator_data; + enum dd_prio prio; + + for (prio = 0; prio <= DD_PRIO_MAX; prio++) { + struct dd_per_prio *per_prio = &dd->per_prio[prio]; + + WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ])); + WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE])); + } + + free_percpu(dd->stats); + + kfree(dd); +} + +/* + * initialize elevator private data (deadline_data). + */ +static int dd_init_sched(struct request_queue *q, struct elevator_type *e) +{ + struct deadline_data *dd; + struct elevator_queue *eq; + enum dd_prio prio; + int ret = -ENOMEM; + + eq = elevator_alloc(q, e); + if (!eq) + return ret; + + dd = kzalloc_node(sizeof(*dd), GFP_KERNEL, q->node); + if (!dd) + goto put_eq; + + eq->elevator_data = dd; + + dd->stats = alloc_percpu_gfp(typeof(*dd->stats), + GFP_KERNEL | __GFP_ZERO); + if (!dd->stats) + goto free_dd; + + for (prio = 0; prio <= DD_PRIO_MAX; prio++) { + struct dd_per_prio *per_prio = &dd->per_prio[prio]; + + INIT_LIST_HEAD(&per_prio->dispatch); + INIT_LIST_HEAD(&per_prio->fifo_list[DD_READ]); + INIT_LIST_HEAD(&per_prio->fifo_list[DD_WRITE]); + per_prio->sort_list[DD_READ] = RB_ROOT; + per_prio->sort_list[DD_WRITE] = RB_ROOT; + } + dd->fifo_expire[DD_READ] = read_expire; + dd->fifo_expire[DD_WRITE] = write_expire; + dd->writes_starved = writes_starved; + dd->front_merges = 1; + dd->last_dir = DD_WRITE; + dd->fifo_batch = fifo_batch; + dd->aging_expire = aging_expire; + spin_lock_init(&dd->lock); + spin_lock_init(&dd->zone_lock); + + q->elevator = eq; + return 0; + +free_dd: + kfree(dd); + +put_eq: + kobject_put(&eq->kobj); + return ret; +} + +/* + * Try to merge @bio into an existing request. If @bio has been merged into + * an existing request, store the pointer to that request into *@rq. + */ +static int dd_request_merge(struct request_queue *q, struct request **rq, + struct bio *bio) +{ + struct deadline_data *dd = q->elevator->elevator_data; + const u8 ioprio_class = IOPRIO_PRIO_CLASS(bio->bi_ioprio); + const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; + struct dd_per_prio *per_prio = &dd->per_prio[prio]; + sector_t sector = bio_end_sector(bio); + struct request *__rq; + + if (!dd->front_merges) + return ELEVATOR_NO_MERGE; + + __rq = elv_rb_find(&per_prio->sort_list[bio_data_dir(bio)], sector); + if (__rq) { + BUG_ON(sector != blk_rq_pos(__rq)); + + if (elv_bio_merge_ok(__rq, bio)) { + *rq = __rq; + return ELEVATOR_FRONT_MERGE; + } + } + + return ELEVATOR_NO_MERGE; +} + +/* + * Attempt to merge a bio into an existing request. This function is called + * before @bio is associated with a request. + */ +static bool dd_bio_merge(struct request_queue *q, struct bio *bio, + unsigned int nr_segs) +{ + struct deadline_data *dd = q->elevator->elevator_data; + struct request *free = NULL; + bool ret; + + spin_lock(&dd->lock); + ret = blk_mq_sched_try_merge(q, bio, nr_segs, &free); + spin_unlock(&dd->lock); + + if (free) + blk_mq_free_request(free); + + return ret; +} + +/* + * add rq to rbtree and fifo + */ +static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, + bool at_head) +{ + struct request_queue *q = hctx->queue; + struct deadline_data *dd = q->elevator->elevator_data; + const enum dd_data_dir data_dir = rq_data_dir(rq); + u16 ioprio = req_get_ioprio(rq); + u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio); + struct dd_per_prio *per_prio; + enum dd_prio prio; + LIST_HEAD(free); + + lockdep_assert_held(&dd->lock); + + /* + * This may be a requeue of a write request that has locked its + * target zone. If it is the case, this releases the zone lock. + */ + blk_req_zone_write_unlock(rq); + + prio = ioprio_class_to_prio[ioprio_class]; + dd_count(dd, inserted, prio); + + if (blk_mq_sched_try_insert_merge(q, rq, &free)) { + blk_mq_free_requests(&free); + return; + } + + trace_block_rq_insert(rq); + + per_prio = &dd->per_prio[prio]; + if (at_head) { + list_add(&rq->queuelist, &per_prio->dispatch); + } else { + deadline_add_rq_rb(per_prio, rq); + + if (rq_mergeable(rq)) { + elv_rqhash_add(q, rq); + if (!q->last_merge) + q->last_merge = rq; + } + + /* + * set expire time and add to fifo list + */ + rq->fifo_time = jiffies + dd->fifo_expire[data_dir]; + list_add_tail(&rq->queuelist, &per_prio->fifo_list[data_dir]); + } +} + +/* + * Called from blk_mq_sched_insert_request() or blk_mq_sched_insert_requests(). + */ +static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, + struct list_head *list, bool at_head) +{ + struct request_queue *q = hctx->queue; + struct deadline_data *dd = q->elevator->elevator_data; + + spin_lock(&dd->lock); + while (!list_empty(list)) { + struct request *rq; + + rq = list_first_entry(list, struct request, queuelist); + list_del_init(&rq->queuelist); + dd_insert_request(hctx, rq, at_head); + } + spin_unlock(&dd->lock); +} + +/* + * Nothing to do here. This is defined only to ensure that .finish_request + * method is called upon request completion. + */ +static void dd_prepare_request(struct request *rq) +{ +} + +/* + * Callback from inside blk_mq_free_request(). + * + * For zoned block devices, write unlock the target zone of + * completed write requests. Do this while holding the zone lock + * spinlock so that the zone is never unlocked while deadline_fifo_request() + * or deadline_next_request() are executing. This function is called for + * all requests, whether or not these requests complete successfully. + * + * For a zoned block device, __dd_dispatch_request() may have stopped + * dispatching requests if all the queued requests are write requests directed + * at zones that are already locked due to on-going write requests. To ensure + * write request dispatch progress in this case, mark the queue as needing a + * restart to ensure that the queue is run again after completion of the + * request and zones being unlocked. + */ +static void dd_finish_request(struct request *rq) +{ + struct request_queue *q = rq->q; + struct deadline_data *dd = q->elevator->elevator_data; + const u8 ioprio_class = dd_rq_ioclass(rq); + const enum dd_prio prio = ioprio_class_to_prio[ioprio_class]; + struct dd_per_prio *per_prio = &dd->per_prio[prio]; + + dd_count(dd, completed, prio); + + if (blk_queue_is_zoned(q)) { + unsigned long flags; + + spin_lock_irqsave(&dd->zone_lock, flags); + blk_req_zone_write_unlock(rq); + if (!list_empty(&per_prio->fifo_list[DD_WRITE])) + blk_mq_sched_mark_restart_hctx(rq->mq_hctx); + spin_unlock_irqrestore(&dd->zone_lock, flags); + } +} + +static bool dd_has_work_for_prio(struct dd_per_prio *per_prio) +{ + return !list_empty_careful(&per_prio->dispatch) || + !list_empty_careful(&per_prio->fifo_list[DD_READ]) || + !list_empty_careful(&per_prio->fifo_list[DD_WRITE]); +} + +static bool dd_has_work(struct blk_mq_hw_ctx *hctx) +{ + struct deadline_data *dd = hctx->queue->elevator->elevator_data; + enum dd_prio prio; + + for (prio = 0; prio <= DD_PRIO_MAX; prio++) + if (dd_has_work_for_prio(&dd->per_prio[prio])) + return true; + + return false; +} + +/* + * sysfs parts below + */ +#define SHOW_INT(__FUNC, __VAR) \ +static ssize_t __FUNC(struct elevator_queue *e, char *page) \ +{ \ + struct deadline_data *dd = e->elevator_data; \ + \ + return sysfs_emit(page, "%d\n", __VAR); \ +} +#define SHOW_JIFFIES(__FUNC, __VAR) SHOW_INT(__FUNC, jiffies_to_msecs(__VAR)) +SHOW_JIFFIES(deadline_read_expire_show, dd->fifo_expire[DD_READ]); +SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]); +SHOW_JIFFIES(deadline_aging_expire_show, dd->aging_expire); +SHOW_INT(deadline_writes_starved_show, dd->writes_starved); +SHOW_INT(deadline_front_merges_show, dd->front_merges); +SHOW_INT(deadline_async_depth_show, dd->front_merges); +SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch); +#undef SHOW_INT +#undef SHOW_JIFFIES + +#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ +static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \ +{ \ + struct deadline_data *dd = e->elevator_data; \ + int __data, __ret; \ + \ + __ret = kstrtoint(page, 0, &__data); \ + if (__ret < 0) \ + return __ret; \ + if (__data < (MIN)) \ + __data = (MIN); \ + else if (__data > (MAX)) \ + __data = (MAX); \ + *(__PTR) = __CONV(__data); \ + return count; \ +} +#define STORE_INT(__FUNC, __PTR, MIN, MAX) \ + STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, ) +#define STORE_JIFFIES(__FUNC, __PTR, MIN, MAX) \ + STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, msecs_to_jiffies) +STORE_JIFFIES(deadline_read_expire_store, &dd->fifo_expire[DD_READ], 0, INT_MAX); +STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MAX); +STORE_JIFFIES(deadline_aging_expire_store, &dd->aging_expire, 0, INT_MAX); +STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX); +STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1); +STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX); +STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX); +#undef STORE_FUNCTION +#undef STORE_INT +#undef STORE_JIFFIES + +#define DD_ATTR(name) \ + __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store) + +static struct elv_fs_entry deadline_attrs[] = { + DD_ATTR(read_expire), + DD_ATTR(write_expire), + DD_ATTR(writes_starved), + DD_ATTR(front_merges), + DD_ATTR(async_depth), + DD_ATTR(fifo_batch), + DD_ATTR(aging_expire), + __ATTR_NULL +}; + +#ifdef CONFIG_BLK_DEBUG_FS +#define DEADLINE_DEBUGFS_DDIR_ATTRS(prio, data_dir, name) \ +static void *deadline_##name##_fifo_start(struct seq_file *m, \ + loff_t *pos) \ + __acquires(&dd->lock) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ + \ + spin_lock(&dd->lock); \ + return seq_list_start(&per_prio->fifo_list[data_dir], *pos); \ +} \ + \ +static void *deadline_##name##_fifo_next(struct seq_file *m, void *v, \ + loff_t *pos) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ + \ + return seq_list_next(v, &per_prio->fifo_list[data_dir], pos); \ +} \ + \ +static void deadline_##name##_fifo_stop(struct seq_file *m, void *v) \ + __releases(&dd->lock) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + \ + spin_unlock(&dd->lock); \ +} \ + \ +static const struct seq_operations deadline_##name##_fifo_seq_ops = { \ + .start = deadline_##name##_fifo_start, \ + .next = deadline_##name##_fifo_next, \ + .stop = deadline_##name##_fifo_stop, \ + .show = blk_mq_debugfs_rq_show, \ +}; \ + \ +static int deadline_##name##_next_rq_show(void *data, \ + struct seq_file *m) \ +{ \ + struct request_queue *q = data; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ + struct request *rq = per_prio->next_rq[data_dir]; \ + \ + if (rq) \ + __blk_mq_debugfs_rq_show(m, rq); \ + return 0; \ +} + +DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_READ, read0); +DEADLINE_DEBUGFS_DDIR_ATTRS(DD_RT_PRIO, DD_WRITE, write0); +DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_READ, read1); +DEADLINE_DEBUGFS_DDIR_ATTRS(DD_BE_PRIO, DD_WRITE, write1); +DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_READ, read2); +DEADLINE_DEBUGFS_DDIR_ATTRS(DD_IDLE_PRIO, DD_WRITE, write2); +#undef DEADLINE_DEBUGFS_DDIR_ATTRS + +static int deadline_batching_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct deadline_data *dd = q->elevator->elevator_data; + + seq_printf(m, "%u\n", dd->batching); + return 0; +} + +static int deadline_starved_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct deadline_data *dd = q->elevator->elevator_data; + + seq_printf(m, "%u\n", dd->starved); + return 0; +} + +static int dd_async_depth_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct deadline_data *dd = q->elevator->elevator_data; + + seq_printf(m, "%u\n", dd->async_depth); + return 0; +} + +static int dd_queued_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct deadline_data *dd = q->elevator->elevator_data; + + seq_printf(m, "%u %u %u\n", dd_queued(dd, DD_RT_PRIO), + dd_queued(dd, DD_BE_PRIO), + dd_queued(dd, DD_IDLE_PRIO)); + return 0; +} + +/* Number of requests owned by the block driver for a given priority. */ +static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio) +{ + return dd_sum(dd, dispatched, prio) + dd_sum(dd, merged, prio) + - dd_sum(dd, completed, prio); +} + +static int dd_owned_by_driver_show(void *data, struct seq_file *m) +{ + struct request_queue *q = data; + struct deadline_data *dd = q->elevator->elevator_data; + + seq_printf(m, "%u %u %u\n", dd_owned_by_driver(dd, DD_RT_PRIO), + dd_owned_by_driver(dd, DD_BE_PRIO), + dd_owned_by_driver(dd, DD_IDLE_PRIO)); + return 0; +} + +#define DEADLINE_DISPATCH_ATTR(prio) \ +static void *deadline_dispatch##prio##_start(struct seq_file *m, \ + loff_t *pos) \ + __acquires(&dd->lock) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ + \ + spin_lock(&dd->lock); \ + return seq_list_start(&per_prio->dispatch, *pos); \ +} \ + \ +static void *deadline_dispatch##prio##_next(struct seq_file *m, \ + void *v, loff_t *pos) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + struct dd_per_prio *per_prio = &dd->per_prio[prio]; \ + \ + return seq_list_next(v, &per_prio->dispatch, pos); \ +} \ + \ +static void deadline_dispatch##prio##_stop(struct seq_file *m, void *v) \ + __releases(&dd->lock) \ +{ \ + struct request_queue *q = m->private; \ + struct deadline_data *dd = q->elevator->elevator_data; \ + \ + spin_unlock(&dd->lock); \ +} \ + \ +static const struct seq_operations deadline_dispatch##prio##_seq_ops = { \ + .start = deadline_dispatch##prio##_start, \ + .next = deadline_dispatch##prio##_next, \ + .stop = deadline_dispatch##prio##_stop, \ + .show = blk_mq_debugfs_rq_show, \ +} + +DEADLINE_DISPATCH_ATTR(0); +DEADLINE_DISPATCH_ATTR(1); +DEADLINE_DISPATCH_ATTR(2); +#undef DEADLINE_DISPATCH_ATTR + +#define DEADLINE_QUEUE_DDIR_ATTRS(name) \ + {#name "_fifo_list", 0400, \ + .seq_ops = &deadline_##name##_fifo_seq_ops} +#define DEADLINE_NEXT_RQ_ATTR(name) \ + {#name "_next_rq", 0400, deadline_##name##_next_rq_show} +static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = { + DEADLINE_QUEUE_DDIR_ATTRS(read0), + DEADLINE_QUEUE_DDIR_ATTRS(write0), + DEADLINE_QUEUE_DDIR_ATTRS(read1), + DEADLINE_QUEUE_DDIR_ATTRS(write1), + DEADLINE_QUEUE_DDIR_ATTRS(read2), + DEADLINE_QUEUE_DDIR_ATTRS(write2), + DEADLINE_NEXT_RQ_ATTR(read0), + DEADLINE_NEXT_RQ_ATTR(write0), + DEADLINE_NEXT_RQ_ATTR(read1), + DEADLINE_NEXT_RQ_ATTR(write1), + DEADLINE_NEXT_RQ_ATTR(read2), + DEADLINE_NEXT_RQ_ATTR(write2), + {"batching", 0400, deadline_batching_show}, + {"starved", 0400, deadline_starved_show}, + {"async_depth", 0400, dd_async_depth_show}, + {"dispatch0", 0400, .seq_ops = &deadline_dispatch0_seq_ops}, + {"dispatch1", 0400, .seq_ops = &deadline_dispatch1_seq_ops}, + {"dispatch2", 0400, .seq_ops = &deadline_dispatch2_seq_ops}, + {"owned_by_driver", 0400, dd_owned_by_driver_show}, + {"queued", 0400, dd_queued_show}, + {}, +}; +#undef DEADLINE_QUEUE_DDIR_ATTRS +#endif + +static struct elevator_type mq_deadline = { + .ops = { + .depth_updated = dd_depth_updated, + .limit_depth = dd_limit_depth, + .insert_requests = dd_insert_requests, + .dispatch_request = dd_dispatch_request, + .prepare_request = dd_prepare_request, + .finish_request = dd_finish_request, + .next_request = elv_rb_latter_request, + .former_request = elv_rb_former_request, + .bio_merge = dd_bio_merge, + .request_merge = dd_request_merge, + .requests_merged = dd_merged_requests, + .request_merged = dd_request_merged, + .has_work = dd_has_work, + .init_sched = dd_init_sched, + .exit_sched = dd_exit_sched, + .init_hctx = dd_init_hctx, + }, + +#ifdef CONFIG_BLK_DEBUG_FS + .queue_debugfs_attrs = deadline_queue_debugfs_attrs, +#endif + .elevator_attrs = deadline_attrs, + .elevator_name = "mq-deadline", + .elevator_alias = "deadline", + .elevator_features = ELEVATOR_F_ZBD_SEQ_WRITE, + .elevator_owner = THIS_MODULE, +}; +MODULE_ALIAS("mq-deadline-iosched"); + +static int __init deadline_init(void) +{ + return elv_register(&mq_deadline); +} + +static void __exit deadline_exit(void) +{ + elv_unregister(&mq_deadline); +} + +module_init(deadline_init); +module_exit(deadline_exit); + +MODULE_AUTHOR("Jens Axboe, Damien Le Moal and Bart Van Assche"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MQ deadline IO scheduler"); -- cgit From 14c4c8e41511aa8fba7fb239b20b6539b5bce201 Mon Sep 17 00:00:00 2001 From: Elliot Berman Date: Wed, 11 Aug 2021 08:59:14 -0700 Subject: cfi: Use rcu_read_{un}lock_sched_notrace If rcu_read_lock_sched tracing is enabled, the tracing subsystem can perform a jump which needs to be checked by CFI. For example, stm_ftrace source is enabled as a module and hooks into enabled ftrace events. This can cause an recursive loop where find_shadow_check_fn -> rcu_read_lock_sched -> (call to stm_ftrace generates cfi slowpath) -> find_shadow_check_fn -> rcu_read_lock_sched -> ... To avoid the recursion, either the ftrace codes needs to be marked with __no_cfi or CFI should not trace. Use the "_notrace" in CFI to avoid tracing so that CFI can guard ftrace. Signed-off-by: Elliot Berman Reviewed-by: Sami Tolvanen Cc: stable@vger.kernel.org Fixes: cf68fffb66d6 ("add support for Clang CFI") Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210811155914.19550-1-quic_eberman@quicinc.com --- kernel/cfi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/cfi.c b/kernel/cfi.c index e17a56639766..9594cfd1cf2c 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -248,9 +248,9 @@ static inline cfi_check_fn find_shadow_check_fn(unsigned long ptr) { cfi_check_fn fn; - rcu_read_lock_sched(); + rcu_read_lock_sched_notrace(); fn = ptr_to_check_fn(rcu_dereference_sched(cfi_shadow), ptr); - rcu_read_unlock_sched(); + rcu_read_unlock_sched_notrace(); return fn; } @@ -269,11 +269,11 @@ static inline cfi_check_fn find_module_check_fn(unsigned long ptr) cfi_check_fn fn = NULL; struct module *mod; - rcu_read_lock_sched(); + rcu_read_lock_sched_notrace(); mod = __module_address(ptr); if (mod) fn = mod->cfi_check; - rcu_read_unlock_sched(); + rcu_read_unlock_sched_notrace(); return fn; } -- cgit From 6922110d152e56d7569616b45a1f02876cf3eb9f Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 9 Aug 2021 18:06:28 +0200 Subject: net: linkwatch: fix failure to restore device state across suspend/resume After migrating my laptop from 4.19-LTS to 5.4-LTS a while ago I noticed that my Ethernet port to which a bond and a VLAN interface are attached appeared to remain up after resuming from suspend with the cable unplugged (and that problem still persists with 5.10-LTS). It happens that the following happens: - the network driver (e1000e here) prepares to suspend, calls e1000e_down() which calls netif_carrier_off() to signal that the link is going down. - netif_carrier_off() adds a link_watch event to the list of events for this device - the device is completely stopped. - the machine suspends - the cable is unplugged and the machine brought to another location - the machine is resumed - the queued linkwatch events are processed for the device - the device doesn't yet have the __LINK_STATE_PRESENT bit and its events are silently dropped - the device is resumed with its link down - the upper VLAN and bond interfaces are never notified that the link had been turned down and remain up - the only way to provoke a change is to physically connect the machine to a port and possibly unplug it. The state after resume looks like this: $ ip -br li | egrep 'bond|eth' bond0 UP e8:6a:64:64:64:64 eth0 DOWN e8:6a:64:64:64:64 eth0.2@eth0 UP e8:6a:64:64:64:64 Placing an explicit call to netdev_state_change() either in the suspend or the resume code in the NIC driver worked around this but the solution is not satisfying. The issue in fact really is in link_watch that loses events while it ought not to. It happens that the test for the device being present was added by commit 124eee3f6955 ("net: linkwatch: add check for netdevice being present to linkwatch_do_dev") in 4.20 to avoid an access to devices that are not present. Instead of dropping events, this patch proceeds slightly differently by postponing their handling so that they happen after the device is fully resumed. Fixes: 124eee3f6955 ("net: linkwatch: add check for netdevice being present to linkwatch_do_dev") Link: https://lists.openwall.net/netdev/2018/03/15/62 Cc: Heiner Kallweit Cc: Geert Uytterhoeven Cc: Florian Fainelli Signed-off-by: Willy Tarreau Link: https://lore.kernel.org/r/20210809160628.22623-1-w@1wt.eu Signed-off-by: Jakub Kicinski --- net/core/link_watch.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 75431ca9300f..1a455847da54 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -158,7 +158,7 @@ static void linkwatch_do_dev(struct net_device *dev) clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); rfc2863_policy(dev); - if (dev->flags & IFF_UP && netif_device_present(dev)) { + if (dev->flags & IFF_UP) { if (netif_carrier_ok(dev)) dev_activate(dev); else @@ -204,7 +204,8 @@ static void __linkwatch_run_queue(int urgent_only) dev = list_first_entry(&wrk, struct net_device, link_watch_list); list_del_init(&dev->link_watch_list); - if (urgent_only && !linkwatch_urgent_event(dev)) { + if (!netif_device_present(dev) || + (urgent_only && !linkwatch_urgent_event(dev))) { list_add_tail(&dev->link_watch_list, &lweventlist); continue; } -- cgit From 2cad5d2ed1b47eded5a2f2372c2a94bb065a8f97 Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Tue, 10 Aug 2021 16:58:12 +0800 Subject: net: pcs: xpcs: fix error handling on failed to allocate memory Drivers such as sja1105 and stmmac that call xpcs_create() expects an error returned by the pcs-xpcs module, but this was not the case on failed to allocate memory. Fixed this by returning an -ENOMEM instead of a NULL pointer. Fixes: 3ad1d171548e ("net: dsa: sja1105: migrate to xpcs for SGMII") Signed-off-by: Wong Vee Khee Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210810085812.1808466-1-vee.khee.wong@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-xpcs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 63fda3fc40aa..4bd61339823c 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -1089,7 +1089,7 @@ struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev, xpcs = kzalloc(sizeof(*xpcs), GFP_KERNEL); if (!xpcs) - return NULL; + return ERR_PTR(-ENOMEM); xpcs->mdiodev = mdiodev; -- cgit From 6de035fec045f8ae5ee5f3a02373a18b939e91fb Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 10 Aug 2021 22:40:56 -0400 Subject: tcp_bbr: fix u32 wrap bug in round logic if bbr_init() called after 2B packets Currently if BBR congestion control is initialized after more than 2B packets have been delivered, depending on the phase of the tp->delivered counter the tracking of BBR round trips can get stuck. The bug arises because if tp->delivered is between 2^31 and 2^32 at the time the BBR congestion control module is initialized, then the initialization of bbr->next_rtt_delivered to 0 will cause the logic to believe that the end of the round trip is still billions of packets in the future. More specifically, the following check will fail repeatedly: !before(rs->prior_delivered, bbr->next_rtt_delivered) and thus the connection will take up to 2B packets delivered before that check will pass and the connection will set: bbr->round_start = 1; This could cause many mechanisms in BBR to fail to trigger, for example bbr_check_full_bw_reached() would likely never exit STARTUP. This bug is 5 years old and has not been observed, and as a practical matter this would likely rarely trigger, since it would require transferring at least 2B packets, or likely more than 3 terabytes of data, before switching congestion control algorithms to BBR. This patch is a stable candidate for kernels as far back as v4.9, when tcp_bbr.c was added. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Kevin Yang Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20210811024056.235161-1-ncardwell@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_bbr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 6ea3dc2e4219..6274462b86b4 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1041,7 +1041,7 @@ static void bbr_init(struct sock *sk) bbr->prior_cwnd = 0; tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; bbr->rtt_cnt = 0; - bbr->next_rtt_delivered = 0; + bbr->next_rtt_delivered = tp->delivered; bbr->prev_ca_state = TCP_CA_Open; bbr->packet_conservation = 0; -- cgit From 0271824d9ebe945a2ecefdb87e1ce0a520be704d Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Tue, 10 Aug 2021 23:17:12 +0300 Subject: MAINTAINERS: switch to my OMP email for Renesas Ethernet drivers I'm still going to continue looking after the Renesas Ethernet drivers and device tree bindings. Now my new employer, Open Mobile Platform (OMP), will pay for all my upstream work. Let's switch to my OMP email for the reviews. Signed-off-by: Sergey Shtylyov Link: https://lore.kernel.org/r/9c212711-a0d7-39cd-7840-ff7abf938da1@omp.ru Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 25dc566a67c1..f13c0d3b7d0c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15803,7 +15803,7 @@ F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml F: drivers/i2c/busses/i2c-emev2.c RENESAS ETHERNET DRIVERS -R: Sergei Shtylyov +R: Sergey Shtylyov L: netdev@vger.kernel.org L: linux-renesas-soc@vger.kernel.org F: Documentation/devicetree/bindings/net/renesas,*.yaml -- cgit From 71ac6f390f6a3017f58d05d677b961bb1f851338 Mon Sep 17 00:00:00 2001 From: "jason-jh.lin" Date: Tue, 10 Aug 2021 10:55:03 +0800 Subject: drm/mediatek: Add AAL output size configuration To avoid the output width and height is incorrect, AAL_OUTPUT_SIZE configuration should be set. Fixes: 0664d1392c26 ("drm/mediatek: Add AAL engine basic function") Signed-off-by: jason-jh.lin Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c index 75bc00e17fc4..50d20562e612 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c @@ -34,6 +34,7 @@ #define DISP_AAL_EN 0x0000 #define DISP_AAL_SIZE 0x0030 +#define DISP_AAL_OUTPUT_SIZE 0x04d8 #define DISP_DITHER_EN 0x0000 #define DITHER_EN BIT(0) @@ -197,6 +198,7 @@ static void mtk_aal_config(struct device *dev, unsigned int w, struct mtk_ddp_comp_dev *priv = dev_get_drvdata(dev); mtk_ddp_write(cmdq_pkt, w << 16 | h, &priv->cmdq_reg, priv->regs, DISP_AAL_SIZE); + mtk_ddp_write(cmdq_pkt, w << 16 | h, &priv->cmdq_reg, priv->regs, DISP_AAL_OUTPUT_SIZE); } static void mtk_aal_gamma_set(struct device *dev, struct drm_crtc_state *state) -- cgit From b69dd5b3780a7298bd893816a09da751bc0636f7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Aug 2021 12:57:15 -0700 Subject: net: igmp: increase size of mr_ifc_count Some arches support cmpxchg() on 4-byte and 8-byte only. Increase mr_ifc_count width to 32bit to fix this problem. Fixes: 4a2b285e7e10 ("net: igmp: fix data-race in igmp_ifc_timer_expire()") Signed-off-by: Eric Dumazet Reported-by: Guenter Roeck Link: https://lore.kernel.org/r/20210811195715.3684218-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/inetdevice.h | 2 +- net/ipv4/igmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 53aa0343bf69..aaf4f1b4c277 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -41,7 +41,7 @@ struct in_device { unsigned long mr_qri; /* Query Response Interval */ unsigned char mr_qrv; /* Query Robustness Variable */ unsigned char mr_gq_running; - unsigned char mr_ifc_count; + u32 mr_ifc_count; struct timer_list mr_gq_timer; /* general query timer */ struct timer_list mr_ifc_timer; /* interface change timer */ diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a51360087b19..00576bae183d 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -803,7 +803,7 @@ static void igmp_gq_timer_expire(struct timer_list *t) static void igmp_ifc_timer_expire(struct timer_list *t) { struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer); - u8 mr_ifc_count; + u32 mr_ifc_count; igmpv3_send_cr(in_dev); restart: -- cgit From da4d4517ba70216799e3eb3b9bd71aa9dca065da Mon Sep 17 00:00:00 2001 From: "jason-jh.lin" Date: Tue, 10 Aug 2021 12:01:48 +0800 Subject: drm/mediatek: Add component_del in OVL and COLOR remove function Add component_del in OVL and COLOR remove function. Fixes: ff1395609e20 ("drm/mediatek: Move mtk_ddp_comp_init() from sub driver to DRM driver") Signed-off-by: jason-jh.lin Signed-off-by: Chun-Kuang Hu --- drivers/gpu/drm/mediatek/mtk_disp_color.c | 2 ++ drivers/gpu/drm/mediatek/mtk_disp_ovl.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/mediatek/mtk_disp_color.c b/drivers/gpu/drm/mediatek/mtk_disp_color.c index 6f4c80bbc0eb..473f5bb5cbad 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_color.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_color.c @@ -133,6 +133,8 @@ static int mtk_disp_color_probe(struct platform_device *pdev) static int mtk_disp_color_remove(struct platform_device *pdev) { + component_del(&pdev->dev, &mtk_disp_color_component_ops); + return 0; } diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index fa9d79963cd3..5326989d5206 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -423,6 +423,8 @@ static int mtk_disp_ovl_probe(struct platform_device *pdev) static int mtk_disp_ovl_remove(struct platform_device *pdev) { + component_del(&pdev->dev, &mtk_disp_ovl_component_ops); + return 0; } -- cgit From f753067494c2726d7a09ffca351d9b2599fee876 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Thu, 12 Aug 2021 09:24:39 +0300 Subject: Revert "interconnect: qcom: icc-rpmh: Add BCMs to commit list in pre_aggregate" This reverts commit f84f5b6f72e68bbaeb850b58ac167e4a3a47532a, which is causing regressions on some platforms, preventing them to boot or do a clean reboot. This is because the above commit is sending also all the zero bandwidth requests to turn off any resources that might be enabled unnecessarily, but currently this may turn off interconnects that are enabled by default, but with no consumer to keep them on. Let's revert this for now as some platforms are not ready for such change yet. In the future we can introduce some _ignore_unused option that could keep also the unused resources on platforms that have only partial interconnect support and also add .shutdown callbacks to deal with disabling the resources in the right order. Reported-by: Stephen Boyd Reported-by: Bjorn Andersson Link: https://lore.kernel.org/r/CAE-0n52iVgX0JjjnYi=NDg49xP961p=+W5R2bmO+2xwRceFhfA@mail.gmail.com Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/icc-rpmh.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c index 27cc5f03611c..f6fae64861ce 100644 --- a/drivers/interconnect/qcom/icc-rpmh.c +++ b/drivers/interconnect/qcom/icc-rpmh.c @@ -20,18 +20,13 @@ void qcom_icc_pre_aggregate(struct icc_node *node) { size_t i; struct qcom_icc_node *qn; - struct qcom_icc_provider *qp; qn = node->data; - qp = to_qcom_provider(node->provider); for (i = 0; i < QCOM_ICC_NUM_BUCKETS; i++) { qn->sum_avg[i] = 0; qn->max_peak[i] = 0; } - - for (i = 0; i < qn->num_bcms; i++) - qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]); } EXPORT_SYMBOL_GPL(qcom_icc_pre_aggregate); @@ -49,8 +44,10 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, { size_t i; struct qcom_icc_node *qn; + struct qcom_icc_provider *qp; qn = node->data; + qp = to_qcom_provider(node->provider); if (!tag) tag = QCOM_ICC_TAG_ALWAYS; @@ -70,6 +67,9 @@ int qcom_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw, *agg_avg += avg_bw; *agg_peak = max_t(u32, *agg_peak, peak_bw); + for (i = 0; i < qn->num_bcms; i++) + qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]); + return 0; } EXPORT_SYMBOL_GPL(qcom_icc_aggregate); -- cgit From 48c812e0327744b4965296f65c23fe2405692afc Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 Aug 2021 13:37:48 +0100 Subject: net: mscc: Fix non-GPL export of regmap APIs The ocelot driver makes use of regmap, wrapping it with driver specific operations that are thin wrappers around the core regmap APIs. These are exported with EXPORT_SYMBOL, dropping the _GPL from the core regmap exports which is frowned upon. Add _GPL suffixes to at least the APIs that are doing register I/O. Signed-off-by: Mark Brown Acked-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot_io.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_io.c b/drivers/net/ethernet/mscc/ocelot_io.c index ea4e83410fe4..7390fa3980ec 100644 --- a/drivers/net/ethernet/mscc/ocelot_io.c +++ b/drivers/net/ethernet/mscc/ocelot_io.c @@ -21,7 +21,7 @@ u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset) ocelot->map[target][reg & REG_MASK] + offset, &val); return val; } -EXPORT_SYMBOL(__ocelot_read_ix); +EXPORT_SYMBOL_GPL(__ocelot_read_ix); void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset) { @@ -32,7 +32,7 @@ void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset) regmap_write(ocelot->targets[target], ocelot->map[target][reg & REG_MASK] + offset, val); } -EXPORT_SYMBOL(__ocelot_write_ix); +EXPORT_SYMBOL_GPL(__ocelot_write_ix); void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg, u32 offset) @@ -45,7 +45,7 @@ void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg, ocelot->map[target][reg & REG_MASK] + offset, mask, val); } -EXPORT_SYMBOL(__ocelot_rmw_ix); +EXPORT_SYMBOL_GPL(__ocelot_rmw_ix); u32 ocelot_port_readl(struct ocelot_port *port, u32 reg) { @@ -58,7 +58,7 @@ u32 ocelot_port_readl(struct ocelot_port *port, u32 reg) regmap_read(port->target, ocelot->map[target][reg & REG_MASK], &val); return val; } -EXPORT_SYMBOL(ocelot_port_readl); +EXPORT_SYMBOL_GPL(ocelot_port_readl); void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg) { @@ -69,7 +69,7 @@ void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg) regmap_write(port->target, ocelot->map[target][reg & REG_MASK], val); } -EXPORT_SYMBOL(ocelot_port_writel); +EXPORT_SYMBOL_GPL(ocelot_port_writel); void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg) { @@ -77,7 +77,7 @@ void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg) ocelot_port_writel(port, (cur & (~mask)) | val, reg); } -EXPORT_SYMBOL(ocelot_port_rmwl); +EXPORT_SYMBOL_GPL(ocelot_port_rmwl); u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target, u32 reg, u32 offset) @@ -128,7 +128,7 @@ int ocelot_regfields_init(struct ocelot *ocelot, return 0; } -EXPORT_SYMBOL(ocelot_regfields_init); +EXPORT_SYMBOL_GPL(ocelot_regfields_init); static struct regmap_config ocelot_regmap_config = { .reg_bits = 32, @@ -148,4 +148,4 @@ struct regmap *ocelot_regmap_init(struct ocelot *ocelot, struct resource *res) return devm_regmap_init_mmio(ocelot->dev, regs, &ocelot_regmap_config); } -EXPORT_SYMBOL(ocelot_regmap_init); +EXPORT_SYMBOL_GPL(ocelot_regmap_init); -- cgit From 86704993e6a5989e256b4212ca03115cc2694eda Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Wed, 11 Aug 2021 08:22:09 +0700 Subject: Revert "tipc: Return the correct errno code" This reverts commit 0efea3c649f0 because of: - The returning -ENOBUF error is fine on socket buffer allocation. - There is side effect in the calling path tipc_node_xmit()->tipc_link_xmit() when checking error code returning. Fixes: 0efea3c649f0 ("tipc: Return the correct errno code") Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/link.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index cf586840caeb..1b7a487c8841 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -913,7 +913,7 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, dnode, l->addr, dport, 0, 0); if (!skb) - return -ENOMEM; + return -ENOBUFS; msg_set_dest_droppable(buf_msg(skb), true); TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr); skb_queue_tail(&l->wakeupq, skb); @@ -1031,7 +1031,7 @@ void tipc_link_reset(struct tipc_link *l) * * Consumes the buffer chain. * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted - * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS or -ENOMEM + * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS */ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *xmitq) @@ -1089,7 +1089,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, if (!_skb) { kfree_skb(skb); __skb_queue_purge(list); - return -ENOMEM; + return -ENOBUFS; } __skb_queue_tail(transmq, skb); tipc_link_set_skb_retransmit_time(skb, l); -- cgit From c4b68e513953c3370ce02c3208c1c628c0b86fd3 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Aug 2021 15:15:13 -0500 Subject: pinctrl: amd: Fix an issue with shutdown when system set to s0ix IRQs are getting armed on shutdown causing the system to immediately wake back up. Link: https://lkml.org/lkml/2021/8/2/1114 Reported-by: nix.or.die@googlemail.com Acked-by: Shyam Sundar S K Tested-by: Gabriel Craciunescu CC: Raul E Rangel Fixes: d62bd5ce12d7 ("pinctrl: amd: Implement irq_set_wake") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20210809201513.12367-1-mario.limonciello@amd.com Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-amd.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index a76be6cc26ee..5b764740b829 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -444,8 +444,7 @@ static int amd_gpio_irq_set_wake(struct irq_data *d, unsigned int on) unsigned long flags; struct gpio_chip *gc = irq_data_get_irq_chip_data(d); struct amd_gpio *gpio_dev = gpiochip_get_data(gc); - u32 wake_mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) | - BIT(WAKE_CNTRL_OFF_S4); + u32 wake_mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3); raw_spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + (d->hwirq)*4); -- cgit From 700fa08da43edb0af3e6a513f0255443e96088e8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 11 Aug 2021 14:59:45 +0300 Subject: net: dsa: sja1105: unregister the MDIO buses during teardown The call to sja1105_mdiobus_unregister is present in the error path but absent from the main driver unbind path. Fixes: 5a8f09748ee7 ("net: dsa: sja1105: register the MDIO buses for 100base-T1 and 100base-TX") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index b188a80eeec6..49eb0ac41b7d 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -3187,6 +3187,7 @@ static void sja1105_teardown(struct dsa_switch *ds) } sja1105_devlink_teardown(ds); + sja1105_mdiobus_unregister(ds); sja1105_flower_teardown(ds); sja1105_tas_teardown(ds); sja1105_ptp_clock_unregister(ds); -- cgit From abd9d66a055722393d33685214c08386694871d7 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Wed, 11 Aug 2021 10:48:57 +0530 Subject: drm/i915/display: Fix the 12 BPC bits for PIPE_MISC reg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Till DISPLAY12 the PIPE_MISC bits 5-7 are used to set the Dithering BPC, with valid values of 6, 8, 10 BPC. For ADLP+ these bits are used to set the PORT OUTPUT BPC, with valid values of: 6, 8, 10, 12 BPC, and need to be programmed whether dithering is enabled or not. This patch: -corrects the bits 5-7 for PIPE MISC register for 12 BPC. -renames the bits and mask to have generic names for these bits for dithering bpc and port output bpc. v3: Added a note for MIPI DSI which uses the PIPE_MISC for readout for pipe_bpp. (Uma Shankar) v2: Added 'display' to the subject and fixes tag. (Uma Shankar) Fixes: 756f85cffef2 ("drm/i915/bdw: Broadwell has PIPEMISC") Cc: Paulo Zanoni (v1) Cc: Ville Syrjälä Cc: Daniel Vetter Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: intel-gfx@lists.freedesktop.org Cc: # v3.13+ Signed-off-by: Ankit Nautiyal Reviewed-by: Uma Shankar Signed-off-by: Uma Shankar Link: https://patchwork.freedesktop.org/patch/msgid/20210811051857.109723-1-ankit.k.nautiyal@intel.com (cherry picked from commit 70418a68713c13da3f36c388087d0220b456a430) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display.c | 34 ++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_reg.h | 16 +++++++++---- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 2d5d21740c25..0a8a2395c8ac 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5746,16 +5746,18 @@ static void bdw_set_pipemisc(const struct intel_crtc_state *crtc_state) switch (crtc_state->pipe_bpp) { case 18: - val |= PIPEMISC_DITHER_6_BPC; + val |= PIPEMISC_6_BPC; break; case 24: - val |= PIPEMISC_DITHER_8_BPC; + val |= PIPEMISC_8_BPC; break; case 30: - val |= PIPEMISC_DITHER_10_BPC; + val |= PIPEMISC_10_BPC; break; case 36: - val |= PIPEMISC_DITHER_12_BPC; + /* Port output 12BPC defined for ADLP+ */ + if (DISPLAY_VER(dev_priv) > 12) + val |= PIPEMISC_12_BPC_ADLP; break; default: MISSING_CASE(crtc_state->pipe_bpp); @@ -5808,15 +5810,27 @@ int bdw_get_pipemisc_bpp(struct intel_crtc *crtc) tmp = intel_de_read(dev_priv, PIPEMISC(crtc->pipe)); - switch (tmp & PIPEMISC_DITHER_BPC_MASK) { - case PIPEMISC_DITHER_6_BPC: + switch (tmp & PIPEMISC_BPC_MASK) { + case PIPEMISC_6_BPC: return 18; - case PIPEMISC_DITHER_8_BPC: + case PIPEMISC_8_BPC: return 24; - case PIPEMISC_DITHER_10_BPC: + case PIPEMISC_10_BPC: return 30; - case PIPEMISC_DITHER_12_BPC: - return 36; + /* + * PORT OUTPUT 12 BPC defined for ADLP+. + * + * TODO: + * For previous platforms with DSI interface, bits 5:7 + * are used for storing pipe_bpp irrespective of dithering. + * Since the value of 12 BPC is not defined for these bits + * on older platforms, need to find a workaround for 12 BPC + * MIPI DSI HW readout. + */ + case PIPEMISC_12_BPC_ADLP: + if (DISPLAY_VER(dev_priv) > 12) + return 36; + fallthrough; default: MISSING_CASE(tmp); return 0; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 41186c1f771e..476bb3b9ad11 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6163,11 +6163,17 @@ enum { #define PIPEMISC_HDR_MODE_PRECISION (1 << 23) /* icl+ */ #define PIPEMISC_OUTPUT_COLORSPACE_YUV (1 << 11) #define PIPEMISC_PIXEL_ROUNDING_TRUNC REG_BIT(8) /* tgl+ */ -#define PIPEMISC_DITHER_BPC_MASK (7 << 5) -#define PIPEMISC_DITHER_8_BPC (0 << 5) -#define PIPEMISC_DITHER_10_BPC (1 << 5) -#define PIPEMISC_DITHER_6_BPC (2 << 5) -#define PIPEMISC_DITHER_12_BPC (3 << 5) +/* + * For Display < 13, Bits 5-7 of PIPE MISC represent DITHER BPC with + * valid values of: 6, 8, 10 BPC. + * ADLP+, the bits 5-7 represent PORT OUTPUT BPC with valid values of: + * 6, 8, 10, 12 BPC. + */ +#define PIPEMISC_BPC_MASK (7 << 5) +#define PIPEMISC_8_BPC (0 << 5) +#define PIPEMISC_10_BPC (1 << 5) +#define PIPEMISC_6_BPC (2 << 5) +#define PIPEMISC_12_BPC_ADLP (4 << 5) /* adlp+ */ #define PIPEMISC_DITHER_ENABLE (1 << 4) #define PIPEMISC_DITHER_TYPE_MASK (3 << 2) #define PIPEMISC_DITHER_TYPE_SP (0 << 2) -- cgit From 24d032e2359e3abc926b3d423f49a7c33e0b7836 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 6 Aug 2021 10:41:30 -0700 Subject: drm/i915: Only access SFC_DONE when media domain is not fused off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SFC_DONE register lives within the corresponding VD0/VD2/VD4/VD6 forcewake domain and is not accessible if the vdbox in that domain is fused off and the forcewake is not initialized. This mistake went unnoticed because until recently we were using the wrong register offset for the SFC_DONE register; once the register offset was corrected, we started hitting errors like <4> [544.989065] i915 0000:cc:00.0: Uninitialized forcewake domain(s) 0x80 accessed at 0x1ce000 on parts with fused-off vdbox engines. Fixes: e50dbdbfd9fb ("drm/i915/tgl: Add SFC instdone to error state") Fixes: 9c9c6d0ab08a ("drm/i915: Correct SFC_DONE register offset") Cc: Daniele Ceraolo Spurio Cc: Mika Kuoppala Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210806174130.1058960-1-matthew.d.roper@intel.com Reviewed-by: José Roberto de Souza (cherry picked from commit c5589bb5dccb0c5cb74910da93663f489589f3ce) Signed-off-by: Rodrigo Vivi [Changed Fixes tag to match the cherry-picked 82929a2140eb] --- drivers/gpu/drm/i915/i915_gpu_error.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 35c97c39f125..966664610c8c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -727,9 +727,18 @@ static void err_print_gt(struct drm_i915_error_state_buf *m, if (GRAPHICS_VER(m->i915) >= 12) { int i; - for (i = 0; i < GEN12_SFC_DONE_MAX; i++) + for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { + /* + * SFC_DONE resides in the VD forcewake domain, so it + * only exists if the corresponding VCS engine is + * present. + */ + if (!HAS_ENGINE(gt->_gt, _VCS(i * 2))) + continue; + err_printf(m, " SFC_DONE[%d]: 0x%08x\n", i, gt->sfc_done[i]); + } err_printf(m, " GAM_DONE: 0x%08x\n", gt->gam_done); } @@ -1581,6 +1590,14 @@ static void gt_record_regs(struct intel_gt_coredump *gt) if (GRAPHICS_VER(i915) >= 12) { for (i = 0; i < GEN12_SFC_DONE_MAX; i++) { + /* + * SFC_DONE resides in the VD forcewake domain, so it + * only exists if the corresponding VCS engine is + * present. + */ + if (!HAS_ENGINE(gt->_gt, _VCS(i * 2))) + continue; + gt->sfc_done[i] = intel_uncore_read(uncore, GEN12_SFC_DONE(i)); } -- cgit From ffd5caa26f6afde0c1e3ed126806607748a83c6e Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 10 Aug 2021 16:27:48 +0200 Subject: drm/doc/rfc: drop lmem uapi section We still have quite a bit more work to do with overall reworking of the ttm-based dg1 code, but the uapi stuff is now finalized with the latest pull. So remove that. This also fixes kerneldoc build warnings because we've included the same headers in two places, resulting in sphinx complaining about duplicated symbols. This regression has been created when we moved the uapi definitions to the real include/uapi/ folder in 727ecd99a4c9 ("drm/doc/rfc: drop the i915_gem_lmem.h header") v2: Fix a few references that I missed, the htmldocs build took forever. Acked-by: Jason Ekstrand Acked-by: Maarten Lankhorst Tested-by Stephen Rothwell (v1) References: https://lore.kernel.org/dri-devel/20210603193242.1ce99344@canb.auug.org.au/ Reported-by: Stephen Rothwell Cc: Stephen Rothwell Fixes: 727ecd99a4c9 ("drm/doc/rfc: drop the i915_gem_lmem.h header") Cc: Matthew Auld Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210810142748.1983271-1-daniel.vetter@ffwll.ch (cherry picked from commit dae2d28832968751f7731336b560a4a84a197b76) Signed-off-by: Rodrigo Vivi --- Documentation/gpu/rfc/i915_gem_lmem.rst | 109 -------------------------------- 1 file changed, 109 deletions(-) diff --git a/Documentation/gpu/rfc/i915_gem_lmem.rst b/Documentation/gpu/rfc/i915_gem_lmem.rst index 675ba8620d66..b421a3c1806e 100644 --- a/Documentation/gpu/rfc/i915_gem_lmem.rst +++ b/Documentation/gpu/rfc/i915_gem_lmem.rst @@ -18,114 +18,5 @@ real, with all the uAPI bits is: * Route shmem backend over to TTM SYSTEM for discrete * TTM purgeable object support * Move i915 buddy allocator over to TTM - * MMAP ioctl mode(see `I915 MMAP`_) - * SET/GET ioctl caching(see `I915 SET/GET CACHING`_) * Send RFC(with mesa-dev on cc) for final sign off on the uAPI * Add pciid for DG1 and turn on uAPI for real - -New object placement and region query uAPI -========================================== -Starting from DG1 we need to give userspace the ability to allocate buffers from -device local-memory. Currently the driver supports gem_create, which can place -buffers in system memory via shmem, and the usual assortment of other -interfaces, like dumb buffers and userptr. - -To support this new capability, while also providing a uAPI which will work -beyond just DG1, we propose to offer three new bits of uAPI: - -DRM_I915_QUERY_MEMORY_REGIONS ------------------------------ -New query ID which allows userspace to discover the list of supported memory -regions(like system-memory and local-memory) for a given device. We identify -each region with a class and instance pair, which should be unique. The class -here would be DEVICE or SYSTEM, and the instance would be zero, on platforms -like DG1. - -Side note: The class/instance design is borrowed from our existing engine uAPI, -where we describe every physical engine in terms of its class, and the -particular instance, since we can have more than one per class. - -In the future we also want to expose more information which can further -describe the capabilities of a region. - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :functions: drm_i915_gem_memory_class drm_i915_gem_memory_class_instance drm_i915_memory_region_info drm_i915_query_memory_regions - -GEM_CREATE_EXT --------------- -New ioctl which is basically just gem_create but now allows userspace to provide -a chain of possible extensions. Note that if we don't provide any extensions and -set flags=0 then we get the exact same behaviour as gem_create. - -Side note: We also need to support PXP[1] in the near future, which is also -applicable to integrated platforms, and adds its own gem_create_ext extension, -which basically lets userspace mark a buffer as "protected". - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :functions: drm_i915_gem_create_ext - -I915_GEM_CREATE_EXT_MEMORY_REGIONS ----------------------------------- -Implemented as an extension for gem_create_ext, we would now allow userspace to -optionally provide an immutable list of preferred placements at creation time, -in priority order, for a given buffer object. For the placements we expect -them each to use the class/instance encoding, as per the output of the regions -query. Having the list in priority order will be useful in the future when -placing an object, say during eviction. - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :functions: drm_i915_gem_create_ext_memory_regions - -One fair criticism here is that this seems a little over-engineered[2]. If we -just consider DG1 then yes, a simple gem_create.flags or something is totally -all that's needed to tell the kernel to allocate the buffer in local-memory or -whatever. However looking to the future we need uAPI which can also support -upcoming Xe HP multi-tile architecture in a sane way, where there can be -multiple local-memory instances for a given device, and so using both class and -instance in our uAPI to describe regions is desirable, although specifically -for DG1 it's uninteresting, since we only have a single local-memory instance. - -Existing uAPI issues -==================== -Some potential issues we still need to resolve. - -I915 MMAP ---------- -In i915 there are multiple ways to MMAP GEM object, including mapping the same -object using different mapping types(WC vs WB), i.e multiple active mmaps per -object. TTM expects one MMAP at most for the lifetime of the object. If it -turns out that we have to backpedal here, there might be some potential -userspace fallout. - -I915 SET/GET CACHING --------------------- -In i915 we have set/get_caching ioctl. TTM doesn't let us to change this, but -DG1 doesn't support non-snooped pcie transactions, so we can just always -allocate as WB for smem-only buffers. If/when our hw gains support for -non-snooped pcie transactions then we must fix this mode at allocation time as -a new GEM extension. - -This is related to the mmap problem, because in general (meaning, when we're -not running on intel cpus) the cpu mmap must not, ever, be inconsistent with -allocation mode. - -Possible idea is to let the kernel picks the mmap mode for userspace from the -following table: - -smem-only: WB. Userspace does not need to call clflush. - -smem+lmem: We only ever allow a single mode, so simply allocate this as uncached -memory, and always give userspace a WC mapping. GPU still does snooped access -here(assuming we can't turn it off like on DG1), which is a bit inefficient. - -lmem only: always WC - -This means on discrete you only get a single mmap mode, all others must be -rejected. That's probably going to be a new default mode or something like -that. - -Links -===== -[1] https://patchwork.freedesktop.org/series/86798/ - -[2] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5599#note_553791 -- cgit From a2befe9380dd04ee76c871568deca00eedf89134 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Wed, 11 Aug 2021 18:14:41 +0200 Subject: ALSA: hda - fix the 'Capture Switch' value change notifications The original code in the cap_put_caller() function does not handle correctly the positive values returned from the passed function for multiple iterations. It means that the change notifications may be lost. Fixes: 352f7f914ebb ("ALSA: hda - Merge Realtek parser code to generic parser") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213851 Cc: Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20210811161441.1325250-1-perex@perex.cz Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_generic.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index e97d00585e8e..481d8f8d3396 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -3460,7 +3460,7 @@ static int cap_put_caller(struct snd_kcontrol *kcontrol, struct hda_gen_spec *spec = codec->spec; const struct hda_input_mux *imux; struct nid_path *path; - int i, adc_idx, err = 0; + int i, adc_idx, ret, err = 0; imux = &spec->input_mux; adc_idx = kcontrol->id.index; @@ -3470,9 +3470,13 @@ static int cap_put_caller(struct snd_kcontrol *kcontrol, if (!path || !path->ctls[type]) continue; kcontrol->private_value = path->ctls[type]; - err = func(kcontrol, ucontrol); - if (err < 0) + ret = func(kcontrol, ucontrol); + if (ret < 0) { + err = ret; break; + } + if (ret > 0) + err = 1; } mutex_unlock(&codec->control_mutex); if (err >= 0 && spec->cap_sync_hook) -- cgit From 67bb66d32905627e29400e2cb7f87a7c4c8cf667 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 12 Aug 2021 11:28:39 +0900 Subject: ALSA: oxfw: fix functioal regression for silence in Apogee Duet FireWire OXFW 971 has no function to use the value in syt field of received isochronous packet for playback timing generation. In kernel prepatch for v5.14, ALSA OXFW driver got change to send NO_INFO value in the field instead of actual timing value. The change brings Apogee Duet FireWire to generate no playback sound, while output meter moves. As long as I investigate, _any_ value in the syt field takes the device to generate sound. It's reasonable to think that the device just ignores data blocks in packet with NO_INFO value in its syt field for audio data processing. This commit adds a new flag for the quirk to fix regression. Fixes: 029ffc429440 ("ALSA: oxfw: perform sequence replay for media clock recovery") Cc: Signed-off-by: Takashi Sakamoto Link: https://lore.kernel.org/r/20210812022839.42043-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Iwai --- sound/firewire/oxfw/oxfw-stream.c | 9 ++++++++- sound/firewire/oxfw/oxfw.c | 6 ++++-- sound/firewire/oxfw/oxfw.h | 5 +++++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/sound/firewire/oxfw/oxfw-stream.c b/sound/firewire/oxfw/oxfw-stream.c index 0ef242fdd3bc..fff18b5d4e05 100644 --- a/sound/firewire/oxfw/oxfw-stream.c +++ b/sound/firewire/oxfw/oxfw-stream.c @@ -153,7 +153,7 @@ static int init_stream(struct snd_oxfw *oxfw, struct amdtp_stream *stream) struct cmp_connection *conn; enum cmp_direction c_dir; enum amdtp_stream_direction s_dir; - unsigned int flags = CIP_UNAWARE_SYT; + unsigned int flags = 0; int err; if (!(oxfw->quirks & SND_OXFW_QUIRK_BLOCKING_TRANSMISSION)) @@ -161,6 +161,13 @@ static int init_stream(struct snd_oxfw *oxfw, struct amdtp_stream *stream) else flags |= CIP_BLOCKING; + // OXFW 970/971 has no function to generate playback timing according to the sequence + // of value in syt field, thus the packet should include NO_INFO value in the field. + // However, some models just ignore data blocks in packet with NO_INFO for audio data + // processing. + if (!(oxfw->quirks & SND_OXFW_QUIRK_IGNORE_NO_INFO_PACKET)) + flags |= CIP_UNAWARE_SYT; + if (stream == &oxfw->tx_stream) { conn = &oxfw->out_conn; c_dir = CMP_OUTPUT; diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c index 84971d78d152..cb5b5e3a481b 100644 --- a/sound/firewire/oxfw/oxfw.c +++ b/sound/firewire/oxfw/oxfw.c @@ -159,8 +159,10 @@ static int detect_quirks(struct snd_oxfw *oxfw, const struct ieee1394_device_id return snd_oxfw_scs1x_add(oxfw); } - if (entry->vendor_id == OUI_APOGEE && entry->model_id == MODEL_DUET_FW) - oxfw->quirks |= SND_OXFW_QUIRK_BLOCKING_TRANSMISSION; + if (entry->vendor_id == OUI_APOGEE && entry->model_id == MODEL_DUET_FW) { + oxfw->quirks |= SND_OXFW_QUIRK_BLOCKING_TRANSMISSION | + SND_OXFW_QUIRK_IGNORE_NO_INFO_PACKET; + } /* * TASCAM FireOne has physical control and requires a pair of additional diff --git a/sound/firewire/oxfw/oxfw.h b/sound/firewire/oxfw/oxfw.h index ee47abcb0c90..c13034f6c2ca 100644 --- a/sound/firewire/oxfw/oxfw.h +++ b/sound/firewire/oxfw/oxfw.h @@ -42,6 +42,11 @@ enum snd_oxfw_quirk { SND_OXFW_QUIRK_BLOCKING_TRANSMISSION = 0x04, // Stanton SCS1.d and SCS1.m support unique transaction. SND_OXFW_QUIRK_SCS_TRANSACTION = 0x08, + // Apogee Duet FireWire ignores data blocks in packet with NO_INFO for audio data + // processing, while output level meter moves. Any value in syt field of packet takes + // the device to process audio data even if the value is invalid in a point of + // IEC 61883-1/6. + SND_OXFW_QUIRK_IGNORE_NO_INFO_PACKET = 0x10, }; /* This is an arbitrary number for convinience. */ -- cgit From 98694166c27d473c36b434bd3572934c2f2a16ab Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 10 Aug 2021 16:13:16 +0000 Subject: powerpc/interrupt: Fix OOPS by not calling do_IRQ() from timer_interrupt() An interrupt handler shall not be called from another interrupt handler otherwise this leads to problems like the following: Kernel attempted to write user page (afd4fa84) - exploit attempt? (uid: 1000) ------------[ cut here ]------------ Bug: Write fault blocked by KUAP! WARNING: CPU: 0 PID: 1617 at arch/powerpc/mm/fault.c:230 do_page_fault+0x484/0x720 Modules linked in: CPU: 0 PID: 1617 Comm: sshd Tainted: G W 5.13.0-pmac-00010-g8393422eb77 #7 NIP: c001b77c LR: c001b77c CTR: 00000000 REGS: cb9e5bc0 TRAP: 0700 Tainted: G W (5.13.0-pmac-00010-g8393422eb77) MSR: 00021032 CR: 24942424 XER: 00000000 GPR00: c001b77c cb9e5c80 c1582c00 00000021 3ffffbff 085b0000 00000027 c8eb644c GPR08: 00000023 00000000 00000000 00000000 24942424 0063f8c8 00000000 000186a0 GPR16: afd52dd4 afd52dd0 afd52dcc afd52dc8 0065a990 c07640c4 cb9e5e98 cb9e5e90 GPR24: 00000040 afd4fa96 00000040 02000000 c1fda6c0 afd4fa84 00000300 cb9e5cc0 NIP [c001b77c] do_page_fault+0x484/0x720 LR [c001b77c] do_page_fault+0x484/0x720 Call Trace: [cb9e5c80] [c001b77c] do_page_fault+0x484/0x720 (unreliable) [cb9e5cb0] [c000424c] DataAccess_virt+0xd4/0xe4 --- interrupt: 300 at __copy_tofrom_user+0x110/0x20c NIP: c001f9b4 LR: c03250a0 CTR: 00000004 REGS: cb9e5cc0 TRAP: 0300 Tainted: G W (5.13.0-pmac-00010-g8393422eb77) MSR: 00009032 CR: 48028468 XER: 20000000 DAR: afd4fa84 DSISR: 0a000000 GPR00: 20726f6f cb9e5d80 c1582c00 00000004 cb9e5e3a 00000016 afd4fa80 00000000 GPR08: 3835202d 72777872 2d78722d 00000004 28028464 0063f8c8 00000000 000186a0 GPR16: afd52dd4 afd52dd0 afd52dcc afd52dc8 0065a990 c07640c4 cb9e5e98 cb9e5e90 GPR24: 00000040 afd4fa96 00000040 cb9e5e0c 00000daa a0000000 cb9e5e98 afd4fa56 NIP [c001f9b4] __copy_tofrom_user+0x110/0x20c LR [c03250a0] _copy_to_iter+0x144/0x990 --- interrupt: 300 [cb9e5d80] [c03e89c0] n_tty_read+0xa4/0x598 (unreliable) [cb9e5df0] [c03e2a0c] tty_read+0xdc/0x2b4 [cb9e5e80] [c0156bf8] vfs_read+0x274/0x340 [cb9e5f00] [c01571ac] ksys_read+0x70/0x118 [cb9e5f30] [c0016048] ret_from_syscall+0x0/0x28 --- interrupt: c00 at 0xa7855c88 NIP: a7855c88 LR: a7855c5c CTR: 00000000 REGS: cb9e5f40 TRAP: 0c00 Tainted: G W (5.13.0-pmac-00010-g8393422eb77) MSR: 0000d032 CR: 2402446c XER: 00000000 GPR00: 00000003 afd4ec70 a72137d0 0000000b afd4ecac 00004000 0065a990 00000800 GPR08: 00000000 a7947930 00000000 00000004 c15831b0 0063f8c8 00000000 000186a0 GPR16: afd52dd4 afd52dd0 afd52dcc afd52dc8 0065a990 0065a9e0 00000001 0065fac0 GPR24: 00000000 00000089 00664050 00000000 00668e30 a720c8dc a7943ff4 0065f9b0 NIP [a7855c88] 0xa7855c88 LR [a7855c5c] 0xa7855c5c --- interrupt: c00 Instruction dump: 3884aa88 38630178 48076861 807f0080 48042e45 2f830000 419e0148 3c80c079 3c60c076 38841be4 386301c0 4801f705 <0fe00000> 3860000b 4bfffe30 3c80c06b ---[ end trace fd69b91a8046c2e5 ]--- Here the problem is that by re-enterring an exception handler, kuap_save_and_lock() is called a second time with this time KUAP access locked, leading to regs->kuap being overwritten hence KUAP not being unlocked at exception exit as expected. Do not call do_IRQ() from timer_interrupt() directly. Instead, redefine do_IRQ() as a standard function named __do_IRQ(), and call it from both do_IRQ() and time_interrupt() handlers. Fixes: 3a96570ffceb ("powerpc: convert interrupt handlers to use wrappers") Cc: stable@vger.kernel.org # v5.12+ Reported-by: Stan Johnson Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/c17d234f4927d39a1d7100864a8e1145323d33a0.1628611927.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 3 +++ arch/powerpc/include/asm/irq.h | 2 +- arch/powerpc/kernel/irq.c | 7 ++++++- arch/powerpc/kernel/time.c | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index d4bdf7d274ac..6b800d3e2681 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -583,6 +583,9 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); +/* irq.c */ +DECLARE_INTERRUPT_HANDLER_ASYNC(do_IRQ); + void __noreturn unrecoverable_exception(struct pt_regs *regs); void replay_system_reset(void); diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index 4982f3711fc3..2b3278534bc1 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -52,7 +52,7 @@ extern void *mcheckirq_ctx[NR_CPUS]; extern void *hardirq_ctx[NR_CPUS]; extern void *softirq_ctx[NR_CPUS]; -extern void do_IRQ(struct pt_regs *regs); +void __do_IRQ(struct pt_regs *regs); extern void __init init_IRQ(void); extern void __do_irq(struct pt_regs *regs); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 91e63eac4e8f..551b653228c4 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -750,7 +750,7 @@ void __do_irq(struct pt_regs *regs) trace_irq_exit(regs); } -DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) +void __do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); void *cursp, *irqsp, *sirqsp; @@ -774,6 +774,11 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) set_irq_regs(old_regs); } +DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) +{ + __do_IRQ(regs); +} + static void *__init alloc_vm_stack(void) { return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP, diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e45ce427bffb..c487ba5a6e11 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -586,7 +586,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) #if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC) if (atomic_read(&ppc_n_lost_interrupts) != 0) - do_IRQ(regs); + __do_IRQ(regs); #endif old_regs = set_irq_regs(regs); -- cgit From 01fcac8e4dfc112f420dcaeb70056a74e326cacf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 10 Aug 2021 16:13:17 +0000 Subject: powerpc/interrupt: Do not call single_step_exception() from other exceptions single_step_exception() is called by emulate_single_step() which is called from (at least) alignment exception() handler and program_check_exception() handler. Redefine it as a regular __single_step_exception() which is called by both single_step_exception() handler and emulate_single_step() function. Fixes: 3a96570ffceb ("powerpc: convert interrupt handlers to use wrappers") Cc: stable@vger.kernel.org # v5.12+ Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/aed174f5cbc06f2cf95233c071d8aac948e46043.1628611921.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/traps.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index dfbce527c98e..d56254f05e17 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1104,7 +1104,7 @@ DEFINE_INTERRUPT_HANDLER(RunModeException) _exception(SIGTRAP, regs, TRAP_UNK, 0); } -DEFINE_INTERRUPT_HANDLER(single_step_exception) +static void __single_step_exception(struct pt_regs *regs) { clear_single_step(regs); clear_br_trace(regs); @@ -1121,6 +1121,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); } +DEFINE_INTERRUPT_HANDLER(single_step_exception) +{ + __single_step_exception(regs); +} + /* * After we have successfully emulated an instruction, we have to * check if the instruction was being single-stepped, and if so, @@ -1130,7 +1135,7 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) static void emulate_single_step(struct pt_regs *regs) { if (single_stepping(regs)) - single_step_exception(regs); + __single_step_exception(regs); } static inline int __parse_fpscr(unsigned long fpscr) -- cgit From cbc06f051c524dcfe52ef0d1f30647828e226d30 Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Sat, 7 Aug 2021 09:20:57 +0200 Subject: powerpc/xive: Do not skip CPU-less nodes when creating the IPIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On PowerVM, CPU-less nodes can be populated with hot-plugged CPUs at runtime. Today, the IPI is not created for such nodes, and hot-plugged CPUs use a bogus IPI, which leads to soft lockups. We can not directly allocate and request the IPI on demand because bringup_up() is called under the IRQ sparse lock. The alternative is to allocate the IPIs for all possible nodes at startup and to request the mapping on demand when the first CPU of a node is brought up. Fixes: 7dcc37b3eff9 ("powerpc/xive: Map one IPI interrupt per node") Cc: stable@vger.kernel.org # v5.13 Reported-by: Geetika Moolchandani Signed-off-by: Cédric Le Goater Tested-by: Srikar Dronamraju Tested-by: Laurent Vivier Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210807072057.184698-1-clg@kaod.org --- arch/powerpc/sysdev/xive/common.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index dbdbbc2f1dc5..943fd30095af 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -67,6 +67,7 @@ static struct irq_domain *xive_irq_domain; static struct xive_ipi_desc { unsigned int irq; char name[16]; + atomic_t started; } *xive_ipis; /* @@ -1120,7 +1121,7 @@ static const struct irq_domain_ops xive_ipi_irq_domain_ops = { .alloc = xive_ipi_irq_domain_alloc, }; -static int __init xive_request_ipi(void) +static int __init xive_init_ipis(void) { struct fwnode_handle *fwnode; struct irq_domain *ipi_domain; @@ -1144,10 +1145,6 @@ static int __init xive_request_ipi(void) struct xive_ipi_desc *xid = &xive_ipis[node]; struct xive_ipi_alloc_info info = { node }; - /* Skip nodes without CPUs */ - if (cpumask_empty(cpumask_of_node(node))) - continue; - /* * Map one IPI interrupt per node for all cpus of that node. * Since the HW interrupt number doesn't have any meaning, @@ -1159,11 +1156,6 @@ static int __init xive_request_ipi(void) xid->irq = ret; snprintf(xid->name, sizeof(xid->name), "IPI-%d", node); - - ret = request_irq(xid->irq, xive_muxed_ipi_action, - IRQF_PERCPU | IRQF_NO_THREAD, xid->name, NULL); - - WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); } return ret; @@ -1178,6 +1170,22 @@ out: return ret; } +static int __init xive_request_ipi(unsigned int cpu) +{ + struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)]; + int ret; + + if (atomic_inc_return(&xid->started) > 1) + return 0; + + ret = request_irq(xid->irq, xive_muxed_ipi_action, + IRQF_PERCPU | IRQF_NO_THREAD, + xid->name, NULL); + + WARN(ret < 0, "Failed to request IPI %d: %d\n", xid->irq, ret); + return ret; +} + static int xive_setup_cpu_ipi(unsigned int cpu) { unsigned int xive_ipi_irq = xive_ipi_cpu_to_irq(cpu); @@ -1192,6 +1200,9 @@ static int xive_setup_cpu_ipi(unsigned int cpu) if (xc->hw_ipi != XIVE_BAD_IRQ) return 0; + /* Register the IPI */ + xive_request_ipi(cpu); + /* Grab an IPI from the backend, this will populate xc->hw_ipi */ if (xive_ops->get_ipi(cpu, xc)) return -EIO; @@ -1231,6 +1242,8 @@ static void xive_cleanup_cpu_ipi(unsigned int cpu, struct xive_cpu *xc) if (xc->hw_ipi == XIVE_BAD_IRQ) return; + /* TODO: clear IPI mapping */ + /* Mask the IPI */ xive_do_source_set_mask(&xc->ipi_data, true); @@ -1253,7 +1266,7 @@ void __init xive_smp_probe(void) smp_ops->cause_ipi = xive_cause_ipi; /* Register the IPI */ - xive_request_ipi(); + xive_init_ipis(); /* Allocate and setup IPI for the boot CPU */ xive_setup_cpu_ipi(smp_processor_id()); -- cgit From 030d6dbf0c2e5fdf23ad29557f0c87a882993e26 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 22 Jul 2021 10:17:15 +0800 Subject: riscv: kexec: do not add '-mno-relax' flag if compiler doesn't support it The RISC-V special option '-mno-relax' which to disable linker relaxations is supported by GCC8+. For GCC7 and lower versions do not support this option. Fixes: fba8a8674f68 ("RISC-V: Add kexec support") Signed-off-by: Changbin Du Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index d3081e4d9600..3397ddac1a30 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -11,7 +11,7 @@ endif CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) ifdef CONFIG_KEXEC -AFLAGS_kexec_relocate.o := -mcmodel=medany -mno-relax +AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax) endif extra-y += head.o -- cgit From fdf3a7a1e0a67a52f631b055975c6ac7e0e49a65 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Mon, 26 Jul 2021 07:42:54 +0200 Subject: riscv: Fix comment regarding kernel mapping overlapping with IS_ERR_VALUE The current comment states that we check if the 64-bit kernel mapping overlaps with the last 4K of the address space that is reserved to error values in create_kernel_page_table, which is not the case since it is done in setup_vm. But anyway, remove the reference to any function and simply note that in 64-bit kernel, the check should be done as soon as the kernel mapping base address is known. Fixes: db6b84a368b4 ("riscv: Make sure the kernel mapping does not overlap with IS_ERR_VALUE") Signed-off-by: Alexandre Ghiti Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 88134cc288d9..7cb4f391d106 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -197,7 +197,7 @@ static void __init setup_bootmem(void) * if end of dram is equal to maximum addressable memory. For 64-bit * kernel, this problem can't happen here as the end of the virtual * address space is occupied by the kernel mapping then this check must - * be done in create_kernel_page_table. + * be done as soon as the kernel mapping base address is determined. */ max_mapped_addr = __pa(~(ulong)0); if (max_mapped_addr == (phys_ram_end - 1)) -- cgit From 839ad22f755132838f406751439363c07272ad87 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 30 Jul 2021 17:01:46 -0700 Subject: x86/tools: Fix objdump version check again Skip (omit) any version string info that is parenthesized. Warning: objdump version 15) is older than 2.19 Warning: Skipping posttest. where 'objdump -v' says: GNU objdump (GNU Binutils; SUSE Linux Enterprise 15) 2.35.1.20201123-7.18 Fixes: 8bee738bb1979 ("x86: Fix objdump version check in chkobjdump.awk for different formats.") Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner Reviewed-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20210731000146.2720-1-rdunlap@infradead.org --- arch/x86/tools/chkobjdump.awk | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/tools/chkobjdump.awk b/arch/x86/tools/chkobjdump.awk index fd1ab80be0de..a4cf678cf5c8 100644 --- a/arch/x86/tools/chkobjdump.awk +++ b/arch/x86/tools/chkobjdump.awk @@ -10,6 +10,7 @@ BEGIN { /^GNU objdump/ { verstr = "" + gsub(/\(.*\)/, ""); for (i = 3; i <= NF; i++) if (match($(i), "^[0-9]")) { verstr = $(i); -- cgit From 7f45621c14a209b986cd636447bb53b7f6f881c3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 12 Aug 2021 16:55:12 +0200 Subject: platform/x86: asus-nb-wmi: Allow configuring SW_TABLET_MODE method with a module option Unfortunately we have been unable to find a reliable way to detect if and how SW_TABLET_MODE reporting is supported, so we are relying on DMI quirks for this. Add a module-option to specify the SW_TABLET_MODE method so that this can be easily tested without needing to rebuild the kernel. BugLink: https://gitlab.freedesktop.org/libinput/libinput/-/issues/639 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210812145513.39117-1-hdegoede@redhat.com --- drivers/platform/x86/asus-nb-wmi.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 0cb927f0f301..9929eedf7dd8 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -41,6 +41,10 @@ static int wapf = -1; module_param(wapf, uint, 0444); MODULE_PARM_DESC(wapf, "WAPF value"); +static int tablet_mode_sw = -1; +module_param(tablet_mode_sw, uint, 0444); +MODULE_PARM_DESC(tablet_mode_sw, "Tablet mode detect: -1:auto 0:disable 1:kbd-dock 2:lid-flip"); + static struct quirk_entry *quirks; static bool asus_q500a_i8042_filter(unsigned char data, unsigned char str, @@ -477,6 +481,21 @@ static void asus_nb_wmi_quirks(struct asus_wmi_driver *driver) else wapf = quirks->wapf; + switch (tablet_mode_sw) { + case 0: + quirks->use_kbd_dock_devid = false; + quirks->use_lid_flip_devid = false; + break; + case 1: + quirks->use_kbd_dock_devid = true; + quirks->use_lid_flip_devid = false; + break; + case 2: + quirks->use_kbd_dock_devid = false; + quirks->use_lid_flip_devid = true; + break; + } + if (quirks->i8042_filter) { ret = i8042_install_filter(quirks->i8042_filter); if (ret) { -- cgit From 73fcbad691110ece47a487c9e584822070e3626f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 12 Aug 2021 16:55:13 +0200 Subject: platform/x86: asus-nb-wmi: Add tablet_mode_sw=lid-flip quirk for the TP200s The Asus TP200s / E205SA 360 degree hinges 2-in-1 supports reporting SW_TABLET_MODE info through the ASUS_WMI_DEVID_LID_FLIP WMI device-id. Add a quirk to enable this. BugLink: https://gitlab.freedesktop.org/libinput/libinput/-/issues/639 Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210812145513.39117-2-hdegoede@redhat.com --- drivers/platform/x86/asus-nb-wmi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 9929eedf7dd8..a81dc4b191b7 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -462,6 +462,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_use_lid_flip_devid, }, + { + .callback = dmi_matched, + .ident = "ASUS TP200s / E205SA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "E205SA"), + }, + .driver_data = &quirk_asus_use_lid_flip_devid, + }, {}, }; -- cgit From 88ca2521bd5b4e8b83743c01a2d4cb09325b51e9 Mon Sep 17 00:00:00 2001 From: Maximilian Heyne Date: Thu, 12 Aug 2021 13:09:27 +0000 Subject: xen/events: Fix race in set_evtchn_to_irq There is a TOCTOU issue in set_evtchn_to_irq. Rows in the evtchn_to_irq mapping are lazily allocated in this function. The check whether the row is already present and the row initialization is not synchronized. Two threads can at the same time allocate a new row for evtchn_to_irq and add the irq mapping to the their newly allocated row. One thread will overwrite what the other has set for evtchn_to_irq[row] and therefore the irq mapping is lost. This will trigger a BUG_ON later in bind_evtchn_to_cpu: INFO: pci 0000:1a:15.4: [1d0f:8061] type 00 class 0x010802 INFO: nvme 0000:1a:12.1: enabling device (0000 -> 0002) INFO: nvme nvme77: 1/0/0 default/read/poll queues CRIT: kernel BUG at drivers/xen/events/events_base.c:427! WARN: invalid opcode: 0000 [#1] SMP NOPTI WARN: Workqueue: nvme-reset-wq nvme_reset_work [nvme] WARN: RIP: e030:bind_evtchn_to_cpu+0xc2/0xd0 WARN: Call Trace: WARN: set_affinity_irq+0x121/0x150 WARN: irq_do_set_affinity+0x37/0xe0 WARN: irq_setup_affinity+0xf6/0x170 WARN: irq_startup+0x64/0xe0 WARN: __setup_irq+0x69e/0x740 WARN: ? request_threaded_irq+0xad/0x160 WARN: request_threaded_irq+0xf5/0x160 WARN: ? nvme_timeout+0x2f0/0x2f0 [nvme] WARN: pci_request_irq+0xa9/0xf0 WARN: ? pci_alloc_irq_vectors_affinity+0xbb/0x130 WARN: queue_request_irq+0x4c/0x70 [nvme] WARN: nvme_reset_work+0x82d/0x1550 [nvme] WARN: ? check_preempt_wakeup+0x14f/0x230 WARN: ? check_preempt_curr+0x29/0x80 WARN: ? nvme_irq_check+0x30/0x30 [nvme] WARN: process_one_work+0x18e/0x3c0 WARN: worker_thread+0x30/0x3a0 WARN: ? process_one_work+0x3c0/0x3c0 WARN: kthread+0x113/0x130 WARN: ? kthread_park+0x90/0x90 WARN: ret_from_fork+0x3a/0x50 This patch sets evtchn_to_irq rows via a cmpxchg operation so that they will be set only once. The row is now cleared before writing it to evtchn_to_irq in order to not create a race once the row is visible for other threads. While at it, do not require the page to be zeroed, because it will be overwritten with -1's in clear_evtchn_to_irq_row anyway. Signed-off-by: Maximilian Heyne Fixes: d0b075ffeede ("xen/events: Refactor evtchn_to_irq array to be dynamically allocated") Link: https://lore.kernel.org/r/20210812130930.127134-1-mheyne@amazon.de Reviewed-by: Boris Ostrovsky Signed-off-by: Boris Ostrovsky --- drivers/xen/events/events_base.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 154daddbdcb4..a78704ae3618 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -198,12 +198,12 @@ static void disable_dynirq(struct irq_data *data); static DEFINE_PER_CPU(unsigned int, irq_epoch); -static void clear_evtchn_to_irq_row(unsigned row) +static void clear_evtchn_to_irq_row(int *evtchn_row) { unsigned col; for (col = 0; col < EVTCHN_PER_ROW; col++) - WRITE_ONCE(evtchn_to_irq[row][col], -1); + WRITE_ONCE(evtchn_row[col], -1); } static void clear_evtchn_to_irq_all(void) @@ -213,7 +213,7 @@ static void clear_evtchn_to_irq_all(void) for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) { if (evtchn_to_irq[row] == NULL) continue; - clear_evtchn_to_irq_row(row); + clear_evtchn_to_irq_row(evtchn_to_irq[row]); } } @@ -221,6 +221,7 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) { unsigned row; unsigned col; + int *evtchn_row; if (evtchn >= xen_evtchn_max_channels()) return -EINVAL; @@ -233,11 +234,18 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) if (irq == -1) return 0; - evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL); - if (evtchn_to_irq[row] == NULL) + evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0); + if (evtchn_row == NULL) return -ENOMEM; - clear_evtchn_to_irq_row(row); + clear_evtchn_to_irq_row(evtchn_row); + + /* + * We've prepared an empty row for the mapping. If a different + * thread was faster inserting it, we can drop ours. + */ + if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL) + free_page((unsigned long) evtchn_row); } WRITE_ONCE(evtchn_to_irq[row][col], irq); -- cgit From 41535701da3324b80029cabb501e86c4fafe339d Mon Sep 17 00:00:00 2001 From: Rohith Surabattula Date: Thu, 29 Jul 2021 07:45:29 +0000 Subject: cifs: Handle race conditions during rename When rename is executed on directory which has files for which close is deferred, then rename will fail with EACCES. This patch will try to close all deferred files when EACCES is received and retry rename on a directory. Signed-off-by: Rohith Surabattula Cc: stable@vger.kernel.org # 5.13 Reviewed-by: Shyam Prasad N Signed-off-by: Steve French --- fs/cifs/inode.c | 19 +++++++++++++++++-- fs/cifs/misc.c | 16 +++++++++++----- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b96b253e7635..65f8a70cece3 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1625,7 +1625,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) goto unlink_out; } - cifs_close_all_deferred_files(tcon); + cifs_close_deferred_file(CIFS_I(inode)); if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = CIFSPOSIXDelFile(xid, tcon, full_path, @@ -2084,6 +2084,7 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir, FILE_UNIX_BASIC_INFO *info_buf_target; unsigned int xid; int rc, tmprc; + int retry_count = 0; if (flags & ~RENAME_NOREPLACE) return -EINVAL; @@ -2113,10 +2114,24 @@ cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir, goto cifs_rename_exit; } - cifs_close_all_deferred_files(tcon); + cifs_close_deferred_file(CIFS_I(d_inode(source_dentry))); + if (d_inode(target_dentry) != NULL) + cifs_close_deferred_file(CIFS_I(d_inode(target_dentry))); + rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, to_name); + if (rc == -EACCES) { + while (retry_count < 3) { + cifs_close_all_deferred_files(tcon); + rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry, + to_name); + if (rc != -EACCES) + break; + retry_count++; + } + } + /* * No-replace is the natural behavior for CIFS, so skip unlink hacks. */ diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 844abeb2b48f..cdb1ec1461de 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -723,13 +723,19 @@ void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode) { struct cifsFileInfo *cfile = NULL; - struct cifs_deferred_close *dclose; + + if (cifs_inode == NULL) + return; list_for_each_entry(cfile, &cifs_inode->openFileList, flist) { - spin_lock(&cifs_inode->deferred_lock); - if (cifs_is_deferred_close(cfile, &dclose)) - mod_delayed_work(deferredclose_wq, &cfile->deferred, 0); - spin_unlock(&cifs_inode->deferred_lock); + if (delayed_work_pending(&cfile->deferred)) { + /* + * If there is no pending work, mod_delayed_work queues new work. + * So, Increase the ref count to avoid use-after-free. + */ + if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0)) + cifsFileInfo_get(cfile); + } } } -- cgit From 9e992755be8f2d458a0bcbefd19e493483c1dba2 Mon Sep 17 00:00:00 2001 From: Rohith Surabattula Date: Mon, 9 Aug 2021 09:32:46 +0000 Subject: cifs: Call close synchronously during unlink/rename/lease break. During unlink/rename/lease break, deferred work for close is scheduled immediately but in an asynchronous manner which might lead to race with actual(unlink/rename) commands. This change will schedule close synchronously which will avoid the race conditions with other commands. Signed-off-by: Rohith Surabattula Reviewed-by: Shyam Prasad N Cc: stable@vger.kernel.org # 5.13 Signed-off-by: Steve French --- fs/cifs/cifsglob.h | 5 +++++ fs/cifs/file.c | 35 +++++++++++++++++------------------ fs/cifs/misc.c | 46 ++++++++++++++++++++++++++++++++++------------ 3 files changed, 56 insertions(+), 30 deletions(-) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index c0bfc2f01030..c6a9542ca281 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1611,6 +1611,11 @@ struct dfs_info3_param { int ttl; }; +struct file_list { + struct list_head list; + struct cifsFileInfo *cfile; +}; + /* * common struct for holding inode info when searching for or updating an * inode with new info diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0a72840a88f1..bb98fbdd22a9 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4847,17 +4847,6 @@ void cifs_oplock_break(struct work_struct *work) cifs_dbg(VFS, "Push locks rc = %d\n", rc); oplock_break_ack: - /* - * releasing stale oplock after recent reconnect of smb session using - * a now incorrect file handle is not a data integrity issue but do - * not bother sending an oplock release if session to server still is - * disconnected since oplock already released by the server - */ - if (!cfile->oplock_break_cancelled) { - rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid, - cinode); - cifs_dbg(FYI, "Oplock release rc = %d\n", rc); - } /* * When oplock break is received and there are no active * file handles but cached, then schedule deferred close immediately. @@ -4865,17 +4854,27 @@ oplock_break_ack: */ spin_lock(&CIFS_I(inode)->deferred_lock); is_deferred = cifs_is_deferred_close(cfile, &dclose); + spin_unlock(&CIFS_I(inode)->deferred_lock); if (is_deferred && cfile->deferred_close_scheduled && delayed_work_pending(&cfile->deferred)) { - /* - * If there is no pending work, mod_delayed_work queues new work. - * So, Increase the ref count to avoid use-after-free. - */ - if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0)) - cifsFileInfo_get(cfile); + if (cancel_delayed_work(&cfile->deferred)) { + _cifsFileInfo_put(cfile, false, false); + goto oplock_break_done; + } } - spin_unlock(&CIFS_I(inode)->deferred_lock); + /* + * releasing stale oplock after recent reconnect of smb session using + * a now incorrect file handle is not a data integrity issue but do + * not bother sending an oplock release if session to server still is + * disconnected since oplock already released by the server + */ + if (!cfile->oplock_break_cancelled) { + rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid, + cinode); + cifs_dbg(FYI, "Oplock release rc = %d\n", rc); + } +oplock_break_done: _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false); cifs_done_oplock_break(cinode); } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index cdb1ec1461de..9469f1cf0b46 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -723,20 +723,32 @@ void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode) { struct cifsFileInfo *cfile = NULL; + struct file_list *tmp_list, *tmp_next_list; + struct list_head file_head; if (cifs_inode == NULL) return; + INIT_LIST_HEAD(&file_head); + spin_lock(&cifs_inode->open_file_lock); list_for_each_entry(cfile, &cifs_inode->openFileList, flist) { if (delayed_work_pending(&cfile->deferred)) { - /* - * If there is no pending work, mod_delayed_work queues new work. - * So, Increase the ref count to avoid use-after-free. - */ - if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0)) - cifsFileInfo_get(cfile); + if (cancel_delayed_work(&cfile->deferred)) { + tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); + if (tmp_list == NULL) + continue; + tmp_list->cfile = cfile; + list_add_tail(&tmp_list->list, &file_head); + } } } + spin_unlock(&cifs_inode->open_file_lock); + + list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) { + _cifsFileInfo_put(tmp_list->cfile, true, false); + list_del(&tmp_list->list); + kfree(tmp_list); + } } void @@ -744,20 +756,30 @@ cifs_close_all_deferred_files(struct cifs_tcon *tcon) { struct cifsFileInfo *cfile; struct list_head *tmp; + struct file_list *tmp_list, *tmp_next_list; + struct list_head file_head; + INIT_LIST_HEAD(&file_head); spin_lock(&tcon->open_file_lock); list_for_each(tmp, &tcon->openFileList) { cfile = list_entry(tmp, struct cifsFileInfo, tlist); if (delayed_work_pending(&cfile->deferred)) { - /* - * If there is no pending work, mod_delayed_work queues new work. - * So, Increase the ref count to avoid use-after-free. - */ - if (!mod_delayed_work(deferredclose_wq, &cfile->deferred, 0)) - cifsFileInfo_get(cfile); + if (cancel_delayed_work(&cfile->deferred)) { + tmp_list = kmalloc(sizeof(struct file_list), GFP_ATOMIC); + if (tmp_list == NULL) + continue; + tmp_list->cfile = cfile; + list_add_tail(&tmp_list->list, &file_head); + } } } spin_unlock(&tcon->open_file_lock); + + list_for_each_entry_safe(tmp_list, tmp_next_list, &file_head, list) { + _cifsFileInfo_put(tmp_list->cfile, true, false); + list_del(&tmp_list->list); + kfree(tmp_list); + } } /* parses DFS refferal V3 structure -- cgit From d9d5b8961284b0051726e0fcda91d1e297e087f5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 11 Aug 2021 15:48:45 +0300 Subject: wwan: core: Avoid returning NULL from wwan_create_dev() Make wwan_create_dev() to return either valid or error pointer, In some cases it may return NULL. Prevent this by converting it to the respective error pointer. Fixes: 9a44c1cc6388 ("net: Add a WWAN subsystem") Signed-off-by: Andy Shevchenko Acked-by: Sergey Ryazanov Reviewed-by: Loic Poulain Link: https://lore.kernel.org/r/20210811124845.10955-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/wwan/wwan_core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/wwan/wwan_core.c b/drivers/net/wwan/wwan_core.c index 674a81d79db3..35ece98134c0 100644 --- a/drivers/net/wwan/wwan_core.c +++ b/drivers/net/wwan/wwan_core.c @@ -164,11 +164,14 @@ static struct wwan_device *wwan_create_dev(struct device *parent) goto done_unlock; id = ida_alloc(&wwan_dev_ids, GFP_KERNEL); - if (id < 0) + if (id < 0) { + wwandev = ERR_PTR(id); goto done_unlock; + } wwandev = kzalloc(sizeof(*wwandev), GFP_KERNEL); if (!wwandev) { + wwandev = ERR_PTR(-ENOMEM); ida_free(&wwan_dev_ids, id); goto done_unlock; } @@ -182,7 +185,8 @@ static struct wwan_device *wwan_create_dev(struct device *parent) err = device_register(&wwandev->dev); if (err) { put_device(&wwandev->dev); - wwandev = NULL; + wwandev = ERR_PTR(err); + goto done_unlock; } done_unlock: @@ -1014,8 +1018,8 @@ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops, return -EINVAL; wwandev = wwan_create_dev(parent); - if (!wwandev) - return -ENOMEM; + if (IS_ERR(wwandev)) + return PTR_ERR(wwandev); if (WARN_ON(wwandev->ops)) { wwan_remove_dev(wwandev); -- cgit From d03721a6e7e8c04261873b3840daa3ce2c5b0543 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Sun, 18 Jul 2021 11:07:53 +0200 Subject: trace/osnoise: Add a header with PREEMPT_RT additional fields Some extra flags are printed to the trace header when using the PREEMPT_RT config. The extra flags are: need-resched-lazy, preempt-lazy-depth, and migrate-disable. Without printing these fields, the osnoise specific fields are shifted by three positions, for example: # tracer: osnoise # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth MAX # || / SINGLE Interference counters: # |||| RUNTIME NOISE %% OF CPU NOISE +-----------------------------+ # TASK-PID CPU# |||| TIMESTAMP IN US IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD # | | | |||| | | | | | | | | | | <...>-741 [000] ....... 1105.690909: 1000000 234 99.97660 36 21 0 1001 22 3 <...>-742 [001] ....... 1105.691923: 1000000 281 99.97190 197 7 0 1012 35 14 <...>-743 [002] ....... 1105.691958: 1000000 1324 99.86760 118 11 0 1016 155 143 <...>-744 [003] ....... 1105.691998: 1000000 109 99.98910 21 4 0 1004 33 7 <...>-745 [004] ....... 1105.692015: 1000000 2023 99.79770 97 37 0 1023 52 18 Add a new header for osnoise with the missing fields, to be used when the PREEMPT_RT is enabled. Link: https://lkml.kernel.org/r/1f03289d2a51fde5a58c2e7def063dc630820ad1.1626598844.git.bristot@kernel.org Cc: Tom Zanussi Cc: Namhyung Kim Cc: Masami Hiramatsu Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_osnoise.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index a7e3c24dee13..03ef720b491d 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -253,10 +253,40 @@ static struct osnoise_data { */ static bool osnoise_busy; +#ifdef CONFIG_PREEMPT_RT /* * Print the osnoise header info. */ static void print_osnoise_headers(struct seq_file *s) +{ + if (osnoise_data.tainted) + seq_puts(s, "# osnoise is tainted!\n"); + + seq_puts(s, "# _-------=> irqs-off\n"); + seq_puts(s, "# / _------=> need-resched\n"); + seq_puts(s, "# | / _-----=> need-resched-lazy\n"); + seq_puts(s, "# || / _----=> hardirq/softirq\n"); + seq_puts(s, "# ||| / _---=> preempt-depth\n"); + seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); + seq_puts(s, "# ||||| / _-=> migrate-disable\n"); + + seq_puts(s, "# |||||| / "); + seq_puts(s, " MAX\n"); + + seq_puts(s, "# ||||| / "); + seq_puts(s, " SINGLE Interference counters:\n"); + + seq_puts(s, "# ||||||| RUNTIME "); + seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n"); + + seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US "); + seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n"); + + seq_puts(s, "# | | | ||||||| | | "); + seq_puts(s, " | | | | | | | |\n"); +} +#else /* CONFIG_PREEMPT_RT */ +static void print_osnoise_headers(struct seq_file *s) { if (osnoise_data.tainted) seq_puts(s, "# osnoise is tainted!\n"); @@ -279,6 +309,7 @@ static void print_osnoise_headers(struct seq_file *s) seq_puts(s, "# | | | |||| | | "); seq_puts(s, " | | | | | | | |\n"); } +#endif /* CONFIG_PREEMPT_RT */ /* * osnoise_taint - report an osnoise error. -- cgit From e1c4ad4a7f58417a6c483432b69c640670b6fe3d Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Sun, 18 Jul 2021 11:07:54 +0200 Subject: trace/timerlat: Add a header with PREEMPT_RT additional fields Some extra flags are printed to the trace header when using the PREEMPT_RT config. The extra flags are: need-resched-lazy, preempt-lazy-depth, and migrate-disable. Without printing these fields, the timerlat specific fields are shifted by three positions, for example: # tracer: timerlat # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # || / # |||| ACTIVATION # TASK-PID CPU# |||| TIMESTAMP ID CONTEXT LATENCY # | | | |||| | | | | -0 [000] d..h... 3279.798871: #1 context irq timer_latency 830 ns <...>-807 [000] ....... 3279.798881: #1 context thread timer_latency 11301 ns Add a new header for timerlat with the missing fields, to be used when the PREEMPT_RT is enabled. Link: https://lkml.kernel.org/r/babb83529a3211bd0805be0b8c21608230202c55.1626598844.git.bristot@kernel.org Cc: Tom Zanussi Cc: Namhyung Kim Cc: Masami Hiramatsu Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_osnoise.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 03ef720b491d..518a5c190b2b 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -354,6 +354,24 @@ static void trace_osnoise_sample(struct osnoise_sample *sample) /* * Print the timerlat header info. */ +#ifdef CONFIG_PREEMPT_RT +static void print_timerlat_headers(struct seq_file *s) +{ + seq_puts(s, "# _-------=> irqs-off\n"); + seq_puts(s, "# / _------=> need-resched\n"); + seq_puts(s, "# | / _-----=> need-resched-lazy\n"); + seq_puts(s, "# || / _----=> hardirq/softirq\n"); + seq_puts(s, "# ||| / _---=> preempt-depth\n"); + seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n"); + seq_puts(s, "# ||||| / _-=> migrate-disable\n"); + seq_puts(s, "# |||||| /\n"); + seq_puts(s, "# ||||||| ACTIVATION\n"); + seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID "); + seq_puts(s, " CONTEXT LATENCY\n"); + seq_puts(s, "# | | | ||||||| | | "); + seq_puts(s, " | |\n"); +} +#else /* CONFIG_PREEMPT_RT */ static void print_timerlat_headers(struct seq_file *s) { seq_puts(s, "# _-----=> irqs-off\n"); @@ -367,6 +385,7 @@ static void print_timerlat_headers(struct seq_file *s) seq_puts(s, "# | | | |||| | | "); seq_puts(s, " | |\n"); } +#endif /* CONFIG_PREEMPT_RT */ /* * Record an timerlat_sample into the tracer buffer. -- cgit From 0e05ba498dd0a19fc12868a9506be0f86cf36912 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Sun, 18 Jul 2021 11:07:55 +0200 Subject: trace/osnoise: Print a stop tracing message When using osnoise/timerlat with stop tracing, sometimes it is not clear in which CPU the stop condition was hit, mainly when using some extra events. Print a message informing in which CPU the trace stopped, like in the example below: -0 [006] d.h. 2932.676616: #1672599 context irq timer_latency 34689 ns -0 [006] dNh. 2932.676618: irq_noise: local_timer:236 start 2932.676615639 duration 2391 ns -0 [006] dNh. 2932.676620: irq_noise: virtio0-output.0:47 start 2932.676620180 duration 86 ns -0 [003] d.h. 2932.676621: #1673374 context irq timer_latency 1200 ns -0 [006] d... 2932.676623: thread_noise: swapper/6:0 start 2932.676615964 duration 4339 ns -0 [003] dNh. 2932.676623: irq_noise: local_timer:236 start 2932.676620597 duration 1881 ns -0 [006] d... 2932.676623: sched_switch: prev_comm=swapper/6 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=timerlat/6 next_pid=852 next_prio=4 timerlat/6-852 [006] .... 2932.676623: #1672599 context thread timer_latency 41931 ns -0 [003] d... 2932.676623: thread_noise: swapper/3:0 start 2932.676620854 duration 880 ns -0 [003] d... 2932.676624: sched_switch: prev_comm=swapper/3 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=timerlat/3 next_pid=849 next_prio=4 timerlat/6-852 [006] .... 2932.676624: timerlat_main: stop tracing hit on cpu 6 timerlat/3-849 [003] .... 2932.676624: #1673374 context thread timer_latency 4310 ns Link: https://lkml.kernel.org/r/b30a0d7542adba019185f44ee648e60e14923b11.1626598844.git.bristot@kernel.org Cc: Tom Zanussi Cc: Namhyung Kim Cc: Masami Hiramatsu Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_osnoise.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 518a5c190b2b..b61eefe5ccf5 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1075,9 +1075,13 @@ diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample * /* * osnoise_stop_tracing - Stop tracing and the tracer. */ -static void osnoise_stop_tracing(void) +static __always_inline void osnoise_stop_tracing(void) { struct trace_array *tr = osnoise_trace; + + trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_, + "stop tracing hit on cpu %d\n", smp_processor_id()); + tracer_tracing_off(tr); } -- cgit From 12f9951d3f311acb1d4ffe8e839bc2c07983546f Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 6 Aug 2021 21:50:27 +0200 Subject: tracing: define needed config DYNAMIC_FTRACE_WITH_ARGS Commit 2860cd8a2353 ("livepatch: Use the default ftrace_ops instead of REGS when ARGS is available") intends to enable config LIVEPATCH when ftrace with ARGS is available. However, the chain of configs to enable LIVEPATCH is incomplete, as HAVE_DYNAMIC_FTRACE_WITH_ARGS is available, but the definition of DYNAMIC_FTRACE_WITH_ARGS, combining DYNAMIC_FTRACE and HAVE_DYNAMIC_FTRACE_WITH_ARGS, needed to enable LIVEPATCH, is missing in the commit. Fortunately, ./scripts/checkkconfigsymbols.py detects this and warns: DYNAMIC_FTRACE_WITH_ARGS Referencing files: kernel/livepatch/Kconfig So, define the config DYNAMIC_FTRACE_WITH_ARGS analogously to the already existing similar configs, DYNAMIC_FTRACE_WITH_REGS and DYNAMIC_FTRACE_WITH_DIRECT_CALLS, in ./kernel/trace/Kconfig to connect the chain of configs. Link: https://lore.kernel.org/kernel-janitors/CAKXUXMwT2zS9fgyQHKUUiqo8ynZBdx2UEUu1WnV_q0OCmknqhw@mail.gmail.com/ Link: https://lkml.kernel.org/r/20210806195027.16808-1-lukas.bulwahn@gmail.com Cc: Josh Poimboeuf Cc: Jiri Kosina Cc: Peter Zijlstra Cc: Miroslav Benes Cc: stable@vger.kernel.org Fixes: 2860cd8a2353 ("livepatch: Use the default ftrace_ops instead of REGS when ARGS is available") Signed-off-by: Lukas Bulwahn Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d567b1717c4c..3ee23f4d437f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -219,6 +219,11 @@ config DYNAMIC_FTRACE_WITH_DIRECT_CALLS depends on DYNAMIC_FTRACE_WITH_REGS depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +config DYNAMIC_FTRACE_WITH_ARGS + def_bool y + depends on DYNAMIC_FTRACE + depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS + config FUNCTION_PROFILER bool "Kernel function profiler" depends on FUNCTION_TRACER -- cgit From d0ac5fbaf783d59715b8bf426fdffc8c9e84187a Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 5 Aug 2021 11:10:51 +0900 Subject: init: Suppress wrong warning for bootconfig cmdline parameter Since the 'bootconfig' command line parameter is handled before parsing the command line, it doesn't use early_param(). But in this case, kernel shows a wrong warning message about it. [ 0.013714] Kernel command line: ro console=ttyS0 bootconfig console=tty0 [ 0.013741] Unknown command line parameters: bootconfig To suppress this message, add a dummy handler for 'bootconfig'. Link: https://lkml.kernel.org/r/162812945097.77369.1849780946468010448.stgit@devnote2 Fixes: 86d1919a4fb0 ("init: print out unknown kernel parameters") Reviewed-by: Andrew Halaney Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- init/main.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/init/main.c b/init/main.c index f5b8246e8aa1..8d97aba78c3a 100644 --- a/init/main.c +++ b/init/main.c @@ -397,6 +397,12 @@ static int __init bootconfig_params(char *param, char *val, return 0; } +static int __init warn_bootconfig(char *str) +{ + /* The 'bootconfig' has been handled by bootconfig_params(). */ + return 0; +} + static void __init setup_boot_config(void) { static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata; @@ -475,9 +481,8 @@ static int __init warn_bootconfig(char *str) pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOT_CONFIG is not set.\n"); return 0; } -early_param("bootconfig", warn_bootconfig); - #endif +early_param("bootconfig", warn_bootconfig); /* Change NUL term back to "=", to make "param" the whole string. */ static void __init repair_env_string(char *param, char *val) -- cgit From 5acce0bff2a0420ce87d4591daeb867f47d552c2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 8 Aug 2021 00:30:11 -0400 Subject: tracing / histogram: Fix NULL pointer dereference on strcmp() on NULL event name The following commands: # echo 'read_max u64 size;' > synthetic_events # echo 'hist:keys=common_pid:count=count:onmax($count).trace(read_max,count)' > events/syscalls/sys_enter_read/trigger Causes: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP CPU: 4 PID: 1763 Comm: bash Not tainted 5.14.0-rc2-test+ #155 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 RIP: 0010:strcmp+0xc/0x20 Code: 75 f7 31 c0 0f b6 0c 06 88 0c 02 48 83 c0 01 84 c9 75 f1 4c 89 c0 c3 0f 1f 80 00 00 00 00 31 c0 eb 08 48 83 c0 01 84 d2 74 0f <0f> b6 14 07 3a 14 06 74 ef 19 c0 83 c8 01 c3 31 c0 c3 66 90 48 89 RSP: 0018:ffffb5fdc0963ca8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffffffb3a4e040 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9714c0d0b640 RDI: 0000000000000000 RBP: 0000000000000000 R08: 00000022986b7cde R09: ffffffffb3a4dff8 R10: 0000000000000000 R11: 0000000000000000 R12: ffff9714c50603c8 R13: 0000000000000000 R14: ffff97143fdf9e48 R15: ffff9714c01a2210 FS: 00007f1fa6785740(0000) GS:ffff9714da400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000002d863004 CR4: 00000000001706e0 Call Trace: __find_event_file+0x4e/0x80 action_create+0x6b7/0xeb0 ? kstrdup+0x44/0x60 event_hist_trigger_func+0x1a07/0x2130 trigger_process_regex+0xbd/0x110 event_trigger_write+0x71/0xd0 vfs_write+0xe9/0x310 ksys_write+0x68/0xe0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f1fa6879e87 The problem was the "trace(read_max,count)" where the "count" should be "$count" as "onmax()" only handles variables (although it really should be able to figure out that "count" is a field of sys_enter_read). But there's a path that does not find the variable and ends up passing a NULL for the event, which ends up getting passed to "strcmp()". Add a check for NULL to return and error on the command with: # cat error_log hist:syscalls:sys_enter_read: error: Couldn't create or find variable Command: hist:keys=common_pid:count=count:onmax($count).trace(read_max,count) ^ Link: https://lkml.kernel.org/r/20210808003011.4037f8d0@oasis.local.home Cc: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 50450603ec9cb tracing: Add 'onmax' hist trigger action support Reviewed-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 949ef09dc537..a48aa2a2875b 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3430,6 +3430,8 @@ trace_action_create_field_var(struct hist_trigger_data *hist_data, event = data->match_data.event; } + if (!event) + goto free; /* * At this point, we're looking at a field on another * event. Because we can't modify a hist trigger on -- cgit From 49b0b6ffe20c5344f4173f3436298782a08da4f2 Mon Sep 17 00:00:00 2001 From: "Longpeng(Mike)" Date: Thu, 12 Aug 2021 13:30:56 +0800 Subject: vsock/virtio: avoid potential deadlock when vsock device remove There's a potential deadlock case when remove the vsock device or process the RESET event: vsock_for_each_connected_socket: spin_lock_bh(&vsock_table_lock) ----------- (1) ... virtio_vsock_reset_sock: lock_sock(sk) --------------------- (2) ... spin_unlock_bh(&vsock_table_lock) lock_sock() may do initiative schedule when the 'sk' is owned by other thread at the same time, we would receivce a warning message that "scheduling while atomic". Even worse, if the next task (selected by the scheduler) try to release a 'sk', it need to request vsock_table_lock and the deadlock occur, cause the system into softlockup state. Call trace: queued_spin_lock_slowpath vsock_remove_bound vsock_remove_sock virtio_transport_release __vsock_release vsock_release __sock_release sock_close __fput ____fput So we should not require sk_lock in this case, just like the behavior in vhost_vsock or vmci. Fixes: 0ea9e1d3a9e3 ("VSOCK: Introduce virtio_transport.ko") Cc: Stefan Hajnoczi Signed-off-by: Longpeng(Mike) Reviewed-by: Stefano Garzarella Link: https://lore.kernel.org/r/20210812053056.1699-1-longpeng2@huawei.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index e0c2c992ad9c..4f7c99dfd16c 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -357,11 +357,14 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock) static void virtio_vsock_reset_sock(struct sock *sk) { - lock_sock(sk); + /* vmci_transport.c doesn't take sk_lock here either. At least we're + * under vsock_table_lock so the sock cannot disappear while we're + * executing. + */ + sk->sk_state = TCP_CLOSE; sk->sk_err = ECONNRESET; sk_error_report(sk); - release_sock(sk); } static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock) -- cgit From 064855a69003c24bd6b473b367d364e418c57625 Mon Sep 17 00:00:00 2001 From: Babu Moger Date: Mon, 2 Aug 2021 14:38:58 -0500 Subject: x86/resctrl: Fix default monitoring groups reporting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creating a new sub monitoring group in the root /sys/fs/resctrl leads to getting the "Unavailable" value for mbm_total_bytes and mbm_local_bytes on the entire filesystem. Steps to reproduce: 1. mount -t resctrl resctrl /sys/fs/resctrl/ 2. cd /sys/fs/resctrl/ 3. cat mon_data/mon_L3_00/mbm_total_bytes 23189832 4. Create sub monitor group: mkdir mon_groups/test1 5. cat mon_data/mon_L3_00/mbm_total_bytes Unavailable When a new monitoring group is created, a new RMID is assigned to the new group. But the RMID is not active yet. When the events are read on the new RMID, it is expected to report the status as "Unavailable". When the user reads the events on the default monitoring group with multiple subgroups, the events on all subgroups are consolidated together. Currently, if any of the RMID reads report as "Unavailable", then everything will be reported as "Unavailable". Fix the issue by discarding the "Unavailable" reads and reporting all the successful RMID reads. This is not a problem on Intel systems as Intel reports 0 on Inactive RMIDs. Fixes: d89b7379015f ("x86/intel_rdt/cqm: Add mon_data") Reported-by: Paweł Szulik Signed-off-by: Babu Moger Signed-off-by: Borislav Petkov Acked-by: Reinette Chatre Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=213311 Link: https://lkml.kernel.org/r/162793309296.9224.15871659871696482080.stgit@bmoger-ubuntu --- arch/x86/kernel/cpu/resctrl/monitor.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index f07c10b87a87..57e4bb695ff9 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -285,15 +285,14 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) return chunks >>= shift; } -static int __mon_event_count(u32 rmid, struct rmid_read *rr) +static u64 __mon_event_count(u32 rmid, struct rmid_read *rr) { struct mbm_state *m; u64 chunks, tval; tval = __rmid_read(rmid, rr->evtid); if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) { - rr->val = tval; - return -EINVAL; + return tval; } switch (rr->evtid) { case QOS_L3_OCCUP_EVENT_ID: @@ -305,12 +304,6 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) case QOS_L3_MBM_LOCAL_EVENT_ID: m = &rr->d->mbm_local[rmid]; break; - default: - /* - * Code would never reach here because - * an invalid event id would fail the __rmid_read. - */ - return -EINVAL; } if (rr->first) { @@ -361,23 +354,29 @@ void mon_event_count(void *info) struct rdtgroup *rdtgrp, *entry; struct rmid_read *rr = info; struct list_head *head; + u64 ret_val; rdtgrp = rr->rgrp; - if (__mon_event_count(rdtgrp->mon.rmid, rr)) - return; + ret_val = __mon_event_count(rdtgrp->mon.rmid, rr); /* - * For Ctrl groups read data from child monitor groups. + * For Ctrl groups read data from child monitor groups and + * add them together. Count events which are read successfully. + * Discard the rmid_read's reporting errors. */ head = &rdtgrp->mon.crdtgrp_list; if (rdtgrp->type == RDTCTRL_GROUP) { list_for_each_entry(entry, head, mon.crdtgrp_list) { - if (__mon_event_count(entry->mon.rmid, rr)) - return; + if (__mon_event_count(entry->mon.rmid, rr) == 0) + ret_val = 0; } } + + /* Report error if none of rmid_reads are successful */ + if (ret_val) + rr->val = ret_val; } /* -- cgit From 813bacf4109802926d86f2d7c6583c6c0a0fddb5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 8 Aug 2021 00:55:18 +0200 Subject: ARM: configs: Update the nhk8815_defconfig The platform lost the framebuffer due to a commit solving a circular dependency in v5.14-rc1, so add it back in by explicitly selecting the framebuffer. Also fix up some Kconfig options that got dropped or moved around while we're at it. Fixes: f611b1e7624c ("drm: Avoid circular dependencies for CONFIG_FB") Signed-off-by: Linus Walleij Cc: Kees Cook Link: https://lore.kernel.org/r/20210807225518.3607126-1-linus.walleij@linaro.org' Signed-off-by: Arnd Bergmann --- arch/arm/configs/nhk8815_defconfig | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig index 3f35761dc9ff..23595fc5a29a 100644 --- a/arch/arm/configs/nhk8815_defconfig +++ b/arch/arm/configs/nhk8815_defconfig @@ -15,8 +15,6 @@ CONFIG_SLAB=y CONFIG_ARCH_NOMADIK=y CONFIG_MACH_NOMADIK_8815NHK=y CONFIG_AEABI=y -CONFIG_ZBOOT_ROM_TEXT=0x0 -CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set @@ -52,9 +50,9 @@ CONFIG_MTD_BLOCK=y CONFIG_MTD_ONENAND=y CONFIG_MTD_ONENAND_VERIFY_WRITE=y CONFIG_MTD_ONENAND_GENERIC=y -CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_FSMC=y +CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_CRYPTOLOOP=y CONFIG_BLK_DEV_RAM=y @@ -97,6 +95,7 @@ CONFIG_REGULATOR=y CONFIG_DRM=y CONFIG_DRM_PANEL_TPO_TPG110=y CONFIG_DRM_PL111=y +CONFIG_FB=y CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_BACKLIGHT_PWM=y CONFIG_FRAMEBUFFER_CONSOLE=y @@ -136,9 +135,8 @@ CONFIG_NLS_ISO8859_15=y CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_DES=y +# CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y -# CONFIG_ENABLE_MUST_CHECK is not set CONFIG_DEBUG_FS=y # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set -# CONFIG_DEBUG_BUGVERBOSE is not set -- cgit From cbfece75186d6dae6e0fe2b3492ac76eb380afdb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Jul 2021 17:15:22 +0200 Subject: ARM: ixp4xx: fix building both pci drivers When both the old and the new PCI drivers are enabled in the same kernel, there are a couple of namespace conflicts that cause a build failure: drivers/pci/controller/pci-ixp4xx.c:38: error: "IXP4XX_PCI_CSR" redefined [-Werror] 38 | #define IXP4XX_PCI_CSR 0x1c | In file included from arch/arm/mach-ixp4xx/include/mach/hardware.h:23, from arch/arm/mach-ixp4xx/include/mach/io.h:15, from arch/arm/include/asm/io.h:198, from include/linux/io.h:13, from drivers/pci/controller/pci-ixp4xx.c:20: arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h:221: note: this is the location of the previous definition 221 | #define IXP4XX_PCI_CSR(x) ((volatile u32 *)(IXP4XX_PCI_CFG_BASE_VIRT+(x))) | drivers/pci/controller/pci-ixp4xx.c:148:12: error: 'ixp4xx_pci_read' redeclared as different kind of symbol 148 | static int ixp4xx_pci_read(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 *data) | ^~~~~~~~~~~~~~~ Rename both the ixp4xx_pci_read/ixp4xx_pci_write functions and the IXP4XX_PCI_CSR macro. In each case, I went with the version that has fewer callers to keep the change small. Fixes: f7821b493458 ("PCI: ixp4xx: Add a new driver for IXP4xx") Signed-off-by: Arnd Bergmann Reviewed-by: Linus Walleij Acked-by: Lorenzo Pieralisi Cc: soc@kernel.org Link: https://lore.kernel.org/r/20210721151546.2325937-1-arnd@kernel.org' Signed-off-by: Arnd Bergmann --- arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h | 48 ++++++++++++------------- drivers/pci/controller/pci-ixp4xx.c | 8 ++--- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h index abb07f105515..74e63d4531aa 100644 --- a/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h +++ b/arch/arm/mach-ixp4xx/include/mach/ixp4xx-regs.h @@ -218,30 +218,30 @@ /* * PCI Control/Status Registers */ -#define IXP4XX_PCI_CSR(x) ((volatile u32 *)(IXP4XX_PCI_CFG_BASE_VIRT+(x))) - -#define PCI_NP_AD IXP4XX_PCI_CSR(PCI_NP_AD_OFFSET) -#define PCI_NP_CBE IXP4XX_PCI_CSR(PCI_NP_CBE_OFFSET) -#define PCI_NP_WDATA IXP4XX_PCI_CSR(PCI_NP_WDATA_OFFSET) -#define PCI_NP_RDATA IXP4XX_PCI_CSR(PCI_NP_RDATA_OFFSET) -#define PCI_CRP_AD_CBE IXP4XX_PCI_CSR(PCI_CRP_AD_CBE_OFFSET) -#define PCI_CRP_WDATA IXP4XX_PCI_CSR(PCI_CRP_WDATA_OFFSET) -#define PCI_CRP_RDATA IXP4XX_PCI_CSR(PCI_CRP_RDATA_OFFSET) -#define PCI_CSR IXP4XX_PCI_CSR(PCI_CSR_OFFSET) -#define PCI_ISR IXP4XX_PCI_CSR(PCI_ISR_OFFSET) -#define PCI_INTEN IXP4XX_PCI_CSR(PCI_INTEN_OFFSET) -#define PCI_DMACTRL IXP4XX_PCI_CSR(PCI_DMACTRL_OFFSET) -#define PCI_AHBMEMBASE IXP4XX_PCI_CSR(PCI_AHBMEMBASE_OFFSET) -#define PCI_AHBIOBASE IXP4XX_PCI_CSR(PCI_AHBIOBASE_OFFSET) -#define PCI_PCIMEMBASE IXP4XX_PCI_CSR(PCI_PCIMEMBASE_OFFSET) -#define PCI_AHBDOORBELL IXP4XX_PCI_CSR(PCI_AHBDOORBELL_OFFSET) -#define PCI_PCIDOORBELL IXP4XX_PCI_CSR(PCI_PCIDOORBELL_OFFSET) -#define PCI_ATPDMA0_AHBADDR IXP4XX_PCI_CSR(PCI_ATPDMA0_AHBADDR_OFFSET) -#define PCI_ATPDMA0_PCIADDR IXP4XX_PCI_CSR(PCI_ATPDMA0_PCIADDR_OFFSET) -#define PCI_ATPDMA0_LENADDR IXP4XX_PCI_CSR(PCI_ATPDMA0_LENADDR_OFFSET) -#define PCI_ATPDMA1_AHBADDR IXP4XX_PCI_CSR(PCI_ATPDMA1_AHBADDR_OFFSET) -#define PCI_ATPDMA1_PCIADDR IXP4XX_PCI_CSR(PCI_ATPDMA1_PCIADDR_OFFSET) -#define PCI_ATPDMA1_LENADDR IXP4XX_PCI_CSR(PCI_ATPDMA1_LENADDR_OFFSET) +#define _IXP4XX_PCI_CSR(x) ((volatile u32 *)(IXP4XX_PCI_CFG_BASE_VIRT+(x))) + +#define PCI_NP_AD _IXP4XX_PCI_CSR(PCI_NP_AD_OFFSET) +#define PCI_NP_CBE _IXP4XX_PCI_CSR(PCI_NP_CBE_OFFSET) +#define PCI_NP_WDATA _IXP4XX_PCI_CSR(PCI_NP_WDATA_OFFSET) +#define PCI_NP_RDATA _IXP4XX_PCI_CSR(PCI_NP_RDATA_OFFSET) +#define PCI_CRP_AD_CBE _IXP4XX_PCI_CSR(PCI_CRP_AD_CBE_OFFSET) +#define PCI_CRP_WDATA _IXP4XX_PCI_CSR(PCI_CRP_WDATA_OFFSET) +#define PCI_CRP_RDATA _IXP4XX_PCI_CSR(PCI_CRP_RDATA_OFFSET) +#define PCI_CSR _IXP4XX_PCI_CSR(PCI_CSR_OFFSET) +#define PCI_ISR _IXP4XX_PCI_CSR(PCI_ISR_OFFSET) +#define PCI_INTEN _IXP4XX_PCI_CSR(PCI_INTEN_OFFSET) +#define PCI_DMACTRL _IXP4XX_PCI_CSR(PCI_DMACTRL_OFFSET) +#define PCI_AHBMEMBASE _IXP4XX_PCI_CSR(PCI_AHBMEMBASE_OFFSET) +#define PCI_AHBIOBASE _IXP4XX_PCI_CSR(PCI_AHBIOBASE_OFFSET) +#define PCI_PCIMEMBASE _IXP4XX_PCI_CSR(PCI_PCIMEMBASE_OFFSET) +#define PCI_AHBDOORBELL _IXP4XX_PCI_CSR(PCI_AHBDOORBELL_OFFSET) +#define PCI_PCIDOORBELL _IXP4XX_PCI_CSR(PCI_PCIDOORBELL_OFFSET) +#define PCI_ATPDMA0_AHBADDR _IXP4XX_PCI_CSR(PCI_ATPDMA0_AHBADDR_OFFSET) +#define PCI_ATPDMA0_PCIADDR _IXP4XX_PCI_CSR(PCI_ATPDMA0_PCIADDR_OFFSET) +#define PCI_ATPDMA0_LENADDR _IXP4XX_PCI_CSR(PCI_ATPDMA0_LENADDR_OFFSET) +#define PCI_ATPDMA1_AHBADDR _IXP4XX_PCI_CSR(PCI_ATPDMA1_AHBADDR_OFFSET) +#define PCI_ATPDMA1_PCIADDR _IXP4XX_PCI_CSR(PCI_ATPDMA1_PCIADDR_OFFSET) +#define PCI_ATPDMA1_LENADDR _IXP4XX_PCI_CSR(PCI_ATPDMA1_LENADDR_OFFSET) /* * PCI register values and bit definitions diff --git a/drivers/pci/controller/pci-ixp4xx.c b/drivers/pci/controller/pci-ixp4xx.c index 896a45b24236..654ac4a82beb 100644 --- a/drivers/pci/controller/pci-ixp4xx.c +++ b/drivers/pci/controller/pci-ixp4xx.c @@ -145,7 +145,7 @@ static int ixp4xx_pci_check_master_abort(struct ixp4xx_pci *p) return 0; } -static int ixp4xx_pci_read(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 *data) +static int ixp4xx_pci_read_indirect(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 *data) { ixp4xx_writel(p, IXP4XX_PCI_NP_AD, addr); @@ -170,7 +170,7 @@ static int ixp4xx_pci_read(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 *data) return ixp4xx_pci_check_master_abort(p); } -static int ixp4xx_pci_write(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 data) +static int ixp4xx_pci_write_indirect(struct ixp4xx_pci *p, u32 addr, u32 cmd, u32 data) { ixp4xx_writel(p, IXP4XX_PCI_NP_AD, addr); @@ -308,7 +308,7 @@ static int ixp4xx_pci_read_config(struct pci_bus *bus, unsigned int devfn, dev_dbg(p->dev, "read_config from %d size %d dev %d:%d:%d address: %08x cmd: %08x\n", where, size, bus_num, PCI_SLOT(devfn), PCI_FUNC(devfn), addr, cmd); - ret = ixp4xx_pci_read(p, addr, cmd, &val); + ret = ixp4xx_pci_read_indirect(p, addr, cmd, &val); if (ret) return PCIBIOS_DEVICE_NOT_FOUND; @@ -356,7 +356,7 @@ static int ixp4xx_pci_write_config(struct pci_bus *bus, unsigned int devfn, dev_dbg(p->dev, "write_config_byte %#x to %d size %d dev %d:%d:%d addr: %08x cmd %08x\n", value, where, size, bus_num, PCI_SLOT(devfn), PCI_FUNC(devfn), addr, cmd); - ret = ixp4xx_pci_write(p, addr, cmd, val); + ret = ixp4xx_pci_write_indirect(p, addr, cmd, val); if (ret) return PCIBIOS_DEVICE_NOT_FOUND; -- cgit From 1383279c6494c6b62d1d6939f34906a4d2ef721c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 5 Aug 2021 11:38:04 -0700 Subject: KVM: x86: Allow guest to set EFER.NX=1 on non-PAE 32-bit kernels Remove an ancient restriction that disallowed exposing EFER.NX to the guest if EFER.NX=0 on the host, even if NX is fully supported by the CPU. The motivation of the check, added by commit 2cc51560aed0 ("KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit"), was to rule out the case of host.EFER.NX=0 and guest.EFER.NX=1 so that KVM could run the guest with the host's EFER.NX and thus avoid context switching EFER if the only divergence was the NX bit. Fast forward to today, and KVM has long since stopped running the guest with the host's EFER.NX. Not only does KVM context switch EFER if host.EFER.NX=1 && guest.EFER.NX=0, KVM also forces host.EFER.NX=0 && guest.EFER.NX=1 when using shadow paging (to emulate SMEP). Furthermore, the entire motivation for the restriction was made obsolete over a decade ago when Intel added dedicated host and guest EFER fields in the VMCS (Nehalem timeframe), which reduced the overhead of context switching EFER from 400+ cycles (2 * WRMSR + 1 * RDMSR) to a mere ~2 cycles. In practice, the removed restriction only affects non-PAE 32-bit kernels, as EFER.NX is set during boot if NX is supported and the kernel will use PAE paging (32-bit or 64-bit), regardless of whether or not the kernel will actually use NX itself (mark PTEs non-executable). Alternatively and/or complementarily, startup_32_smp() in head_32.S could be modified to set EFER.NX=1 regardless of paging mode, thus eliminating the scenario where NX is supported but not enabled. However, that runs the risk of breaking non-KVM non-PAE kernels (though the risk is very, very low as there are no known EFER.NX errata), and also eliminates an easy-to-use mechanism for stressing KVM's handling of guest vs. host EFER across nested virtualization transitions. Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Message-Id: <20210805183804.1221554-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 739be5da3bca..fe03bd978761 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -208,30 +208,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_mmu_after_set_cpuid(vcpu); } -static int is_efer_nx(void) -{ - return host_efer & EFER_NX; -} - -static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) -{ - int i; - struct kvm_cpuid_entry2 *e, *entry; - - entry = NULL; - for (i = 0; i < vcpu->arch.cpuid_nent; ++i) { - e = &vcpu->arch.cpuid_entries[i]; - if (e->function == 0x80000001) { - entry = e; - break; - } - } - if (entry && cpuid_entry_has(entry, X86_FEATURE_NX) && !is_efer_nx()) { - cpuid_entry_clear(entry, X86_FEATURE_NX); - printk(KERN_INFO "kvm: guest NX capability removed\n"); - } -} - int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; @@ -302,7 +278,6 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, vcpu->arch.cpuid_entries = e2; vcpu->arch.cpuid_nent = cpuid->nent; - cpuid_fix_nx_cap(vcpu); kvm_update_cpuid_runtime(vcpu); kvm_vcpu_after_set_cpuid(vcpu); @@ -401,7 +376,6 @@ static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) void kvm_set_cpu_caps(void) { - unsigned int f_nx = is_efer_nx() ? F(NX) : 0; #ifdef CONFIG_X86_64 unsigned int f_gbpages = F(GBPAGES); unsigned int f_lm = F(LM); @@ -515,7 +489,7 @@ void kvm_set_cpu_caps(void) F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | F(PAT) | F(PSE36) | 0 /* Reserved */ | - f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | + F(NX) | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | F(FXSR) | F(FXSR_OPT) | f_gbpages | F(RDTSCP) | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW) ); -- cgit From ffbe17cadaf564b5da0e4eabdcff1b719e184a76 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 9 Aug 2021 07:00:58 -0400 Subject: KVM: x86: remove dead initialization hv_vcpu is initialized again a dozen lines below, and at this point vcpu->arch.hyperv is not valid. Remove the initializer. Reported-by: kernel test robot Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 0b38f944c6b6..41d2a53c5dea 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1933,7 +1933,7 @@ ret_success: void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *entry; - struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + struct kvm_vcpu_hv *hv_vcpu; entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0); if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) { -- cgit From 85aa8889b82e0eec680a21ea28dbf57c6acfe182 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Fri, 6 Aug 2021 15:22:29 -0700 Subject: kvm: vmx: Sync all matching EPTPs when injecting nested EPT fault When a nested EPT violation/misconfig is injected into the guest, the shadow EPT PTEs associated with that address need to be synced. This is done by kvm_inject_emulated_page_fault() before it calls nested_ept_inject_page_fault(). However, that will only sync the shadow EPT PTE associated with the current L1 EPTP. Since the ASID is based on EP4TA rather than the full EPTP, so syncing the current EPTP is not enough. The SPTEs associated with any other L1 EPTPs in the prev_roots cache with the same EP4TA also need to be synced. Signed-off-by: Junaid Shahid Message-Id: <20210806222229.1645356-1-junaids@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 53 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 1a52134b0c42..81160ebe6acb 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -330,6 +330,31 @@ void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu) vcpu_put(vcpu); } +#define EPTP_PA_MASK GENMASK_ULL(51, 12) + +static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) +{ + return VALID_PAGE(root_hpa) && + ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK)); +} + +static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp, + gpa_t addr) +{ + uint i; + struct kvm_mmu_root_info *cached_root; + + WARN_ON_ONCE(!mmu_is_nested(vcpu)); + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { + cached_root = &vcpu->arch.mmu->prev_roots[i]; + + if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd, + eptp)) + vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa); + } +} + static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { @@ -342,10 +367,22 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, vm_exit_reason = EXIT_REASON_PML_FULL; vmx->nested.pml_full = false; exit_qualification &= INTR_INFO_UNBLOCK_NMI; - } else if (fault->error_code & PFERR_RSVD_MASK) - vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; - else - vm_exit_reason = EXIT_REASON_EPT_VIOLATION; + } else { + if (fault->error_code & PFERR_RSVD_MASK) + vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; + else + vm_exit_reason = EXIT_REASON_EPT_VIOLATION; + + /* + * Although the caller (kvm_inject_emulated_page_fault) would + * have already synced the faulting address in the shadow EPT + * tables for the current EPTP12, we also need to sync it for + * any other cached EPTP02s based on the same EP4TA, since the + * TLB associates mappings to the EP4TA rather than the full EPTP. + */ + nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer, + fault->address); + } nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification); vmcs12->guest_physical_address = fault->address; @@ -5325,14 +5362,6 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) return nested_vmx_succeed(vcpu); } -#define EPTP_PA_MASK GENMASK_ULL(51, 12) - -static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) -{ - return VALID_PAGE(root_hpa) && - ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK)); -} - /* Emulate the INVEPT instruction */ static int handle_invept(struct kvm_vcpu *vcpu) { -- cgit From 18712c13709d2de9516c5d3414f707c4f0a9c190 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 11 Aug 2021 21:56:15 -0700 Subject: KVM: nVMX: Use vmx_need_pf_intercept() when deciding if L0 wants a #PF Use vmx_need_pf_intercept() when determining if L0 wants to handle a #PF in L2 or if the VM-Exit should be forwarded to L1. The current logic fails to account for the case where #PF is intercepted to handle guest.MAXPHYADDR < host.MAXPHYADDR and ends up reflecting all #PFs into L1. At best, L1 will complain and inject the #PF back into L2. At worst, L1 will eat the unexpected fault and cause L2 to hang on infinite page faults. Note, while the bug was technically introduced by the commit that added support for the MAXPHYADDR madness, the shame is all on commit a0c134347baf ("KVM: VMX: introduce vmx_need_pf_intercept"). Fixes: 1dbf5d68af6f ("KVM: VMX: Add guest physical address check in EPT violation and misconfig") Cc: stable@vger.kernel.org Cc: Peter Shier Cc: Oliver Upton Cc: Jim Mattson Signed-off-by: Sean Christopherson Message-Id: <20210812045615.3167686-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 81160ebe6acb..b3f77d18eb5a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5855,7 +5855,8 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, if (is_nmi(intr_info)) return true; else if (is_page_fault(intr_info)) - return vcpu->arch.apf.host_apf_flags || !enable_ept; + return vcpu->arch.apf.host_apf_flags || + vmx_need_pf_intercept(vcpu); else if (is_debug(intr_info) && vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) -- cgit From 524a1e4e381fc5e7781008d5bd420fd1357c0113 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Aug 2021 11:14:13 -0700 Subject: KVM: x86/mmu: Don't leak non-leaf SPTEs when zapping all SPTEs Pass "all ones" as the end GFN to signal "zap all" for the TDP MMU and really zap all SPTEs in this case. As is, zap_gfn_range() skips non-leaf SPTEs whose range exceeds the range to be zapped. If shadow_phys_bits is not aligned to the range size of top-level SPTEs, e.g. 512gb with 4-level paging, the "zap all" flows will skip top-level SPTEs whose range extends beyond shadow_phys_bits and leak their SPs when the VM is destroyed. Use the current upper bound (based on host.MAXPHYADDR) to detect that the caller wants to zap all SPTEs, e.g. instead of using the max theoretical gfn, 1 << (52 - 12). The more precise upper bound allows the TDP iterator to terminate its walk earlier when running on hosts with MAXPHYADDR < 52. Add a WARN on kmv->arch.tdp_mmu_pages when the TDP MMU is destroyed to help future debuggers should KVM decide to leak SPTEs again. The bug is most easily reproduced by running (and unloading!) KVM in a VM whose host.MAXPHYADDR < 39, as the SPTE for gfn=0 will be skipped. ============================================================================= BUG kvm_mmu_page_header (Not tainted): Objects remaining in kvm_mmu_page_header on __kmem_cache_shutdown() ----------------------------------------------------------------------------- Slab 0x000000004d8f7af1 objects=22 used=2 fp=0x00000000624d29ac flags=0x4000000000000200(slab|zone=1) CPU: 0 PID: 1582 Comm: rmmod Not tainted 5.14.0-rc2+ #420 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack_lvl+0x45/0x59 slab_err+0x95/0xc9 __kmem_cache_shutdown.cold+0x3c/0x158 kmem_cache_destroy+0x3d/0xf0 kvm_mmu_module_exit+0xa/0x30 [kvm] kvm_arch_exit+0x5d/0x90 [kvm] kvm_exit+0x78/0x90 [kvm] vmx_exit+0x1a/0x50 [kvm_intel] __x64_sys_delete_module+0x13f/0x220 do_syscall_64+0x3b/0xc0 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: faaf05b00aec ("kvm: x86/mmu: Support zapping SPTEs in the TDP MMU") Cc: stable@vger.kernel.org Cc: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20210812181414.3376143-2-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 0853370bd811..8783b9eb2b33 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -43,6 +43,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) if (!kvm->arch.tdp_mmu_enabled) return; + WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages)); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); /* @@ -81,8 +82,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head) void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, bool shared) { - gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); - kvm_lockdep_assert_mmu_lock_held(kvm, shared); if (!refcount_dec_and_test(&root->tdp_mmu_root_count)) @@ -94,7 +93,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root, list_del_rcu(&root->link); spin_unlock(&kvm->arch.tdp_mmu_pages_lock); - zap_gfn_range(kvm, root, 0, max_gfn, false, false, shared); + zap_gfn_range(kvm, root, 0, -1ull, false, false, shared); call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback); } @@ -724,8 +723,17 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end, bool can_yield, bool flush, bool shared) { + gfn_t max_gfn_host = 1ULL << (shadow_phys_bits - PAGE_SHIFT); + bool zap_all = (start == 0 && end >= max_gfn_host); struct tdp_iter iter; + /* + * Bound the walk at host.MAXPHYADDR, guest accesses beyond that will + * hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF, + * and so KVM will never install a SPTE for such addresses. + */ + end = min(end, max_gfn_host); + kvm_lockdep_assert_mmu_lock_held(kvm, shared); rcu_read_lock(); @@ -744,9 +752,10 @@ retry: /* * If this is a non-last-level SPTE that covers a larger range * than should be zapped, continue, and zap the mappings at a - * lower level. + * lower level, except when zapping all SPTEs. */ - if ((iter.gfn < start || + if (!zap_all && + (iter.gfn < start || iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) && !is_last_spte(iter.old_spte, iter.level)) continue; @@ -794,12 +803,11 @@ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start, void kvm_tdp_mmu_zap_all(struct kvm *kvm) { - gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); bool flush = false; int i; for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn, + flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull, flush, false); if (flush) @@ -838,7 +846,6 @@ static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm, */ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) { - gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT); struct kvm_mmu_page *next_root; struct kvm_mmu_page *root; bool flush = false; @@ -854,8 +861,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm) rcu_read_unlock(); - flush = zap_gfn_range(kvm, root, 0, max_gfn, true, flush, - true); + flush = zap_gfn_range(kvm, root, 0, -1ull, true, flush, true); /* * Put the reference acquired in -- cgit From 0103098fb4f13b447b26ed514bcd3140f6791047 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Aug 2021 11:14:14 -0700 Subject: KVM: x86/mmu: Don't step down in the TDP iterator when zapping all SPTEs Set the min_level for the TDP iterator at the root level when zapping all SPTEs to optimize the iterator's try_step_down(). Zapping a non-leaf SPTE will recursively zap all its children, thus there is no need for the iterator to attempt to step down. This avoids rereading the top-level SPTEs after they are zapped by causing try_step_down() to short-circuit. In most cases, optimizing try_step_down() will be in the noise as the cost of zapping SPTEs completely dominates the overall time. The optimization is however helpful if the zap occurs with relatively few SPTEs, e.g. if KVM is zapping in response to multiple memslot updates when userspace is adding and removing read-only memslots for option ROMs. In that case, the task doing the zapping likely isn't a vCPU thread, but it still holds mmu_lock for read and thus can be a noisy neighbor of sorts. Reviewed-by: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20210812181414.3376143-3-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 8783b9eb2b33..d80cb122b5f3 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -727,6 +727,12 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, bool zap_all = (start == 0 && end >= max_gfn_host); struct tdp_iter iter; + /* + * No need to try to step down in the iterator when zapping all SPTEs, + * zapping the top-level non-leaf SPTEs will recurse on their children. + */ + int min_level = zap_all ? root->role.level : PG_LEVEL_4K; + /* * Bound the walk at host.MAXPHYADDR, guest accesses beyond that will * hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF, @@ -738,7 +744,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, rcu_read_lock(); - tdp_root_for_each_pte(iter, root, start, end) { + for_each_tdp_pte_min_level(iter, root->spt, root->role.level, + min_level, start, end) { retry: if (can_yield && tdp_mmu_iter_cond_resched(kvm, &iter, flush, shared)) { -- cgit From ce25681d59ffc4303321e555a2d71b1946af07da Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Aug 2021 11:18:15 -0700 Subject: KVM: x86/mmu: Protect marking SPs unsync when using TDP MMU with spinlock Add yet another spinlock for the TDP MMU and take it when marking indirect shadow pages unsync. When using the TDP MMU and L1 is running L2(s) with nested TDP, KVM may encounter shadow pages for the TDP entries managed by L1 (controlling L2) when handling a TDP MMU page fault. The unsync logic is not thread safe, e.g. the kvm_mmu_page fields are not atomic, and misbehaves when a shadow page is marked unsync via a TDP MMU page fault, which runs with mmu_lock held for read, not write. Lack of a critical section manifests most visibly as an underflow of unsync_children in clear_unsync_child_bit() due to unsync_children being corrupted when multiple CPUs write it without a critical section and without atomic operations. But underflow is the best case scenario. The worst case scenario is that unsync_children prematurely hits '0' and leads to guest memory corruption due to KVM neglecting to properly sync shadow pages. Use an entirely new spinlock even though piggybacking tdp_mmu_pages_lock would functionally be ok. Usurping the lock could degrade performance when building upper level page tables on different vCPUs, especially since the unsync flow could hold the lock for a comparatively long time depending on the number of indirect shadow pages and the depth of the paging tree. For simplicity, take the lock for all MMUs, even though KVM could fairly easily know that mmu_lock is held for write. If mmu_lock is held for write, there cannot be contention for the inner spinlock, and marking shadow pages unsync across multiple vCPUs will be slow enough that bouncing the kvm_arch cacheline should be in the noise. Note, even though L2 could theoretically be given access to its own EPT entries, a nested MMU must hold mmu_lock for write and thus cannot race against a TDP MMU page fault. I.e. the additional spinlock only _needs_ to be taken by the TDP MMU, as opposed to being taken by any MMU for a VM that is running with the TDP MMU enabled. Holding mmu_lock for read also prevents the indirect shadow page from being freed. But as above, keep it simple and always take the lock. Alternative #1, the TDP MMU could simply pass "false" for can_unsync and effectively disable unsync behavior for nested TDP. Write protecting leaf shadow pages is unlikely to noticeably impact traditional L1 VMMs, as such VMMs typically don't modify TDP entries, but the same may not hold true for non-standard use cases and/or VMMs that are migrating physical pages (from L1's perspective). Alternative #2, the unsync logic could be made thread safe. In theory, simply converting all relevant kvm_mmu_page fields to atomics and using atomic bitops for the bitmap would suffice. However, (a) an in-depth audit would be required, (b) the code churn would be substantial, and (c) legacy shadow paging would incur additional atomic operations in performance sensitive paths for no benefit (to legacy shadow paging). Fixes: a2855afc7ee8 ("KVM: x86/mmu: Allow parallel page faults for the TDP MMU") Cc: stable@vger.kernel.org Cc: Ben Gardon Signed-off-by: Sean Christopherson Message-Id: <20210812181815.3378104-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/locking.rst | 8 ++++---- arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/kvm/mmu/mmu.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst index 35eca377543d..88fa495abbac 100644 --- a/Documentation/virt/kvm/locking.rst +++ b/Documentation/virt/kvm/locking.rst @@ -25,10 +25,10 @@ On x86: - vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock -- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock is - taken inside kvm->arch.mmu_lock, and cannot be taken without already - holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise - there's no need to take kvm->arch.tdp_mmu_pages_lock at all). +- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and + kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and + cannot be taken without already holding kvm->arch.mmu_lock (typically with + ``read_lock`` for the TDP MMU, thus the need for additional spinlocks). Everything else is a leaf: no other lock is taken inside the critical sections. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 974cbfb1eefe..af6ce8d4c86a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1038,6 +1038,13 @@ struct kvm_arch { struct list_head lpage_disallowed_mmu_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; + /* + * Protects marking pages unsync during page faults, as TDP MMU page + * faults only take mmu_lock for read. For simplicity, the unsync + * pages lock is always taken when marking pages unsync regardless of + * whether mmu_lock is held for read or write. + */ + spinlock_t mmu_unsync_pages_lock; struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c4f4fa23320e..47b765270239 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2535,6 +2535,7 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync) { struct kvm_mmu_page *sp; + bool locked = false; /* * Force write-protection if the page is being tracked. Note, the page @@ -2557,9 +2558,34 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync) if (sp->unsync) continue; + /* + * TDP MMU page faults require an additional spinlock as they + * run with mmu_lock held for read, not write, and the unsync + * logic is not thread safe. Take the spinklock regardless of + * the MMU type to avoid extra conditionals/parameters, there's + * no meaningful penalty if mmu_lock is held for write. + */ + if (!locked) { + locked = true; + spin_lock(&vcpu->kvm->arch.mmu_unsync_pages_lock); + + /* + * Recheck after taking the spinlock, a different vCPU + * may have since marked the page unsync. A false + * positive on the unprotected check above is not + * possible as clearing sp->unsync _must_ hold mmu_lock + * for write, i.e. unsync cannot transition from 0->1 + * while this CPU holds mmu_lock for read (or write). + */ + if (READ_ONCE(sp->unsync)) + continue; + } + WARN_ON(sp->role.level != PG_LEVEL_4K); kvm_unsync_page(vcpu, sp); } + if (locked) + spin_unlock(&vcpu->kvm->arch.mmu_unsync_pages_lock); /* * We need to ensure that the marking of unsync pages is visible @@ -5537,6 +5563,8 @@ void kvm_mmu_init_vm(struct kvm *kvm) { struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker; + spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); + if (!kvm_mmu_init_tdp_mmu(kvm)) /* * No smp_load/store wrappers needed here as we are in -- cgit From 9659281ce78de0f15a4aa124da8f7450b1399c09 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 9 Aug 2021 09:24:25 +0100 Subject: slimbus: messaging: start transaction ids from 1 instead of zero As tid is unsigned its hard to figure out if the tid is valid or invalid. So Start the transaction ids from 1 instead of zero so that we could differentiate between a valid tid and invalid tids This is useful in cases where controller would add a tid for controller specific transfers. Fixes: d3062a210930 ("slimbus: messaging: add slim_alloc/free_txn_tid()") Cc: Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210809082428.11236-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/messaging.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/slimbus/messaging.c b/drivers/slimbus/messaging.c index f2b5d347d227..6097ddc43a35 100644 --- a/drivers/slimbus/messaging.c +++ b/drivers/slimbus/messaging.c @@ -66,7 +66,7 @@ int slim_alloc_txn_tid(struct slim_controller *ctrl, struct slim_msg_txn *txn) int ret = 0; spin_lock_irqsave(&ctrl->txn_lock, flags); - ret = idr_alloc_cyclic(&ctrl->tid_idr, txn, 0, + ret = idr_alloc_cyclic(&ctrl->tid_idr, txn, 1, SLIM_MAX_TIDS, GFP_ATOMIC); if (ret < 0) { spin_unlock_irqrestore(&ctrl->txn_lock, flags); -- cgit From a263c1ff6abe0e66712f40d595bbddc7a35907f8 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 9 Aug 2021 09:24:26 +0100 Subject: slimbus: messaging: check for valid transaction id In some usecases transaction ids are dynamically allocated inside the controller driver after sending the messages which have generic acknowledge responses. So check for this before refcounting pm_runtime. Without this we would end up imbalancing runtime pm count by doing pm_runtime_put() in both slim_do_transfer() and slim_msg_response() for a single pm_runtime_get() in slim_do_transfer() Fixes: d3062a210930 ("slimbus: messaging: add slim_alloc/free_txn_tid()") Cc: Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210809082428.11236-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/messaging.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/slimbus/messaging.c b/drivers/slimbus/messaging.c index 6097ddc43a35..e5ae26227bdb 100644 --- a/drivers/slimbus/messaging.c +++ b/drivers/slimbus/messaging.c @@ -131,7 +131,8 @@ int slim_do_transfer(struct slim_controller *ctrl, struct slim_msg_txn *txn) goto slim_xfer_err; } } - + /* Initialize tid to invalid value */ + txn->tid = 0; need_tid = slim_tid_txn(txn->mt, txn->mc); if (need_tid) { @@ -163,7 +164,7 @@ int slim_do_transfer(struct slim_controller *ctrl, struct slim_msg_txn *txn) txn->mt, txn->mc, txn->la, ret); slim_xfer_err: - if (!clk_pause_msg && (!need_tid || ret == -ETIMEDOUT)) { + if (!clk_pause_msg && (txn->tid == 0 || ret == -ETIMEDOUT)) { /* * remove runtime-pm vote if this was TX only, or * if there was error during this transaction -- cgit From c0e38eaa8d5102c138e4f16658ea762417d42a8f Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 9 Aug 2021 09:24:27 +0100 Subject: slimbus: ngd: set correct device for pm For some reason we ended up using wrong device in some places for pm_runtime calls. Fix this so that NGG driver can do runtime pm correctly. Fixes: 917809e2280b ("slimbus: ngd: Add qcom SLIMBus NGD driver") Cc: Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210809082428.11236-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ngd-ctrl.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index c054e83ab636..f3ee8e036372 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -618,7 +618,7 @@ static void qcom_slim_ngd_rx(struct qcom_slim_ngd_ctrl *ctrl, u8 *buf) (mc == SLIM_USR_MC_GENERIC_ACK && mt == SLIM_MSG_MT_SRC_REFERRED_USER)) { slim_msg_response(&ctrl->ctrl, &buf[4], buf[3], len - 4); - pm_runtime_mark_last_busy(ctrl->dev); + pm_runtime_mark_last_busy(ctrl->ctrl.dev); } } @@ -1257,13 +1257,14 @@ static int qcom_slim_ngd_enable(struct qcom_slim_ngd_ctrl *ctrl, bool enable) } /* controller state should be in sync with framework state */ complete(&ctrl->qmi.qmi_comp); - if (!pm_runtime_enabled(ctrl->dev) || - !pm_runtime_suspended(ctrl->dev)) - qcom_slim_ngd_runtime_resume(ctrl->dev); + if (!pm_runtime_enabled(ctrl->ctrl.dev) || + !pm_runtime_suspended(ctrl->ctrl.dev)) + qcom_slim_ngd_runtime_resume(ctrl->ctrl.dev); else - pm_runtime_resume(ctrl->dev); - pm_runtime_mark_last_busy(ctrl->dev); - pm_runtime_put(ctrl->dev); + pm_runtime_resume(ctrl->ctrl.dev); + + pm_runtime_mark_last_busy(ctrl->ctrl.dev); + pm_runtime_put(ctrl->ctrl.dev); ret = slim_register_controller(&ctrl->ctrl); if (ret) { @@ -1389,7 +1390,7 @@ static int qcom_slim_ngd_ssr_pdr_notify(struct qcom_slim_ngd_ctrl *ctrl, /* Make sure the last dma xfer is finished */ mutex_lock(&ctrl->tx_lock); if (ctrl->state != QCOM_SLIM_NGD_CTRL_DOWN) { - pm_runtime_get_noresume(ctrl->dev); + pm_runtime_get_noresume(ctrl->ctrl.dev); ctrl->state = QCOM_SLIM_NGD_CTRL_DOWN; qcom_slim_ngd_down(ctrl); qcom_slim_ngd_exit_dma(ctrl); -- cgit From d77772538f00b7265deace6e77e555ee18365ad0 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 9 Aug 2021 09:24:28 +0100 Subject: slimbus: ngd: reset dma setup during runtime pm During suspend/resume NGD remote instance is power cycled along with remotely controlled bam dma engine. So Reset the dma configuration during this suspend resume path so that we are not dealing with any stale dma setup. Without this transactions timeout after first suspend resume path. Fixes: 917809e2280b ("slimbus: ngd: Add qcom SLIMBus NGD driver") Cc: Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210809082428.11236-5-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ngd-ctrl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index f3ee8e036372..7040293c2ee8 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -1080,7 +1080,8 @@ static void qcom_slim_ngd_setup(struct qcom_slim_ngd_ctrl *ctrl) { u32 cfg = readl_relaxed(ctrl->ngd->base); - if (ctrl->state == QCOM_SLIM_NGD_CTRL_DOWN) + if (ctrl->state == QCOM_SLIM_NGD_CTRL_DOWN || + ctrl->state == QCOM_SLIM_NGD_CTRL_ASLEEP) qcom_slim_ngd_init_dma(ctrl); /* By default enable message queues */ @@ -1131,6 +1132,7 @@ static int qcom_slim_ngd_power_up(struct qcom_slim_ngd_ctrl *ctrl) dev_info(ctrl->dev, "Subsys restart: ADSP active framer\n"); return 0; } + qcom_slim_ngd_setup(ctrl); return 0; } @@ -1618,6 +1620,7 @@ static int __maybe_unused qcom_slim_ngd_runtime_suspend(struct device *dev) struct qcom_slim_ngd_ctrl *ctrl = dev_get_drvdata(dev); int ret = 0; + qcom_slim_ngd_exit_dma(ctrl); if (!ctrl->qmi.handle) return 0; -- cgit From 57a1681095f912239c7fb4d66683ab0425973838 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Tue, 10 Aug 2021 18:03:18 +0800 Subject: ipack: tpci200: fix many double free issues in tpci200_pci_probe The function tpci200_register called by tpci200_install and tpci200_unregister called by tpci200_uninstall are in pair. However, tpci200_unregister has some cleanup operations not in the tpci200_register. So the error handling code of tpci200_pci_probe has many different double free issues. Fix this problem by moving those cleanup operations out of tpci200_unregister, into tpci200_pci_remove and reverting the previous commit 9272e5d0028d ("ipack/carriers/tpci200: Fix a double free in tpci200_pci_probe"). Fixes: 9272e5d0028d ("ipack/carriers/tpci200: Fix a double free in tpci200_pci_probe") Cc: stable@vger.kernel.org Reported-by: Dongliang Mu Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20210810100323.3938492-1-mudongliangabcd@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/ipack/carriers/tpci200.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/ipack/carriers/tpci200.c b/drivers/ipack/carriers/tpci200.c index 3461b0a7dc62..92795a0230ca 100644 --- a/drivers/ipack/carriers/tpci200.c +++ b/drivers/ipack/carriers/tpci200.c @@ -89,16 +89,13 @@ static void tpci200_unregister(struct tpci200_board *tpci200) free_irq(tpci200->info->pdev->irq, (void *) tpci200); pci_iounmap(tpci200->info->pdev, tpci200->info->interface_regs); - pci_iounmap(tpci200->info->pdev, tpci200->info->cfg_regs); pci_release_region(tpci200->info->pdev, TPCI200_IP_INTERFACE_BAR); pci_release_region(tpci200->info->pdev, TPCI200_IO_ID_INT_SPACES_BAR); pci_release_region(tpci200->info->pdev, TPCI200_MEM16_SPACE_BAR); pci_release_region(tpci200->info->pdev, TPCI200_MEM8_SPACE_BAR); - pci_release_region(tpci200->info->pdev, TPCI200_CFG_MEM_BAR); pci_disable_device(tpci200->info->pdev); - pci_dev_put(tpci200->info->pdev); } static void tpci200_enable_irq(struct tpci200_board *tpci200, @@ -527,7 +524,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev, tpci200->info = kzalloc(sizeof(struct tpci200_infos), GFP_KERNEL); if (!tpci200->info) { ret = -ENOMEM; - goto out_err_info; + goto err_tpci200; } pci_dev_get(pdev); @@ -538,7 +535,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev, if (ret) { dev_err(&pdev->dev, "Failed to allocate PCI Configuration Memory"); ret = -EBUSY; - goto out_err_pci_request; + goto err_tpci200_info; } tpci200->info->cfg_regs = ioremap( pci_resource_start(pdev, TPCI200_CFG_MEM_BAR), @@ -546,7 +543,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev, if (!tpci200->info->cfg_regs) { dev_err(&pdev->dev, "Failed to map PCI Configuration Memory"); ret = -EFAULT; - goto out_err_ioremap; + goto err_request_region; } /* Disable byte swapping for 16 bit IP module access. This will ensure @@ -569,7 +566,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev, if (ret) { dev_err(&pdev->dev, "error during tpci200 install\n"); ret = -ENODEV; - goto out_err_install; + goto err_cfg_regs; } /* Register the carrier in the industry pack bus driver */ @@ -581,7 +578,7 @@ static int tpci200_pci_probe(struct pci_dev *pdev, dev_err(&pdev->dev, "error registering the carrier on ipack driver\n"); ret = -EFAULT; - goto out_err_bus_register; + goto err_tpci200_install; } /* save the bus number given by ipack to logging purpose */ @@ -592,19 +589,16 @@ static int tpci200_pci_probe(struct pci_dev *pdev, tpci200_create_device(tpci200, i); return 0; -out_err_bus_register: +err_tpci200_install: tpci200_uninstall(tpci200); - /* tpci200->info->cfg_regs is unmapped in tpci200_uninstall */ - tpci200->info->cfg_regs = NULL; -out_err_install: - if (tpci200->info->cfg_regs) - iounmap(tpci200->info->cfg_regs); -out_err_ioremap: +err_cfg_regs: + pci_iounmap(tpci200->info->pdev, tpci200->info->cfg_regs); +err_request_region: pci_release_region(pdev, TPCI200_CFG_MEM_BAR); -out_err_pci_request: - pci_dev_put(pdev); +err_tpci200_info: kfree(tpci200->info); -out_err_info: + pci_dev_put(pdev); +err_tpci200: kfree(tpci200); return ret; } @@ -614,6 +608,12 @@ static void __tpci200_pci_remove(struct tpci200_board *tpci200) ipack_bus_unregister(tpci200->info->ipack_bus); tpci200_uninstall(tpci200); + pci_iounmap(tpci200->info->pdev, tpci200->info->cfg_regs); + + pci_release_region(tpci200->info->pdev, TPCI200_CFG_MEM_BAR); + + pci_dev_put(tpci200->info->pdev); + kfree(tpci200->info); kfree(tpci200); } -- cgit From 50f05bd114a46a74726e432bf81079d3f13a55b7 Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Tue, 10 Aug 2021 18:03:19 +0800 Subject: ipack: tpci200: fix memory leak in the tpci200_register The error handling code in tpci200_register does not free interface_regs allocated by ioremap and the current version of error handling code is problematic. Fix this by refactoring the error handling code and free interface_regs when necessary. Fixes: 43986798fd50 ("ipack: add error handling for ioremap_nocache") Cc: stable@vger.kernel.org Reported-by: Dongliang Mu Signed-off-by: Dongliang Mu Link: https://lore.kernel.org/r/20210810100323.3938492-2-mudongliangabcd@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/ipack/carriers/tpci200.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/ipack/carriers/tpci200.c b/drivers/ipack/carriers/tpci200.c index 92795a0230ca..cbfdadecb23b 100644 --- a/drivers/ipack/carriers/tpci200.c +++ b/drivers/ipack/carriers/tpci200.c @@ -254,7 +254,7 @@ static int tpci200_register(struct tpci200_board *tpci200) "(bn 0x%X, sn 0x%X) failed to allocate PCI resource for BAR 2 !", tpci200->info->pdev->bus->number, tpci200->info->pdev->devfn); - goto out_disable_pci; + goto err_disable_device; } /* Request IO ID INT space (Bar 3) */ @@ -266,7 +266,7 @@ static int tpci200_register(struct tpci200_board *tpci200) "(bn 0x%X, sn 0x%X) failed to allocate PCI resource for BAR 3 !", tpci200->info->pdev->bus->number, tpci200->info->pdev->devfn); - goto out_release_ip_space; + goto err_ip_interface_bar; } /* Request MEM8 space (Bar 5) */ @@ -277,7 +277,7 @@ static int tpci200_register(struct tpci200_board *tpci200) "(bn 0x%X, sn 0x%X) failed to allocate PCI resource for BAR 5!", tpci200->info->pdev->bus->number, tpci200->info->pdev->devfn); - goto out_release_ioid_int_space; + goto err_io_id_int_spaces_bar; } /* Request MEM16 space (Bar 4) */ @@ -288,7 +288,7 @@ static int tpci200_register(struct tpci200_board *tpci200) "(bn 0x%X, sn 0x%X) failed to allocate PCI resource for BAR 4!", tpci200->info->pdev->bus->number, tpci200->info->pdev->devfn); - goto out_release_mem8_space; + goto err_mem8_space_bar; } /* Map internal tpci200 driver user space */ @@ -302,7 +302,7 @@ static int tpci200_register(struct tpci200_board *tpci200) tpci200->info->pdev->bus->number, tpci200->info->pdev->devfn); res = -ENOMEM; - goto out_release_mem8_space; + goto err_mem16_space_bar; } /* Initialize lock that protects interface_regs */ @@ -341,18 +341,22 @@ static int tpci200_register(struct tpci200_board *tpci200) "(bn 0x%X, sn 0x%X) unable to register IRQ !", tpci200->info->pdev->bus->number, tpci200->info->pdev->devfn); - goto out_release_ioid_int_space; + goto err_interface_regs; } return 0; -out_release_mem8_space: +err_interface_regs: + pci_iounmap(tpci200->info->pdev, tpci200->info->interface_regs); +err_mem16_space_bar: + pci_release_region(tpci200->info->pdev, TPCI200_MEM16_SPACE_BAR); +err_mem8_space_bar: pci_release_region(tpci200->info->pdev, TPCI200_MEM8_SPACE_BAR); -out_release_ioid_int_space: +err_io_id_int_spaces_bar: pci_release_region(tpci200->info->pdev, TPCI200_IO_ID_INT_SPACES_BAR); -out_release_ip_space: +err_ip_interface_bar: pci_release_region(tpci200->info->pdev, TPCI200_IP_INTERFACE_BAR); -out_disable_pci: +err_disable_device: pci_disable_device(tpci200->info->pdev); return res; } -- cgit From 7a3dc4f35bf8e1a07e5c3f8ecc8ac923f48493fe Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 13 Aug 2021 12:36:14 +0200 Subject: driver core: Add missing kernel doc for device::msi_lock Fixes: 77e89afc25f3 ("PCI/MSI: Protect msi_desc::masked for multi-MSI") Reported-by: Stephen Rothwell Signed-off-by: Thomas Gleixner --- include/linux/device.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/device.h b/include/linux/device.h index e53aa5065f58..65d84b67b024 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -407,6 +407,7 @@ struct dev_links_info { * @em_pd: device's energy model performance domain * @pins: For device pin management. * See Documentation/driver-api/pin-control.rst for details. + * @msi_lock: Lock to protect MSI mask cache and mask register * @msi_list: Hosts MSI descriptors * @msi_domain: The generic MSI domain this device is using. * @numa_node: NUMA node this device is close to. -- cgit From 454bb6775202d94f0f489c4632efecdb62d3c904 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Sat, 31 Jul 2021 14:21:30 +0800 Subject: blk-mq: clear active_queues before clearing BLK_MQ_F_TAG_QUEUE_SHARED We run a test that delete and recover devcies frequently(two devices on the same host), and we found that 'active_queues' is super big after a period of time. If device a and device b share a tag set, and a is deleted, then blk_mq_exit_queue() will clear BLK_MQ_F_TAG_QUEUE_SHARED because there is only one queue that are using the tag set. However, if b is still active, the active_queues of b might never be cleared even if b is deleted. Thus clear active_queues before BLK_MQ_F_TAG_QUEUE_SHARED is cleared. Signed-off-by: Yu Kuai Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20210731062130.1533893-1-yukuai3@huawei.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 2c4ac51e54eb..2fe396385a4a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2994,10 +2994,12 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) int i; queue_for_each_hw_ctx(q, hctx, i) { - if (shared) + if (shared) { hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; - else + } else { + blk_mq_tag_idle(hctx); hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; + } } } -- cgit From 8f40d0370795313b6f1b1782035919cfc76b159f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 Aug 2021 08:57:07 -0600 Subject: tools/io_uring/io_uring-cp: sync with liburing example This example is missing a few fixes that are in the liburing version, synchronize with the upstream version. Reported-by: Sebastian Andrzej Siewior Signed-off-by: Jens Axboe --- tools/io_uring/io_uring-cp.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/tools/io_uring/io_uring-cp.c b/tools/io_uring/io_uring-cp.c index 81461813ec62..d9bd6f5f8f46 100644 --- a/tools/io_uring/io_uring-cp.c +++ b/tools/io_uring/io_uring-cp.c @@ -131,8 +131,7 @@ static int copy_file(struct io_uring *ring, off_t insize) writes = reads = offset = 0; while (insize || write_left) { - unsigned long had_reads; - int got_comp; + int had_reads, got_comp; /* * Queue up as many reads as we can @@ -174,8 +173,13 @@ static int copy_file(struct io_uring *ring, off_t insize) if (!got_comp) { ret = io_uring_wait_cqe(ring, &cqe); got_comp = 1; - } else + } else { ret = io_uring_peek_cqe(ring, &cqe); + if (ret == -EAGAIN) { + cqe = NULL; + ret = 0; + } + } if (ret < 0) { fprintf(stderr, "io_uring_peek_cqe: %s\n", strerror(-ret)); @@ -194,7 +198,7 @@ static int copy_file(struct io_uring *ring, off_t insize) fprintf(stderr, "cqe failed: %s\n", strerror(-cqe->res)); return 1; - } else if ((size_t) cqe->res != data->iov.iov_len) { + } else if (cqe->res != data->iov.iov_len) { /* Short read/write, adjust and requeue */ data->iov.iov_base += cqe->res; data->iov.iov_len -= cqe->res; @@ -221,6 +225,25 @@ static int copy_file(struct io_uring *ring, off_t insize) } } + /* wait out pending writes */ + while (writes) { + struct io_data *data; + + ret = io_uring_wait_cqe(ring, &cqe); + if (ret) { + fprintf(stderr, "wait_cqe=%d\n", ret); + return 1; + } + if (cqe->res < 0) { + fprintf(stderr, "write res=%d\n", cqe->res); + return 1; + } + data = io_uring_cqe_get_data(cqe); + free(data); + writes--; + io_uring_cqe_seen(ring, cqe); + } + return 0; } -- cgit From 45c709f8c71b525b51988e782febe84ce933e7e0 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 12 Aug 2021 17:18:10 +0200 Subject: bpf: Clear zext_dst of dead insns "access skb fields ok" verifier test fails on s390 with the "verifier bug. zext_dst is set, but no reg is defined" message. The first insns of the test prog are ... 0: 61 01 00 00 00 00 00 00 ldxw %r0,[%r1+0] 8: 35 00 00 01 00 00 00 00 jge %r0,0,1 10: 61 01 00 08 00 00 00 00 ldxw %r0,[%r1+8] ... and the 3rd one is dead (this does not look intentional to me, but this is a separate topic). sanitize_dead_code() converts dead insns into "ja -1", but keeps zext_dst. When opt_subreg_zext_lo32_rnd_hi32() tries to parse such an insn, it sees this discrepancy and bails. This problem can be seen only with JITs whose bpf_jit_needs_zext() returns true. Fix by clearning dead insns' zext_dst. The commits that contributed to this problem are: 1. 5aa5bd14c5f8 ("bpf: add initial suite for selftests"), which introduced the test with the dead code. 2. 5327ed3d44b7 ("bpf: verifier: mark verified-insn with sub-register zext flag"), which introduced the zext_dst flag. 3. 83a2881903f3 ("bpf: Account for BPF_FETCH in insn_has_def32()"), which introduced the sanity check. 4. 9183671af6db ("bpf: Fix leakage under speculation on mispredicted branches"), which bisect points to. It's best to fix this on stable branches that contain the second one, since that's the point where the inconsistency was introduced. Fixes: 5327ed3d44b7 ("bpf: verifier: mark verified-insn with sub-register zext flag") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210812151811.184086-2-iii@linux.ibm.com --- kernel/bpf/verifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f9bda5476ea5..381d3d6f24bc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11663,6 +11663,7 @@ static void sanitize_dead_code(struct bpf_verifier_env *env) if (aux_data[i].seen) continue; memcpy(insn + i, &trap, sizeof(trap)); + aux_data[i].zext_dst = false; } } -- cgit From 3776f3517ed94d40ff0e3851d7ce2ce17b63099f Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 12 Aug 2021 17:18:11 +0200 Subject: selftests, bpf: Test that dead ldx_w insns are accepted Prevent regressions related to zero-extension metadata handling during dead code sanitization. Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210812151811.184086-3-iii@linux.ibm.com --- tools/testing/selftests/bpf/verifier/dead_code.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c index 2c8935b3e65d..ee454327e5c6 100644 --- a/tools/testing/selftests/bpf/verifier/dead_code.c +++ b/tools/testing/selftests/bpf/verifier/dead_code.c @@ -159,3 +159,15 @@ .result = ACCEPT, .retval = 2, }, +{ + "dead code: zero extension", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0, +}, -- cgit From cddce01160582a5f52ada3da9626c052d852ec42 Mon Sep 17 00:00:00 2001 From: Xie Yongji Date: Fri, 13 Aug 2021 23:13:30 +0800 Subject: nbd: Aovid double completion of a request There is a race between iterating over requests in nbd_clear_que() and completing requests in recv_work(), which can lead to double completion of a request. To fix it, flush the recv worker before iterating over the requests and don't abort the completed request while iterating. Fixes: 96d97e17828f ("nbd: clear_sock on netlink disconnect") Reported-by: Jiang Yadong Signed-off-by: Xie Yongji Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20210813151330.96-1-xieyongji@bytedance.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index c38317979f74..19f5d5a8b16a 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -818,6 +818,10 @@ static bool nbd_clear_req(struct request *req, void *data, bool reserved) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); + /* don't abort one completed request */ + if (blk_mq_request_completed(req)) + return true; + mutex_lock(&cmd->lock); cmd->status = BLK_STS_IOERR; mutex_unlock(&cmd->lock); @@ -2004,15 +2008,19 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) { mutex_lock(&nbd->config_lock); nbd_disconnect(nbd); - nbd_clear_sock(nbd); - mutex_unlock(&nbd->config_lock); + sock_shutdown(nbd); /* * Make sure recv thread has finished, so it does not drop the last * config ref and try to destroy the workqueue from inside the work - * queue. + * queue. And this also ensure that we can safely call nbd_clear_que() + * to cancel the inflight I/Os. */ if (nbd->recv_workq) flush_workqueue(nbd->recv_workq); + nbd_clear_que(nbd); + nbd->task_setup = NULL; + mutex_unlock(&nbd->config_lock); + if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, &nbd->config->runtime_flags)) nbd_config_put(nbd); -- cgit From 3c603136c9f82833813af77185618de5af67676c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Aug 2021 14:42:39 -0700 Subject: bnxt: don't lock the tx queue from napi poll We can't take the tx lock from the napi poll routine, because netpoll can poll napi at any moment, including with the tx lock already held. The tx lock is protecting against two paths - the disable path, and (as Michael points out) the NETDEV_TX_BUSY case which may occur if NAPI completions race with start_xmit and both decide to re-enable the queue. For the disable/ifdown path use synchronize_net() to make sure closing the device does not race we restarting the queues. Annotate accesses to dev_state against data races. For the NAPI cleanup vs start_xmit path - appropriate barriers are already in place in the main spot where Tx queue is stopped but we need to do the same careful dance in the TX_BUSY case. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reviewed-by: Michael Chan Reviewed-by: Edwin Peer Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 54 ++++++++++++++++++------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2fe743503949..84aa25875050 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -365,6 +365,26 @@ static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb) return md_dst->u.port_info.port_id; } +static bool bnxt_txr_netif_try_stop_queue(struct bnxt *bp, + struct bnxt_tx_ring_info *txr, + struct netdev_queue *txq) +{ + netif_tx_stop_queue(txq); + + /* netif_tx_stop_queue() must be done before checking + * tx index in bnxt_tx_avail() below, because in + * bnxt_tx_int(), we update tx index before checking for + * netif_tx_queue_stopped(). + */ + smp_mb(); + if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) { + netif_tx_wake_queue(txq); + return false; + } + + return true; +} + static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); @@ -393,8 +413,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) free_size = bnxt_tx_avail(bp, txr); if (unlikely(free_size < skb_shinfo(skb)->nr_frags + 2)) { - netif_tx_stop_queue(txq); - return NETDEV_TX_BUSY; + if (bnxt_txr_netif_try_stop_queue(bp, txr, txq)) + return NETDEV_TX_BUSY; } length = skb->len; @@ -626,16 +646,7 @@ tx_done: if (netdev_xmit_more() && !tx_buf->is_push) bnxt_db_write(bp, &txr->tx_db, prod); - netif_tx_stop_queue(txq); - - /* netif_tx_stop_queue() must be done before checking - * tx index in bnxt_tx_avail() below, because in - * bnxt_tx_int(), we update tx index before checking for - * netif_tx_queue_stopped(). - */ - smp_mb(); - if (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh) - netif_tx_wake_queue(txq); + bnxt_txr_netif_try_stop_queue(bp, txr, txq); } return NETDEV_TX_OK; @@ -732,14 +743,9 @@ next_tx_int: smp_mb(); if (unlikely(netif_tx_queue_stopped(txq)) && - (bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh)) { - __netif_tx_lock(txq, smp_processor_id()); - if (netif_tx_queue_stopped(txq) && - bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh && - txr->dev_state != BNXT_DEV_STATE_CLOSING) - netif_tx_wake_queue(txq); - __netif_tx_unlock(txq); - } + bnxt_tx_avail(bp, txr) > bp->tx_wake_thresh && + READ_ONCE(txr->dev_state) != BNXT_DEV_STATE_CLOSING) + netif_tx_wake_queue(txq); } static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, @@ -9165,9 +9171,11 @@ void bnxt_tx_disable(struct bnxt *bp) if (bp->tx_ring) { for (i = 0; i < bp->tx_nr_rings; i++) { txr = &bp->tx_ring[i]; - txr->dev_state = BNXT_DEV_STATE_CLOSING; + WRITE_ONCE(txr->dev_state, BNXT_DEV_STATE_CLOSING); } } + /* Make sure napi polls see @dev_state change */ + synchronize_net(); /* Drop carrier first to prevent TX timeout */ netif_carrier_off(bp->dev); /* Stop all TX queues */ @@ -9181,8 +9189,10 @@ void bnxt_tx_enable(struct bnxt *bp) for (i = 0; i < bp->tx_nr_rings; i++) { txr = &bp->tx_ring[i]; - txr->dev_state = 0; + WRITE_ONCE(txr->dev_state, 0); } + /* Make sure napi polls see @dev_state change */ + synchronize_net(); netif_tx_wake_all_queues(bp->dev); if (bp->link_info.link_up) netif_carrier_on(bp->dev); -- cgit From 01cca6b9330ac7460de44eeeb3a0607f8aae69ff Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Aug 2021 14:42:40 -0700 Subject: bnxt: disable napi before canceling DIM napi schedules DIM, napi has to be disabled first, then DIM canceled. Noticed while reading the code. Fixes: 0bc0b97fca73 ("bnxt_en: cleanup DIM work on device shutdown") Fixes: 6a8788f25625 ("bnxt_en: add support for software dynamic interrupt moderation") Reviewed-by: Michael Chan Reviewed-by: Edwin Peer Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 84aa25875050..721b5df36311 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9134,10 +9134,9 @@ static void bnxt_disable_napi(struct bnxt *bp) for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_cp_ring_info *cpr = &bp->bnapi[i]->cp_ring; + napi_disable(&bp->bnapi[i]->napi); if (bp->bnapi[i]->rx_ring) cancel_work_sync(&cpr->dim.work); - - napi_disable(&bp->bnapi[i]->napi); } } -- cgit From e8d8c5d80f5e9d4586c68061b62c642752289095 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Aug 2021 14:42:41 -0700 Subject: bnxt: make sure xmit_more + errors does not miss doorbells skbs are freed on error and not put on the ring. We may, however, be in a situation where we're freeing the last skb of a batch, and there is a doorbell ring pending because of xmit_more() being true earlier. Make sure we ring the door bell in such situations. Since errors are rare don't pay attention to xmit_more() and just always flush the pending frames. The busy case should be safe to be left alone because it can only happen if start_xmit races with completions and they both enable the queue. In that case the kick can't be pending. Noticed while reading the code. Fixes: 4d172f21cefe ("bnxt_en: Implement xmit_more.") Reviewed-by: Michael Chan Reviewed-by: Edwin Peer Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 39 ++++++++++++++++++++----------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 + 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 721b5df36311..389016ea65cf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -72,7 +72,8 @@ #include "bnxt_debugfs.h" #define BNXT_TX_TIMEOUT (5 * HZ) -#define BNXT_DEF_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_HW) +#define BNXT_DEF_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_HW | \ + NETIF_MSG_TX_ERR) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Broadcom BCM573xx network driver"); @@ -365,6 +366,13 @@ static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb) return md_dst->u.port_info.port_id; } +static void bnxt_txr_db_kick(struct bnxt *bp, struct bnxt_tx_ring_info *txr, + u16 prod) +{ + bnxt_db_write(bp, &txr->tx_db, prod); + txr->kick_pending = 0; +} + static bool bnxt_txr_netif_try_stop_queue(struct bnxt *bp, struct bnxt_tx_ring_info *txr, struct netdev_queue *txq) @@ -413,6 +421,10 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) free_size = bnxt_tx_avail(bp, txr); if (unlikely(free_size < skb_shinfo(skb)->nr_frags + 2)) { + /* We must have raced with NAPI cleanup */ + if (net_ratelimit() && txr->kick_pending) + netif_warn(bp, tx_err, dev, + "bnxt: ring busy w/ flush pending!\n"); if (bnxt_txr_netif_try_stop_queue(bp, txr, txq)) return NETDEV_TX_BUSY; } @@ -537,21 +549,16 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) normal_tx: if (length < BNXT_MIN_PKT_SIZE) { pad = BNXT_MIN_PKT_SIZE - length; - if (skb_pad(skb, pad)) { + if (skb_pad(skb, pad)) /* SKB already freed. */ - tx_buf->skb = NULL; - return NETDEV_TX_OK; - } + goto tx_kick_pending; length = BNXT_MIN_PKT_SIZE; } mapping = dma_map_single(&pdev->dev, skb->data, len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&pdev->dev, mapping))) { - dev_kfree_skb_any(skb); - tx_buf->skb = NULL; - return NETDEV_TX_OK; - } + if (unlikely(dma_mapping_error(&pdev->dev, mapping))) + goto tx_free; dma_unmap_addr_set(tx_buf, mapping, mapping); flags = (len << TX_BD_LEN_SHIFT) | TX_BD_TYPE_LONG_TX_BD | @@ -638,13 +645,15 @@ normal_tx: txr->tx_prod = prod; if (!netdev_xmit_more() || netif_xmit_stopped(txq)) - bnxt_db_write(bp, &txr->tx_db, prod); + bnxt_txr_db_kick(bp, txr, prod); + else + txr->kick_pending = 1; tx_done: if (unlikely(bnxt_tx_avail(bp, txr) <= MAX_SKB_FRAGS + 1)) { if (netdev_xmit_more() && !tx_buf->is_push) - bnxt_db_write(bp, &txr->tx_db, prod); + bnxt_txr_db_kick(bp, txr, prod); bnxt_txr_netif_try_stop_queue(bp, txr, txq); } @@ -659,7 +668,6 @@ tx_dma_error: /* start back at beginning and unmap skb */ prod = txr->tx_prod; tx_buf = &txr->tx_buf_ring[prod]; - tx_buf->skb = NULL; dma_unmap_single(&pdev->dev, dma_unmap_addr(tx_buf, mapping), skb_headlen(skb), PCI_DMA_TODEVICE); prod = NEXT_TX(prod); @@ -673,7 +681,12 @@ tx_dma_error: PCI_DMA_TODEVICE); } +tx_free: dev_kfree_skb_any(skb); +tx_kick_pending: + if (txr->kick_pending) + bnxt_txr_db_kick(bp, txr, txr->tx_prod); + txr->tx_buf_ring[txr->tx_prod].skb = NULL; return NETDEV_TX_OK; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index bcf8d00b8c80..ba4e0fc38520 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -786,6 +786,7 @@ struct bnxt_tx_ring_info { u16 tx_prod; u16 tx_cons; u16 txq_index; + u8 kick_pending; struct bnxt_db_info tx_db; struct tx_bd *tx_desc_ring[MAX_TX_PAGES]; -- cgit From fb9f7190092d2bbd1f8f0b1cc252732cbe99a87e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 12 Aug 2021 14:42:42 -0700 Subject: bnxt: count Tx drops Drivers should count packets they are dropping. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reviewed-by: Michael Chan Reviewed-by: Edwin Peer Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 389016ea65cf..dd2d2a5fef15 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -412,6 +412,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) i = skb_get_queue_mapping(skb); if (unlikely(i >= bp->tx_nr_rings)) { dev_kfree_skb_any(skb); + atomic_long_inc(&dev->tx_dropped); return NETDEV_TX_OK; } @@ -687,6 +688,7 @@ tx_kick_pending: if (txr->kick_pending) bnxt_txr_db_kick(bp, txr, txr->tx_prod); txr->tx_buf_ring[txr->tx_prod].skb = NULL; + atomic_long_inc(&dev->tx_dropped); return NETDEV_TX_OK; } -- cgit From c1e64c0aec8cb0499e61af7ea086b59abba97945 Mon Sep 17 00:00:00 2001 From: Maxim Kochetkov Date: Wed, 11 Aug 2021 10:10:36 +0300 Subject: soc: fsl: qe: fix static checker warning The patch be7ecbd240b2: "soc: fsl: qe: convert QE interrupt controller to platform_device" from Aug 3, 2021, leads to the following static checker warning: drivers/soc/fsl/qe/qe_ic.c:438 qe_ic_init() warn: unsigned 'qe_ic->virq_low' is never less than zero. In old variant irq_of_parse_and_map() returns zero if failed so unsigned int for virq_high/virq_low was ok. In new variant platform_get_irq() returns negative error codes if failed so we need to use int for virq_high/virq_low. Also simplify high_handler checking and remove the curly braces to make checkpatch happy. Fixes: be7ecbd240b2 ("soc: fsl: qe: convert QE interrupt controller to platform_device") Signed-off-by: Maxim Kochetkov Reported-by: Dan Carpenter Signed-off-by: Li Yang --- drivers/soc/fsl/qe/qe_ic.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/soc/fsl/qe/qe_ic.c b/drivers/soc/fsl/qe/qe_ic.c index e710d554425d..bbae3d39c7be 100644 --- a/drivers/soc/fsl/qe/qe_ic.c +++ b/drivers/soc/fsl/qe/qe_ic.c @@ -54,8 +54,8 @@ struct qe_ic { struct irq_chip hc_irq; /* VIRQ numbers of QE high/low irqs */ - unsigned int virq_high; - unsigned int virq_low; + int virq_high; + int virq_low; }; /* @@ -435,11 +435,10 @@ static int qe_ic_init(struct platform_device *pdev) qe_ic->virq_high = platform_get_irq(pdev, 0); qe_ic->virq_low = platform_get_irq(pdev, 1); - if (qe_ic->virq_low < 0) { + if (qe_ic->virq_low <= 0) return -ENODEV; - } - if (qe_ic->virq_high != qe_ic->virq_low) { + if (qe_ic->virq_high > 0 && qe_ic->virq_high != qe_ic->virq_low) { low_handler = qe_ic_cascade_low; high_handler = qe_ic_cascade_high; } else { @@ -459,7 +458,7 @@ static int qe_ic_init(struct platform_device *pdev) irq_set_handler_data(qe_ic->virq_low, qe_ic); irq_set_chained_handler(qe_ic->virq_low, low_handler); - if (qe_ic->virq_high && qe_ic->virq_high != qe_ic->virq_low) { + if (high_handler) { irq_set_handler_data(qe_ic->virq_high, qe_ic); irq_set_chained_handler(qe_ic->virq_high, high_handler); } -- cgit From 6c7a00b843370feaf7710cef2350367c7e61cd1a Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Fri, 13 Aug 2021 16:54:24 -0700 Subject: kasan, kmemleak: reset tags when scanning block Patch series "kasan, slub: reset tag when printing address", v3. With hardware tag-based kasan enabled, we reset the tag when we access metadata to avoid from false alarm. This patch (of 2): Kmemleak needs to scan kernel memory to check memory leak. With hardware tag-based kasan enabled, when it scans on the invalid slab and dereference, the issue will occur as below. Hardware tag-based KASAN doesn't use compiler instrumentation, we can not use kasan_disable_current() to ignore tag check. Based on the below report, there are 11 0xf7 granules, which amounts to 176 bytes, and the object is allocated from the kmalloc-256 cache. So when kmemleak accesses the last 256-176 bytes, it causes faults, as those are marked with KASAN_KMALLOC_REDZONE == KASAN_TAG_INVALID == 0xfe. Thus, we reset tags before accessing metadata to avoid from false positives. BUG: KASAN: out-of-bounds in scan_block+0x58/0x170 Read at addr f7ff0000c0074eb0 by task kmemleak/138 Pointer tag: [f7], memory tag: [fe] CPU: 7 PID: 138 Comm: kmemleak Not tainted 5.14.0-rc2-00001-g8cae8cd89f05-dirty #134 Hardware name: linux,dummy-virt (DT) Call trace: dump_backtrace+0x0/0x1b0 show_stack+0x1c/0x30 dump_stack_lvl+0x68/0x84 print_address_description+0x7c/0x2b4 kasan_report+0x138/0x38c __do_kernel_fault+0x190/0x1c4 do_tag_check_fault+0x78/0x90 do_mem_abort+0x44/0xb4 el1_abort+0x40/0x60 el1h_64_sync_handler+0xb4/0xd0 el1h_64_sync+0x78/0x7c scan_block+0x58/0x170 scan_gray_list+0xdc/0x1a0 kmemleak_scan+0x2ac/0x560 kmemleak_scan_thread+0xb0/0xe0 kthread+0x154/0x160 ret_from_fork+0x10/0x18 Allocated by task 0: kasan_save_stack+0x2c/0x60 __kasan_kmalloc+0xec/0x104 __kmalloc+0x224/0x3c4 __register_sysctl_paths+0x200/0x290 register_sysctl_table+0x2c/0x40 sysctl_init+0x20/0x34 proc_sys_init+0x3c/0x48 proc_root_init+0x80/0x9c start_kernel+0x648/0x6a4 __primary_switched+0xc0/0xc8 Freed by task 0: kasan_save_stack+0x2c/0x60 kasan_set_track+0x2c/0x40 kasan_set_free_info+0x44/0x54 ____kasan_slab_free.constprop.0+0x150/0x1b0 __kasan_slab_free+0x14/0x20 slab_free_freelist_hook+0xa4/0x1fc kfree+0x1e8/0x30c put_fs_context+0x124/0x220 vfs_kern_mount.part.0+0x60/0xd4 kern_mount+0x24/0x4c bdev_cache_init+0x70/0x9c vfs_caches_init+0xdc/0xf4 start_kernel+0x638/0x6a4 __primary_switched+0xc0/0xc8 The buggy address belongs to the object at ffff0000c0074e00 which belongs to the cache kmalloc-256 of size 256 The buggy address is located 176 bytes inside of 256-byte region [ffff0000c0074e00, ffff0000c0074f00) The buggy address belongs to the page: page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x100074 head:(____ptrval____) order:2 compound_mapcount:0 compound_pincount:0 flags: 0xbfffc0000010200(slab|head|node=0|zone=2|lastcpupid=0xffff|kasantag=0x0) raw: 0bfffc0000010200 0000000000000000 dead000000000122 f5ff0000c0002300 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff0000c0074c00: f0 f0 f0 f0 f0 f0 f0 f0 f0 fe fe fe fe fe fe fe ffff0000c0074d00: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe >ffff0000c0074e00: f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 f7 fe fe fe fe fe ^ ffff0000c0074f00: fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe fe ffff0000c0075000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Disabling lock debugging due to kernel taint kmemleak: 181 new suspected memory leaks (see /sys/kernel/debug/kmemleak) Link: https://lkml.kernel.org/r/20210804090957.12393-1-Kuan-Ying.Lee@mediatek.com Link: https://lkml.kernel.org/r/20210804090957.12393-2-Kuan-Ying.Lee@mediatek.com Signed-off-by: Kuan-Ying Lee Acked-by: Catalin Marinas Reviewed-by: Andrey Konovalov Cc: Marco Elver Cc: Nicholas Tang Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Chinwen Chang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 228a2fbe0657..73d46d16d575 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -290,7 +290,7 @@ static void hex_dump_object(struct seq_file *seq, warn_or_seq_printf(seq, " hex dump (first %zu bytes):\n", len); kasan_disable_current(); warn_or_seq_hex_dump(seq, DUMP_PREFIX_NONE, HEX_ROW_SIZE, - HEX_GROUP_SIZE, ptr, len, HEX_ASCII); + HEX_GROUP_SIZE, kasan_reset_tag((void *)ptr), len, HEX_ASCII); kasan_enable_current(); } @@ -1171,7 +1171,7 @@ static bool update_checksum(struct kmemleak_object *object) kasan_disable_current(); kcsan_disable_current(); - object->checksum = crc32(0, (void *)object->pointer, object->size); + object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size); kasan_enable_current(); kcsan_enable_current(); @@ -1246,7 +1246,7 @@ static void scan_block(void *_start, void *_end, break; kasan_disable_current(); - pointer = *ptr; + pointer = *(unsigned long *)kasan_reset_tag((void *)ptr); kasan_enable_current(); untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer); -- cgit From 340caf178ddc2efb0294afaf54c715f7928c258e Mon Sep 17 00:00:00 2001 From: Kuan-Ying Lee Date: Fri, 13 Aug 2021 16:54:27 -0700 Subject: kasan, slub: reset tag when printing address The address still includes the tags when it is printed. With hardware tag-based kasan enabled, we will get a false positive KASAN issue when we access metadata. Reset the tag before we access the metadata. Link: https://lkml.kernel.org/r/20210804090957.12393-3-Kuan-Ying.Lee@mediatek.com Fixes: aa1ef4d7b3f6 ("kasan, mm: reset tags when accessing metadata") Signed-off-by: Kuan-Ying Lee Reviewed-by: Marco Elver Reviewed-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Catalin Marinas Cc: Chinwen Chang Cc: Nicholas Tang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index af984e4990e8..1583354fbf48 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -576,8 +576,8 @@ static void print_section(char *level, char *text, u8 *addr, unsigned int length) { metadata_access_enable(); - print_hex_dump(level, kasan_reset_tag(text), DUMP_PREFIX_ADDRESS, - 16, 1, addr, length, 1); + print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, + 16, 1, kasan_reset_tag((void *)addr), length, 1); metadata_access_disable(); } -- cgit From 1ed7ce574c136569f55fb5c32e69e382c77ba500 Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Fri, 13 Aug 2021 16:54:31 -0700 Subject: slub: fix kmalloc_pagealloc_invalid_free unit test The unit test kmalloc_pagealloc_invalid_free makes sure that for the higher order slub allocation which goes to page allocator, the free is called with the correct address i.e. the virtual address of the head page. Commit f227f0faf63b ("slub: fix unreclaimable slab stat for bulk free") unified the free code paths for page allocator based slub allocations but instead of using the address passed by the caller, it extracted the address from the page. Thus making the unit test kmalloc_pagealloc_invalid_free moot. So, fix this by using the address passed by the caller. Should we fix this? I think yes because dev expect kasan to catch these type of programming bugs. Link: https://lkml.kernel.org/r/20210802180819.1110165-1-shakeelb@google.com Fixes: f227f0faf63b ("slub: fix unreclaimable slab stat for bulk free") Signed-off-by: Shakeel Butt Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Acked-by: Roman Gushchin Cc: Michal Hocko Cc: Muchun Song Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 1583354fbf48..5c2a13d66e71 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3236,12 +3236,12 @@ struct detached_freelist { struct kmem_cache *s; }; -static inline void free_nonslab_page(struct page *page) +static inline void free_nonslab_page(struct page *page, void *object) { unsigned int order = compound_order(page); VM_BUG_ON_PAGE(!PageCompound(page), page); - kfree_hook(page_address(page)); + kfree_hook(object); mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); __free_pages(page, order); } @@ -3282,7 +3282,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, if (!s) { /* Handle kalloc'ed objects */ if (unlikely(!PageSlab(page))) { - free_nonslab_page(page); + free_nonslab_page(page, object); p[size] = NULL; /* mark object processed */ return size; } @@ -4258,7 +4258,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { - free_nonslab_page(page); + free_nonslab_page(page, object); return; } slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); -- cgit From a7f1d48585b34730765dcda09ead6edc4ac16a5c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Fri, 13 Aug 2021 16:54:34 -0700 Subject: mm: slub: fix slub_debug disabling for list of slabs Vijayanand Jitta reports: Consider the scenario where CONFIG_SLUB_DEBUG_ON is set and we would want to disable slub_debug for few slabs. Using boot parameter with slub_debug=-,slab_name syntax doesn't work as expected i.e; only disabling debugging for the specified list of slabs. Instead it disables debugging for all slabs, which is wrong. This patch fixes it by delaying the moment when the global slub_debug flags variable is updated. In case a "slub_debug=-,slab_name" has been passed, the global flags remain as initialized (depending on CONFIG_SLUB_DEBUG_ON enabled or disabled) and are not simply reset to 0. Link: https://lkml.kernel.org/r/8a3d992a-473a-467b-28a0-4ad2ff60ab82@suse.cz Signed-off-by: Vlastimil Babka Reported-by: Vijayanand Jitta Reviewed-by: Vijayanand Jitta Acked-by: David Rientjes Cc: Christoph Lameter Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Vinayak Menon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 5c2a13d66e71..f77d8cd79ef7 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1400,12 +1400,13 @@ check_slabs: static int __init setup_slub_debug(char *str) { slab_flags_t flags; + slab_flags_t global_flags; char *saved_str; char *slab_list; bool global_slub_debug_changed = false; bool slab_list_specified = false; - slub_debug = DEBUG_DEFAULT_FLAGS; + global_flags = DEBUG_DEFAULT_FLAGS; if (*str++ != '=' || !*str) /* * No options specified. Switch on full debugging. @@ -1417,7 +1418,7 @@ static int __init setup_slub_debug(char *str) str = parse_slub_debug_flags(str, &flags, &slab_list, true); if (!slab_list) { - slub_debug = flags; + global_flags = flags; global_slub_debug_changed = true; } else { slab_list_specified = true; @@ -1426,16 +1427,18 @@ static int __init setup_slub_debug(char *str) /* * For backwards compatibility, a single list of flags with list of - * slabs means debugging is only enabled for those slabs, so the global - * slub_debug should be 0. We can extended that to multiple lists as + * slabs means debugging is only changed for those slabs, so the global + * slub_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending + * on CONFIG_SLUB_DEBUG_ON). We can extended that to multiple lists as * long as there is no option specifying flags without a slab list. */ if (slab_list_specified) { if (!global_slub_debug_changed) - slub_debug = 0; + global_flags = slub_debug; slub_debug_string = saved_str; } out: + slub_debug = global_flags; if (slub_debug != 0 || slub_debug_string) static_branch_enable(&slub_debug_enabled); else -- cgit From eb2faa513c246ed47ae34a205928ab663bc5a18f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 13 Aug 2021 16:54:37 -0700 Subject: mm/madvise: report SIGBUS as -EFAULT for MADV_POPULATE_(READ|WRITE) Doing some extended tests and polishing the man page update for MADV_POPULATE_(READ|WRITE), I realized that we end up converting also SIGBUS (via -EFAULT) to -EINVAL, making it look like yet another madvise() user error. We want to report only problematic mappings and permission problems that the user could have know as -EINVAL. Let's not convert -EFAULT arising due to SIGBUS (or SIGSEGV) to -EINVAL, but instead indicate -EFAULT to user space. While we could also convert it to -ENOMEM, using -EFAULT looks more helpful when user space might want to troubleshoot what's going wrong: MADV_POPULATE_(READ|WRITE) is not part of an final Linux release and we can still adjust the behavior. Link: https://lkml.kernel.org/r/20210726154932.102880-1-david@redhat.com Fixes: 4ca9b3859dac ("mm/madvise: introduce MADV_POPULATE_(READ|WRITE) to prefault page tables") Signed-off-by: David Hildenbrand Cc: Arnd Bergmann Cc: Michal Hocko Cc: Oscar Salvador Cc: Matthew Wilcox (Oracle) Cc: Andrea Arcangeli Cc: Minchan Kim Cc: Jann Horn Cc: Jason Gunthorpe Cc: Dave Hansen Cc: Hugh Dickins Cc: Rik van Riel Cc: Michael S. Tsirkin Cc: Kirill A. Shutemov Cc: Vlastimil Babka Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Thomas Bogendoerfer Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Chris Zankel Cc: Max Filippov Cc: Mike Kravetz Cc: Peter Xu Cc: Rolf Eike Beer Cc: Ram Pai Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/gup.c | 7 +++++-- mm/madvise.c | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 42b8b1fa6521..b94717977d17 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1558,9 +1558,12 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start, gup_flags |= FOLL_WRITE; /* - * See check_vma_flags(): Will return -EFAULT on incompatible mappings - * or with insufficient permissions. + * We want to report -EINVAL instead of -EFAULT for any permission + * problems or incompatible mappings. */ + if (check_vma_flags(vma, gup_flags)) + return -EINVAL; + return __get_user_pages(mm, start, nr_pages, gup_flags, NULL, NULL, locked); } diff --git a/mm/madvise.c b/mm/madvise.c index 6d3d348b17f4..5c065bc8b5f6 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -862,10 +862,12 @@ static long madvise_populate(struct vm_area_struct *vma, switch (pages) { case -EINTR: return -EINTR; - case -EFAULT: /* Incompatible mappings / permissions. */ + case -EINVAL: /* Incompatible mappings / permissions. */ return -EINVAL; case -EHWPOISON: return -EHWPOISON; + case -EFAULT: /* VM_FAULT_SIGBUS or VM_FAULT_SIGSEGV */ + return -EFAULT; default: pr_warn_once("%s: unhandled return value: %ld\n", __func__, pages); -- cgit From 7fa0dacbaf1259fd3d1dda6d602fdd084dea9c0e Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 13 Aug 2021 16:54:41 -0700 Subject: mm/memcg: fix incorrect flushing of lruvec data in obj_stock When mod_objcg_state() is called with a pgdat that is different from that in the obj_stock, the old lruvec data cached in obj_stock are flushed out. Unfortunately, they were flushed to the new pgdat and so the data go to the wrong node. This will screw up the slab data reported in /sys/devices/system/node/node*/meminfo. Fix that by flushing the data to the cached pgdat instead. Link: https://lkml.kernel.org/r/20210802143834.30578-1-longman@redhat.com Fixes: 68ac5b3c8db2 ("mm/memcg: cache vmstat data in percpu memcg_stock_pcp") Signed-off-by: Waiman Long Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Acked-by: Roman Gushchin Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Tejun Heo Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Vlastimil Babka Cc: Muchun Song Cc: Alex Shi Cc: Chris Down Cc: Yafang Shao Cc: Wei Yang Cc: Masayoshi Mizuma Cc: Xing Zhengjun Cc: Matthew Wilcox Cc: Waiman Long Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index eb8e87c4833f..702a81dfe72d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3106,13 +3106,15 @@ void mod_objcg_state(struct obj_cgroup *objcg, struct pglist_data *pgdat, stock->cached_pgdat = pgdat; } else if (stock->cached_pgdat != pgdat) { /* Flush the existing cached vmstat data */ + struct pglist_data *oldpg = stock->cached_pgdat; + if (stock->nr_slab_reclaimable_b) { - mod_objcg_mlstate(objcg, pgdat, NR_SLAB_RECLAIMABLE_B, + mod_objcg_mlstate(objcg, oldpg, NR_SLAB_RECLAIMABLE_B, stock->nr_slab_reclaimable_b); stock->nr_slab_reclaimable_b = 0; } if (stock->nr_slab_unreclaimable_b) { - mod_objcg_mlstate(objcg, pgdat, NR_SLAB_UNRECLAIMABLE_B, + mod_objcg_mlstate(objcg, oldpg, NR_SLAB_UNRECLAIMABLE_B, stock->nr_slab_unreclaimable_b); stock->nr_slab_unreclaimable_b = 0; } -- cgit From 854f32648b8a5e424d682953b1a9f3b7c3322701 Mon Sep 17 00:00:00 2001 From: Liang Wang Date: Fri, 13 Aug 2021 16:54:45 -0700 Subject: lib: use PFN_PHYS() in devmem_is_allowed() The physical address may exceed 32 bits on 32-bit systems with more than 32 bits of physcial address. Use PFN_PHYS() in devmem_is_allowed(), or the physical address may overflow and be truncated. We found this bug when mapping a high addresses through devmem tool, when CONFIG_STRICT_DEVMEM is enabled on the ARM with ARM_LPAE and devmem is used to map a high address that is not in the iomem address range, an unexpected error indicating no permission is returned. This bug was initially introduced from v2.6.37, and the function was moved to lib in v5.11. Link: https://lkml.kernel.org/r/20210731025057.78825-1-wangliang101@huawei.com Fixes: 087aaffcdf9c ("ARM: implement CONFIG_STRICT_DEVMEM by disabling access to RAM via /dev/mem") Fixes: 527701eda5f1 ("lib: Add a generic version of devmem_is_allowed()") Signed-off-by: Liang Wang Reviewed-by: Luis Chamberlain Cc: Palmer Dabbelt Cc: Greg Kroah-Hartman Cc: Russell King Cc: Liang Wang Cc: Xiaoming Ni Cc: Kefeng Wang Cc: [2.6.37+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/devmem_is_allowed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/devmem_is_allowed.c b/lib/devmem_is_allowed.c index c0d67c541849..60be9e24bd57 100644 --- a/lib/devmem_is_allowed.c +++ b/lib/devmem_is_allowed.c @@ -19,7 +19,7 @@ */ int devmem_is_allowed(unsigned long pfn) { - if (iomem_is_exclusive(pfn << PAGE_SHIFT)) + if (iomem_is_exclusive(PFN_PHYS(pfn))) return 0; if (!page_is_ram(pfn)) return 1; -- cgit From 5f773519639041faed4132f5943d7895f9078a4c Mon Sep 17 00:00:00 2001 From: Maciej Machnikowski Date: Fri, 13 Aug 2021 09:50:18 -0700 Subject: ice: Fix perout start time rounding Internal tests found out that the latest code doesn't bring up 1PPS out as expected. As a result of incorrect define used to round the time up the time was round down to the past second boundary. Fix define used for rounding to properly round up to the next Top of second in ice_ptp_cfg_clkout to fix it. Fixes: 172db5f91d5f ("ice: add support for auxiliary input/output pins") Signed-off-by: Maciej Machnikowski Tested-by: Sunitha Mekala Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20210813165018.2196013-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 5d5207b56ca9..9e3ddb9b8b51 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -656,7 +656,7 @@ static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan, * maintaining phase */ if (start_time < current_time) - start_time = div64_u64(current_time + NSEC_PER_MSEC - 1, + start_time = div64_u64(current_time + NSEC_PER_SEC - 1, NSEC_PER_SEC) * NSEC_PER_SEC + phase; start_time -= E810_OUT_PROP_DELAY_NS; -- cgit From 21f965221e7c42609521342403e8fb91b8b3e76e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 14 Aug 2021 09:04:40 -0600 Subject: io_uring: only assign io_uring_enter() SQPOLL error in actual error case If an SQPOLL based ring is newly created and an application issues an io_uring_enter(2) system call on it, then we can return a spurious -EOWNERDEAD error. This happens because there's nothing to submit, and if the caller doesn't specify any other action, the initial error assignment of -EOWNERDEAD never gets overwritten. This causes us to return it directly, even if it isn't valid. Move the error assignment into the actual failure case instead. Cc: stable@vger.kernel.org Fixes: d9d05217cb69 ("io_uring: stop SQPOLL submit on creator's death") Reported-by: Sherlock Holo sherlockya@gmail.com Link: https://github.com/axboe/liburing/issues/413 Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 04c6d059ea94..6a092a534d2b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9370,9 +9370,10 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, if (ctx->flags & IORING_SETUP_SQPOLL) { io_cqring_overflow_flush(ctx, false); - ret = -EOWNERDEAD; - if (unlikely(ctx->sq_data->thread == NULL)) + if (unlikely(ctx->sq_data->thread == NULL)) { + ret = -EOWNERDEAD; goto out; + } if (flags & IORING_ENTER_SQ_WAKEUP) wake_up(&ctx->sq_data->wait); if (flags & IORING_ENTER_SQ_WAIT) { -- cgit From da94692001ea45ffa1f5e9f17ecdef7aecd90c27 Mon Sep 17 00:00:00 2001 From: Kristin Paget Date: Sat, 14 Aug 2021 15:46:05 -0700 Subject: ALSA: hda/realtek: Enable 4-speaker output for Dell XPS 15 9510 laptop The 2021-model XPS 15 appears to use the same 4-speakers-on-ALC289 audio setup as the Precision models, so requires the same quirk to enable woofer output. Tested on my own 9510. Signed-off-by: Kristin Paget Cc: Link: https://lore.kernel.org/r/e1fc95c5-c10a-1f98-a5c2-dd6e336157e1@tombom.co.uk Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a065260d0d20..96f32eaa24df 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8332,6 +8332,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0a2e, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC), + SND_PCI_QUIRK(0x1028, 0x0a61, "Dell XPS 15 9510", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), -- cgit From 7c60610d476766e128cc4284bb6349732cbd6606 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Aug 2021 13:40:53 -1000 Subject: Linux 5.14-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index eae1314a5b86..c19d1638da25 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Opossums on Parade # *DOCUMENTATION* -- cgit From c3ddfe66d2bb511f7fbcdc8e64952c7859e7e69d Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 15 Aug 2021 19:43:44 +0300 Subject: opp: Drop empty-table checks from _put functions The current_opp is released only when whole OPP table is released, otherwise it's only marked as removed by dev_pm_opp_remove_table(). Functions like dev_pm_opp_put_clkname() and dev_pm_opp_put_supported_hw() are checking whether OPP table is empty and it's not if current_opp is set since it holds the refcount of OPP, this produces a noisy warning from these functions about busy OPP table. Remove the checks to fix it. Cc: stable@vger.kernel.org Fixes: 81c4d8a3c414 ("opp: Keep track of currently programmed OPP") Signed-off-by: Dmitry Osipenko Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index b335c077f215..5543c54dacc5 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1856,9 +1856,6 @@ void dev_pm_opp_put_supported_hw(struct opp_table *opp_table) if (unlikely(!opp_table)) return; - /* Make sure there are no concurrent readers while updating opp_table */ - WARN_ON(!list_empty(&opp_table->opp_list)); - kfree(opp_table->supported_hw); opp_table->supported_hw = NULL; opp_table->supported_hw_count = 0; @@ -1944,9 +1941,6 @@ void dev_pm_opp_put_prop_name(struct opp_table *opp_table) if (unlikely(!opp_table)) return; - /* Make sure there are no concurrent readers while updating opp_table */ - WARN_ON(!list_empty(&opp_table->opp_list)); - kfree(opp_table->prop_name); opp_table->prop_name = NULL; @@ -2056,9 +2050,6 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table) if (!opp_table->regulators) goto put_opp_table; - /* Make sure there are no concurrent readers while updating opp_table */ - WARN_ON(!list_empty(&opp_table->opp_list)); - if (opp_table->enabled) { for (i = opp_table->regulator_count - 1; i >= 0; i--) regulator_disable(opp_table->regulators[i]); @@ -2178,9 +2169,6 @@ void dev_pm_opp_put_clkname(struct opp_table *opp_table) if (unlikely(!opp_table)) return; - /* Make sure there are no concurrent readers while updating opp_table */ - WARN_ON(!list_empty(&opp_table->opp_list)); - clk_put(opp_table->clk); opp_table->clk = ERR_PTR(-EINVAL); @@ -2279,9 +2267,6 @@ void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) if (unlikely(!opp_table)) return; - /* Make sure there are no concurrent readers while updating opp_table */ - WARN_ON(!list_empty(&opp_table->opp_list)); - opp_table->set_opp = NULL; mutex_lock(&opp_table->lock); -- cgit From 19d1532a187669ce86d5a2696eb7275310070793 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Fri, 13 Aug 2021 18:14:33 +0300 Subject: net: 6pack: fix slab-out-of-bounds in decode_data Syzbot reported slab-out-of bounds write in decode_data(). The problem was in missing validation checks. Syzbot's reproducer generated malicious input, which caused decode_data() to be called a lot in sixpack_decode(). Since rx_count_cooked is only 400 bytes and noone reported before, that 400 bytes is not enough, let's just check if input is malicious and complain about buffer overrun. Fail log: ================================================================== BUG: KASAN: slab-out-of-bounds in drivers/net/hamradio/6pack.c:843 Write of size 1 at addr ffff888087c5544e by task kworker/u4:0/7 CPU: 0 PID: 7 Comm: kworker/u4:0 Not tainted 5.6.0-rc3-syzkaller #0 ... Workqueue: events_unbound flush_to_ldisc Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x32 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:641 __asan_report_store1_noabort+0x17/0x20 mm/kasan/generic_report.c:137 decode_data.part.0+0x23b/0x270 drivers/net/hamradio/6pack.c:843 decode_data drivers/net/hamradio/6pack.c:965 [inline] sixpack_decode drivers/net/hamradio/6pack.c:968 [inline] Reported-and-tested-by: syzbot+fc8cd9a673d4577fb2e4@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Pavel Skripkin Reviewed-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/hamradio/6pack.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index fcf3af76b6d7..8fe8887d506a 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -827,6 +827,12 @@ static void decode_data(struct sixpack *sp, unsigned char inbyte) return; } + if (sp->rx_count_cooked + 2 >= sizeof(sp->cooked_buf)) { + pr_err("6pack: cooked buffer overrun, data loss\n"); + sp->rx_count = 0; + return; + } + buf = sp->raw_buf; sp->cooked_buf[sp->rx_count_cooked++] = buf[0] | ((buf[1] << 2) & 0xc0); -- cgit From c9107dd0b851777d7e134420baf13a5c5343bc16 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Sat, 7 Aug 2021 13:06:35 +0200 Subject: mmc: sdhci-iproc: Cap min clock frequency on BCM2711 There is a known bug on BCM2711's SDHCI core integration where the controller will hang when the difference between the core clock and the bus clock is too great. Specifically this can be reproduced under the following conditions: - No SD card plugged in, polling thread is running, probing cards at 100 kHz. - BCM2711's core clock configured at 500MHz or more. So set 200 kHz as the minimum clock frequency available for that board. For more information on the issue see this: https://lore.kernel.org/linux-mmc/20210322185816.27582-1-nsaenz@kernel.org/T/#m11f2783a09b581da6b8a15f302625b43a6ecdeca Fixes: f84e411c85be ("mmc: sdhci-iproc: Add support for emmc2 of the BCM2711") Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Stefan Wahren Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1628334401-6577-5-git-send-email-stefan.wahren@i2se.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-iproc.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index cce390fe9cf3..032bf852397f 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -173,6 +173,23 @@ static unsigned int sdhci_iproc_get_max_clock(struct sdhci_host *host) return pltfm_host->clock; } +/* + * There is a known bug on BCM2711's SDHCI core integration where the + * controller will hang when the difference between the core clock and the bus + * clock is too great. Specifically this can be reproduced under the following + * conditions: + * + * - No SD card plugged in, polling thread is running, probing cards at + * 100 kHz. + * - BCM2711's core clock configured at 500MHz or more + * + * So we set 200kHz as the minimum clock frequency available for that SoC. + */ +static unsigned int sdhci_iproc_bcm2711_get_min_clock(struct sdhci_host *host) +{ + return 200000; +} + static const struct sdhci_ops sdhci_iproc_ops = { .set_clock = sdhci_set_clock, .get_max_clock = sdhci_iproc_get_max_clock, @@ -271,6 +288,7 @@ static const struct sdhci_ops sdhci_iproc_bcm2711_ops = { .set_clock = sdhci_set_clock, .set_power = sdhci_set_power_and_bus_voltage, .get_max_clock = sdhci_iproc_get_max_clock, + .get_min_clock = sdhci_iproc_bcm2711_get_min_clock, .set_bus_width = sdhci_set_bus_width, .reset = sdhci_reset, .set_uhs_signaling = sdhci_set_uhs_signaling, -- cgit From 55c8fca1dae1fb0d11deaa21b65a647dedb1bc50 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 13 Aug 2021 20:33:27 +0300 Subject: ptp_pch: Restore dependency on PCI During the swap dependency on PCH_GBE to selection PTP_1588_CLOCK_PCH incidentally dropped the implicit dependency on the PCI. Restore it. Fixes: 18d359ceb044 ("pch_gbe, ptp_pch: Fix the dependency direction between these drivers") Reported-by: kernel test robot Signed-off-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/ptp/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 8c20e524e9ad..e085c255da0c 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -90,7 +90,8 @@ config PTP_1588_CLOCK_INES config PTP_1588_CLOCK_PCH tristate "Intel PCH EG20T as PTP clock" depends on X86_32 || COMPILE_TEST - depends on HAS_IOMEM && NET + depends on HAS_IOMEM && PCI + depends on NET imply PTP_1588_CLOCK help This driver adds support for using the PCH EG20T as a PTP -- cgit From 419dd626e357e89fc9c4e3863592c8b38cfe1571 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Sat, 7 Aug 2021 13:06:36 +0200 Subject: mmc: sdhci-iproc: Set SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN on BCM2711 The controller doesn't seem to pick-up on clock changes, so set the SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN flag to query the clock frequency directly from the clock. Fixes: f84e411c85be ("mmc: sdhci-iproc: Add support for emmc2 of the BCM2711") Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Stefan Wahren Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/1628334401-6577-6-git-send-email-stefan.wahren@i2se.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-iproc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 032bf852397f..e7565c671998 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -295,7 +295,8 @@ static const struct sdhci_ops sdhci_iproc_bcm2711_ops = { }; static const struct sdhci_pltfm_data sdhci_bcm2711_pltfm_data = { - .quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, + .quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 | + SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN, .ops = &sdhci_iproc_bcm2711_ops, }; -- cgit From 7387a72c5f84f0dfb57618f9e4770672c0d2e4c9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Aug 2021 03:13:36 -0400 Subject: tipc: call tipc_wait_for_connect only when dlen is not 0 __tipc_sendmsg() is called to send SYN packet by either tipc_sendmsg() or tipc_connect(). The difference is in tipc_connect(), it will call tipc_wait_for_connect() after __tipc_sendmsg() to wait until connecting is done. So there's no need to wait in __tipc_sendmsg() for this case. This patch is to fix it by calling tipc_wait_for_connect() only when dlen is not 0 in __tipc_sendmsg(), which means it's called by tipc_connect(). Note this also fixes the failure in tipcutils/test/ptts/: # ./tipcTS & # ./tipcTC 9 (hang) Fixes: 36239dab6da7 ("tipc: fix implicit-connect for SYN+") Reported-by: Shuang Li Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 75b99b7eda22..8754bd885169 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1518,7 +1518,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(syn && !rc)) { tipc_set_sk_state(sk, TIPC_CONNECTING); - if (timeout) { + if (dlen && timeout) { timeout = msecs_to_jiffies(timeout); tipc_wait_for_connect(sock, &timeout); } -- cgit From 37110237f31105d679fc0aa7b11cdec867750ea7 Mon Sep 17 00:00:00 2001 From: Shai Malin Date: Sun, 15 Aug 2021 14:05:08 +0300 Subject: qed: qed ll2 race condition fixes Avoiding qed ll2 race condition and NULL pointer dereference as part of the remove and recovery flows. Changes form V1: - Change (!p_rx->set_prod_addr). - qed_ll2.c checkpatch fixes. Change from V2: - Revert "qed_ll2.c checkpatch fixes". Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 02a4610d9330..c46a7f756ed5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -327,6 +327,9 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) unsigned long flags; int rc = -EINVAL; + if (!p_ll2_conn) + return rc; + spin_lock_irqsave(&p_tx->lock, flags); if (p_tx->b_completing_packet) { rc = -EBUSY; @@ -500,7 +503,16 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) unsigned long flags = 0; int rc = 0; + if (!p_ll2_conn) + return rc; + spin_lock_irqsave(&p_rx->lock, flags); + + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) { + spin_unlock_irqrestore(&p_rx->lock, flags); + return 0; + } + cq_new_idx = le16_to_cpu(*p_rx->p_fw_cons); cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); @@ -821,6 +833,9 @@ static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie; int rc; + if (!p_ll2_conn) + return 0; + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) return 0; @@ -844,6 +859,9 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) u16 new_idx = 0, num_bds = 0; int rc; + if (!p_ll2_conn) + return 0; + if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) return 0; @@ -1728,6 +1746,8 @@ int qed_ll2_post_rx_buffer(void *cxt, if (!p_ll2_conn) return -EINVAL; p_rx = &p_ll2_conn->rx_queue; + if (!p_rx->set_prod_addr) + return -EIO; spin_lock_irqsave(&p_rx->lock, flags); if (!list_empty(&p_rx->free_descq)) -- cgit From d33d19d313d3466abdf8b0428be7837aff767802 Mon Sep 17 00:00:00 2001 From: Shai Malin Date: Sun, 15 Aug 2021 14:06:39 +0300 Subject: qed: Fix null-pointer dereference in qed_rdma_create_qp() Fix a possible null-pointer dereference in qed_rdma_create_qp(). Changes from V2: - Revert checkpatch fixes. Reported-by: TOTE Robot Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index da864d12916b..4f4b79250a2b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -1285,8 +1285,7 @@ qed_rdma_create_qp(void *rdma_cxt, if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info->active) { - DP_ERR(p_hwfn->cdev, - "qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", + pr_err("qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", rdma_cxt, in_params, out_params); return NULL; } -- cgit From 976e52b718c3de9077fff8f3f674afb159c57fb1 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 15 Aug 2021 16:15:36 -0400 Subject: bnxt_en: Disable aRFS if running on 212 firmware 212 firmware broke aRFS, so disable it. Traffic may stop after ntuple filters are inserted and deleted by the 212 firmware. Fixes: ae10ae740ad2 ("bnxt_en: Add new hardware RFS mode.") Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index dd2d2a5fef15..207ef7dc9bff 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -10792,6 +10792,9 @@ static bool bnxt_rfs_supported(struct bnxt *bp) return true; return false; } + /* 212 firmware is broken for aRFS */ + if (BNXT_FW_MAJ(bp) == 212) + return false; if (BNXT_PF(bp) && !BNXT_CHIP_TYPE_NITRO_A0(bp)) return true; if (bp->flags & BNXT_FLAG_NEW_RSS_CAP) -- cgit From 828affc27ed43441bd1efdaf4e07e96dd43a0362 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 15 Aug 2021 16:15:37 -0400 Subject: bnxt_en: Add missing DMA memory barriers Each completion ring entry has a valid bit to indicate that the entry contains a valid completion event. The driver's main poll loop __bnxt_poll_work() has the proper dma_rmb() to make sure the valid bit of the next entry has been checked before proceeding further. But when we call bnxt_rx_pkt() to process the RX event, the RX completion event consists of two completion entries and only the first entry has been checked to be valid. We need the same barrier after checking the next completion entry. Add missing dma_rmb() barriers in bnxt_rx_pkt() and other similar locations. Fixes: 67a95e2022c7 ("bnxt_en: Need memory barrier when processing the completion ring.") Reported-by: Lance Richardson Reviewed-by: Andy Gospodarek Reviewed-by: Lance Richardson Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 207ef7dc9bff..8a97640cdfe7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1788,6 +1788,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons)) return -EBUSY; + /* The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); prod = rxr->rx_prod; if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP) { @@ -2010,6 +2014,10 @@ static int bnxt_force_rx_discard(struct bnxt *bp, if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons)) return -EBUSY; + /* The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); cmp_type = RX_CMP_TYPE(rxcmp); if (cmp_type == CMP_TYPE_RX_L2_CMP) { rxcmp1->rx_cmp_cfa_code_errors_v2 |= @@ -2475,6 +2483,10 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) if (!TX_CMP_VALID(txcmp, raw_cons)) break; + /* The valid test of the entry must be done first before + * reading any further. + */ + dma_rmb(); if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { tmp_raw_cons = NEXT_RAW_CMP(raw_cons); cp_cons = RING_CMP(tmp_raw_cons); -- cgit From 3f79f6f6247c83f448c8026c3ee16d4636ef8d4f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 6 Aug 2021 14:26:24 +1000 Subject: btrfs: prevent rename2 from exchanging a subvol with a directory from different parents Cross-rename lacks a check when that would prevent exchanging a directory and subvolume from different parent subvolume. This causes data inconsistencies and is caught before commit by tree-checker, turning the filesystem to read-only. Calling the renameat2 with RENAME_EXCHANGE flags like renameat2(AT_FDCWD, namesrc, AT_FDCWD, namedest, (1 << 1)) on two paths: namesrc = dir1/subvol1/dir2 namedest = subvol2/subvol3 will cause key order problem with following write time tree-checker report: [1194842.307890] BTRFS critical (device loop1): corrupt leaf: root=5 block=27574272 slot=10 ino=258, invalid previous key objectid, have 257 expect 258 [1194842.322221] BTRFS info (device loop1): leaf 27574272 gen 8 total ptrs 11 free space 15444 owner 5 [1194842.331562] BTRFS info (device loop1): refs 2 lock_owner 0 current 26561 [1194842.338772] item 0 key (256 1 0) itemoff 16123 itemsize 160 [1194842.338793] inode generation 3 size 16 mode 40755 [1194842.338801] item 1 key (256 12 256) itemoff 16111 itemsize 12 [1194842.338809] item 2 key (256 84 2248503653) itemoff 16077 itemsize 34 [1194842.338817] dir oid 258 type 2 [1194842.338823] item 3 key (256 84 2363071922) itemoff 16043 itemsize 34 [1194842.338830] dir oid 257 type 2 [1194842.338836] item 4 key (256 96 2) itemoff 16009 itemsize 34 [1194842.338843] item 5 key (256 96 3) itemoff 15975 itemsize 34 [1194842.338852] item 6 key (257 1 0) itemoff 15815 itemsize 160 [1194842.338863] inode generation 6 size 8 mode 40755 [1194842.338869] item 7 key (257 12 256) itemoff 15801 itemsize 14 [1194842.338876] item 8 key (257 84 2505409169) itemoff 15767 itemsize 34 [1194842.338883] dir oid 256 type 2 [1194842.338888] item 9 key (257 96 2) itemoff 15733 itemsize 34 [1194842.338895] item 10 key (258 12 256) itemoff 15719 itemsize 14 [1194842.339163] BTRFS error (device loop1): block=27574272 write time tree block corruption detected [1194842.339245] ------------[ cut here ]------------ [1194842.443422] WARNING: CPU: 6 PID: 26561 at fs/btrfs/disk-io.c:449 csum_one_extent_buffer+0xed/0x100 [btrfs] [1194842.511863] CPU: 6 PID: 26561 Comm: kworker/u17:2 Not tainted 5.14.0-rc3-git+ #793 [1194842.511870] Hardware name: empty empty/S3993, BIOS PAQEX0-3 02/24/2008 [1194842.511876] Workqueue: btrfs-worker-high btrfs_work_helper [btrfs] [1194842.511976] RIP: 0010:csum_one_extent_buffer+0xed/0x100 [btrfs] [1194842.512068] RSP: 0018:ffffa2c284d77da0 EFLAGS: 00010282 [1194842.512074] RAX: 0000000000000000 RBX: 0000000000001000 RCX: ffff928867bd9978 [1194842.512078] RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff928867bd9970 [1194842.512081] RBP: ffff92876b958000 R08: 0000000000000001 R09: 00000000000c0003 [1194842.512085] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 [1194842.512088] R13: ffff92875f989f98 R14: 0000000000000000 R15: 0000000000000000 [1194842.512092] FS: 0000000000000000(0000) GS:ffff928867a00000(0000) knlGS:0000000000000000 [1194842.512095] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [1194842.512099] CR2: 000055f5384da1f0 CR3: 0000000102fe4000 CR4: 00000000000006e0 [1194842.512103] Call Trace: [1194842.512128] ? run_one_async_free+0x10/0x10 [btrfs] [1194842.631729] btree_csum_one_bio+0x1ac/0x1d0 [btrfs] [1194842.631837] run_one_async_start+0x18/0x30 [btrfs] [1194842.631938] btrfs_work_helper+0xd5/0x1d0 [btrfs] [1194842.647482] process_one_work+0x262/0x5e0 [1194842.647520] worker_thread+0x4c/0x320 [1194842.655935] ? process_one_work+0x5e0/0x5e0 [1194842.655946] kthread+0x135/0x160 [1194842.655953] ? set_kthread_struct+0x40/0x40 [1194842.655965] ret_from_fork+0x1f/0x30 [1194842.672465] irq event stamp: 1729 [1194842.672469] hardirqs last enabled at (1735): [] console_trylock_spinning+0x185/0x1a0 [1194842.672477] hardirqs last disabled at (1740): [] console_trylock_spinning+0x15c/0x1a0 [1194842.672482] softirqs last enabled at (1666): [] __do_softirq+0x2e1/0x50a [1194842.672491] softirqs last disabled at (1651): [] __irq_exit_rcu+0xa7/0xd0 The corrupted data will not be written, and filesystem can be unmounted and mounted again (all changes since the last commit will be lost). Add the missing check for new_ino so that all non-subvolumes must reside under the same parent subvolume. There's an exception allowing to exchange two subvolumes from any parents as the directory representing a subvolume is only a logical link and does not have any other structures related to the parent subvolume, unlike files, directories etc, that are always in the inode namespace of the parent subvolume. Fixes: cdd1fedf8261 ("btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT") CC: stable@vger.kernel.org # 4.7+ Reviewed-by: Nikolay Borisov Signed-off-by: NeilBrown Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0117d867ecf8..06f9f167222b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9226,8 +9226,14 @@ static int btrfs_rename_exchange(struct inode *old_dir, bool dest_log_pinned = false; bool need_abort = false; - /* we only allow rename subvolume link between subvolumes */ - if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest) + /* + * For non-subvolumes allow exchange only within one subvolume, in the + * same inode namespace. Two subvolumes (represented as directory) can + * be exchanged as they're a logical link and have a fixed inode number. + */ + if (root != dest && + (old_ino != BTRFS_FIRST_FREE_OBJECTID || + new_ino != BTRFS_FIRST_FREE_OBJECTID)) return -EXDEV; /* close the race window with snapshot create/destroy ioctl */ -- cgit From 958f44255058338f4b370d8e4100e1e7d72db0cc Mon Sep 17 00:00:00 2001 From: Dan Moulding Date: Tue, 10 Aug 2021 13:59:06 -0600 Subject: drm: ttm: Don't bail from ttm_global_init if debugfs_create_dir fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 69de4421bb4c ("drm/ttm: Initialize debugfs from ttm_global_init()"), ttm_global_init was changed so that if creation of the debugfs global root directory fails, ttm_global_init will bail out early and return an error, leading to initialization failure of DRM drivers. However, not every system will be using debugfs. On such a system, debugfs directory creation can be expected to fail, but DRM drivers must still be usable. This changes it so that if creation of TTM's debugfs root directory fails, then no biggie: keep calm and carry on. Fixes: 69de4421bb4c ("drm/ttm: Initialize debugfs from ttm_global_init()") Signed-off-by: Dan Moulding Tested-by: Huacai Chen Reviewed-by: Huang Rui Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210810195906.22220-2-dmoulding@me.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_device.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 74e3b460132b..2df59b3c2ea1 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -78,9 +78,7 @@ static int ttm_global_init(void) ttm_debugfs_root = debugfs_create_dir("ttm", NULL); if (IS_ERR(ttm_debugfs_root)) { - ret = PTR_ERR(ttm_debugfs_root); ttm_debugfs_root = NULL; - goto out; } /* Limit the number of pages in the pool to about 50% of the total -- cgit From 4f3f2e3fa0431b93745b110da1c365806c5acce3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 16 Aug 2021 14:13:33 +0300 Subject: net: iosm: Prevent underflow in ipc_chnl_cfg_get() The bounds check on "index" doesn't catch negative values. Using ARRAY_SIZE() directly is more readable and more robust because it prevents negative values for "index". Fortunately we only pass valid values to ipc_chnl_cfg_get() so this patch does not affect runtime. Reported-by: Solomon Ucko Signed-off-by: Dan Carpenter Reviewed-by: M Chetan Kumar Signed-off-by: David S. Miller --- drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c b/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c index 804e6c4f2c78..519361ec40df 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c +++ b/drivers/net/wwan/iosm/iosm_ipc_chnl_cfg.c @@ -64,10 +64,9 @@ static struct ipc_chnl_cfg modem_cfg[] = { int ipc_chnl_cfg_get(struct ipc_chnl_cfg *chnl_cfg, int index) { - int array_size = ARRAY_SIZE(modem_cfg); - - if (index >= array_size) { - pr_err("index: %d and array_size %d", index, array_size); + if (index >= ARRAY_SIZE(modem_cfg)) { + pr_err("index: %d and array size %zu", index, + ARRAY_SIZE(modem_cfg)); return -ECHRNG; } -- cgit From 0f923e07124df069ba68d8bb12324398f4b6b709 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 15 Jul 2021 01:56:24 +0300 Subject: KVM: nSVM: avoid picking up unsupported bits from L2 in int_ctl (CVE-2021-3653) * Invert the mask of bits that we pick from L2 in nested_vmcb02_prepare_control * Invert and explicitly use VIRQ related bits bitmask in svm_clear_vintr This fixes a security issue that allowed a malicious L1 to run L2 with AVIC enabled, which allowed the L2 to exploit the uninitialized and enabled AVIC to read/write the host physical memory at some offsets. Fixes: 3d6368ef580a ("KVM: SVM: Add VMRUN handler") Signed-off-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/svm.h | 2 ++ arch/x86/kvm/svm/nested.c | 10 +++++++--- arch/x86/kvm/svm/svm.c | 9 +++++---- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index e322676039f4..b00dbc5fac2b 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -184,6 +184,8 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define V_IGN_TPR_SHIFT 20 #define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) +#define V_IRQ_INJECTION_BITS_MASK (V_IRQ_MASK | V_INTR_PRIO_MASK | V_IGN_TPR_MASK) + #define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 61738ff8ef33..28381ca5221c 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -503,7 +503,11 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) { - const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK; + const u32 int_ctl_vmcb01_bits = + V_INTR_MASKING_MASK | V_GIF_MASK | V_GIF_ENABLE_MASK; + + const u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK; + struct kvm_vcpu *vcpu = &svm->vcpu; /* @@ -535,8 +539,8 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset; svm->vmcb->control.int_ctl = - (svm->nested.ctl.int_ctl & ~mask) | - (svm->vmcb01.ptr->control.int_ctl & mask); + (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) | + (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits); svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext; svm->vmcb->control.int_vector = svm->nested.ctl.int_vector; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e8ccab50ebf6..69639f9624f5 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1589,17 +1589,18 @@ static void svm_set_vintr(struct vcpu_svm *svm) static void svm_clear_vintr(struct vcpu_svm *svm) { - const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK; svm_clr_intercept(svm, INTERCEPT_VINTR); /* Drop int_ctl fields related to VINTR injection. */ - svm->vmcb->control.int_ctl &= mask; + svm->vmcb->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK; if (is_guest_mode(&svm->vcpu)) { - svm->vmcb01.ptr->control.int_ctl &= mask; + svm->vmcb01.ptr->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK; WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) != (svm->nested.ctl.int_ctl & V_TPR_MASK)); - svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask; + + svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & + V_IRQ_INJECTION_BITS_MASK; } vmcb_mark_dirty(svm->vmcb, VMCB_INTR); -- cgit From c7dfa4009965a9b2d7b329ee970eb8da0d32f0bc Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 19 Jul 2021 16:05:00 +0300 Subject: KVM: nSVM: always intercept VMLOAD/VMSAVE when nested (CVE-2021-3656) If L1 disables VMLOAD/VMSAVE intercepts, and doesn't enable Virtual VMLOAD/VMSAVE (currently not supported for the nested hypervisor), then VMLOAD/VMSAVE must operate on the L1 physical memory, which is only possible by making L0 intercept these instructions. Failure to do so allowed the nested guest to run VMLOAD/VMSAVE unintercepted, and thus read/write portions of the host physical memory. Fixes: 89c8a4984fc9 ("KVM: SVM: Enable Virtual VMLOAD VMSAVE feature") Suggested-by: Paolo Bonzini Signed-off-by: Maxim Levitsky Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 28381ca5221c..e5515477c30a 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -158,6 +158,9 @@ void recalc_intercepts(struct vcpu_svm *svm) /* If SMI is not intercepted, ignore guest SMI intercept as well */ if (!intercept_smi) vmcb_clr_intercept(c, INTERCEPT_SMI); + + vmcb_set_intercept(c, INTERCEPT_VMLOAD); + vmcb_set_intercept(c, INTERCEPT_VMSAVE); } static void copy_vmcb_control_area(struct vmcb_control_area *dst, -- cgit From 6c34df6f350df9579ce99d887a2b5fa14cc13b32 Mon Sep 17 00:00:00 2001 From: Pingfan Liu Date: Sat, 14 Aug 2021 11:45:38 +0800 Subject: tracing: Apply trace filters on all output channels The event filters are not applied on all of the output, which results in the flood of printk when using tp_printk. Unfolding event_trigger_unlock_commit_regs() into trace_event_buffer_commit(), so the filters can be applied on every output. Link: https://lkml.kernel.org/r/20210814034538.8428-1-kernelfans@gmail.com Cc: stable@vger.kernel.org Fixes: 0daa2302968c1 ("tracing: Add tp_printk cmdline to have tracepoints go to printk()") Signed-off-by: Pingfan Liu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 18 +++++++++++++++--- kernel/trace/trace.h | 32 -------------------------------- 2 files changed, 15 insertions(+), 35 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 33899a71fdc1..a1adb29ef5c1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2897,14 +2897,26 @@ int tracepoint_printk_sysctl(struct ctl_table *table, int write, void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) { + enum event_trigger_type tt = ETT_NONE; + struct trace_event_file *file = fbuffer->trace_file; + + if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event, + fbuffer->entry, &tt)) + goto discard; + if (static_key_false(&tracepoint_printk_key.key)) output_printk(fbuffer); if (static_branch_unlikely(&trace_event_exports_enabled)) ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT); - event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer, - fbuffer->event, fbuffer->entry, - fbuffer->trace_ctx, fbuffer->regs); + + trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer, + fbuffer->event, fbuffer->trace_ctx, fbuffer->regs); + +discard: + if (tt) + event_triggers_post_call(file, tt); + } EXPORT_SYMBOL_GPL(trace_event_buffer_commit); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a180abf76d4e..4a0e693000c6 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1389,38 +1389,6 @@ event_trigger_unlock_commit(struct trace_event_file *file, event_triggers_post_call(file, tt); } -/** - * event_trigger_unlock_commit_regs - handle triggers and finish event commit - * @file: The file pointer associated with the event - * @buffer: The ring buffer that the event is being written to - * @event: The event meta data in the ring buffer - * @entry: The event itself - * @trace_ctx: The tracing context flags. - * - * This is a helper function to handle triggers that require data - * from the event itself. It also tests the event against filters and - * if the event is soft disabled and should be discarded. - * - * Same as event_trigger_unlock_commit() but calls - * trace_buffer_unlock_commit_regs() instead of trace_buffer_unlock_commit(). - */ -static inline void -event_trigger_unlock_commit_regs(struct trace_event_file *file, - struct trace_buffer *buffer, - struct ring_buffer_event *event, - void *entry, unsigned int trace_ctx, - struct pt_regs *regs) -{ - enum event_trigger_type tt = ETT_NONE; - - if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit_regs(file->tr, buffer, event, - trace_ctx, regs); - - if (tt) - event_triggers_post_call(file, tt); -} - #define FILTER_PRED_INVALID ((unsigned short)-1) #define FILTER_PRED_IS_RIGHT (1 << 15) #define FILTER_PRED_FOLD (1 << 15) -- cgit From 2bbfa0addd63fd06756b7af8bf146ae166e2abf5 Mon Sep 17 00:00:00 2001 From: Aubrey Li Date: Fri, 6 Aug 2021 08:46:24 +0800 Subject: ACPI: PRM: Deal with table not present or no module found On the system PRMT table is not present, dmesg output: $ dmesg | grep PRM [ 1.532237] ACPI: PRMT not present [ 1.532237] PRM: found 4294967277 modules The result of acpi_table_parse_entries need to be checked and return immediately if PRMT table is not present or no PRM module found. Signed-off-by: Aubrey Li Signed-off-by: Rafael J. Wysocki --- drivers/acpi/prmt.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c index 31cf9aee5edd..1f6007abcf18 100644 --- a/drivers/acpi/prmt.c +++ b/drivers/acpi/prmt.c @@ -292,6 +292,12 @@ void __init init_prmt(void) int mc = acpi_table_parse_entries(ACPI_SIG_PRMT, sizeof(struct acpi_table_prmt) + sizeof (struct acpi_table_prmt_header), 0, acpi_parse_prmt, 0); + /* + * Return immediately if PRMT table is not present or no PRM module found. + */ + if (mc <= 0) + return; + pr_info("PRM: found %u modules\n", mc); status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT, -- cgit From 4753b46e16073c3100551a61024989d50f5e4874 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 9 Aug 2021 20:40:04 -0500 Subject: ACPI: PM: s2idle: Invert Microsoft UUID entry and exit It was reported by a user with a Dell m15 R5 (5800H) that the keyboard backlight was turning on when entering suspend and turning off when exiting (the opposite of how it should be). The user bisected it back to commit 5dbf50997578 ("ACPI: PM: s2idle: Add support for new Microsoft UUID"). Previous to that commit the LEDs didn't turn off at all. Confirming in the spec, these were reversed when introduced. Fix them to match the spec. BugLink: https://gitlab.freedesktop.org/drm/amd/-/issues/1230#note_1021836 Fixes: 5dbf50997578 ("ACPI: PM: s2idle: Add support for new Microsoft UUID") Signed-off-by: Mario Limonciello Signed-off-by: Rafael J. Wysocki --- drivers/acpi/x86/s2idle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index fbdbef0ab552..3a308461246a 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -452,7 +452,7 @@ int acpi_s2idle_prepare_late(void) if (lps0_dsm_func_mask_microsoft > 0) { acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_OFF, lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft); - acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_EXIT, + acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_ENTRY, lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft); acpi_sleep_run_lps0_dsm(ACPI_LPS0_ENTRY, lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft); @@ -479,7 +479,7 @@ void acpi_s2idle_restore_early(void) if (lps0_dsm_func_mask_microsoft > 0) { acpi_sleep_run_lps0_dsm(ACPI_LPS0_EXIT, lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft); - acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_ENTRY, + acpi_sleep_run_lps0_dsm(ACPI_LPS0_MS_EXIT, lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft); acpi_sleep_run_lps0_dsm(ACPI_LPS0_SCREEN_ON, lps0_dsm_func_mask_microsoft, lps0_dsm_guid_microsoft); -- cgit From 2fd31689f9e44af949f60ff4f8aca013e628ab81 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 13 Aug 2021 14:20:33 +0800 Subject: Revert "drm/amd/pm: fix workload mismatch on vega10" This reverts commit 0979d43259e13846d86ba17e451e17fec185d240. Revert this because it does not apply to all the cards. Signed-off-by: Kenneth Feng Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 25979106fd25..f519345a5a24 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -5164,7 +5164,7 @@ static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui out: smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, - (!power_profile_mode) ? 0 : 1 << (power_profile_mode - 1), + 1 << power_profile_mode, NULL); hwmgr->power_profile_mode = power_profile_mode; -- cgit From 93c5701b00d50d192ce2247cb10d6c0b3fe25cd8 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 13 Aug 2021 14:40:18 +0800 Subject: drm/amd/pm: change the workload type for some cards change the workload type for some cards as it is needed. Signed-off-by: Kenneth Feng Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index f519345a5a24..02e8c6e5448d 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -5127,6 +5127,13 @@ static int vega10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) return size; } +static bool vega10_get_power_profile_mode_quirks(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = hwmgr->adev; + + return (adev->pdev->device == 0x6860); +} + static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint32_t size) { struct vega10_hwmgr *data = hwmgr->backend; @@ -5163,9 +5170,15 @@ static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui } out: - smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, + if (vega10_get_power_profile_mode_quirks(hwmgr)) + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, 1 << power_profile_mode, NULL); + else + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, + (!power_profile_mode) ? 0 : 1 << (power_profile_mode - 1), + NULL); + hwmgr->power_profile_mode = power_profile_mode; return 0; -- cgit From f924f3a1f0c70e27a5a39ebc9e294dd8489f6927 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 10 Aug 2021 11:01:05 +0800 Subject: drm/amdkfd: fix random KFDSVMRangeTest.SetGetAttributesTest test failure KFDSVMRangeTest.SetGetAttributesTest randomly fails in stress test. Note: Google Test filter = KFDSVMRangeTest.* [==========] Running 18 tests from 1 test case. [----------] Global test environment set-up. [----------] 18 tests from KFDSVMRangeTest [ RUN ] KFDSVMRangeTest.BasicSystemMemTest [ OK ] KFDSVMRangeTest.BasicSystemMemTest (30 ms) [ RUN ] KFDSVMRangeTest.SetGetAttributesTest [ ] Get default atrributes /home/yifan/brahma/libhsakmt/tests/kfdtest/src/KFDSVMRangeTest.cpp:154: Failure Value of: expectedDefaultResults[i] Actual: 4294967295 Expected: outputAttributes[i].value Which is: 0 /home/yifan/brahma/libhsakmt/tests/kfdtest/src/KFDSVMRangeTest.cpp:154: Failure Value of: expectedDefaultResults[i] Actual: 4294967295 Expected: outputAttributes[i].value Which is: 0 /home/yifan/brahma/libhsakmt/tests/kfdtest/src/KFDSVMRangeTest.cpp:152: Failure Value of: expectedDefaultResults[i] Actual: 4 Expected: outputAttributes[i].type Which is: 2 [ ] Setting/Getting atrributes [ FAILED ] the root cause is that svm work queue has not finished when svm_range_get_attr is called, thus some garbage svm interval tree data make svm_range_get_attr get wrong result. Flush work queue before iterate svm interval tree. Signed-off-by: Yifan Zhang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c7b364e4a287..e883731c3f8f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -3026,6 +3026,14 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start, start + size - 1, nattr); + /* Flush pending deferred work to avoid racing with deferred actions from + * previous memory map changes (e.g. munmap). Concurrent memory map changes + * can still race with get_attr because we don't hold the mmap lock. But that + * would be a race condition in the application anyway, and undefined + * behaviour is acceptable in that case. + */ + flush_work(&p->svms.deferred_list_work); + mmap_read_lock(mm); if (!svm_range_is_valid(mm, start, size)) { pr_debug("invalid range\n"); -- cgit From 71ae580f31f27cdacbabf0b798b27fa0bffeb9a1 Mon Sep 17 00:00:00 2001 From: Jake Wang Date: Thu, 5 Aug 2021 14:43:41 -0400 Subject: drm/amd/display: Ensure DCN save after VM setup [Why] DM initializes VM context after DMCUB initialization. This results in loss of DCN_VM_CONTEXT registers after z10. [How] Notify DMCUB when VM setup is complete, and have DMCUB save init registers. v2: squash in CONFIG_DRM_AMD_DC_DCN3_1 fix Reviewed-by: Nicholas Kazlauskas Acked-by: Wayne Lin Signed-off-by: Jake Wang Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 6 ++++++ drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c | 3 +++ drivers/gpu/drm/amd/display/dc/dc.h | 1 + drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c | 12 ++++++++++++ drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h | 1 + drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c | 1 + drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h | 1 + drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 5 +++++ 8 files changed, 30 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 605e297b7a59..a30283fa5173 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1530,6 +1530,12 @@ void dc_z10_restore(struct dc *dc) if (dc->hwss.z10_restore) dc->hwss.z10_restore(dc); } + +void dc_z10_save_init(struct dc *dc) +{ + if (dc->hwss.z10_save_init) + dc->hwss.z10_save_init(dc); +} #endif /* * Applies given context to HW and copy it into current context. diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c index f2b39ec35c89..cde8ed2560b3 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_vm_helper.c @@ -47,6 +47,9 @@ int dc_setup_system_context(struct dc *dc, struct dc_phy_addr_space_config *pa_c */ memcpy(&dc->vm_pa_config, pa_config, sizeof(struct dc_phy_addr_space_config)); dc->vm_pa_config.valid = true; +#if defined(CONFIG_DRM_AMD_DC_DCN) + dc_z10_save_init(dc); +#endif } return num_vmids; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index af7b60108e9d..21d78289b048 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -1338,6 +1338,7 @@ void dc_hardware_release(struct dc *dc); bool dc_set_psr_allow_active(struct dc *dc, bool enable); #if defined(CONFIG_DRM_AMD_DC_DCN) void dc_z10_restore(struct dc *dc); +void dc_z10_save_init(struct dc *dc); #endif bool dc_enable_dmub_notifications(struct dc *dc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 6ac6faf0c533..8a2119d8ca0d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -404,6 +404,18 @@ void dcn31_update_info_frame(struct pipe_ctx *pipe_ctx) &pipe_ctx->stream_res.encoder_info_frame); } } +void dcn31_z10_save_init(struct dc *dc) +{ + union dmub_rb_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.dcn_restore.header.type = DMUB_CMD__IDLE_OPT; + cmd.dcn_restore.header.sub_type = DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT; + + dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd); + dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv); + dc_dmub_srv_wait_idle(dc->ctx->dmub_srv); +} void dcn31_z10_restore(struct dc *dc) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h index 40dfebe78fdd..140435e4f7ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h @@ -44,6 +44,7 @@ void dcn31_enable_power_gating_plane( void dcn31_update_info_frame(struct pipe_ctx *pipe_ctx); void dcn31_z10_restore(struct dc *dc); +void dcn31_z10_save_init(struct dc *dc); void dcn31_hubp_pg_control(struct dce_hwseq *hws, unsigned int hubp_inst, bool power_on); int dcn31_init_sys_ctx(struct dce_hwseq *hws, struct dc *dc, struct dc_phy_addr_space_config *pa_config); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index aaf2dbd095fe..b30d923471cb 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -97,6 +97,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, .z10_restore = dcn31_z10_restore, + .z10_save_init = dcn31_z10_save_init, .is_abm_supported = dcn31_is_abm_supported, .set_disp_pattern_generator = dcn30_set_disp_pattern_generator, .update_visual_confirm_color = dcn20_update_visual_confirm_color, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 5ab008e62b82..ad5f2adcc40d 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -237,6 +237,7 @@ struct hw_sequencer_funcs { int width, int height, int offset); void (*z10_restore)(struct dc *dc); + void (*z10_save_init)(struct dc *dc); void (*update_visual_confirm_color)(struct dc *dc, struct pipe_ctx *pipe_ctx, diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 7c4734f905d9..7fafb8d6c1da 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -856,6 +856,11 @@ enum dmub_cmd_idle_opt_type { * DCN hardware restore. */ DMUB_CMD__IDLE_OPT_DCN_RESTORE = 0, + + /** + * DCN hardware save. + */ + DMUB_CMD__IDLE_OPT_DCN_SAVE_INIT = 1 }; /** -- cgit From 09e856d54bda5f288ef8437a90ab2b9b3eab83d1 Mon Sep 17 00:00:00 2001 From: Lahav Schlesinger Date: Sun, 15 Aug 2021 12:00:02 +0000 Subject: vrf: Reset skb conntrack connection on VRF rcv To fix the "reverse-NAT" for replies. When a packet is sent over a VRF, the POST_ROUTING hooks are called twice: Once from the VRF interface, and once from the "actual" interface the packet will be sent from: 1) First SNAT: l3mdev_l3_out() -> vrf_l3_out() -> .. -> vrf_output_direct() This causes the POST_ROUTING hooks to run. 2) Second SNAT: 'ip_output()' calls POST_ROUTING hooks again. Similarly for replies, first ip_rcv() calls PRE_ROUTING hooks, and second vrf_l3_rcv() calls them again. As an example, consider the following SNAT rule: > iptables -t nat -A POSTROUTING -p udp -m udp --dport 53 -j SNAT --to-source 2.2.2.2 -o vrf_1 In this case sending over a VRF will create 2 conntrack entries. The first is from the VRF interface, which performs the IP SNAT. The second will run the SNAT, but since the "expected reply" will remain the same, conntrack randomizes the source port of the packet: e..g With a socket bound to 1.1.1.1:10000, sending to 3.3.3.3:53, the conntrack rules are: udp 17 29 src=2.2.2.2 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 [UNREPLIED] src=3.3.3.3 dst=2.2.2.2 sport=53 dport=61033 packets=0 bytes=0 mark=0 use=1 udp 17 29 src=1.1.1.1 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 [UNREPLIED] src=3.3.3.3 dst=2.2.2.2 sport=53 dport=10000 packets=0 bytes=0 mark=0 use=1 i.e. First SNAT IP from 1.1.1.1 --> 2.2.2.2, and second the src port is SNAT-ed from 10000 --> 61033. But when a reply is sent (3.3.3.3:53 -> 2.2.2.2:61033) only the later conntrack entry is matched: udp 17 29 src=2.2.2.2 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 src=3.3.3.3 dst=2.2.2.2 sport=53 dport=61033 packets=1 bytes=49 mark=0 use=1 udp 17 28 src=1.1.1.1 dst=3.3.3.3 sport=10000 dport=53 packets=1 bytes=68 [UNREPLIED] src=3.3.3.3 dst=2.2.2.2 sport=53 dport=10000 packets=0 bytes=0 mark=0 use=1 And a "port 61033 unreachable" ICMP packet is sent back. The issue is that when PRE_ROUTING hooks are called from vrf_l3_rcv(), the skb already has a conntrack flow attached to it, which means nf_conntrack_in() will not resolve the flow again. This means only the dest port is "reverse-NATed" (61033 -> 10000) but the dest IP remains 2.2.2.2, and since the socket is bound to 1.1.1.1 it's not received. This can be verified by logging the 4-tuple of the packet in '__udp4_lib_rcv()'. The fix is then to reset the flow when skb is received on a VRF, to let conntrack resolve the flow again (which now will hit the earlier flow). To reproduce: (Without the fix "Got pkt_to_nat_port" will not be printed by running 'bash ./repro'): $ cat run_in_A1.py import logging logging.getLogger("scapy.runtime").setLevel(logging.ERROR) from scapy.all import * import argparse def get_packet_to_send(udp_dst_port, msg_name): return Ether(src='11:22:33:44:55:66', dst=iface_mac)/ \ IP(src='3.3.3.3', dst='2.2.2.2')/ \ UDP(sport=53, dport=udp_dst_port)/ \ Raw(f'{msg_name}\x0012345678901234567890') parser = argparse.ArgumentParser() parser.add_argument('-iface_mac', dest="iface_mac", type=str, required=True, help="From run_in_A3.py") parser.add_argument('-socket_port', dest="socket_port", type=str, required=True, help="From run_in_A3.py") parser.add_argument('-v1_mac', dest="v1_mac", type=str, required=True, help="From script") args, _ = parser.parse_known_args() iface_mac = args.iface_mac socket_port = int(args.socket_port) v1_mac = args.v1_mac print(f'Source port before NAT: {socket_port}') while True: pkts = sniff(iface='_v0', store=True, count=1, timeout=10) if 0 == len(pkts): print('Something failed, rerun the script :(', flush=True) break pkt = pkts[0] if not pkt.haslayer('UDP'): continue pkt_sport = pkt.getlayer('UDP').sport print(f'Source port after NAT: {pkt_sport}', flush=True) pkt_to_send = get_packet_to_send(pkt_sport, 'pkt_to_nat_port') sendp(pkt_to_send, '_v0', verbose=False) # Will not be received pkt_to_send = get_packet_to_send(socket_port, 'pkt_to_socket_port') sendp(pkt_to_send, '_v0', verbose=False) break $ cat run_in_A2.py import socket import netifaces print(f"{netifaces.ifaddresses('e00000')[netifaces.AF_LINK][0]['addr']}", flush=True) s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) s.setsockopt(socket.SOL_SOCKET, socket.SO_BINDTODEVICE, str('vrf_1' + '\0').encode('utf-8')) s.connect(('3.3.3.3', 53)) print(f'{s. getsockname()[1]}', flush=True) s.settimeout(5) while True: try: # Periodically send in order to keep the conntrack entry alive. s.send(b'a'*40) resp = s.recvfrom(1024) msg_name = resp[0].decode('utf-8').split('\0')[0] print(f"Got {msg_name}", flush=True) except Exception as e: pass $ cat repro.sh ip netns del A1 2> /dev/null ip netns del A2 2> /dev/null ip netns add A1 ip netns add A2 ip -n A1 link add _v0 type veth peer name _v1 netns A2 ip -n A1 link set _v0 up ip -n A2 link add e00000 type bond ip -n A2 link add lo0 type dummy ip -n A2 link add vrf_1 type vrf table 10001 ip -n A2 link set vrf_1 up ip -n A2 link set e00000 master vrf_1 ip -n A2 addr add 1.1.1.1/24 dev e00000 ip -n A2 link set e00000 up ip -n A2 link set _v1 master e00000 ip -n A2 link set _v1 up ip -n A2 link set lo0 up ip -n A2 addr add 2.2.2.2/32 dev lo0 ip -n A2 neigh add 1.1.1.10 lladdr 77:77:77:77:77:77 dev e00000 ip -n A2 route add 3.3.3.3/32 via 1.1.1.10 dev e00000 table 10001 ip netns exec A2 iptables -t nat -A POSTROUTING -p udp -m udp --dport 53 -j \ SNAT --to-source 2.2.2.2 -o vrf_1 sleep 5 ip netns exec A2 python3 run_in_A2.py > x & XPID=$! sleep 5 IFACE_MAC=`sed -n 1p x` SOCKET_PORT=`sed -n 2p x` V1_MAC=`ip -n A2 link show _v1 | sed -n 2p | awk '{print $2'}` ip netns exec A1 python3 run_in_A1.py -iface_mac ${IFACE_MAC} -socket_port \ ${SOCKET_PORT} -v1_mac ${SOCKET_PORT} sleep 5 kill -9 $XPID wait $XPID 2> /dev/null ip netns del A1 ip netns del A2 tail x -n 2 rm x set +x Fixes: 73e20b761acf ("net: vrf: Add support for PREROUTING rules on vrf device") Signed-off-by: Lahav Schlesinger Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20210815120002.2787653-1-lschlesinger@drivenets.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 2b1b944d4b28..8bbe2a7bb141 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1367,6 +1367,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); bool is_ndisc = ipv6_ndisc_frame(skb); + nf_reset_ct(skb); + /* loopback, multicast & non-ND link-local traffic; do not push through * packet taps again. Reset pkt_type for upper layers to process skb. * For strict packets with a source LLA, determine the dst using the @@ -1429,6 +1431,8 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, skb->skb_iif = vrf_dev->ifindex; IPCB(skb)->flags |= IPSKB_L3SLAVE; + nf_reset_ct(skb); + if (ipv4_is_multicast(ip_hdr(skb)->daddr)) goto out; -- cgit From 0165c4e19f6ec76b535de090e4bd145c73810c51 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 16 Aug 2021 20:42:59 +0300 Subject: ALSA: hda: Fix hang during shutdown due to link reset During system shutdown codecs may be still active, and resetting the controller->codec HW link in this state - based on the bug reporter's tests - leads to the shutdown sequence to get stuck. This happens at least on the reporter's KBL system with an ALC662 codec. For now fix the issue by skipping the link reset step. Fixes: 472e18f63c42 ("ALSA: hda: Release controller display power during shutdown/reboot") References: https://bugzilla.kernel.org/show_bug.cgi?id=214045 References: https://gitlab.freedesktop.org/drm/intel/-/issues/3618#note_1024665 Reported-and-tested-by: youling257@gmail.com Cc: youling257@gmail.com Signed-off-by: Imre Deak Link: https://lore.kernel.org/r/20210816174259.2759103-1-imre.deak@intel.com Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 0322b289505e..0062c18b646a 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -883,10 +883,11 @@ static unsigned int azx_get_pos_skl(struct azx *chip, struct azx_dev *azx_dev) return azx_get_pos_posbuf(chip, azx_dev); } -static void azx_shutdown_chip(struct azx *chip) +static void __azx_shutdown_chip(struct azx *chip, bool skip_link_reset) { azx_stop_chip(chip); - azx_enter_link_reset(chip); + if (!skip_link_reset) + azx_enter_link_reset(chip); azx_clear_irq_pending(chip); display_power(chip, false); } @@ -895,6 +896,11 @@ static void azx_shutdown_chip(struct azx *chip) static DEFINE_MUTEX(card_list_lock); static LIST_HEAD(card_list); +static void azx_shutdown_chip(struct azx *chip) +{ + __azx_shutdown_chip(chip, false); +} + static void azx_add_card_list(struct azx *chip) { struct hda_intel *hda = container_of(chip, struct hda_intel, chip); @@ -2385,7 +2391,7 @@ static void azx_shutdown(struct pci_dev *pci) return; chip = card->private_data; if (chip && chip->running) - azx_shutdown_chip(chip); + __azx_shutdown_chip(chip, true); } /* PCI IDs */ -- cgit From 4bf61ad5f0204b67ba570da6e5c052c2095e29df Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 17 Aug 2021 07:24:32 +0200 Subject: ALSA: hda/via: Apply runtime PM workaround for ASUS B23E ASUS B23E requires the same workaround like other machines with VT1802, otherwise it looses the codec power on a few nodes and the sound kept silence. Fixes: a0645daf1610 ("ALSA: HDA: Early Forbid of runtime PM") Link: https://lore.kernel.org/r/ac2232f142efcd67fe6ac38897f704f7176bd200.camel@gmail.com Cc: Link: https://lore.kernel.org/r/20210817052432.14751-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_via.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index a5c1a2c4eae4..773a136161f1 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -1041,6 +1041,7 @@ static const struct hda_fixup via_fixups[] = { }; static const struct snd_pci_quirk vt2002p_fixups[] = { + SND_PCI_QUIRK(0x1043, 0x13f7, "Asus B23E", VIA_FIXUP_POWER_SAVE), SND_PCI_QUIRK(0x1043, 0x1487, "Asus G75", VIA_FIXUP_ASUS_G75), SND_PCI_QUIRK(0x1043, 0x8532, "Asus X202E", VIA_FIXUP_INTMIC_BOOST), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo", VIA_FIXUP_POWER_SAVE), -- cgit From dbcf24d153884439dad30484a0e3f02350692e4c Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 17 Aug 2021 16:06:59 +0800 Subject: virtio-net: use NETIF_F_GRO_HW instead of NETIF_F_LRO Commit a02e8964eaf92 ("virtio-net: ethtool configurable LRO") maps LRO to virtio guest offloading features and allows the administrator to enable and disable those features via ethtool. This leads to several issues: - For a device that doesn't support control guest offloads, the "LRO" can't be disabled triggering WARN in dev_disable_lro() when turning off LRO or when enabling forwarding bridging etc. - For a device that supports control guest offloads, the guest offloads are disabled in cases of bridging, forwarding etc slowing down the traffic. Fix this by using NETIF_F_GRO_HW instead. Though the spec does not guarantee packets to be re-segmented as the original ones, we can add that to the spec, possibly with a flag for devices to differentiate between GRO and LRO. Further, we never advertised LRO historically before a02e8964eaf92 ("virtio-net: ethtool configurable LRO") and so bridged/forwarded configs effectively always relied on virtio receive offloads behaving like GRO - thus even if this breaks any configs it is at least not a regression. Fixes: a02e8964eaf92 ("virtio-net: ethtool configurable LRO") Acked-by: Michael S. Tsirkin Reported-by: Ivan Tested-by: Ivan Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 56c3f8519093..eee493685aad 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -63,7 +63,7 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_CSUM }; -#define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ +#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ (1ULL << VIRTIO_NET_F_GUEST_UFO)) @@ -2515,7 +2515,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_CSUM))) { - NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO/CSUM, disable LRO/CSUM first"); + NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing GRO_HW/CSUM, disable GRO_HW/CSUM first"); return -EOPNOTSUPP; } @@ -2646,15 +2646,15 @@ static int virtnet_set_features(struct net_device *dev, u64 offloads; int err; - if ((dev->features ^ features) & NETIF_F_LRO) { + if ((dev->features ^ features) & NETIF_F_GRO_HW) { if (vi->xdp_enabled) return -EBUSY; - if (features & NETIF_F_LRO) + if (features & NETIF_F_GRO_HW) offloads = vi->guest_offloads_capable; else offloads = vi->guest_offloads_capable & - ~GUEST_OFFLOAD_LRO_MASK; + ~GUEST_OFFLOAD_GRO_HW_MASK; err = virtnet_set_guest_offloads(vi, offloads); if (err) @@ -3134,9 +3134,9 @@ static int virtnet_probe(struct virtio_device *vdev) dev->features |= NETIF_F_RXCSUM; if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) - dev->features |= NETIF_F_LRO; + dev->features |= NETIF_F_GRO_HW; if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) - dev->hw_features |= NETIF_F_LRO; + dev->hw_features |= NETIF_F_GRO_HW; dev->vlan_features = dev->features; -- cgit From 276e189f8e4e3cce1634d6bac4ed0d9ca242441b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 17 Aug 2021 12:12:22 +0200 Subject: mac80211: fix locking in ieee80211_restart_work() Ilan's change to move locking around accidentally lost the wiphy_lock() during some porting, add it back. Fixes: 45daaa131841 ("mac80211: Properly WARN on HW scan before restart") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20210817121210.47bdb177064f.Ib1ef79440cd27f318c028ddfc0c642406917f512@changeid Signed-off-by: Jakub Kicinski --- net/mac80211/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 05f4c3c72619..fcae76ddd586 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -260,6 +260,8 @@ static void ieee80211_restart_work(struct work_struct *work) flush_work(&local->radar_detected_work); rtnl_lock(); + /* we might do interface manipulations, so need both */ + wiphy_lock(local->hw.wiphy); WARN(test_bit(SCAN_HW_SCANNING, &local->scanning), "%s called with hardware scan in progress\n", __func__); -- cgit From c797b40ccc340b8a66f7a7842aecc90bf749f087 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 11 Aug 2021 23:52:02 +0800 Subject: blk-mq: don't grab rq's refcount in blk_mq_check_expired() Inside blk_mq_queue_tag_busy_iter() we already grabbed request's refcount before calling ->fn(), so needn't to grab it one more time in blk_mq_check_expired(). Meantime remove extra request expire check in blk_mq_check_expired(). Cc: Keith Busch Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Link: https://lore.kernel.org/r/20210811155202.629575-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 2fe396385a4a..914c71ecc72a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -923,34 +923,14 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, unsigned long *next = priv; /* - * Just do a quick check if it is expired before locking the request in - * so we're not unnecessarilly synchronizing across CPUs. - */ - if (!blk_mq_req_expired(rq, next)) - return true; - - /* - * We have reason to believe the request may be expired. Take a - * reference on the request to lock this request lifetime into its - * currently allocated context to prevent it from being reallocated in - * the event the completion by-passes this timeout handler. - * - * If the reference was already released, then the driver beat the - * timeout handler to posting a natural completion. - */ - if (!refcount_inc_not_zero(&rq->ref)) - return true; - - /* - * The request is now locked and cannot be reallocated underneath the - * timeout handler's processing. Re-verify this exact request is truly - * expired; if it is not expired, then the request was completed and - * reallocated as a new request. + * blk_mq_queue_tag_busy_iter() has locked the request, so it cannot + * be reallocated underneath the timeout handler's processing, then + * the expire check is reliable. If the request is not expired, then + * it was completed and reallocated as a new request after returning + * from blk_mq_check_expired(). */ if (blk_mq_req_expired(rq, next)) blk_mq_rq_timed_out(rq, reserved); - - blk_mq_put_rq_ref(rq); return true; } -- cgit From c2da19ed50554ce52ecbad3655c98371fe58599f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 11 Aug 2021 22:26:24 +0800 Subject: blk-mq: fix kernel panic during iterating over flush request For fixing use-after-free during iterating over requests, we grabbed request's refcount before calling ->fn in commit 2e315dc07df0 ("blk-mq: grab rq->refcount before calling ->fn in blk_mq_tagset_busy_iter"). Turns out this way may cause kernel panic when iterating over one flush request: 1) old flush request's tag is just released, and this tag is reused by one new request, but ->rqs[] isn't updated yet 2) the flush request can be re-used for submitting one new flush command, so blk_rq_init() is called at the same time 3) meantime blk_mq_queue_tag_busy_iter() is called, and old flush request is retrieved from ->rqs[tag]; when blk_mq_put_rq_ref() is called, flush_rq->end_io may not be updated yet, so NULL pointer dereference is triggered in blk_mq_put_rq_ref(). Fix the issue by calling refcount_set(&flush_rq->ref, 1) after flush_rq->end_io is set. So far the only other caller of blk_rq_init() is scsi_ioctl_reset() in which the request doesn't enter block IO stack and the request reference count isn't used, so the change is safe. Fixes: 2e315dc07df0 ("blk-mq: grab rq->refcount before calling ->fn in blk_mq_tagset_busy_iter") Reported-by: "Blank-Burian, Markus, Dr." Tested-by: "Blank-Burian, Markus, Dr." Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Link: https://lore.kernel.org/r/20210811142624.618598-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - block/blk-flush.c | 8 ++++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 04477697ee4b..4f8449b29b21 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -122,7 +122,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq) rq->internal_tag = BLK_MQ_NO_TAG; rq->start_time_ns = ktime_get_ns(); rq->part = NULL; - refcount_set(&rq->ref, 1); blk_crypto_rq_set_defaults(rq); } EXPORT_SYMBOL(blk_rq_init); diff --git a/block/blk-flush.c b/block/blk-flush.c index 1002f6c58181..4912c8dbb1d8 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -329,6 +329,14 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, flush_rq->rq_flags |= RQF_FLUSH_SEQ; flush_rq->rq_disk = first_rq->rq_disk; flush_rq->end_io = flush_end_io; + /* + * Order WRITE ->end_io and WRITE rq->ref, and its pair is the one + * implied in refcount_inc_not_zero() called from + * blk_mq_find_and_get_req(), which orders WRITE/READ flush_rq->ref + * and READ flush_rq->end_io + */ + smp_wmb(); + refcount_set(&flush_rq->ref, 1); blk_flush_queue_rq(flush_rq, false); } -- cgit From 0a298d133893c72c96e2156ed7cb0f0c4a306a3e Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 16 Aug 2021 21:14:04 +0800 Subject: net: qlcnic: add missed unlock in qlcnic_83xx_flash_read32 qlcnic_83xx_unlock_flash() is called on all paths after we call qlcnic_83xx_lock_flash(), except for one error path on failure of QLCRD32(), which may cause a deadlock. This bug is suggested by a static analysis tool, please advise. Fixes: 81d0aeb0a4fff ("qlcnic: flash template based firmware reset recovery") Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20210816131405.24024-1-dinghao.liu@zju.edu.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index d8882d0b6b49..d51bac7ba5af 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -3156,8 +3156,10 @@ int qlcnic_83xx_flash_read32(struct qlcnic_adapter *adapter, u32 flash_addr, indirect_addr = QLC_83XX_FLASH_DIRECT_DATA(addr); ret = QLCRD32(adapter, indirect_addr, &err); - if (err == -EIO) + if (err == -EIO) { + qlcnic_83xx_unlock_flash(adapter); return err; + } word = ret; *(u32 *)p_data = word; -- cgit From fa0b1ef5f7a694f48e00804a391245f3471aa155 Mon Sep 17 00:00:00 2001 From: Mark Yacoub Date: Thu, 12 Aug 2021 15:49:17 -0400 Subject: drm: Copy drm_wait_vblank to user before returning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] Userspace should get back a copy of drm_wait_vblank that's been modified even when drm_wait_vblank_ioctl returns a failure. Rationale: drm_wait_vblank_ioctl modifies the request and expects the user to read it back. When the type is RELATIVE, it modifies it to ABSOLUTE and updates the sequence to become current_vblank_count + sequence (which was RELATIVE), but now it became ABSOLUTE. drmWaitVBlank (in libdrm) expects this to be the case as it modifies the request to be Absolute so it expects the sequence to would have been updated. The change is in compat_drm_wait_vblank, which is called by drm_compat_ioctl. This change of copying the data back regardless of the return number makes it en par with drm_ioctl, which always copies the data before returning. [How] Return from the function after everything has been copied to user. Fixes IGT:kms_flip::modeset-vs-vblank-race-interruptible Tested on ChromeOS Trogdor(msm) Reviewed-by: Michel Dänzer Signed-off-by: Mark Yacoub Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210812194917.1703356-1-markyacoub@chromium.org --- drivers/gpu/drm/drm_ioc32.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index d29907955ff7..5d82891c3222 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -855,8 +855,6 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, req.request.sequence = req32.request.sequence; req.request.signal = req32.request.signal; err = drm_ioctl_kernel(file, drm_wait_vblank_ioctl, &req, DRM_UNLOCKED); - if (err) - return err; req32.reply.type = req.reply.type; req32.reply.sequence = req.reply.sequence; @@ -865,7 +863,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, if (copy_to_user(argp, &req32, sizeof(req32))) return -EFAULT; - return 0; + return err; } #if defined(CONFIG_X86) -- cgit From 9cb0073b302a6b8a8c1015ff31b2b3ab4900f866 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 17 Aug 2021 22:36:44 +0100 Subject: io_uring: pin ctx on fallback execution Pin ring in io_fallback_req_func() by briefly elevating ctx->refs in case any task_work handler touches ctx after releasing a request. Fixes: 9011bf9a13e3b ("io_uring: fix stuck fallback reqs") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/833a494713d235ec144284a9bbfe418df4f6b61c.1629235576.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6a092a534d2b..979941bcd15a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2477,8 +2477,10 @@ static void io_fallback_req_func(struct work_struct *work) struct llist_node *node = llist_del_all(&ctx->fallback_llist); struct io_kiocb *req, *tmp; + percpu_ref_get(&ctx->refs); llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node) req->io_task_work.func(req); + percpu_ref_put(&ctx->refs); } static void __io_complete_rw(struct io_kiocb *req, long res, long res2, -- cgit From 1b80fec7b043552e01609bae7d0aad07aa742adc Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 17 Aug 2021 13:37:36 -0700 Subject: ixgbe, xsk: clean up the resources in ixgbe_xsk_pool_enable error path In ixgbe_xsk_pool_enable(), if ixgbe_xsk_wakeup() fails, We should restore the previous state and clean up the resources. Add the missing clear af_xdp_zc_qps and unmap dma to fix this bug. Fixes: d49e286d354e ("ixgbe: add tracking of AF_XDP zero-copy state for each queue pair") Fixes: 4a9b32f30f80 ("ixgbe: fix potential RX buffer starvation for AF_XDP") Signed-off-by: Wang Hai Acked-by: Magnus Karlsson Tested-by: Sandeep Penigalapati Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20210817203736.3529939-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index 96dd1a4f956a..b1d22e4d5ec9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -52,8 +52,11 @@ static int ixgbe_xsk_pool_enable(struct ixgbe_adapter *adapter, /* Kick start the NAPI context so that receiving will start */ err = ixgbe_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX); - if (err) + if (err) { + clear_bit(qid, adapter->af_xdp_zc_qps); + xsk_pool_dma_unmap(pool, IXGBE_RX_DMA_ATTR); return err; + } } return 0; -- cgit From a9ed27a764156929efe714033edb3e9023c5f321 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 18 Aug 2021 09:09:25 +0800 Subject: blk-mq: fix is_flush_rq is_flush_rq() is called from bt_iter()/bt_tags_iter(), and runs the following check: hctx->fq->flush_rq == req but the passed hctx from bt_iter()/bt_tags_iter() may be NULL because: 1) memory re-order in blk_mq_rq_ctx_init(): rq->mq_hctx = data->hctx; ... refcount_set(&rq->ref, 1); OR 2) tag re-use and ->rqs[] isn't updated with new request. Fix the issue by re-writing is_flush_rq() as: return rq->end_io == flush_end_io; which turns out simpler to follow and immune to data race since we have ordered WRITE rq->end_io and refcount_set(&rq->ref, 1). Fixes: 2e315dc07df0 ("blk-mq: grab rq->refcount before calling ->fn in blk_mq_tagset_busy_iter") Cc: "Blank-Burian, Markus, Dr." Cc: Yufen Yu Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20210818010925.607383-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-flush.c | 5 +++++ block/blk-mq.c | 2 +- block/blk.h | 6 +----- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index 4912c8dbb1d8..4201728bf3a5 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -262,6 +262,11 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) spin_unlock_irqrestore(&fq->mq_flush_lock, flags); } +bool is_flush_rq(struct request *rq) +{ + return rq->end_io == flush_end_io; +} + /** * blk_kick_flush - consider issuing flush request * @q: request_queue being kicked diff --git a/block/blk-mq.c b/block/blk-mq.c index 914c71ecc72a..9d4fdc2be88a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -911,7 +911,7 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next) void blk_mq_put_rq_ref(struct request *rq) { - if (is_flush_rq(rq, rq->mq_hctx)) + if (is_flush_rq(rq)) rq->end_io(rq, 0); else if (refcount_dec_and_test(&rq->ref)) __blk_mq_free_request(rq); diff --git a/block/blk.h b/block/blk.h index 4b885c0f6708..cb01429c162c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -44,11 +44,7 @@ static inline void __blk_get_queue(struct request_queue *q) kobject_get(&q->kobj); } -static inline bool -is_flush_rq(struct request *req, struct blk_mq_hw_ctx *hctx) -{ - return hctx->fq->flush_rq == req; -} +bool is_flush_rq(struct request *req); struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, gfp_t flags); -- cgit From b9570f5c9240cadf87fb5f9313e8f425aa9e788f Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Tue, 17 Aug 2021 17:46:28 +0200 Subject: platform/x86: gigabyte-wmi: add support for X570 GAMING X MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported as working here: https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/1#issuecomment-900263115 Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20210817154628.84992-1-linux@weissschuh.net Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index fbb224a82e34..9e8cfac403d3 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -147,6 +147,7 @@ static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"), { } -- cgit From 2a671f77ee49f3e78997b77fdee139467ff6a598 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 6 Aug 2021 12:11:16 +0200 Subject: s390/pci: fix use after free of zpci_dev The struct pci_dev uses reference counting but zPCI assumed erroneously that the last reference would always be the local reference after calling pci_stop_and_remove_bus_device(). This is usually the case but not how reference counting works and thus inherently fragile. In fact one case where this causes a NULL pointer dereference when on an SRIOV device the function 0 was hot unplugged before another function of the same multi-function device. In this case the second function's pdev->sriov->dev reference keeps the struct pci_dev of function 0 alive even after the unplug. This bug was previously hidden by the fact that we were leaking the struct pci_dev which in turn means that it always outlived the struct zpci_dev. This was fixed in commit 0b13525c20fe ("s390/pci: fix leak of PCI device structure") exposing the broken behavior. Fix this by accounting for the long living reference a struct pci_dev has to its underlying struct zpci_dev via the zbus->function[] array and only release that in pcibios_release_device() ensuring that the struct pci_dev is not left with a dangling reference. This is a minimal fix in the future it would probably better to use fine grained reference counting for struct zpci_dev. Fixes: 05bc1be6db4b2 ("s390/pci: create zPCI bus") Cc: stable@vger.kernel.org Reviewed-by: Matthew Rosato Signed-off-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci.c | 6 ++++++ arch/s390/pci/pci_bus.h | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index b0993e05affe..8fcb7ecb7225 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -560,9 +560,12 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev) int pcibios_add_device(struct pci_dev *pdev) { + struct zpci_dev *zdev = to_zpci(pdev); struct resource *res; int i; + /* The pdev has a reference to the zdev via its bus */ + zpci_zdev_get(zdev); if (pdev->is_physfn) pdev->no_vf_scan = 1; @@ -582,7 +585,10 @@ int pcibios_add_device(struct pci_dev *pdev) void pcibios_release_device(struct pci_dev *pdev) { + struct zpci_dev *zdev = to_zpci(pdev); + zpci_unmap_resources(pdev); + zpci_zdev_put(zdev); } int pcibios_enable_device(struct pci_dev *pdev, int mask) diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h index b877a97e6745..e359d2686178 100644 --- a/arch/s390/pci/pci_bus.h +++ b/arch/s390/pci/pci_bus.h @@ -22,6 +22,11 @@ static inline void zpci_zdev_put(struct zpci_dev *zdev) kref_put(&zdev->kref, zpci_release_device); } +static inline void zpci_zdev_get(struct zpci_dev *zdev) +{ + kref_get(&zdev->kref); +} + int zpci_alloc_domain(int domain); void zpci_free_domain(int domain); int zpci_setup_bus_resources(struct zpci_dev *zdev, -- cgit From fa25f28ef2cef19bc9ffeb827b8ecbf48af7f892 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 10 Aug 2021 20:29:24 +1000 Subject: drm/nouveau: recognise GA107 Still no GA106 as I don't have HW to verif. Signed-off-by: Ben Skeggs Reviewed-by: Lyude Paul --- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index b930f539feec..93ddf63d1114 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -2624,6 +2624,26 @@ nv174_chipset = { .dma = { 0x00000001, gv100_dma_new }, }; +static const struct nvkm_device_chip +nv177_chipset = { + .name = "GA107", + .bar = { 0x00000001, tu102_bar_new }, + .bios = { 0x00000001, nvkm_bios_new }, + .devinit = { 0x00000001, ga100_devinit_new }, + .fb = { 0x00000001, ga102_fb_new }, + .gpio = { 0x00000001, ga102_gpio_new }, + .i2c = { 0x00000001, gm200_i2c_new }, + .imem = { 0x00000001, nv50_instmem_new }, + .mc = { 0x00000001, ga100_mc_new }, + .mmu = { 0x00000001, tu102_mmu_new }, + .pci = { 0x00000001, gp100_pci_new }, + .privring = { 0x00000001, gm200_privring_new }, + .timer = { 0x00000001, gk20a_timer_new }, + .top = { 0x00000001, ga100_top_new }, + .disp = { 0x00000001, ga102_disp_new }, + .dma = { 0x00000001, gv100_dma_new }, +}; + static int nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size, struct nvkm_notify *notify) @@ -3049,6 +3069,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func, case 0x168: device->chip = &nv168_chipset; break; case 0x172: device->chip = &nv172_chipset; break; case 0x174: device->chip = &nv174_chipset; break; + case 0x177: device->chip = &nv177_chipset; break; default: if (nvkm_boolopt(device->cfgopt, "NvEnableUnsupportedChipsets", false)) { switch (device->chipset) { -- cgit From 6eaa1f3c59a707332e921e32782ffcad49915c5e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Aug 2021 16:40:48 +1000 Subject: drm/nouveau/disp: power down unused DP links during init When booted with multiple displays attached, the EFI GOP driver on (at least) Ampere, can leave DP links powered up that aren't being used to display anything. This confuses our tracking of SOR routing, with the likely result being a failed modeset and display engine hang. Fix this by (ab?)using the DisableLT IED script to power-down the link, restoring HW to a state the driver expects. Signed-off-by: Ben Skeggs Reviewed-by: Lyude Paul --- drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index 55fbfe28c6dc..9669472a2749 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -440,7 +440,7 @@ nvkm_dp_train(struct nvkm_dp *dp, u32 dataKBps) return ret; } -static void +void nvkm_dp_disable(struct nvkm_outp *outp, struct nvkm_ior *ior) { struct nvkm_dp *dp = nvkm_dp(outp); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h index 428b3f488f03..e484d0c3b0d4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h @@ -32,6 +32,7 @@ struct nvkm_dp { int nvkm_dp_new(struct nvkm_disp *, int index, struct dcb_output *, struct nvkm_outp **); +void nvkm_dp_disable(struct nvkm_outp *, struct nvkm_ior *); /* DPCD Receiver Capabilities */ #define DPCD_RC00_DPCD_REV 0x00000 diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c index dffcac249211..129982fef7ef 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ #include "outp.h" +#include "dp.h" #include "ior.h" #include @@ -257,6 +258,14 @@ nvkm_outp_init_route(struct nvkm_outp *outp) if (!ior->arm.head || ior->arm.proto != proto) { OUTP_DBG(outp, "no heads (%x %d %d)", ior->arm.head, ior->arm.proto, proto); + + /* The EFI GOP driver on Ampere can leave unused DP links routed, + * which we don't expect. The DisableLT IED script *should* get + * us back to where we need to be. + */ + if (ior->func->route.get && !ior->arm.head && outp->info.type == DCB_OUTPUT_DP) + nvkm_dp_disable(outp, ior); + return; } -- cgit From e78b1b545c6cfe9f87fc577128e00026fff230ba Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 10 Aug 2021 19:29:57 +1000 Subject: drm/nouveau/kms/nv50: workaround EFI GOP window channel format differences Should fix some initial modeset failures on (at least) Ampere boards. Signed-off-by: Ben Skeggs Reviewed-by: Lyude Paul --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 27 +++++++++++++++++++++++++++ drivers/gpu/drm/nouveau/dispnv50/head.c | 13 +++++++++---- drivers/gpu/drm/nouveau/dispnv50/head.h | 1 + 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index f949767698fc..bcb0310a41b6 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2237,6 +2237,33 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state) interlock[NV50_DISP_INTERLOCK_CORE] = 0; } + /* Finish updating head(s)... + * + * NVD is rather picky about both where window assignments can change, + * *and* about certain core and window channel states matching. + * + * The EFI GOP driver on newer GPUs configures window channels with a + * different output format to what we do, and the core channel update + * in the assign_windows case above would result in a state mismatch. + * + * Delay some of the head update until after that point to workaround + * the issue. This only affects the initial modeset. + * + * TODO: handle this better when adding flexible window mapping + */ + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state); + struct nv50_head *head = nv50_head(crtc); + + NV_ATOMIC(drm, "%s: set %04x (clr %04x)\n", crtc->name, + asyh->set.mask, asyh->clr.mask); + + if (asyh->set.mask) { + nv50_head_flush_set_wndw(head, asyh); + interlock[NV50_DISP_INTERLOCK_CORE] = 1; + } + } + /* Update plane(s). */ for_each_new_plane_in_state(state, plane, new_plane_state, i) { struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state); diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c index ec361d17e900..d66f97280282 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head.c +++ b/drivers/gpu/drm/nouveau/dispnv50/head.c @@ -50,11 +50,8 @@ nv50_head_flush_clr(struct nv50_head *head, } void -nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh) +nv50_head_flush_set_wndw(struct nv50_head *head, struct nv50_head_atom *asyh) { - if (asyh->set.view ) head->func->view (head, asyh); - if (asyh->set.mode ) head->func->mode (head, asyh); - if (asyh->set.core ) head->func->core_set(head, asyh); if (asyh->set.olut ) { asyh->olut.offset = nv50_lut_load(&head->olut, asyh->olut.buffer, @@ -62,6 +59,14 @@ nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh) asyh->olut.load); head->func->olut_set(head, asyh); } +} + +void +nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + if (asyh->set.view ) head->func->view (head, asyh); + if (asyh->set.mode ) head->func->mode (head, asyh); + if (asyh->set.core ) head->func->core_set(head, asyh); if (asyh->set.curs ) head->func->curs_set(head, asyh); if (asyh->set.base ) head->func->base (head, asyh); if (asyh->set.ovly ) head->func->ovly (head, asyh); diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.h b/drivers/gpu/drm/nouveau/dispnv50/head.h index dae841dc05fd..0bac6be9ba34 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head.h +++ b/drivers/gpu/drm/nouveau/dispnv50/head.h @@ -21,6 +21,7 @@ struct nv50_head { struct nv50_head *nv50_head_create(struct drm_device *, int index); void nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh); +void nv50_head_flush_set_wndw(struct nv50_head *head, struct nv50_head_atom *asyh); void nv50_head_flush_clr(struct nv50_head *head, struct nv50_head_atom *asyh, bool flush); -- cgit From 50c4a644910fbe6b965683b3c99f823b59bde1b9 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 4 Mar 2021 18:52:52 +1000 Subject: drm/nouveau/fifo/nv50-: rip out dma channels I honestly don't even know why... These have never been used. Signed-off-by: Ben Skeggs Reviewed-by: Lyude Paul --- drivers/gpu/drm/nouveau/include/nvif/class.h | 2 - drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild | 2 - .../gpu/drm/nouveau/nvkm/engine/fifo/channv50.h | 2 - drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c | 94 ---------------------- drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c | 92 --------------------- drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c | 1 - drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c | 1 - 7 files changed, 194 deletions(-) delete mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c delete mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h index ba2c28ea43d2..c68cc957248e 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/class.h +++ b/drivers/gpu/drm/nouveau/include/nvif/class.h @@ -61,8 +61,6 @@ #define NV10_CHANNEL_DMA /* cl506b.h */ 0x0000006e #define NV17_CHANNEL_DMA /* cl506b.h */ 0x0000176e #define NV40_CHANNEL_DMA /* cl506b.h */ 0x0000406e -#define NV50_CHANNEL_DMA /* cl506e.h */ 0x0000506e -#define G82_CHANNEL_DMA /* cl826e.h */ 0x0000826e #define NV50_CHANNEL_GPFIFO /* cl506f.h */ 0x0000506f #define G82_CHANNEL_GPFIFO /* cl826f.h */ 0x0000826f diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild index 90e9a0972a44..3209eb7af65f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild @@ -27,8 +27,6 @@ nvkm-y += nvkm/engine/fifo/dmanv04.o nvkm-y += nvkm/engine/fifo/dmanv10.o nvkm-y += nvkm/engine/fifo/dmanv17.o nvkm-y += nvkm/engine/fifo/dmanv40.o -nvkm-y += nvkm/engine/fifo/dmanv50.o -nvkm-y += nvkm/engine/fifo/dmag84.o nvkm-y += nvkm/engine/fifo/gpfifonv50.o nvkm-y += nvkm/engine/fifo/gpfifog84.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h index af8bdf275552..3a95730d7ff5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h @@ -48,8 +48,6 @@ void nv50_fifo_chan_object_dtor(struct nvkm_fifo_chan *, int); int g84_fifo_chan_ctor(struct nv50_fifo *, u64 vmm, u64 push, const struct nvkm_oclass *, struct nv50_fifo_chan *); -extern const struct nvkm_fifo_chan_oclass nv50_fifo_dma_oclass; extern const struct nvkm_fifo_chan_oclass nv50_fifo_gpfifo_oclass; -extern const struct nvkm_fifo_chan_oclass g84_fifo_dma_oclass; extern const struct nvkm_fifo_chan_oclass g84_fifo_gpfifo_oclass; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c deleted file mode 100644 index fc34cddcd2f5..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmag84.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs - */ -#include "channv50.h" - -#include -#include - -#include -#include -#include - -static int -g84_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - struct nvkm_object *parent = oclass->parent; - union { - struct g82_channel_dma_v0 v0; - } *args = data; - struct nv50_fifo *fifo = nv50_fifo(base); - struct nv50_fifo_chan *chan; - int ret = -ENOSYS; - - nvif_ioctl(parent, "create channel dma size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel dma vers %d vmm %llx " - "pushbuf %llx offset %016llx\n", - args->v0.version, args->v0.vmm, args->v0.pushbuf, - args->v0.offset); - if (!args->v0.pushbuf) - return -EINVAL; - } else - return ret; - - if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->base.object; - - ret = g84_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf, - oclass, chan); - if (ret) - return ret; - - args->v0.chid = chan->base.chid; - - nvkm_kmap(chan->ramfc); - nvkm_wo32(chan->ramfc, 0x08, lower_32_bits(args->v0.offset)); - nvkm_wo32(chan->ramfc, 0x0c, upper_32_bits(args->v0.offset)); - nvkm_wo32(chan->ramfc, 0x10, lower_32_bits(args->v0.offset)); - nvkm_wo32(chan->ramfc, 0x14, upper_32_bits(args->v0.offset)); - nvkm_wo32(chan->ramfc, 0x3c, 0x003f6078); - nvkm_wo32(chan->ramfc, 0x44, 0x01003fff); - nvkm_wo32(chan->ramfc, 0x48, chan->base.push->node->offset >> 4); - nvkm_wo32(chan->ramfc, 0x4c, 0xffffffff); - nvkm_wo32(chan->ramfc, 0x60, 0x7fffffff); - nvkm_wo32(chan->ramfc, 0x78, 0x00000000); - nvkm_wo32(chan->ramfc, 0x7c, 0x30000001); - nvkm_wo32(chan->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) | - (4 << 24) /* SEARCH_FULL */ | - (chan->ramht->gpuobj->node->offset >> 4)); - nvkm_wo32(chan->ramfc, 0x88, chan->cache->addr >> 10); - nvkm_wo32(chan->ramfc, 0x98, chan->base.inst->addr >> 12); - nvkm_done(chan->ramfc); - return 0; -} - -const struct nvkm_fifo_chan_oclass -g84_fifo_dma_oclass = { - .base.oclass = G82_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = g84_fifo_dma_new, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c deleted file mode 100644 index 8043718ad150..000000000000 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv50.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright 2012 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Ben Skeggs - */ -#include "channv50.h" - -#include -#include - -#include -#include -#include - -static int -nv50_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass, - void *data, u32 size, struct nvkm_object **pobject) -{ - struct nvkm_object *parent = oclass->parent; - union { - struct nv50_channel_dma_v0 v0; - } *args = data; - struct nv50_fifo *fifo = nv50_fifo(base); - struct nv50_fifo_chan *chan; - int ret = -ENOSYS; - - nvif_ioctl(parent, "create channel dma size %d\n", size); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - nvif_ioctl(parent, "create channel dma vers %d vmm %llx " - "pushbuf %llx offset %016llx\n", - args->v0.version, args->v0.vmm, args->v0.pushbuf, - args->v0.offset); - if (!args->v0.pushbuf) - return -EINVAL; - } else - return ret; - - if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) - return -ENOMEM; - *pobject = &chan->base.object; - - ret = nv50_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf, - oclass, chan); - if (ret) - return ret; - - args->v0.chid = chan->base.chid; - - nvkm_kmap(chan->ramfc); - nvkm_wo32(chan->ramfc, 0x08, lower_32_bits(args->v0.offset)); - nvkm_wo32(chan->ramfc, 0x0c, upper_32_bits(args->v0.offset)); - nvkm_wo32(chan->ramfc, 0x10, lower_32_bits(args->v0.offset)); - nvkm_wo32(chan->ramfc, 0x14, upper_32_bits(args->v0.offset)); - nvkm_wo32(chan->ramfc, 0x3c, 0x003f6078); - nvkm_wo32(chan->ramfc, 0x44, 0x01003fff); - nvkm_wo32(chan->ramfc, 0x48, chan->base.push->node->offset >> 4); - nvkm_wo32(chan->ramfc, 0x4c, 0xffffffff); - nvkm_wo32(chan->ramfc, 0x60, 0x7fffffff); - nvkm_wo32(chan->ramfc, 0x78, 0x00000000); - nvkm_wo32(chan->ramfc, 0x7c, 0x30000001); - nvkm_wo32(chan->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) | - (4 << 24) /* SEARCH_FULL */ | - (chan->ramht->gpuobj->node->offset >> 4)); - nvkm_done(chan->ramfc); - return 0; -} - -const struct nvkm_fifo_chan_oclass -nv50_fifo_dma_oclass = { - .base.oclass = NV50_CHANNEL_DMA, - .base.minver = 0, - .base.maxver = 0, - .ctor = nv50_fifo_dma_new, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c index c0a7d0f21dac..3885c3830b94 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c @@ -119,7 +119,6 @@ g84_fifo = { .uevent_init = g84_fifo_uevent_init, .uevent_fini = g84_fifo_uevent_fini, .chan = { - &g84_fifo_dma_oclass, &g84_fifo_gpfifo_oclass, NULL }, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c index be94156ea248..a08742cf425a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c @@ -136,7 +136,6 @@ nv50_fifo = { .pause = nv04_fifo_pause, .start = nv04_fifo_start, .chan = { - &nv50_fifo_dma_oclass, &nv50_fifo_gpfifo_oclass, NULL }, -- cgit From 148a8653789c01f159764ffcc3f370008966b42f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 4 Mar 2021 19:16:18 +1000 Subject: drm/nouveau: block a bunch of classes from userspace Long ago, there had been plans for making use of a bunch of these APIs from userspace and there's various checks in place to stop misbehaving. Countless other projects have occurred in the meantime, and the pieces didn't finish falling into place for that to happen. They will (hopefully) in the not-too-distant future, but it won't look quite as insane. The super checks are causing problems right now, and are going to be removed. Signed-off-by: Ben Skeggs Reviewed-by: Lyude Paul --- drivers/gpu/drm/nouveau/include/nvif/cl0080.h | 3 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 1 + drivers/gpu/drm/nouveau/nouveau_usif.c | 57 +++++++++++++++++------ drivers/gpu/drm/nouveau/nvkm/engine/device/user.c | 2 +- 4 files changed, 48 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h index 0b86c44878e0..59759c4fb62e 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h @@ -4,7 +4,8 @@ struct nv_device_v0 { __u8 version; - __u8 pad01[7]; + __u8 priv; + __u8 pad02[6]; __u64 device; /* device identifier, ~0 for client default */ }; diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index a616cf4573b8..d7fdff917aed 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -244,6 +244,7 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, ret = nvif_device_ctor(&cli->base.object, "drmDevice", 0, NV_DEVICE, &(struct nv_device_v0) { .device = ~0, + .priv = true, }, sizeof(struct nv_device_v0), &cli->device); if (ret) { diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.c b/drivers/gpu/drm/nouveau/nouveau_usif.c index 9dc10b17ad34..5da1f4d223d7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_usif.c +++ b/drivers/gpu/drm/nouveau/nouveau_usif.c @@ -32,6 +32,9 @@ #include #include +#include +#include + struct usif_notify_p { struct drm_pending_event base; struct { @@ -261,7 +264,7 @@ usif_object_dtor(struct usif_object *object) } static int -usif_object_new(struct drm_file *f, void *data, u32 size, void *argv, u32 argc) +usif_object_new(struct drm_file *f, void *data, u32 size, void *argv, u32 argc, bool parent_abi16) { struct nouveau_cli *cli = nouveau_cli(f); struct nvif_client *client = &cli->base; @@ -271,23 +274,48 @@ usif_object_new(struct drm_file *f, void *data, u32 size, void *argv, u32 argc) struct usif_object *object; int ret = -ENOSYS; + if ((ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) + return ret; + + switch (args->v0.oclass) { + case NV_DMA_FROM_MEMORY: + case NV_DMA_TO_MEMORY: + case NV_DMA_IN_MEMORY: + return -EINVAL; + case NV_DEVICE: { + union { + struct nv_device_v0 v0; + } *args = data; + + if ((ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) + return ret; + + args->v0.priv = false; + break; + } + default: + if (!parent_abi16) + return -EINVAL; + break; + } + if (!(object = kmalloc(sizeof(*object), GFP_KERNEL))) return -ENOMEM; list_add(&object->head, &cli->objects); - if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) { - object->route = args->v0.route; - object->token = args->v0.token; - args->v0.route = NVDRM_OBJECT_USIF; - args->v0.token = (unsigned long)(void *)object; - ret = nvif_client_ioctl(client, argv, argc); - args->v0.token = object->token; - args->v0.route = object->route; + object->route = args->v0.route; + object->token = args->v0.token; + args->v0.route = NVDRM_OBJECT_USIF; + args->v0.token = (unsigned long)(void *)object; + ret = nvif_client_ioctl(client, argv, argc); + if (ret) { + usif_object_dtor(object); + return ret; } - if (ret) - usif_object_dtor(object); - return ret; + args->v0.token = object->token; + args->v0.route = object->route; + return 0; } int @@ -301,6 +329,7 @@ usif_ioctl(struct drm_file *filp, void __user *user, u32 argc) struct nvif_ioctl_v0 v0; } *argv = data; struct usif_object *object; + bool abi16 = false; u8 owner; int ret; @@ -331,11 +360,13 @@ usif_ioctl(struct drm_file *filp, void __user *user, u32 argc) mutex_unlock(&cli->mutex); goto done; } + + abi16 = true; } switch (argv->v0.type) { case NVIF_IOCTL_V0_NEW: - ret = usif_object_new(filp, data, size, argv, argc); + ret = usif_object_new(filp, data, size, argv, argc, abi16); break; case NVIF_IOCTL_V0_NTFY_NEW: ret = usif_notify_new(filp, data, size, argv, argc); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c index fea9d8f2b10c..f28894fdede9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c @@ -397,7 +397,7 @@ nvkm_udevice_new(const struct nvkm_oclass *oclass, void *data, u32 size, return ret; /* give priviledged clients register access */ - if (client->super) + if (args->v0.priv) func = &nvkm_udevice_super; else func = &nvkm_udevice; -- cgit From 59f216cf04d973b4316761cbf3e7cb9556715b7a Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 4 Mar 2021 19:53:01 +1000 Subject: drm/nouveau: rip out nvkm_client.super No longer required now that userspace can't touch anything that might need it, and should fix DRM MM operations racing with each other, and the random hangs/crashes that come with that. Signed-off-by: Ben Skeggs Reviewed-by: Lyude Paul --- drivers/gpu/drm/nouveau/include/nvif/client.h | 1 - drivers/gpu/drm/nouveau/include/nvif/driver.h | 2 +- drivers/gpu/drm/nouveau/include/nvkm/core/client.h | 1 - drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h | 2 +- drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h | 1 - drivers/gpu/drm/nouveau/nouveau_abi16.c | 2 -- drivers/gpu/drm/nouveau/nouveau_chan.c | 19 ++------------- drivers/gpu/drm/nouveau/nouveau_drm.c | 2 -- drivers/gpu/drm/nouveau/nouveau_mem.c | 15 +----------- drivers/gpu/drm/nouveau/nouveau_nvif.c | 4 ++-- drivers/gpu/drm/nouveau/nouveau_svm.c | 9 -------- drivers/gpu/drm/nouveau/nvif/client.c | 3 +-- drivers/gpu/drm/nouveau/nvif/object.c | 3 +-- drivers/gpu/drm/nouveau/nvkm/core/ioctl.c | 4 +--- drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c | 15 ------------ .../gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c | 2 -- .../gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c | 2 -- .../gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c | 2 -- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c | 6 ++--- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h | 1 - drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c | 27 +++++----------------- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c | 6 +---- drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c | 16 ++++++------- 24 files changed, 27 insertions(+), 120 deletions(-) diff --git a/drivers/gpu/drm/nouveau/include/nvif/client.h b/drivers/gpu/drm/nouveau/include/nvif/client.h index 347d2c020bd1..5d9395e651b6 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/client.h +++ b/drivers/gpu/drm/nouveau/include/nvif/client.h @@ -9,7 +9,6 @@ struct nvif_client { const struct nvif_driver *driver; u64 version; u8 route; - bool super; }; int nvif_client_ctor(struct nvif_client *parent, const char *name, u64 device, diff --git a/drivers/gpu/drm/nouveau/include/nvif/driver.h b/drivers/gpu/drm/nouveau/include/nvif/driver.h index 8e85b936eaa0..7a3af05f7f98 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/driver.h +++ b/drivers/gpu/drm/nouveau/include/nvif/driver.h @@ -11,7 +11,7 @@ struct nvif_driver { void (*fini)(void *priv); int (*suspend)(void *priv); int (*resume)(void *priv); - int (*ioctl)(void *priv, bool super, void *data, u32 size, void **hack); + int (*ioctl)(void *priv, void *data, u32 size, void **hack); void __iomem *(*map)(void *priv, u64 handle, u32 size); void (*unmap)(void *priv, void __iomem *ptr, u32 size); bool keep; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h index 5d7017fe5039..2f86606e708c 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h @@ -13,7 +13,6 @@ struct nvkm_client { struct nvkm_client_notify *notify[32]; struct rb_root objroot; - bool super; void *data; int (*ntfy)(const void *, u32, const void *, u32); diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h b/drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h index 71ed147ad077..f52918a43246 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/ioctl.h @@ -4,5 +4,5 @@ #include struct nvkm_client; -int nvkm_ioctl(struct nvkm_client *, bool, void *, u32, void **); +int nvkm_ioctl(struct nvkm_client *, void *, u32, void **); #endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h index 0911e73f7424..70e7887ef4b4 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h @@ -15,7 +15,6 @@ struct nvkm_vma { u8 refd:3; /* Current page type (index, or NONE for unreferenced). */ bool used:1; /* Region allocated. */ bool part:1; /* Region was split from an allocated region by map(). */ - bool user:1; /* Region user-allocated. */ bool busy:1; /* Region busy (for temporarily preventing user access). */ bool mapped:1; /* Region contains valid pages. */ struct nvkm_memory *memory; /* Memory currently mapped into VMA. */ diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index b45ec3086285..4107b7006539 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -570,11 +570,9 @@ nouveau_abi16_ioctl_notifierobj_alloc(ABI16_IOCTL_ARGS) } client->route = NVDRM_OBJECT_ABI16; - client->super = true; ret = nvif_object_ctor(&chan->chan->user, "abi16Ntfy", info->handle, NV_DMA_IN_MEMORY, &args, sizeof(args), &ntfy->object); - client->super = false; client->route = NVDRM_OBJECT_NVIF; if (ret) goto done; diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 40362600eed2..80099ef75702 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -86,12 +86,6 @@ nouveau_channel_del(struct nouveau_channel **pchan) struct nouveau_channel *chan = *pchan; if (chan) { struct nouveau_cli *cli = (void *)chan->user.client; - bool super; - - if (cli) { - super = cli->base.super; - cli->base.super = true; - } if (chan->fence) nouveau_fence(chan->drm)->context_del(chan); @@ -111,9 +105,6 @@ nouveau_channel_del(struct nouveau_channel **pchan) nouveau_bo_unpin(chan->push.buffer); nouveau_bo_ref(NULL, &chan->push.buffer); kfree(chan); - - if (cli) - cli->base.super = super; } *pchan = NULL; } @@ -512,20 +503,16 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device, struct nouveau_channel **pchan) { struct nouveau_cli *cli = (void *)device->object.client; - bool super; int ret; /* hack until fencenv50 is fixed, and agp access relaxed */ - super = cli->base.super; - cli->base.super = true; - ret = nouveau_channel_ind(drm, device, arg0, priv, pchan); if (ret) { NV_PRINTK(dbg, cli, "ib channel create, %d\n", ret); ret = nouveau_channel_dma(drm, device, pchan); if (ret) { NV_PRINTK(dbg, cli, "dma channel create, %d\n", ret); - goto done; + return ret; } } @@ -533,15 +520,13 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device, if (ret) { NV_PRINTK(err, cli, "channel failed to initialise, %d\n", ret); nouveau_channel_del(pchan); - goto done; + return ret; } ret = nouveau_svmm_join((*pchan)->vmm->svmm, (*pchan)->inst); if (ret) nouveau_channel_del(pchan); -done: - cli->base.super = super; return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index d7fdff917aed..ba4cd5f83725 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -1087,8 +1087,6 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv) if (ret) goto done; - cli->base.super = false; - fpriv->driver_priv = cli; mutex_lock(&drm->client.mutex); diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c index 0de6549fb875..2ca3207c13fc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_mem.c +++ b/drivers/gpu/drm/nouveau/nouveau_mem.c @@ -41,8 +41,6 @@ nouveau_mem_map(struct nouveau_mem *mem, struct gf100_vmm_map_v0 gf100; } args; u32 argc = 0; - bool super; - int ret; switch (vmm->object.oclass) { case NVIF_CLASS_VMM_NV04: @@ -73,12 +71,7 @@ nouveau_mem_map(struct nouveau_mem *mem, return -ENOSYS; } - super = vmm->object.client->super; - vmm->object.client->super = true; - ret = nvif_vmm_map(vmm, vma->addr, mem->mem.size, &args, argc, - &mem->mem, 0); - vmm->object.client->super = super; - return ret; + return nvif_vmm_map(vmm, vma->addr, mem->mem.size, &args, argc, &mem->mem, 0); } void @@ -99,7 +92,6 @@ nouveau_mem_host(struct ttm_resource *reg, struct ttm_tt *tt) struct nouveau_drm *drm = cli->drm; struct nvif_mmu *mmu = &cli->mmu; struct nvif_mem_ram_v0 args = {}; - bool super = cli->base.super; u8 type; int ret; @@ -122,11 +114,9 @@ nouveau_mem_host(struct ttm_resource *reg, struct ttm_tt *tt) args.dma = tt->dma_address; mutex_lock(&drm->master.lock); - cli->base.super = true; ret = nvif_mem_ctor_type(mmu, "ttmHostMem", cli->mem->oclass, type, PAGE_SHIFT, reg->num_pages << PAGE_SHIFT, &args, sizeof(args), &mem->mem); - cli->base.super = super; mutex_unlock(&drm->master.lock); return ret; } @@ -138,12 +128,10 @@ nouveau_mem_vram(struct ttm_resource *reg, bool contig, u8 page) struct nouveau_cli *cli = mem->cli; struct nouveau_drm *drm = cli->drm; struct nvif_mmu *mmu = &cli->mmu; - bool super = cli->base.super; u64 size = ALIGN(reg->num_pages << PAGE_SHIFT, 1 << page); int ret; mutex_lock(&drm->master.lock); - cli->base.super = true; switch (cli->mem->oclass) { case NVIF_CLASS_MEM_GF100: ret = nvif_mem_ctor_type(mmu, "ttmVram", cli->mem->oclass, @@ -167,7 +155,6 @@ nouveau_mem_vram(struct ttm_resource *reg, bool contig, u8 page) WARN_ON(1); break; } - cli->base.super = super; mutex_unlock(&drm->master.lock); reg->start = mem->mem.addr >> PAGE_SHIFT; diff --git a/drivers/gpu/drm/nouveau/nouveau_nvif.c b/drivers/gpu/drm/nouveau/nouveau_nvif.c index b3f29b1ce9ea..52f5793b7274 100644 --- a/drivers/gpu/drm/nouveau/nouveau_nvif.c +++ b/drivers/gpu/drm/nouveau/nouveau_nvif.c @@ -52,9 +52,9 @@ nvkm_client_map(void *priv, u64 handle, u32 size) } static int -nvkm_client_ioctl(void *priv, bool super, void *data, u32 size, void **hack) +nvkm_client_ioctl(void *priv, void *data, u32 size, void **hack) { - return nvkm_ioctl(priv, super, data, size, hack); + return nvkm_ioctl(priv, data, size, hack); } static int diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 82b583f5fca8..b0c3422cb01f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -237,14 +237,11 @@ void nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit) { if (limit > start) { - bool super = svmm->vmm->vmm.object.client->super; - svmm->vmm->vmm.object.client->super = true; nvif_object_mthd(&svmm->vmm->vmm.object, NVIF_VMM_V0_PFNCLR, &(struct nvif_vmm_pfnclr_v0) { .addr = start, .size = limit - start, }, sizeof(struct nvif_vmm_pfnclr_v0)); - svmm->vmm->vmm.object.client->super = super; } } @@ -634,9 +631,7 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, NVIF_VMM_PFNMAP_V0_A | NVIF_VMM_PFNMAP_V0_HOST; - svmm->vmm->vmm.object.client->super = true; ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); - svmm->vmm->vmm.object.client->super = false; mutex_unlock(&svmm->mutex); unlock_page(page); @@ -702,9 +697,7 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm, nouveau_hmm_convert_pfn(drm, &range, args); - svmm->vmm->vmm.object.client->super = true; ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); - svmm->vmm->vmm.object.client->super = false; mutex_unlock(&svmm->mutex); out: @@ -928,10 +921,8 @@ nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm, mutex_lock(&svmm->mutex); - svmm->vmm->vmm.object.client->super = true; ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, sizeof(*args) + npages * sizeof(args->p.phys[0]), NULL); - svmm->vmm->vmm.object.client->super = false; mutex_unlock(&svmm->mutex); } diff --git a/drivers/gpu/drm/nouveau/nvif/client.c b/drivers/gpu/drm/nouveau/nvif/client.c index 12644f811b3e..a3264a0e933a 100644 --- a/drivers/gpu/drm/nouveau/nvif/client.c +++ b/drivers/gpu/drm/nouveau/nvif/client.c @@ -32,7 +32,7 @@ int nvif_client_ioctl(struct nvif_client *client, void *data, u32 size) { - return client->driver->ioctl(client->object.priv, client->super, data, size, NULL); + return client->driver->ioctl(client->object.priv, data, size, NULL); } int @@ -80,7 +80,6 @@ nvif_client_ctor(struct nvif_client *parent, const char *name, u64 device, client->object.client = client; client->object.handle = ~0; client->route = NVIF_IOCTL_V0_ROUTE_NVIF; - client->super = true; client->driver = parent->driver; if (ret == 0) { diff --git a/drivers/gpu/drm/nouveau/nvif/object.c b/drivers/gpu/drm/nouveau/nvif/object.c index 671a5c0199e0..dce1ecee2af5 100644 --- a/drivers/gpu/drm/nouveau/nvif/object.c +++ b/drivers/gpu/drm/nouveau/nvif/object.c @@ -44,8 +44,7 @@ nvif_object_ioctl(struct nvif_object *object, void *data, u32 size, void **hack) } else return -ENOSYS; - return client->driver->ioctl(client->object.priv, client->super, - data, size, hack); + return client->driver->ioctl(client->object.priv, data, size, hack); } void diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c index d777df5a64e6..735cb6816f10 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c @@ -426,8 +426,7 @@ nvkm_ioctl_path(struct nvkm_client *client, u64 handle, u32 type, } int -nvkm_ioctl(struct nvkm_client *client, bool supervisor, - void *data, u32 size, void **hack) +nvkm_ioctl(struct nvkm_client *client, void *data, u32 size, void **hack) { struct nvkm_object *object = &client->object; union { @@ -435,7 +434,6 @@ nvkm_ioctl(struct nvkm_client *client, bool supervisor, } *args = data; int ret = -ENOSYS; - client->super = supervisor; nvif_ioctl(object, "size %d\n", size); if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c index d20cc0681a88..797131ed7d67 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -72,11 +71,7 @@ nvkm_dmaobj_ctor(const struct nvkm_dmaobj_func *func, struct nvkm_dma *dma, union { struct nv_dma_v0 v0; } *args = *pdata; - struct nvkm_device *device = dma->engine.subdev.device; - struct nvkm_client *client = oclass->client; struct nvkm_object *parent = oclass->parent; - struct nvkm_instmem *instmem = device->imem; - struct nvkm_fb *fb = device->fb; void *data = *pdata; u32 size = *psize; int ret = -ENOSYS; @@ -109,23 +104,13 @@ nvkm_dmaobj_ctor(const struct nvkm_dmaobj_func *func, struct nvkm_dma *dma, dmaobj->target = NV_MEM_TARGET_VM; break; case NV_DMA_V0_TARGET_VRAM: - if (!client->super) { - if (dmaobj->limit >= fb->ram->size - instmem->reserved) - return -EACCES; - if (device->card_type >= NV_50) - return -EACCES; - } dmaobj->target = NV_MEM_TARGET_VRAM; break; case NV_DMA_V0_TARGET_PCI: - if (!client->super) - return -EACCES; dmaobj->target = NV_MEM_TARGET_PCI; break; case NV_DMA_V0_TARGET_PCI_US: case NV_DMA_V0_TARGET_AGP: - if (!client->super) - return -EACCES; dmaobj->target = NV_MEM_TARGET_PCI_NOSNOOP; break; default: diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c index b6900a52bcce..ae6c4d846eb5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c @@ -341,8 +341,6 @@ gk104_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, "runlist %016llx priv %d\n", args->v0.version, args->v0.vmm, args->v0.ioffset, args->v0.ilength, args->v0.runlist, args->v0.priv); - if (args->v0.priv && !oclass->client->super) - return -EINVAL; return gk104_fifo_gpfifo_new_(fifo, &args->v0.runlist, &args->v0.chid, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c index ee4967b706a7..743791c514fe 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c @@ -226,8 +226,6 @@ gv100_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, "runlist %016llx priv %d\n", args->v0.version, args->v0.vmm, args->v0.ioffset, args->v0.ilength, args->v0.runlist, args->v0.priv); - if (args->v0.priv && !oclass->client->super) - return -EINVAL; return gv100_fifo_gpfifo_new_(&gv100_fifo_gpfifo, fifo, &args->v0.runlist, &args->v0.chid, diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c index abef7fb6e2d3..99aafa103a31 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c @@ -65,8 +65,6 @@ tu102_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass, "runlist %016llx priv %d\n", args->v0.version, args->v0.vmm, args->v0.ioffset, args->v0.ilength, args->v0.runlist, args->v0.priv); - if (args->v0.priv && !oclass->client->super) - return -EINVAL; return gv100_fifo_gpfifo_new_(&tu102_fifo_gpfifo, fifo, &args->v0.runlist, &args->v0.chid, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c index fac2f9a45ea6..e530bb8b3b17 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.c @@ -41,7 +41,7 @@ nvkm_umem_search(struct nvkm_client *client, u64 handle) object = nvkm_object_search(client, handle, &nvkm_umem); if (IS_ERR(object)) { - if (client->super && client != master) { + if (client != master) { spin_lock(&master->lock); list_for_each_entry(umem, &master->umem, head) { if (umem->object.object == handle) { @@ -53,8 +53,7 @@ nvkm_umem_search(struct nvkm_client *client, u64 handle) } } else { umem = nvkm_umem(object); - if (!umem->priv || client->super) - memory = nvkm_memory_ref(umem->memory); + memory = nvkm_memory_ref(umem->memory); } return memory ? memory : ERR_PTR(-ENOENT); @@ -167,7 +166,6 @@ nvkm_umem_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, nvkm_object_ctor(&nvkm_umem, oclass, &umem->object); umem->mmu = mmu; umem->type = mmu->type[type].type; - umem->priv = oclass->client->super; INIT_LIST_HEAD(&umem->head); *pobject = &umem->object; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h index 85cf692d620a..d56a594016cc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/umem.h @@ -8,7 +8,6 @@ struct nvkm_umem { struct nvkm_object object; struct nvkm_mmu *mmu; u8 type:8; - bool priv:1; bool mappable:1; bool io:1; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c index 0e4b8941da37..6870fda4b188 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/ummu.c @@ -34,7 +34,7 @@ nvkm_ummu_sclass(struct nvkm_object *object, int index, { struct nvkm_mmu *mmu = nvkm_ummu(object)->mmu; - if (mmu->func->mem.user.oclass && oclass->client->super) { + if (mmu->func->mem.user.oclass) { if (index-- == 0) { oclass->base = mmu->func->mem.user; oclass->ctor = nvkm_umem_new; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c index c43b8248c682..d6a1f8d04c09 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c @@ -45,7 +45,6 @@ nvkm_uvmm_search(struct nvkm_client *client, u64 handle) static int nvkm_uvmm_mthd_pfnclr(struct nvkm_uvmm *uvmm, void *argv, u32 argc) { - struct nvkm_client *client = uvmm->object.client; union { struct nvif_vmm_pfnclr_v0 v0; } *args = argv; @@ -59,9 +58,6 @@ nvkm_uvmm_mthd_pfnclr(struct nvkm_uvmm *uvmm, void *argv, u32 argc) } else return ret; - if (!client->super) - return -ENOENT; - if (size) { mutex_lock(&vmm->mutex); ret = nvkm_vmm_pfn_unmap(vmm, addr, size); @@ -74,7 +70,6 @@ nvkm_uvmm_mthd_pfnclr(struct nvkm_uvmm *uvmm, void *argv, u32 argc) static int nvkm_uvmm_mthd_pfnmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) { - struct nvkm_client *client = uvmm->object.client; union { struct nvif_vmm_pfnmap_v0 v0; } *args = argv; @@ -93,9 +88,6 @@ nvkm_uvmm_mthd_pfnmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) } else return ret; - if (!client->super) - return -ENOENT; - if (size) { mutex_lock(&vmm->mutex); ret = nvkm_vmm_pfn_map(vmm, page, addr, size, phys); @@ -108,7 +100,6 @@ nvkm_uvmm_mthd_pfnmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) static int nvkm_uvmm_mthd_unmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) { - struct nvkm_client *client = uvmm->object.client; union { struct nvif_vmm_unmap_v0 v0; } *args = argv; @@ -130,9 +121,8 @@ nvkm_uvmm_mthd_unmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc) goto done; } - if (ret = -ENOENT, (!vma->user && !client->super) || vma->busy) { - VMM_DEBUG(vmm, "denied %016llx: %d %d %d", addr, - vma->user, !client->super, vma->busy); + if (ret = -ENOENT, vma->busy) { + VMM_DEBUG(vmm, "denied %016llx: %d", addr, vma->busy); goto done; } @@ -181,9 +171,8 @@ nvkm_uvmm_mthd_map(struct nvkm_uvmm *uvmm, void *argv, u32 argc) goto fail; } - if (ret = -ENOENT, (!vma->user && !client->super) || vma->busy) { - VMM_DEBUG(vmm, "denied %016llx: %d %d %d", addr, - vma->user, !client->super, vma->busy); + if (ret = -ENOENT, vma->busy) { + VMM_DEBUG(vmm, "denied %016llx: %d", addr, vma->busy); goto fail; } @@ -230,7 +219,6 @@ fail: static int nvkm_uvmm_mthd_put(struct nvkm_uvmm *uvmm, void *argv, u32 argc) { - struct nvkm_client *client = uvmm->object.client; union { struct nvif_vmm_put_v0 v0; } *args = argv; @@ -252,9 +240,8 @@ nvkm_uvmm_mthd_put(struct nvkm_uvmm *uvmm, void *argv, u32 argc) goto done; } - if (ret = -ENOENT, (!vma->user && !client->super) || vma->busy) { - VMM_DEBUG(vmm, "denied %016llx: %d %d %d", addr, - vma->user, !client->super, vma->busy); + if (ret = -ENOENT, vma->busy) { + VMM_DEBUG(vmm, "denied %016llx: %d", addr, vma->busy); goto done; } @@ -268,7 +255,6 @@ done: static int nvkm_uvmm_mthd_get(struct nvkm_uvmm *uvmm, void *argv, u32 argc) { - struct nvkm_client *client = uvmm->object.client; union { struct nvif_vmm_get_v0 v0; } *args = argv; @@ -297,7 +283,6 @@ nvkm_uvmm_mthd_get(struct nvkm_uvmm *uvmm, void *argv, u32 argc) return ret; args->v0.addr = vma->addr; - vma->user = !client->super; return ret; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c index 710f3f8dc7c9..8bf00b396ec1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c @@ -774,7 +774,6 @@ nvkm_vma_tail(struct nvkm_vma *vma, u64 tail) new->refd = vma->refd; new->used = vma->used; new->part = vma->part; - new->user = vma->user; new->busy = vma->busy; new->mapped = vma->mapped; list_add(&new->head, &vma->head); @@ -951,7 +950,7 @@ nvkm_vmm_node_split(struct nvkm_vmm *vmm, static void nvkm_vma_dump(struct nvkm_vma *vma) { - printk(KERN_ERR "%016llx %016llx %c%c%c%c%c%c%c%c%c %p\n", + printk(KERN_ERR "%016llx %016llx %c%c%c%c%c%c%c%c %p\n", vma->addr, (u64)vma->size, vma->used ? '-' : 'F', vma->mapref ? 'R' : '-', @@ -959,7 +958,6 @@ nvkm_vma_dump(struct nvkm_vma *vma) vma->page != NVKM_VMA_PAGE_NONE ? '0' + vma->page : '-', vma->refd != NVKM_VMA_PAGE_NONE ? '0' + vma->refd : '-', vma->part ? 'P' : '-', - vma->user ? 'U' : '-', vma->busy ? 'B' : '-', vma->mapped ? 'M' : '-', vma->memory); @@ -1024,7 +1022,6 @@ nvkm_vmm_ctor_managed(struct nvkm_vmm *vmm, u64 addr, u64 size) vma->mapref = true; vma->sparse = false; vma->used = true; - vma->user = true; nvkm_vmm_node_insert(vmm, vma); list_add_tail(&vma->head, &vmm->list); return 0; @@ -1615,7 +1612,6 @@ nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma) vma->page = NVKM_VMA_PAGE_NONE; vma->refd = NVKM_VMA_PAGE_NONE; vma->used = false; - vma->user = false; nvkm_vmm_put_region(vmm, vma); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c index f02abd9cb4dd..b5e733783b5b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c @@ -534,15 +534,13 @@ int gp100_vmm_mthd(struct nvkm_vmm *vmm, struct nvkm_client *client, u32 mthd, void *argv, u32 argc) { - if (client->super) { - switch (mthd) { - case GP100_VMM_VN_FAULT_REPLAY: - return gp100_vmm_fault_replay(vmm, argv, argc); - case GP100_VMM_VN_FAULT_CANCEL: - return gp100_vmm_fault_cancel(vmm, argv, argc); - default: - break; - } + switch (mthd) { + case GP100_VMM_VN_FAULT_REPLAY: + return gp100_vmm_fault_replay(vmm, argv, argc); + case GP100_VMM_VN_FAULT_CANCEL: + return gp100_vmm_fault_cancel(vmm, argv, argc); + default: + break; } return -EINVAL; } -- cgit From 86b9bbd332d0510679c7fedcee3e3bd278be5756 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Mon, 16 Aug 2021 13:59:17 +0200 Subject: sch_cake: fix srchost/dsthost hashing mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When adding support for using the skb->hash value as the flow hash in CAKE, I accidentally introduced a logic error that broke the host-only isolation modes of CAKE (srchost and dsthost keywords). Specifically, the flow_hash variable should stay initialised to 0 in cake_hash() in pure host-based hashing mode. Add a check for this before using the skb->hash value as flow_hash. Fixes: b0c19ed6088a ("sch_cake: Take advantage of skb->hash where appropriate") Reported-by: Pete Heist Tested-by: Pete Heist Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 951542843cab..28af8b1e1bb1 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -720,7 +720,7 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, skip_hash: if (flow_override) flow_hash = flow_override - 1; - else if (use_skbhash) + else if (use_skbhash && (flow_mode & CAKE_FLOW_FLOWS)) flow_hash = skb->hash; if (host_override) { dsthost_hash = host_override - 1; -- cgit From ed5d2937a6a8f12e7f815748f991990e79ac4cd1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 17 Aug 2021 17:52:45 +0300 Subject: net: dsa: sja1105: fix use-after-free after calling of_find_compatible_node, or worse It seems that of_find_compatible_node has a weird calling convention in which it calls of_node_put() on the "from" node argument, instead of leaving that up to the caller. This comes from the fact that of_find_compatible_node with a non-NULL "from" argument it only supposed to be used as the iterator function of for_each_compatible_node(). OF iterator functions call of_node_get on the next OF node and of_node_put() on the previous one. When of_find_compatible_node calls of_node_put, it actually never expects the refcount to drop to zero, because the call is done under the atomic devtree_lock context, and when the refcount drops to zero it triggers a kobject and a sysfs file deletion, which assume blocking context. So any driver call to of_find_compatible_node is probably buggy because an unexpected of_node_put() takes place. What should be done is to use the of_get_compatible_child() function. Fixes: 5a8f09748ee7 ("net: dsa: sja1105: register the MDIO buses for 100base-T1 and 100base-TX") Link: https://lore.kernel.org/netdev/20210814010139.kzryimmp4rizlznt@skbuf/ Suggested-by: Frank Rowand Suggested-by: Rob Herring Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_mdio.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c index 19aea8fb76f6..705d3900e43a 100644 --- a/drivers/net/dsa/sja1105/sja1105_mdio.c +++ b/drivers/net/dsa/sja1105/sja1105_mdio.c @@ -284,8 +284,7 @@ static int sja1105_mdiobus_base_tx_register(struct sja1105_private *priv, struct mii_bus *bus; int rc = 0; - np = of_find_compatible_node(mdio_node, NULL, - "nxp,sja1110-base-tx-mdio"); + np = of_get_compatible_child(mdio_node, "nxp,sja1110-base-tx-mdio"); if (!np) return 0; @@ -339,8 +338,7 @@ static int sja1105_mdiobus_base_t1_register(struct sja1105_private *priv, struct mii_bus *bus; int rc = 0; - np = of_find_compatible_node(mdio_node, NULL, - "nxp,sja1110-base-t1-mdio"); + np = of_get_compatible_child(mdio_node, "nxp,sja1110-base-t1-mdio"); if (!np) return 0; -- cgit From 663d946af5fb2fde0c0498f11fb295e9e8db979f Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 17 Aug 2021 20:38:01 -0700 Subject: net: mdio-mux: Delete unnecessary devm_kfree The whole point of devm_* APIs is that you don't have to undo them if you are returning an error that's going to get propagated out of a probe() function. So delete unnecessary devm_kfree() call in the error return path. Fixes: b60161668199 ("mdio: mux: Correct mdio_mux_init error path issues") Signed-off-by: Saravana Kannan Reviewed-by: Andrew Lunn Acked-by: Marc Zyngier Tested-by: Marc Zyngier Acked-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: David S. Miller --- drivers/net/mdio/mdio-mux.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/mdio/mdio-mux.c b/drivers/net/mdio/mdio-mux.c index 110e4ee85785..5b37284f54d6 100644 --- a/drivers/net/mdio/mdio-mux.c +++ b/drivers/net/mdio/mdio-mux.c @@ -181,7 +181,6 @@ int mdio_mux_init(struct device *dev, } dev_err(dev, "Error: No acceptable child buses found\n"); - devm_kfree(dev, pb); err_pb_kz: put_device(&parent_bus->dev); err_parent_bus: -- cgit From 99d81e942474cc7677d12f673f42a7ea699e2589 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 17 Aug 2021 20:38:02 -0700 Subject: net: mdio-mux: Don't ignore memory allocation errors If we are seeing memory allocation errors, don't try to continue registering child mdiobus devices. It's unlikely they'll succeed. Fixes: 342fa1964439 ("mdio: mux: make child bus walking more permissive and errors more verbose") Signed-off-by: Saravana Kannan Reviewed-by: Andrew Lunn Acked-by: Marc Zyngier Tested-by: Marc Zyngier Acked-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: David S. Miller --- drivers/net/mdio/mdio-mux.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/net/mdio/mdio-mux.c b/drivers/net/mdio/mdio-mux.c index 5b37284f54d6..13035e2685c4 100644 --- a/drivers/net/mdio/mdio-mux.c +++ b/drivers/net/mdio/mdio-mux.c @@ -82,6 +82,17 @@ out: static int parent_count; +static void mdio_mux_uninit_children(struct mdio_mux_parent_bus *pb) +{ + struct mdio_mux_child_bus *cb = pb->children; + + while (cb) { + mdiobus_unregister(cb->mii_bus); + mdiobus_free(cb->mii_bus); + cb = cb->next; + } +} + int mdio_mux_init(struct device *dev, struct device_node *mux_node, int (*switch_fn)(int cur, int desired, void *data), @@ -144,7 +155,7 @@ int mdio_mux_init(struct device *dev, cb = devm_kzalloc(dev, sizeof(*cb), GFP_KERNEL); if (!cb) { ret_val = -ENOMEM; - continue; + goto err_loop; } cb->bus_number = v; cb->parent = pb; @@ -152,8 +163,7 @@ int mdio_mux_init(struct device *dev, cb->mii_bus = mdiobus_alloc(); if (!cb->mii_bus) { ret_val = -ENOMEM; - devm_kfree(dev, cb); - continue; + goto err_loop; } cb->mii_bus->priv = cb; @@ -181,6 +191,10 @@ int mdio_mux_init(struct device *dev, } dev_err(dev, "Error: No acceptable child buses found\n"); + +err_loop: + mdio_mux_uninit_children(pb); + of_node_put(child_bus_node); err_pb_kz: put_device(&parent_bus->dev); err_parent_bus: @@ -192,14 +206,8 @@ EXPORT_SYMBOL_GPL(mdio_mux_init); void mdio_mux_uninit(void *mux_handle) { struct mdio_mux_parent_bus *pb = mux_handle; - struct mdio_mux_child_bus *cb = pb->children; - - while (cb) { - mdiobus_unregister(cb->mii_bus); - mdiobus_free(cb->mii_bus); - cb = cb->next; - } + mdio_mux_uninit_children(pb); put_device(&pb->mii_bus->dev); } EXPORT_SYMBOL_GPL(mdio_mux_uninit); -- cgit From 7bd0cef5dac685f09ef8b0b2a7748ff42d284dc7 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 17 Aug 2021 20:38:03 -0700 Subject: net: mdio-mux: Handle -EPROBE_DEFER correctly When registering mdiobus children, if we get an -EPROBE_DEFER, we shouldn't ignore it and continue registering the rest of the mdiobus children. This would permanently prevent the deferring child mdiobus from working instead of reattempting it in the future. So, if a child mdiobus needs to be reattempted in the future, defer the entire mdio-mux initialization. This fixes the issue where PHYs sitting under the mdio-mux aren't initialized correctly if the PHY's interrupt controller is not yet ready when the mdio-mux is being probed. Additional context in the link below. Fixes: 0ca2997d1452 ("netdev/of/phy: Add MDIO bus multiplexer support.") Link: https://lore.kernel.org/lkml/CAGETcx95kHrv8wA-O+-JtfH7H9biJEGJtijuPVN0V5dUKUAB3A@mail.gmail.com/#t Signed-off-by: Saravana Kannan Reviewed-by: Andrew Lunn Acked-by: Marc Zyngier Tested-by: Marc Zyngier Acked-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: David S. Miller --- drivers/net/mdio/mdio-mux.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/mdio/mdio-mux.c b/drivers/net/mdio/mdio-mux.c index 13035e2685c4..ebd001f0eece 100644 --- a/drivers/net/mdio/mdio-mux.c +++ b/drivers/net/mdio/mdio-mux.c @@ -175,11 +175,15 @@ int mdio_mux_init(struct device *dev, cb->mii_bus->write = mdio_mux_write; r = of_mdiobus_register(cb->mii_bus, child_bus_node); if (r) { + mdiobus_free(cb->mii_bus); + if (r == -EPROBE_DEFER) { + ret_val = r; + goto err_loop; + } + devm_kfree(dev, cb); dev_err(dev, "Error: Failed to register MDIO bus for child %pOF\n", child_bus_node); - mdiobus_free(cb->mii_bus); - devm_kfree(dev, cb); } else { cb->next = pb->children; pb->children = cb; -- cgit From 01634047bf0d5c2d9b7d8095bb4de1663dbeedeb Mon Sep 17 00:00:00 2001 From: "kaixi.fan" Date: Wed, 18 Aug 2021 10:22:15 +0800 Subject: ovs: clear skb->tstamp in forwarding path fq qdisc requires tstamp to be cleared in the forwarding path. Now ovs doesn't clear skb->tstamp. We encountered a problem with linux version 5.4.56 and ovs version 2.14.1, and packets failed to dequeue from qdisc when fq qdisc was attached to ovs port. Fixes: fb420d5d91c1 ("tcp/fq: move back to CLOCK_MONOTONIC") Signed-off-by: kaixi.fan Signed-off-by: xiexiaohui Reviewed-by: Cong Wang Signed-off-by: David S. Miller --- net/openvswitch/vport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 88deb5b41429..cf2ce5812489 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -507,6 +507,7 @@ void ovs_vport_send(struct vport *vport, struct sk_buff *skb, u8 mac_proto) } skb->dev = vport->dev; + skb->tstamp = 0; vport->ops->send(skb); return; -- cgit From a786e3195d6af183033e86f0518ffd2c51c0e8ac Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Tue, 17 Aug 2021 19:37:23 +0300 Subject: net: asix: fix uninit value bugs Syzbot reported uninit-value in asix_mdio_read(). The problem was in missing error handling. asix_read_cmd() should initialize passed stack variable smsr, but it can fail in some cases. Then while condidition checks possibly uninit smsr variable. Since smsr is uninitialized stack variable, driver can misbehave, because smsr will be random in case of asix_read_cmd() failure. Fix it by adding error handling and just continue the loop instead of checking uninit value. Added helper function for checking Host_En bit, since wrong loop was used in 4 functions and there is no need in copy-pasting code parts. Cc: Robert Foss Fixes: d9fe64e51114 ("net: asix: Add in_pm parameter") Reported-by: syzbot+a631ec9e717fb0423053@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin Signed-off-by: David S. Miller --- drivers/net/usb/asix_common.c | 70 +++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index ac92bc52a85e..38cda590895c 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -63,6 +63,29 @@ void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index, value, index, data, size); } +static int asix_check_host_enable(struct usbnet *dev, int in_pm) +{ + int i, ret; + u8 smsr; + + for (i = 0; i < 30; ++i) { + ret = asix_set_sw_mii(dev, in_pm); + if (ret == -ENODEV || ret == -ETIMEDOUT) + break; + usleep_range(1000, 1100); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, + 0, 0, 1, &smsr, in_pm); + if (ret == -ENODEV) + break; + else if (ret < 0) + continue; + else if (smsr & AX_HOST_EN) + break; + } + + return ret; +} + static void reset_asix_rx_fixup_info(struct asix_rx_fixup_info *rx) { /* Reset the variables that have a lifetime outside of @@ -467,19 +490,11 @@ int asix_mdio_read(struct net_device *netdev, int phy_id, int loc) { struct usbnet *dev = netdev_priv(netdev); __le16 res; - u8 smsr; - int i = 0; int ret; mutex_lock(&dev->phy_mutex); - do { - ret = asix_set_sw_mii(dev, 0); - if (ret == -ENODEV || ret == -ETIMEDOUT) - break; - usleep_range(1000, 1100); - ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, - 0, 0, 1, &smsr, 0); - } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + + ret = asix_check_host_enable(dev, 0); if (ret == -ENODEV || ret == -ETIMEDOUT) { mutex_unlock(&dev->phy_mutex); return ret; @@ -505,23 +520,14 @@ static int __asix_mdio_write(struct net_device *netdev, int phy_id, int loc, { struct usbnet *dev = netdev_priv(netdev); __le16 res = cpu_to_le16(val); - u8 smsr; - int i = 0; int ret; netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", phy_id, loc, val); mutex_lock(&dev->phy_mutex); - do { - ret = asix_set_sw_mii(dev, 0); - if (ret == -ENODEV) - break; - usleep_range(1000, 1100); - ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, - 0, 0, 1, &smsr, 0); - } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + ret = asix_check_host_enable(dev, 0); if (ret == -ENODEV) goto out; @@ -561,19 +567,11 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) { struct usbnet *dev = netdev_priv(netdev); __le16 res; - u8 smsr; - int i = 0; int ret; mutex_lock(&dev->phy_mutex); - do { - ret = asix_set_sw_mii(dev, 1); - if (ret == -ENODEV || ret == -ETIMEDOUT) - break; - usleep_range(1000, 1100); - ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, - 0, 0, 1, &smsr, 1); - } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + + ret = asix_check_host_enable(dev, 1); if (ret == -ENODEV || ret == -ETIMEDOUT) { mutex_unlock(&dev->phy_mutex); return ret; @@ -595,22 +593,14 @@ asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, int val) { struct usbnet *dev = netdev_priv(netdev); __le16 res = cpu_to_le16(val); - u8 smsr; - int i = 0; int ret; netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", phy_id, loc, val); mutex_lock(&dev->phy_mutex); - do { - ret = asix_set_sw_mii(dev, 1); - if (ret == -ENODEV) - break; - usleep_range(1000, 1100); - ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, - 0, 0, 1, &smsr, 1); - } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + + ret = asix_check_host_enable(dev, 1); if (ret == -ENODEV) { mutex_unlock(&dev->phy_mutex); return; -- cgit From 62ef907a045e1a81830941c48004d7af71c9d75a Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 17 Aug 2021 20:43:20 +0800 Subject: iommu/vt-d: Fix PASID reference leak A PASID reference is increased whenever a device is bound to an mm (and its PASID) successfully (i.e. the device's sdev user count is increased). But the reference is not dropped every time the device is unbound successfully from the mm (i.e. the device's sdev user count is decreased). The reference is dropped only once by calling intel_svm_free_pasid() when there isn't any device bound to the mm. intel_svm_free_pasid() drops the reference and only frees the PASID on zero reference. Fix the issue by dropping the PASID reference and freeing the PASID when no reference on successful unbinding the device by calling intel_svm_free_pasid() . Fixes: 4048377414162 ("iommu/vt-d: Use iommu_sva_alloc(free)_pasid() helpers") Signed-off-by: Fenghua Yu Link: https://lore.kernel.org/r/20210813181345.1870742-1-fenghua.yu@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210817124321.1517985-2-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 9b0f22bc0514..4b9b3f35ba0e 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -675,7 +675,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { - intel_svm_free_pasid(mm); if (svm->notifier.ops) { mmu_notifier_unregister(&svm->notifier, mm); /* Clear mm's pasid. */ @@ -690,6 +689,8 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree(svm); } } + /* Drop a PASID reference and free it if no reference. */ + intel_svm_free_pasid(mm); } out: return ret; -- cgit From 8798d36411196da86e70b994725349c16c1119f6 Mon Sep 17 00:00:00 2001 From: Liu Yi L Date: Tue, 17 Aug 2021 20:43:21 +0800 Subject: iommu/vt-d: Fix incomplete cache flush in intel_pasid_tear_down_entry() This fixes improper iotlb invalidation in intel_pasid_tear_down_entry(). When a PASID was used as nested mode, released and reused, the following error message will appear: [ 180.187556] Unexpected page request in Privilege Mode [ 180.187565] Unexpected page request in Privilege Mode [ 180.279933] Unexpected page request in Privilege Mode [ 180.279937] Unexpected page request in Privilege Mode Per chapter 6.5.3.3 of VT-d spec 3.3, when tear down a pasid entry, the software should use Domain selective IOTLB flush if the PGTT of the pasid entry is SL only or Nested, while for the pasid entries whose PGTT is FL only or PT using PASID-based IOTLB flush is enough. Fixes: 2cd1311a26673 ("iommu/vt-d: Add set domain DOMAIN_ATTR_NESTING attr") Signed-off-by: Kumar Sanjay K Signed-off-by: Liu Yi L Tested-by: Yi Sun Link: https://lore.kernel.org/r/20210817042425.1784279-1-yi.l.liu@intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210817124321.1517985-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.c | 10 ++++++++-- drivers/iommu/intel/pasid.h | 6 ++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index c6cf44a6c923..9ec374e17469 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -511,7 +511,7 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, u32 pasid, bool fault_ignore) { struct pasid_entry *pte; - u16 did; + u16 did, pgtt; pte = intel_pasid_get_entry(dev, pasid); if (WARN_ON(!pte)) @@ -521,13 +521,19 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev, return; did = pasid_get_domain_id(pte); + pgtt = pasid_pte_get_pgtt(pte); + intel_pasid_clear_entry(dev, pasid, fault_ignore); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(pte, sizeof(*pte)); pasid_cache_invalidation_with_pasid(iommu, did, pasid); - qi_flush_piotlb(iommu, did, pasid, 0, -1, 0); + + if (pgtt == PASID_ENTRY_PGTT_PT || pgtt == PASID_ENTRY_PGTT_FL_ONLY) + qi_flush_piotlb(iommu, did, pasid, 0, -1, 0); + else + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); /* Device IOTLB doesn't need to be flushed in caching mode. */ if (!cap_caching_mode(iommu->cap)) diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 5ff61c3d401f..c11bc8b833b8 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -99,6 +99,12 @@ static inline bool pasid_pte_is_present(struct pasid_entry *pte) return READ_ONCE(pte->val[0]) & PASID_PTE_PRESENT; } +/* Get PGTT field of a PASID table entry */ +static inline u16 pasid_pte_get_pgtt(struct pasid_entry *pte) +{ + return (u16)((READ_ONCE(pte->val[0]) >> 6) & 0x7); +} + extern unsigned int intel_pasid_max_id; int intel_pasid_alloc_table(struct device *dev); void intel_pasid_free_table(struct device *dev); -- cgit From b8441b288d6031eac21390891ba36487b2cb398b Mon Sep 17 00:00:00 2001 From: Anshuman Gupta Date: Tue, 10 Aug 2021 17:01:12 +0530 Subject: drm/i915: Tweaked Wa_14010685332 for all PCHs dispcnlunit1_cp_xosc_clkreq clock observed to be active on TGL-H platform despite Wa_14010685332 original sequence, thus blocks entry to deeper s0ix state. The Tweaked Wa_14010685332 sequence fixes this issue, therefore use tweaked Wa_14010685332 sequence for every PCH since PCH_CNP. v2: - removed RKL from comment and simplified condition. [Rodrigo] Fixes: b896898c7369 ("drm/i915: Tweaked Wa_14010685332 for PCHs used on gen11 platforms") Cc: Matt Roper Cc: Rodrigo Vivi Cc: Imre Deak Signed-off-by: Anshuman Gupta Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210810113112.31739-2-anshuman.gupta@intel.com (cherry picked from commit 8b46cc6577f4bbef7e5909bb926da31d705f350f) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_display_power.c | 16 ++++++++-------- drivers/gpu/drm/i915/i915_irq.c | 21 --------------------- 2 files changed, 8 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c index 4298ae684d7d..86b7ac7b65ec 100644 --- a/drivers/gpu/drm/i915/display/intel_display_power.c +++ b/drivers/gpu/drm/i915/display/intel_display_power.c @@ -6387,13 +6387,13 @@ void intel_display_power_suspend_late(struct drm_i915_private *i915) if (DISPLAY_VER(i915) >= 11 || IS_GEMINILAKE(i915) || IS_BROXTON(i915)) { bxt_enable_dc9(i915); - /* Tweaked Wa_14010685332:icp,jsp,mcc */ - if (INTEL_PCH_TYPE(i915) >= PCH_ICP && INTEL_PCH_TYPE(i915) <= PCH_MCC) - intel_de_rmw(i915, SOUTH_CHICKEN1, - SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS); } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) { hsw_enable_pc8(i915); } + + /* Tweaked Wa_14010685332:cnp,icp,jsp,mcc,tgp,adp */ + if (INTEL_PCH_TYPE(i915) >= PCH_CNP && INTEL_PCH_TYPE(i915) < PCH_DG1) + intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, SBCLK_RUN_REFCLK_DIS); } void intel_display_power_resume_early(struct drm_i915_private *i915) @@ -6402,13 +6402,13 @@ void intel_display_power_resume_early(struct drm_i915_private *i915) IS_BROXTON(i915)) { gen9_sanitize_dc_state(i915); bxt_disable_dc9(i915); - /* Tweaked Wa_14010685332:icp,jsp,mcc */ - if (INTEL_PCH_TYPE(i915) >= PCH_ICP && INTEL_PCH_TYPE(i915) <= PCH_MCC) - intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0); - } else if (IS_HASWELL(i915) || IS_BROADWELL(i915)) { hsw_disable_pc8(i915); } + + /* Tweaked Wa_14010685332:cnp,icp,jsp,mcc,tgp,adp */ + if (INTEL_PCH_TYPE(i915) >= PCH_CNP && INTEL_PCH_TYPE(i915) < PCH_DG1) + intel_de_rmw(i915, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0); } void intel_display_power_suspend(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index c03943198089..c3816f5c6900 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3064,24 +3064,6 @@ static void valleyview_irq_reset(struct drm_i915_private *dev_priv) spin_unlock_irq(&dev_priv->irq_lock); } -static void cnp_display_clock_wa(struct drm_i915_private *dev_priv) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - - /* - * Wa_14010685332:cnp/cmp,tgp,adp - * TODO: Clarify which platforms this applies to - * TODO: Figure out if this workaround can be applied in the s0ix suspend/resume handlers as - * on earlier platforms and whether the workaround is also needed for runtime suspend/resume - */ - if (INTEL_PCH_TYPE(dev_priv) == PCH_CNP || - (INTEL_PCH_TYPE(dev_priv) >= PCH_TGP && INTEL_PCH_TYPE(dev_priv) < PCH_DG1)) { - intel_uncore_rmw(uncore, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, - SBCLK_RUN_REFCLK_DIS); - intel_uncore_rmw(uncore, SOUTH_CHICKEN1, SBCLK_RUN_REFCLK_DIS, 0); - } -} - static void gen8_display_irq_reset(struct drm_i915_private *dev_priv) { struct intel_uncore *uncore = &dev_priv->uncore; @@ -3115,7 +3097,6 @@ static void gen8_irq_reset(struct drm_i915_private *dev_priv) if (HAS_PCH_SPLIT(dev_priv)) ibx_irq_reset(dev_priv); - cnp_display_clock_wa(dev_priv); } static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) @@ -3159,8 +3140,6 @@ static void gen11_display_irq_reset(struct drm_i915_private *dev_priv) if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP) GEN3_IRQ_RESET(uncore, SDE); - - cnp_display_clock_wa(dev_priv); } static void gen11_irq_reset(struct drm_i915_private *dev_priv) -- cgit From baa2152dae0496e230545051395a83ac434744bd Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 12 Aug 2021 16:23:54 +0300 Subject: drm/i915/edp: fix eDP MSO pipe sanity checks for ADL-P MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ADL-P supports stream splitter on pipe B in addition to pipe A. Update the sanity check in intel_ddi_mso_get_config() to reflect this, and remove the check in intel_ddi_mso_configure() as redundant with encoder->pipe_mask. Abstract the splitter pipe mask to a single point of truth while at it to avoid similar mistakes in the future. Fixes: 7bc188cc2c8c ("drm/i915/adl_p: enable MSO on pipe B") Cc: Uma Shankar Cc: Ville Syrjälä Cc: Swati Sharma Reviewed-by: Swati Sharma Tested-by: Swati Sharma Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210812132354.10885-1-jani.nikula@intel.com (cherry picked from commit f6864b27d6d324771d979694de7ca455afbad32a) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_ddi.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index be716b56e8e0..00dade49665b 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -2463,6 +2463,15 @@ static void intel_ddi_power_up_lanes(struct intel_encoder *encoder, } } +/* Splitter enable for eDP MSO is limited to certain pipes. */ +static u8 intel_ddi_splitter_pipe_mask(struct drm_i915_private *i915) +{ + if (IS_ALDERLAKE_P(i915)) + return BIT(PIPE_A) | BIT(PIPE_B); + else + return BIT(PIPE_A); +} + static void intel_ddi_mso_get_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) { @@ -2480,8 +2489,7 @@ static void intel_ddi_mso_get_config(struct intel_encoder *encoder, if (!pipe_config->splitter.enable) return; - /* Splitter enable is supported for pipe A only. */ - if (drm_WARN_ON(&i915->drm, pipe != PIPE_A)) { + if (drm_WARN_ON(&i915->drm, !(intel_ddi_splitter_pipe_mask(i915) & BIT(pipe)))) { pipe_config->splitter.enable = false; return; } @@ -2513,10 +2521,6 @@ static void intel_ddi_mso_configure(const struct intel_crtc_state *crtc_state) return; if (crtc_state->splitter.enable) { - /* Splitter enable is supported for pipe A only. */ - if (drm_WARN_ON(&i915->drm, pipe != PIPE_A)) - return; - dss1 |= SPLITTER_ENABLE; dss1 |= OVERLAP_PIXELS(crtc_state->splitter.pixel_overlap); if (crtc_state->splitter.link_count == 2) @@ -4743,12 +4747,8 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) dig_port->hpd_pulse = intel_dp_hpd_pulse; - /* Splitter enable for eDP MSO is limited to certain pipes. */ - if (dig_port->dp.mso_link_count) { - encoder->pipe_mask = BIT(PIPE_A); - if (IS_ALDERLAKE_P(dev_priv)) - encoder->pipe_mask |= BIT(PIPE_B); - } + if (dig_port->dp.mso_link_count) + encoder->pipe_mask = intel_ddi_splitter_pipe_mask(dev_priv); } /* In theory we don't need the encoder->type check, but leave it just in -- cgit From e3e86f41385b2b5d5f771de6009748b584334366 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 16 Aug 2021 10:17:37 +0300 Subject: drm/i915/dp: remove superfluous EXPORT_SYMBOL() The symbol isn't needed outside of i915.ko. Fixes: b30edfd8d0b4 ("drm/i915: Switch to LTTPR non-transparent mode link training") Fixes: 264613b406eb ("drm/i915: Disable LTTPR support when the DPCD rev < 1.4") Cc: Imre Deak Reviewed-by: Imre Deak Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210816071737.2917-1-jani.nikula@intel.com (cherry picked from commit d8959fb33890ba1956c142e83398e89812450ffc) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/display/intel_dp_link_training.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 08bceae40aa8..053a3c2f7267 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -206,7 +206,6 @@ int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp) return lttpr_count; } -EXPORT_SYMBOL(intel_dp_init_lttpr_and_dprx_caps); static u8 dp_voltage_max(u8 preemph) { -- cgit From 3f78c90f9eb2e228f44ecc8f4377753f0e11dbab Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 16 Aug 2021 11:57:11 -0700 Subject: powerpc/xive: Do not mark xive_request_ipi() as __init Compiling ppc64le_defconfig with clang-14 shows a modpost warning: WARNING: modpost: vmlinux.o(.text+0xa74e0): Section mismatch in reference from the function xive_setup_cpu_ipi() to the function .init.text:xive_request_ipi() The function xive_setup_cpu_ipi() references the function __init xive_request_ipi(). This is often because xive_setup_cpu_ipi lacks a __init annotation or the annotation of xive_request_ipi is wrong. xive_request_ipi() is called from xive_setup_cpu_ipi(), which is not __init, so xive_request_ipi() should not be marked __init. Remove the attribute so there is no more warning. Fixes: cbc06f051c52 ("powerpc/xive: Do not skip CPU-less nodes when creating the IPIs") Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210816185711.21563-1-nathan@kernel.org --- arch/powerpc/sysdev/xive/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 943fd30095af..8183ca343675 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -1170,7 +1170,7 @@ out: return ret; } -static int __init xive_request_ipi(unsigned int cpu) +static int xive_request_ipi(unsigned int cpu) { struct xive_ipi_desc *xid = &xive_ipis[early_cpu_to_node(cpu)]; int ret; -- cgit From 5571ea3117ca22849072adb58074fb5a2fd12c00 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 16 Aug 2021 17:46:32 +0200 Subject: usb: typec: tcpm: Fix VDMs sometimes not being forwarded to alt-mode drivers Commit a20dcf53ea98 ("usb: typec: tcpm: Respond Not_Supported if no snk_vdo"), stops tcpm_pd_data_request() calling tcpm_handle_vdm_request() when port->nr_snk_vdo is not set. But the VDM might be intended for an altmode-driver, in which case nr_snk_vdo does not matter. This change breaks the forwarding of connector hotplug (HPD) events for displayport altmode on devices which don't set nr_snk_vdo. tcpm_pd_data_request() is the only caller of tcpm_handle_vdm_request(), so we can move the nr_snk_vdo check to inside it, at which point we have already looked up the altmode device so we can check for this too. Doing this check here also ensures that vdm_state gets set to VDM_STATE_DONE if it was VDM_STATE_BUSY, even if we end up with responding with PD_MSG_CTRL_NOT_SUPP later. Note that tcpm_handle_vdm_request() was already sending PD_MSG_CTRL_NOT_SUPP in some circumstances, after moving the nr_snk_vdo check the same error-path is now taken when that check fails. So that we have only one error-path for this and not two. Replace the tcpm_queue_message(PD_MSG_CTRL_NOT_SUPP) used by the existing error-path with the more robust tcpm_pd_handle_msg() from the (now removed) second error-path. Fixes: a20dcf53ea98 ("usb: typec: tcpm: Respond Not_Supported if no snk_vdo") Cc: stable Cc: Kyle Tso Acked-by: Heikki Krogerus Acked-by: Kyle Tso Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210816154632.381968-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index b9bb63d749ec..f4079b5cb26d 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1737,6 +1737,10 @@ static int tcpm_pd_svdm(struct tcpm_port *port, struct typec_altmode *adev, return rlen; } +static void tcpm_pd_handle_msg(struct tcpm_port *port, + enum pd_msg_request message, + enum tcpm_ams ams); + static void tcpm_handle_vdm_request(struct tcpm_port *port, const __le32 *payload, int cnt) { @@ -1764,11 +1768,11 @@ static void tcpm_handle_vdm_request(struct tcpm_port *port, port->vdm_state = VDM_STATE_DONE; } - if (PD_VDO_SVDM(p[0])) { + if (PD_VDO_SVDM(p[0]) && (adev || tcpm_vdm_ams(port) || port->nr_snk_vdo)) { rlen = tcpm_pd_svdm(port, adev, p, cnt, response, &adev_action); } else { if (port->negotiated_rev >= PD_REV30) - tcpm_queue_message(port, PD_MSG_CTRL_NOT_SUPP); + tcpm_pd_handle_msg(port, PD_MSG_CTRL_NOT_SUPP, NONE_AMS); } /* @@ -2471,10 +2475,7 @@ static void tcpm_pd_data_request(struct tcpm_port *port, NONE_AMS); break; case PD_DATA_VENDOR_DEF: - if (tcpm_vdm_ams(port) || port->nr_snk_vdo) - tcpm_handle_vdm_request(port, msg->payload, cnt); - else if (port->negotiated_rev > PD_REV20) - tcpm_pd_handle_msg(port, PD_MSG_CTRL_NOT_SUPP, NONE_AMS); + tcpm_handle_vdm_request(port, msg->payload, cnt); break; case PD_DATA_BIST: port->bist_request = le32_to_cpu(msg->payload[0]); -- cgit From 1e35b8a7780a0c043cc5389420f069b69343f5d9 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 18 Aug 2021 18:44:35 +0200 Subject: platform/x86: gigabyte-wmi: add support for B450M S2H V2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported as working here: https://github.com/t-8ch/linux-gigabyte-wmi-driver/issues/1#issuecomment-901207693 Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20210818164435.99821-1-linux@weissschuh.net Signed-off-by: Hans de Goede --- drivers/platform/x86/gigabyte-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 9e8cfac403d3..7f3a03f937f6 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -140,6 +140,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev) }} static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M S2H V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"), -- cgit From 3b844826b6c6affa80755254da322b017358a2f4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 5 Aug 2021 10:04:43 -0700 Subject: pipe: avoid unnecessary EPOLLET wakeups under normal loads I had forgotten just how sensitive hackbench is to extra pipe wakeups, and commit 3a34b13a88ca ("pipe: make pipe writes always wake up readers") ended up causing a quite noticeable regression on larger machines. Now, hackbench isn't necessarily a hugely meaningful benchmark, and it's not clear that this matters in real life all that much, but as Mel points out, it's used often enough when comparing kernels and so the performance regression shows up like a sore thumb. It's easy enough to fix at least for the common cases where pipes are used purely for data transfer, and you never have any exciting poll usage at all. So set a special 'poll_usage' flag when there is polling activity, and make the ugly "EPOLLET has crazy legacy expectations" semantics explicit to only that case. I would love to limit it to just the broken EPOLLET case, but the pipe code can't see the difference between epoll and regular select/poll, so any non-read/write waiting will trigger the extra wakeup behavior. That is sufficient for at least the hackbench case. Apart from making the odd extra wakeup cases more explicitly about EPOLLET, this also makes the extra wakeup be at the _end_ of the pipe write, not at the first write chunk. That is actually much saner semantics (as much as you can call any of the legacy edge-triggered expectations for EPOLLET "sane") since it means that you know the wakeup will happen once the write is done, rather than possibly in the middle of one. [ For stable people: I'm putting a "Fixes" tag on this, but I leave it up to you to decide whether you actually want to backport it or not. It likely has no impact outside of synthetic benchmarks - Linus ] Link: https://lore.kernel.org/lkml/20210802024945.GA8372@xsang-OptiPlex-9020/ Fixes: 3a34b13a88ca ("pipe: make pipe writes always wake up readers") Reported-by: kernel test robot Tested-by: Sandeep Patil Tested-by: Mel Gorman Signed-off-by: Linus Torvalds --- fs/pipe.c | 15 +++++++++------ include/linux/pipe_fs_i.h | 2 ++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 8e6ef62aeb1c..678dee2a8228 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -444,9 +444,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) #endif /* - * Epoll nonsensically wants a wakeup whether the pipe - * was already empty or not. - * * If it wasn't empty we try to merge new data into * the last buffer. * @@ -455,9 +452,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * spanning multiple pages. */ head = pipe->head; - was_empty = true; + was_empty = pipe_empty(head, pipe->tail); chars = total_len & (PAGE_SIZE-1); - if (chars && !pipe_empty(head, pipe->tail)) { + if (chars && !was_empty) { unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask]; int offset = buf->offset + buf->len; @@ -590,8 +587,11 @@ out: * This is particularly important for small writes, because of * how (for example) the GNU make jobserver uses small writes to * wake up pending jobs + * + * Epoll nonsensically wants a wakeup whether the pipe + * was already empty or not. */ - if (was_empty) { + if (was_empty || pipe->poll_usage) { wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } @@ -654,6 +654,9 @@ pipe_poll(struct file *filp, poll_table *wait) struct pipe_inode_info *pipe = filp->private_data; unsigned int head, tail; + /* Epoll has some historical nasty semantics, this enables them */ + pipe->poll_usage = 1; + /* * Reading pipe state only -- no need for acquiring the semaphore. * diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 5d2705f1d01c..fc5642431b92 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -48,6 +48,7 @@ struct pipe_buffer { * @files: number of struct file referring this pipe (protected by ->i_lock) * @r_counter: reader counter * @w_counter: writer counter + * @poll_usage: is this pipe used for epoll, which has crazy wakeups? * @fasync_readers: reader side fasync * @fasync_writers: writer side fasync * @bufs: the circular array of pipe buffers @@ -70,6 +71,7 @@ struct pipe_inode_info { unsigned int files; unsigned int r_counter; unsigned int w_counter; + unsigned int poll_usage; struct page *tmp_page; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; -- cgit From 37717b8c9f0e8c4dd73fc522769cc14649b4f657 Mon Sep 17 00:00:00 2001 From: Zhan Liu Date: Fri, 13 Aug 2021 08:31:04 -0700 Subject: drm/amd/display: Use DCN30 watermark calc for DCN301 [why] dcn301_calculate_wm_and_dl() causes flickering when external monitor is connected. This issue has been fixed before by commit 0e4c0ae59d7e ("drm/amdgpu/display: drop dcn301_calculate_wm_and_dl for now"), however part of the fix was gone after commit 2cbcb78c9ee5 ("Merge tag 'amd-drm-next-5.13-2021-03-23' of https://gitlab.freedesktop.org/agd5f/linux into drm-next"). [how] Use dcn30_calculate_wm_and_dlg() instead as in the original fix. Fixes: 2cbcb78c9ee5 ("Merge tag 'amd-drm-next-5.13-2021-03-23' of https://gitlab.freedesktop.org/agd5f/linux into drm-next") Signed-off-by: Nikola Cornij Reviewed-by: Zhan Liu Tested-by: Zhan Liu Tested-by: Oliver Logush Signed-off-by: Zhan Liu Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/dcn301/dcn301_resource.c | 96 +--------------------- 1 file changed, 1 insertion(+), 95 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index 9776d1737818..912285fdce18 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -1622,106 +1622,12 @@ static void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b dml_init_instance(&dc->dml, &dcn3_01_soc, &dcn3_01_ip, DML_PROJECT_DCN30); } -static void calculate_wm_set_for_vlevel( - int vlevel, - struct wm_range_table_entry *table_entry, - struct dcn_watermarks *wm_set, - struct display_mode_lib *dml, - display_e2e_pipe_params_st *pipes, - int pipe_cnt) -{ - double dram_clock_change_latency_cached = dml->soc.dram_clock_change_latency_us; - - ASSERT(vlevel < dml->soc.num_states); - /* only pipe 0 is read for voltage and dcf/soc clocks */ - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dml->soc.clock_limits[vlevel].dcfclk_mhz; - pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz; - - dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us; - dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us; - dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us; - - wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000; - wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000; - wm_set->cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(dml, pipes, pipe_cnt) * 1000; - wm_set->cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(dml, pipes, pipe_cnt) * 1000; - wm_set->pte_meta_urgent_ns = get_wm_memory_trip(dml, pipes, pipe_cnt) * 1000; - wm_set->frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(dml, pipes, pipe_cnt) * 1000; - wm_set->frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(dml, pipes, pipe_cnt) * 1000; - wm_set->urgent_latency_ns = get_urgent_latency(dml, pipes, pipe_cnt) * 1000; - dml->soc.dram_clock_change_latency_us = dram_clock_change_latency_cached; - -} - -static void dcn301_calculate_wm_and_dlg( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel_req) -{ - int i, pipe_idx; - int vlevel, vlevel_max; - struct wm_range_table_entry *table_entry; - struct clk_bw_params *bw_params = dc->clk_mgr->bw_params; - - ASSERT(bw_params); - - vlevel_max = bw_params->clk_table.num_entries - 1; - - /* WM Set D */ - table_entry = &bw_params->wm_table.entries[WM_D]; - if (table_entry->wm_type == WM_TYPE_RETRAINING) - vlevel = 0; - else - vlevel = vlevel_max; - calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.d, - &context->bw_ctx.dml, pipes, pipe_cnt); - /* WM Set C */ - table_entry = &bw_params->wm_table.entries[WM_C]; - vlevel = min(max(vlevel_req, 2), vlevel_max); - calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c, - &context->bw_ctx.dml, pipes, pipe_cnt); - /* WM Set B */ - table_entry = &bw_params->wm_table.entries[WM_B]; - vlevel = min(max(vlevel_req, 1), vlevel_max); - calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b, - &context->bw_ctx.dml, pipes, pipe_cnt); - - /* WM Set A */ - table_entry = &bw_params->wm_table.entries[WM_A]; - vlevel = min(vlevel_req, vlevel_max); - calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.a, - &context->bw_ctx.dml, pipes, pipe_cnt); - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); - pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - if (dc->config.forced_clocks) { - pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; - pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; - } - if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; - if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; - - pipe_idx++; - } - - dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); -} - static struct resource_funcs dcn301_res_pool_funcs = { .destroy = dcn301_destroy_resource_pool, .link_enc_create = dcn301_link_encoder_create, .panel_cntl_create = dcn301_panel_cntl_create, .validate_bandwidth = dcn30_validate_bandwidth, - .calculate_wm_and_dlg = dcn301_calculate_wm_and_dlg, + .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg, .update_soc_for_wm_a = dcn30_update_soc_for_wm_a, .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer, -- cgit From c1930148a3941f891ddbd76fceaa4e10a957ccf2 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 17 Aug 2021 19:04:25 +0300 Subject: net: mscc: ocelot: allow forwarding from bridge ports to the tag_8021q CPU port Currently we are unable to ping a bridge on top of a felix switch which uses the ocelot-8021q tagger. The packets are dropped on the ingress of the user port and the 'drop_local' counter increments (the counter which denotes drops due to no valid destinations). Dumping the PGID tables, it becomes clear that the PGID_SRC of the user port is zero, so it has no valid destinations. But looking at the code, the cpu_fwd_mask (the bit mask of DSA tag_8021q ports) is clearly missing from the forwarding mask of ports that are under a bridge. So this has always been broken. Looking at the version history of the patch, in v7 https://patchwork.kernel.org/project/netdevbpf/patch/20210125220333.1004365-12-olteanv@gmail.com/ the code looked like this: /* Standalone ports forward only to DSA tag_8021q CPU ports */ unsigned long mask = cpu_fwd_mask; (...) } else if (ocelot->bridge_fwd_mask & BIT(port)) { mask |= ocelot->bridge_fwd_mask & ~BIT(port); while in v8 (the merged version) https://patchwork.kernel.org/project/netdevbpf/patch/20210129010009.3959398-12-olteanv@gmail.com/ it looked like this: unsigned long mask; (...) } else if (ocelot->bridge_fwd_mask & BIT(port)) { mask = ocelot->bridge_fwd_mask & ~BIT(port); So the breakage was introduced between v7 and v8 of the patch. Fixes: e21268efbe26 ("net: dsa: felix: perform switch setup for tag_8021q") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20210817160425.3702809-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mscc/ocelot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index adfb9781799e..2948d731a1c1 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1334,6 +1334,7 @@ void ocelot_apply_bridge_fwd_mask(struct ocelot *ocelot) struct net_device *bond = ocelot_port->bond; mask = ocelot_get_bridge_fwd_mask(ocelot, bridge); + mask |= cpu_fwd_mask; mask &= ~BIT(port); if (bond) { mask &= ~ocelot_get_bond_mask(ocelot, bond, -- cgit From fb4b1373dcab086d0619c29310f0466a0b2ceb8a Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Tue, 17 Aug 2021 10:04:37 -0700 Subject: net/rds: dma_map_sg is entitled to merge entries Function "dma_map_sg" is entitled to merge adjacent entries and return a value smaller than what was passed as "nents". Subsequently "ib_map_mr_sg" needs to work with this value ("sg_dma_len") rather than the original "nents" parameter ("sg_len"). This old RDS bug was exposed and reliably causes kernel panics (using RDMA operations "rds-stress -D") on x86_64 starting with: commit c588072bba6b ("iommu/vt-d: Convert intel iommu driver to the iommu ops") Simply put: Linux 5.11 and later. Signed-off-by: Gerd Rausch Acked-by: Santosh Shilimkar Link: https://lore.kernel.org/r/60efc69f-1f35-529d-a7ef-da0549cad143@oracle.com Signed-off-by: Jakub Kicinski --- net/rds/ib_frmr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 9b6ffff72f2d..28c1b0022178 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -131,9 +131,9 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) cpu_relax(); } - ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, + ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len, &off, PAGE_SIZE); - if (unlikely(ret != ibmr->sg_len)) + if (unlikely(ret != ibmr->sg_dma_len)) return ret < 0 ? ret : -EINVAL; if (cmpxchg(&frmr->fr_state, -- cgit From ef486bf448a057a6e2d50e40ae879f7add6585da Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 18 Aug 2021 06:49:29 +0000 Subject: powerpc/32s: Fix random crashes by adding isync() after locking/unlocking KUEP Commit b5efec00b671 ("powerpc/32s: Move KUEP locking/unlocking in C") removed the 'isync' instruction after adding/removing NX bit in user segments. The reasoning behind this change was that when setting the NX bit we don't mind it taking effect with delay as the kernel never executes text from userspace, and when clearing the NX bit this is to return to userspace and then the 'rfi' should synchronise the context. However, it looks like on book3s/32 having a hash page table, at least on the G3 processor, we get an unexpected fault from userspace, then this is followed by something wrong in the verification of MSR_PR at end of another interrupt. This is fixed by adding back the removed isync() following update of NX bit in user segment registers. Only do it for cores with an hash table, as 603 cores don't exhibit that problem and the two isync increase ./null_syscall selftest by 6 cycles on an MPC 832x. First problem: unexpected WARN_ON() for mysterious PROTFAULT WARNING: CPU: 0 PID: 1660 at arch/powerpc/mm/fault.c:354 do_page_fault+0x6c/0x5b0 Modules linked in: CPU: 0 PID: 1660 Comm: Xorg Not tainted 5.13.0-pmac-00028-gb3c15b60339a #40 NIP: c001b5c8 LR: c001b6f8 CTR: 00000000 REGS: e2d09e40 TRAP: 0700 Not tainted (5.13.0-pmac-00028-gb3c15b60339a) MSR: 00021032 CR: 42d04f30 XER: 20000000 GPR00: c000424c e2d09f00 c301b680 e2d09f40 0000001e 42000000 00cba028 00000000 GPR08: 08000000 48000010 c301b680 e2d09f30 22d09f30 00c1fff0 00cba000 a7b7ba4c GPR16: 00000031 00000000 00000000 00000000 00000000 00000000 a7b7b0d0 00c5c010 GPR24: a7b7b64c a7b7d2f0 00000004 00000000 c1efa6c0 00cba02c 00000300 e2d09f40 NIP [c001b5c8] do_page_fault+0x6c/0x5b0 LR [c001b6f8] do_page_fault+0x19c/0x5b0 Call Trace: [e2d09f00] [e2d09f04] 0xe2d09f04 (unreliable) [e2d09f30] [c000424c] DataAccess_virt+0xd4/0xe4 --- interrupt: 300 at 0xa7a261dc NIP: a7a261dc LR: a7a253bc CTR: 00000000 REGS: e2d09f40 TRAP: 0300 Not tainted (5.13.0-pmac-00028-gb3c15b60339a) MSR: 0000d032 CR: 228428e2 XER: 20000000 DAR: 00cba02c DSISR: 42000000 GPR00: a7a27448 afa6b0e0 a74c35c0 a7b7b614 0000001e a7b7b614 00cba028 00000000 GPR08: 00020fd9 00000031 00cb9ff8 a7a273b0 220028e2 00c1fff0 00cba000 a7b7ba4c GPR16: 00000031 00000000 00000000 00000000 00000000 00000000 a7b7b0d0 00c5c010 GPR24: a7b7b64c a7b7d2f0 00000004 00000002 0000001e a7b7b614 a7b7aff4 00000030 NIP [a7a261dc] 0xa7a261dc LR [a7a253bc] 0xa7a253bc --- interrupt: 300 Instruction dump: 7c4a1378 810300a0 75278410 83820298 83a300a4 553b018c 551e0036 4082038c 2e1b0000 40920228 75280800 41820220 <0fe00000> 3b600000 41920214 81420594 Second problem: MSR PR is seen unset allthough the interrupt frame shows it set kernel BUG at arch/powerpc/kernel/interrupt.c:458! Oops: Exception in kernel mode, sig: 5 [#1] BE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2 PowerMac Modules linked in: CPU: 0 PID: 1660 Comm: Xorg Tainted: G W 5.13.0-pmac-00028-gb3c15b60339a #40 NIP: c0011434 LR: c001629c CTR: 00000000 REGS: e2d09e70 TRAP: 0700 Tainted: G W (5.13.0-pmac-00028-gb3c15b60339a) MSR: 00029032 CR: 42d09f30 XER: 00000000 GPR00: 00000000 e2d09f30 c301b680 e2d09f40 83440000 c44d0e68 e2d09e8c 00000000 GPR08: 00000002 00dc228a 00004000 e2d09f30 22d09f30 00c1fff0 afa6ceb4 00c26144 GPR16: 00c25fb8 00c26140 afa6ceb8 90000000 00c944d8 0000001c 00000000 00200000 GPR24: 00000000 000001fb afa6d1b4 00000001 00000000 a539a2a0 a530fd80 00000089 NIP [c0011434] interrupt_exit_kernel_prepare+0x10/0x70 LR [c001629c] interrupt_return+0x9c/0x144 Call Trace: [e2d09f30] [c000424c] DataAccess_virt+0xd4/0xe4 (unreliable) --- interrupt: 300 at 0xa09be008 NIP: a09be008 LR: a09bdfe8 CTR: a09bdfc0 REGS: e2d09f40 TRAP: 0300 Tainted: G W (5.13.0-pmac-00028-gb3c15b60339a) MSR: 0000d032 CR: 420028e2 XER: 20000000 DAR: a539a308 DSISR: 0a000000 GPR00: a7b90d50 afa6b2d0 a74c35c0 a0a8b690 a0a8b698 a5365d70 a4fa82a8 00000004 GPR08: 00000000 a09bdfc0 00000000 a5360000 a09bde7c 00c1fff0 afa6ceb4 00c26144 GPR16: 00c25fb8 00c26140 afa6ceb8 90000000 00c944d8 0000001c 00000000 00200000 GPR24: 00000000 000001fb afa6d1b4 00000001 00000000 a539a2a0 a530fd80 00000089 NIP [a09be008] 0xa09be008 LR [a09bdfe8] 0xa09bdfe8 --- interrupt: 300 Instruction dump: 80010024 83e1001c 7c0803a6 4bffff80 3bc00800 4bffffd0 486b42fd 4bffffcc 81430084 71480002 41820038 554a0462 <0f0a0000> 80620060 74630001 40820034 Fixes: b5efec00b671 ("powerpc/32s: Move KUEP locking/unlocking in C") Cc: stable@vger.kernel.org # v5.13+ Reported-by: Stan Johnson Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/4856f5574906e2aec0522be17bf3848a22b2cd0b.1629269345.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 64201125a287..d4b145b279f6 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -4,6 +4,8 @@ #include #include +#include +#include #ifndef __ASSEMBLY__ @@ -28,6 +30,15 @@ static inline void kuep_lock(void) return; update_user_segments(mfsr(0) | SR_NX); + /* + * This isync() shouldn't be necessary as the kernel is not excepted to + * run any instruction in userspace soon after the update of segments, + * but hash based cores (at least G3) seem to exhibit a random + * behaviour when the 'isync' is not there. 603 cores don't have this + * behaviour so don't do the 'isync' as it saves several CPU cycles. + */ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + isync(); /* Context sync required after mtsr() */ } static inline void kuep_unlock(void) @@ -36,6 +47,15 @@ static inline void kuep_unlock(void) return; update_user_segments(mfsr(0) & ~SR_NX); + /* + * This isync() shouldn't be necessary as a 'rfi' will soon be executed + * to return to userspace, but hash based cores (at least G3) seem to + * exhibit a random behaviour when the 'isync' is not there. 603 cores + * don't have this behaviour so don't do the 'isync' as it saves several + * CPU cycles. + */ + if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) + isync(); /* Context sync required after mtsr() */ } #ifdef CONFIG_PPC_KUAP -- cgit From 9f7853d7609d59172eecfc5e7ccf503bc1b690bd Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 15 Aug 2021 14:10:24 +1000 Subject: powerpc/mm: Fix set_memory_*() against concurrent accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Laurent reported that STRICT_MODULE_RWX was causing intermittent crashes on one of his systems: kernel tried to execute exec-protected page (c008000004073278) - exploit attempt? (uid: 0) BUG: Unable to handle kernel instruction fetch Faulting instruction address: 0xc008000004073278 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: drm virtio_console fuse drm_panel_orientation_quirks ... CPU: 3 PID: 44 Comm: kworker/3:1 Not tainted 5.14.0-rc4+ #12 Workqueue: events control_work_handler [virtio_console] NIP: c008000004073278 LR: c008000004073278 CTR: c0000000001e9de0 REGS: c00000002e4ef7e0 TRAP: 0400 Not tainted (5.14.0-rc4+) MSR: 800000004280b033 CR: 24002822 XER: 200400cf ... NIP fill_queue+0xf0/0x210 [virtio_console] LR fill_queue+0xf0/0x210 [virtio_console] Call Trace: fill_queue+0xb4/0x210 [virtio_console] (unreliable) add_port+0x1a8/0x470 [virtio_console] control_work_handler+0xbc/0x1e8 [virtio_console] process_one_work+0x290/0x590 worker_thread+0x88/0x620 kthread+0x194/0x1a0 ret_from_kernel_thread+0x5c/0x64 Jordan, Fabiano & Murilo were able to reproduce and identify that the problem is caused by the call to module_enable_ro() in do_init_module(), which happens after the module's init function has already been called. Our current implementation of change_page_attr() is not safe against concurrent accesses, because it invalidates the PTE before flushing the TLB and then installing the new PTE. That leaves a window in time where there is no valid PTE for the page, if another CPU tries to access the page at that time we see something like the fault above. We can't simply switch to set_pte_at()/flush TLB, because our hash MMU code doesn't handle a set_pte_at() of a valid PTE. See [1]. But we do have pte_update(), which replaces the old PTE with the new, meaning there's no window where the PTE is invalid. And the hash MMU version hash__pte_update() deals with synchronising the hash page table correctly. [1]: https://lore.kernel.org/linuxppc-dev/87y318wp9r.fsf@linux.ibm.com/ Fixes: 1f9ad21c3b38 ("powerpc/mm: Implement set_memory() routines") Reported-by: Laurent Vivier Reviewed-by: Christophe Leroy Reviewed-by: Murilo Opsfelder Araújo Tested-by: Laurent Vivier Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210818120518.3603172-1-mpe@ellerman.id.au --- arch/powerpc/mm/pageattr.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index 0876216ceee6..edea388e9d3f 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -18,16 +18,12 @@ /* * Updates the attributes of a page in three steps: * - * 1. invalidate the page table entry - * 2. flush the TLB - * 3. install the new entry with the updated attributes - * - * Invalidating the pte means there are situations where this will not work - * when in theory it should. - * For example: - * - removing write from page whilst it is being executed - * - setting a page read-only whilst it is being read by another CPU + * 1. take the page_table_lock + * 2. install the new entry with the updated attributes + * 3. flush the TLB * + * This sequence is safe against concurrent updates, and also allows updating the + * attributes of a page currently being executed or accessed. */ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) { @@ -36,9 +32,7 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) spin_lock(&init_mm.page_table_lock); - /* invalidate the PTE so it's safe to modify */ - pte = ptep_get_and_clear(&init_mm, addr, ptep); - flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + pte = ptep_get(ptep); /* modify the PTE bits as desired, then apply */ switch (action) { @@ -59,11 +53,14 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) break; } - set_pte_at(&init_mm, addr, ptep, pte); + pte_update(&init_mm, addr, ptep, ~0UL, pte_val(pte), 0); /* See ptesync comment in radix__set_pte_at() */ if (radix_enabled()) asm volatile("ptesync": : :"memory"); + + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + spin_unlock(&init_mm.page_table_lock); return 0; -- cgit From bde8fff82e4a4b0f000dbf4d5eadab2079be0b56 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 18 Aug 2021 17:15:35 +0100 Subject: arm64: initialize all of CNTHCTL_EL2 In __init_el2_timers we initialize CNTHCTL_EL2.{EL1PCEN,EL1PCTEN} with a RMW sequence, leaving all other bits UNKNOWN. In general, we should initialize all bits in a register rather than using an RMW sequence, since most bits are UNKNOWN out of reset, and as new bits are added to the reigster their reset value might not result in expected behaviour. In the case of CNTHCTL_EL2, FEAT_ECV added a number of new control bits in previously RES0 bits, which reset to UNKNOWN values, and may cause issues for EL1 and EL0: * CNTHCTL_EL2.ECV enables the CNTPOFF_EL2 offset (which itself resets to an UNKNOWN value) at EL0 and EL1. Since the offset could reset to distinct values across CPUs, when the control bit resets to 1 this could break timekeeping generally. * CNTHCTL_EL2.{EL1TVT,EL1TVCT} trap EL0 and EL1 accesses to the EL1 virtual timer/counter registers to EL2. When reset to 1, this could cause unexpected traps to EL2. Initializing these bits to zero avoids these problems, and all other bits in CNTHCTL_EL2 other than EL1PCEN and EL1PCTEN can safely be reset to zero. This patch ensures we initialize CNTHCTL_EL2 accordingly, only setting EL1PCEN and EL1PCTEN, and setting all other bits to zero. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: Oliver Upton Cc: Will Deacon Reviewed-by: Oliver Upton Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20210818161535.52786-1-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/el2_setup.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 21fa330f498d..b83fb24954b7 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -33,8 +33,7 @@ * EL2. */ .macro __init_el2_timers - mrs x0, cnthctl_el2 - orr x0, x0, #3 // Enable EL1 physical timers + mov x0, #3 // Enable EL1 physical timers msr cnthctl_el2, x0 msr cntvoff_el2, xzr // Clear virtual offset .endm -- cgit From a0eea5f10eeb5180d115452b0d77afa6603dfe18 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 18 Aug 2021 16:42:36 -0700 Subject: mptcp: fix memory leak on address flush The endpoint cleanup path is prone to a memory leak, as reported by syzkaller: BUG: memory leak unreferenced object 0xffff88810680ea00 (size 64): comm "syz-executor.6", pid 6191, jiffies 4295756280 (age 24.138s) hex dump (first 32 bytes): 58 75 7d 3c 80 88 ff ff 22 01 00 00 00 00 ad de Xu}<...."....... 01 00 02 00 00 00 00 00 ac 1e 00 07 00 00 00 00 ................ backtrace: [<0000000072a9f72a>] kmalloc include/linux/slab.h:591 [inline] [<0000000072a9f72a>] mptcp_nl_cmd_add_addr+0x287/0x9f0 net/mptcp/pm_netlink.c:1170 [<00000000f6e931bf>] genl_family_rcv_msg_doit.isra.0+0x225/0x340 net/netlink/genetlink.c:731 [<00000000f1504a2c>] genl_family_rcv_msg net/netlink/genetlink.c:775 [inline] [<00000000f1504a2c>] genl_rcv_msg+0x341/0x5b0 net/netlink/genetlink.c:792 [<0000000097e76f6a>] netlink_rcv_skb+0x148/0x430 net/netlink/af_netlink.c:2504 [<00000000ceefa2b8>] genl_rcv+0x24/0x40 net/netlink/genetlink.c:803 [<000000008ff91aec>] netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] [<000000008ff91aec>] netlink_unicast+0x537/0x750 net/netlink/af_netlink.c:1340 [<0000000041682c35>] netlink_sendmsg+0x846/0xd80 net/netlink/af_netlink.c:1929 [<00000000df3aa8e7>] sock_sendmsg_nosec net/socket.c:704 [inline] [<00000000df3aa8e7>] sock_sendmsg+0x14e/0x190 net/socket.c:724 [<000000002154c54c>] ____sys_sendmsg+0x709/0x870 net/socket.c:2403 [<000000001aab01d7>] ___sys_sendmsg+0xff/0x170 net/socket.c:2457 [<00000000fa3b1446>] __sys_sendmsg+0xe5/0x1b0 net/socket.c:2486 [<00000000db2ee9c7>] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [<00000000db2ee9c7>] do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80 [<000000005873517d>] entry_SYSCALL_64_after_hwframe+0x44/0xae We should not require an allocation to cleanup stuff. Rework the code a bit so that the additional RCU work is no more needed. Fixes: 1729cf186d8a ("mptcp: create the listening socket for new port") Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 44 ++++++++++++-------------------------------- 1 file changed, 12 insertions(+), 32 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 56263c2c4014..7b3794459783 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1135,36 +1135,12 @@ next: return 0; } -struct addr_entry_release_work { - struct rcu_work rwork; - struct mptcp_pm_addr_entry *entry; -}; - -static void mptcp_pm_release_addr_entry(struct work_struct *work) +/* caller must ensure the RCU grace period is already elapsed */ +static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) { - struct addr_entry_release_work *w; - struct mptcp_pm_addr_entry *entry; - - w = container_of(to_rcu_work(work), struct addr_entry_release_work, rwork); - entry = w->entry; - if (entry) { - if (entry->lsk) - sock_release(entry->lsk); - kfree(entry); - } - kfree(w); -} - -static void mptcp_pm_free_addr_entry(struct mptcp_pm_addr_entry *entry) -{ - struct addr_entry_release_work *w; - - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (w) { - INIT_RCU_WORK(&w->rwork, mptcp_pm_release_addr_entry); - w->entry = entry; - queue_rcu_work(system_wq, &w->rwork); - } + if (entry->lsk) + sock_release(entry->lsk); + kfree(entry); } static int mptcp_nl_remove_id_zero_address(struct net *net, @@ -1244,7 +1220,8 @@ static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) spin_unlock_bh(&pernet->lock); mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), &entry->addr); - mptcp_pm_free_addr_entry(entry); + synchronize_rcu(); + __mptcp_pm_release_addr_entry(entry); return ret; } @@ -1297,6 +1274,7 @@ static void mptcp_nl_remove_addrs_list(struct net *net, } } +/* caller must ensure the RCU grace period is already elapsed */ static void __flush_addrs(struct list_head *list) { while (!list_empty(list)) { @@ -1305,7 +1283,7 @@ static void __flush_addrs(struct list_head *list) cur = list_entry(list->next, struct mptcp_pm_addr_entry, list); list_del_rcu(&cur->list); - mptcp_pm_free_addr_entry(cur); + __mptcp_pm_release_addr_entry(cur); } } @@ -1329,6 +1307,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info) bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1); spin_unlock_bh(&pernet->lock); mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list); + synchronize_rcu(); __flush_addrs(&free_list); return 0; } @@ -1939,7 +1918,8 @@ static void __net_exit pm_nl_exit_net(struct list_head *net_list) struct pm_nl_pernet *pernet = net_generic(net, pm_nl_pernet_id); /* net is removed from namespace list, can't race with - * other modifiers + * other modifiers, also netns core already waited for a + * RCU grace period. */ __flush_addrs(&pernet->local_addr_list); } -- cgit From 67b12f792d5eaeb8b4fca3b2053e6b819eb3bf0f Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Wed, 18 Aug 2021 16:42:37 -0700 Subject: mptcp: full fully established support after ADD_ADDR If directly after an MP_CAPABLE 3WHS, the client receives an ADD_ADDR with HMAC from the server, it is enough to switch to a "fully established" mode because it has received more MPTCP options. It was then OK to enable the "fully_established" flag on the MPTCP socket. Still, best to check if the ADD_ADDR looks valid by looking if it contains an HMAC (no 'echo' bit). If an ADD_ADDR echo is received while we are not in "fully established" mode, it is strange and then we should not switch to this mode now. But that is not enough. On one hand, the path-manager has be notified the state has changed. On the other hand, the "fully_established" flag on the subflow socket should be turned on as well not to re-send the MP_CAPABLE 3rd ACK content with the next ACK. Fixes: 84dfe3677a6f ("mptcp: send out dedicated ADD_ADDR packet") Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 4452455aef7f..7adcbc1f7d49 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -885,20 +885,16 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, return subflow->mp_capable; } - if (mp_opt->dss && mp_opt->use_ack) { + if ((mp_opt->dss && mp_opt->use_ack) || + (mp_opt->add_addr && !mp_opt->echo)) { /* subflows are fully established as soon as we get any - * additional ack. + * additional ack, including ADD_ADDR. */ subflow->fully_established = 1; WRITE_ONCE(msk->fully_established, true); goto fully_established; } - if (mp_opt->add_addr) { - WRITE_ONCE(msk->fully_established, true); - return true; - } - /* If the first established packet does not contain MP_CAPABLE + data * then fallback to TCP. Fallback scenarios requires a reset for * MP_JOIN subflows. -- cgit From a876a33d2a1102f99fc782fefb784f4dd4841d8c Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Thu, 19 Aug 2021 11:05:36 +0800 Subject: r8152: fix writing USB_BP2_EN The register of USB_BP2_EN is 16 bits, so we should use ocp_write_word(), not ocp_write_byte(). Fixes: 9370f2d05a2a ("support request_firmware for RTL8153") Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index e09b107b5c99..3fd17b6dc61d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3965,7 +3965,7 @@ static void rtl_clear_bp(struct r8152 *tp, u16 type) case RTL_VER_15: default: if (type == MCU_TYPE_USB) { - ocp_write_byte(tp, MCU_TYPE_USB, USB_BP2_EN, 0); + ocp_write_word(tp, MCU_TYPE_USB, USB_BP2_EN, 0); ocp_write_word(tp, MCU_TYPE_USB, USB_BP_8, 0); ocp_write_word(tp, MCU_TYPE_USB, USB_BP_9, 0); -- cgit From 6633fb83f1faddbfcac09e35edcae96bd0468335 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Thu, 19 Aug 2021 11:05:37 +0800 Subject: r8152: fix the maximum number of PLA bp for RTL8153C The maximum PLA bp number of RTL8153C is 16, not 8. That is, the bp 0 ~ 15 are at 0xfc28 ~ 0xfc46, and the bp_en is at 0xfc48. Fixes: 195aae321c82 ("r8152: support new chips") Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 3fd17b6dc61d..79832374f78d 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3955,13 +3955,24 @@ static void rtl_clear_bp(struct r8152 *tp, u16 type) case RTL_VER_06: ocp_write_byte(tp, type, PLA_BP_EN, 0); break; + case RTL_VER_14: + ocp_write_word(tp, type, USB_BP2_EN, 0); + + ocp_write_word(tp, type, USB_BP_8, 0); + ocp_write_word(tp, type, USB_BP_9, 0); + ocp_write_word(tp, type, USB_BP_10, 0); + ocp_write_word(tp, type, USB_BP_11, 0); + ocp_write_word(tp, type, USB_BP_12, 0); + ocp_write_word(tp, type, USB_BP_13, 0); + ocp_write_word(tp, type, USB_BP_14, 0); + ocp_write_word(tp, type, USB_BP_15, 0); + break; case RTL_VER_08: case RTL_VER_09: case RTL_VER_10: case RTL_VER_11: case RTL_VER_12: case RTL_VER_13: - case RTL_VER_14: case RTL_VER_15: default: if (type == MCU_TYPE_USB) { @@ -4331,7 +4342,6 @@ static bool rtl8152_is_fw_mac_ok(struct r8152 *tp, struct fw_mac *mac) case RTL_VER_11: case RTL_VER_12: case RTL_VER_13: - case RTL_VER_14: case RTL_VER_15: fw_reg = 0xf800; bp_ba_addr = PLA_BP_BA; @@ -4339,6 +4349,13 @@ static bool rtl8152_is_fw_mac_ok(struct r8152 *tp, struct fw_mac *mac) bp_start = PLA_BP_0; max_bp = 8; break; + case RTL_VER_14: + fw_reg = 0xf800; + bp_ba_addr = PLA_BP_BA; + bp_en_addr = USB_BP2_EN; + bp_start = PLA_BP_0; + max_bp = 16; + break; default: goto out; } -- cgit From e647eff57466c8cf7547532d6b26166b9b17f341 Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Wed, 18 Aug 2021 18:50:30 -0400 Subject: MAINTAINERS: Add Jim Quinlan et al as Broadcom STB PCIe maintainers Add Jim Quinlan, Nicolas Saenz Julienne, and Florian Fainelli as maintainers of the Broadcom STB PCIe controller driver. This driver is also included in these entries: BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE BROADCOM BCM7XXX ARM ARCHITECTURE which cover the Raspberry Pi specifics of the PCIe driver. Link: https://lore.kernel.org/r/20210818225031.8502-1-jim2101024@gmail.com Signed-off-by: Jim Quinlan Signed-off-by: Bjorn Helgaas Acked-by: Florian Fainelli --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ad2520cef3cb..f5c7a72a06fe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3865,6 +3865,16 @@ L: bcm-kernel-feedback-list@broadcom.com S: Maintained F: drivers/mtd/nand/raw/brcmnand/ +BROADCOM STB PCIE DRIVER +M: Jim Quinlan +M: Nicolas Saenz Julienne +M: Florian Fainelli +M: bcm-kernel-feedback-list@broadcom.com +L: linux-pci@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml +F: drivers/pci/controller/pcie-brcmstb.c + BROADCOM SYSTEMPORT ETHERNET DRIVER M: Florian Fainelli L: bcm-kernel-feedback-list@broadcom.com -- cgit From e0bff43220925b7e527f9d3bc9f5c624177c959e Mon Sep 17 00:00:00 2001 From: Marcin Bachry Date: Wed, 21 Jul 2021 22:58:58 -0400 Subject: PCI: Increase D3 delay for AMD Renoir/Cezanne XHCI The Renoir XHCI controller apparently doesn't resume reliably with the standard D3hot-to-D0 delay. Increase it to 20ms. [Alex: I talked to the AMD USB hardware team and the AMD Windows team and they are not aware of any HW errata or specific issues. The HW works fine in Windows. I was told Windows uses a rather generous default delay of 100ms for PCI state transitions.] Link: https://lore.kernel.org/r/20210722025858.220064-1-alexander.deucher@amd.com Signed-off-by: Marcin Bachry Signed-off-by: Alex Deucher Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Cc: Mario Limonciello Cc: Prike Liang Cc: Shyam Sundar S K --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6d74386eadc2..ab3de1551b50 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1900,6 +1900,7 @@ static void quirk_ryzen_xhci_d3hot(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15e0, quirk_ryzen_xhci_d3hot); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x15e1, quirk_ryzen_xhci_d3hot); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x1639, quirk_ryzen_xhci_d3hot); #ifdef CONFIG_X86_IO_APIC static int dmi_disable_ioapicreroute(const struct dmi_system_id *d) -- cgit From 045a9277b5615846c7b662ffaba84e781f08a172 Mon Sep 17 00:00:00 2001 From: Krzysztof Wilczyński Date: Thu, 12 Aug 2021 13:21:44 +0000 Subject: PCI/sysfs: Use correct variable for the legacy_mem sysfs object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two legacy PCI sysfs objects "legacy_io" and "legacy_mem" were updated to use an unified address space in the commit 636b21b50152 ("PCI: Revoke mappings like devmem"). This allows for revocations to be managed from a single place when drivers want to take over and mmap() a /dev/mem range. Following the update, both of the sysfs objects should leverage the iomem_get_mapping() function to get an appropriate address range, but only the "legacy_io" has been correctly updated - the second attribute seems to be using a wrong variable to pass the iomem_get_mapping() function to. Thus, correct the variable name used so that the "legacy_mem" sysfs object would also correctly call the iomem_get_mapping() function. Fixes: 636b21b50152 ("PCI: Revoke mappings like devmem") Link: https://lore.kernel.org/r/20210812132144.791268-1-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Daniel Vetter --- drivers/pci/pci-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 5d63df7c1820..7bbf2673c7f2 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -978,7 +978,7 @@ void pci_create_legacy_files(struct pci_bus *b) b->legacy_mem->size = 1024*1024; b->legacy_mem->attr.mode = 0600; b->legacy_mem->mmap = pci_mmap_legacy_mem; - b->legacy_io->mapping = iomem_get_mapping(); + b->legacy_mem->mapping = iomem_get_mapping(); pci_adjust_legacy_attr(b, pci_mmap_mem); error = device_create_bin_file(&b->dev, b->legacy_mem); if (error) -- cgit From 8903376dc69949199301b290cc22dc64ae5d8a6d Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 18 Aug 2021 22:41:18 +0800 Subject: ALSA: hda/realtek: Limit mic boost on HP ProBook 445 G8 The mic has lots of noises if mic boost is enabled. So disable mic boost to get crystal clear audio capture. Signed-off-by: Kai-Heng Feng Cc: Link: https://lore.kernel.org/r/20210818144119.121738-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 96f32eaa24df..7ad689f991e7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6658,6 +6658,7 @@ enum { ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, ALC623_FIXUP_LENOVO_THINKSTATION_P340, ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, + ALC236_FIXUP_HP_LIMIT_INT_MIC_BOOST, }; static const struct hda_fixup alc269_fixups[] = { @@ -8242,6 +8243,12 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_XIAOMI_HEADSET_MIC }, + [ALC236_FIXUP_HP_LIMIT_INT_MIC_BOOST] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_limit_int_mic_boost, + .chained = true, + .chain_id = ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8438,8 +8445,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8847, "HP EliteBook x360 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x884b, "HP EliteBook 840 Aero G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), - SND_PCI_QUIRK(0x103c, 0x8862, "HP ProBook 445 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), - SND_PCI_QUIRK(0x103c, 0x8863, "HP ProBook 445 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8862, "HP ProBook 445 G8 Notebook PC", ALC236_FIXUP_HP_LIMIT_INT_MIC_BOOST), + SND_PCI_QUIRK(0x103c, 0x8863, "HP ProBook 445 G8 Notebook PC", ALC236_FIXUP_HP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x103c, 0x886d, "HP ZBook Fury 17.3 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), -- cgit From 65ca89c2b12cca0d473f3dd54267568ad3af55cc Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 19 Aug 2021 17:29:45 +0200 Subject: ASoC: intel: atom: Fix breakage for PCM buffer address setup The commit 2e6b836312a4 ("ASoC: intel: atom: Fix reference to PCM buffer address") changed the reference of PCM buffer address to substream->runtime->dma_addr as the buffer address may change dynamically. However, I forgot that the dma_addr field is still not set up for the CONTINUOUS buffer type (that this driver uses) yet in 5.14 and earlier kernels, and it resulted in garbage I/O. The problem will be fixed in 5.15, but we need to address it quickly for now. The fix is to deduce the address again from the DMA pointer with virt_to_phys(), but from the right one, substream->runtime->dma_area. Fixes: 2e6b836312a4 ("ASoC: intel: atom: Fix reference to PCM buffer address") Reported-and-tested-by: Hans de Goede Cc: Acked-by: Mark Brown Link: https://lore.kernel.org/r/2048c6aa-2187-46bd-6772-36a4fb3c5aeb@redhat.com Link: https://lore.kernel.org/r/20210819152945.8510-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/soc/intel/atom/sst-mfld-platform-pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/intel/atom/sst-mfld-platform-pcm.c b/sound/soc/intel/atom/sst-mfld-platform-pcm.c index 5db2f4865bbb..905c7965f653 100644 --- a/sound/soc/intel/atom/sst-mfld-platform-pcm.c +++ b/sound/soc/intel/atom/sst-mfld-platform-pcm.c @@ -127,7 +127,7 @@ static void sst_fill_alloc_params(struct snd_pcm_substream *substream, snd_pcm_uframes_t period_size; ssize_t periodbytes; ssize_t buffer_bytes = snd_pcm_lib_buffer_bytes(substream); - u32 buffer_addr = substream->runtime->dma_addr; + u32 buffer_addr = virt_to_phys(substream->runtime->dma_area); channels = substream->runtime->channels; period_size = substream->runtime->period_size; -- cgit From a222be597e316389f9f8c26033352c124ce93056 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Wed, 18 Aug 2021 10:42:16 -0700 Subject: i40e: Fix ATR queue selection Without this patch, ATR does not work. Receive/transmit uses queue selection based on SW DCB hashing method. If traffic classes are not configured for PF, then use netdev_pick_tx function for selecting queue for packet transmission. Instead of calling i40e_swdcb_skb_tx_hash, call netdev_pick_tx, which ensures that packet is transmitted/received from CPU that is running the application. Reproduction steps: 1. Load i40e driver 2. Map each MSI interrupt of i40e port for each CPU 3. Disable ntuple, enable ATR i.e.: ethtool -K $interface ntuple off ethtool --set-priv-flags $interface flow-director-atr 4. Run application that is generating traffic and is bound to a single CPU, i.e.: taskset -c 9 netperf -H 1.1.1.1 -t TCP_RR -l 10 5. Observe behavior: Application's traffic should be restricted to the CPU provided in taskset. Fixes: 89ec1f0886c1 ("i40e: Fix queue-to-TC mapping on Tx") Signed-off-by: Przemyslaw Patynowski Signed-off-by: Arkadiusz Kubalewski Tested-by: Dave Switzer Signed-off-by: Tony Nguyen Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 3f25bd8c4924..10a83e5385c7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3663,8 +3663,7 @@ u16 i40e_lan_select_queue(struct net_device *netdev, /* is DCB enabled at all? */ if (vsi->tc_config.numtc == 1) - return i40e_swdcb_skb_tx_hash(netdev, skb, - netdev->real_num_tx_queues); + return netdev_pick_tx(netdev, skb, sb_dev); prio = skb->priority; hw = &vsi->back->hw; -- cgit From 8da80c9d50220a8e4190a4eaa0dd6aeefcbbb5bf Mon Sep 17 00:00:00 2001 From: Sylwester Dziedziuch Date: Wed, 18 Aug 2021 10:42:17 -0700 Subject: iavf: Fix ping is lost after untrusted VF had tried to change MAC Make changes to MAC address dependent on the response of PF. Disallow changes to HW MAC address and MAC filter from untrusted VF, thanks to that ping is not lost if VF tries to change MAC. Add a new field in iavf_mac_filter, to indicate whether there was response from PF for given filter. Based on this field pass or discard the filter. If untrusted VF tried to change it's address, it's not changed. Still filter was changed, because of that ping couldn't go through. Fixes: c5c922b3e09b ("iavf: fix MAC address setting for VFs when filter is rejected") Signed-off-by: Przemyslaw Patynowski Signed-off-by: Sylwester Dziedziuch Signed-off-by: Mateusz Palczewski Tested-by: Gurucharan G Signed-off-by: Tony Nguyen Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/iavf/iavf.h | 1 + drivers/net/ethernet/intel/iavf/iavf_main.c | 1 + drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 47 +++++++++++++++++++++++-- 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index e8bd04100ecd..90793b36126e 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -136,6 +136,7 @@ struct iavf_q_vector { struct iavf_mac_filter { struct list_head list; u8 macaddr[ETH_ALEN]; + bool is_new_mac; /* filter is new, wait for PF decision */ bool remove; /* filter needs to be removed */ bool add; /* filter needs to be added */ }; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 244ec74ceca7..606a01ce4073 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -751,6 +751,7 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter, list_add_tail(&f->list, &adapter->mac_filter_list); f->add = true; + f->is_new_mac = true; adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; } else { f->remove = false; diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 0eab3c43bdc5..3c735968e1b8 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -540,6 +540,47 @@ void iavf_del_ether_addrs(struct iavf_adapter *adapter) kfree(veal); } +/** + * iavf_mac_add_ok + * @adapter: adapter structure + * + * Submit list of filters based on PF response. + **/ +static void iavf_mac_add_ok(struct iavf_adapter *adapter) +{ + struct iavf_mac_filter *f, *ftmp; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { + f->is_new_mac = false; + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); +} + +/** + * iavf_mac_add_reject + * @adapter: adapter structure + * + * Remove filters from list based on PF response. + **/ +static void iavf_mac_add_reject(struct iavf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct iavf_mac_filter *f, *ftmp; + + spin_lock_bh(&adapter->mac_vlan_list_lock); + list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) { + if (f->remove && ether_addr_equal(f->macaddr, netdev->dev_addr)) + f->remove = false; + + if (f->is_new_mac) { + list_del(&f->list); + kfree(f); + } + } + spin_unlock_bh(&adapter->mac_vlan_list_lock); +} + /** * iavf_add_vlans * @adapter: adapter structure @@ -1492,6 +1533,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, case VIRTCHNL_OP_ADD_ETH_ADDR: dev_err(&adapter->pdev->dev, "Failed to add MAC filter, error %s\n", iavf_stat_str(&adapter->hw, v_retval)); + iavf_mac_add_reject(adapter); /* restore administratively set MAC address */ ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); break; @@ -1639,10 +1681,11 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, } } switch (v_opcode) { - case VIRTCHNL_OP_ADD_ETH_ADDR: { + case VIRTCHNL_OP_ADD_ETH_ADDR: + if (!v_retval) + iavf_mac_add_ok(adapter); if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr)) ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); - } break; case VIRTCHNL_OP_GET_STATS: { struct iavf_eth_stats *stats = -- cgit From fa05bdb89b01b098aad19ec0ebc4d1cc7b11177e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 19 Aug 2021 13:58:42 +0300 Subject: Revert "flow_offload: action should not be NULL when it is referenced" This reverts commit 9ea3e52c5bc8bb4a084938dc1e3160643438927a. Cited commit added a check to make sure 'action' is not NULL, but 'action' is already dereferenced before the check, when calling flow_offload_has_one_action(). Therefore, the check does not make any sense and results in a smatch warning: include/net/flow_offload.h:322 flow_action_mixed_hw_stats_check() warn: variable dereferenced before check 'action' (see line 319) Fix by reverting this commit. Cc: gushengxian Fixes: 9ea3e52c5bc8 ("flow_offload: action should not be NULL when it is referenced") Signed-off-by: Ido Schimmel Acked-by: Jamal Hadi Salim Link: https://lore.kernel.org/r/20210819105842.1315705-1-idosch@idosch.org Signed-off-by: Jakub Kicinski --- include/net/flow_offload.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index f3c2841566a0..1b9d75aedb22 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -319,14 +319,12 @@ flow_action_mixed_hw_stats_check(const struct flow_action *action, if (flow_offload_has_one_action(action)) return true; - if (action) { - flow_action_for_each(i, action_entry, action) { - if (i && action_entry->hw_stats != last_hw_stats) { - NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported"); - return false; - } - last_hw_stats = action_entry->hw_stats; + flow_action_for_each(i, action_entry, action) { + if (i && action_entry->hw_stats != last_hw_stats) { + NL_SET_ERR_MSG_MOD(extack, "Mixing HW stats types for actions is not supported"); + return false; } + last_hw_stats = action_entry->hw_stats; } return true; } -- cgit From cd0a719fbd702eb4b455a6ad986483750125588a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 19 Aug 2021 17:17:55 +0300 Subject: net: dpaa2-switch: disable the control interface on error path Currently dpaa2_switch_takedown has a funny name and does not do the opposite of dpaa2_switch_init, which makes probing fail when we need to handle an -EPROBE_DEFER. A sketch of what dpaa2_switch_init does: dpsw_open dpaa2_switch_detect_features dpsw_reset for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { dpsw_if_disable dpsw_if_set_stp dpsw_vlan_remove_if_untagged dpsw_if_set_tci dpsw_vlan_remove_if } dpsw_vlan_remove alloc_ordered_workqueue dpsw_fdb_remove dpaa2_switch_ctrl_if_setup When dpaa2_switch_takedown is called from the error path of dpaa2_switch_probe(), the control interface, enabled by dpaa2_switch_ctrl_if_setup from dpaa2_switch_init, remains enabled, because dpaa2_switch_takedown does not call dpaa2_switch_ctrl_if_teardown. Since dpaa2_switch_probe might fail due to EPROBE_DEFER of a PHY, this means that a second probe of the driver will happen with the control interface directly enabled. This will trigger a second error: [ 93.273528] fsl_dpaa2_switch dpsw.0: dpsw_ctrl_if_set_pools() failed [ 93.281966] fsl_dpaa2_switch dpsw.0: fsl_mc_driver_probe failed: -13 [ 93.288323] fsl_dpaa2_switch: probe of dpsw.0 failed with error -13 Which if we investigate the /dev/dpaa2_mc_console log, we find out is caused by: [E, ctrl_if_set_pools:2211, DPMNG] ctrl_if must be disabled So make dpaa2_switch_takedown do the opposite of dpaa2_switch_init (in reasonable limits, no reason to change STP state, re-add VLANs etc), and rename it to something more conventional, like dpaa2_switch_teardown. Fixes: 613c0a5810b7 ("staging: dpaa2-switch: enable the control interface") Signed-off-by: Vladimir Oltean Reviewed-by: Ioana Ciornei Link: https://lore.kernel.org/r/20210819141755.1931423-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/freescale/dpaa2/dpaa2-switch.c | 36 +++++++++++----------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 68b78642c045..98cc0133c343 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -3038,26 +3038,30 @@ static int dpaa2_switch_port_init(struct ethsw_port_priv *port_priv, u16 port) return err; } -static void dpaa2_switch_takedown(struct fsl_mc_device *sw_dev) +static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw) +{ + dpsw_ctrl_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle); + dpaa2_switch_free_dpio(ethsw); + dpaa2_switch_destroy_rings(ethsw); + dpaa2_switch_drain_bp(ethsw); + dpaa2_switch_free_dpbp(ethsw); +} + +static void dpaa2_switch_teardown(struct fsl_mc_device *sw_dev) { struct device *dev = &sw_dev->dev; struct ethsw_core *ethsw = dev_get_drvdata(dev); int err; + dpaa2_switch_ctrl_if_teardown(ethsw); + + destroy_workqueue(ethsw->workqueue); + err = dpsw_close(ethsw->mc_io, 0, ethsw->dpsw_handle); if (err) dev_warn(dev, "dpsw_close err %d\n", err); } -static void dpaa2_switch_ctrl_if_teardown(struct ethsw_core *ethsw) -{ - dpsw_ctrl_if_disable(ethsw->mc_io, 0, ethsw->dpsw_handle); - dpaa2_switch_free_dpio(ethsw); - dpaa2_switch_destroy_rings(ethsw); - dpaa2_switch_drain_bp(ethsw); - dpaa2_switch_free_dpbp(ethsw); -} - static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) { struct ethsw_port_priv *port_priv; @@ -3068,8 +3072,6 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) dev = &sw_dev->dev; ethsw = dev_get_drvdata(dev); - dpaa2_switch_ctrl_if_teardown(ethsw); - dpaa2_switch_teardown_irqs(sw_dev); dpsw_disable(ethsw->mc_io, 0, ethsw->dpsw_handle); @@ -3084,9 +3086,7 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) kfree(ethsw->acls); kfree(ethsw->ports); - dpaa2_switch_takedown(sw_dev); - - destroy_workqueue(ethsw->workqueue); + dpaa2_switch_teardown(sw_dev); fsl_mc_portal_free(ethsw->mc_io); @@ -3199,7 +3199,7 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev) GFP_KERNEL); if (!(ethsw->ports)) { err = -ENOMEM; - goto err_takedown; + goto err_teardown; } ethsw->fdbs = kcalloc(ethsw->sw_attr.num_ifs, sizeof(*ethsw->fdbs), @@ -3270,8 +3270,8 @@ err_free_fdbs: err_free_ports: kfree(ethsw->ports); -err_takedown: - dpaa2_switch_takedown(sw_dev); +err_teardown: + dpaa2_switch_teardown(sw_dev); err_free_cmdport: fsl_mc_portal_free(ethsw->mc_io); -- cgit From 1c8094e394bceb4f1880f9d539bdd255c130826e Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 17 Aug 2021 12:47:55 -0500 Subject: dt-bindings: sifive-l2-cache: Fix 'select' matching When the schema fixups are applied to 'select' the result is a single entry is required for a match, but that will never match as there should be 2 entries. Also, a 'select' schema should have the widest possible match, so use 'contains' which matches the compatible string(s) in any position and not just the first position. Fixes: 993dcfac64eb ("dt-bindings: riscv: sifive-l2-cache: convert bindings to json-schema") Signed-off-by: Rob Herring Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml index 1d38ff76d18f..2b1f91603897 100644 --- a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml +++ b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml @@ -24,10 +24,10 @@ allOf: select: properties: compatible: - items: - - enum: - - sifive,fu540-c000-ccache - - sifive,fu740-c000-ccache + contains: + enum: + - sifive,fu540-c000-ccache + - sifive,fu740-c000-ccache required: - compatible -- cgit From aa3e1ba32e553e611a58145c2eb349802feaa6eb Mon Sep 17 00:00:00 2001 From: Petr Pavlu Date: Sat, 7 Aug 2021 19:54:50 +0200 Subject: riscv: Fix a number of free'd resources in init_resources() Function init_resources() allocates a boot memory block to hold an array of resources which it adds to iomem_resource. The array is filled in from its end and the function then attempts to free any unused memory at the beginning. The problem is that size of the unused memory is incorrectly calculated and this can result in releasing memory which is in use by active resources. Their data then gets corrupted later when the memory is reused by a different part of the system. Fix the size of the released memory to correctly match the number of unused resource entries. Fixes: ffe0e5261268 ("RISC-V: Improve init_resources()") Signed-off-by: Petr Pavlu Reviewed-by: Sunil V L Acked-by: Nick Kossifidis Tested-by: Sunil V L Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 18bd0e4bc36c..120b2f6f71bc 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -229,8 +229,8 @@ static void __init init_resources(void) } /* Clean-up any unused pre-allocated resources */ - mem_res_sz = (num_resources - res_idx + 1) * sizeof(*mem_res); - memblock_free(__pa(mem_res), mem_res_sz); + if (res_idx >= 0) + memblock_free(__pa(mem_res), (res_idx + 1) * sizeof(*mem_res)); return; error: -- cgit From b1e1ef345433fb03742003677ddfb980d148092b Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 19 Aug 2021 19:04:05 -0700 Subject: Revert "mm/shmem: fix shmem_swapin() race with swapoff" Due to the change about how block layer detects congestion the justification of commit 8fd2e0b505d1 ("mm: swap: check if swap backing device is congested or not") doesn't stand anymore, so the commit could be just reverted in order to solve the race reported by commit 2efa33fc7f6e ("mm/shmem: fix shmem_swapin() race with swapoff"), so the fix commit could be just reverted as well. And that fix is also kind of buggy as discussed by [1] and [2]. [1] https://lore.kernel.org/linux-mm/24187e5e-069-9f3f-cefe-39ac70783753@google.com/ [2] https://lore.kernel.org/linux-mm/e82380b9-3ad4-4a52-be50-6d45c7f2b5da@google.com/ Link: https://lkml.kernel.org/r/20210810202936.2672-2-shy828301@gmail.com Signed-off-by: Yang Shi Suggested-by: Hugh Dickins Acked-by: Hugh Dickins Cc: "Huang, Ying" Cc: Miaohe Lin Cc: David Hildenbrand Cc: Johannes Weiner Cc: Joonsoo Kim Cc: Matthew Wilcox (Oracle) Cc: Michal Hocko Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/shmem.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 70d9ce294bb4..dacda7463d54 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1696,8 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct shmem_inode_info *info = SHMEM_I(inode); struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL; - struct swap_info_struct *si; - struct page *page = NULL; + struct page *page; swp_entry_t swap; int error; @@ -1705,12 +1704,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, swap = radix_to_swp_entry(*pagep); *pagep = NULL; - /* Prevent swapoff from happening to us. */ - si = get_swap_device(swap); - if (!si) { - error = EINVAL; - goto failed; - } /* Look it up and read it in.. */ page = lookup_swap_cache(swap, NULL, 0); if (!page) { @@ -1772,8 +1765,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index, swap_free(swap); *pagep = page; - if (si) - put_swap_device(si); return 0; failed: if (!shmem_confirm_swap(mapping, index, swap)) @@ -1784,9 +1775,6 @@ unlock: put_page(page); } - if (si) - put_swap_device(si); - return error; } -- cgit From c04b3d06904368b71ab9e09336ecfc91f4009bc9 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 19 Aug 2021 19:04:09 -0700 Subject: Revert "mm: swap: check if swap backing device is congested or not" Due to the change about how block layer detects congestion the justification of commit 8fd2e0b505d1 ("mm: swap: check if swap backing device is congested or not") doesn't stand anymore, so the commit could be just reverted in order to solve the race reported by commit 2efa33fc7f6e ("mm/shmem: fix shmem_swapin() race with swapoff"). The fix was reverted by the previous patch. Link: https://lkml.kernel.org/r/20210810202936.2672-3-shy828301@gmail.com Signed-off-by: Yang Shi Suggested-by: Hugh Dickins Acked-by: Hugh Dickins Cc: "Huang, Ying" Cc: Miaohe Lin Cc: Matthew Wilcox (Oracle) Cc: Johannes Weiner Cc: Michal Hocko Cc: Joonsoo Kim Cc: Minchan Kim Cc: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap_state.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/mm/swap_state.c b/mm/swap_state.c index c56aa9ac050d..bc7cee6b2ec5 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -628,13 +628,6 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask, if (!mask) goto skip; - /* Test swap type to make sure the dereference is safe */ - if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) { - struct inode *inode = si->swap_file->f_mapping->host; - if (inode_read_congested(inode)) - goto skip; - } - do_poll = false; /* Read a page_cluster sized and aligned cluster around offset. */ start_offset = offset & ~mask; -- cgit From 47aef6010b837657e1433021bfdeeee7a26a174c Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Thu, 19 Aug 2021 19:04:12 -0700 Subject: mm/page_alloc: don't corrupt pcppage_migratetype When placing pages on a pcp list, migratetype values over MIGRATE_PCPTYPES get added to the MIGRATE_MOVABLE pcp list. However, the actual migratetype is preserved in the page and should not be changed to MIGRATE_MOVABLE or the page may end up on the wrong free_list. The impact is that HIGHATOMIC or CMA pages getting bulk freed from the PCP lists could potentially end up on the wrong buddy list. There are various consequences but minimally NR_FREE_CMA_PAGES accounting could get screwed up. [mgorman@techsingularity.net: changelog update] Link: https://lkml.kernel.org/r/20210811182917.2607994-1-opendmb@gmail.com Fixes: df1acc856923 ("mm/page_alloc: avoid conflating IRQs disabled with zone->lock") Signed-off-by: Doug Berger Acked-by: Vlastimil Babka Acked-by: Mel Gorman Cc: "Peter Zijlstra (Intel)" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 856b175c15a4..eeb3a9cb36bb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3453,19 +3453,10 @@ void free_unref_page_list(struct list_head *list) * comment in free_unref_page. */ migratetype = get_pcppage_migratetype(page); - if (unlikely(migratetype >= MIGRATE_PCPTYPES)) { - if (unlikely(is_migrate_isolate(migratetype))) { - list_del(&page->lru); - free_one_page(page_zone(page), page, pfn, 0, - migratetype, FPI_NONE); - continue; - } - - /* - * Non-isolated types over MIGRATE_PCPTYPES get added - * to the MIGRATE_MOVABLE pcp list. - */ - set_pcppage_migratetype(page, MIGRATE_MOVABLE); + if (unlikely(is_migrate_isolate(migratetype))) { + list_del(&page->lru); + free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE); + continue; } set_page_private(page, pfn); @@ -3475,7 +3466,15 @@ void free_unref_page_list(struct list_head *list) list_for_each_entry_safe(page, next, list, lru) { pfn = page_private(page); set_page_private(page, 0); + + /* + * Non-isolated types over MIGRATE_PCPTYPES get added + * to the MIGRATE_MOVABLE pcp list. + */ migratetype = get_pcppage_migratetype(page); + if (unlikely(migratetype >= MIGRATE_PCPTYPES)) + migratetype = MIGRATE_MOVABLE; + trace_mm_page_free_batched(page); free_unref_page_commit(page, pfn, migratetype, 0); -- cgit From b16ee0f9ed79fca2f2c31b13cac2ab9cf543525a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 19 Aug 2021 19:04:15 -0700 Subject: mmflags.h: add missing __GFP_ZEROTAGS and __GFP_SKIP_KASAN_POISON names printk("%pGg") outputs these two flags as hexadecimal number, rather than as a string, e.g: GFP_KERNEL|0x1800000 Fix this by adding missing names of __GFP_ZEROTAGS and __GFP_SKIP_KASAN_POISON flags to __def_gfpflag_names. Link: https://lkml.kernel.org/r/20210816133502.590-1-rppt@kernel.org Fixes: 013bb59dbb7c ("arm64: mte: handle tags zeroing at page allocation time") Fixes: c275c5c6d50a ("kasan: disable freed user page poisoning with HW tags") Signed-off-by: Mike Rapoport Cc: Peter Collingbourne Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/mmflags.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 390270e00a1d..f160484afc5c 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -48,7 +48,9 @@ {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ - {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"}\ + {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ + {(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"}, \ + {(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\ #define show_gfp_flags(flags) \ (flags) ? __print_flags(flags, "|", \ -- cgit From 91ed3ed0f79884f66581e2162cc5ae91ce82b4fb Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 19 Aug 2021 19:04:18 -0700 Subject: MAINTAINERS: update ClangBuiltLinux IRC chat Everyone has moved from Freenode to Libera so updated the channel entry for MAINTAINERS. Link: https://github.com/ClangBuiltLinux/linux/issues/1402 Link: https://lkml.kernel.org/r/20210818022339.3863058-1-nathan@kernel.org Signed-off-by: Nathan Chancellor Cc: Nick Desaulniers Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fd25e4ecf0b9..b68f04755b2e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4498,7 +4498,7 @@ L: clang-built-linux@googlegroups.com S: Supported W: https://clangbuiltlinux.github.io/ B: https://github.com/ClangBuiltLinux/linux/issues -C: irc://chat.freenode.net/clangbuiltlinux +C: irc://irc.libera.chat/clangbuiltlinux F: Documentation/kbuild/llvm.rst F: include/linux/compiler-clang.h F: scripts/clang-tools/ -- cgit From f56ce412a59d7d938b81de8878faef128812482c Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 19 Aug 2021 19:04:21 -0700 Subject: mm: memcontrol: fix occasional OOMs due to proportional memory.low reclaim We've noticed occasional OOM killing when memory.low settings are in effect for cgroups. This is unexpected and undesirable as memory.low is supposed to express non-OOMing memory priorities between cgroups. The reason for this is proportional memory.low reclaim. When cgroups are below their memory.low threshold, reclaim passes them over in the first round, and then retries if it couldn't find pages anywhere else. But when cgroups are slightly above their memory.low setting, page scan force is scaled down and diminished in proportion to the overage, to the point where it can cause reclaim to fail as well - only in that case we currently don't retry, and instead trigger OOM. To fix this, hook proportional reclaim into the same retry logic we have in place for when cgroups are skipped entirely. This way if reclaim fails and some cgroups were scanned with diminished pressure, we'll try another full-force cycle before giving up and OOMing. [akpm@linux-foundation.org: coding-style fixes] Link: https://lkml.kernel.org/r/20210817180506.220056-1-hannes@cmpxchg.org Fixes: 9783aa9917f8 ("mm, memcg: proportional memory.{low,min} reclaim") Signed-off-by: Johannes Weiner Reported-by: Leon Yang Reviewed-by: Rik van Riel Reviewed-by: Shakeel Butt Acked-by: Roman Gushchin Acked-by: Chris Down Acked-by: Michal Hocko Cc: [5.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 29 +++++++++++++++-------------- mm/vmscan.c | 27 +++++++++++++++++++-------- 2 files changed, 34 insertions(+), 22 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bfe5c486f4ad..24797929d8a1 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -612,12 +612,15 @@ static inline bool mem_cgroup_disabled(void) return !cgroup_subsys_enabled(memory_cgrp_subsys); } -static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, - struct mem_cgroup *memcg, - bool in_low_reclaim) +static inline void mem_cgroup_protection(struct mem_cgroup *root, + struct mem_cgroup *memcg, + unsigned long *min, + unsigned long *low) { + *min = *low = 0; + if (mem_cgroup_disabled()) - return 0; + return; /* * There is no reclaim protection applied to a targeted reclaim. @@ -653,13 +656,10 @@ static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, * */ if (root == memcg) - return 0; - - if (in_low_reclaim) - return READ_ONCE(memcg->memory.emin); + return; - return max(READ_ONCE(memcg->memory.emin), - READ_ONCE(memcg->memory.elow)); + *min = READ_ONCE(memcg->memory.emin); + *low = READ_ONCE(memcg->memory.elow); } void mem_cgroup_calculate_protection(struct mem_cgroup *root, @@ -1147,11 +1147,12 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, { } -static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root, - struct mem_cgroup *memcg, - bool in_low_reclaim) +static inline void mem_cgroup_protection(struct mem_cgroup *root, + struct mem_cgroup *memcg, + unsigned long *min, + unsigned long *low) { - return 0; + *min = *low = 0; } static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root, diff --git a/mm/vmscan.c b/mm/vmscan.c index 4620df62f0ff..b0202ab5e136 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -100,9 +100,12 @@ struct scan_control { unsigned int may_swap:1; /* - * Cgroups are not reclaimed below their configured memory.low, - * unless we threaten to OOM. If any cgroups are skipped due to - * memory.low and nothing was reclaimed, go back for memory.low. + * Cgroup memory below memory.low is protected as long as we + * don't threaten to OOM. If any cgroup is reclaimed at + * reduced force or passed over entirely due to its memory.low + * setting (memcg_low_skipped), and nothing is reclaimed as a + * result, then go back for one more cycle that reclaims the protected + * memory (memcg_low_reclaim) to avert OOM. */ unsigned int memcg_low_reclaim:1; unsigned int memcg_low_skipped:1; @@ -2537,15 +2540,14 @@ out: for_each_evictable_lru(lru) { int file = is_file_lru(lru); unsigned long lruvec_size; + unsigned long low, min; unsigned long scan; - unsigned long protection; lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx); - protection = mem_cgroup_protection(sc->target_mem_cgroup, - memcg, - sc->memcg_low_reclaim); + mem_cgroup_protection(sc->target_mem_cgroup, memcg, + &min, &low); - if (protection) { + if (min || low) { /* * Scale a cgroup's reclaim pressure by proportioning * its current usage to its memory.low or memory.min @@ -2576,6 +2578,15 @@ out: * hard protection. */ unsigned long cgroup_size = mem_cgroup_size(memcg); + unsigned long protection; + + /* memory.low scaling, make sure we retry before OOM */ + if (!sc->memcg_low_reclaim && low > min) { + protection = low; + sc->memcg_low_skipped = 1; + } else { + protection = min; + } /* Avoid TOCTOU with earlier protection check */ cgroup_size = max(cgroup_size, protection); -- cgit From fcc00621d88b274b5dffd8daeea71d0e4c28b84e Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 19 Aug 2021 19:04:24 -0700 Subject: mm/hwpoison: retry with shake_page() for unhandlable pages HWPoisonHandlable() sometimes returns false for typical user pages due to races with average memory events like transfers over LRU lists. This causes failures in hwpoison handling. There's retry code for such a case but does not work because the retry loop reaches the retry limit too quickly before the page settles down to handlable state. Let get_any_page() call shake_page() to fix it. [naoya.horiguchi@nec.com: get_any_page(): return -EIO when retry limit reached] Link: https://lkml.kernel.org/r/20210819001958.2365157-1-naoya.horiguchi@linux.dev Link: https://lkml.kernel.org/r/20210817053703.2267588-1-naoya.horiguchi@linux.dev Fixes: 25182f05ffed ("mm,hwpoison: fix race with hugetlb page allocation") Signed-off-by: Naoya Horiguchi Reported-by: Tony Luck Reviewed-by: Yang Shi Cc: Oscar Salvador Cc: Muchun Song Cc: Mike Kravetz Cc: Michal Hocko Cc: [5.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index eefd823deb67..470400cc7513 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1146,7 +1146,7 @@ static int __get_hwpoison_page(struct page *page) * unexpected races caused by taking a page refcount. */ if (!HWPoisonHandlable(head)) - return 0; + return -EBUSY; if (PageTransHuge(head)) { /* @@ -1199,9 +1199,15 @@ try_again: } goto out; } else if (ret == -EBUSY) { - /* We raced with freeing huge page to buddy, retry. */ - if (pass++ < 3) + /* + * We raced with (possibly temporary) unhandlable + * page, retry. + */ + if (pass++ < 3) { + shake_page(p, 1); goto try_again; + } + ret = -EIO; goto out; } } -- cgit From 57f29762cdd4687a02f245d1b1e78de046388eac Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 19 Aug 2021 19:04:27 -0700 Subject: mm: vmscan: fix missing psi annotation for node_reclaim() In a debugging session the other day, Rik noticed that node_reclaim() was missing memstall annotations. This means we'll miss pressure and lost productivity resulting from reclaim on an overloaded local NUMA node when vm.zone_reclaim_mode is enabled. There haven't been any reports, but that's likely because vm.zone_reclaim_mode hasn't been a commonly used feature recently, and the intersection between such setups and psi users is probably nil. But secondary memory such as CXL-connected DIMMS, persistent memory etc, and the page demotion patches that handle them (https://lore.kernel.org/lkml/20210401183216.443C4443@viggo.jf.intel.com/) could soon make this a more common codepath again. Link: https://lkml.kernel.org/r/20210818152457.35846-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reported-by: Rik van Riel Reviewed-by: Shakeel Butt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/vmscan.c b/mm/vmscan.c index b0202ab5e136..eeae2f6bc532 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4424,11 +4424,13 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in .may_swap = 1, .reclaim_idx = gfp_zone(gfp_mask), }; + unsigned long pflags; trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order, sc.gfp_mask); cond_resched(); + psi_memstall_enter(&pflags); fs_reclaim_acquire(sc.gfp_mask); /* * We need to be able to allocate from the reserves for RECLAIM_UNMAP @@ -4453,6 +4455,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in current->flags &= ~PF_SWAPWRITE; memalloc_noreclaim_restore(noreclaim_flag); fs_reclaim_release(sc.gfp_mask); + psi_memstall_leave(&pflags); trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed); -- cgit From a7cb5d23eaea148f8582229846f8dfff192f05c3 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 19 Aug 2021 19:04:30 -0700 Subject: kfence: fix is_kfence_address() for addresses below KFENCE_POOL_SIZE Originally the addr != NULL check was meant to take care of the case where __kfence_pool == NULL (KFENCE is disabled). However, this does not work for addresses where addr > 0 && addr < KFENCE_POOL_SIZE. This can be the case on NULL-deref where addr > 0 && addr < PAGE_SIZE or any other faulting access with addr < KFENCE_POOL_SIZE. While the kernel would likely crash, the stack traces and report might be confusing due to double faults upon KFENCE's attempt to unprotect such an address. Fix it by just checking that __kfence_pool != NULL instead. Link: https://lkml.kernel.org/r/20210818130300.2482437-1-elver@google.com Fixes: 0ce20dd84089 ("mm: add Kernel Electric-Fence infrastructure") Signed-off-by: Marco Elver Reported-by: Kuan-Ying Lee Acked-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: [5.12+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfence.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/kfence.h b/include/linux/kfence.h index a70d1ea03532..3fe6dd8a18c1 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -51,10 +51,11 @@ extern atomic_t kfence_allocation_gate; static __always_inline bool is_kfence_address(const void *addr) { /* - * The non-NULL check is required in case the __kfence_pool pointer was - * never initialized; keep it in the slow-path after the range-check. + * The __kfence_pool != NULL check is required to deal with the case + * where __kfence_pool == NULL && addr < KFENCE_POOL_SIZE. Keep it in + * the slow-path after the range-check! */ - return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && addr); + return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && __kfence_pool); } /** -- cgit From c7b1850dfb41d0b4154aca8dbc04777fbd75616f Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Thu, 19 Aug 2021 19:04:33 -0700 Subject: hugetlb: don't pass page cache pages to restore_reserve_on_error syzbot hit kernel BUG at fs/hugetlbfs/inode.c:532 as described in [1]. This BUG triggers if the HPageRestoreReserve flag is set on a page in the page cache. It should never be set, as the routine huge_add_to_page_cache explicitly clears the flag after adding a page to the cache. The only code other than huge page allocation which sets the flag is restore_reserve_on_error. It will potentially set the flag in rare out of memory conditions. syzbot was injecting errors to cause memory allocation errors which exercised this specific path. The code in restore_reserve_on_error is doing the right thing. However, there are instances where pages in the page cache were being passed to restore_reserve_on_error. This is incorrect, as once a page goes into the cache reservation information will not be modified for the page until it is removed from the cache. Error paths do not remove pages from the cache, so even in the case of error, the page will remain in the cache and no reservation adjustment is needed. Modify routines that potentially call restore_reserve_on_error with a page cache page to no longer do so. Note on fixes tag: Prior to commit 846be08578ed ("mm/hugetlb: expand restore_reserve_on_error functionality") the routine would not process page cache pages because the HPageRestoreReserve flag is not set on such pages. Therefore, this issue could not be trigggered. The code added by commit 846be08578ed ("mm/hugetlb: expand restore_reserve_on_error functionality") is needed and correct. It exposed incorrect calls to restore_reserve_on_error which is the root cause addressed by this commit. [1] https://lore.kernel.org/linux-mm/00000000000050776d05c9b7c7f0@google.com/ Link: https://lkml.kernel.org/r/20210818213304.37038-1-mike.kravetz@oracle.com Fixes: 846be08578ed ("mm/hugetlb: expand restore_reserve_on_error functionality") Signed-off-by: Mike Kravetz Reported-by: Cc: Mina Almasry Cc: Axel Rasmussen Cc: Peter Xu Cc: Muchun Song Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index dfc940d5221d..8ea35ba6699f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2476,7 +2476,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, if (!rc) { /* * This indicates there is an entry in the reserve map - * added by alloc_huge_page. We know it was added + * not added by alloc_huge_page. We know it was added * before the alloc_huge_page call, otherwise * HPageRestoreReserve would be set on the page. * Remove the entry so that a subsequent allocation @@ -4660,7 +4660,9 @@ retry_avoidcopy: spin_unlock(ptl); mmu_notifier_invalidate_range_end(&range); out_release_all: - restore_reserve_on_error(h, vma, haddr, new_page); + /* No restore in case of successful pagetable update (Break COW) */ + if (new_page != old_page) + restore_reserve_on_error(h, vma, haddr, new_page); put_page(new_page); out_release_old: put_page(old_page); @@ -4776,7 +4778,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, pte_t new_pte; spinlock_t *ptl; unsigned long haddr = address & huge_page_mask(h); - bool new_page = false; + bool new_page, new_pagecache_page = false; /* * Currently, we are forced to kill the process in the event the @@ -4799,6 +4801,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, goto out; retry: + new_page = false; page = find_lock_page(mapping, idx); if (!page) { /* Check for page in userfault range */ @@ -4842,6 +4845,7 @@ retry: goto retry; goto out; } + new_pagecache_page = true; } else { lock_page(page); if (unlikely(anon_vma_prepare(vma))) { @@ -4926,7 +4930,9 @@ backout: spin_unlock(ptl); backout_unlocked: unlock_page(page); - restore_reserve_on_error(h, vma, haddr, page); + /* restore reserve for newly allocated pages not in page cache */ + if (new_page && !new_pagecache_page) + restore_reserve_on_error(h, vma, haddr, page); put_page(page); goto out; } @@ -5135,6 +5141,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, int ret = -ENOMEM; struct page *page; int writable; + bool new_pagecache_page = false; if (is_continue) { ret = -EFAULT; @@ -5228,6 +5235,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ret = huge_add_to_page_cache(page, mapping, idx); if (ret) goto out_release_nounlock; + new_pagecache_page = true; } ptl = huge_pte_lockptr(h, dst_mm, dst_pte); @@ -5291,7 +5299,8 @@ out_release_unlock: if (vm_shared || is_continue) unlock_page(page); out_release_nounlock: - restore_reserve_on_error(h, dst_vma, dst_addr, page); + if (!new_pagecache_page) + restore_reserve_on_error(h, dst_vma, dst_addr, page); put_page(page); goto out; } -- cgit From a30f895ad3239f45012e860d4f94c1a388b36d14 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 20 Aug 2021 14:53:59 -0600 Subject: io_uring: fix xa_alloc_cycle() error return value check We currently check for ret != 0 to indicate error, but '1' is a valid return and just indicates that the allocation succeeded with a wrap. Correct the check to be for < 0, like it was before the xarray conversion. Cc: stable@vger.kernel.org Fixes: 61cf93700fe6 ("io_uring: Convert personality_idr to XArray") Signed-off-by: Jens Axboe --- fs/io_uring.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 979941bcd15a..a2e20a6fbfed 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9843,10 +9843,11 @@ static int io_register_personality(struct io_ring_ctx *ctx) ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds, XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL); - if (!ret) - return id; - put_cred(creds); - return ret; + if (ret < 0) { + put_cred(creds); + return ret; + } + return id; } static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg, -- cgit From fdd92b64d15bc4aec973caa25899afd782402e68 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 20 Aug 2021 09:29:50 -0400 Subject: fs: warn about impending deprecation of mandatory locks We've had CONFIG_MANDATORY_FILE_LOCKING since 2015 and a lot of distros have disabled it. Warn the stragglers that still use "-o mand" that we'll be dropping support for that mount option. Cc: stable@vger.kernel.org Signed-off-by: Jeff Layton --- fs/namespace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index ab4174a3c802..2279473d0d6f 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1716,8 +1716,12 @@ static inline bool may_mount(void) } #ifdef CONFIG_MANDATORY_FILE_LOCKING -static inline bool may_mandlock(void) +static bool may_mandlock(void) { + pr_warn_once("======================================================\n" + "WARNING: the mand mount option is being deprecated and\n" + " will be removed in v5.15!\n" + "======================================================\n"); return capable(CAP_SYS_ADMIN); } #else -- cgit From e22ce8eb631bdc47a4a4ea7ecf4e4ba499db4f93 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 Aug 2021 14:24:56 -0700 Subject: Linux 5.14-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c19d1638da25..80aa85170d6b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Opossums on Parade # *DOCUMENTATION* -- cgit