From 203a6763ab699da0568fd2b76303d03bb121abd4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 13 Mar 2024 16:32:27 -0700 Subject: Revert "crypto: pkcs7 - remove sha1 support" This reverts commit 16ab7cb5825fc3425c16ad2c6e53d827f382d7c6 because it broke iwd. iwd uses the KEYCTL_PKEY_* UAPIs via its dependency libell, and apparently it is relying on SHA-1 signature support. These UAPIs are fairly obscure, and their documentation does not mention which algorithms they support. iwd really should be using a properly supported userspace crypto library instead. Regardless, since something broke we have to revert the change. It may be possible that some parts of this commit can be reinstated without breaking iwd (e.g. probably the removal of MODULE_SIG_SHA1), but for now this just does a full revert to get things working again. Reported-by: Karel Balej Closes: https://lore.kernel.org/r/CZSHRUIJ4RKL.34T4EASV5DNJM@matfyz.cz Cc: Dimitri John Ledkov Signed-off-by: Eric Biggers Tested-by: Karel Balej Signed-off-by: Herbert Xu --- include/linux/oid_registry.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oid_registry.h b/include/linux/oid_registry.h index 3921fbed0b28..51421fdbb0ba 100644 --- a/include/linux/oid_registry.h +++ b/include/linux/oid_registry.h @@ -17,10 +17,12 @@ * build_OID_registry.pl to generate the data for look_up_OID(). */ enum OID { + OID_id_dsa_with_sha1, /* 1.2.840.10030.4.3 */ OID_id_dsa, /* 1.2.840.10040.4.1 */ OID_id_ecPublicKey, /* 1.2.840.10045.2.1 */ OID_id_prime192v1, /* 1.2.840.10045.3.1.1 */ OID_id_prime256v1, /* 1.2.840.10045.3.1.7 */ + OID_id_ecdsa_with_sha1, /* 1.2.840.10045.4.1 */ OID_id_ecdsa_with_sha224, /* 1.2.840.10045.4.3.1 */ OID_id_ecdsa_with_sha256, /* 1.2.840.10045.4.3.2 */ OID_id_ecdsa_with_sha384, /* 1.2.840.10045.4.3.3 */ @@ -28,6 +30,7 @@ enum OID { /* PKCS#1 {iso(1) member-body(2) us(840) rsadsi(113549) pkcs(1) pkcs-1(1)} */ OID_rsaEncryption, /* 1.2.840.113549.1.1.1 */ + OID_sha1WithRSAEncryption, /* 1.2.840.113549.1.1.5 */ OID_sha256WithRSAEncryption, /* 1.2.840.113549.1.1.11 */ OID_sha384WithRSAEncryption, /* 1.2.840.113549.1.1.12 */ OID_sha512WithRSAEncryption, /* 1.2.840.113549.1.1.13 */ @@ -64,6 +67,7 @@ enum OID { OID_PKU2U, /* 1.3.5.1.5.2.7 */ OID_Scram, /* 1.3.6.1.5.5.14 */ OID_certAuthInfoAccess, /* 1.3.6.1.5.5.7.1.1 */ + OID_sha1, /* 1.3.14.3.2.26 */ OID_id_ansip384r1, /* 1.3.132.0.34 */ OID_sha256, /* 2.16.840.1.101.3.4.2.1 */ OID_sha384, /* 2.16.840.1.101.3.4.2.2 */ -- cgit From 0c76106cb97548810214def8ee22700bbbb90543 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 19 Mar 2024 16:12:09 +0900 Subject: scsi: sd: Fix TCG OPAL unlock on system resume Commit 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management") introduced the manage_system_start_stop scsi_device flag to allow libata to indicate to the SCSI disk driver that nothing should be done when resuming a disk on system resume. This change turned the execution of sd_resume() into a no-op for ATA devices on system resume. While this solved deadlock issues during device resume, this change also wrongly removed the execution of opal_unlock_from_suspend(). As a result, devices with TCG OPAL locking enabled remain locked and inaccessible after a system resume from sleep. To fix this issue, introduce the SCSI driver resume method and implement it with the sd_resume() function calling opal_unlock_from_suspend(). The former sd_resume() function is renamed to sd_resume_common() and modified to call the new sd_resume() function. For non-ATA devices, this result in no functional changes. In order for libata to explicitly execute sd_resume() when a device is resumed during system restart, the function scsi_resume_device() is introduced. libata calls this function from the revalidation work executed on devie resume, a state that is indicated with the new device flag ATA_DFLAG_RESUMING. Doing so, locked TCG OPAL enabled devices are unlocked on resume, allowing normal operation. Fixes: 3cc2ffe5c16d ("scsi: sd: Differentiate system and runtime start/stop management") Link: https://bugzilla.kernel.org/show_bug.cgi?id=218538 Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20240319071209.1179257-1-dlemoal@kernel.org Signed-off-by: Martin K. Petersen --- drivers/ata/libata-eh.c | 5 ++++- drivers/ata/libata-scsi.c | 9 +++++++++ drivers/scsi/scsi_scan.c | 34 ++++++++++++++++++++++++++++++++++ drivers/scsi/sd.c | 23 +++++++++++++++++++---- include/linux/libata.h | 1 + include/scsi/scsi_driver.h | 1 + include/scsi/scsi_host.h | 1 + 7 files changed, 69 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index b0d6e69c4a5b..214b935c2ced 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -712,8 +712,10 @@ void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) ehc->saved_ncq_enabled |= 1 << devno; /* If we are resuming, wake up the device */ - if (ap->pflags & ATA_PFLAG_RESUMING) + if (ap->pflags & ATA_PFLAG_RESUMING) { + dev->flags |= ATA_DFLAG_RESUMING; ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE; + } } } @@ -3169,6 +3171,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, return 0; err: + dev->flags &= ~ATA_DFLAG_RESUMING; *r_failed_dev = dev; return rc; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 0a0f483124c3..2f4c58837641 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -4730,6 +4730,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) struct ata_link *link; struct ata_device *dev; unsigned long flags; + bool do_resume; int ret = 0; mutex_lock(&ap->scsi_scan_mutex); @@ -4751,7 +4752,15 @@ void ata_scsi_dev_rescan(struct work_struct *work) if (scsi_device_get(sdev)) continue; + do_resume = dev->flags & ATA_DFLAG_RESUMING; + spin_unlock_irqrestore(ap->lock, flags); + if (do_resume) { + ret = scsi_resume_device(sdev); + if (ret == -EWOULDBLOCK) + goto unlock; + dev->flags &= ~ATA_DFLAG_RESUMING; + } ret = scsi_rescan_device(sdev); scsi_device_put(sdev); spin_lock_irqsave(ap->lock, flags); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 8d06475de17a..ffd7e7e72933 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1642,6 +1642,40 @@ int scsi_add_device(struct Scsi_Host *host, uint channel, } EXPORT_SYMBOL(scsi_add_device); +int scsi_resume_device(struct scsi_device *sdev) +{ + struct device *dev = &sdev->sdev_gendev; + int ret = 0; + + device_lock(dev); + + /* + * Bail out if the device or its queue are not running. Otherwise, + * the rescan may block waiting for commands to be executed, with us + * holding the device lock. This can result in a potential deadlock + * in the power management core code when system resume is on-going. + */ + if (sdev->sdev_state != SDEV_RUNNING || + blk_queue_pm_only(sdev->request_queue)) { + ret = -EWOULDBLOCK; + goto unlock; + } + + if (dev->driver && try_module_get(dev->driver->owner)) { + struct scsi_driver *drv = to_scsi_driver(dev->driver); + + if (drv->resume) + ret = drv->resume(dev); + module_put(dev->driver->owner); + } + +unlock: + device_unlock(dev); + + return ret; +} +EXPORT_SYMBOL(scsi_resume_device); + int scsi_rescan_device(struct scsi_device *sdev) { struct device *dev = &sdev->sdev_gendev; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index ccff8f2e2e75..3cf898670290 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -4108,7 +4108,21 @@ static int sd_suspend_runtime(struct device *dev) return sd_suspend_common(dev, true); } -static int sd_resume(struct device *dev, bool runtime) +static int sd_resume(struct device *dev) +{ + struct scsi_disk *sdkp = dev_get_drvdata(dev); + + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + + if (opal_unlock_from_suspend(sdkp->opal_dev)) { + sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); + return -EIO; + } + + return 0; +} + +static int sd_resume_common(struct device *dev, bool runtime) { struct scsi_disk *sdkp = dev_get_drvdata(dev); int ret; @@ -4124,7 +4138,7 @@ static int sd_resume(struct device *dev, bool runtime) sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { - opal_unlock_from_suspend(sdkp->opal_dev); + sd_resume(dev); sdkp->suspended = false; } @@ -4143,7 +4157,7 @@ static int sd_resume_system(struct device *dev) return 0; } - return sd_resume(dev, false); + return sd_resume_common(dev, false); } static int sd_resume_runtime(struct device *dev) @@ -4170,7 +4184,7 @@ static int sd_resume_runtime(struct device *dev) "Failed to clear sense data\n"); } - return sd_resume(dev, true); + return sd_resume_common(dev, true); } static const struct dev_pm_ops sd_pm_ops = { @@ -4193,6 +4207,7 @@ static struct scsi_driver sd_template = { .pm = &sd_pm_ops, }, .rescan = sd_rescan, + .resume = sd_resume, .init_command = sd_init_command, .uninit_command = sd_uninit_command, .done = sd_done, diff --git a/include/linux/libata.h b/include/linux/libata.h index 26d68115afb8..324d792e7c78 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -107,6 +107,7 @@ enum { ATA_DFLAG_NCQ_PRIO_ENABLED = (1 << 20), /* Priority cmds sent to dev */ ATA_DFLAG_CDL_ENABLED = (1 << 21), /* cmd duration limits is enabled */ + ATA_DFLAG_RESUMING = (1 << 22), /* Device is resuming */ ATA_DFLAG_DETACH = (1 << 24), ATA_DFLAG_DETACHED = (1 << 25), ATA_DFLAG_DA = (1 << 26), /* device supports Device Attention */ diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 4ce1988b2ba0..f40915d2ecee 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -12,6 +12,7 @@ struct request; struct scsi_driver { struct device_driver gendrv; + int (*resume)(struct device *); void (*rescan)(struct device *); blk_status_t (*init_command)(struct scsi_cmnd *); void (*uninit_command)(struct scsi_cmnd *); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index b259d42a1e1a..129001f600fc 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -767,6 +767,7 @@ scsi_template_proc_dir(const struct scsi_host_template *sht); #define scsi_template_proc_dir(sht) NULL #endif extern void scsi_scan_host(struct Scsi_Host *); +extern int scsi_resume_device(struct scsi_device *sdev); extern int scsi_rescan_device(struct scsi_device *sdev); extern void scsi_remove_host(struct Scsi_Host *); extern struct Scsi_Host *scsi_host_get(struct Scsi_Host *); -- cgit From c2ddeb29612f7ca84ed10c6d4f3ac99705135447 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Mar 2024 13:58:08 +0100 Subject: genirq: Introduce IRQF_COND_ONESHOT and use it in pinctrl-amd There is a problem when a driver requests a shared interrupt line to run a threaded handler on it without IRQF_ONESHOT set if that flag has been set already for the IRQ in question by somebody else. Namely, the request fails which usually leads to a probe failure even though the driver might have worked just fine with IRQF_ONESHOT, but it does not want to use it by default. Currently, the only way to handle this is to try to request the IRQ without IRQF_ONESHOT, but with IRQF_PROBE_SHARED set and if this fails, try again with IRQF_ONESHOT set. However, this is a bit cumbersome and not very clean. When commit 7a36b901a6eb ("ACPI: OSL: Use a threaded interrupt handler for SCI") switched the ACPI subsystem over to using a threaded interrupt handler for the SCI, it had to use IRQF_ONESHOT for it because that's required due to the way the SCI handler works (it needs to walk all of the enabled GPEs before the interrupt line can be unmasked). The SCI interrupt line is not shared with other users very often due to the SCI handling overhead, but on sone systems it is shared and when the other user of it attempts to install a threaded handler, a flags mismatch related to IRQF_ONESHOT may occur. As it turned out, that happened to the pinctrl-amd driver and so commit 4451e8e8415e ("pinctrl: amd: Add IRQF_ONESHOT to the interrupt request") attempted to address the issue by adding IRQF_ONESHOT to the interrupt flags in that driver, but this is now causing an IRQF_ONESHOT-related mismatch to occur on another system which cannot boot as a result of it. Clearly, pinctrl-amd can work with IRQF_ONESHOT if need be, but it should not set that flag by default, so it needs a way to indicate that to the interrupt subsystem. To that end, introdcuce a new interrupt flag, IRQF_COND_ONESHOT, which will only have effect when the IRQ line is shared and IRQF_ONESHOT has been set for it already, in which case it will be promoted to the latter. This is sufficient for drivers sharing the interrupt line with the SCI as it is requested by the ACPI subsystem before any drivers are probed, so they will always see IRQF_ONESHOT set for the interrupt in question. Fixes: 4451e8e8415e ("pinctrl: amd: Add IRQF_ONESHOT to the interrupt request") Reported-by: Francisco Ayala Le Brun Signed-off-by: Rafael J. Wysocki Signed-off-by: Thomas Gleixner Reviewed-by: Linus Walleij Cc: 6.8+ # 6.8+ Closes: https://lore.kernel.org/lkml/CAN-StX1HqWqi+YW=t+V52-38Mfp5fAz7YHx4aH-CQjgyNiKx3g@mail.gmail.com/ Link: https://lore.kernel.org/r/12417336.O9o76ZdvQC@kreacher --- drivers/pinctrl/pinctrl-amd.c | 2 +- include/linux/interrupt.h | 3 +++ kernel/irq/manage.c | 9 +++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 49f89b70dcec..7f66ec73199a 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -1159,7 +1159,7 @@ static int amd_gpio_probe(struct platform_device *pdev) } ret = devm_request_irq(&pdev->dev, gpio_dev->irq, amd_gpio_irq_handler, - IRQF_SHARED | IRQF_ONESHOT, KBUILD_MODNAME, gpio_dev); + IRQF_SHARED | IRQF_COND_ONESHOT, KBUILD_MODNAME, gpio_dev); if (ret) goto out2; diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 76121c2bb4f8..5c9bdd3ffccc 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -67,6 +67,8 @@ * later. * IRQF_NO_DEBUG - Exclude from runnaway detection for IPI and similar handlers, * depends on IRQF_PERCPU. + * IRQF_COND_ONESHOT - Agree to do IRQF_ONESHOT if already set for a shared + * interrupt. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -82,6 +84,7 @@ #define IRQF_COND_SUSPEND 0x00040000 #define IRQF_NO_AUTOEN 0x00080000 #define IRQF_NO_DEBUG 0x00100000 +#define IRQF_COND_ONESHOT 0x00200000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ad3eaf2ab959..bf9ae8a8686f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1643,8 +1643,13 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) } if (!((old->flags & new->flags) & IRQF_SHARED) || - (oldtype != (new->flags & IRQF_TRIGGER_MASK)) || - ((old->flags ^ new->flags) & IRQF_ONESHOT)) + (oldtype != (new->flags & IRQF_TRIGGER_MASK))) + goto mismatch; + + if ((old->flags & IRQF_ONESHOT) && + (new->flags & IRQF_COND_ONESHOT)) + new->flags |= IRQF_ONESHOT; + else if ((old->flags ^ new->flags) & IRQF_ONESHOT) goto mismatch; /* All handlers must agree on per-cpuness */ -- cgit From 52464f59a361a3ba49d6eabc4f65d5c0b9d1de39 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 25 Mar 2024 17:00:57 +0000 Subject: gpiolib: Add stubs for GPIO lookup functions The gpio_device_find_by_() functions do not have stubs which means that if they are referenced from code with an optiona dependency on gpiolib then the code will fail to link. Add stubs for lookups via fwnode and label. I have not added a stub for plain gpio_device_find() since it seems harder to see a use case for that which does not depend on gpiolib. With the addition of the GPIO reset controller (which lacks a gpiolib dependency) to the arm64 defconfig this is causing build breaks for arm64 virtconfig in -next: aarch64-linux-gnu-ld: drivers/reset/core.o: in function `__reset_add_reset_gpio_lookup': /build/stage/linux/drivers/reset/core.c:861:(.text+0xccc): undefined reference to `gpio_device_find_by_fwnode' Signed-off-by: Mark Brown Reviewed-by: Krzysztof Kozlowski Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index dc75f802e284..f8617eaf08ba 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -646,8 +646,6 @@ int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, struct gpio_device *gpio_device_find(const void *data, int (*match)(struct gpio_chip *gc, const void *data)); -struct gpio_device *gpio_device_find_by_label(const char *label); -struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); struct gpio_device *gpio_device_get(struct gpio_device *gdev); void gpio_device_put(struct gpio_device *gdev); @@ -814,6 +812,9 @@ struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc); int gpio_device_get_base(struct gpio_device *gdev); const char *gpio_device_get_label(struct gpio_device *gdev); +struct gpio_device *gpio_device_find_by_label(const char *label); +struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode); + #else /* CONFIG_GPIOLIB */ #include @@ -843,6 +844,18 @@ static inline const char *gpio_device_get_label(struct gpio_device *gdev) return NULL; } +static inline struct gpio_device *gpio_device_find_by_label(const char *label) +{ + WARN_ON(1); + return NULL; +} + +static inline struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + WARN_ON(1); + return NULL; +} + static inline int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { -- cgit From 9cecde80aae0fb0aa44425575d5aca71bc646d89 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Mar 2024 14:08:21 +0000 Subject: mm: increase folio batch size On a 104 thread, 2 socket Skylake system, Intel report a 4.7% performance reduction with will-it-scale page_fault2. This was due to reducing the size of the batch from 32 to 15. Increasing the folio batch size from 15 to 31 gives a performance increase of 12.5% relative to the original, or 17.2% relative to the reduced performance commit. The penalty of this commit is an additional 128 bytes of stack usage. Six folio_batches are also allocated from percpu memory in cpu_fbatches so that will be an additional 768 bytes of percpu memory (per CPU). Tim Chen originally submitted a patch like this in 2020: https://lore.kernel.org/linux-mm/d1cc9f12a8ad6c2a52cb600d93b06b064f2bbc57.1593205965.git.tim.c.chen@linux.intel.com/ Link: https://lkml.kernel.org/r/20240315140823.2478146-1-willy@infradead.org Fixes: 99fbb6bfc16f ("mm: make folios_put() the basis of release_pages()") Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Yujie Liu Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202403151058.7048f6a8-oliver.sang@intel.com Signed-off-by: Andrew Morton --- include/linux/pagevec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index fcc06c300a72..5d3a0cccc6bf 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -11,8 +11,8 @@ #include -/* 15 pointers + header align the folio_batch structure to a power of two */ -#define PAGEVEC_SIZE 15 +/* 31 pointers + header align the folio_batch structure to a power of two */ +#define PAGEVEC_SIZE 31 struct folio; -- cgit From d5aad4c2ca057e760a92a9a7d65bd38d72963f27 Mon Sep 17 00:00:00 2001 From: Zev Weiss Date: Mon, 26 Feb 2024 17:35:41 -0800 Subject: prctl: generalize PR_SET_MDWE support check to be per-arch Patch series "ARM: prctl: Reject PR_SET_MDWE where not supported". I noticed after a recent kernel update that my ARM926 system started segfaulting on any execve() after calling prctl(PR_SET_MDWE). After some investigation it appears that ARMv5 is incapable of providing the appropriate protections for MDWE, since any readable memory is also implicitly executable. The prctl_set_mdwe() function already had some special-case logic added disabling it on PARISC (commit 793838138c15, "prctl: Disable prctl(PR_SET_MDWE) on parisc"); this patch series (1) generalizes that check to use an arch_*() function, and (2) adds a corresponding override for ARM to disable MDWE on pre-ARMv6 CPUs. With the series applied, prctl(PR_SET_MDWE) is rejected on ARMv5 and subsequent execve() calls (as well as mmap(PROT_READ|PROT_WRITE)) can succeed instead of unconditionally failing; on ARMv6 the prctl works as it did previously. [0] https://lore.kernel.org/all/2023112456-linked-nape-bf19@gregkh/ This patch (of 2): There exist systems other than PARISC where MDWE may not be feasible to support; rather than cluttering up the generic code with additional arch-specific logic let's add a generic function for checking MDWE support and allow each arch to override it as needed. Link: https://lkml.kernel.org/r/20240227013546.15769-4-zev@bewilderbeest.net Link: https://lkml.kernel.org/r/20240227013546.15769-5-zev@bewilderbeest.net Signed-off-by: Zev Weiss Acked-by: Helge Deller [parisc] Cc: Borislav Petkov Cc: David Hildenbrand Cc: Florent Revest Cc: "James E.J. Bottomley" Cc: Josh Triplett Cc: Kees Cook Cc: Miguel Ojeda Cc: Mike Rapoport (IBM) Cc: Oleg Nesterov Cc: Ondrej Mosnacek Cc: Rick Edgecombe Cc: Russell King (Oracle) Cc: Sam James Cc: Stefan Roesch Cc: Yang Shi Cc: Yin Fengwei Cc: [6.3+] Signed-off-by: Andrew Morton --- arch/parisc/include/asm/mman.h | 14 ++++++++++++++ include/linux/mman.h | 8 ++++++++ kernel/sys.c | 7 +++++-- 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 arch/parisc/include/asm/mman.h (limited to 'include/linux') diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h new file mode 100644 index 000000000000..47c5a1991d10 --- /dev/null +++ b/arch/parisc/include/asm/mman.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include + +/* PARISC cannot allow mdwe as it needs writable stacks */ +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return false; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported + +#endif /* __ASM_MMAN_H__ */ diff --git a/include/linux/mman.h b/include/linux/mman.h index dc7048824be8..bcb201ab7a41 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -162,6 +162,14 @@ calc_vm_flag_bits(unsigned long flags) unsigned long vm_commit_limit(void); +#ifndef arch_memory_deny_write_exec_supported +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return true; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported +#endif + /* * Denies creating a writable executable mapping or gaining executable permissions. * diff --git a/kernel/sys.c b/kernel/sys.c index f8e543f1e38a..8bb106a56b3a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2408,8 +2408,11 @@ static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3, if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN)) return -EINVAL; - /* PARISC cannot allow mdwe as it needs writable stacks */ - if (IS_ENABLED(CONFIG_PARISC)) + /* + * EOPNOTSUPP might be more appropriate here in principle, but + * existing userspace depends on EINVAL specifically. + */ + if (!arch_memory_deny_write_exec_supported()) return -EINVAL; current_bits = get_current_mdwe(); -- cgit From ea2c09283b44d1a3732a195a9b257d56779c8863 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Mon, 25 Mar 2024 09:25:05 +0100 Subject: net: wan: framer: Add missing static inline qualifiers Compilation with CONFIG_GENERIC_FRAMER disabled lead to the following warnings: framer.h:184:16: warning: no previous prototype for function 'framer_get' [-Wmissing-prototypes] 184 | struct framer *framer_get(struct device *dev, const char *con_id) framer.h:184:1: note: declare 'static' if the function is not intended to be used outside of this translation unit 184 | struct framer *framer_get(struct device *dev, const char *con_id) framer.h:189:6: warning: no previous prototype for function 'framer_put' [-Wmissing-prototypes] 189 | void framer_put(struct device *dev, struct framer *framer) framer.h:189:1: note: declare 'static' if the function is not intended to be used outside of this translation unit 189 | void framer_put(struct device *dev, struct framer *framer) Add missing 'static inline' qualifiers for these functions. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202403241110.hfJqeJRu-lkp@intel.com/ Fixes: 82c944d05b1a ("net: wan: Add framer framework support") Cc: stable@vger.kernel.org Signed-off-by: Herve Codina Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/framer/framer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/framer/framer.h b/include/linux/framer/framer.h index 9a9b88962c29..2b85fe9e7f9a 100644 --- a/include/linux/framer/framer.h +++ b/include/linux/framer/framer.h @@ -181,12 +181,12 @@ static inline int framer_notifier_unregister(struct framer *framer, return -ENOSYS; } -struct framer *framer_get(struct device *dev, const char *con_id) +static inline struct framer *framer_get(struct device *dev, const char *con_id) { return ERR_PTR(-ENOSYS); } -void framer_put(struct device *dev, struct framer *framer) +static inline void framer_put(struct device *dev, struct framer *framer) { } -- cgit From 18685451fc4e546fc0e718580d32df3c0e5c8272 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Mar 2024 11:18:41 +0100 Subject: inet: inet_defrag: prevent sk release while still in use ip_local_out() and other functions can pass skb->sk as function argument. If the skb is a fragment and reassembly happens before such function call returns, the sk must not be released. This affects skb fragments reassembled via netfilter or similar modules, e.g. openvswitch or ct_act.c, when run as part of tx pipeline. Eric Dumazet made an initial analysis of this bug. Quoting Eric: Calling ip_defrag() in output path is also implying skb_orphan(), which is buggy because output path relies on sk not disappearing. A relevant old patch about the issue was : 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") [..] net/ipv4/ip_output.c depends on skb->sk being set, and probably to an inet socket, not an arbitrary one. If we orphan the packet in ipvlan, then downstream things like FQ packet scheduler will not work properly. We need to change ip_defrag() to only use skb_orphan() when really needed, ie whenever frag_list is going to be used. Eric suggested to stash sk in fragment queue and made an initial patch. However there is a problem with this: If skb is refragmented again right after, ip_do_fragment() will copy head->sk to the new fragments, and sets up destructor to sock_wfree. IOW, we have no choice but to fix up sk_wmem accouting to reflect the fully reassembled skb, else wmem will underflow. This change moves the orphan down into the core, to last possible moment. As ip_defrag_offset is aliased with sk_buff->sk member, we must move the offset into the FRAG_CB, else skb->sk gets clobbered. This allows to delay the orphaning long enough to learn if the skb has to be queued or if the skb is completing the reasm queue. In the former case, things work as before, skb is orphaned. This is safe because skb gets queued/stolen and won't continue past reasm engine. In the latter case, we will steal the skb->sk reference, reattach it to the head skb, and fix up wmem accouting when inet_frag inflates truesize. Fixes: 7026b1ddb6b8 ("netfilter: Pass socket pointer down through okfn().") Diagnosed-by: Eric Dumazet Reported-by: xingwei lee Reported-by: yue sun Reported-by: syzbot+e5167d7144a62715044c@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240326101845.30836-1-fw@strlen.de Signed-off-by: Paolo Abeni --- include/linux/skbuff.h | 7 +--- net/ipv4/inet_fragment.c | 70 +++++++++++++++++++++++++++------ net/ipv4/ip_fragment.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- 4 files changed, 60 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0c7c67b3a87b..9d24aec064e8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -753,8 +753,6 @@ typedef unsigned char *sk_buff_data_t; * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by - * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in - * fragmentation management * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. Put private vars here @@ -875,10 +873,7 @@ struct sk_buff { struct llist_node ll_node; }; - union { - struct sock *sk; - int ip_defrag_offset; - }; + struct sock *sk; union { ktime_t tstamp; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 7072fc0783ef..c88c9034d630 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,6 +24,8 @@ #include #include +#include "../core/sock_destructor.h" + /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue. Nodes in the rb-tree queue will * contain "runs" of one or more adjacent fragments. @@ -39,6 +41,7 @@ struct ipfrag_skb_cb { }; struct sk_buff *next_frag; int frag_run_len; + int ip_defrag_offset; }; #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) @@ -396,12 +399,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, */ if (!last) fragrun_create(q, skb); /* First fragment. */ - else if (last->ip_defrag_offset + last->len < end) { + else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. */ - if (offset < last->ip_defrag_offset + last->len) + if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) return IPFRAG_OVERLAP; - if (offset == last->ip_defrag_offset + last->len) + if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) fragrun_append_to_last(q, skb); else fragrun_create(q, skb); @@ -418,13 +421,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, parent = *rbn; curr = rb_to_skb(parent); - curr_run_end = curr->ip_defrag_offset + + curr_run_end = FRAG_CB(curr)->ip_defrag_offset + FRAG_CB(curr)->frag_run_len; - if (end <= curr->ip_defrag_offset) + if (end <= FRAG_CB(curr)->ip_defrag_offset) rbn = &parent->rb_left; else if (offset >= curr_run_end) rbn = &parent->rb_right; - else if (offset >= curr->ip_defrag_offset && + else if (offset >= FRAG_CB(curr)->ip_defrag_offset && end <= curr_run_end) return IPFRAG_DUP; else @@ -438,7 +441,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, rb_insert_color(&skb->rbnode, &q->rb_fragments); } - skb->ip_defrag_offset = offset; + FRAG_CB(skb)->ip_defrag_offset = offset; return IPFRAG_OK; } @@ -448,13 +451,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent) { struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); - struct sk_buff **nextp; + void (*destructor)(struct sk_buff *); + unsigned int orig_truesize = 0; + struct sk_buff **nextp = NULL; + struct sock *sk = skb->sk; int delta; + if (sk && is_skb_wmem(skb)) { + /* TX: skb->sk might have been passed as argument to + * dst->output and must remain valid until tx completes. + * + * Move sk to reassembled skb and fix up wmem accounting. + */ + orig_truesize = skb->truesize; + destructor = skb->destructor; + } + if (head != skb) { fp = skb_clone(skb, GFP_ATOMIC); - if (!fp) - return NULL; + if (!fp) { + head = skb; + goto out_restore_sk; + } FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; if (RB_EMPTY_NODE(&skb->rbnode)) FRAG_CB(parent)->next_frag = fp; @@ -463,6 +481,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, &q->rb_fragments); if (q->fragments_tail == skb) q->fragments_tail = fp; + + if (orig_truesize) { + /* prevent skb_morph from releasing sk */ + skb->sk = NULL; + skb->destructor = NULL; + } skb_morph(skb, head); FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, @@ -470,13 +494,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, consume_skb(head); head = skb; } - WARN_ON(head->ip_defrag_offset != 0); + WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); delta = -head->truesize; /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) - return NULL; + goto out_restore_sk; delta += head->truesize; if (delta) @@ -492,7 +516,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, clone = alloc_skb(0, GFP_ATOMIC); if (!clone) - return NULL; + goto out_restore_sk; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) @@ -509,6 +533,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, nextp = &skb_shinfo(head)->frag_list; } +out_restore_sk: + if (orig_truesize) { + int ts_delta = head->truesize - orig_truesize; + + /* if this reassembled skb is fragmented later, + * fraglist skbs will get skb->sk assigned from head->sk, + * and each frag skb will be released via sock_wfree. + * + * Update sk_wmem_alloc. + */ + head->sk = sk; + head->destructor = destructor; + refcount_add(ts_delta, &sk->sk_wmem_alloc); + } + return nextp; } EXPORT_SYMBOL(inet_frag_reasm_prepare); @@ -516,6 +555,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { + struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; + const unsigned int head_truesize = head->truesize; struct sk_buff **nextp = reasm_data; struct rb_node *rbn; struct sk_buff *fp; @@ -579,6 +620,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, head->prev = NULL; head->tstamp = q->stamp; head->mono_delivery_time = q->mono_delivery_time; + + if (sk) + refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(inet_frag_reasm_finish); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index a4941f53b523..fb947d1613fe 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -384,6 +384,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -487,7 +488,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) struct ipq *qp; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); - skb_orphan(skb); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1a51a44571c3..d0dcbaca1994 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -294,6 +294,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -469,7 +470,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - skb_orphan(skb); fq = fq_find(net, fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); if (fq == NULL) { -- cgit