aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap2
-rw-r--r--Documentation/trace/user_events.rst7
-rw-r--r--MAINTAINERS4
-rw-r--r--arch/arm64/hyperv/mshyperv.c2
-rw-r--r--arch/x86/hyperv/hv_init.c2
-rw-r--r--arch/x86/hyperv/hv_vtl.c2
-rw-r--r--drivers/acpi/acpica/achware.h2
-rw-r--r--drivers/acpi/sleep.c16
-rw-r--r--drivers/base/regmap/regmap-spi-avmm.c2
-rw-r--r--drivers/dma-buf/udmabuf.c47
-rw-r--r--drivers/firmware/efi/efi.c21
-rw-r--r--drivers/hv/channel_mgmt.c18
-rw-r--r--drivers/hv/hv_common.c48
-rw-r--r--drivers/hv/vmbus_drv.c5
-rw-r--r--drivers/md/dm-cache-metadata.c2
-rw-r--r--drivers/md/dm-thin-metadata.c2
-rw-r--r--drivers/pci/controller/pci-hyperv.c139
-rw-r--r--drivers/spi/spi-geni-qcom.c2
-rw-r--r--drivers/thermal/intel/intel_soc_dts_iosf.c2
-rw-r--r--fs/afs/write.c7
-rw-r--r--fs/btrfs/volumes.c12
-rw-r--r--fs/nilfs2/page.c10
-rw-r--r--fs/nilfs2/segbuf.c6
-rw-r--r--fs/nilfs2/segment.c7
-rw-r--r--fs/nilfs2/super.c23
-rw-r--r--fs/smb/server/server.c33
-rw-r--r--fs/smb/server/smb2misc.c33
-rw-r--r--fs/smb/server/smb2pdu.c70
-rw-r--r--fs/smb/server/smbacl.c10
-rw-r--r--fs/smb/server/vfs.c117
-rw-r--r--fs/smb/server/vfs.h17
-rw-r--r--fs/smb/server/vfs_cache.c2
-rw-r--r--fs/super.c2
-rw-r--r--include/acpi/acpixf.h1
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/regulator/pca9450.h4
-rw-r--r--include/trace/events/writeback.h2
-rw-r--r--kernel/trace/trace_events_user.c290
-rw-r--r--kernel/trace/trace_output.c2
-rw-r--r--mm/khugepaged.c1
-rw-r--r--mm/memfd.c9
-rw-r--r--mm/mprotect.c2
-rw-r--r--mm/shrinker_debug.c39
-rw-r--r--mm/vmalloc.c17
-rw-r--r--mm/vmscan.c125
-rw-r--r--scripts/gdb/linux/constants.py.in12
-rwxr-xr-xscripts/gfp-translate6
-rw-r--r--tools/testing/selftests/mm/Makefile13
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c177
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c88
-rw-r--r--tools/testing/selftests/user_events/perf_test.c82
51 files changed, 1036 insertions, 509 deletions
diff --git a/.mailmap b/.mailmap
index 650689d00930..c94da2a63d0f 100644
--- a/.mailmap
+++ b/.mailmap
@@ -70,6 +70,8 @@ Baolin Wang <[email protected]> <[email protected]>
Ben Gardner <[email protected]>
Ben M Cahill <[email protected]>
diff --git a/Documentation/trace/user_events.rst b/Documentation/trace/user_events.rst
index f79987e16cf4..e7b07313550a 100644
--- a/Documentation/trace/user_events.rst
+++ b/Documentation/trace/user_events.rst
@@ -14,10 +14,6 @@ Programs can view status of the events via
/sys/kernel/tracing/user_events_status and can both register and write
data out via /sys/kernel/tracing/user_events_data.
-Programs can also use /sys/kernel/tracing/dynamic_events to register and
-delete user based events via the u: prefix. The format of the command to
-dynamic_events is the same as the ioctl with the u: prefix applied.
-
Typically programs will register a set of events that they wish to expose to
tools that can read trace_events (such as ftrace and perf). The registration
process tells the kernel which address and bit to reflect if any tool has
@@ -144,6 +140,9 @@ its name. Delete will only succeed if there are no references left to the
event (in both user and kernel space). User programs should use a separate file
to request deletes than the one used for registration due to this.
+**NOTE:** By default events will auto-delete when there are no references left
+to the event. Flags in the future may change this logic.
+
Unregistering
-------------
If after registering an event it is no longer wanted to be updated then it can
diff --git a/MAINTAINERS b/MAINTAINERS
index 6992b7cc7095..ad32db6c81cc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17827,7 +17827,7 @@ F: tools/testing/selftests/rtc/
Real-time Linux Analysis (RTLA) tools
M: Daniel Bristot de Oliveira <[email protected]>
M: Steven Rostedt <[email protected]>
S: Maintained
F: Documentation/tools/rtla/
F: tools/tracing/rtla/
@@ -18397,7 +18397,7 @@ F: drivers/infiniband/ulp/rtrs/
RUNTIME VERIFICATION (RV)
M: Daniel Bristot de Oliveira <[email protected]>
M: Steven Rostedt <[email protected]>
S: Maintained
F: Documentation/trace/rv/
F: include/linux/rv.h
diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c
index a406454578f0..f1b8a04ee9f2 100644
--- a/arch/arm64/hyperv/mshyperv.c
+++ b/arch/arm64/hyperv/mshyperv.c
@@ -67,7 +67,7 @@ static int __init hyperv_init(void)
if (ret)
return ret;
- ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm64/hyperv_init:online",
+ ret = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "arm64/hyperv_init:online",
hv_common_cpu_init, hv_common_cpu_die);
if (ret < 0) {
hv_common_free();
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index a5f9474f08e1..6c04b52f139b 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -416,7 +416,7 @@ void __init hyperv_init(void)
goto free_vp_assist_page;
}
- cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online",
+ cpuhp = cpuhp_setup_state(CPUHP_AP_HYPERV_ONLINE, "x86/hyperv_init:online",
hv_cpu_init, hv_cpu_die);
if (cpuhp < 0)
goto free_ghcb_page;
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 1ba5d3b99b16..85d38b9f3586 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -20,6 +20,8 @@ void __init hv_vtl_init_platform(void)
{
pr_info("Linux runs in Hyper-V Virtual Trust Level\n");
+ x86_platform.realmode_reserve = x86_init_noop;
+ x86_platform.realmode_init = x86_init_noop;
x86_init.irqs.pre_vector_init = x86_init_noop;
x86_init.timers.timer_init = x86_init_noop;
diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h
index ebf8fd373cf7..79bbfe00d241 100644
--- a/drivers/acpi/acpica/achware.h
+++ b/drivers/acpi/acpica/achware.h
@@ -101,8 +101,6 @@ acpi_status
acpi_hw_get_gpe_status(struct acpi_gpe_event_info *gpe_event_info,
acpi_event_status *event_status);
-acpi_status acpi_hw_disable_all_gpes(void);
-
acpi_status acpi_hw_enable_all_runtime_gpes(void);
acpi_status acpi_hw_enable_all_wakeup_gpes(void);
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 72470b9f16c4..f32570f72b90 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -636,11 +636,19 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
}
/*
- * Disable and clear GPE status before interrupt is enabled. Some GPEs
- * (like wakeup GPE) haven't handler, this can avoid such GPE misfire.
- * acpi_leave_sleep_state will reenable specific GPEs later
+ * Disable all GPE and clear their status bits before interrupts are
+ * enabled. Some GPEs (like wakeup GPEs) have no handlers and this can
+ * prevent them from producing spurious interrups.
+ *
+ * acpi_leave_sleep_state() will reenable specific GPEs later.
+ *
+ * Because this code runs on one CPU with disabled interrupts (all of
+ * the other CPUs are offline at this time), it need not acquire any
+ * sleeping locks which may trigger an implicit preemption point even
+ * if there is no contention, so avoid doing that by using a low-level
+ * library routine here.
*/
- acpi_disable_all_gpes();
+ acpi_hw_disable_all_gpes();
/* Allow EC transactions to happen. */
acpi_ec_unblock_transactions();
diff --git a/drivers/base/regmap/regmap-spi-avmm.c b/drivers/base/regmap/regmap-spi-avmm.c
index 4c2b94b3e30b..6af692844c19 100644
--- a/drivers/base/regmap/regmap-spi-avmm.c
+++ b/drivers/base/regmap/regmap-spi-avmm.c
@@ -660,7 +660,7 @@ static const struct regmap_bus regmap_spi_avmm_bus = {
.reg_format_endian_default = REGMAP_ENDIAN_NATIVE,
.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
.max_raw_read = SPI_AVMM_VAL_SIZE * MAX_READ_CNT,
- .max_raw_write = SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT,
+ .max_raw_write = SPI_AVMM_REG_SIZE + SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT,
.free_context = spi_avmm_bridge_ctx_free,
};
diff --git a/drivers/dma-buf/udmabuf.c b/drivers/dma-buf/udmabuf.c
index 01f2e86f3f7c..12cf6bb2e3ce 100644
--- a/drivers/dma-buf/udmabuf.c
+++ b/drivers/dma-buf/udmabuf.c
@@ -12,7 +12,6 @@
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/udmabuf.h>
-#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/iosys-map.h>
@@ -207,9 +206,7 @@ static long udmabuf_create(struct miscdevice *device,
struct udmabuf *ubuf;
struct dma_buf *buf;
pgoff_t pgoff, pgcnt, pgidx, pgbuf = 0, pglimit;
- struct page *page, *hpage = NULL;
- pgoff_t subpgoff, maxsubpgs;
- struct hstate *hpstate;
+ struct page *page;
int seals, ret = -EINVAL;
u32 i, flags;
@@ -245,7 +242,7 @@ static long udmabuf_create(struct miscdevice *device,
if (!memfd)
goto err;
mapping = memfd->f_mapping;
- if (!shmem_mapping(mapping) && !is_file_hugepages(memfd))
+ if (!shmem_mapping(mapping))
goto err;
seals = memfd_fcntl(memfd, F_GET_SEALS, 0);
if (seals == -EINVAL)
@@ -256,48 +253,16 @@ static long udmabuf_create(struct miscdevice *device,
goto err;
pgoff = list[i].offset >> PAGE_SHIFT;
pgcnt = list[i].size >> PAGE_SHIFT;
- if (is_file_hugepages(memfd)) {
- hpstate = hstate_file(memfd);
- pgoff = list[i].offset >> huge_page_shift(hpstate);
- subpgoff = (list[i].offset &
- ~huge_page_mask(hpstate)) >> PAGE_SHIFT;
- maxsubpgs = huge_page_size(hpstate) >> PAGE_SHIFT;
- }
for (pgidx = 0; pgidx < pgcnt; pgidx++) {
- if (is_file_hugepages(memfd)) {
- if (!hpage) {
- hpage = find_get_page_flags(mapping, pgoff,
- FGP_ACCESSED);
- if (!hpage) {
- ret = -EINVAL;
- goto err;
- }
- }
- page = hpage + subpgoff;
- get_page(page);
- subpgoff++;
- if (subpgoff == maxsubpgs) {
- put_page(hpage);
- hpage = NULL;
- subpgoff = 0;
- pgoff++;
- }
- } else {
- page = shmem_read_mapping_page(mapping,
- pgoff + pgidx);
- if (IS_ERR(page)) {
- ret = PTR_ERR(page);
- goto err;
- }
+ page = shmem_read_mapping_page(mapping, pgoff + pgidx);
+ if (IS_ERR(page)) {
+ ret = PTR_ERR(page);
+ goto err;
}
ubuf->pages[pgbuf++] = page;
}
fput(memfd);
memfd = NULL;
- if (hpage) {
- put_page(hpage);
- hpage = NULL;
- }
}
exp_info.ops = &udmabuf_ops;
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index abeff7dc0b58..34b9e7876538 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -361,24 +361,6 @@ static void __init efi_debugfs_init(void)
static inline void efi_debugfs_init(void) {}
#endif
-static void refresh_nv_rng_seed(struct work_struct *work)
-{
- u8 seed[EFI_RANDOM_SEED_SIZE];
-
- get_random_bytes(seed, sizeof(seed));
- efi.set_variable(L"RandomSeed", &LINUX_EFI_RANDOM_SEED_TABLE_GUID,
- EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS |
- EFI_VARIABLE_RUNTIME_ACCESS, sizeof(seed), seed);
- memzero_explicit(seed, sizeof(seed));
-}
-static int refresh_nv_rng_seed_notification(struct notifier_block *nb, unsigned long action, void *data)
-{
- static DECLARE_WORK(work, refresh_nv_rng_seed);
- schedule_work(&work);
- return NOTIFY_DONE;
-}
-static struct notifier_block refresh_nv_rng_seed_nb = { .notifier_call = refresh_nv_rng_seed_notification };
-
/*
* We register the efi subsystem with the firmware subsystem and the
* efivars subsystem with the efi subsystem, if the system was booted with
@@ -451,9 +433,6 @@ static int __init efisubsys_init(void)
platform_device_register_simple("efi_secret", 0, NULL, 0);
#endif
- if (efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE))
- execute_with_initialized_rng(&refresh_nv_rng_seed_nb);
-
return 0;
err_remove_group:
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 007f26d5f1a4..2f4d09ce027a 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -829,11 +829,22 @@ static void vmbus_wait_for_unload(void)
if (completion_done(&vmbus_connection.unload_event))
goto completed;
- for_each_online_cpu(cpu) {
+ for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
+ /*
+ * In a CoCo VM the synic_message_page is not allocated
+ * in hv_synic_alloc(). Instead it is set/cleared in
+ * hv_synic_enable_regs() and hv_synic_disable_regs()
+ * such that it is set only when the CPU is online. If
+ * not all present CPUs are online, the message page
+ * might be NULL, so skip such CPUs.
+ */
page_addr = hv_cpu->synic_message_page;
+ if (!page_addr)
+ continue;
+
msg = (struct hv_message *)page_addr
+ VMBUS_MESSAGE_SINT;
@@ -867,11 +878,14 @@ completed:
* maybe-pending messages on all CPUs to be able to receive new
* messages after we reconnect.
*/
- for_each_online_cpu(cpu) {
+ for_each_present_cpu(cpu) {
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
page_addr = hv_cpu->synic_message_page;
+ if (!page_addr)
+ continue;
+
msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
msg->header.message_type = HVMSG_NONE;
}
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 64f9ceca887b..542a1d53b303 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -364,13 +364,20 @@ int hv_common_cpu_init(unsigned int cpu)
flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL;
inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
- *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
- if (!(*inputarg))
- return -ENOMEM;
- if (hv_root_partition) {
- outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
- *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
+ /*
+ * hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory is already
+ * allocated if this CPU was previously online and then taken offline
+ */
+ if (!*inputarg) {
+ *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags);
+ if (!(*inputarg))
+ return -ENOMEM;
+
+ if (hv_root_partition) {
+ outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
+ *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE;
+ }
}
msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX);
@@ -385,24 +392,17 @@ int hv_common_cpu_init(unsigned int cpu)
int hv_common_cpu_die(unsigned int cpu)
{
- unsigned long flags;
- void **inputarg, **outputarg;
- void *mem;
-
- local_irq_save(flags);
-
- inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg);
- mem = *inputarg;
- *inputarg = NULL;
-
- if (hv_root_partition) {
- outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg);
- *outputarg = NULL;
- }
-
- local_irq_restore(flags);
-
- kfree(mem);
+ /*
+ * The hyperv_pcpu_input_arg and hyperv_pcpu_output_arg memory
+ * is not freed when the CPU goes offline as the hyperv_pcpu_input_arg
+ * may be used by the Hyper-V vPCI driver in reassigning interrupts
+ * as part of the offlining process. The interrupt reassignment
+ * happens *after* the CPUHP_AP_HYPERV_ONLINE state has run and
+ * called this function.
+ *
+ * If a previously offlined CPU is brought back online again, the
+ * originally allocated memory is reused in hv_common_cpu_init().
+ */
return 0;
}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 1c65a6dfb9fa..67f95a29aeca 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -1372,7 +1372,7 @@ static int vmbus_bus_init(void)
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
hv_synic_init, hv_synic_cleanup);
if (ret < 0)
- goto err_cpuhp;
+ goto err_alloc;
hyperv_cpuhp_online = ret;
ret = vmbus_connect();
@@ -1392,9 +1392,8 @@ static int vmbus_bus_init(void)
err_connect:
cpuhp_remove_state(hyperv_cpuhp_online);
-err_cpuhp:
- hv_synic_free();
err_alloc:
+ hv_synic_free();
if (vmbus_irq == -1) {
hv_remove_vmbus_handler();
} else {
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 9e0c69958587..acffed750e3e 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -1828,7 +1828,7 @@ int dm_cache_metadata_abort(struct dm_cache_metadata *cmd)
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* cmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs cmd root_lock).
- * - must take shrinker_mutex without holding cmd->root_lock
+ * - must take shrinker_rwsem without holding cmd->root_lock
*/
new_bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
CACHE_MAX_CONCURRENT_LOCKS);
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index b9461faa9f0d..9dd0409848ab 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -1891,7 +1891,7 @@ int dm_pool_abort_metadata(struct dm_pool_metadata *pmd)
* Replacement block manager (new_bm) is created and old_bm destroyed outside of
* pmd root_lock to avoid ABBA deadlock that would result (due to life-cycle of
* shrinker associated with the block manager's bufio client vs pmd root_lock).
- * - must take shrinker_mutex without holding pmd->root_lock
+ * - must take shrinker_rwsem without holding pmd->root_lock
*/
new_bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
THIN_MAX_CONCURRENT_LOCKS);
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index bc32662c6bb7..2d93d0c4f10d 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -489,7 +489,10 @@ struct hv_pcibus_device {
struct fwnode_handle *fwnode;
/* Protocol version negotiated with the host */
enum pci_protocol_version_t protocol_version;
+
+ struct mutex state_lock;
enum hv_pcibus_state state;
+
struct hv_device *hdev;
resource_size_t low_mmio_space;
resource_size_t high_mmio_space;
@@ -545,19 +548,10 @@ struct hv_dr_state {
struct hv_pcidev_description func[];
};
-enum hv_pcichild_state {
- hv_pcichild_init = 0,
- hv_pcichild_requirements,
- hv_pcichild_resourced,
- hv_pcichild_ejecting,
- hv_pcichild_maximum
-};
-
struct hv_pci_dev {
/* List protected by pci_rescan_remove_lock */
struct list_head list_entry;
refcount_t refs;
- enum hv_pcichild_state state;
struct pci_slot *pci_slot;
struct hv_pcidev_description desc;
bool reported_missing;
@@ -635,6 +629,11 @@ static void hv_arch_irq_unmask(struct irq_data *data)
pbus = pdev->bus;
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
int_desc = data->chip_data;
+ if (!int_desc) {
+ dev_warn(&hbus->hdev->device, "%s() can not unmask irq %u\n",
+ __func__, data->irq);
+ return;
+ }
local_irq_save(flags);
@@ -2004,12 +2003,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
hv_pci_onchannelcallback(hbus);
spin_unlock_irqrestore(&channel->sched_lock, flags);
- if (hpdev->state == hv_pcichild_ejecting) {
- dev_err_once(&hbus->hdev->device,
- "the device is being ejected\n");
- goto enable_tasklet;
- }
-
udelay(100);
}
@@ -2615,6 +2608,8 @@ static void pci_devices_present_work(struct work_struct *work)
if (!dr)
return;
+ mutex_lock(&hbus->state_lock);
+
/* First, mark all existing children as reported missing. */
spin_lock_irqsave(&hbus->device_list_lock, flags);
list_for_each_entry(hpdev, &hbus->children, list_entry) {
@@ -2696,6 +2691,8 @@ static void pci_devices_present_work(struct work_struct *work)
break;
}
+ mutex_unlock(&hbus->state_lock);
+
kfree(dr);
}
@@ -2844,7 +2841,7 @@ static void hv_eject_device_work(struct work_struct *work)
hpdev = container_of(work, struct hv_pci_dev, wrk);
hbus = hpdev->hbus;
- WARN_ON(hpdev->state != hv_pcichild_ejecting);
+ mutex_lock(&hbus->state_lock);
/*
* Ejection can come before or after the PCI bus has been set up, so
@@ -2882,6 +2879,8 @@ static void hv_eject_device_work(struct work_struct *work)
put_pcichild(hpdev);
put_pcichild(hpdev);
/* hpdev has been freed. Do not use it any more. */
+
+ mutex_unlock(&hbus->state_lock);
}
/**
@@ -2902,7 +2901,6 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
return;
}
- hpdev->state = hv_pcichild_ejecting;
get_pcichild(hpdev);
INIT_WORK(&hpdev->wrk, hv_eject_device_work);
queue_work(hbus->wq, &hpdev->wrk);
@@ -3331,8 +3329,10 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
struct pci_bus_d0_entry *d0_entry;
struct hv_pci_compl comp_pkt;
struct pci_packet *pkt;
+ bool retry = true;
int ret;
+enter_d0_retry:
/*
* Tell the host that the bus is ready to use, and moved into the
* powered-on state. This includes telling the host which region
@@ -3359,6 +3359,38 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
if (ret)
goto exit;
+ /*
+ * In certain case (Kdump) the pci device of interest was
+ * not cleanly shut down and resource is still held on host
+ * side, the host could return invalid device status.
+ * We need to explicitly request host to release the resource
+ * and try to enter D0 again.
+ */
+ if (comp_pkt.completion_status < 0 && retry) {
+ retry = false;
+
+ dev_err(&hdev->device, "Retrying D0 Entry\n");
+
+ /*
+ * Hv_pci_bus_exit() calls hv_send_resource_released()
+ * to free up resources of its child devices.
+ * In the kdump kernel we need to set the
+ * wslot_res_allocated to 255 so it scans all child
+ * devices to release resources allocated in the
+ * normal kernel before panic happened.
+ */
+ hbus->wslot_res_allocated = 255;
+
+ ret = hv_pci_bus_exit(hdev, true);
+
+ if (ret == 0) {
+ kfree(pkt);
+ goto enter_d0_retry;
+ }
+ dev_err(&hdev->device,
+ "Retrying D0 failed with ret %d\n", ret);
+ }
+
if (comp_pkt.completion_status < 0) {
dev_err(&hdev->device,
"PCI Pass-through VSP failed D0 Entry with status %x\n",
@@ -3401,6 +3433,24 @@ static int hv_pci_query_relations(struct hv_device *hdev)
if (!ret)
ret = wait_for_response(hdev, &comp);
+ /*
+ * In the case of fast device addition/removal, it's possible that
+ * vmbus_sendpacket() or wait_for_response() returns -ENODEV but we
+ * already got a PCI_BUS_RELATIONS* message from the host and the
+ * channel callback already scheduled a work to hbus->wq, which can be
+ * running pci_devices_present_work() -> survey_child_resources() ->
+ * complete(&hbus->survey_event), even after hv_pci_query_relations()
+ * exits and the stack variable 'comp' is no longer valid; as a result,
+ * a hang or a page fault may happen when the complete() calls
+ * raw_spin_lock_irqsave(). Flush hbus->wq before we exit from
+ * hv_pci_query_relations() to avoid the issues. Note: if 'ret' is
+ * -ENODEV, there can't be any more work item scheduled to hbus->wq
+ * after the flush_workqueue(): see vmbus_onoffer_rescind() ->
+ * vmbus_reset_channel_cb(), vmbus_rescind_cleanup() ->
+ * channel->rescind = true.
+ */
+ flush_workqueue(hbus->wq);
+
return ret;
}
@@ -3586,7 +3636,6 @@ static int hv_pci_probe(struct hv_device *hdev,
struct hv_pcibus_device *hbus;
u16 dom_req, dom;
char *name;
- bool enter_d0_retry = true;
int ret;
bridge = devm_pci_alloc_host_bridge(&hdev->device, 0);
@@ -3598,6 +3647,7 @@ static int hv_pci_probe(struct hv_device *hdev,
return -ENOMEM;
hbus->bridge = bridge;
+ mutex_init(&hbus->state_lock);
hbus->state = hv_pcibus_init;
hbus->wslot_res_allocated = -1;
@@ -3703,49 +3753,15 @@ static int hv_pci_probe(struct hv_device *hdev,
if (ret)
goto free_fwnode;
-retry:
ret = hv_pci_query_relations(hdev);
if (ret)
goto free_irq_domain;
- ret = hv_pci_enter_d0(hdev);
- /*
- * In certain case (Kdump) the pci device of interest was
- * not cleanly shut down and resource is still held on host
- * side, the host could return invalid device status.
- * We need to explicitly request host to release the resource
- * and try to enter D0 again.
- * Since the hv_pci_bus_exit() call releases structures
- * of all its child devices, we need to start the retry from
- * hv_pci_query_relations() call, requesting host to send
- * the synchronous child device relations message before this
- * information is needed in hv_send_resources_allocated()
- * call later.
- */
- if (ret == -EPROTO && enter_d0_retry) {
- enter_d0_retry = false;
-
- dev_err(&hdev->device, "Retrying D0 Entry\n");
-
- /*
- * Hv_pci_bus_exit() calls hv_send_resources_released()
- * to free up resources of its child devices.
- * In the kdump kernel we need to set the
- * wslot_res_allocated to 255 so it scans all child
- * devices to release resources allocated in the
- * normal kernel before panic happened.
- */
- hbus->wslot_res_allocated = 255;
- ret = hv_pci_bus_exit(hdev, true);
-
- if (ret == 0)
- goto retry;
+ mutex_lock(&hbus->state_lock);
- dev_err(&hdev->device,
- "Retrying D0 failed with ret %d\n", ret);
- }
+ ret = hv_pci_enter_d0(hdev);
if (ret)
- goto free_irq_domain;
+ goto release_state_lock;
ret = hv_pci_allocate_bridge_windows(hbus);
if (ret)
@@ -3763,12 +3779,15 @@ retry:
if (ret)
goto free_windows;
+ mutex_unlock(&hbus->state_lock);
return 0;
free_windows:
hv_pci_free_bridge_windows(hbus);
exit_d0:
(void) hv_pci_bus_exit(hdev, true);
+release_state_lock:
+ mutex_unlock(&hbus->state_lock);
free_irq_domain:
irq_domain_remove(hbus->irq_domain);
free_fwnode:
@@ -4018,20 +4037,26 @@ static int hv_pci_resume(struct hv_device *hdev)
if (ret)
goto out;
+ mutex_lock(&hbus->state_lock);
+
ret = hv_pci_enter_d0(hdev);
if (ret)
- goto out;
+ goto release_state_lock;
ret = hv_send_resources_allocated(hdev);
if (ret)
- goto out;
+ goto release_state_lock;
prepopulate_bars(hbus);
hv_pci_restore_msi_state(hbus);
hbus->state = hv_pcibus_installed;
+ mutex_unlock(&hbus->state_lock);
return 0;
+
+release_state_lock:
+ mutex_unlock(&hbus->state_lock);
out:
vmbus_close(hdev->channel);
return ret;
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index a98b781b103a..b293428760bc 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -646,6 +646,8 @@ static int spi_geni_init(struct spi_geni_master *mas)
geni_se_select_mode(se, GENI_GPI_DMA);
dev_dbg(mas->dev, "Using GPI DMA mode for SPI\n");
break;
+ } else if (ret == -EPROBE_DEFER) {
+ goto out_pm;
}
/*
* in case of failure to get gpi dma channel, we can still do the
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
index f99dc7e4ae89..db97499f4f0a 100644
--- a/drivers/thermal/intel/intel_soc_dts_iosf.c
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -398,7 +398,7 @@ struct intel_soc_dts_sensors *intel_soc_dts_iosf_init(
spin_lock_init(&sensors->intr_notify_lock);
mutex_init(&sensors->dts_update_lock);
sensors->intr_type = intr_type;
- sensors->tj_max = tj_max;
+ sensors->tj_max = tj_max * 1000;
if (intr_type == INTEL_SOC_DTS_INTERRUPT_NONE)
notification = false;
else
diff --git a/fs/afs/write.c b/fs/afs/write.c
index c822d6006033..8750b99c3f56 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -731,6 +731,7 @@ static int afs_writepages_region(struct address_space *mapping,
* (changing page->mapping to NULL), or even swizzled
* back from swapper_space to tmpfs file mapping
*/
+try_again:
if (wbc->sync_mode != WB_SYNC_NONE) {
ret = folio_lock_killable(folio);
if (ret < 0) {
@@ -757,12 +758,14 @@ static int afs_writepages_region(struct address_space *mapping,
#ifdef CONFIG_AFS_FSCACHE
folio_wait_fscache(folio);
#endif
- } else {
- start += folio_size(folio);
+ goto try_again;
}
+
+ start += folio_size(folio);
if (wbc->sync_mode == WB_SYNC_NONE) {
if (skips >= 5 || need_resched()) {
*_next = start;
+ folio_batch_release(&fbatch);
_leave(" = 0 [%llx]", *_next);
return 0;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 841e799dece5..e60beb14852a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5975,12 +5975,12 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
stripe_nr = offset >> BTRFS_STRIPE_LEN_SHIFT;
/* stripe_offset is the offset of this block in its stripe */
- stripe_offset = offset - (stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
+ stripe_offset = offset - ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
stripe_nr_end = round_up(offset + length, BTRFS_STRIPE_LEN) >>
BTRFS_STRIPE_LEN_SHIFT;
stripe_cnt = stripe_nr_end - stripe_nr;
- stripe_end_offset = (stripe_nr_end << BTRFS_STRIPE_LEN_SHIFT) -
+ stripe_end_offset = ((u64)stripe_nr_end << BTRFS_STRIPE_LEN_SHIFT) -
(offset + length);
/*
* after this, stripe_nr is the number of stripes on this
@@ -6023,7 +6023,7 @@ struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
for (i = 0; i < *num_stripes; i++) {
stripes[i].physical =
map->stripes[stripe_index].physical +
- stripe_offset + (stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
+ stripe_offset + ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
stripes[i].dev = map->stripes[stripe_index].dev;
if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
@@ -6196,9 +6196,11 @@ static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
* not ensured to be power of 2.
*/
*full_stripe_start =
- rounddown(*stripe_nr, nr_data_stripes(map)) <<
+ (u64)rounddown(*stripe_nr, nr_data_stripes(map)) <<
BTRFS_STRIPE_LEN_SHIFT;
+ ASSERT(*full_stripe_start + full_stripe_len > offset);
+ ASSERT(*full_stripe_start <= offset);
/*
* For writes to RAID56, allow to write a full stripe set, but
* no straddling of stripe sets.
@@ -6221,7 +6223,7 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *
{
dst->dev = map->stripes[stripe_index].dev;
dst->physical = map->stripes[stripe_index].physical +
- stripe_offset + (stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
+ stripe_offset + ((u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT);
}
int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 5cf30827f244..b4e54d079b7d 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -370,7 +370,15 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
struct folio *folio = fbatch.folios[i];
folio_lock(folio);
- nilfs_clear_dirty_page(&folio->page, silent);
+
+ /*
+ * This folio may have been removed from the address
+ * space by truncation or invalidation when the lock
+ * was acquired. Skip processing in that case.
+ */
+ if (likely(folio->mapping == mapping))
+ nilfs_clear_dirty_page(&folio->page, silent);
+
folio_unlock(folio);
}
folio_batch_release(&fbatch);
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 1362ccb64ec7..6e59dc19a732 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -101,6 +101,12 @@ int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *segbuf)
if (unlikely(!bh))
return -ENOMEM;
+ lock_buffer(bh);
+ if (!buffer_uptodate(bh)) {
+ memset(bh->b_data, 0, bh->b_size);
+ set_buffer_uptodate(bh);
+ }
+ unlock_buffer(bh);
nilfs_segbuf_add_segsum_buffer(segbuf, bh);
return 0;
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index ac949fd7603f..c2553024bd25 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -981,10 +981,13 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
unsigned int isz, srsz;
bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
+
+ lock_buffer(bh_sr);
raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
isz = nilfs->ns_inode_size;
srsz = NILFS_SR_BYTES(isz);
+ raw_sr->sr_sum = 0; /* Ensure initialization within this update */
raw_sr->sr_bytes = cpu_to_le16(srsz);
raw_sr->sr_nongc_ctime
= cpu_to_le64(nilfs_doing_gc() ?
@@ -998,6 +1001,8 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
NILFS_SR_SUFILE_OFFSET(isz), 1);
memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
+ set_buffer_uptodate(bh_sr);
+ unlock_buffer(bh_sr);
}
static void nilfs_redirty_inodes(struct list_head *head)
@@ -1780,6 +1785,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
+ clear_buffer_uptodate(bh);
if (bh->b_page != bd_page) {
if (bd_page)
end_page_writeback(bd_page);
@@ -1791,6 +1797,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
b_assoc_buffers) {
clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
+ clear_buffer_uptodate(bh);
if (bh->b_page != bd_page) {
end_page_writeback(bd_page);
bd_page = bh->b_page;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 77f1e5778d1c..9ba4933087af 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -372,10 +372,31 @@ static int nilfs_move_2nd_super(struct super_block *sb, loff_t sb2off)
goto out;
}
nsbp = (void *)nsbh->b_data + offset;
- memset(nsbp, 0, nilfs->ns_blocksize);
+ lock_buffer(nsbh);
if (sb2i >= 0) {
+ /*
+ * The position of the second superblock only changes by 4KiB,
+ * which is larger than the maximum superblock data size
+ * (= 1KiB), so there is no need to use memmove() to allow
+ * overlap between source and destination.
+ */
memcpy(nsbp, nilfs->ns_sbp[sb2i], nilfs->ns_sbsize);
+
+ /*
+ * Zero fill after copy to avoid overwriting in case of move
+ * within the same block.
+ */
+ memset(nsbh->b_data, 0, offset);
+ memset((void *)nsbp + nilfs->ns_sbsize, 0,
+ nsbh->b_size - offset - nilfs->ns_sbsize);
+ } else {
+ memset(nsbh->b_data, 0, nsbh->b_size);
+ }
+ set_buffer_uptodate(nsbh);
+ unlock_buffer(nsbh);
+
+ if (sb2i >= 0) {
brelse(nilfs->ns_sbh[sb2i]);
nilfs->ns_sbh[sb2i] = nsbh;
nilfs->ns_sbp[sb2i] = nsbp;
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index f9b2e0f19b03..ced7a9e916f0 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -185,24 +185,31 @@ static void __handle_ksmbd_work(struct ksmbd_work *work,
goto send;
}
- if (conn->ops->check_user_session) {
- rc = conn->ops->check_user_session(work);
- if (rc < 0) {
- command = conn->ops->get_cmd_val(work);
- conn->ops->set_rsp_status(work,
- STATUS_USER_SESSION_DELETED);
- goto send;
- } else if (rc > 0) {
- rc = conn->ops->get_ksmbd_tcon(work);
+ do {
+ if (conn->ops->check_user_session) {
+ rc = conn->ops->check_user_session(work);
if (rc < 0) {
- conn->ops->set_rsp_status(work,
- STATUS_NETWORK_NAME_DELETED);
+ if (rc == -EINVAL)
+ conn->ops->set_rsp_status(work,
+ STATUS_INVALID_PARAMETER);
+ else
+ conn->ops->set_rsp_status(work,
+ STATUS_USER_SESSION_DELETED);
goto send;
+ } else if (rc > 0) {
+ rc = conn->ops->get_ksmbd_tcon(work);
+ if (rc < 0) {
+ if (rc == -EINVAL)
+ conn->ops->set_rsp_status(work,
+ STATUS_INVALID_PARAMETER);
+ else
+ conn->ops->set_rsp_status(work,
+ STATUS_NETWORK_NAME_DELETED);
+ goto send;
+ }
}
}
- }
- do {
rc = __process_request(work, conn, &command);
if (rc == SERVER_HANDLER_ABORT)
break;
diff --git a/fs/smb/server/smb2misc.c b/fs/smb/server/smb2misc.c
index 0ffe663b7590..33b7e6c4ceff 100644
--- a/fs/smb/server/smb2misc.c
+++ b/fs/smb/server/smb2misc.c
@@ -351,9 +351,16 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
int command;
__u32 clc_len; /* calculated length */
__u32 len = get_rfc1002_len(work->request_buf);
+ __u32 req_struct_size, next_cmd = le32_to_cpu(hdr->NextCommand);
- if (le32_to_cpu(hdr->NextCommand) > 0)
- len = le32_to_cpu(hdr->NextCommand);
+ if ((u64)work->next_smb2_rcv_hdr_off + next_cmd > len) {
+ pr_err("next command(%u) offset exceeds smb msg size\n",
+ next_cmd);
+ return 1;
+ }
+
+ if (next_cmd > 0)
+ len = next_cmd;
else if (work->next_smb2_rcv_hdr_off)
len -= work->next_smb2_rcv_hdr_off;
@@ -373,17 +380,9 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
}
if (smb2_req_struct_sizes[command] != pdu->StructureSize2) {
- if (command != SMB2_OPLOCK_BREAK_HE &&
- (hdr->Status == 0 || pdu->StructureSize2 != SMB2_ERROR_STRUCTURE_SIZE2_LE)) {
- /* error packets have 9 byte structure size */
- ksmbd_debug(SMB,
- "Illegal request size %u for command %d\n",
- le16_to_cpu(pdu->StructureSize2), command);
- return 1;
- } else if (command == SMB2_OPLOCK_BREAK_HE &&
- hdr->Status == 0 &&
- le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 &&
- le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) {
+ if (command == SMB2_OPLOCK_BREAK_HE &&
+ le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 &&
+ le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) {
/* special case for SMB2.1 lease break message */
ksmbd_debug(SMB,
"Illegal request size %d for oplock break\n",
@@ -392,6 +391,14 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
}
}
+ req_struct_size = le16_to_cpu(pdu->StructureSize2) +
+ __SMB2_HEADER_STRUCTURE_SIZE;
+ if (command == SMB2_LOCK_HE)
+ req_struct_size -= sizeof(struct smb2_lock_element);
+
+ if (req_struct_size > len + 1)
+ return 1;
+
if (smb2_calc_size(hdr, &clc_len))
return 1;
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index 25c0ba04c59d..da1787c68ba0 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -91,7 +91,6 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
unsigned int cmd = le16_to_cpu(req_hdr->Command);
int tree_id;
- work->tcon = NULL;
if (cmd == SMB2_TREE_CONNECT_HE ||
cmd == SMB2_CANCEL_HE ||
cmd == SMB2_LOGOFF_HE) {
@@ -105,10 +104,28 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
}
tree_id = le32_to_cpu(req_hdr->Id.SyncId.TreeId);
+
+ /*
+ * If request is not the first in Compound request,
+ * Just validate tree id in header with work->tcon->id.
+ */
+ if (work->next_smb2_rcv_hdr_off) {
+ if (!work->tcon) {
+ pr_err("The first operation in the compound does not have tcon\n");
+ return -EINVAL;
+ }
+ if (work->tcon->id != tree_id) {
+ pr_err("tree id(%u) is different with id(%u) in first operation\n",
+ tree_id, work->tcon->id);
+ return -EINVAL;
+ }
+ return 1;
+ }
+
work->tcon = ksmbd_tree_conn_lookup(work->sess, tree_id);
if (!work->tcon) {
pr_err("Invalid tid %d\n", tree_id);
- return -EINVAL;
+ return -ENOENT;
}
return 1;
@@ -547,7 +564,6 @@ int smb2_check_user_session(struct ksmbd_work *work)
unsigned int cmd = conn->ops->get_cmd_val(work);
unsigned long long sess_id;
- work->sess = NULL;
/*
* SMB2_ECHO, SMB2_NEGOTIATE, SMB2_SESSION_SETUP command do not
* require a session id, so no need to validate user session's for
@@ -558,15 +574,33 @@ int smb2_check_user_session(struct ksmbd_work *work)
return 0;
if (!ksmbd_conn_good(conn))
- return -EINVAL;
+ return -EIO;
sess_id = le64_to_cpu(req_hdr->SessionId);
+
+ /*
+ * If request is not the first in Compound request,
+ * Just validate session id in header with work->sess->id.
+ */
+ if (work->next_smb2_rcv_hdr_off) {
+ if (!work->sess) {
+ pr_err("The first operation in the compound does not have sess\n");
+ return -EINVAL;
+ }
+ if (work->sess->id != sess_id) {
+ pr_err("session id(%llu) is different with the first operation(%lld)\n",
+ sess_id, work->sess->id);
+ return -EINVAL;
+ }
+ return 1;
+ }
+
/* Check for validity of user session */
work->sess = ksmbd_session_lookup_all(conn, sess_id);
if (work->sess)
return 1;
ksmbd_debug(SMB, "Invalid user session, Uid %llu\n", sess_id);
- return -EINVAL;
+ return -ENOENT;
}
static void destroy_previous_session(struct ksmbd_conn *conn,
@@ -2249,7 +2283,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
/* delete the EA only when it exits */
if (rc > 0) {
rc = ksmbd_vfs_remove_xattr(idmap,
- path->dentry,
+ path,
attr_name);
if (rc < 0) {
@@ -2263,8 +2297,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len,
/* if the EA doesn't exist, just do nothing. */
rc = 0;
} else {
- rc = ksmbd_vfs_setxattr(idmap,
- path->dentry, attr_name, value,
+ rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value,
le16_to_cpu(eabuf->EaValueLength), 0);
if (rc < 0) {
ksmbd_debug(SMB,
@@ -2321,8 +2354,7 @@ static noinline int smb2_set_stream_name_xattr(const struct path *path,
return -EBADF;
}
- rc = ksmbd_vfs_setxattr(idmap, path->dentry,
- xattr_stream_name, NULL, 0, 0);
+ rc = ksmbd_vfs_setxattr(idmap, path, xattr_stream_name, NULL, 0, 0);
if (rc < 0)
pr_err("Failed to store XATTR stream name :%d\n", rc);
return 0;
@@ -2350,7 +2382,7 @@ static int smb2_remove_smb_xattrs(const struct path *path)
if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
!strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX,
STREAM_PREFIX_LEN)) {
- err = ksmbd_vfs_remove_xattr(idmap, path->dentry,
+ err = ksmbd_vfs_remove_xattr(idmap, path,
name);
if (err)
ksmbd_debug(SMB, "remove xattr failed : %s\n",
@@ -2397,8 +2429,7 @@ static void smb2_new_xattrs(struct ksmbd_tree_connect *tcon, const struct path *
da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
XATTR_DOSINFO_ITIME;
- rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt),
- path->dentry, &da);
+ rc = ksmbd_vfs_set_dos_attrib_xattr(mnt_idmap(path->mnt), path, &da);
if (rc)
ksmbd_debug(SMB, "failed to store file attribute into xattr\n");
}
@@ -2972,7 +3003,7 @@ int smb2_open(struct ksmbd_work *work)
struct inode *inode = d_inode(path.dentry);
posix_acl_rc = ksmbd_vfs_inherit_posix_acl(idmap,
- path.dentry,
+ &path,
d_inode(path.dentry->d_parent));
if (posix_acl_rc)
ksmbd_debug(SMB, "inherit posix acl failed : %d\n", posix_acl_rc);
@@ -2988,7 +3019,7 @@ int smb2_open(struct ksmbd_work *work)
if (rc) {
if (posix_acl_rc)
ksmbd_vfs_set_init_posix_acl(idmap,
- path.dentry);
+ &path);
if (test_share_config_flag(work->tcon->share_conf,
KSMBD_SHARE_FLAG_ACL_XATTR)) {
@@ -3028,7 +3059,7 @@ int smb2_open(struct ksmbd_work *work)
rc = ksmbd_vfs_set_sd_xattr(conn,
idmap,
- path.dentry,
+ &path,
pntsd,
pntsd_size);
kfree(pntsd);
@@ -5464,7 +5495,7 @@ static int smb2_rename(struct ksmbd_work *work,
goto out;
rc = ksmbd_vfs_setxattr(file_mnt_idmap(fp->filp),
- fp->filp->f_path.dentry,
+ &fp->filp->f_path,
xattr_stream_name,
NULL, 0, 0);
if (rc < 0) {
@@ -5629,8 +5660,7 @@ static int set_file_basic_info(struct ksmbd_file *fp,
da.flags = XATTR_DOSINFO_ATTRIB | XATTR_DOSINFO_CREATE_TIME |
XATTR_DOSINFO_ITIME;
- rc = ksmbd_vfs_set_dos_attrib_xattr(idmap,
- filp->f_path.dentry, &da);
+ rc = ksmbd_vfs_set_dos_attrib_xattr(idmap, &filp->f_path, &da);
if (rc)
ksmbd_debug(SMB,
"failed to restore file attribute in EA\n");
@@ -7485,7 +7515,7 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id,
da.attr = le32_to_cpu(fp->f_ci->m_fattr);
ret = ksmbd_vfs_set_dos_attrib_xattr(idmap,
- fp->filp->f_path.dentry, &da);
+ &fp->filp->f_path, &da);
if (ret)
fp->f_ci->m_fattr = old_fattr;
}
diff --git a/fs/smb/server/smbacl.c b/fs/smb/server/smbacl.c
index 0a5862a61c77..ad919a4239d0 100644
--- a/fs/smb/server/smbacl.c
+++ b/fs/smb/server/smbacl.c
@@ -1162,8 +1162,7 @@ pass:
pntsd_size += sizeof(struct smb_acl) + nt_size;
}
- ksmbd_vfs_set_sd_xattr(conn, idmap,
- path->dentry, pntsd, pntsd_size);
+ ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, pntsd_size);
kfree(pntsd);
}
@@ -1383,7 +1382,7 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
newattrs.ia_valid |= ATTR_MODE;
newattrs.ia_mode = (inode->i_mode & ~0777) | (fattr.cf_mode & 0777);
- ksmbd_vfs_remove_acl_xattrs(idmap, path->dentry);
+ ksmbd_vfs_remove_acl_xattrs(idmap, path);
/* Update posix acls */
if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && fattr.cf_dacls) {
rc = set_posix_acl(idmap, path->dentry,
@@ -1414,9 +1413,8 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
if (test_share_config_flag(tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) {
/* Update WinACL in xattr */
- ksmbd_vfs_remove_sd_xattrs(idmap, path->dentry);
- ksmbd_vfs_set_sd_xattr(conn, idmap,
- path->dentry, pntsd, ntsd_len);
+ ksmbd_vfs_remove_sd_xattrs(idmap, path);
+ ksmbd_vfs_set_sd_xattr(conn, idmap, path, pntsd, ntsd_len);
}
out:
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index f9fb778247e7..81489fdedd8e 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -170,6 +170,10 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
return err;
}
+ err = mnt_want_write(path.mnt);
+ if (err)
+ goto out_err;
+
mode |= S_IFREG;
err = vfs_create(mnt_idmap(path.mnt), d_inode(path.dentry),
dentry, mode, true);
@@ -179,6 +183,9 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
} else {
pr_err("File(%s): creation failed (err:%d)\n", name, err);
}
+ mnt_drop_write(path.mnt);
+
+out_err:
done_path_create(&path, dentry);
return err;
}
@@ -209,30 +216,35 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
return err;
}
+ err = mnt_want_write(path.mnt);
+ if (err)
+ goto out_err2;
+
idmap = mnt_idmap(path.mnt);
mode |= S_IFDIR;
err = vfs_mkdir(idmap, d_inode(path.dentry), dentry, mode);
- if (err) {
- goto out;
- } else if (d_unhashed(dentry)) {
+ if (!err && d_unhashed(dentry)) {
struct dentry *d;
d = lookup_one(idmap, dentry->d_name.name, dentry->d_parent,
dentry->d_name.len);
if (IS_ERR(d)) {
err = PTR_ERR(d);
- goto out;
+ goto out_err1;
}
if (unlikely(d_is_negative(d))) {
dput(d);
err = -ENOENT;
- goto out;
+ goto out_err1;
}
ksmbd_vfs_inherit_owner(work, d_inode(path.dentry), d_inode(d));
dput(d);
}
-out:
+
+out_err1:
+ mnt_drop_write(path.mnt);
+out_err2:
done_path_create(&path, dentry);
if (err)
pr_err("mkdir(%s): creation failed (err:%d)\n", name, err);
@@ -443,7 +455,7 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
memcpy(&stream_buf[*pos], buf, count);
err = ksmbd_vfs_setxattr(idmap,
- fp->filp->f_path.dentry,
+ &fp->filp->f_path,
fp->stream.name,
(void *)stream_buf,
size,
@@ -589,6 +601,10 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path)
goto out_err;
}
+ err = mnt_want_write(path->mnt);
+ if (err)
+ goto out_err;
+
idmap = mnt_idmap(path->mnt);
if (S_ISDIR(d_inode(path->dentry)->i_mode)) {
err = vfs_rmdir(idmap, d_inode(parent), path->dentry);
@@ -599,6 +615,7 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, const struct path *path)
if (err)
ksmbd_debug(VFS, "unlink failed, err %d\n", err);
}
+ mnt_drop_write(path->mnt);
out_err:
ksmbd_revert_fsids(work);
@@ -644,11 +661,16 @@ int ksmbd_vfs_link(struct ksmbd_work *work, const char *oldname,
goto out3;
}
+ err = mnt_want_write(newpath.mnt);
+ if (err)
+ goto out3;
+
err = vfs_link(oldpath.dentry, mnt_idmap(newpath.mnt),
d_inode(newpath.dentry),
dentry, NULL);
if (err)
ksmbd_debug(VFS, "vfs_link failed err %d\n", err);
+ mnt_drop_write(newpath.mnt);
out3:
done_path_create(&newpath, dentry);
@@ -694,6 +716,10 @@ retry:
goto out2;
}
+ err = mnt_want_write(old_path->mnt);
+ if (err)
+ goto out2;
+
trap = lock_rename_child(old_child, new_path.dentry);
old_parent = dget(old_child->d_parent);
@@ -757,6 +783,7 @@ out4:
out3:
dput(old_parent);
unlock_rename(old_parent, new_path.dentry);
+ mnt_drop_write(old_path->mnt);
out2:
path_put(&new_path);
@@ -897,19 +924,24 @@ ssize_t ksmbd_vfs_getxattr(struct mnt_idmap *idmap,
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *attr_name,
+ const struct path *path, const char *attr_name,
void *attr_value, size_t attr_size, int flags)
{
int err;
+ err = mnt_want_write(path->mnt);
+ if (err)
+ return err;
+
err = vfs_setxattr(idmap,
- dentry,
+ path->dentry,
attr_name,
attr_value,
attr_size,
flags);
if (err)
ksmbd_debug(VFS, "setxattr failed, err %d\n", err);
+ mnt_drop_write(path->mnt);
return err;
}
@@ -1013,9 +1045,18 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
}
int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
- struct dentry *dentry, char *attr_name)
+ const struct path *path, char *attr_name)
{
- return vfs_removexattr(idmap, dentry, attr_name);
+ int err;
+
+ err = mnt_want_write(path->mnt);
+ if (err)
+ return err;
+
+ err = vfs_removexattr(idmap, path->dentry, attr_name);
+ mnt_drop_write(path->mnt);
+
+ return err;
}
int ksmbd_vfs_unlink(struct file *filp)
@@ -1024,6 +1065,10 @@ int ksmbd_vfs_unlink(struct file *filp)
struct dentry *dir, *dentry = filp->f_path.dentry;
struct mnt_idmap *idmap = file_mnt_idmap(filp);
+ err = mnt_want_write(filp->f_path.mnt);
+ if (err)
+ return err;
+
dir = dget_parent(dentry);
err = ksmbd_vfs_lock_parent(dir, dentry);
if (err)
@@ -1041,6 +1086,7 @@ int ksmbd_vfs_unlink(struct file *filp)
ksmbd_debug(VFS, "failed to delete, err %d\n", err);
out:
dput(dir);
+ mnt_drop_write(filp->f_path.mnt);
return err;
}
@@ -1244,13 +1290,13 @@ struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
}
int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap,
- struct dentry *dentry)
+ const struct path *path)
{
char *name, *xattr_list = NULL;
ssize_t xattr_list_len;
int err = 0;
- xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+ xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
if (xattr_list_len < 0) {
goto out;
} else if (!xattr_list_len) {
@@ -1258,6 +1304,10 @@ int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap,
goto out;
}
+ err = mnt_want_write(path->mnt);
+ if (err)
+ goto out;
+
for (name = xattr_list; name - xattr_list < xattr_list_len;
name += strlen(name) + 1) {
ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
@@ -1266,25 +1316,26 @@ int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap,
sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1) ||
!strncmp(name, XATTR_NAME_POSIX_ACL_DEFAULT,
sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1)) {
- err = vfs_remove_acl(idmap, dentry, name);
+ err = vfs_remove_acl(idmap, path->dentry, name);
if (err)
ksmbd_debug(SMB,
"remove acl xattr failed : %s\n", name);
}
}
+ mnt_drop_write(path->mnt);
+
out:
kvfree(xattr_list);
return err;
}
-int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap,
- struct dentry *dentry)
+int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path)
{
char *name, *xattr_list = NULL;
ssize_t xattr_list_len;
int err = 0;
- xattr_list_len = ksmbd_vfs_listxattr(dentry, &xattr_list);
+ xattr_list_len = ksmbd_vfs_listxattr(path->dentry, &xattr_list);
if (xattr_list_len < 0) {
goto out;
} else if (!xattr_list_len) {
@@ -1297,7 +1348,7 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap,
ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name));
if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) {
- err = ksmbd_vfs_remove_xattr(idmap, dentry, name);
+ err = ksmbd_vfs_remove_xattr(idmap, path, name);
if (err)
ksmbd_debug(SMB, "remove xattr failed : %s\n", name);
}
@@ -1374,13 +1425,14 @@ out:
int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
struct mnt_idmap *idmap,
- struct dentry *dentry,
+ const struct path *path,
struct smb_ntsd *pntsd, int len)
{
int rc;
struct ndr sd_ndr = {0}, acl_ndr = {0};
struct xattr_ntacl acl = {0};
struct xattr_smb_acl *smb_acl, *def_smb_acl = NULL;
+ struct dentry *dentry = path->dentry;
struct inode *inode = d_inode(dentry);
acl.version = 4;
@@ -1432,7 +1484,7 @@ int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
goto out;
}
- rc = ksmbd_vfs_setxattr(idmap, dentry,
+ rc = ksmbd_vfs_setxattr(idmap, path,
XATTR_NAME_SD, sd_ndr.data,
sd_ndr.offset, 0);
if (rc < 0)
@@ -1522,7 +1574,7 @@ free_n_data:
}
int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
- struct dentry *dentry,
+ const struct path *path,
struct xattr_dos_attrib *da)
{
struct ndr n;
@@ -1532,7 +1584,7 @@ int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
if (err)
return err;
- err = ksmbd_vfs_setxattr(idmap, dentry, XATTR_NAME_DOS_ATTRIBUTE,
+ err = ksmbd_vfs_setxattr(idmap, path, XATTR_NAME_DOS_ATTRIBUTE,
(void *)n.data, n.offset, 0);
if (err)
ksmbd_debug(SMB, "failed to store dos attribute in xattr\n");
@@ -1769,10 +1821,11 @@ void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock)
}
int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
- struct dentry *dentry)
+ struct path *path)
{
struct posix_acl_state acl_state;
struct posix_acl *acls;
+ struct dentry *dentry = path->dentry;
struct inode *inode = d_inode(dentry);
int rc;
@@ -1802,6 +1855,11 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
return -ENOMEM;
}
posix_state_to_acl(&acl_state, acls->a_entries);
+
+ rc = mnt_want_write(path->mnt);
+ if (rc)
+ goto out_err;
+
rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@ -1813,16 +1871,20 @@ int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
rc);
}
+ mnt_drop_write(path->mnt);
+
+out_err:
free_acl_state(&acl_state);
posix_acl_release(acls);
return rc;
}
int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
- struct dentry *dentry, struct inode *parent_inode)
+ struct path *path, struct inode *parent_inode)
{
struct posix_acl *acls;
struct posix_acl_entry *pace;
+ struct dentry *dentry = path->dentry;
struct inode *inode = d_inode(dentry);
int rc, i;
@@ -1841,6 +1903,10 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
}
}
+ rc = mnt_want_write(path->mnt);
+ if (rc)
+ goto out_err;
+
rc = set_posix_acl(idmap, dentry, ACL_TYPE_ACCESS, acls);
if (rc < 0)
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
@@ -1852,6 +1918,9 @@ int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
ksmbd_debug(SMB, "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
rc);
}
+ mnt_drop_write(path->mnt);
+
+out_err:
posix_acl_release(acls);
return rc;
}
diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h
index a4ae89f3230d..8c0931d4d531 100644
--- a/fs/smb/server/vfs.h
+++ b/fs/smb/server/vfs.h
@@ -108,12 +108,12 @@ ssize_t ksmbd_vfs_casexattr_len(struct mnt_idmap *idmap,
struct dentry *dentry, char *attr_name,
int attr_name_len);
int ksmbd_vfs_setxattr(struct mnt_idmap *idmap,
- struct dentry *dentry, const char *attr_name,
+ const struct path *path, const char *attr_name,
void *attr_value, size_t attr_size, int flags);
int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
size_t *xattr_stream_name_size, int s_type);
int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
- struct dentry *dentry, char *attr_name);
+ const struct path *path, char *attr_name);
int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
unsigned int flags, struct path *path,
bool caseless);
@@ -139,26 +139,25 @@ void ksmbd_vfs_posix_lock_wait(struct file_lock *flock);
int ksmbd_vfs_posix_lock_wait_timeout(struct file_lock *flock, long timeout);
void ksmbd_vfs_posix_lock_unblock(struct file_lock *flock);
int ksmbd_vfs_remove_acl_xattrs(struct mnt_idmap *idmap,
- struct dentry *dentry);
-int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap,
- struct dentry *dentry);
+ const struct path *path);
+int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path);
int ksmbd_vfs_set_sd_xattr(struct ksmbd_conn *conn,
struct mnt_idmap *idmap,
- struct dentry *dentry,
+ const struct path *path,
struct smb_ntsd *pntsd, int len);
int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
struct mnt_idmap *idmap,
struct dentry *dentry,
struct smb_ntsd **pntsd);
int ksmbd_vfs_set_dos_attrib_xattr(struct mnt_idmap *idmap,
- struct dentry *dentry,
+ const struct path *path,
struct xattr_dos_attrib *da);
int ksmbd_vfs_get_dos_attrib_xattr(struct mnt_idmap *idmap,
struct dentry *dentry,
struct xattr_dos_attrib *da);
int ksmbd_vfs_set_init_posix_acl(struct mnt_idmap *idmap,
- struct dentry *dentry);
+ struct path *path);
int ksmbd_vfs_inherit_posix_acl(struct mnt_idmap *idmap,
- struct dentry *dentry,
+ struct path *path,
struct inode *parent_inode);
#endif /* __KSMBD_VFS_H__ */
diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c
index 2d0138e72d78..f41f8d6108ce 100644
--- a/fs/smb/server/vfs_cache.c
+++ b/fs/smb/server/vfs_cache.c
@@ -252,7 +252,7 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp)
if (ksmbd_stream_fd(fp) && (ci->m_flags & S_DEL_ON_CLS_STREAM)) {
ci->m_flags &= ~S_DEL_ON_CLS_STREAM;
err = ksmbd_vfs_remove_xattr(file_mnt_idmap(filp),
- filp->f_path.dentry,
+ &filp->f_path,
fp->stream.name);
if (err)
pr_err("remove xattr failed : %s\n",
diff --git a/fs/super.c b/fs/super.c
index 34afe411cf2b..04bc62ab7dfe 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -54,7 +54,7 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
* One thing we have to be careful of with a per-sb shrinker is that we don't
* drop the last active reference to the superblock from within the shrinker.
* If that happens we could trigger unregistering the shrinker from within the
- * shrinker path and that leads to deadlock on the shrinker_mutex. Hence we
+ * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
* take a passive reference to the superblock to avoid this from occurring.
*/
static unsigned long super_cache_scan(struct shrinker *shrink,
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index e6098a08c914..9ffdc0425bc2 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -761,6 +761,7 @@ ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
acpi_event_status
*event_status))
ACPI_HW_DEPENDENT_RETURN_UINT32(u32 acpi_dispatch_gpe(acpi_handle gpe_device, u32 gpe_number))
+ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_hw_disable_all_gpes(void))
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_disable_all_gpes(void))
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_runtime_gpes(void))
ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status acpi_enable_all_wakeup_gpes(void))
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 0f1001dca0e0..3ceb9dfa0993 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -200,6 +200,7 @@ enum cpuhp_state {
/* Online section invoked on the hotplugged CPU from the hotplug thread */
CPUHP_AP_ONLINE_IDLE,
+ CPUHP_AP_HYPERV_ONLINE,
CPUHP_AP_KVM_ONLINE,
CPUHP_AP_SCHED_WAIT_EMPTY,
CPUHP_AP_SMPBOOT_THREADS,
diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h
index 3c01c2bf84f5..505c908dbb81 100644
--- a/include/linux/regulator/pca9450.h
+++ b/include/linux/regulator/pca9450.h
@@ -196,11 +196,11 @@ enum {
/* PCA9450_REG_LDO3_VOLT bits */
#define LDO3_EN_MASK 0xC0
-#define LDO3OUT_MASK 0x0F
+#define LDO3OUT_MASK 0x1F
/* PCA9450_REG_LDO4_VOLT bits */
#define LDO4_EN_MASK 0xC0
-#define LDO4OUT_MASK 0x0F
+#define LDO4OUT_MASK 0x1F
/* PCA9450_REG_LDO5_VOLT bits */
#define LDO5L_EN_MASK 0xC0
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 86b2a82da546..54e353c9f919 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -68,7 +68,7 @@ DECLARE_EVENT_CLASS(writeback_folio_template,
strscpy_pad(__entry->name,
bdi_dev_name(mapping ? inode_to_bdi(mapping->host) :
NULL), 32);
- __entry->ino = mapping ? mapping->host->i_ino : 0;
+ __entry->ino = (mapping && mapping->host) ? mapping->host->i_ino : 0;
__entry->index = folio->index;
),
diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index dbb14705d0d3..8df0550415e7 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -50,6 +50,18 @@
#define EVENT_STATUS_OTHER BIT(7)
/*
+ * User register flags are not allowed yet, keep them here until we are
+ * ready to expose them out to the user ABI.
+ */
+enum user_reg_flag {
+ /* Event will not delete upon last reference closing */
+ USER_EVENT_REG_PERSIST = 1U << 0,
+
+ /* This value or above is currently non-ABI */
+ USER_EVENT_REG_MAX = 1U << 1,
+};
+
+/*
* Stores the system name, tables, and locks for a group of events. This
* allows isolation for events by various means.
*/
@@ -85,8 +97,10 @@ struct user_event {
struct hlist_node node;
struct list_head fields;
struct list_head validators;
+ struct work_struct put_work;
refcount_t refcnt;
int min_size;
+ int reg_flags;
char status;
};
@@ -165,76 +179,151 @@ typedef void (*user_event_func_t) (struct user_event *user, struct iov_iter *i,
static int user_event_parse(struct user_event_group *group, char *name,
char *args, char *flags,
- struct user_event **newuser);
+ struct user_event **newuser, int reg_flags);
static struct user_event_mm *user_event_mm_get(struct user_event_mm *mm);
static struct user_event_mm *user_event_mm_get_all(struct user_event *user);
static void user_event_mm_put(struct user_event_mm *mm);
+static int destroy_user_event(struct user_event *user);
static u32 user_event_key(char *name)
{
return jhash(name, strlen(name), 0);
}
-static void user_event_group_destroy(struct user_event_group *group)
+static struct user_event *user_event_get(struct user_event *user)
{
- kfree(group->system_name);
- kfree(group);
+ refcount_inc(&user->refcnt);
+
+ return user;
}
-static char *user_event_group_system_name(struct user_namespace *user_ns)
+static void delayed_destroy_user_event(struct work_struct *work)
{
- char *system_name;
- int len = sizeof(USER_EVENTS_SYSTEM) + 1;
+ struct user_event *user = container_of(
+ work, struct user_event, put_work);
- if (user_ns != &init_user_ns) {
+ mutex_lock(&event_mutex);
+
+ if (!refcount_dec_and_test(&user->refcnt))
+ goto out;
+
+ if (destroy_user_event(user)) {
/*
- * Unexpected at this point:
- * We only currently support init_user_ns.
- * When we enable more, this will trigger a failure so log.
+ * The only reason this would fail here is if we cannot
+ * update the visibility of the event. In this case the
+ * event stays in the hashtable, waiting for someone to
+ * attempt to delete it later.
*/
- pr_warn("user_events: Namespace other than init_user_ns!\n");
- return NULL;
+ pr_warn("user_events: Unable to delete event\n");
+ refcount_set(&user->refcnt, 1);
}
+out:
+ mutex_unlock(&event_mutex);
+}
- system_name = kmalloc(len, GFP_KERNEL);
+static void user_event_put(struct user_event *user, bool locked)
+{
+ bool delete;
- if (!system_name)
- return NULL;
+ if (unlikely(!user))
+ return;
- snprintf(system_name, len, "%s", USER_EVENTS_SYSTEM);
+ /*
+ * When the event is not enabled for auto-delete there will always
+ * be at least 1 reference to the event. During the event creation
+ * we initially set the refcnt to 2 to achieve this. In those cases
+ * the caller must acquire event_mutex and after decrement check if
+ * the refcnt is 1, meaning this is the last reference. When auto
+ * delete is enabled, there will only be 1 ref, IE: refcnt will be
+ * only set to 1 during creation to allow the below checks to go
+ * through upon the last put. The last put must always be done with
+ * the event mutex held.
+ */
+ if (!locked) {
+ lockdep_assert_not_held(&event_mutex);
+ delete = refcount_dec_and_mutex_lock(&user->refcnt, &event_mutex);
+ } else {
+ lockdep_assert_held(&event_mutex);
+ delete = refcount_dec_and_test(&user->refcnt);
+ }
- return system_name;
+ if (!delete)
+ return;
+
+ /*
+ * We now have the event_mutex in all cases, which ensures that
+ * no new references will be taken until event_mutex is released.
+ * New references come through find_user_event(), which requires
+ * the event_mutex to be held.
+ */
+
+ if (user->reg_flags & USER_EVENT_REG_PERSIST) {
+ /* We should not get here when persist flag is set */
+ pr_alert("BUG: Auto-delete engaged on persistent event\n");
+ goto out;
+ }
+
+ /*
+ * Unfortunately we have to attempt the actual destroy in a work
+ * queue. This is because not all cases handle a trace_event_call
+ * being removed within the class->reg() operation for unregister.
+ */
+ INIT_WORK(&user->put_work, delayed_destroy_user_event);
+
+ /*
+ * Since the event is still in the hashtable, we have to re-inc
+ * the ref count to 1. This count will be decremented and checked
+ * in the work queue to ensure it's still the last ref. This is
+ * needed because a user-process could register the same event in
+ * between the time of event_mutex release and the work queue
+ * running the delayed destroy. If we removed the item now from
+ * the hashtable, this would result in a timing window where a
+ * user process would fail a register because the trace_event_call
+ * register would fail in the tracing layers.
+ */
+ refcount_set(&user->refcnt, 1);
+
+ if (WARN_ON_ONCE(!schedule_work(&user->put_work))) {
+ /*
+ * If we fail we must wait for an admin to attempt delete or
+ * another register/close of the event, whichever is first.
+ */
+ pr_warn("user_events: Unable to queue delayed destroy\n");
+ }
+out:
+ /* Ensure if we didn't have event_mutex before we unlock it */
+ if (!locked)
+ mutex_unlock(&event_mutex);
}
-static inline struct user_event_group
-*user_event_group_from_user_ns(struct user_namespace *user_ns)
+static void user_event_group_destroy(struct user_event_group *group)
{
- if (user_ns == &init_user_ns)
- return init_group;
-
- return NULL;
+ kfree(group->system_name);
+ kfree(group);
}
-static struct user_event_group *current_user_event_group(void)
+static char *user_event_group_system_name(void)
{
- struct user_namespace *user_ns = current_user_ns();
- struct user_event_group *group = NULL;
+ char *system_name;
+ int len = sizeof(USER_EVENTS_SYSTEM) + 1;
- while (user_ns) {
- group = user_event_group_from_user_ns(user_ns);
+ system_name = kmalloc(len, GFP_KERNEL);
- if (group)
- break;
+ if (!system_name)
+ return NULL;
- user_ns = user_ns->parent;
- }
+ snprintf(system_name, len, "%s", USER_EVENTS_SYSTEM);
- return group;
+ return system_name;
}
-static struct user_event_group
-*user_event_group_create(struct user_namespace *user_ns)
+static struct user_event_group *current_user_event_group(void)
+{
+ return init_group;
+}
+
+static struct user_event_group *user_event_group_create(void)
{
struct user_event_group *group;
@@ -243,7 +332,7 @@ static struct user_event_group
if (!group)
return NULL;
- group->system_name = user_event_group_system_name(user_ns);
+ group->system_name = user_event_group_system_name();
if (!group->system_name)
goto error;
@@ -259,12 +348,13 @@ error:
return NULL;
};
-static void user_event_enabler_destroy(struct user_event_enabler *enabler)
+static void user_event_enabler_destroy(struct user_event_enabler *enabler,
+ bool locked)
{
list_del_rcu(&enabler->mm_enablers_link);
/* No longer tracking the event via the enabler */
- refcount_dec(&enabler->event->refcnt);
+ user_event_put(enabler->event, locked);
kfree(enabler);
}
@@ -326,7 +416,7 @@ static void user_event_enabler_fault_fixup(struct work_struct *work)
/* User asked for enabler to be removed during fault */
if (test_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler))) {
- user_event_enabler_destroy(enabler);
+ user_event_enabler_destroy(enabler, true);
goto out;
}
@@ -501,14 +591,12 @@ static bool user_event_enabler_dup(struct user_event_enabler *orig,
if (!enabler)
return false;
- enabler->event = orig->event;
+ enabler->event = user_event_get(orig->event);
enabler->addr = orig->addr;
/* Only dup part of value (ignore future flags, etc) */
enabler->values = orig->values & ENABLE_VAL_DUP_MASK;
- refcount_inc(&enabler->event->refcnt);
-
/* Enablers not exposed yet, RCU not required */
list_add(&enabler->mm_enablers_link, &mm->enablers);
@@ -625,7 +713,7 @@ static void user_event_mm_destroy(struct user_event_mm *mm)
struct user_event_enabler *enabler, *next;
list_for_each_entry_safe(enabler, next, &mm->enablers, mm_enablers_link)
- user_event_enabler_destroy(enabler);
+ user_event_enabler_destroy(enabler, false);
mmdrop(mm->mm);
kfree(mm);
@@ -780,7 +868,7 @@ retry:
* exit or run exec(), which includes forks and clones.
*/
if (!*write_result) {
- refcount_inc(&enabler->event->refcnt);
+ user_event_get(user);
list_add_rcu(&enabler->mm_enablers_link, &user_mm->enablers);
}
@@ -803,7 +891,12 @@ out:
static __always_inline __must_check
bool user_event_last_ref(struct user_event *user)
{
- return refcount_read(&user->refcnt) == 1;
+ int last = 0;
+
+ if (user->reg_flags & USER_EVENT_REG_PERSIST)
+ last = 1;
+
+ return refcount_read(&user->refcnt) == last;
}
static __always_inline __must_check
@@ -842,7 +935,8 @@ static struct list_head *user_event_get_fields(struct trace_event_call *call)
* Upon success user_event has its ref count increased by 1.
*/
static int user_event_parse_cmd(struct user_event_group *group,
- char *raw_command, struct user_event **newuser)
+ char *raw_command, struct user_event **newuser,
+ int reg_flags)
{
char *name = raw_command;
char *args = strpbrk(name, " ");
@@ -856,7 +950,7 @@ static int user_event_parse_cmd(struct user_event_group *group,
if (flags)
*flags++ = '\0';
- return user_event_parse(group, name, args, flags, newuser);
+ return user_event_parse(group, name, args, flags, newuser, reg_flags);
}
static int user_field_array_size(const char *type)
@@ -1367,10 +1461,8 @@ static struct user_event *find_user_event(struct user_event_group *group,
*outkey = key;
hash_for_each_possible(group->register_table, user, node, key)
- if (!strcmp(EVENT_NAME(user), name)) {
- refcount_inc(&user->refcnt);
- return user;
- }
+ if (!strcmp(EVENT_NAME(user), name))
+ return user_event_get(user);
return NULL;
}
@@ -1432,7 +1524,7 @@ static void user_event_ftrace(struct user_event *user, struct iov_iter *i,
if (unlikely(!entry))
return;
- if (unlikely(!copy_nofault(entry + 1, i->count, i)))
+ if (unlikely(i->count != 0 && !copy_nofault(entry + 1, i->count, i)))
goto discard;
if (!list_empty(&user->validators) &&
@@ -1473,7 +1565,7 @@ static void user_event_perf(struct user_event *user, struct iov_iter *i,
perf_fetch_caller_regs(regs);
- if (unlikely(!copy_nofault(perf_entry + 1, i->count, i)))
+ if (unlikely(i->count != 0 && !copy_nofault(perf_entry + 1, i->count, i)))
goto discard;
if (!list_empty(&user->validators) &&
@@ -1584,12 +1676,12 @@ static int user_event_reg(struct trace_event_call *call,
return ret;
inc:
- refcount_inc(&user->refcnt);
+ user_event_get(user);
update_enable_bit_for(user);
return 0;
dec:
update_enable_bit_for(user);
- refcount_dec(&user->refcnt);
+ user_event_put(user, true);
return 0;
}
@@ -1620,10 +1712,11 @@ static int user_event_create(const char *raw_command)
mutex_lock(&group->reg_mutex);
- ret = user_event_parse_cmd(group, name, &user);
+ /* Dyn events persist, otherwise they would cleanup immediately */
+ ret = user_event_parse_cmd(group, name, &user, USER_EVENT_REG_PERSIST);
if (!ret)
- refcount_dec(&user->refcnt);
+ user_event_put(user, false);
mutex_unlock(&group->reg_mutex);
@@ -1745,6 +1838,8 @@ static bool user_event_match(const char *system, const char *event,
if (match && argc > 0)
match = user_fields_match(user, argc, argv);
+ else if (match && argc == 0)
+ match = list_empty(&user->fields);
return match;
}
@@ -1781,11 +1876,17 @@ static int user_event_trace_register(struct user_event *user)
*/
static int user_event_parse(struct user_event_group *group, char *name,
char *args, char *flags,
- struct user_event **newuser)
+ struct user_event **newuser, int reg_flags)
{
int ret;
u32 key;
struct user_event *user;
+ int argc = 0;
+ char **argv;
+
+ /* User register flags are not ready yet */
+ if (reg_flags != 0 || flags != NULL)
+ return -EINVAL;
/* Prevent dyn_event from racing */
mutex_lock(&event_mutex);
@@ -1793,13 +1894,35 @@ static int user_event_parse(struct user_event_group *group, char *name,
mutex_unlock(&event_mutex);
if (user) {
- *newuser = user;
- /*
- * Name is allocated by caller, free it since it already exists.
- * Caller only worries about failure cases for freeing.
- */
- kfree(name);
+ if (args) {
+ argv = argv_split(GFP_KERNEL, args, &argc);
+ if (!argv) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ ret = user_fields_match(user, argc, (const char **)argv);
+ argv_free(argv);
+
+ } else
+ ret = list_empty(&user->fields);
+
+ if (ret) {
+ *newuser = user;
+ /*
+ * Name is allocated by caller, free it since it already exists.
+ * Caller only worries about failure cases for freeing.
+ */
+ kfree(name);
+ } else {
+ ret = -EADDRINUSE;
+ goto error;
+ }
+
return 0;
+error:
+ user_event_put(user, false);
+ return ret;
}
user = kzalloc(sizeof(*user), GFP_KERNEL_ACCOUNT);
@@ -1852,8 +1975,15 @@ static int user_event_parse(struct user_event_group *group, char *name,
if (ret)
goto put_user_lock;
- /* Ensure we track self ref and caller ref (2) */
- refcount_set(&user->refcnt, 2);
+ user->reg_flags = reg_flags;
+
+ if (user->reg_flags & USER_EVENT_REG_PERSIST) {
+ /* Ensure we track self ref and caller ref (2) */
+ refcount_set(&user->refcnt, 2);
+ } else {
+ /* Ensure we track only caller ref (1) */
+ refcount_set(&user->refcnt, 1);
+ }
dyn_event_init(&user->devent, &user_event_dops);
dyn_event_add(&user->devent, &user->call);
@@ -1885,7 +2015,7 @@ static int delete_user_event(struct user_event_group *group, char *name)
if (!user)
return -ENOENT;
- refcount_dec(&user->refcnt);
+ user_event_put(user, true);
if (!user_event_last_ref(user))
return -EBUSY;
@@ -2044,9 +2174,7 @@ static int user_events_ref_add(struct user_event_file_info *info,
for (i = 0; i < count; ++i)
new_refs->events[i] = refs->events[i];
- new_refs->events[i] = user;
-
- refcount_inc(&user->refcnt);
+ new_refs->events[i] = user_event_get(user);
rcu_assign_pointer(info->refs, new_refs);
@@ -2077,8 +2205,8 @@ static long user_reg_get(struct user_reg __user *ureg, struct user_reg *kreg)
if (ret)
return ret;
- /* Ensure no flags, since we don't support any yet */
- if (kreg->flags != 0)
+ /* Ensure only valid flags */
+ if (kreg->flags & ~(USER_EVENT_REG_MAX-1))
return -EINVAL;
/* Ensure supported size */
@@ -2150,7 +2278,7 @@ static long user_events_ioctl_reg(struct user_event_file_info *info,
return ret;
}
- ret = user_event_parse_cmd(info->group, name, &user);
+ ret = user_event_parse_cmd(info->group, name, &user, reg.flags);
if (ret) {
kfree(name);
@@ -2160,7 +2288,7 @@ static long user_events_ioctl_reg(struct user_event_file_info *info,
ret = user_events_ref_add(info, user);
/* No longer need parse ref, ref_add either worked or not */
- refcount_dec(&user->refcnt);
+ user_event_put(user, false);
/* Positive number is index and valid */
if (ret < 0)
@@ -2309,7 +2437,7 @@ static long user_events_ioctl_unreg(unsigned long uarg)
set_bit(ENABLE_VAL_FREEING_BIT, ENABLE_BITOPS(enabler));
if (!test_bit(ENABLE_VAL_FAULTING_BIT, ENABLE_BITOPS(enabler)))
- user_event_enabler_destroy(enabler);
+ user_event_enabler_destroy(enabler, true);
/* Removed at least one */
ret = 0;
@@ -2367,7 +2495,6 @@ static int user_events_release(struct inode *node, struct file *file)
struct user_event_file_info *info = file->private_data;
struct user_event_group *group;
struct user_event_refs *refs;
- struct user_event *user;
int i;
if (!info)
@@ -2391,12 +2518,9 @@ static int user_events_release(struct inode *node, struct file *file)
* The underlying user_events are ref counted, and cannot be freed.
* After this decrement, the user_events may be freed elsewhere.
*/
- for (i = 0; i < refs->count; ++i) {
- user = refs->events[i];
+ for (i = 0; i < refs->count; ++i)
+ user_event_put(refs->events[i], false);
- if (user)
- refcount_dec(&user->refcnt);
- }
out:
file->private_data = NULL;
@@ -2577,7 +2701,7 @@ static int __init trace_events_user_init(void)
if (!fault_cache)
return -ENOMEM;
- init_group = user_event_group_create(&init_user_ns);
+ init_group = user_event_group_create();
if (!init_group) {
kmem_cache_destroy(fault_cache);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 15f05faaae44..1e33f367783e 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -847,7 +847,7 @@ static void print_fields(struct trace_iterator *iter, struct trace_event_call *c
int ret;
void *pos;
- list_for_each_entry(field, head, link) {
+ list_for_each_entry_reverse(field, head, link) {
trace_seq_printf(&iter->seq, " %s=", field->name);
if (field->offset + field->size > iter->ent_size) {
trace_seq_puts(&iter->seq, "<OVERFLOW>");
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 6b9d39d65b73..2d0d58fb4e7f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2070,7 +2070,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
TTU_IGNORE_MLOCK | TTU_BATCH_FLUSH);
xas_lock_irq(&xas);
- xas_set(&xas, index);
VM_BUG_ON_PAGE(page != xas_load(&xas), page);
diff --git a/mm/memfd.c b/mm/memfd.c
index 69b90c31d38c..e763e76f1106 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -371,12 +371,15 @@ SYSCALL_DEFINE2(memfd_create,
inode->i_mode &= ~0111;
file_seals = memfd_file_seals_ptr(file);
- *file_seals &= ~F_SEAL_SEAL;
- *file_seals |= F_SEAL_EXEC;
+ if (file_seals) {
+ *file_seals &= ~F_SEAL_SEAL;
+ *file_seals |= F_SEAL_EXEC;
+ }
} else if (flags & MFD_ALLOW_SEALING) {
/* MFD_EXEC and MFD_ALLOW_SEALING are set */
file_seals = memfd_file_seals_ptr(file);
- *file_seals &= ~F_SEAL_SEAL;
+ if (file_seals)
+ *file_seals &= ~F_SEAL_SEAL;
}
fd_install(fd, file);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 92d3d3ca390a..c59e7561698c 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -867,7 +867,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
}
tlb_finish_mmu(&tlb);
- if (!error && vma_iter_end(&vmi) < end)
+ if (!error && tmp < end)
error = -ENOMEM;
out:
diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c
index fe10436d9911..3ab53fad8876 100644
--- a/mm/shrinker_debug.c
+++ b/mm/shrinker_debug.c
@@ -5,12 +5,10 @@
#include <linux/seq_file.h>
#include <linux/shrinker.h>
#include <linux/memcontrol.h>
-#include <linux/srcu.h>
/* defined in vmscan.c */
-extern struct mutex shrinker_mutex;
+extern struct rw_semaphore shrinker_rwsem;
extern struct list_head shrinker_list;
-extern struct srcu_struct shrinker_srcu;
static DEFINE_IDA(shrinker_debugfs_ida);
static struct dentry *shrinker_debugfs_root;
@@ -51,13 +49,18 @@ static int shrinker_debugfs_count_show(struct seq_file *m, void *v)
struct mem_cgroup *memcg;
unsigned long total;
bool memcg_aware;
- int ret = 0, nid, srcu_idx;
+ int ret, nid;
count_per_node = kcalloc(nr_node_ids, sizeof(unsigned long), GFP_KERNEL);
if (!count_per_node)
return -ENOMEM;
- srcu_idx = srcu_read_lock(&shrinker_srcu);
+ ret = down_read_killable(&shrinker_rwsem);
+ if (ret) {
+ kfree(count_per_node);
+ return ret;
+ }
+ rcu_read_lock();
memcg_aware = shrinker->flags & SHRINKER_MEMCG_AWARE;
@@ -88,7 +91,8 @@ static int shrinker_debugfs_count_show(struct seq_file *m, void *v)
}
} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ rcu_read_unlock();
+ up_read(&shrinker_rwsem);
kfree(count_per_node);
return ret;
@@ -111,8 +115,9 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file,
.gfp_mask = GFP_KERNEL,
};
struct mem_cgroup *memcg = NULL;
- int nid, srcu_idx;
+ int nid;
char kbuf[72];
+ ssize_t ret;
read_len = size < (sizeof(kbuf) - 1) ? size : (sizeof(kbuf) - 1);
if (copy_from_user(kbuf, buf, read_len))
@@ -141,7 +146,11 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file,
return -EINVAL;
}
- srcu_idx = srcu_read_lock(&shrinker_srcu);
+ ret = down_read_killable(&shrinker_rwsem);
+ if (ret) {
+ mem_cgroup_put(memcg);
+ return ret;
+ }
sc.nid = nid;
sc.memcg = memcg;
@@ -150,7 +159,7 @@ static ssize_t shrinker_debugfs_scan_write(struct file *file,
shrinker->scan_objects(shrinker, &sc);
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ up_read(&shrinker_rwsem);
mem_cgroup_put(memcg);
return size;
@@ -168,7 +177,7 @@ int shrinker_debugfs_add(struct shrinker *shrinker)
char buf[128];
int id;
- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);
/* debugfs isn't initialized yet, add debugfs entries later. */
if (!shrinker_debugfs_root)
@@ -211,7 +220,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
if (!new)
return -ENOMEM;
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
old = shrinker->name;
shrinker->name = new;
@@ -229,7 +238,7 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...)
shrinker->debugfs_entry = entry;
}
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
kfree_const(old);
@@ -242,7 +251,7 @@ struct dentry *shrinker_debugfs_detach(struct shrinker *shrinker,
{
struct dentry *entry = shrinker->debugfs_entry;
- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);
kfree_const(shrinker->name);
shrinker->name = NULL;
@@ -271,14 +280,14 @@ static int __init shrinker_debugfs_init(void)
shrinker_debugfs_root = dentry;
/* Create debugfs entries for shrinkers registered at boot */
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
list_for_each_entry(shrinker, &shrinker_list, list)
if (!shrinker->debugfs_entry) {
ret = shrinker_debugfs_add(shrinker);
if (ret)
break;
}
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return ret;
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9683573f1225..1d13d71687d7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3098,11 +3098,20 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
* allocation request, free them via vfree() if any.
*/
if (area->nr_pages != nr_small_pages) {
- /* vm_area_alloc_pages() can also fail due to a fatal signal */
- if (!fatal_signal_pending(current))
+ /*
+ * vm_area_alloc_pages() can fail due to insufficient memory but
+ * also:-
+ *
+ * - a pending fatal signal
+ * - insufficient huge page-order pages
+ *
+ * Since we always retry allocations at order-0 in the huge page
+ * case a warning for either is spurious.
+ */
+ if (!fatal_signal_pending(current) && page_order == 0)
warn_alloc(gfp_mask, NULL,
- "vmalloc error: size %lu, page order %u, failed to allocate pages",
- area->nr_pages * PAGE_SIZE, page_order);
+ "vmalloc error: size %lu, failed to allocate pages",
+ area->nr_pages * PAGE_SIZE);
goto fail;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6d0cd2840cf0..5bf98d0a22c9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -35,7 +35,7 @@
#include <linux/cpuset.h>
#include <linux/compaction.h>
#include <linux/notifier.h>
-#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
@@ -57,7 +57,6 @@
#include <linux/khugepaged.h>
#include <linux/rculist_nulls.h>
#include <linux/random.h>
-#include <linux/srcu.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -190,9 +189,7 @@ struct scan_control {
int vm_swappiness = 60;
LIST_HEAD(shrinker_list);
-DEFINE_MUTEX(shrinker_mutex);
-DEFINE_SRCU(shrinker_srcu);
-static atomic_t shrinker_srcu_generation = ATOMIC_INIT(0);
+DECLARE_RWSEM(shrinker_rwsem);
#ifdef CONFIG_MEMCG
static int shrinker_nr_max;
@@ -211,21 +208,8 @@ static inline int shrinker_defer_size(int nr_items)
static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
int nid)
{
- return srcu_dereference_check(memcg->nodeinfo[nid]->shrinker_info,
- &shrinker_srcu,
- lockdep_is_held(&shrinker_mutex));
-}
-
-static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg,
- int nid)
-{
- return srcu_dereference(memcg->nodeinfo[nid]->shrinker_info,
- &shrinker_srcu);
-}
-
-static void free_shrinker_info_rcu(struct rcu_head *head)
-{
- kvfree(container_of(head, struct shrinker_info, rcu));
+ return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
+ lockdep_is_held(&shrinker_rwsem));
}
static int expand_one_shrinker_info(struct mem_cgroup *memcg,
@@ -266,7 +250,7 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg,
defer_size - old_defer_size);
rcu_assign_pointer(pn->shrinker_info, new);
- call_srcu(&shrinker_srcu, &old->rcu, free_shrinker_info_rcu);
+ kvfree_rcu(old, rcu);
}
return 0;
@@ -292,7 +276,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
int nid, size, ret = 0;
int map_size, defer_size = 0;
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
map_size = shrinker_map_size(shrinker_nr_max);
defer_size = shrinker_defer_size(shrinker_nr_max);
size = map_size + defer_size;
@@ -308,7 +292,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
info->map_nr_max = shrinker_nr_max;
rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
}
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return ret;
}
@@ -324,7 +308,7 @@ static int expand_shrinker_info(int new_id)
if (!root_mem_cgroup)
goto out;
- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);
map_size = shrinker_map_size(new_nr_max);
defer_size = shrinker_defer_size(new_nr_max);
@@ -352,16 +336,15 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
{
if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
struct shrinker_info *info;
- int srcu_idx;
- srcu_idx = srcu_read_lock(&shrinker_srcu);
- info = shrinker_info_srcu(memcg, nid);
+ rcu_read_lock();
+ info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
if (!WARN_ON_ONCE(shrinker_id >= info->map_nr_max)) {
/* Pairs with smp mb in shrink_slab() */
smp_mb__before_atomic();
set_bit(shrinker_id, info->map);
}
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ rcu_read_unlock();
}
}
@@ -374,7 +357,8 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
if (mem_cgroup_disabled())
return -ENOSYS;
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
+ /* This may call shrinker, so it must use down_read_trylock() */
id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
if (id < 0)
goto unlock;
@@ -388,7 +372,7 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker)
shrinker->id = id;
ret = 0;
unlock:
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return ret;
}
@@ -398,7 +382,7 @@ static void unregister_memcg_shrinker(struct shrinker *shrinker)
BUG_ON(id < 0);
- lockdep_assert_held(&shrinker_mutex);
+ lockdep_assert_held(&shrinker_rwsem);
idr_remove(&shrinker_idr, id);
}
@@ -408,7 +392,7 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
{
struct shrinker_info *info;
- info = shrinker_info_srcu(memcg, nid);
+ info = shrinker_info_protected(memcg, nid);
return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0);
}
@@ -417,7 +401,7 @@ static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
{
struct shrinker_info *info;
- info = shrinker_info_srcu(memcg, nid);
+ info = shrinker_info_protected(memcg, nid);
return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]);
}
@@ -433,7 +417,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
parent = root_mem_cgroup;
/* Prevent from concurrent shrinker_info expand */
- mutex_lock(&shrinker_mutex);
+ down_read(&shrinker_rwsem);
for_each_node(nid) {
child_info = shrinker_info_protected(memcg, nid);
parent_info = shrinker_info_protected(parent, nid);
@@ -442,7 +426,7 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
atomic_long_add(nr, &parent_info->nr_deferred[i]);
}
}
- mutex_unlock(&shrinker_mutex);
+ up_read(&shrinker_rwsem);
}
static bool cgroup_reclaim(struct scan_control *sc)
@@ -743,9 +727,9 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
shrinker->name = NULL;
#endif
if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
- mutex_lock(&shrinker_mutex);
+ down_write(&shrinker_rwsem);
unregister_memcg_shrinker(shrinker);
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
return;
}
@@ -755,11 +739,11 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
void register_shrinker_prepared(struct shrinker *shrinker)
{
- mutex_lock(&shrinker_mutex);
- list_add_tail_rcu(&shrinker->list, &shrinker_list);
+ down_write(&shrinker_rwsem);
+ list_add_tail(&shrinker->list, &shrinker_list);
shrinker->flags |= SHRINKER_REGISTERED;
shrinker_debugfs_add(shrinker);
- mutex_unlock(&shrinker_mutex);
+ up_write(&shrinker_rwsem);
}
static int __register_shrinker(struct shrinker *shrinker)
@@ -810,16 +794,13 @@ void unregister_shrinker(struct shrinker *shrinker)
if (!(shrinker->flags & SHRINKER_REGISTERED))
return;
- mutex_lock(&shrinker_mutex);
- list_del_rcu(&shrinker->list);
+ down_write(&shrinker_rwsem);
+ list_del(&shrinker->list);
shrinker->flags &= ~SHRINKER_REGISTERED;
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
unregister_memcg_shrinker(shrinker);
debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
- mutex_unlock(&shrinker_mutex);
-
- atomic_inc(&shrinker_srcu_generation);
- synchronize_srcu(&shrinker_srcu);
+ up_write(&shrinker_rwsem);
shrinker_debugfs_remove(debugfs_entry, debugfs_id);
@@ -831,13 +812,15 @@ EXPORT_SYMBOL(unregister_shrinker);
/**
* synchronize_shrinkers - Wait for all running shrinkers to complete.
*
- * This is useful to guarantee that all shrinker invocations have seen an
- * update, before freeing memory.
+ * This is equivalent to calling unregister_shrink() and register_shrinker(),
+ * but atomically and with less overhead. This is useful to guarantee that all
+ * shrinker invocations have seen an update, before freeing memory, similar to
+ * rcu.
*/
void synchronize_shrinkers(void)
{
- atomic_inc(&shrinker_srcu_generation);
- synchronize_srcu(&shrinker_srcu);
+ down_write(&shrinker_rwsem);
+ up_write(&shrinker_rwsem);
}
EXPORT_SYMBOL(synchronize_shrinkers);
@@ -946,20 +929,19 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
{
struct shrinker_info *info;
unsigned long ret, freed = 0;
- int srcu_idx, generation;
- int i = 0;
+ int i;
if (!mem_cgroup_online(memcg))
return 0;
-again:
- srcu_idx = srcu_read_lock(&shrinker_srcu);
- info = shrinker_info_srcu(memcg, nid);
+ if (!down_read_trylock(&shrinker_rwsem))
+ return 0;
+
+ info = shrinker_info_protected(memcg, nid);
if (unlikely(!info))
goto unlock;
- generation = atomic_read(&shrinker_srcu_generation);
- for_each_set_bit_from(i, info->map, info->map_nr_max) {
+ for_each_set_bit(i, info->map, info->map_nr_max) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
@@ -1005,14 +987,14 @@ again:
set_shrinker_bit(memcg, nid, i);
}
freed += ret;
- if (atomic_read(&shrinker_srcu_generation) != generation) {
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
- i++;
- goto again;
+
+ if (rwsem_is_contended(&shrinker_rwsem)) {
+ freed = freed ? : 1;
+ break;
}
}
unlock:
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ up_read(&shrinker_rwsem);
return freed;
}
#else /* CONFIG_MEMCG */
@@ -1049,7 +1031,6 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
{
unsigned long ret, freed = 0;
struct shrinker *shrinker;
- int srcu_idx, generation;
/*
* The root memcg might be allocated even though memcg is disabled
@@ -1061,11 +1042,10 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
- srcu_idx = srcu_read_lock(&shrinker_srcu);
+ if (!down_read_trylock(&shrinker_rwsem))
+ goto out;
- generation = atomic_read(&shrinker_srcu_generation);
- list_for_each_entry_srcu(shrinker, &shrinker_list, list,
- srcu_read_lock_held(&shrinker_srcu)) {
+ list_for_each_entry(shrinker, &shrinker_list, list) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
@@ -1076,14 +1056,19 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
if (ret == SHRINK_EMPTY)
ret = 0;
freed += ret;
-
- if (atomic_read(&shrinker_srcu_generation) != generation) {
+ /*
+ * Bail out if someone want to register a new shrinker to
+ * prevent the registration from being stalled for long periods
+ * by parallel ongoing shrinking.
+ */
+ if (rwsem_is_contended(&shrinker_rwsem)) {
freed = freed ? : 1;
break;
}
}
- srcu_read_unlock(&shrinker_srcu, srcu_idx);
+ up_read(&shrinker_rwsem);
+out:
cond_resched();
return freed;
}
diff --git a/scripts/gdb/linux/constants.py.in b/scripts/gdb/linux/constants.py.in
index 471300ba176c..50a92c4e9984 100644
--- a/scripts/gdb/linux/constants.py.in
+++ b/scripts/gdb/linux/constants.py.in
@@ -48,12 +48,12 @@ if IS_BUILTIN(CONFIG_COMMON_CLK):
LX_GDBPARSED(CLK_GET_RATE_NOCACHE)
/* linux/fs.h */
-LX_VALUE(SB_RDONLY)
-LX_VALUE(SB_SYNCHRONOUS)
-LX_VALUE(SB_MANDLOCK)
-LX_VALUE(SB_DIRSYNC)
-LX_VALUE(SB_NOATIME)
-LX_VALUE(SB_NODIRATIME)
+LX_GDBPARSED(SB_RDONLY)
+LX_GDBPARSED(SB_SYNCHRONOUS)
+LX_GDBPARSED(SB_MANDLOCK)
+LX_GDBPARSED(SB_DIRSYNC)
+LX_GDBPARSED(SB_NOATIME)
+LX_GDBPARSED(SB_NODIRATIME)
/* linux/htimer.h */
LX_GDBPARSED(hrtimer_resolution)
diff --git a/scripts/gfp-translate b/scripts/gfp-translate
index b2ce416d944b..6c9aed17cf56 100755
--- a/scripts/gfp-translate
+++ b/scripts/gfp-translate
@@ -63,11 +63,11 @@ fi
# Extract GFP flags from the kernel source
TMPFILE=`mktemp -t gfptranslate-XXXXXX` || exit 1
-grep -q ___GFP $SOURCE/include/linux/gfp.h
+grep -q ___GFP $SOURCE/include/linux/gfp_types.h
if [ $? -eq 0 ]; then
- grep "^#define ___GFP" $SOURCE/include/linux/gfp.h | sed -e 's/u$//' | grep -v GFP_BITS > $TMPFILE
+ grep "^#define ___GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/u$//' | grep -v GFP_BITS > $TMPFILE
else
- grep "^#define __GFP" $SOURCE/include/linux/gfp.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE
+ grep "^#define __GFP" $SOURCE/include/linux/gfp_types.h | sed -e 's/(__force gfp_t)//' | sed -e 's/u)/)/' | grep -v GFP_BITS | sed -e 's/)\//) \//' > $TMPFILE
fi
# Parse the flags
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 23af4633f0f4..4f0c50c33ba7 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -5,12 +5,15 @@ LOCAL_HDRS += $(selfdir)/mm/local_config.h $(top_srcdir)/mm/gup_test.h
include local_config.mk
+ifeq ($(ARCH),)
+
ifeq ($(CROSS_COMPILE),)
uname_M := $(shell uname -m 2>/dev/null || echo not)
else
uname_M := $(shell echo $(CROSS_COMPILE) | grep -o '^[a-z0-9]\+')
endif
-MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
+ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
+endif
# Without this, failed build products remain, with up-to-date timestamps,
# thus tricking Make (and you!) into believing that All Is Well, in subsequent
@@ -65,7 +68,7 @@ TEST_GEN_PROGS += ksm_tests
TEST_GEN_PROGS += ksm_functional_tests
TEST_GEN_PROGS += mdwe_test
-ifeq ($(MACHINE),x86_64)
+ifeq ($(ARCH),x86_64)
CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
@@ -87,13 +90,13 @@ TEST_GEN_PROGS += $(BINARIES_64)
endif
else
-ifneq (,$(findstring $(MACHINE),ppc64))
+ifneq (,$(findstring $(ARCH),ppc64))
TEST_GEN_PROGS += protection_keys
endif
endif
-ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64))
+ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64))
TEST_GEN_PROGS += va_high_addr_switch
TEST_GEN_PROGS += virtual_address_range
TEST_GEN_PROGS += write_to_hugetlbfs
@@ -112,7 +115,7 @@ $(TEST_GEN_PROGS): vm_util.c
$(OUTPUT)/uffd-stress: uffd-common.c
$(OUTPUT)/uffd-unit-tests: uffd-common.c
-ifeq ($(MACHINE),x86_64)
+ifeq ($(ARCH),x86_64)
BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c
index 8879a7b04c6a..d6979a48478f 100644
--- a/tools/testing/selftests/user_events/dyn_test.c
+++ b/tools/testing/selftests/user_events/dyn_test.c
@@ -16,42 +16,140 @@
#include "../kselftest_harness.h"
-const char *dyn_file = "/sys/kernel/tracing/dynamic_events";
-const char *clear = "!u:__test_event";
+const char *abi_file = "/sys/kernel/tracing/user_events_data";
+const char *enable_file = "/sys/kernel/tracing/events/user_events/__test_event/enable";
-static int Append(const char *value)
+static bool wait_for_delete(void)
{
- int fd = open(dyn_file, O_RDWR | O_APPEND);
- int ret = write(fd, value, strlen(value));
+ int i;
+
+ for (i = 0; i < 1000; ++i) {
+ int fd = open(enable_file, O_RDONLY);
+
+ if (fd == -1)
+ return true;
+
+ close(fd);
+ usleep(1000);
+ }
+
+ return false;
+}
+
+static int reg_event(int fd, int *check, int bit, const char *value)
+{
+ struct user_reg reg = {0};
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)value;
+ reg.enable_bit = bit;
+ reg.enable_addr = (__u64)check;
+ reg.enable_size = sizeof(*check);
+
+ if (ioctl(fd, DIAG_IOCSREG, &reg) == -1)
+ return -1;
+
+ return 0;
+}
+
+static int unreg_event(int fd, int *check, int bit)
+{
+ struct user_unreg unreg = {0};
+
+ unreg.size = sizeof(unreg);
+ unreg.disable_bit = bit;
+ unreg.disable_addr = (__u64)check;
+
+ return ioctl(fd, DIAG_IOCSUNREG, &unreg);
+}
+
+static int parse(int *check, const char *value)
+{
+ int fd = open(abi_file, O_RDWR);
+ int ret;
+
+ if (fd == -1)
+ return -1;
+
+ /* Until we have persist flags via dynamic events, use the base name */
+ if (value[0] != 'u' || value[1] != ':') {
+ close(fd);
+ return -1;
+ }
+
+ ret = reg_event(fd, check, 31, value + 2);
+
+ if (ret != -1) {
+ if (unreg_event(fd, check, 31) == -1)
+ printf("WARN: Couldn't unreg event\n");
+ }
close(fd);
+
return ret;
}
-#define CLEAR() \
+static int check_match(int *check, const char *first, const char *second, bool *match)
+{
+ int fd = open(abi_file, O_RDWR);
+ int ret = -1;
+
+ if (fd == -1)
+ return -1;
+
+ if (reg_event(fd, check, 31, first) == -1)
+ goto cleanup;
+
+ if (reg_event(fd, check, 30, second) == -1) {
+ if (errno == EADDRINUSE) {
+ /* Name is in use, with different fields */
+ *match = false;
+ ret = 0;
+ }
+
+ goto cleanup;
+ }
+
+ *match = true;
+ ret = 0;
+cleanup:
+ unreg_event(fd, check, 31);
+ unreg_event(fd, check, 30);
+
+ close(fd);
+
+ wait_for_delete();
+
+ return ret;
+}
+
+#define TEST_MATCH(x, y) \
do { \
- int ret = Append(clear); \
- if (ret == -1) \
- ASSERT_EQ(ENOENT, errno); \
+ bool match; \
+ ASSERT_NE(-1, check_match(&self->check, x, y, &match)); \
+ ASSERT_EQ(true, match); \
} while (0)
-#define TEST_PARSE(x) \
+#define TEST_NMATCH(x, y) \
do { \
- ASSERT_NE(-1, Append(x)); \
- CLEAR(); \
+ bool match; \
+ ASSERT_NE(-1, check_match(&self->check, x, y, &match)); \
+ ASSERT_EQ(false, match); \
} while (0)
-#define TEST_NPARSE(x) ASSERT_EQ(-1, Append(x))
+#define TEST_PARSE(x) ASSERT_NE(-1, parse(&self->check, x))
+
+#define TEST_NPARSE(x) ASSERT_EQ(-1, parse(&self->check, x))
FIXTURE(user) {
+ int check;
};
FIXTURE_SETUP(user) {
- CLEAR();
}
FIXTURE_TEARDOWN(user) {
- CLEAR();
+ wait_for_delete();
}
TEST_F(user, basic_types) {
@@ -95,33 +193,30 @@ TEST_F(user, size_types) {
TEST_NPARSE("u:__test_event char a 20");
}
-TEST_F(user, flags) {
- /* Should work */
- TEST_PARSE("u:__test_event:BPF_ITER u32 a");
- /* Forward compat */
- TEST_PARSE("u:__test_event:BPF_ITER,FLAG_FUTURE u32 a");
-}
-
TEST_F(user, matching) {
- /* Register */
- ASSERT_NE(-1, Append("u:__test_event struct custom a 20"));
- /* Should not match */
- TEST_NPARSE("!u:__test_event struct custom b");
- /* Should match */
- TEST_PARSE("!u:__test_event struct custom a");
- /* Multi field reg */
- ASSERT_NE(-1, Append("u:__test_event u32 a; u32 b"));
- /* Non matching cases */
- TEST_NPARSE("!u:__test_event u32 a");
- TEST_NPARSE("!u:__test_event u32 b");
- TEST_NPARSE("!u:__test_event u32 a; u32 ");
- TEST_NPARSE("!u:__test_event u32 a; u32 a");
- /* Matching case */
- TEST_PARSE("!u:__test_event u32 a; u32 b");
- /* Register */
- ASSERT_NE(-1, Append("u:__test_event u32 a; u32 b"));
- /* Ensure trailing semi-colon case */
- TEST_PARSE("!u:__test_event u32 a; u32 b;");
+ /* Single name matches */
+ TEST_MATCH("__test_event u32 a",
+ "__test_event u32 a");
+
+ /* Multiple names match */
+ TEST_MATCH("__test_event u32 a; u32 b",
+ "__test_event u32 a; u32 b");
+
+ /* Multiple names match with dangling ; */
+ TEST_MATCH("__test_event u32 a; u32 b",
+ "__test_event u32 a; u32 b;");
+
+ /* Single name doesn't match */
+ TEST_NMATCH("__test_event u32 a",
+ "__test_event u32 b");
+
+ /* Multiple names don't match */
+ TEST_NMATCH("__test_event u32 a; u32 b",
+ "__test_event u32 b; u32 a");
+
+ /* Types don't match */
+ TEST_NMATCH("__test_event u64 a; u64 b",
+ "__test_event u32 a; u32 b");
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c
index 7c99cef94a65..eb6904d89f14 100644
--- a/tools/testing/selftests/user_events/ftrace_test.c
+++ b/tools/testing/selftests/user_events/ftrace_test.c
@@ -102,30 +102,56 @@ err:
return -1;
}
+static bool wait_for_delete(void)
+{
+ int i;
+
+ for (i = 0; i < 1000; ++i) {
+ int fd = open(enable_file, O_RDONLY);
+
+ if (fd == -1)
+ return true;
+
+ close(fd);
+ usleep(1000);
+ }
+
+ return false;
+}
+
static int clear(int *check)
{
struct user_unreg unreg = {0};
+ int fd;
unreg.size = sizeof(unreg);
unreg.disable_bit = 31;
unreg.disable_addr = (__u64)check;
- int fd = open(data_file, O_RDWR);
+ fd = open(data_file, O_RDWR);
if (fd == -1)
return -1;
if (ioctl(fd, DIAG_IOCSUNREG, &unreg) == -1)
if (errno != ENOENT)
- return -1;
-
- if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1)
- if (errno != ENOENT)
- return -1;
+ goto fail;
+
+ if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1) {
+ if (errno == EBUSY) {
+ if (!wait_for_delete())
+ goto fail;
+ } else if (errno != ENOENT)
+ goto fail;
+ }
close(fd);
return 0;
+fail:
+ close(fd);
+
+ return -1;
}
static int check_print_fmt(const char *event, const char *expected, int *check)
@@ -155,9 +181,8 @@ static int check_print_fmt(const char *event, const char *expected, int *check)
/* Register should work */
ret = ioctl(fd, DIAG_IOCSREG, &reg);
- close(fd);
-
if (ret != 0) {
+ close(fd);
printf("Reg failed in fmt\n");
return ret;
}
@@ -165,6 +190,8 @@ static int check_print_fmt(const char *event, const char *expected, int *check)
/* Ensure correct print_fmt */
ret = get_print_fmt(print_fmt, sizeof(print_fmt));
+ close(fd);
+
if (ret != 0)
return ret;
@@ -228,6 +255,12 @@ TEST_F(user, register_events) {
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
ASSERT_EQ(0, reg.write_index);
+ /* Multiple registers to same name but different args should fail */
+ reg.enable_bit = 29;
+ reg.name_args = (__u64)"__test_event u32 field1;";
+ ASSERT_EQ(-1, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(EADDRINUSE, errno);
+
/* Ensure disabled */
self->enable_fd = open(enable_file, O_RDWR);
ASSERT_NE(-1, self->enable_fd);
@@ -250,10 +283,10 @@ TEST_F(user, register_events) {
unreg.disable_bit = 30;
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg));
- /* Delete should work only after close and unregister */
+ /* Delete should have been auto-done after close and unregister */
close(self->data_fd);
- self->data_fd = open(data_file, O_RDWR);
- ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSDEL, "__test_event"));
+
+ ASSERT_EQ(true, wait_for_delete());
}
TEST_F(user, write_events) {
@@ -310,6 +343,39 @@ TEST_F(user, write_events) {
ASSERT_EQ(EINVAL, errno);
}
+TEST_F(user, write_empty_events) {
+ struct user_reg reg = {0};
+ struct iovec io[1];
+ int before = 0, after = 0;
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)"__test_event";
+ reg.enable_bit = 31;
+ reg.enable_addr = (__u64)&self->check;
+ reg.enable_size = sizeof(self->check);
+
+ io[0].iov_base = &reg.write_index;
+ io[0].iov_len = sizeof(reg.write_index);
+
+ /* Register should work */
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+ ASSERT_EQ(0, self->check);
+
+ /* Enable event */
+ self->enable_fd = open(enable_file, O_RDWR);
+ ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
+
+ /* Event should now be enabled */
+ ASSERT_EQ(1 << reg.enable_bit, self->check);
+
+ /* Write should make it out to ftrace buffers */
+ before = trace_bytes();
+ ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 1));
+ after = trace_bytes();
+ ASSERT_GT(after, before);
+}
+
TEST_F(user, write_fault) {
struct user_reg reg = {0};
struct iovec io[2];
diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c
index a070258d4449..8b09be566fa2 100644
--- a/tools/testing/selftests/user_events/perf_test.c
+++ b/tools/testing/selftests/user_events/perf_test.c
@@ -81,6 +81,32 @@ static int get_offset(void)
return offset;
}
+static int clear(int *check)
+{
+ struct user_unreg unreg = {0};
+
+ unreg.size = sizeof(unreg);
+ unreg.disable_bit = 31;
+ unreg.disable_addr = (__u64)check;
+
+ int fd = open(data_file, O_RDWR);
+
+ if (fd == -1)
+ return -1;
+
+ if (ioctl(fd, DIAG_IOCSUNREG, &unreg) == -1)
+ if (errno != ENOENT)
+ return -1;
+
+ if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1)
+ if (errno != ENOENT)
+ return -1;
+
+ close(fd);
+
+ return 0;
+}
+
FIXTURE(user) {
int data_fd;
int check;
@@ -93,6 +119,9 @@ FIXTURE_SETUP(user) {
FIXTURE_TEARDOWN(user) {
close(self->data_fd);
+
+ if (clear(&self->check) != 0)
+ printf("WARNING: Clear didn't work!\n");
}
TEST_F(user, perf_write) {
@@ -160,6 +189,59 @@ TEST_F(user, perf_write) {
ASSERT_EQ(0, self->check);
}
+TEST_F(user, perf_empty_events) {
+ struct perf_event_attr pe = {0};
+ struct user_reg reg = {0};
+ struct perf_event_mmap_page *perf_page;
+ int page_size = sysconf(_SC_PAGESIZE);
+ int id, fd;
+ __u32 *val;
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)"__test_event";
+ reg.enable_bit = 31;
+ reg.enable_addr = (__u64)&self->check;
+ reg.enable_size = sizeof(self->check);
+
+ /* Register should work */
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+ ASSERT_EQ(0, self->check);
+
+ /* Id should be there */
+ id = get_id();
+ ASSERT_NE(-1, id);
+
+ pe.type = PERF_TYPE_TRACEPOINT;
+ pe.size = sizeof(pe);
+ pe.config = id;
+ pe.sample_type = PERF_SAMPLE_RAW;
+ pe.sample_period = 1;
+ pe.wakeup_events = 1;
+
+ /* Tracepoint attach should work */
+ fd = perf_event_open(&pe, 0, -1, -1, 0);
+ ASSERT_NE(-1, fd);
+
+ perf_page = mmap(NULL, page_size * 2, PROT_READ, MAP_SHARED, fd, 0);
+ ASSERT_NE(MAP_FAILED, perf_page);
+
+ /* Status should be updated */
+ ASSERT_EQ(1 << reg.enable_bit, self->check);
+
+ /* Ensure write shows up at correct offset */
+ ASSERT_NE(-1, write(self->data_fd, &reg.write_index,
+ sizeof(reg.write_index)));
+ val = (void *)(((char *)perf_page) + perf_page->data_offset);
+ ASSERT_EQ(PERF_RECORD_SAMPLE, *val);
+
+ munmap(perf_page, page_size * 2);
+ close(fd);
+
+ /* Status should be updated */
+ ASSERT_EQ(0, self->check);
+}
+
int main(int argc, char **argv)
{
return test_harness_run(argc, argv);