-rw-r--r--  .mailmap | 5
-rw-r--r--  Documentation/RCU/checklist.rst | 25
-rw-r--r--  Documentation/RCU/rcu_dereference.rst | 27
-rw-r--r--  Documentation/RCU/torture.rst | 2
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 6
-rw-r--r--  Documentation/memory-barriers.txt | 17
-rw-r--r--  MAINTAINERS | 28
-rw-r--r--  fs/bcachefs/fs-ioctl.c | 31
-rw-r--r--  fs/exfat/balloc.c | 87
-rw-r--r--  fs/exfat/exfat_fs.h | 5
-rw-r--r--  fs/exfat/file.c | 193
-rw-r--r--  fs/exfat/inode.c | 136
-rw-r--r--  fs/exfat/namei.c | 6
-rw-r--r--  fs/namei.c | 16
-rw-r--r--  fs/namespace.c | 50
-rw-r--r--  fs/proc/task_mmu.c | 24
-rw-r--r--  include/linux/fsnotify.h | 19
-rw-r--r--  include/linux/namei.h | 1
-rw-r--r--  include/linux/rcu_notifier.h | 6
-rw-r--r--  include/linux/rculist.h | 2
-rw-r--r--  include/linux/rcupdate.h | 9
-rw-r--r--  include/linux/srcu.h | 2
-rw-r--r--  include/linux/syscalls.h | 2
-rw-r--r--  kernel/crash_core.c | 16
-rw-r--r--  kernel/kexec_core.c | 1
-rw-r--r--  kernel/locking/locktorture.c | 18
-rw-r--r--  kernel/rcu/Kconfig.debug | 25
-rw-r--r--  kernel/rcu/rcu.h | 8
-rw-r--r--  kernel/rcu/rcutorture.c | 16
-rw-r--r--  kernel/rcu/srcutree.c | 24
-rw-r--r--  kernel/rcu/tasks.h | 4
-rw-r--r--  kernel/rcu/tree.c | 2
-rw-r--r--  kernel/rcu/tree_stall.h | 11
-rw-r--r--  kernel/rcu/update.c | 6
-rw-r--r--  lib/Kconfig.debug | 2
-rw-r--r--  mm/kasan/generic.c | 10
-rw-r--r--  mm/memory_hotplug.c | 8
-rw-r--r--  mm/mm_init.c | 6
-rw-r--r--  mm/userfaultfd.c | 6
-rwxr-xr-x  scripts/decode_stacktrace.sh | 19
-rw-r--r--  tools/testing/selftests/mm/hugepage-vmemmap.c | 29
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/mkinitrd.sh | 5
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot | 3
43 files changed, 679 insertions, 239 deletions
diff --git a/.mailmap b/.mailmap
index 1f6ad79b45e4..cc0c58c560b3 100644
--- a/.mailmap
+++ b/.mailmap
@@ -363,7 +363,6 @@ Maheshwar Ajja <[email protected]> <[email protected]>
Manikanta Pubbisetty <[email protected]> <[email protected]>
Manivannan Sadhasivam <[email protected]> <[email protected]>
-Manivannan Sadhasivam <[email protected]> <[email protected]>
Marcin Nowakowski <[email protected]> <[email protected]>
@@ -504,6 +503,9 @@ Ralf Baechle <[email protected]>
Ralf Wildenhues <[email protected]>
Ram Chandra Jangir <[email protected]> <[email protected]>
Ravi Kumar Siddojigari <[email protected]> <[email protected]>
Rémi Denis-Courmont <[email protected]>
@@ -582,6 +584,7 @@ Surabhi Vishnoi <[email protected]> <[email protected]>
Takashi YOSHII <[email protected]>
Tamizh Chelvam Raja <[email protected]> <[email protected]>
Tejun Heo <[email protected]>
Thomas Graf <[email protected]>
diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst
index bd3c58c44bef..2d42998a89a6 100644
--- a/Documentation/RCU/checklist.rst
+++ b/Documentation/RCU/checklist.rst
@@ -241,15 +241,22 @@ over a rather long period of time, but improvements are always welcome!
srcu_struct. The rules for the expedited RCU grace-period-wait
primitives are the same as for their non-expedited counterparts.
- If the updater uses call_rcu_tasks() or synchronize_rcu_tasks(),
- then the readers must refrain from executing voluntary
- context switches, that is, from blocking. If the updater uses
- call_rcu_tasks_trace() or synchronize_rcu_tasks_trace(), then
- the corresponding readers must use rcu_read_lock_trace() and
- rcu_read_unlock_trace(). If an updater uses call_rcu_tasks_rude()
- or synchronize_rcu_tasks_rude(), then the corresponding readers
- must use anything that disables preemption, for example,
- preempt_disable() and preempt_enable().
+ Similarly, it is necessary to correctly use the RCU Tasks flavors:
+
+ a. If the updater uses synchronize_rcu_tasks() or
+ call_rcu_tasks(), then the readers must refrain from
+ executing voluntary context switches, that is, from
+ blocking.
+
+ b. If the updater uses call_rcu_tasks_trace()
+ or synchronize_rcu_tasks_trace(), then the
+ corresponding readers must use rcu_read_lock_trace()
+ and rcu_read_unlock_trace().
+
+ c. If an updater uses call_rcu_tasks_rude() or
+ synchronize_rcu_tasks_rude(), then the corresponding
+ readers must use anything that disables preemption,
+ for example, preempt_disable() and preempt_enable().
Mixing things up will result in confusion and broken kernels, and
has even resulted in an exploitable security issue. Therefore,
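
As a concrete illustration of the item (b) pairing (a sketch, not part of
this patch; gp, gp_lock, newp, and do_something_with() are hypothetical)::

	/* Reader side: must use the matching Tasks Trace markers. */
	rcu_read_lock_trace();
	p = rcu_dereference_check(gp, rcu_read_lock_trace_held());
	do_something_with(p);
	rcu_read_unlock_trace();

	/* Updater side: waits only for Tasks Trace readers. */
	old = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
	rcu_assign_pointer(gp, newp);
	synchronize_rcu_tasks_trace();
	kfree(old);
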
diff --git a/Documentation/RCU/rcu_dereference.rst b/Documentation/RCU/rcu_dereference.rst
index 3b739f6243c8..659d5913784d 100644
--- a/Documentation/RCU/rcu_dereference.rst
+++ b/Documentation/RCU/rcu_dereference.rst
@@ -3,13 +3,26 @@
PROPER CARE AND FEEDING OF RETURN VALUES FROM rcu_dereference()
===============================================================
-Most of the time, you can use values from rcu_dereference() or one of
-the similar primitives without worries. Dereferencing (prefix "*"),
-field selection ("->"), assignment ("="), address-of ("&"), addition and
-subtraction of constants, and casts all work quite naturally and safely.
-
-It is nevertheless possible to get into trouble with other operations.
-Follow these rules to keep your RCU code working properly:
+Proper care and feeding of address and data dependencies is critically
+important to correct use of things like RCU. To this end, the pointers
+returned from the rcu_dereference() family of primitives carry address and
+data dependencies. These dependencies extend from the rcu_dereference()
+macro's load of the pointer to the later use of that pointer to compute
+either the address of a later memory access (representing an address
+dependency) or the value written by a later memory access (representing
+a data dependency).
+
+Most of the time, these dependencies are preserved, permitting you to
+freely use values from rcu_dereference(). For example, dereferencing
+(prefix "*"), field selection ("->"), assignment ("="), address-of
+("&"), casts, and addition or subtraction of constants all work quite
+naturally and safely. However, because current compilers do not take
+either address or data dependencies into account, it is still possible
+to get into trouble.
+
+Follow these rules to preserve the address and data dependencies emanating
+from your calls to rcu_dereference() and friends, thus keeping your RCU
+readers working properly:
- You must use one of the rcu_dereference() family of primitives
to load an RCU-protected pointer, otherwise CONFIG_PROVE_RCU
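
As a concrete illustration of such a dependency (a sketch, not part of
this patch; gp, struct foo, and do_something() are hypothetical)::

	struct foo *p;

	rcu_read_lock();
	p = rcu_dereference(gp);	/* dependency-ordered load */
	if (p)
		do_something(p->a);	/* address dependency: &p->a comes from p */
	rcu_read_unlock();
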
diff --git a/Documentation/RCU/torture.rst b/Documentation/RCU/torture.rst
index b3b6dfa85248..49e7beea6ae1 100644
--- a/Documentation/RCU/torture.rst
+++ b/Documentation/RCU/torture.rst
@@ -185,7 +185,7 @@ argument.
Not all changes require that all scenarios be run. For example, a change
to Tree SRCU might run only the SRCU-N and SRCU-P scenarios using the
--configs argument to kvm.sh as follows: "--configs 'SRCU-N SRCU-P'".
-Large systems can run multiple copies of of the full set of scenarios,
+Large systems can run multiple copies of the full set of scenarios,
for example, a system with 448 hardware threads can run five instances
of the full set concurrently. To make this happen::
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 505af40e97bc..6ee0f9a5da70 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5313,6 +5313,12 @@
Dump ftrace buffer after reporting RCU CPU
stall warning.
+ rcupdate.rcu_cpu_stall_notifiers= [KNL]
+ Provide RCU CPU stall notifiers, but see the
+ warnings in the RCU_CPU_STALL_NOTIFIER Kconfig
+ option's help text. TL;DR: You almost certainly
+ do not want rcupdate.rcu_cpu_stall_notifiers.
+
rcupdate.rcu_cpu_stall_suppress= [KNL]
Suppress RCU CPU stall warning messages.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index d414e145f912..4202174a6262 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -396,10 +396,11 @@ Memory barriers come in four basic varieties:
(2) Address-dependency barriers (historical).
- [!] This section is marked as HISTORICAL: For more up-to-date
- information, including how compiler transformations related to pointer
- comparisons can sometimes cause problems, see
- Documentation/RCU/rcu_dereference.rst.
+ [!] This section is marked as HISTORICAL: it covers the long-obsolete
+ smp_read_barrier_depends() macro, the semantics of which are now
+ implicit in all marked accesses. For more up-to-date information,
+ including how compiler transformations can sometimes break address
+ dependencies, see Documentation/RCU/rcu_dereference.rst.
An address-dependency barrier is a weaker form of read barrier. In the
case where two loads are performed such that the second depends on the
@@ -560,9 +561,11 @@ There are certain things that the Linux kernel memory barriers do not guarantee:
ADDRESS-DEPENDENCY BARRIERS (HISTORICAL)
----------------------------------------
-[!] This section is marked as HISTORICAL: For more up-to-date information,
-including how compiler transformations related to pointer comparisons can
-sometimes cause problems, see Documentation/RCU/rcu_dereference.rst.
+[!] This section is marked as HISTORICAL: it covers the long-obsolete
+smp_read_barrier_depends() macro, the semantics of which are now implicit
+in all marked accesses. For more up-to-date information, including
+how compiler transformations can sometimes break address dependencies,
+see Documentation/RCU/rcu_dereference.rst.
As of v4.15 of the Linux kernel, an smp_mb() was added to READ_ONCE() for
DEC Alpha, which means that about the only people who need to pay attention
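
For reference, the obsolete pattern and its modern equivalent (illustrative
only, not part of this patch)::

	/* Historical pattern, needed on DEC Alpha before v4.15: */
	q = READ_ONCE(p);
	smp_read_barrier_depends();	/* long obsolete */
	d = *q;

	/* Modern equivalent: the ordering is implicit in the marked access. */
	q = READ_ONCE(p);
	d = *q;
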
diff --git a/MAINTAINERS b/MAINTAINERS
index 391bbb855cbe..2cd538369f3c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12724,12 +12724,11 @@ F: Documentation/devicetree/bindings/i2c/i2c-mux-ltc4306.txt
F: drivers/i2c/muxes/i2c-mux-ltc4306.c
LTP (Linux Test Project)
-M: Mike Frysinger <[email protected]>
M: Cyril Hrubis <[email protected]>
-M: Wanlong Gao <[email protected]>
M: Jan Stancek <[email protected]>
-M: Stanislav Kholmanskikh <[email protected]>
-M: Alexey Kodanev <[email protected]>
+M: Petr Vorel <[email protected]>
+M: Li Wang <[email protected]>
+M: Yang Xu <[email protected]>
L: [email protected] (subscribers-only)
S: Maintained
W: http://linux-test-project.github.io/
@@ -19738,6 +19737,19 @@ T: git git://linuxtv.org/media_tree.git
F: drivers/media/i2c/rj54n1cb0c.c
F: include/media/i2c/rj54n1cb0c.h
+SHRINKER
+M: Andrew Morton <[email protected]>
+M: Dave Chinner <[email protected]>
+R: Qi Zheng <[email protected]>
+R: Roman Gushchin <[email protected]>
+R: Muchun Song <[email protected]>
+S: Maintained
+F: Documentation/admin-guide/mm/shrinker_debugfs.rst
+F: include/linux/shrinker.h
+F: mm/shrinker.c
+F: mm/shrinker_debug.c
+
SH_VOU V4L2 OUTPUT DRIVER
S: Orphan
@@ -24263,11 +24275,13 @@ N: zstd
K: zstd
ZSWAP COMPRESSED SWAP CACHING
-M: Seth Jennings <[email protected]>
-M: Dan Streetman <[email protected]>
-M: Vitaly Wool <[email protected]>
+M: Johannes Weiner <[email protected]>
+M: Yosry Ahmed <[email protected]>
+M: Nhat Pham <[email protected]>
S: Maintained
+F: Documentation/admin-guide/mm/zswap.rst
+F: include/linux/zswap.h
F: mm/zswap.c
THE REST
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 946cc610eef5..1cbc5807bc80 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -443,33 +443,36 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
struct bch_ioctl_subvolume arg)
{
+ const char __user *name = (void __user *)(unsigned long)arg.dst_ptr;
struct path path;
struct inode *dir;
+ struct dentry *victim;
int ret = 0;
if (arg.flags)
return -EINVAL;
- ret = user_path_at(arg.dirfd,
- (const char __user *)(unsigned long)arg.dst_ptr,
- LOOKUP_FOLLOW, &path);
- if (ret)
- return ret;
+ victim = user_path_locked_at(arg.dirfd, name, &path);
+ if (IS_ERR(victim))
+ return PTR_ERR(victim);
- if (path.dentry->d_sb->s_fs_info != c) {
+ if (victim->d_sb->s_fs_info != c) {
ret = -EXDEV;
goto err;
}
-
- dir = path.dentry->d_parent->d_inode;
-
- ret = __bch2_unlink(dir, path.dentry, true);
- if (ret)
+ if (!d_is_positive(victim)) {
+ ret = -ENOENT;
goto err;
-
- fsnotify_rmdir(dir, path.dentry);
- d_delete(path.dentry);
+ }
+ dir = d_inode(path.dentry);
+ ret = __bch2_unlink(dir, victim, true);
+ if (!ret) {
+ fsnotify_rmdir(dir, victim);
+ d_delete(victim);
+ }
+ inode_unlock(dir);
err:
+ dput(victim);
path_put(&path);
return ret;
}
diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
index e918decb3735..0356c88252bd 100644
--- a/fs/exfat/balloc.c
+++ b/fs/exfat/balloc.c
@@ -5,42 +5,23 @@
#include <linux/blkdev.h>
#include <linux/slab.h>
+#include <linux/bitmap.h>
#include <linux/buffer_head.h>
#include "exfat_raw.h"
#include "exfat_fs.h"
-static const unsigned char free_bit[] = {
- 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/* 0 ~ 19*/
- 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,/* 20 ~ 39*/
- 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/* 40 ~ 59*/
- 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/* 60 ~ 79*/
- 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2,/* 80 ~ 99*/
- 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,/*100 ~ 119*/
- 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*120 ~ 139*/
- 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,/*140 ~ 159*/
- 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*160 ~ 179*/
- 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,/*180 ~ 199*/
- 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*200 ~ 219*/
- 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/*220 ~ 239*/
- 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 /*240 ~ 254*/
-};
-
-static const unsigned char used_bit[] = {
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3,/* 0 ~ 19*/
- 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4,/* 20 ~ 39*/
- 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5,/* 40 ~ 59*/
- 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,/* 60 ~ 79*/
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4,/* 80 ~ 99*/
- 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,/*100 ~ 119*/
- 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4,/*120 ~ 139*/
- 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,/*140 ~ 159*/
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,/*160 ~ 179*/
- 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5,/*180 ~ 199*/
- 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6,/*200 ~ 219*/
- 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,/*220 ~ 239*/
- 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 /*240 ~ 255*/
-};
+#if BITS_PER_LONG == 32
+#define __le_long __le32
+#define lel_to_cpu(A) le32_to_cpu(A)
+#define cpu_to_lel(A) cpu_to_le32(A)
+#elif BITS_PER_LONG == 64
+#define __le_long __le64
+#define lel_to_cpu(A) le64_to_cpu(A)
+#define cpu_to_lel(A) cpu_to_le64(A)
+#else
+#error "BITS_PER_LONG not 32 or 64"
+#endif
/*
* Allocation Bitmap Management Functions
@@ -200,32 +181,35 @@ unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu)
{
unsigned int i, map_i, map_b, ent_idx;
unsigned int clu_base, clu_free;
- unsigned char k, clu_mask;
+ unsigned long clu_bits, clu_mask;
struct exfat_sb_info *sbi = EXFAT_SB(sb);
+ __le_long bitval;
WARN_ON(clu < EXFAT_FIRST_CLUSTER);
- ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
- clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK));
+ ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG);
+ clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx);
clu_mask = IGNORED_BITS_REMAINED(clu, clu_base);
map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx);
for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters;
- i += BITS_PER_BYTE) {
- k = *(sbi->vol_amap[map_i]->b_data + map_b);
+ i += BITS_PER_LONG) {
+ bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data + map_b);
if (clu_mask > 0) {
- k |= clu_mask;
+ bitval |= cpu_to_lel(clu_mask);
clu_mask = 0;
}
- if (k < 0xFF) {
- clu_free = clu_base + free_bit[k];
+ if (lel_to_cpu(bitval) != ULONG_MAX) {
+ clu_bits = lel_to_cpu(bitval);
+ clu_free = clu_base + ffz(clu_bits);
if (clu_free < sbi->num_clusters)
return clu_free;
}
- clu_base += BITS_PER_BYTE;
+ clu_base += BITS_PER_LONG;
+ map_b += sizeof(long);
- if (++map_b >= sb->s_blocksize ||
+ if (map_b >= sb->s_blocksize ||
clu_base >= sbi->num_clusters) {
if (++map_i >= sbi->map_sectors) {
clu_base = EXFAT_FIRST_CLUSTER;
@@ -244,25 +228,24 @@ int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count)
unsigned int count = 0;
unsigned int i, map_i = 0, map_b = 0;
unsigned int total_clus = EXFAT_DATA_CLUSTER_COUNT(sbi);
- unsigned int last_mask = total_clus & BITS_PER_BYTE_MASK;
- unsigned char clu_bits;
- const unsigned char last_bit_mask[] = {0, 0b00000001, 0b00000011,
- 0b00000111, 0b00001111, 0b00011111, 0b00111111, 0b01111111};
+ unsigned int last_mask = total_clus & (BITS_PER_LONG - 1);
+ unsigned long *bitmap, clu_bits;
total_clus &= ~last_mask;
- for (i = 0; i < total_clus; i += BITS_PER_BYTE) {
- clu_bits = *(sbi->vol_amap[map_i]->b_data + map_b);
- count += used_bit[clu_bits];
- if (++map_b >= (unsigned int)sb->s_blocksize) {
+ for (i = 0; i < total_clus; i += BITS_PER_LONG) {
+ bitmap = (void *)(sbi->vol_amap[map_i]->b_data + map_b);
+ count += hweight_long(*bitmap);
+ map_b += sizeof(long);
+ if (map_b >= (unsigned int)sb->s_blocksize) {
map_i++;
map_b = 0;
}
}
if (last_mask) {
- clu_bits = *(sbi->vol_amap[map_i]->b_data + map_b);
- clu_bits &= last_bit_mask[last_mask];
- count += used_bit[clu_bits];
+ bitmap = (void *)(sbi->vol_amap[map_i]->b_data + map_b);
+ clu_bits = lel_to_cpu(*(__le_long *)bitmap);
+ count += hweight_long(clu_bits & BITMAP_LAST_WORD_MASK(last_mask));
}
*ret_count = count;
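
A quick illustration of the word-wide helpers that replace the deleted
free_bit[] and used_bit[] byte tables (the value of w is illustrative)::

	unsigned long w = 0xf0ffUL;	/* bits 0-7 and 12-15 set */

	ffz(w);			/* == 8: first zero bit, replacing free_bit[] */
	hweight_long(w);	/* == 12: count of set bits, replacing used_bit[] */
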
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index a7a2c35d74fb..9474cd50da6d 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -135,8 +135,7 @@ enum {
#define BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent) (ent & BITS_PER_SECTOR_MASK(sb))
#define BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent) \
((ent / BITS_PER_BYTE) & ((sb)->s_blocksize - 1))
-#define BITS_PER_BYTE_MASK 0x7
-#define IGNORED_BITS_REMAINED(clu, clu_base) ((1 << ((clu) - (clu_base))) - 1)
+#define IGNORED_BITS_REMAINED(clu, clu_base) ((1UL << ((clu) - (clu_base))) - 1)
#define ES_ENTRY_NUM(name_len) (ES_IDX_LAST_FILENAME(name_len) + 1)
/* 19 entries = 1 file entry + 1 stream entry + 17 filename entries */
@@ -208,6 +207,7 @@ struct exfat_dir_entry {
unsigned char flags;
unsigned short attr;
loff_t size;
+ loff_t valid_size;
unsigned int num_subdirs;
struct timespec64 atime;
struct timespec64 mtime;
@@ -317,6 +317,7 @@ struct exfat_inode_info {
loff_t i_size_aligned;
/* on-disk position of directory entry or 0 */
loff_t i_pos;
+ loff_t valid_size;
/* hash by i_location */
struct hlist_node i_hash_fat;
/* protect bmap against truncate */
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index bfdfafe00993..d25a96a148af 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -11,37 +11,76 @@
#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/msdos_fs.h>
+#include <linux/writeback.h>
#include "exfat_raw.h"
#include "exfat_fs.h"
static int exfat_cont_expand(struct inode *inode, loff_t size)
{
- struct address_space *mapping = inode->i_mapping;
- loff_t start = i_size_read(inode), count = size - i_size_read(inode);
- int err, err2;
+ int ret;
+ unsigned int num_clusters, new_num_clusters, last_clu;
+ struct exfat_inode_info *ei = EXFAT_I(inode);
+ struct super_block *sb = inode->i_sb;
+ struct exfat_sb_info *sbi = EXFAT_SB(sb);
+ struct exfat_chain clu;
- err = generic_cont_expand_simple(inode, size);
- if (err)
- return err;
+ ret = inode_newsize_ok(inode, size);
+ if (ret)
+ return ret;
+
+ num_clusters = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi);
+ new_num_clusters = EXFAT_B_TO_CLU_ROUND_UP(size, sbi);
+
+ if (new_num_clusters == num_clusters)
+ goto out;
+
+ exfat_chain_set(&clu, ei->start_clu, num_clusters, ei->flags);
+ ret = exfat_find_last_cluster(sb, &clu, &last_clu);
+ if (ret)
+ return ret;
+
+ clu.dir = (last_clu == EXFAT_EOF_CLUSTER) ?
+ EXFAT_EOF_CLUSTER : last_clu + 1;
+ clu.size = 0;
+ clu.flags = ei->flags;
+
+ ret = exfat_alloc_cluster(inode, new_num_clusters - num_clusters,
+ &clu, IS_DIRSYNC(inode));
+ if (ret)
+ return ret;
+
+ /* Append new clusters to chain */
+ if (clu.flags != ei->flags) {
+ exfat_chain_cont_cluster(sb, ei->start_clu, num_clusters);
+ ei->flags = ALLOC_FAT_CHAIN;
+ }
+ if (clu.flags == ALLOC_FAT_CHAIN)
+ if (exfat_ent_set(sb, last_clu, clu.dir))
+ goto free_clu;
+ if (num_clusters == 0)
+ ei->start_clu = clu.dir;
+
+out:
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
- mark_inode_dirty(inode);
+ /* Expanded range not zeroed, do not update valid_size */
+ i_size_write(inode, size);
- if (!IS_SYNC(inode))
- return 0;
+ ei->i_size_aligned = round_up(size, sb->s_blocksize);
+ ei->i_size_ondisk = ei->i_size_aligned;
+ inode->i_blocks = round_up(size, sbi->cluster_size) >> 9;
- err = filemap_fdatawrite_range(mapping, start, start + count - 1);
- err2 = sync_mapping_buffers(mapping);
- if (!err)
- err = err2;
- err2 = write_inode_now(inode, 1);
- if (!err)
- err = err2;
- if (err)
- return err;
+ if (IS_DIRSYNC(inode))
+ return write_inode_now(inode, 1);
+
+ mark_inode_dirty(inode);
- return filemap_fdatawait_range(mapping, start, start + count - 1);
+ return 0;
+
+free_clu:
+ exfat_free_cluster(inode, &clu);
+ return -EIO;
}
static bool exfat_allow_set_time(struct exfat_sb_info *sbi, struct inode *inode)
@@ -146,6 +185,9 @@ int __exfat_truncate(struct inode *inode)
ei->start_clu = EXFAT_EOF_CLUSTER;
}
+ if (i_size_read(inode) < ei->valid_size)
+ ei->valid_size = i_size_read(inode);
+
if (ei->type == TYPE_FILE)
ei->attr |= EXFAT_ATTR_ARCHIVE;
@@ -474,15 +516,124 @@ int exfat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
return blkdev_issue_flush(inode->i_sb->s_bdev);
}
+static int exfat_file_zeroed_range(struct file *file, loff_t start, loff_t end)
+{
+ int err;
+ struct inode *inode = file_inode(file);
+ struct address_space *mapping = inode->i_mapping;
+ const struct address_space_operations *ops = mapping->a_ops;
+
+ while (start < end) {
+ u32 zerofrom, len;
+ struct page *page = NULL;
+
+ zerofrom = start & (PAGE_SIZE - 1);
+ len = PAGE_SIZE - zerofrom;
+ if (start + len > end)
+ len = end - start;
+
+ err = ops->write_begin(file, mapping, start, len, &page, NULL);
+ if (err)
+ goto out;
+
+ zero_user_segment(page, zerofrom, zerofrom + len);
+
+ err = ops->write_end(file, mapping, start, len, len, page, NULL);
+ if (err < 0)
+ goto out;
+ start += len;
+
+ balance_dirty_pages_ratelimited(mapping);
+ cond_resched();
+ }
+
+out:
+ return err;
+}
+
+static ssize_t exfat_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ ssize_t ret;
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ struct exfat_inode_info *ei = EXFAT_I(inode);
+ loff_t pos = iocb->ki_pos;
+ loff_t valid_size;
+
+ inode_lock(inode);
+
+ valid_size = ei->valid_size;
+
+ ret = generic_write_checks(iocb, iter);
+ if (ret < 0)
+ goto unlock;
+
+ if (pos > valid_size) {
+ ret = exfat_file_zeroed_range(file, valid_size, pos);
+ if (ret < 0 && ret != -ENOSPC) {
+ exfat_err(inode->i_sb,
+ "write: fail to zero from %llu to %llu(%zd)",
+ valid_size, pos, ret);
+ }
+ if (ret < 0)
+ goto unlock;
+ }
+
+ ret = __generic_file_write_iter(iocb, iter);
+ if (ret < 0)
+ goto unlock;
+
+ inode_unlock(inode);
+
+ if (pos > valid_size)
+ pos = valid_size;
+
+ if (iocb_is_dsync(iocb) && iocb->ki_pos > pos) {
+ ssize_t err = vfs_fsync_range(file, pos, iocb->ki_pos - 1,
+ iocb->ki_flags & IOCB_SYNC);
+ if (err < 0)
+ return err;
+ }
+
+ return ret;
+
+unlock:
+ inode_unlock(inode);
+
+ return ret;
+}
+
+static int exfat_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ int ret;
+ struct inode *inode = file_inode(file);
+ struct exfat_inode_info *ei = EXFAT_I(inode);
+ loff_t start = ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+ loff_t end = min_t(loff_t, i_size_read(inode),
+ start + vma->vm_end - vma->vm_start);
+
+ if ((vma->vm_flags & VM_WRITE) && ei->valid_size < end) {
+ ret = exfat_file_zeroed_range(file, ei->valid_size, end);
+ if (ret < 0) {
+ exfat_err(inode->i_sb,
+ "mmap: fail to zero from %llu to %llu(%d)",
+ start, end, ret);
+ return ret;
+ }
+ }
+
+ return generic_file_mmap(file, vma);
+}
+
const struct file_operations exfat_file_operations = {
.llseek = generic_file_llseek,
.read_iter = generic_file_read_iter,
- .write_iter = generic_file_write_iter,
+ .write_iter = exfat_file_write_iter,
.unlocked_ioctl = exfat_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = exfat_compat_ioctl,
#endif
- .mmap = generic_file_mmap,
+ .mmap = exfat_file_mmap,
.fsync = exfat_file_fsync,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
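
Schematically, the on-disk semantics these hooks implement (an illustration,
not part of the patch text)::

	0              valid_size                    i_size
	|--- written data ---|---- reads as zeroes ----|

A write landing beyond valid_size must first zero the [valid_size, pos)
gap, after which exfat_write_end() advances valid_size over the newly
written bytes.
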
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index e7ff58b8e68c..522edcbb2ce4 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -75,8 +75,17 @@ int __exfat_write_inode(struct inode *inode, int sync)
if (ei->start_clu == EXFAT_EOF_CLUSTER)
on_disk_size = 0;
- ep2->dentry.stream.valid_size = cpu_to_le64(on_disk_size);
- ep2->dentry.stream.size = ep2->dentry.stream.valid_size;
+ ep2->dentry.stream.size = cpu_to_le64(on_disk_size);
+ /*
+ * mmap write does not use exfat_write_end(), so valid_size may be
+ * extended to the sector-aligned length in exfat_get_block().
+ * We therefore need to fix up valid_size to the written length.
+ */
+ if (on_disk_size < ei->valid_size)
+ ep2->dentry.stream.valid_size = ep2->dentry.stream.size;
+ else
+ ep2->dentry.stream.valid_size = cpu_to_le64(ei->valid_size);
+
if (on_disk_size) {
ep2->dentry.stream.flags = ei->flags;
ep2->dentry.stream.start_clu = cpu_to_le32(ei->start_clu);
@@ -278,6 +287,7 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
unsigned int cluster, sec_offset;
sector_t last_block;
sector_t phys = 0;
+ sector_t valid_blks;
loff_t pos;
mutex_lock(&sbi->s_lock);
@@ -306,17 +316,32 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
mapped_blocks = sbi->sect_per_clus - sec_offset;
max_blocks = min(mapped_blocks, max_blocks);
- /* Treat newly added block / cluster */
- if (iblock < last_block)
- create = 0;
-
- if (create || buffer_delay(bh_result)) {
- pos = EXFAT_BLK_TO_B((iblock + 1), sb);
+ pos = EXFAT_BLK_TO_B((iblock + 1), sb);
+ if ((create && iblock >= last_block) || buffer_delay(bh_result)) {
if (ei->i_size_ondisk < pos)
ei->i_size_ondisk = pos;
}
+ map_bh(bh_result, sb, phys);
+ if (buffer_delay(bh_result))
+ clear_buffer_delay(bh_result);
+
if (create) {
+ valid_blks = EXFAT_B_TO_BLK_ROUND_UP(ei->valid_size, sb);
+
+ if (iblock + max_blocks < valid_blks) {
+ /* The range has been written, map it */
+ goto done;
+ } else if (iblock < valid_blks) {
+ /*
+ * The range has been partially written,
+ * map the written part.
+ */
+ max_blocks = valid_blks - iblock;
+ goto done;
+ }
+
+ /* The area has not been written, map and mark as new. */
err = exfat_map_new_buffer(ei, bh_result, pos);
if (err) {
exfat_fs_error(sb,
@@ -324,11 +349,58 @@ static int exfat_get_block(struct inode *inode, sector_t iblock,
pos, ei->i_size_aligned);
goto unlock_ret;
}
- }
- if (buffer_delay(bh_result))
- clear_buffer_delay(bh_result);
- map_bh(bh_result, sb, phys);
+ ei->valid_size = EXFAT_BLK_TO_B(iblock + max_blocks, sb);
+ mark_inode_dirty(inode);
+ } else {
+ valid_blks = EXFAT_B_TO_BLK(ei->valid_size, sb);
+
+ if (iblock + max_blocks < valid_blks) {
+ /* The range has been written, map it */
+ goto done;
+ } else if (iblock < valid_blks) {
+ /*
+ * The area has been partially written,
+ * map the written part.
+ */
+ max_blocks = valid_blks - iblock;
+ goto done;
+ } else if (iblock == valid_blks &&
+ (ei->valid_size & (sb->s_blocksize - 1))) {
+ /*
+ * The block has been partially written,
+ * zero the unwritten part and map the block.
+ */
+ loff_t size, off;
+
+ max_blocks = 1;
+
+ /*
+ * For direct read, the unwritten part will be zeroed in
+ * exfat_direct_IO()
+ */
+ if (!bh_result->b_folio)
+ goto done;
+
+ pos -= sb->s_blocksize;
+ size = ei->valid_size - pos;
+ off = pos & (PAGE_SIZE - 1);
+
+ folio_set_bh(bh_result, bh_result->b_folio, off);
+ err = bh_read(bh_result, 0);
+ if (err < 0)
+ goto unlock_ret;
+
+ folio_zero_segment(bh_result->b_folio, off + size,
+ off + sb->s_blocksize);
+ } else {
+ /*
+ * The range has not been written, clear the mapped flag
+ * to only zero the cache and do not read from disk.
+ */
+ clear_buffer_mapped(bh_result);
+ }
+ }
done:
bh_result->b_size = EXFAT_BLK_TO_B(max_blocks, sb);
unlock_ret:
@@ -343,6 +415,17 @@ static int exfat_read_folio(struct file *file, struct folio *folio)
static void exfat_readahead(struct readahead_control *rac)
{
+ struct address_space *mapping = rac->mapping;
+ struct inode *inode = mapping->host;
+ struct exfat_inode_info *ei = EXFAT_I(inode);
+ loff_t pos = readahead_pos(rac);
+
+ /* Range crosses valid_size, read it page by page. */
+ if (ei->valid_size < i_size_read(inode) &&
+ pos <= ei->valid_size &&
+ ei->valid_size < pos + readahead_length(rac))
+ return;
+
mpage_readahead(rac, exfat_get_block);
}
@@ -370,9 +453,7 @@ static int exfat_write_begin(struct file *file, struct address_space *mapping,
int ret;
*pagep = NULL;
- ret = cont_write_begin(file, mapping, pos, len, pagep, fsdata,
- exfat_get_block,
- &EXFAT_I(mapping->host)->i_size_ondisk);
+ ret = block_write_begin(mapping, pos, len, pagep, exfat_get_block);
if (ret < 0)
exfat_write_failed(mapping, pos+len);
@@ -400,6 +481,11 @@ static int exfat_write_end(struct file *file, struct address_space *mapping,
if (err < len)
exfat_write_failed(mapping, pos+len);
+ if (!(err < 0) && pos + err > ei->valid_size) {
+ ei->valid_size = pos + err;
+ mark_inode_dirty(inode);
+ }
+
if (!(err < 0) && !(ei->attr & EXFAT_ATTR_ARCHIVE)) {
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
ei->attr |= EXFAT_ATTR_ARCHIVE;
@@ -413,6 +499,8 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
+ struct exfat_inode_info *ei = EXFAT_I(inode);
+ loff_t pos = iocb->ki_pos;
loff_t size = iocb->ki_pos + iov_iter_count(iter);
int rw = iov_iter_rw(iter);
ssize_t ret;
@@ -436,8 +524,21 @@ static ssize_t exfat_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
* condition of exfat_get_block() and ->truncate().
*/
ret = blockdev_direct_IO(iocb, inode, iter, exfat_get_block);
- if (ret < 0 && (rw & WRITE))
- exfat_write_failed(mapping, size);
+ if (ret < 0) {
+ if (rw == WRITE)
+ exfat_write_failed(mapping, size);
+
+ if (ret != -EIOCBQUEUED)
+ return ret;
+ } else
+ size = pos + ret;
+
+ /* zero the unwritten part in the partially written block */
+ if (rw == READ && pos < ei->valid_size && ei->valid_size < size) {
+ iov_iter_revert(iter, size - ei->valid_size);
+ iov_iter_zero(size - ei->valid_size, iter);
+ }
+
return ret;
}
@@ -537,6 +638,7 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
ei->start_clu = info->start_clu;
ei->flags = info->flags;
ei->type = info->type;
+ ei->valid_size = info->valid_size;
ei->version = 0;
ei->hint_stat.eidx = 0;
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 5d737e0b639a..9c549fd11fc8 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -406,6 +406,7 @@ static int exfat_find_empty_entry(struct inode *inode,
i_size_write(inode, size);
ei->i_size_ondisk += sbi->cluster_size;
ei->i_size_aligned += sbi->cluster_size;
+ ei->valid_size += sbi->cluster_size;
ei->flags = p_dir->flags;
inode->i_blocks += sbi->cluster_size >> 9;
}
@@ -558,6 +559,8 @@ static int exfat_add_entry(struct inode *inode, const char *path,
info->size = clu_size;
info->num_subdirs = EXFAT_MIN_SUBDIR;
}
+ info->valid_size = info->size;
+
memset(&info->crtime, 0, sizeof(info->crtime));
memset(&info->mtime, 0, sizeof(info->mtime));
memset(&info->atime, 0, sizeof(info->atime));
@@ -660,6 +663,8 @@ static int exfat_find(struct inode *dir, struct qstr *qname,
info->type = exfat_get_entry_type(ep);
info->attr = le16_to_cpu(ep->dentry.file.attr);
info->size = le64_to_cpu(ep2->dentry.stream.valid_size);
+ info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size);
+ info->size = le64_to_cpu(ep2->dentry.stream.size);
if (info->size == 0) {
info->flags = ALLOC_NO_FAT_CHAIN;
info->start_clu = EXFAT_EOF_CLUSTER;
@@ -1288,6 +1293,7 @@ static int __exfat_rename(struct inode *old_parent_inode,
}
i_size_write(new_inode, 0);
+ new_ei->valid_size = 0;
new_ei->start_clu = EXFAT_EOF_CLUSTER;
new_ei->flags = ALLOC_NO_FAT_CHAIN;
}
diff --git a/fs/namei.c b/fs/namei.c
index 5c318d657503..4e0de939fea1 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2572,13 +2572,13 @@ static int filename_parentat(int dfd, struct filename *name,
}
/* does lookup, returns the object with parent locked */
-static struct dentry *__kern_path_locked(struct filename *name, struct path *path)
+static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct path *path)
{
struct dentry *d;
struct qstr last;
int type, error;
- error = filename_parentat(AT_FDCWD, name, 0, path, &last, &type);
+ error = filename_parentat(dfd, name, 0, path, &last, &type);
if (error)
return ERR_PTR(error);
if (unlikely(type != LAST_NORM)) {
@@ -2597,12 +2597,22 @@ static struct dentry *__kern_path_locked(struct filename *name, struct path *pat
struct dentry *kern_path_locked(const char *name, struct path *path)
{
struct filename *filename = getname_kernel(name);
- struct dentry *res = __kern_path_locked(filename, path);
+ struct dentry *res = __kern_path_locked(AT_FDCWD, filename, path);
putname(filename);
return res;
}
+struct dentry *user_path_locked_at(int dfd, const char __user *name, struct path *path)
+{
+ struct filename *filename = getname(name);
+ struct dentry *res = __kern_path_locked(dfd, filename, path);
+
+ putname(filename);
+ return res;
+}
+EXPORT_SYMBOL(user_path_locked_at);
+
int kern_path(const char *name, unsigned int flags, struct path *path)
{
struct filename *filename = getname_kernel(name);
diff --git a/fs/namespace.c b/fs/namespace.c
index ef1fd6829814..437f60e96d40 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -5042,13 +5042,12 @@ static struct mount *listmnt_next(struct mount *curr)
return node_to_mount(rb_next(&curr->mnt_node));
}
-static ssize_t do_listmount(struct mount *first, struct path *orig, u64 mnt_id,
- u64 __user *buf, size_t bufsize,
- const struct path *root)
+static ssize_t do_listmount(struct mount *first, struct path *orig,
+ u64 mnt_parent_id, u64 __user *mnt_ids,
+ size_t nr_mnt_ids, const struct path *root)
{
struct mount *r;
- ssize_t ctr;
- int err;
+ ssize_t ret;
/*
* Don't trigger audit denials. We just want to determine what
@@ -5058,50 +5057,57 @@ static ssize_t do_listmount(struct mount *first, struct path *orig, u64 mnt_id,
!ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
return -EPERM;
- err = security_sb_statfs(orig->dentry);
- if (err)
- return err;
+ ret = security_sb_statfs(orig->dentry);
+ if (ret)
+ return ret;
- for (ctr = 0, r = first; r && ctr < bufsize; r = listmnt_next(r)) {
- if (r->mnt_id_unique == mnt_id)
+ for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r)) {
+ if (r->mnt_id_unique == mnt_parent_id)
continue;
if (!is_path_reachable(r, r->mnt.mnt_root, orig))
continue;
- ctr = array_index_nospec(ctr, bufsize);
- if (put_user(r->mnt_id_unique, buf + ctr))
+ if (put_user(r->mnt_id_unique, mnt_ids))
return -EFAULT;
- if (check_add_overflow(ctr, 1, &ctr))
- return -ERANGE;
+ mnt_ids++;
+ nr_mnt_ids--;
+ ret++;
}
- return ctr;
+ return ret;
}
-SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
- u64 __user *, buf, size_t, bufsize, unsigned int, flags)
+SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *,
+ mnt_ids, size_t, nr_mnt_ids, unsigned int, flags)
{
struct mnt_namespace *ns = current->nsproxy->mnt_ns;
struct mnt_id_req kreq;
struct mount *first;
struct path root, orig;
- u64 mnt_id, last_mnt_id;
+ u64 mnt_parent_id, last_mnt_id;
+ const size_t maxcount = (size_t)-1 >> 3;
ssize_t ret;
if (flags)
return -EINVAL;
+ if (unlikely(nr_mnt_ids > maxcount))
+ return -EFAULT;
+
+ if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids)))
+ return -EFAULT;
+
ret = copy_mnt_id_req(req, &kreq);
if (ret)
return ret;
- mnt_id = kreq.mnt_id;
+ mnt_parent_id = kreq.mnt_id;
last_mnt_id = kreq.param;
down_read(&namespace_sem);
get_fs_root(current->fs, &root);
- if (mnt_id == LSMT_ROOT) {
+ if (mnt_parent_id == LSMT_ROOT) {
orig = root;
} else {
ret = -ENOENT;
- orig.mnt = lookup_mnt_in_ns(mnt_id, ns);
+ orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns);
if (!orig.mnt)
goto err;
orig.dentry = orig.mnt->mnt_root;
@@ -5111,7 +5117,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
else
first = mnt_find_id_at(ns, last_mnt_id + 1);
- ret = do_listmount(first, &orig, mnt_id, buf, bufsize, &root);
+ ret = do_listmount(first, &orig, mnt_parent_id, mnt_ids, nr_mnt_ids, &root);
err:
path_put(&root);
up_read(&namespace_sem);
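
A userspace sketch of the renamed interface (struct layout, constants, and
syscall number as in the v6.8 UAPI headers; treat the details as
illustrative)::

	#include <stdint.h>
	#include <linux/mount.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Returns how many mount IDs were written to ids[], or -1 on error. */
	static ssize_t list_mounts(uint64_t *ids, size_t nr_mnt_ids)
	{
		struct mnt_id_req req = {
			.size   = MNT_ID_REQ_SIZE_VER0,
			.mnt_id = LSMT_ROOT,	/* start from the namespace root */
			.param  = 0,		/* resume token: last mnt_id seen */
		};

		return syscall(__NR_listmount, &req, ids, nr_mnt_ids, 0);
	}
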
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 62b16f42d5d2..3f78ebbb795f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2432,7 +2432,6 @@ static long pagemap_scan_flush_buffer(struct pagemap_scan_private *p)
static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
{
- struct mmu_notifier_range range;
struct pagemap_scan_private p = {0};
unsigned long walk_start;
size_t n_ranges_out = 0;
@@ -2448,15 +2447,9 @@ static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
if (ret)
return ret;
- /* Protection change for the range is going to happen. */
- if (p.arg.flags & PM_SCAN_WP_MATCHING) {
- mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA, 0,
- mm, p.arg.start, p.arg.end);
- mmu_notifier_invalidate_range_start(&range);
- }
-
for (walk_start = p.arg.start; walk_start < p.arg.end;
walk_start = p.arg.walk_end) {
+ struct mmu_notifier_range range;
long n_out;
if (fatal_signal_pending(current)) {
@@ -2467,8 +2460,20 @@ static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
ret = mmap_read_lock_killable(mm);
if (ret)
break;
+
+ /* Protection change for the range is going to happen. */
+ if (p.arg.flags & PM_SCAN_WP_MATCHING) {
+ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_VMA, 0,
+ mm, walk_start, p.arg.end);
+ mmu_notifier_invalidate_range_start(&range);
+ }
+
ret = walk_page_range(mm, walk_start, p.arg.end,
&pagemap_scan_ops, &p);
+
+ if (p.arg.flags & PM_SCAN_WP_MATCHING)
+ mmu_notifier_invalidate_range_end(&range);
+
mmap_read_unlock(mm);
n_out = pagemap_scan_flush_buffer(&p);
@@ -2494,9 +2499,6 @@ static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
if (pagemap_scan_writeback_args(&p.arg, uarg))
ret = -EFAULT;
- if (p.arg.flags & PM_SCAN_WP_MATCHING)
- mmu_notifier_invalidate_range_end(&range);
-
kfree(p.vec_buf);
return ret;
}
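
The fix moves the invalidation window inside the mmap_read_lock()ed loop
body, so each notification covers only the range actually walked.
Schematically (not part of the patch)::

	mmap_read_lock(mm);
	if (p.arg.flags & PM_SCAN_WP_MATCHING)
		mmu_notifier_invalidate_range_start(&range);	/* walk_start..end only */
	walk_page_range(mm, walk_start, p.arg.end, &pagemap_scan_ops, &p);
	if (p.arg.flags & PM_SCAN_WP_MATCHING)
		mmu_notifier_invalidate_range_end(&range);
	mmap_read_unlock(mm);
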
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 11e6434b8e71..8300a5286988 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -100,6 +100,7 @@ static inline int fsnotify_file(struct file *file, __u32 mask)
return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
}
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
/*
* fsnotify_file_area_perm - permission hook before access to file range
*/
@@ -145,6 +146,24 @@ static inline int fsnotify_open_perm(struct file *file)
return fsnotify_file(file, FS_OPEN_PERM);
}
+#else
+static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
+ const loff_t *ppos, size_t count)
+{
+ return 0;
+}
+
+static inline int fsnotify_file_perm(struct file *file, int perm_mask)
+{
+ return 0;
+}
+
+static inline int fsnotify_open_perm(struct file *file)
+{
+ return 0;
+}
+#endif
+
/*
* fsnotify_link_count - inode's link count changed
*/
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 3100371b5e32..74e0cc14ebf8 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -66,6 +66,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne
extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int);
extern void done_path_create(struct path *, struct dentry *);
extern struct dentry *kern_path_locked(const char *, struct path *);
+extern struct dentry *user_path_locked_at(int, const char __user *, struct path *);
int vfs_path_parent_lookup(struct filename *filename, unsigned int flags,
struct path *parent, struct qstr *last, int *type,
const struct path *root);
diff --git a/include/linux/rcu_notifier.h b/include/linux/rcu_notifier.h
index ebf371364581..5640f024773b 100644
--- a/include/linux/rcu_notifier.h
+++ b/include/linux/rcu_notifier.h
@@ -13,7 +13,7 @@
#define RCU_STALL_NOTIFY_NORM 1
#define RCU_STALL_NOTIFY_EXP 2
-#ifdef CONFIG_RCU_STALL_COMMON
+#if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
#include <linux/notifier.h>
#include <linux/types.h>
@@ -21,12 +21,12 @@
int rcu_stall_chain_notifier_register(struct notifier_block *n);
int rcu_stall_chain_notifier_unregister(struct notifier_block *n);
-#else // #ifdef CONFIG_RCU_STALL_COMMON
+#else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
// No RCU CPU stall warnings in Tiny RCU.
static inline int rcu_stall_chain_notifier_register(struct notifier_block *n) { return -EEXIST; }
static inline int rcu_stall_chain_notifier_unregister(struct notifier_block *n) { return -ENOENT; }
-#endif // #else // #ifdef CONFIG_RCU_STALL_COMMON
+#endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
#endif /* __LINUX_RCU_NOTIFIER_H */
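
A usage sketch for the interface guarded above (illustrative; per the new
Kconfig help text, real callbacks should stay lockless)::

	#include <linux/rcu_notifier.h>

	static atomic_t stall_seen;

	static int my_stall_cb(struct notifier_block *nb,
			       unsigned long val, void *data)
	{
		/* val is RCU_STALL_NOTIFY_NORM or RCU_STALL_NOTIFY_EXP. */
		atomic_inc(&stall_seen);
		return NOTIFY_OK;
	}

	static struct notifier_block my_stall_nb = {
		.notifier_call = my_stall_cb,
	};

	/* Returns -EEXIST unless booted with rcupdate.rcu_cpu_stall_notifiers=1. */
	err = rcu_stall_chain_notifier_register(&my_stall_nb);
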
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index d29740be4833..3dc1e58865f7 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -355,7 +355,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
})
/**
- * list_next_or_null_rcu - get the first element from a list
+ * list_next_or_null_rcu - get the next element from a list
* @head: the head for the list.
* @ptr: the list head to take the next element from.
* @type: the type of the struct this is embedded in.
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index f7206b2623c9..0746b1b0b663 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -34,9 +34,6 @@
#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
-#define ulong2long(a) (*(long *)(&(a)))
-#define USHORT_CMP_GE(a, b) (USHRT_MAX / 2 >= (unsigned short)((a) - (b)))
-#define USHORT_CMP_LT(a, b) (USHRT_MAX / 2 < (unsigned short)((a) - (b)))
/* Exported common interfaces */
void call_rcu(struct rcu_head *head, rcu_callback_t func);
@@ -301,6 +298,11 @@ static inline void rcu_lock_acquire(struct lockdep_map *map)
lock_acquire(map, 0, 0, 2, 0, NULL, _THIS_IP_);
}
+static inline void rcu_try_lock_acquire(struct lockdep_map *map)
+{
+ lock_acquire(map, 0, 1, 2, 0, NULL, _THIS_IP_);
+}
+
static inline void rcu_lock_release(struct lockdep_map *map)
{
lock_release(map, _THIS_IP_);
@@ -315,6 +317,7 @@ int rcu_read_lock_any_held(void);
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
# define rcu_lock_acquire(a) do { } while (0)
+# define rcu_try_lock_acquire(a) do { } while (0)
# define rcu_lock_release(a) do { } while (0)
static inline int rcu_read_lock_held(void)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 127ef3b2e607..236610e4a8fa 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -229,7 +229,7 @@ static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp
srcu_check_nmi_safety(ssp, true);
retval = __srcu_read_lock_nmisafe(ssp);
- rcu_lock_acquire(&ssp->dep_map);
+ rcu_try_lock_acquire(&ssp->dep_map);
return retval;
}
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5c0dbef55792..cdba4d0c6d4a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -414,7 +414,7 @@ asmlinkage long sys_statmount(const struct mnt_id_req __user *req,
struct statmount __user *buf, size_t bufsize,
unsigned int flags);
asmlinkage long sys_listmount(const struct mnt_id_req __user *req,
- u64 __user *buf, size_t bufsize,
+ u64 __user *mnt_ids, size_t nr_mnt_ids,
unsigned int flags);
asmlinkage long sys_truncate(const char __user *path, long length);
asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index d48315667752..75cd6a736d03 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -376,7 +376,6 @@ static int __init reserve_crashkernel_low(unsigned long long low_size)
crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1;
- insert_resource(&iomem_resource, &crashk_low_res);
#endif
return 0;
}
@@ -458,8 +457,19 @@ retry:
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
- insert_resource(&iomem_resource, &crashk_res);
}
+
+static __init int insert_crashkernel_resources(void)
+{
+ if (crashk_res.start < crashk_res.end)
+ insert_resource(&iomem_resource, &crashk_res);
+
+ if (crashk_low_res.start < crashk_low_res.end)
+ insert_resource(&iomem_resource, &crashk_low_res);
+
+ return 0;
+}
+early_initcall(insert_crashkernel_resources);
#endif
int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
@@ -867,7 +877,7 @@ subsys_initcall(crash_notes_memory_init);
* regions are online. So mutex lock __crash_hotplug_lock is used to
* serialize the crash hotplug handling specifically.
*/
-DEFINE_MUTEX(__crash_hotplug_lock);
+static DEFINE_MUTEX(__crash_hotplug_lock);
#define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock)
#define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index a08031b57a61..d08fc7b5db97 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -1257,6 +1257,7 @@ int kernel_kexec(void)
kexec_in_progress = true;
kernel_restart_prepare("kexec reboot");
migrate_to_reboot_cpu();
+ syscore_shutdown();
/*
* migrate_to_reboot_cpu() disables CPU hotplug assuming that
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 69d3cd2cfc3b..415d81e6ce70 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -124,7 +124,7 @@ struct call_rcu_chain {
struct rcu_head crc_rh;
bool crc_stop;
};
-struct call_rcu_chain *call_rcu_chain;
+struct call_rcu_chain *call_rcu_chain_list;
/* Forward reference. */
static void lock_torture_cleanup(void);
@@ -1074,12 +1074,12 @@ static int call_rcu_chain_init(void)
if (call_rcu_chains <= 0)
return 0;
- call_rcu_chain = kcalloc(call_rcu_chains, sizeof(*call_rcu_chain), GFP_KERNEL);
- if (!call_rcu_chain)
+ call_rcu_chain_list = kcalloc(call_rcu_chains, sizeof(*call_rcu_chain_list), GFP_KERNEL);
+ if (!call_rcu_chain_list)
return -ENOMEM;
for (i = 0; i < call_rcu_chains; i++) {
- call_rcu_chain[i].crc_stop = false;
- call_rcu(&call_rcu_chain[i].crc_rh, call_rcu_chain_cb);
+ call_rcu_chain_list[i].crc_stop = false;
+ call_rcu(&call_rcu_chain_list[i].crc_rh, call_rcu_chain_cb);
}
return 0;
}
@@ -1089,13 +1089,13 @@ static void call_rcu_chain_cleanup(void)
{
int i;
- if (!call_rcu_chain)
+ if (!call_rcu_chain_list)
return;
for (i = 0; i < call_rcu_chains; i++)
- smp_store_release(&call_rcu_chain[i].crc_stop, true);
+ smp_store_release(&call_rcu_chain_list[i].crc_stop, true);
rcu_barrier();
- kfree(call_rcu_chain);
- call_rcu_chain = NULL;
+ kfree(call_rcu_chain_list);
+ call_rcu_chain_list = NULL;
}
static void lock_torture_cleanup(void)
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 2984de629f74..9b0b52e1836f 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -105,6 +105,31 @@ config RCU_CPU_STALL_CPUTIME
The boot option rcupdate.rcu_cpu_stall_cputime has the same function
as this one, but will override this if it exists.
+config RCU_CPU_STALL_NOTIFIER
+ bool "Provide RCU CPU-stall notifiers"
+ depends on RCU_STALL_COMMON
+ depends on DEBUG_KERNEL
+ depends on RCU_EXPERT
+ default n
+ help
+ WARNING: You almost certainly do not want this!!!
+
+ Enable RCU CPU-stall notifiers, which are invoked just before
+ printing the RCU CPU stall warning. As such, bugs in notifier
+ callbacks can prevent stall warnings from being printed.
+ And the whole reason that a stall warning is being printed is
+ that something is hung up somewhere. Therefore, the notifier
+ callbacks must be written extremely carefully, preferably
+ containing only lockless code. After all, it is quite possible
+ that the whole reason that the RCU CPU stall is happening in
+ the first place is that someone forgot to release whatever lock
+ you are thinking of acquiring. In which case, having your
+ notifier callback acquire that lock will hang, preventing the
+ RCU CPU stall warning from appearing.
+
+ Say Y here if you want RCU CPU stall notifiers (you don't want them).
+ Say N if you are unsure.
+
config RCU_TRACE
bool "Enable tracing for RCU"
depends on DEBUG_KERNEL
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index b531c33e9545..f94f65877f2b 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -262,6 +262,8 @@ static inline bool rcu_stall_is_suppressed_at_boot(void)
return rcu_cpu_stall_suppress_at_boot && !rcu_inkernel_boot_has_ended();
}
+extern int rcu_cpu_stall_notifiers;
+
#ifdef CONFIG_RCU_STALL_COMMON
extern int rcu_cpu_stall_ftrace_dump;
@@ -659,10 +661,10 @@ static inline bool rcu_cpu_beenfullyonline(int cpu) { return true; }
bool rcu_cpu_beenfullyonline(int cpu);
#endif
-#ifdef CONFIG_RCU_STALL_COMMON
+#if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
int rcu_stall_notifier_call_chain(unsigned long val, void *v);
-#else // #ifdef CONFIG_RCU_STALL_COMMON
+#else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
static inline int rcu_stall_notifier_call_chain(unsigned long val, void *v) { return NOTIFY_DONE; }
-#endif // #else // #ifdef CONFIG_RCU_STALL_COMMON
+#endif // #else // #if defined(CONFIG_RCU_STALL_COMMON) && defined(CONFIG_RCU_CPU_STALL_NOTIFIER)
#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 30fc9d34e329..7567ca8e743c 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -2450,10 +2450,12 @@ static int rcu_torture_stall(void *args)
unsigned long stop_at;
VERBOSE_TOROUT_STRING("rcu_torture_stall task started");
- ret = rcu_stall_chain_notifier_register(&rcu_torture_stall_block);
- if (ret)
- pr_info("%s: rcu_stall_chain_notifier_register() returned %d, %sexpected.\n",
- __func__, ret, !IS_ENABLED(CONFIG_RCU_STALL_COMMON) ? "un" : "");
+ if (rcu_cpu_stall_notifiers) {
+ ret = rcu_stall_chain_notifier_register(&rcu_torture_stall_block);
+ if (ret)
+ pr_info("%s: rcu_stall_chain_notifier_register() returned %d, %sexpected.\n",
+ __func__, ret, !IS_ENABLED(CONFIG_RCU_STALL_COMMON) ? "un" : "");
+ }
if (stall_cpu_holdoff > 0) {
VERBOSE_TOROUT_STRING("rcu_torture_stall begin holdoff");
schedule_timeout_interruptible(stall_cpu_holdoff * HZ);
@@ -2497,7 +2499,7 @@ static int rcu_torture_stall(void *args)
cur_ops->readunlock(idx);
}
pr_alert("%s end.\n", __func__);
- if (!ret) {
+ if (rcu_cpu_stall_notifiers && !ret) {
ret = rcu_stall_chain_notifier_unregister(&rcu_torture_stall_block);
if (ret)
pr_info("%s: rcu_stall_chain_notifier_unregister() returned %d.\n", __func__, ret);
@@ -3872,7 +3874,9 @@ rcu_torture_init(void)
}
if (fqs_duration < 0)
fqs_duration = 0;
- if (fqs_duration) {
+ if (fqs_holdoff < 0)
+ fqs_holdoff = 0;
+ if (fqs_duration && fqs_holdoff) {
/* Create the fqs thread */
firsterr = torture_create_kthread(rcu_torture_fqs, NULL,
fqs_task);
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 560e99ec5333..0351a4e83529 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -772,20 +772,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock_nmisafe);
*/
static void srcu_gp_start(struct srcu_struct *ssp)
{
- struct srcu_data *sdp;
int state;
- if (smp_load_acquire(&ssp->srcu_sup->srcu_size_state) < SRCU_SIZE_WAIT_BARRIER)
- sdp = per_cpu_ptr(ssp->sda, get_boot_cpu_id());
- else
- sdp = this_cpu_ptr(ssp->sda);
lockdep_assert_held(&ACCESS_PRIVATE(ssp->srcu_sup, lock));
WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_sup->srcu_gp_seq, ssp->srcu_sup->srcu_gp_seq_needed));
- spin_lock_rcu_node(sdp); /* Interrupts already disabled. */
- rcu_segcblist_advance(&sdp->srcu_cblist,
- rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
- WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL));
- spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */
WRITE_ONCE(ssp->srcu_sup->srcu_gp_start, jiffies);
WRITE_ONCE(ssp->srcu_sup->srcu_n_exp_nodelay, 0);
smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
@@ -1271,9 +1261,11 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp,
* period (gp_num = X + 8). So acceleration fails.
*/
s = rcu_seq_snap(&ssp->srcu_sup->srcu_gp_seq);
- rcu_segcblist_advance(&sdp->srcu_cblist,
- rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
- WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s) && rhp);
+ if (rhp) {
+ rcu_segcblist_advance(&sdp->srcu_cblist,
+ rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
+ WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s));
+ }
if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
sdp->srcu_gp_seq_needed = s;
needgp = true;
@@ -1723,6 +1715,11 @@ static void srcu_invoke_callbacks(struct work_struct *work)
WARN_ON_ONCE(!rcu_segcblist_segempty(&sdp->srcu_cblist, RCU_NEXT_TAIL));
rcu_segcblist_advance(&sdp->srcu_cblist,
rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq));
+ /*
+ * Although this function is theoretically re-entrant, concurrent
+ * callback invocation is disallowed to avoid executing an SRCU barrier
+ * too early.
+ */
if (sdp->srcu_cblist_invoking ||
!rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
spin_unlock_irq_rcu_node(sdp);
@@ -1753,6 +1750,7 @@ static void srcu_invoke_callbacks(struct work_struct *work)
sdp->srcu_cblist_invoking = false;
more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
spin_unlock_irq_rcu_node(sdp);
+ /* An SRCU barrier or callbacks from previous nesting work may be pending */
if (more)
srcu_schedule_cbs_sdp(sdp, 0);
}
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index f54d5782eca0..732ad5b39946 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -975,7 +975,7 @@ static void check_holdout_task(struct task_struct *t,
t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
!rcu_tasks_is_holdout(t) ||
(IS_ENABLED(CONFIG_NO_HZ_FULL) &&
- !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
+ !is_idle_task(t) && READ_ONCE(t->rcu_tasks_idle_cpu) >= 0)) {
WRITE_ONCE(t->rcu_tasks_holdout, false);
list_del_init(&t->rcu_tasks_holdout_list);
put_task_struct(t);
@@ -993,7 +993,7 @@ static void check_holdout_task(struct task_struct *t,
t, ".I"[is_idle_task(t)],
"N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
- t->rcu_tasks_idle_cpu, cpu);
+ data_race(t->rcu_tasks_idle_cpu), cpu);
sched_show_task(t);
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 3ac3c846105f..1ae851777806 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2338,6 +2338,8 @@ void rcu_force_quiescent_state(void)
struct rcu_node *rnp;
struct rcu_node *rnp_old = NULL;
+ if (!rcu_gp_in_progress())
+ return;
/* Funnel through hierarchy to reduce memory contention. */
rnp = raw_cpu_read(rcu_data.mynode);
for (; rnp != NULL; rnp = rnp->parent) {
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index ac8e86babe44..5d666428546b 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -1061,6 +1061,7 @@ static int __init rcu_sysrq_init(void)
}
early_initcall(rcu_sysrq_init);
+#ifdef CONFIG_RCU_CPU_STALL_NOTIFIER
//////////////////////////////////////////////////////////////////////////////
//
@@ -1081,7 +1082,13 @@ static ATOMIC_NOTIFIER_HEAD(rcu_cpu_stall_notifier_list);
*/
int rcu_stall_chain_notifier_register(struct notifier_block *n)
{
- return atomic_notifier_chain_register(&rcu_cpu_stall_notifier_list, n);
+ int rcsn = rcu_cpu_stall_notifiers;
+
+ WARN(1, "Adding %pS() to RCU stall notifier list (%s).\n", n->notifier_call,
+ rcsn ? "possibly suppressing RCU CPU stall warnings" : "failed, so all is well");
+ if (rcsn)
+ return atomic_notifier_chain_register(&rcu_cpu_stall_notifier_list, n);
+ return -EEXIST;
}
EXPORT_SYMBOL_GPL(rcu_stall_chain_notifier_register);
@@ -1115,3 +1122,5 @@ int rcu_stall_notifier_call_chain(unsigned long val, void *v)
{
return atomic_notifier_call_chain(&rcu_cpu_stall_notifier_list, val, v);
}
+
+#endif // #ifdef CONFIG_RCU_CPU_STALL_NOTIFIER
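With this change, registration succeeds only when the kernel was booted with stall notifiers enabled (see the rcu_cpu_stall_notifiers parameter added below); otherwise it warns and returns -EEXIST. A minimal sketch of a client, assuming only the chain API shown above; the callback name and message are hypothetical:

    #include <linux/notifier.h>
    #include <linux/printk.h>
    #include <linux/rcu_notifier.h>

    static int my_stall_cb(struct notifier_block *nb,
                           unsigned long action, void *data)
    {
            pr_warn("RCU stall notifier fired, action=%lu\n", action);
            return NOTIFY_OK;
    }

    static struct notifier_block my_stall_nb = {
            .notifier_call = my_stall_cb,
    };

    static int __init my_stall_init(void)
    {
            /* Fails with -EEXIST unless stall notifiers were enabled at boot. */
            return rcu_stall_chain_notifier_register(&my_stall_nb);
    }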
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index c534d6806d3d..46aaaa9fe339 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -538,9 +538,15 @@ long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
EXPORT_SYMBOL_GPL(torture_sched_setaffinity);
#endif
+int rcu_cpu_stall_notifiers __read_mostly; // !0 = provide stall notifiers (rarely useful)
+EXPORT_SYMBOL_GPL(rcu_cpu_stall_notifiers);
+
#ifdef CONFIG_RCU_STALL_COMMON
int rcu_cpu_stall_ftrace_dump __read_mostly;
module_param(rcu_cpu_stall_ftrace_dump, int, 0644);
+#ifdef CONFIG_RCU_CPU_STALL_NOTIFIER
+module_param(rcu_cpu_stall_notifiers, int, 0444);
+#endif // #ifdef CONFIG_RCU_CPU_STALL_NOTIFIER
int rcu_cpu_stall_suppress __read_mostly; // !0 = suppress stall warnings.
EXPORT_SYMBOL_GPL(rcu_cpu_stall_suppress);
module_param(rcu_cpu_stall_suppress, int, 0644);
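Note the 0444 permissions on the new parameter: rcu_cpu_stall_notifiers is read-only at runtime, so it can be enabled only on the kernel command line, presumably as rcupdate.rcu_cpu_stall_notifiers=1 (assuming the usual module.parameter form for built-in code).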
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 97ce28f4d154..ba25129563ad 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -378,6 +378,8 @@ config DEBUG_INFO_BTF
depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST
depends on BPF_SYSCALL
depends on !DEBUG_INFO_DWARF5 || PAHOLE_VERSION >= 121
+ # pahole uses elfutils, which does not support Hexagon relocations
+ depends on !HEXAGON
help
Generate deduplicated BTF type information from DWARF debug info.
Turning this on expects presence of pahole tool, which will convert
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index 24c13dfb1e94..df6627f62402 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -487,6 +487,7 @@ void kasan_init_object_meta(struct kmem_cache *cache, const void *object)
__memset(alloc_meta, 0, sizeof(*alloc_meta));
/*
+ * Prepare the lock for saving auxiliary stack traces.
* Temporarily disable KASAN bug reporting to allow instrumented
* raw_spin_lock_init to access aux_lock, which resides inside
* of a redzone.
@@ -510,8 +511,13 @@ static void release_alloc_meta(struct kasan_alloc_meta *meta)
stack_depot_put(meta->aux_stack[0]);
stack_depot_put(meta->aux_stack[1]);
- /* Zero out alloc meta to mark it as invalid. */
- __memset(meta, 0, sizeof(*meta));
+ /*
+ * Zero out alloc meta to mark it as invalid but keep aux_lock
+ * initialized to avoid having to reinitialize it when another object
+ * is allocated in the same slot.
+ */
+ __memset(&meta->alloc_track, 0, sizeof(meta->alloc_track));
+ __memset(meta->aux_stack, 0, sizeof(meta->aux_stack));
}
static void release_free_meta(const void *object, struct kasan_free_meta *meta)
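A self-contained userspace analog of the pattern above: "invalidate" an object by zeroing only its data members, so that an embedded lock initialized once stays usable when the slot is reused. Names are hypothetical.

    #include <pthread.h>
    #include <string.h>

    struct meta {
            pthread_mutex_t aux_lock;       /* initialized once, never zeroed */
            unsigned long alloc_track[4];
            unsigned long aux_stack[2];
    };

    static void release_meta(struct meta *m)
    {
            memset(m->alloc_track, 0, sizeof(m->alloc_track));
            memset(m->aux_stack, 0, sizeof(m->aux_stack));
            /* m->aux_lock is deliberately left untouched. */
    }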
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b3c0ff52bb72..21890994c1d3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -101,9 +101,11 @@ static int set_memmap_mode(const char *val, const struct kernel_param *kp)
static int get_memmap_mode(char *buffer, const struct kernel_param *kp)
{
- if (*((int *)kp->arg) == MEMMAP_ON_MEMORY_FORCE)
- return sprintf(buffer, "force\n");
- return param_get_bool(buffer, kp);
+ int mode = *((int *)kp->arg);
+
+ if (mode == MEMMAP_ON_MEMORY_FORCE)
+ return sprintf(buffer, "force\n");
+ return sprintf(buffer, "%c\n", mode ? 'Y' : 'N');
}
static const struct kernel_param_ops memmap_mode_ops = {
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 89dc29f1e6c6..2c19f5515e36 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -26,6 +26,7 @@
#include <linux/pgtable.h>
#include <linux/swap.h>
#include <linux/cma.h>
+#include <linux/crash_dump.h>
#include "internal.h"
#include "slab.h"
#include "shuffle.h"
@@ -381,6 +382,11 @@ static void __init find_zone_movable_pfns_for_nodes(void)
goto out;
}
+ if (is_kdump_kernel()) {
+ pr_warn("The system is under kdump; ignoring kernelcore=mirror.\n");
+ goto out;
+ }
+
for_each_mem_region(r) {
if (memblock_is_mirror(r))
continue;
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 216ab4c8621f..20e3b0d9cf7e 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1393,6 +1393,12 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, struct mm_struct *mm,
err = -ENOENT;
break;
}
+ /* Avoid moving zeropages for now */
+ if (is_huge_zero_pmd(*src_pmd)) {
+ spin_unlock(ptl);
+ err = -EBUSY;
+ break;
+ }
/* Check if we can move the pmd without splitting it. */
if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) ||
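From userspace, the new check surfaces as EBUSY on the move ioctl. A hedged sketch of a UFFDIO_MOVE caller (Linux 6.8+ uapi) that falls back to copying in that case; error handling is trimmed, and a partial move may also be reported via the structure's move field:

    #include <errno.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/userfaultfd.h>

    static int move_or_copy(int uffd, void *dst, void *src, size_t len)
    {
            struct uffdio_move mv = {
                    .dst = (unsigned long)dst,
                    .src = (unsigned long)src,
                    .len = len,
                    .mode = 0,
            };

            if (ioctl(uffd, UFFDIO_MOVE, &mv) == 0)
                    return 0;
            if (errno == EBUSY) {           /* e.g. a huge zeropage */
                    memcpy(dst, src, len);  /* illustrative fallback */
                    return 0;
            }
            return -1;
    }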
diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh
index cb980b144ca1..fa5be6f57b00 100755
--- a/scripts/decode_stacktrace.sh
+++ b/scripts/decode_stacktrace.sh
@@ -16,6 +16,21 @@ elif type c++filt >/dev/null 2>&1 ; then
cppfilt_opts=-i
fi
+UTIL_SUFFIX=
+if [[ -z ${LLVM:-} ]]; then
+ UTIL_PREFIX=${CROSS_COMPILE:-}
+else
+ UTIL_PREFIX=llvm-
+ if [[ ${LLVM} == */ ]]; then
+ UTIL_PREFIX=${LLVM}${UTIL_PREFIX}
+ elif [[ ${LLVM} == -* ]]; then
+ UTIL_SUFFIX=${LLVM}
+ fi
+fi
+
+READELF=${UTIL_PREFIX}readelf${UTIL_SUFFIX}
+ADDR2LINE=${UTIL_PREFIX}addr2line${UTIL_SUFFIX}
+
if [[ $1 == "-r" ]] ; then
vmlinux=""
basepath="auto"
@@ -75,7 +90,7 @@ find_module() {
if [[ "$modpath" != "" ]] ; then
for fn in $(find "$modpath" -name "${module//_/[-_]}.ko*") ; do
- if readelf -WS "$fn" | grep -qwF .debug_line ; then
+ if ${READELF} -WS "$fn" | grep -qwF .debug_line ; then
echo $fn
return
fi
@@ -169,7 +184,7 @@ parse_symbol() {
if [[ $aarray_support == true && "${cache[$module,$address]+isset}" == "isset" ]]; then
local code=${cache[$module,$address]}
else
- local code=$(${CROSS_COMPILE}addr2line -i -e "$objfile" "$address" 2>/dev/null)
+ local code=$(${ADDR2LINE} -i -e "$objfile" "$address" 2>/dev/null)
if [[ $aarray_support == true ]]; then
cache[$module,$address]=$code
fi
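In practice this means the script now honors the same LLVM= convention as the kernel build: LLVM=1 selects llvm-readelf and llvm-addr2line, LLVM=/path/ prefixes them with that directory, and LLVM=-17 (for example) appends a version suffix; with LLVM unset, the ${CROSS_COMPILE}-prefixed binutils tools are used as before, e.g. LLVM=1 ./scripts/decode_stacktrace.sh vmlinux < oops.log (invocation form per the script's usage text).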
diff --git a/tools/testing/selftests/mm/hugepage-vmemmap.c b/tools/testing/selftests/mm/hugepage-vmemmap.c
index 5b354c209e93..894d28c3dd47 100644
--- a/tools/testing/selftests/mm/hugepage-vmemmap.c
+++ b/tools/testing/selftests/mm/hugepage-vmemmap.c
@@ -10,10 +10,7 @@
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
-
-#define MAP_LENGTH (2UL * 1024 * 1024)
-
-#define PAGE_SIZE 4096
+#include "vm_util.h"
#define PAGE_COMPOUND_HEAD (1UL << 15)
#define PAGE_COMPOUND_TAIL (1UL << 16)
@@ -39,6 +36,9 @@
#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
#endif
+static size_t pagesize;
+static size_t maplength;
+
static void write_bytes(char *addr, size_t length)
{
unsigned long i;
@@ -56,7 +56,7 @@ static unsigned long virt_to_pfn(void *addr)
if (fd < 0)
return -1UL;
- lseek(fd, (unsigned long)addr / PAGE_SIZE * sizeof(pagemap), SEEK_SET);
+ lseek(fd, (unsigned long)addr / pagesize * sizeof(pagemap), SEEK_SET);
read(fd, &pagemap, sizeof(pagemap));
close(fd);
@@ -86,7 +86,7 @@ static int check_page_flags(unsigned long pfn)
* this also verifies that the kernel has correctly set the fake page_head
* to tail while hugetlb_free_vmemmap is enabled.
*/
- for (i = 1; i < MAP_LENGTH / PAGE_SIZE; i++) {
+ for (i = 1; i < maplength / pagesize; i++) {
read(fd, &pageflags, sizeof(pageflags));
if ((pageflags & TAIL_PAGE_FLAGS) != TAIL_PAGE_FLAGS ||
(pageflags & HEAD_PAGE_FLAGS) == HEAD_PAGE_FLAGS) {
@@ -106,18 +106,25 @@ int main(int argc, char **argv)
void *addr;
unsigned long pfn;
- addr = mmap(MAP_ADDR, MAP_LENGTH, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0);
+ pagesize = psize();
+ maplength = default_huge_page_size();
+ if (!maplength) {
+ printf("Unable to determine huge page size\n");
+ exit(1);
+ }
+
+ addr = mmap(MAP_ADDR, maplength, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0);
if (addr == MAP_FAILED) {
perror("mmap");
exit(1);
}
/* Trigger allocation of HugeTLB page. */
- write_bytes(addr, MAP_LENGTH);
+ write_bytes(addr, maplength);
pfn = virt_to_pfn(addr);
if (pfn == -1UL) {
- munmap(addr, MAP_LENGTH);
+ munmap(addr, maplength);
perror("virt_to_pfn");
exit(1);
}
@@ -125,13 +132,13 @@ int main(int argc, char **argv)
printf("Returned address is %p whose pfn is %lx\n", addr, pfn);
if (check_page_flags(pfn) < 0) {
- munmap(addr, MAP_LENGTH);
+ munmap(addr, maplength);
perror("check_page_flags");
exit(1);
}
/* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
- if (munmap(addr, MAP_LENGTH)) {
+ if (munmap(addr, maplength)) {
perror("munmap");
exit(1);
}
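Both helpers come from the selftests' shared vm_util.h: psize() returns the base page size, and default_huge_page_size() derives the default huge page size (from /proc/meminfo's Hugepagesize line, if memory serves), returning 0 on failure, which the new error check handles. This lets the test run unmodified on configurations whose huge pages are not 2 MiB.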
diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
index 212c52ca90b5..f3f867129560 100755
--- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
+++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
@@ -67,7 +67,10 @@ ___EOF___
# build using nolibc on supported archs (smaller executable) and fall
# back to regular glibc on other ones.
if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \
- "||__ARM_EABI__||__aarch64__||__s390x__||__loongarch__\nyes\n#endif" \
+ "||__ARM_EABI__||__aarch64__||(__mips__ && _ABIO32)" \
+ "||__powerpc__||(__riscv && __riscv_xlen == 64)" \
+ "||__s390x__||__loongarch__" \
+ "\nyes\n#endif" \
| ${CROSS_COMPILE}gcc -E -nostdlib -xc - \
| grep -q '^yes'; then
# architecture supported by nolibc
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
index d44609937503..979edbf4c820 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
@@ -1 +1,4 @@
nohz_full=2-9
+rcutorture.stall_cpu=14
+rcutorture.stall_cpu_holdoff=90
+rcutorture.fwd_progress=0
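These boot parameters make the TREE07 scenario exercise the stall-warning path: stall_cpu=14 stalls a CPU for 14 seconds, stall_cpu_holdoff=90 delays the first stall until 90 seconds after boot, and fwd_progress=0 disables rcutorture's forward-progress testing, which would otherwise conflict with deliberately induced stalls.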