aboutsummaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/acpi.h9
-rw-r--r--include/linux/alarmtimer.h10
-rw-r--r--include/linux/alloc_tag.h16
-rw-r--r--include/linux/arch_topology.h4
-rw-r--r--include/linux/arm-smccc.h32
-rw-r--r--include/linux/asn1_decoder.h1
-rw-r--r--include/linux/asn1_encoder.h1
-rw-r--r--include/linux/attribute_container.h6
-rw-r--r--include/linux/auxiliary_bus.h2
-rw-r--r--include/linux/backing-file.h2
-rw-r--r--include/linux/bio-integrity.h4
-rw-r--r--include/linux/bio.h19
-rw-r--r--include/linux/bitmap.h140
-rw-r--r--include/linux/bits.h15
-rw-r--r--include/linux/blk-integrity.h14
-rw-r--r--include/linux/blk-mq.h118
-rw-r--r--include/linux/blk_types.h4
-rw-r--r--include/linux/blkdev.h113
-rw-r--r--include/linux/bpf.h25
-rw-r--r--include/linux/bpf_mem_alloc.h3
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/ceph/decode.h2
-rw-r--r--include/linux/ceph/libceph.h2
-rw-r--r--include/linux/ceph/osd_client.h2
-rw-r--r--include/linux/cgroup-defs.h3
-rw-r--r--include/linux/cleanup.h205
-rw-r--r--include/linux/clocksource.h1
-rw-r--r--include/linux/closure.h35
-rw-r--r--include/linux/compiler-gcc.h4
-rw-r--r--include/linux/compiler.h2
-rw-r--r--include/linux/coredump.h8
-rw-r--r--include/linux/coresight-pmu.h17
-rw-r--r--include/linux/coresight.h21
-rw-r--r--include/linux/cpufreq.h6
-rw-r--r--include/linux/cpuhotplug.h3
-rw-r--r--include/linux/cpumask.h212
-rw-r--r--include/linux/cxl-event.h175
-rw-r--r--include/linux/debugfs.h1
-rw-r--r--include/linux/debugobjects.h12
-rw-r--r--include/linux/delay.h79
-rw-r--r--include/linux/device-mapper.h1
-rw-r--r--include/linux/device.h3
-rw-r--r--include/linux/device/bus.h6
-rw-r--r--include/linux/device/class.h2
-rw-r--r--include/linux/device/driver.h2
-rw-r--r--include/linux/dw_apb_timer.h3
-rw-r--r--include/linux/efi.h2
-rw-r--r--include/linux/einj-cxl.h44
-rw-r--r--include/linux/energy_model.h29
-rw-r--r--include/linux/entry-common.h3
-rw-r--r--include/linux/entry-kvm.h5
-rw-r--r--include/linux/etherdevice.h2
-rw-r--r--include/linux/eventpoll.h2
-rw-r--r--include/linux/exportfs.h13
-rw-r--r--include/linux/f2fs_fs.h4
-rw-r--r--include/linux/fdtable.h13
-rw-r--r--include/linux/file.h8
-rw-r--r--include/linux/file_ref.h177
-rw-r--r--include/linux/filelock.h5
-rw-r--r--include/linux/find.h50
-rw-r--r--include/linux/folio_queue.h168
-rw-r--r--include/linux/fs.h119
-rw-r--r--include/linux/fs_context.h6
-rw-r--r--include/linux/fs_parser.h5
-rw-r--r--include/linux/fsl/enetc_mdio.h3
-rw-r--r--include/linux/fsl/mc.h2
-rw-r--r--include/linux/fsnotify_backend.h10
-rw-r--r--include/linux/hdmi.h9
-rw-r--r--include/linux/hisi_acc_qm.h56
-rw-r--r--include/linux/host1x.h1
-rw-r--r--include/linux/hrtimer.h51
-rw-r--r--include/linux/huge_mm.h18
-rw-r--r--include/linux/hugetlb.h10
-rw-r--r--include/linux/hwmon.h5
-rw-r--r--include/linux/ieee80211.h2
-rw-r--r--include/linux/iio/backend.h62
-rw-r--r--include/linux/iio/iio.h39
-rw-r--r--include/linux/input.h10
-rw-r--r--include/linux/interrupt.h47
-rw-r--r--include/linux/io_uring/cmd.h2
-rw-r--r--include/linux/io_uring_types.h89
-rw-r--r--include/linux/iomap.h40
-rw-r--r--include/linux/iopoll.h52
-rw-r--r--include/linux/irqchip/arm-gic-v4.h4
-rw-r--r--include/linux/irqflags.h6
-rw-r--r--include/linux/irqnr.h36
-rw-r--r--include/linux/jbd2.h15
-rw-r--r--include/linux/jiffies.h15
-rw-r--r--include/linux/kprobes.h9
-rw-r--r--include/linux/ksm.h10
-rw-r--r--include/linux/kvm_host.h20
-rw-r--r--include/linux/libata.h4
-rw-r--r--include/linux/lockdep.h2
-rw-r--r--include/linux/lsm/apparmor.h17
-rw-r--r--include/linux/lsm/bpf.h16
-rw-r--r--include/linux/lsm/selinux.h16
-rw-r--r--include/linux/lsm/smack.h17
-rw-r--r--include/linux/lsm_hook_defs.h22
-rw-r--r--include/linux/memblock.h1
-rw-r--r--include/linux/memcontrol.h12
-rw-r--r--include/linux/mlx5/mlx5_ifc.h2
-rw-r--r--include/linux/mm.h21
-rw-r--r--include/linux/mm_types.h72
-rw-r--r--include/linux/mman.h28
-rw-r--r--include/linux/mmzone.h8
-rw-r--r--include/linux/mnt_idmapping.h1
-rw-r--r--include/linux/mtd/map.h2
-rw-r--r--include/linux/mutex.h19
-rw-r--r--include/linux/netdevice.h30
-rw-r--r--include/linux/netfilter.h4
-rw-r--r--include/linux/netlink.h2
-rw-r--r--include/linux/nfs.h9
-rw-r--r--include/linux/nfs_common.h17
-rw-r--r--include/linux/nfs_fs_sb.h14
-rw-r--r--include/linux/nfs_xdr.h22
-rw-r--r--include/linux/nfslocalio.h90
-rw-r--r--include/linux/nodemask.h86
-rw-r--r--include/linux/nvme.h135
-rw-r--r--include/linux/page-flags.h12
-rw-r--r--include/linux/percpu.h6
-rw-r--r--include/linux/perf/arm_pmuv3.h1
-rw-r--r--include/linux/perf_event.h54
-rw-r--r--include/linux/phy.h9
-rw-r--r--include/linux/platform_data/ad5449.h39
-rw-r--r--include/linux/platform_data/dma-ep93xx.h94
-rw-r--r--include/linux/platform_data/eth-ep93xx.h10
-rw-r--r--include/linux/platform_data/keypad-ep93xx.h32
-rw-r--r--include/linux/platform_data/max6639.h15
-rw-r--r--include/linux/platform_data/spi-ep93xx.h15
-rw-r--r--include/linux/platform_device.h2
-rw-r--r--include/linux/pm_domain.h6
-rw-r--r--include/linux/posix-timers.h72
-rw-r--r--include/linux/posix_acl.h6
-rw-r--r--include/linux/prandom.h1
-rw-r--r--include/linux/preempt.h8
-rw-r--r--include/linux/printk.h11
-rw-r--r--include/linux/ptp_classify.h2
-rw-r--r--include/linux/random.h7
-rw-r--r--include/linux/rbtree_latch.h20
-rw-r--r--include/linux/rcutiny.h1
-rw-r--r--include/linux/rcutree.h1
-rw-r--r--include/linux/regmap.h38
-rw-r--r--include/linux/rwlock_rt.h10
-rw-r--r--include/linux/sbitmap.h2
-rw-r--r--include/linux/sched.h16
-rw-r--r--include/linux/sched/ext.h1
-rw-r--r--include/linux/sched/mm.h17
-rw-r--r--include/linux/sched/signal.h4
-rw-r--r--include/linux/sched/task_stack.h4
-rw-r--r--include/linux/security.h106
-rw-r--r--include/linux/sed-opal.h1
-rw-r--r--include/linux/seqlock.h98
-rw-r--r--include/linux/serial_8250.h2
-rw-r--r--include/linux/serial_s3c.h24
-rw-r--r--include/linux/shmem_fs.h6
-rw-r--r--include/linux/slab.h1
-rw-r--r--include/linux/soc/cirrus/ep93xx.h47
-rw-r--r--include/linux/soc/qcom/geni-se.h11
-rw-r--r--include/linux/soc/qcom/llcc-qcom.h2
-rw-r--r--include/linux/sockptr.h4
-rw-r--r--include/linux/soundwire/sdw_intel.h2
-rw-r--r--include/linux/spinlock_rt.h28
-rw-r--r--include/linux/srcu.h92
-rw-r--r--include/linux/srcutiny.h3
-rw-r--r--include/linux/srcutree.h67
-rw-r--r--include/linux/sunrpc/sched.h16
-rw-r--r--include/linux/sunrpc/svc.h7
-rw-r--r--include/linux/sunrpc/svcauth.h5
-rw-r--r--include/linux/sunrpc/xdr.h2
-rw-r--r--include/linux/swap.h1
-rw-r--r--include/linux/syscalls.h13
-rw-r--r--include/linux/sysfb.h7
-rw-r--r--include/linux/task_work.h5
-rw-r--r--include/linux/thermal.h6
-rw-r--r--include/linux/thread_info.h21
-rw-r--r--include/linux/tick.h10
-rw-r--r--include/linux/timekeeper_internal.h114
-rw-r--r--include/linux/timekeeping.h7
-rw-r--r--include/linux/timex.h8
-rw-r--r--include/linux/tpm.h5
-rw-r--r--include/linux/tracepoint.h20
-rw-r--r--include/linux/uaccess.h118
-rw-r--r--include/linux/unaligned.h146
-rw-r--r--include/linux/unicode.h4
-rw-r--r--include/linux/uprobes.h83
-rw-r--r--include/linux/usb.h8
-rw-r--r--include/linux/usb/composite.h2
-rw-r--r--include/linux/usb/func_utils.h86
-rw-r--r--include/linux/usb/gadget_configfs.h7
-rw-r--r--include/linux/usb/serial.h7
-rw-r--r--include/linux/usb/tcpci.h31
-rw-r--r--include/linux/usb/usbnet.h15
-rw-r--r--include/linux/user_namespace.h3
-rw-r--r--include/linux/userfaultfd_k.h5
-rw-r--r--include/linux/vdpa.h9
-rw-r--r--include/linux/virtio_net.h4
-rw-r--r--include/linux/vm_event_item.h2
-rw-r--r--include/linux/wait.h5
-rw-r--r--include/linux/wait_bit.h444
-rw-r--r--include/linux/workqueue.h2
-rw-r--r--include/linux/writeback.h32
-rw-r--r--include/linux/ww_mutex.h14
-rw-r--r--include/linux/xattr.h4
203 files changed, 3939 insertions, 1642 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 1655c4c23a78..7dd24acd9ffe 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -363,6 +363,7 @@ void acpi_unregister_gsi (u32 gsi);
struct pci_dev;
+struct acpi_prt_entry *acpi_pci_irq_lookup(struct pci_dev *dev, int pin);
int acpi_pci_irq_enable (struct pci_dev *dev);
void acpi_penalize_isa_irq(int irq, int active);
bool acpi_isa_irq_available(int irq);
@@ -1163,8 +1164,6 @@ int acpi_subsys_suspend_noirq(struct device *dev);
int acpi_subsys_suspend(struct device *dev);
int acpi_subsys_freeze(struct device *dev);
int acpi_subsys_poweroff(struct device *dev);
-void acpi_ec_mark_gpe_for_wake(void);
-void acpi_ec_set_gpe_wake_mask(u8 action);
int acpi_subsys_restore_early(struct device *dev);
#else
static inline int acpi_subsys_prepare(struct device *dev) { return 0; }
@@ -1175,6 +1174,12 @@ static inline int acpi_subsys_suspend(struct device *dev) { return 0; }
static inline int acpi_subsys_freeze(struct device *dev) { return 0; }
static inline int acpi_subsys_poweroff(struct device *dev) { return 0; }
static inline int acpi_subsys_restore_early(struct device *dev) { return 0; }
+#endif
+
+#if defined(CONFIG_ACPI_EC) && defined(CONFIG_PM_SLEEP)
+void acpi_ec_mark_gpe_for_wake(void);
+void acpi_ec_set_gpe_wake_mask(u8 action);
+#else
static inline void acpi_ec_mark_gpe_for_wake(void) {}
static inline void acpi_ec_set_gpe_wake_mask(u8 action) {}
#endif
diff --git a/include/linux/alarmtimer.h b/include/linux/alarmtimer.h
index 05e758b8b894..3ffa5341dce2 100644
--- a/include/linux/alarmtimer.h
+++ b/include/linux/alarmtimer.h
@@ -20,12 +20,6 @@ enum alarmtimer_type {
ALARM_BOOTTIME_FREEZER,
};
-enum alarmtimer_restart {
- ALARMTIMER_NORESTART,
- ALARMTIMER_RESTART,
-};
-
-
#define ALARMTIMER_STATE_INACTIVE 0x00
#define ALARMTIMER_STATE_ENQUEUED 0x01
@@ -42,14 +36,14 @@ enum alarmtimer_restart {
struct alarm {
struct timerqueue_node node;
struct hrtimer timer;
- enum alarmtimer_restart (*function)(struct alarm *, ktime_t now);
+ void (*function)(struct alarm *, ktime_t now);
enum alarmtimer_type type;
int state;
void *data;
};
void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
- enum alarmtimer_restart (*function)(struct alarm *, ktime_t));
+ void (*function)(struct alarm *, ktime_t));
void alarm_start(struct alarm *alarm, ktime_t start);
void alarm_start_relative(struct alarm *alarm, ktime_t start);
void alarm_restart(struct alarm *alarm);
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 1f0a9ff23a2c..941deffc590d 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -135,18 +135,21 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
#endif
/* Caller should verify both ref and tag to be valid */
-static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
+static inline bool __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
{
alloc_tag_add_check(ref, tag);
if (!ref || !tag)
- return;
+ return false;
ref->ct = &tag->ct;
+ return true;
}
-static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
+static inline bool alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
{
- __alloc_tag_ref_set(ref, tag);
+ if (unlikely(!__alloc_tag_ref_set(ref, tag)))
+ return false;
+
/*
* We need in increment the call counter every time we have a new
* allocation or when we split a large allocation into smaller ones.
@@ -154,12 +157,13 @@ static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *t
* counter because when we free each part the counter will be decremented.
*/
this_cpu_inc(tag->counters->calls);
+ return true;
}
static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes)
{
- alloc_tag_ref_set(ref, tag);
- this_cpu_add(tag->counters->bytes, bytes);
+ if (likely(alloc_tag_ref_set(ref, tag)))
+ this_cpu_add(tag->counters->bytes, bytes);
}
static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index b721f360d759..4a952c4885ed 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -11,10 +11,6 @@
void topology_normalize_cpu_scale(void);
int topology_update_cpu_topology(void);
-#ifdef CONFIG_ACPI_CPPC_LIB
-void topology_init_cpu_capacity_cppc(void);
-#endif
-
struct device_node;
bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index f59099a213d0..67f6fdf2e7cd 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -315,8 +315,6 @@ u32 arm_smccc_get_version(void);
void __init arm_smccc_version_init(u32 version, enum arm_smccc_conduit conduit);
-extern u64 smccc_has_sve_hint;
-
/**
* arm_smccc_get_soc_id_version()
*
@@ -415,15 +413,6 @@ struct arm_smccc_quirk {
};
/**
- * __arm_smccc_sve_check() - Set the SVE hint bit when doing SMC calls
- *
- * Sets the SMCCC hint bit to indicate if there is live state in the SVE
- * registers, this modifies x0 in place and should never be called from C
- * code.
- */
-asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0);
-
-/**
* __arm_smccc_smc() - make SMC calls
* @a0-a7: arguments passed in registers 0 to 7
* @res: result values from registers 0 to 3
@@ -490,20 +479,6 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
#endif
-/* nVHE hypervisor doesn't have a current thread so needs separate checks */
-#if defined(CONFIG_ARM64_SVE) && !defined(__KVM_NVHE_HYPERVISOR__)
-
-#define SMCCC_SVE_CHECK ALTERNATIVE("nop \n", "bl __arm_smccc_sve_check \n", \
- ARM64_SVE)
-#define smccc_sve_clobbers "x16", "x30", "cc",
-
-#else
-
-#define SMCCC_SVE_CHECK
-#define smccc_sve_clobbers
-
-#endif
-
#define __constraint_read_2 "r" (arg0)
#define __constraint_read_3 __constraint_read_2, "r" (arg1)
#define __constraint_read_4 __constraint_read_3, "r" (arg2)
@@ -574,12 +549,11 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
register unsigned long r3 asm("r3"); \
CONCATENATE(__declare_arg_, \
COUNT_ARGS(__VA_ARGS__))(__VA_ARGS__); \
- asm volatile(SMCCC_SVE_CHECK \
- inst "\n" : \
+ asm volatile(inst "\n" : \
"=r" (r0), "=r" (r1), "=r" (r2), "=r" (r3) \
: CONCATENATE(__constraint_read_, \
COUNT_ARGS(__VA_ARGS__)) \
- : smccc_sve_clobbers "memory"); \
+ : "memory"); \
if (___res) \
*___res = (typeof(*___res)){r0, r1, r2, r3}; \
} while (0)
@@ -628,7 +602,7 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
asm ("" : \
: CONCATENATE(__constraint_read_, \
COUNT_ARGS(__VA_ARGS__)) \
- : smccc_sve_clobbers "memory"); \
+ : "memory"); \
if (___res) \
___res->a0 = SMCCC_RET_NOT_SUPPORTED; \
} while (0)
diff --git a/include/linux/asn1_decoder.h b/include/linux/asn1_decoder.h
index 83f9c6e1e5e9..b41bce82a191 100644
--- a/include/linux/asn1_decoder.h
+++ b/include/linux/asn1_decoder.h
@@ -9,6 +9,7 @@
#define _LINUX_ASN1_DECODER_H
#include <linux/asn1.h>
+#include <linux/types.h>
struct asn1_decoder;
diff --git a/include/linux/asn1_encoder.h b/include/linux/asn1_encoder.h
index 08cd0c2ad34f..d17484dffb74 100644
--- a/include/linux/asn1_encoder.h
+++ b/include/linux/asn1_encoder.h
@@ -6,7 +6,6 @@
#include <linux/types.h>
#include <linux/asn1.h>
#include <linux/asn1_ber_bytecode.h>
-#include <linux/bug.h>
#define asn1_oid_len(oid) (sizeof(oid)/sizeof(u32))
unsigned char *
diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h
index e4004d1e6725..b3643de9931d 100644
--- a/include/linux/attribute_container.h
+++ b/include/linux/attribute_container.h
@@ -61,14 +61,8 @@ int attribute_container_device_trigger_safe(struct device *dev,
int (*undo)(struct attribute_container *,
struct device *,
struct device *));
-void attribute_container_trigger(struct device *dev,
- int (*fn)(struct attribute_container *,
- struct device *));
int attribute_container_add_attrs(struct device *classdev);
int attribute_container_add_class_device(struct device *classdev);
-int attribute_container_add_class_device_adapter(struct attribute_container *cont,
- struct device *dev,
- struct device *classdev);
void attribute_container_remove_attrs(struct device *classdev);
void attribute_container_class_device_del(struct device *classdev);
struct attribute_container *attribute_container_classdev_to_container(struct device *);
diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h
index 662b8ae54b6a..31762324bcc9 100644
--- a/include/linux/auxiliary_bus.h
+++ b/include/linux/auxiliary_bus.h
@@ -271,6 +271,6 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv);
struct auxiliary_device *auxiliary_find_device(struct device *start,
const void *data,
- int (*match)(struct device *dev, const void *data));
+ device_match_t match);
#endif /* _AUXILIARY_BUS_H_ */
diff --git a/include/linux/backing-file.h b/include/linux/backing-file.h
index 4b61b0e57720..2eed0ffb5e8f 100644
--- a/include/linux/backing-file.h
+++ b/include/linux/backing-file.h
@@ -16,7 +16,7 @@ struct backing_file_ctx {
const struct cred *cred;
struct file *user_file;
void (*accessed)(struct file *);
- void (*end_write)(struct file *);
+ void (*end_write)(struct file *, loff_t, ssize_t);
};
struct file *backing_file_open(const struct path *user_path, int flags,
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index dd831c269e99..dbf0f74c1529 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -72,7 +72,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, gfp_t gfp,
unsigned int nr);
int bio_integrity_add_page(struct bio *bio, struct page *page, unsigned int len,
unsigned int offset);
-int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed);
+int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len);
void bio_integrity_unmap_user(struct bio *bio);
bool bio_integrity_prep(struct bio *bio);
void bio_integrity_advance(struct bio *bio, unsigned int bytes_done);
@@ -99,7 +99,7 @@ static inline void bioset_integrity_free(struct bio_set *bs)
}
static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
- ssize_t len, u32 seed)
+ ssize_t len)
{
return -EINVAL;
}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index faceadb040f9..60830a6a5939 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -418,8 +418,6 @@ bool __must_check bio_add_folio(struct bio *bio, struct folio *folio,
size_t len, size_t off);
extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
unsigned int, unsigned int);
-int bio_add_zone_append_page(struct bio *bio, struct page *page,
- unsigned int len, unsigned int offset);
void __bio_add_page(struct bio *bio, struct page *page,
unsigned int len, unsigned int off);
void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
@@ -677,6 +675,23 @@ static inline void bio_clear_polled(struct bio *bio)
bio->bi_opf &= ~REQ_POLLED;
}
+/**
+ * bio_is_zone_append - is this a zone append bio?
+ * @bio: bio to check
+ *
+ * Check if @bio is a zone append operation. Core block layer code and end_io
+ * handlers must use this instead of an open coded REQ_OP_ZONE_APPEND check
+ * because the block layer can rewrite REQ_OP_ZONE_APPEND to REQ_OP_WRITE if
+ * it is not natively supported.
+ */
+static inline bool bio_is_zone_append(struct bio *bio)
+{
+ if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
+ return false;
+ return bio_op(bio) == REQ_OP_ZONE_APPEND ||
+ bio_flagged(bio, BIO_EMULATES_ZONE_APPEND);
+}
+
struct bio *blk_next_bio(struct bio *bio, struct block_device *bdev,
unsigned int nr_pages, blk_opf_t opf, gfp_t gfp);
struct bio *bio_chain_and_submit(struct bio *prev, struct bio *new);
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index d3b66d77df7a..262b6596eca5 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -203,12 +203,12 @@ unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
* the bit offset of all zero areas this function finds is multiples of that
* power of 2. A @align_mask of 0 means no alignment is required.
*/
-static inline unsigned long
-bitmap_find_next_zero_area(unsigned long *map,
- unsigned long size,
- unsigned long start,
- unsigned int nr,
- unsigned long align_mask)
+static __always_inline
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+ unsigned long size,
+ unsigned long start,
+ unsigned int nr,
+ unsigned long align_mask)
{
return bitmap_find_next_zero_area_off(map, size, start, nr,
align_mask, 0);
@@ -228,7 +228,7 @@ void bitmap_fold(unsigned long *dst, const unsigned long *orig,
#define bitmap_size(nbits) (ALIGN(nbits, BITS_PER_LONG) / BITS_PER_BYTE)
-static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
+static __always_inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
{
unsigned int len = bitmap_size(nbits);
@@ -238,7 +238,7 @@ static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
memset(dst, 0, len);
}
-static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
+static __always_inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
{
unsigned int len = bitmap_size(nbits);
@@ -248,8 +248,8 @@ static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
memset(dst, 0xff, len);
}
-static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
- unsigned int nbits)
+static __always_inline
+void bitmap_copy(unsigned long *dst, const unsigned long *src, unsigned int nbits)
{
unsigned int len = bitmap_size(nbits);
@@ -262,8 +262,8 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
/*
* Copy bitmap and clear tail bits in last word.
*/
-static inline void bitmap_copy_clear_tail(unsigned long *dst,
- const unsigned long *src, unsigned int nbits)
+static __always_inline
+void bitmap_copy_clear_tail(unsigned long *dst, const unsigned long *src, unsigned int nbits)
{
bitmap_copy(dst, src, nbits);
if (nbits % BITS_PER_LONG)
@@ -318,16 +318,18 @@ void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits);
bitmap_copy_clear_tail((unsigned long *)(buf), (const unsigned long *)(bitmap), (nbits))
#endif
-static inline bool bitmap_and(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, unsigned int nbits)
+static __always_inline
+bool bitmap_and(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
return __bitmap_and(dst, src1, src2, nbits);
}
-static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, unsigned int nbits)
+static __always_inline
+void bitmap_or(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = *src1 | *src2;
@@ -335,8 +337,9 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
__bitmap_or(dst, src1, src2, nbits);
}
-static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, unsigned int nbits)
+static __always_inline
+void bitmap_xor(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = *src1 ^ *src2;
@@ -344,16 +347,17 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
__bitmap_xor(dst, src1, src2, nbits);
}
-static inline bool bitmap_andnot(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, unsigned int nbits)
+static __always_inline
+bool bitmap_andnot(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
return __bitmap_andnot(dst, src1, src2, nbits);
}
-static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
- unsigned int nbits)
+static __always_inline
+void bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = ~(*src);
@@ -368,8 +372,8 @@ static inline void bitmap_complement(unsigned long *dst, const unsigned long *sr
#endif
#define BITMAP_MEM_MASK (BITMAP_MEM_ALIGNMENT - 1)
-static inline bool bitmap_equal(const unsigned long *src1,
- const unsigned long *src2, unsigned int nbits)
+static __always_inline
+bool bitmap_equal(const unsigned long *src1, const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
@@ -388,10 +392,9 @@ static inline bool bitmap_equal(const unsigned long *src1,
*
* Returns: True if (*@src1 | *@src2) == *@src3, false otherwise
*/
-static inline bool bitmap_or_equal(const unsigned long *src1,
- const unsigned long *src2,
- const unsigned long *src3,
- unsigned int nbits)
+static __always_inline
+bool bitmap_or_equal(const unsigned long *src1, const unsigned long *src2,
+ const unsigned long *src3, unsigned int nbits)
{
if (!small_const_nbits(nbits))
return __bitmap_or_equal(src1, src2, src3, nbits);
@@ -399,9 +402,8 @@ static inline bool bitmap_or_equal(const unsigned long *src1,
return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits));
}
-static inline bool bitmap_intersects(const unsigned long *src1,
- const unsigned long *src2,
- unsigned int nbits)
+static __always_inline
+bool bitmap_intersects(const unsigned long *src1, const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
@@ -409,8 +411,8 @@ static inline bool bitmap_intersects(const unsigned long *src1,
return __bitmap_intersects(src1, src2, nbits);
}
-static inline bool bitmap_subset(const unsigned long *src1,
- const unsigned long *src2, unsigned int nbits)
+static __always_inline
+bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
@@ -418,7 +420,8 @@ static inline bool bitmap_subset(const unsigned long *src1,
return __bitmap_subset(src1, src2, nbits);
}
-static inline bool bitmap_empty(const unsigned long *src, unsigned nbits)
+static __always_inline
+bool bitmap_empty(const unsigned long *src, unsigned nbits)
{
if (small_const_nbits(nbits))
return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -426,7 +429,8 @@ static inline bool bitmap_empty(const unsigned long *src, unsigned nbits)
return find_first_bit(src, nbits) == nbits;
}
-static inline bool bitmap_full(const unsigned long *src, unsigned int nbits)
+static __always_inline
+bool bitmap_full(const unsigned long *src, unsigned int nbits)
{
if (small_const_nbits(nbits))
return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
@@ -460,8 +464,8 @@ unsigned long bitmap_weight_andnot(const unsigned long *src1,
return __bitmap_weight_andnot(src1, src2, nbits);
}
-static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
- unsigned int nbits)
+static __always_inline
+void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits)
{
if (__builtin_constant_p(nbits) && nbits == 1)
__set_bit(start, map);
@@ -476,8 +480,8 @@ static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
__bitmap_set(map, start, nbits);
}
-static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
- unsigned int nbits)
+static __always_inline
+void bitmap_clear(unsigned long *map, unsigned int start, unsigned int nbits)
{
if (__builtin_constant_p(nbits) && nbits == 1)
__clear_bit(start, map);
@@ -492,8 +496,9 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
__bitmap_clear(map, start, nbits);
}
-static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
- unsigned int shift, unsigned int nbits)
+static __always_inline
+void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
+ unsigned int shift, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift;
@@ -501,8 +506,9 @@ static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *s
__bitmap_shift_right(dst, src, shift, nbits);
}
-static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src,
- unsigned int shift, unsigned int nbits)
+static __always_inline
+void bitmap_shift_left(unsigned long *dst, const unsigned long *src,
+ unsigned int shift, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits);
@@ -510,11 +516,12 @@ static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *sr
__bitmap_shift_left(dst, src, shift, nbits);
}
-static inline void bitmap_replace(unsigned long *dst,
- const unsigned long *old,
- const unsigned long *new,
- const unsigned long *mask,
- unsigned int nbits)
+static __always_inline
+void bitmap_replace(unsigned long *dst,
+ const unsigned long *old,
+ const unsigned long *new,
+ const unsigned long *mask,
+ unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = (*old & ~(*mask)) | (*new & *mask);
@@ -557,8 +564,9 @@ static inline void bitmap_replace(unsigned long *dst,
* bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation.
* See bitmap_scatter() for details related to this relationship.
*/
-static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src,
- const unsigned long *mask, unsigned int nbits)
+static __always_inline
+void bitmap_scatter(unsigned long *dst, const unsigned long *src,
+ const unsigned long *mask, unsigned int nbits)
{
unsigned int n = 0;
unsigned int bit;
@@ -611,8 +619,9 @@ static inline void bitmap_scatter(unsigned long *dst, const unsigned long *src,
* bitmap_scatter(res, src, mask, n) and a call to
* bitmap_scatter(res, result, mask, n) will lead to the same res value.
*/
-static inline void bitmap_gather(unsigned long *dst, const unsigned long *src,
- const unsigned long *mask, unsigned int nbits)
+static __always_inline
+void bitmap_gather(unsigned long *dst, const unsigned long *src,
+ const unsigned long *mask, unsigned int nbits)
{
unsigned int n = 0;
unsigned int bit;
@@ -623,9 +632,9 @@ static inline void bitmap_gather(unsigned long *dst, const unsigned long *src,
__assign_bit(n++, dst, test_bit(bit, src));
}
-static inline void bitmap_next_set_region(unsigned long *bitmap,
- unsigned int *rs, unsigned int *re,
- unsigned int end)
+static __always_inline
+void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs,
+ unsigned int *re, unsigned int end)
{
*rs = find_next_bit(bitmap, end, *rs);
*re = find_next_zero_bit(bitmap, end, *rs + 1);
@@ -640,7 +649,8 @@ static inline void bitmap_next_set_region(unsigned long *bitmap,
* This is the complement to __bitmap_find_free_region() and releases
* the found region (by clearing it in the bitmap).
*/
-static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
+static __always_inline
+void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order)
{
bitmap_clear(bitmap, pos, BIT(order));
}
@@ -656,7 +666,8 @@ static inline void bitmap_release_region(unsigned long *bitmap, unsigned int pos
* Returns: 0 on success, or %-EBUSY if specified region wasn't
* free (not all bits were zero).
*/
-static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
+static __always_inline
+int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
{
unsigned int len = BIT(order);
@@ -680,7 +691,8 @@ static inline int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos
* Returns: the bit offset in bitmap of the allocated region,
* or -errno on failure.
*/
-static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
+static __always_inline
+int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order)
{
unsigned int pos, end; /* scans bitmap by regions of size order */
@@ -734,7 +746,7 @@ static inline int bitmap_find_free_region(unsigned long *bitmap, unsigned int bi
* That is ``(u32 *)(&val)[0]`` gets the upper 32 bits,
* but we expect the lower 32-bits of u64.
*/
-static inline void bitmap_from_u64(unsigned long *dst, u64 mask)
+static __always_inline void bitmap_from_u64(unsigned long *dst, u64 mask)
{
bitmap_from_arr64(dst, &mask, 64);
}
@@ -749,9 +761,8 @@ static inline void bitmap_from_u64(unsigned long *dst, u64 mask)
* @map memory region. For @nbits = 0 and @nbits > BITS_PER_LONG the return
* value is undefined.
*/
-static inline unsigned long bitmap_read(const unsigned long *map,
- unsigned long start,
- unsigned long nbits)
+static __always_inline
+unsigned long bitmap_read(const unsigned long *map, unsigned long start, unsigned long nbits)
{
size_t index = BIT_WORD(start);
unsigned long offset = start % BITS_PER_LONG;
@@ -784,8 +795,9 @@ static inline unsigned long bitmap_read(const unsigned long *map,
*
* For @nbits == 0 and @nbits > BITS_PER_LONG no writes are performed.
*/
-static inline void bitmap_write(unsigned long *map, unsigned long value,
- unsigned long start, unsigned long nbits)
+static __always_inline
+void bitmap_write(unsigned long *map, unsigned long value,
+ unsigned long start, unsigned long nbits)
{
size_t index;
unsigned long offset;
diff --git a/include/linux/bits.h b/include/linux/bits.h
index 0eb24d21aac2..60044b608817 100644
--- a/include/linux/bits.h
+++ b/include/linux/bits.h
@@ -36,4 +36,19 @@
#define GENMASK_ULL(h, l) \
(GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l))
+#if !defined(__ASSEMBLY__)
+/*
+ * Missing asm support
+ *
+ * __GENMASK_U128() depends on _BIT128() which would not work
+ * in the asm code, as it shifts an 'unsigned __init128' data
+ * type instead of direct representation of 128 bit constants
+ * such as long and unsigned long. The fundamental problem is
+ * that a 128 bit constant will get silently truncated by the
+ * gcc compiler.
+ */
+#define GENMASK_U128(h, l) \
+ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l))
+#endif
+
#endif /* __LINUX_BITS_H */
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index de98049b7ded..c7eae0bfb013 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -25,9 +25,10 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
-int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
- struct scatterlist *);
+int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
+int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
+ ssize_t bytes);
static inline bool
blk_integrity_queue_supports_integrity(struct request_queue *q)
@@ -96,12 +97,17 @@ static inline int blk_rq_count_integrity_sg(struct request_queue *q,
{
return 0;
}
-static inline int blk_rq_map_integrity_sg(struct request_queue *q,
- struct bio *b,
+static inline int blk_rq_map_integrity_sg(struct request *q,
struct scatterlist *s)
{
return 0;
}
+static inline int blk_rq_integrity_map_user(struct request *rq,
+ void __user *ubuf,
+ ssize_t bytes)
+{
+ return -EINVAL;
+}
static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
{
return NULL;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8d304b1d16b1..c596e0e4cb75 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -149,19 +149,13 @@ struct request {
* physical address coalescing is performed.
*/
unsigned short nr_phys_segments;
-
-#ifdef CONFIG_BLK_DEV_INTEGRITY
unsigned short nr_integrity_segments;
-#endif
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
struct bio_crypt_ctx *crypt_ctx;
struct blk_crypto_keyslot *crypt_keyslot;
#endif
- enum rw_hint write_hint;
- unsigned short ioprio;
-
enum mq_rq_state state;
atomic_t ref;
@@ -225,7 +219,9 @@ static inline bool blk_rq_is_passthrough(struct request *rq)
static inline unsigned short req_get_ioprio(struct request *req)
{
- return req->ioprio;
+ if (req->bio)
+ return req->bio->bi_ioprio;
+ return 0;
}
#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ)
@@ -233,62 +229,61 @@ static inline unsigned short req_get_ioprio(struct request *req)
#define rq_dma_dir(rq) \
(op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)
-#define rq_list_add(listptr, rq) do { \
- (rq)->rq_next = *(listptr); \
- *(listptr) = rq; \
-} while (0)
-
-#define rq_list_add_tail(lastpptr, rq) do { \
- (rq)->rq_next = NULL; \
- **(lastpptr) = rq; \
- *(lastpptr) = &rq->rq_next; \
-} while (0)
-
-#define rq_list_pop(listptr) \
-({ \
- struct request *__req = NULL; \
- if ((listptr) && *(listptr)) { \
- __req = *(listptr); \
- *(listptr) = __req->rq_next; \
- } \
- __req; \
-})
+static inline int rq_list_empty(const struct rq_list *rl)
+{
+ return rl->head == NULL;
+}
-#define rq_list_peek(listptr) \
-({ \
- struct request *__req = NULL; \
- if ((listptr) && *(listptr)) \
- __req = *(listptr); \
- __req; \
-})
+static inline void rq_list_init(struct rq_list *rl)
+{
+ rl->head = NULL;
+ rl->tail = NULL;
+}
-#define rq_list_for_each(listptr, pos) \
- for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos))
+static inline void rq_list_add_tail(struct rq_list *rl, struct request *rq)
+{
+ rq->rq_next = NULL;
+ if (rl->tail)
+ rl->tail->rq_next = rq;
+ else
+ rl->head = rq;
+ rl->tail = rq;
+}
-#define rq_list_for_each_safe(listptr, pos, nxt) \
- for (pos = rq_list_peek((listptr)), nxt = rq_list_next(pos); \
- pos; pos = nxt, nxt = pos ? rq_list_next(pos) : NULL)
+static inline void rq_list_add_head(struct rq_list *rl, struct request *rq)
+{
+ rq->rq_next = rl->head;
+ rl->head = rq;
+ if (!rl->tail)
+ rl->tail = rq;
+}
-#define rq_list_next(rq) (rq)->rq_next
-#define rq_list_empty(list) ((list) == (struct request *) NULL)
+static inline struct request *rq_list_pop(struct rq_list *rl)
+{
+ struct request *rq = rl->head;
-/**
- * rq_list_move() - move a struct request from one list to another
- * @src: The source list @rq is currently in
- * @dst: The destination list that @rq will be appended to
- * @rq: The request to move
- * @prev: The request preceding @rq in @src (NULL if @rq is the head)
- */
-static inline void rq_list_move(struct request **src, struct request **dst,
- struct request *rq, struct request *prev)
+ if (rq) {
+ rl->head = rl->head->rq_next;
+ if (!rl->head)
+ rl->tail = NULL;
+ rq->rq_next = NULL;
+ }
+
+ return rq;
+}
+
+static inline struct request *rq_list_peek(struct rq_list *rl)
{
- if (prev)
- prev->rq_next = rq->rq_next;
- else
- *src = rq->rq_next;
- rq_list_add(dst, rq);
+ return rl->head;
}
+#define rq_list_for_each(rl, pos) \
+ for (pos = rq_list_peek((rl)); (pos); pos = pos->rq_next)
+
+#define rq_list_for_each_safe(rl, pos, nxt) \
+ for (pos = rq_list_peek((rl)), nxt = pos->rq_next; \
+ pos; pos = nxt, nxt = pos ? pos->rq_next : NULL)
+
/**
* enum blk_eh_timer_return - How the timeout handler should proceed
* @BLK_EH_DONE: The block driver completed the command or will complete it at
@@ -580,7 +575,7 @@ struct blk_mq_ops {
* empty the @rqlist completely, then the rest will be queued
* individually by the block layer upon return.
*/
- void (*queue_rqs)(struct request **rqlist);
+ void (*queue_rqs)(struct rq_list *rqlist);
/**
* @get_budget: Reserve budget before queue request, once .queue_rq is
@@ -860,12 +855,6 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib);
*/
static inline bool blk_mq_need_time_stamp(struct request *rq)
{
- /*
- * passthrough io doesn't use iostat accounting, cgroup stats
- * and io scheduler functionalities.
- */
- if (blk_rq_is_passthrough(rq))
- return false;
return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED));
}
@@ -895,7 +884,7 @@ static inline bool blk_mq_add_to_batch(struct request *req,
else if (iob->complete != complete)
return false;
iob->need_ts |= blk_mq_need_time_stamp(req);
- rq_list_add(&iob->req_list, req);
+ rq_list_add_tail(&iob->req_list, req);
return true;
}
@@ -928,6 +917,8 @@ void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout);
+void blk_mq_unfreeze_queue_non_owner(struct request_queue *q);
+void blk_freeze_queue_start_non_owner(struct request_queue *q);
void blk_mq_map_queues(struct blk_mq_queue_map *qmap);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
@@ -992,7 +983,6 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
rq->nr_phys_segments = nr_segs;
rq->__data_len = bio->bi_iter.bi_size;
rq->bio = rq->biotail = bio;
- rq->ioprio = bio_prio(bio);
}
void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 413ebdff974b..dce7615c35e7 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -251,11 +251,9 @@ struct bio {
struct bio_crypt_ctx *bi_crypt_context;
#endif
- union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
- struct bio_integrity_payload *bi_integrity; /* data integrity */
+ struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
- };
unsigned short bi_vcnt; /* how many bio_vec's */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 643c9020a35a..a1fd0ddce5cf 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -25,6 +25,7 @@
#include <linux/uuid.h>
#include <linux/xarray.h>
#include <linux/file.h>
+#include <linux/lockdep.h>
struct module;
struct request_queue;
@@ -194,7 +195,7 @@ struct gendisk {
unsigned int nr_zones;
unsigned int zone_capacity;
unsigned int last_zone_capacity;
- unsigned long *conv_zones_bitmap;
+ unsigned long __rcu *conv_zones_bitmap;
unsigned int zone_wplugs_hash_bits;
spinlock_t zone_wplugs_lock;
struct mempool_s *zone_wplugs_pool;
@@ -349,6 +350,9 @@ typedef unsigned int __bitwise blk_flags_t;
/* I/O topology is misaligned */
#define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1))
+/* passthrough command IO accounting */
+#define BLK_FLAG_IOSTATS_PASSTHROUGH ((__force blk_flags_t)(1u << 2))
+
struct queue_limits {
blk_features_t features;
blk_flags_t flags;
@@ -371,6 +375,7 @@ struct queue_limits {
unsigned int max_user_discard_sectors;
unsigned int max_secure_erase_sectors;
unsigned int max_write_zeroes_sectors;
+ unsigned int max_hw_zone_append_sectors;
unsigned int max_zone_append_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
@@ -471,6 +476,11 @@ struct request_queue {
struct xarray hctx_table;
struct percpu_ref q_usage_counter;
+ struct lock_class_key io_lock_cls_key;
+ struct lockdep_map io_lockdep_map;
+
+ struct lock_class_key q_lock_cls_key;
+ struct lockdep_map q_lockdep_map;
struct request *last_merge;
@@ -566,6 +576,10 @@ struct request_queue {
struct throtl_data *td;
#endif
struct rcu_head rcu_head;
+#ifdef CONFIG_LOCKDEP
+ struct task_struct *mq_freeze_owner;
+ int mq_freeze_owner_depth;
+#endif
wait_queue_head_t mq_freeze_wq;
/*
* Protect concurrent access to q_usage_counter by
@@ -617,6 +631,8 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
#define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL))
#define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT)
+#define blk_queue_passthrough_stat(q) \
+ ((q)->limits.flags & BLK_FLAG_IOSTATS_PASSTHROUGH)
#define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX)
#define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA)
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
@@ -725,6 +741,9 @@ static inline unsigned int blk_queue_depth(struct request_queue *q)
#define for_each_bio(_bio) \
for (; _bio; _bio = _bio->bi_next)
+int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
+ const struct attribute_group **groups,
+ struct fwnode_handle *fwnode);
int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
const struct attribute_group **groups);
static inline int __must_check add_disk(struct gendisk *disk)
@@ -929,6 +948,7 @@ queue_limits_start_update(struct request_queue *q)
int queue_limits_commit_update(struct request_queue *q,
struct queue_limits *lim);
int queue_limits_set(struct request_queue *q, struct queue_limits *lim);
+int blk_validate_limits(struct queue_limits *lim);
/**
* queue_limits_cancel_update - cancel an atomic update of queue limits
@@ -968,8 +988,6 @@ static inline void blk_queue_disable_write_zeroes(struct request_queue *q)
/*
* Access functions for manipulating queue properties
*/
-extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
-extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
extern void blk_set_stacking_limits(struct queue_limits *lim);
extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
@@ -988,6 +1006,11 @@ extern void blk_put_queue(struct request_queue *);
void blk_mark_disk_dead(struct gendisk *disk);
+struct rq_list {
+ struct request *head;
+ struct request *tail;
+};
+
#ifdef CONFIG_BLOCK
/*
* blk_plug permits building a queue of related requests by holding the I/O
@@ -1001,10 +1024,10 @@ void blk_mark_disk_dead(struct gendisk *disk);
* blk_flush_plug() is called.
*/
struct blk_plug {
- struct request *mq_list; /* blk-mq requests */
+ struct rq_list mq_list; /* blk-mq requests */
/* if ios_left is > 1, we can batch tag/rq allocations */
- struct request *cached_rq;
+ struct rq_list cached_rqs;
u64 cur_ktime;
unsigned short nr_ios;
@@ -1147,6 +1170,11 @@ enum blk_default_limits {
*/
#define BLK_DEF_MAX_SECTORS_CAP 2560u
+static inline struct queue_limits *bdev_limits(struct block_device *bdev)
+{
+ return &bdev_get_queue(bdev)->limits;
+}
+
static inline unsigned long queue_segment_boundary(const struct request_queue *q)
{
return q->limits.seg_boundary_mask;
@@ -1187,25 +1215,9 @@ static inline unsigned int queue_max_segment_size(const struct request_queue *q)
return q->limits.max_segment_size;
}
-static inline unsigned int
-queue_limits_max_zone_append_sectors(const struct queue_limits *l)
-{
- unsigned int max_sectors = min(l->chunk_sectors, l->max_hw_sectors);
-
- return min_not_zero(l->max_zone_append_sectors, max_sectors);
-}
-
-static inline unsigned int queue_max_zone_append_sectors(struct request_queue *q)
-{
- if (!blk_queue_is_zoned(q))
- return 0;
-
- return queue_limits_max_zone_append_sectors(&q->limits);
-}
-
static inline bool queue_emulates_zone_append(struct request_queue *q)
{
- return blk_queue_is_zoned(q) && !q->limits.max_zone_append_sectors;
+ return blk_queue_is_zoned(q) && !q->limits.max_hw_zone_append_sectors;
}
static inline bool bdev_emulates_zone_append(struct block_device *bdev)
@@ -1216,7 +1228,7 @@ static inline bool bdev_emulates_zone_append(struct block_device *bdev)
static inline unsigned int
bdev_max_zone_append_sectors(struct block_device *bdev)
{
- return queue_max_zone_append_sectors(bdev_get_queue(bdev));
+ return bdev_limits(bdev)->max_zone_append_sectors;
}
static inline unsigned int bdev_max_segments(struct block_device *bdev)
@@ -1281,23 +1293,23 @@ unsigned int bdev_discard_alignment(struct block_device *bdev);
static inline unsigned int bdev_max_discard_sectors(struct block_device *bdev)
{
- return bdev_get_queue(bdev)->limits.max_discard_sectors;
+ return bdev_limits(bdev)->max_discard_sectors;
}
static inline unsigned int bdev_discard_granularity(struct block_device *bdev)
{
- return bdev_get_queue(bdev)->limits.discard_granularity;
+ return bdev_limits(bdev)->discard_granularity;
}
static inline unsigned int
bdev_max_secure_erase_sectors(struct block_device *bdev)
{
- return bdev_get_queue(bdev)->limits.max_secure_erase_sectors;
+ return bdev_limits(bdev)->max_secure_erase_sectors;
}
static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
{
- return bdev_get_queue(bdev)->limits.max_write_zeroes_sectors;
+ return bdev_limits(bdev)->max_write_zeroes_sectors;
}
static inline bool bdev_nonrot(struct block_device *bdev)
@@ -1333,7 +1345,7 @@ static inline bool bdev_write_cache(struct block_device *bdev)
static inline bool bdev_fua(struct block_device *bdev)
{
- return bdev_get_queue(bdev)->limits.features & BLK_FEAT_FUA;
+ return bdev_limits(bdev)->features & BLK_FEAT_FUA;
}
static inline bool bdev_nowait(struct block_device *bdev)
@@ -1378,6 +1390,33 @@ static inline bool bdev_is_zone_start(struct block_device *bdev,
return bdev_offset_from_zone_start(bdev, sector) == 0;
}
+/**
+ * bdev_zone_is_seq - check if a sector belongs to a sequential write zone
+ * @bdev: block device to check
+ * @sector: sector number
+ *
+ * Check if @sector on @bdev is contained in a sequential write required zone.
+ */
+static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
+{
+ bool is_seq = false;
+
+#if IS_ENABLED(CONFIG_BLK_DEV_ZONED)
+ if (bdev_is_zoned(bdev)) {
+ struct gendisk *disk = bdev->bd_disk;
+ unsigned long *bitmap;
+
+ rcu_read_lock();
+ bitmap = rcu_dereference(disk->conv_zones_bitmap);
+ is_seq = !bitmap ||
+ !test_bit(disk_zone_no(disk, sector), bitmap);
+ rcu_read_unlock();
+ }
+#endif
+
+ return is_seq;
+}
+
static inline int queue_dma_alignment(const struct request_queue *q)
{
return q->limits.dma_alignment;
@@ -1650,7 +1689,7 @@ int bdev_thaw(struct block_device *bdev);
void bdev_fput(struct file *bdev_file);
struct io_comp_batch {
- struct request *req_list;
+ struct rq_list req_list;
bool need_ts;
void (*complete)(struct io_comp_batch *);
};
@@ -1676,6 +1715,22 @@ static inline bool bdev_can_atomic_write(struct block_device *bdev)
return true;
}
+static inline unsigned int
+bdev_atomic_write_unit_min_bytes(struct block_device *bdev)
+{
+ if (!bdev_can_atomic_write(bdev))
+ return 0;
+ return queue_atomic_write_unit_min_bytes(bdev_get_queue(bdev));
+}
+
+static inline unsigned int
+bdev_atomic_write_unit_max_bytes(struct block_device *bdev)
+{
+ if (!bdev_can_atomic_write(bdev))
+ return 0;
+ return queue_atomic_write_unit_max_bytes(bdev_get_queue(bdev));
+}
+
#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { }
#endif /* _LINUX_BLKDEV_H */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0c3893c47171..bdadb0bb6cec 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -635,6 +635,7 @@ enum bpf_type_flag {
*/
PTR_UNTRUSTED = BIT(6 + BPF_BASE_TYPE_BITS),
+ /* MEM can be uninitialized. */
MEM_UNINIT = BIT(7 + BPF_BASE_TYPE_BITS),
/* DYNPTR points to memory local to the bpf program. */
@@ -700,6 +701,13 @@ enum bpf_type_flag {
*/
MEM_ALIGNED = BIT(17 + BPF_BASE_TYPE_BITS),
+ /* MEM is being written to, often combined with MEM_UNINIT. Non-presence
+ * of MEM_WRITE means that MEM is only being read. MEM_WRITE without the
+ * MEM_UNINIT means that memory needs to be initialized since it is also
+ * read.
+ */
+ MEM_WRITE = BIT(18 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
@@ -758,10 +766,10 @@ enum bpf_arg_type {
ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET,
ARG_PTR_TO_STACK_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_STACK,
ARG_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID,
- /* pointer to memory does not need to be initialized, helper function must fill
- * all bytes or clear them in error case.
+ /* Pointer to memory does not need to be initialized, since helper function
+ * fills all bytes or clears them in error case.
*/
- ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | ARG_PTR_TO_MEM,
+ ARG_PTR_TO_UNINIT_MEM = MEM_UNINIT | MEM_WRITE | ARG_PTR_TO_MEM,
/* Pointer to valid memory of size known at compile time. */
ARG_PTR_TO_FIXED_SIZE_MEM = MEM_FIXED_SIZE | ARG_PTR_TO_MEM,
@@ -2246,7 +2254,16 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu);
struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
-struct bpf_map *__bpf_map_get(struct fd f);
+
+static inline struct bpf_map *__bpf_map_get(struct fd f)
+{
+ if (fd_empty(f))
+ return ERR_PTR(-EBADF);
+ if (unlikely(fd_file(f)->f_op != &bpf_map_fops))
+ return ERR_PTR(-EINVAL);
+ return fd_file(f)->private_data;
+}
+
void bpf_map_inc(struct bpf_map *map);
void bpf_map_inc_with_uref(struct bpf_map *map);
struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref);
diff --git a/include/linux/bpf_mem_alloc.h b/include/linux/bpf_mem_alloc.h
index aaf004d94322..e45162ef59bb 100644
--- a/include/linux/bpf_mem_alloc.h
+++ b/include/linux/bpf_mem_alloc.h
@@ -33,6 +33,9 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);
void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
+/* Check the allocation size for kmalloc equivalent allocator */
+int bpf_mem_alloc_check_size(bool percpu, size_t size);
+
/* kmalloc/kfree equivalent: */
void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size);
void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 9f2a6b83b49e..fa78f49d4a9a 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -146,6 +146,7 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp)
BPF_LINK_TYPE(BPF_LINK_TYPE_NETFILTER, netfilter)
BPF_LINK_TYPE(BPF_LINK_TYPE_TCX, tcx)
BPF_LINK_TYPE(BPF_LINK_TYPE_NETKIT, netkit)
+BPF_LINK_TYPE(BPF_LINK_TYPE_SOCKMAP, sockmap)
#endif
#ifdef CONFIG_PERF_EVENTS
BPF_LINK_TYPE(BPF_LINK_TYPE_PERF_EVENT, perf)
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 04f3ace5787b..8fc1aed64113 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -6,7 +6,7 @@
#include <linux/bug.h>
#include <linux/slab.h>
#include <linux/time.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/ceph/types.h>
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 4497d0a6772c..15fb566d3f46 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -4,7 +4,7 @@
#include <linux/ceph/ceph_debug.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/backing-dev.h>
#include <linux/completion.h>
#include <linux/exportfs.h>
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index f66f6aac74f6..d7941478158c 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -449,8 +449,6 @@ extern int ceph_osdc_init(struct ceph_osd_client *osdc,
extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
-extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
- struct ceph_msg *msg);
extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
struct ceph_msg *msg);
void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 47ae4c4d924c..1b20d2d8ef7c 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -327,6 +327,7 @@ struct cgroup_base_stat {
#ifdef CONFIG_SCHED_CORE
u64 forceidle_sum;
#endif
+ u64 ntime;
};
/*
@@ -397,7 +398,7 @@ struct cgroup_freezer_state {
bool freeze;
/* Should the cgroup actually be frozen? */
- int e_freeze;
+ bool e_freeze;
/* Fields below are protected by css_set_lock */
diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h
index a3d3e888cf1f..966fcc5ff8ef 100644
--- a/include/linux/cleanup.h
+++ b/include/linux/cleanup.h
@@ -4,6 +4,142 @@
#include <linux/compiler.h>
+/**
+ * DOC: scope-based cleanup helpers
+ *
+ * The "goto error" pattern is notorious for introducing subtle resource
+ * leaks. It is tedious and error prone to add new resource acquisition
+ * constraints into code paths that already have several unwind
+ * conditions. The "cleanup" helpers enable the compiler to help with
+ * this tedium and can aid in maintaining LIFO (last in first out)
+ * unwind ordering to avoid unintentional leaks.
+ *
+ * As drivers make up the majority of the kernel code base, here is an
+ * example of using these helpers to clean up PCI drivers. The target of
+ * the cleanups are occasions where a goto is used to unwind a device
+ * reference (pci_dev_put()), or unlock the device (pci_dev_unlock())
+ * before returning.
+ *
+ * The DEFINE_FREE() macro can arrange for PCI device references to be
+ * dropped when the associated variable goes out of scope::
+ *
+ * DEFINE_FREE(pci_dev_put, struct pci_dev *, if (_T) pci_dev_put(_T))
+ * ...
+ * struct pci_dev *dev __free(pci_dev_put) =
+ * pci_get_slot(parent, PCI_DEVFN(0, 0));
+ *
+ * The above will automatically call pci_dev_put() if @dev is non-NULL
+ * when @dev goes out of scope (automatic variable scope). If a function
+ * wants to invoke pci_dev_put() on error, but return @dev (i.e. without
+ * freeing it) on success, it can do::
+ *
+ * return no_free_ptr(dev);
+ *
+ * ...or::
+ *
+ * return_ptr(dev);
+ *
+ * The DEFINE_GUARD() macro can arrange for the PCI device lock to be
+ * dropped when the scope where guard() is invoked ends::
+ *
+ * DEFINE_GUARD(pci_dev, struct pci_dev *, pci_dev_lock(_T), pci_dev_unlock(_T))
+ * ...
+ * guard(pci_dev)(dev);
+ *
+ * The lifetime of the lock obtained by the guard() helper follows the
+ * scope of automatic variable declaration. Take the following example::
+ *
+ * func(...)
+ * {
+ * if (...) {
+ * ...
+ * guard(pci_dev)(dev); // pci_dev_lock() invoked here
+ * ...
+ * } // <- implied pci_dev_unlock() triggered here
+ * }
+ *
+ * Observe the lock is held for the remainder of the "if ()" block not
+ * the remainder of "func()".
+ *
+ * Now, when a function uses both __free() and guard(), or multiple
+ * instances of __free(), the LIFO order of variable definition order
+ * matters. GCC documentation says:
+ *
+ * "When multiple variables in the same scope have cleanup attributes,
+ * at exit from the scope their associated cleanup functions are run in
+ * reverse order of definition (last defined, first cleanup)."
+ *
+ * When the unwind order matters it requires that variables be defined
+ * mid-function scope rather than at the top of the file. Take the
+ * following example and notice the bug highlighted by "!!"::
+ *
+ * LIST_HEAD(list);
+ * DEFINE_MUTEX(lock);
+ *
+ * struct object {
+ * struct list_head node;
+ * };
+ *
+ * static struct object *alloc_add(void)
+ * {
+ * struct object *obj;
+ *
+ * lockdep_assert_held(&lock);
+ * obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ * if (obj) {
+ * LIST_HEAD_INIT(&obj->node);
+ * list_add(obj->node, &list):
+ * }
+ * return obj;
+ * }
+ *
+ * static void remove_free(struct object *obj)
+ * {
+ * lockdep_assert_held(&lock);
+ * list_del(&obj->node);
+ * kfree(obj);
+ * }
+ *
+ * DEFINE_FREE(remove_free, struct object *, if (_T) remove_free(_T))
+ * static int init(void)
+ * {
+ * struct object *obj __free(remove_free) = NULL;
+ * int err;
+ *
+ * guard(mutex)(&lock);
+ * obj = alloc_add();
+ *
+ * if (!obj)
+ * return -ENOMEM;
+ *
+ * err = other_init(obj);
+ * if (err)
+ * return err; // remove_free() called without the lock!!
+ *
+ * no_free_ptr(obj);
+ * return 0;
+ * }
+ *
+ * That bug is fixed by changing init() to call guard() and define +
+ * initialize @obj in this order::
+ *
+ * guard(mutex)(&lock);
+ * struct object *obj __free(remove_free) = alloc_add();
+ *
+ * Given that the "__free(...) = NULL" pattern for variables defined at
+ * the top of the function poses this potential interdependency problem
+ * the recommendation is to always define and assign variables in one
+ * statement and not group variable definitions at the top of the
+ * function when __free() is used.
+ *
+ * Lastly, given that the benefit of cleanup helpers is removal of
+ * "goto", and that the "goto" statement can jump between scopes, the
+ * expectation is that usage of "goto" and cleanup helpers is never
+ * mixed in the same function. I.e. for a given routine, convert all
+ * resources that need a "goto" cleanup to scope-based cleanup, or
+ * convert none of them.
+ */
+
/*
* DEFINE_FREE(name, type, free):
* simple helper macro that defines the required wrapper for a __free()
@@ -98,7 +234,7 @@ const volatile void * __must_check_fn(const volatile void *val)
* DEFINE_CLASS(fdget, struct fd, fdput(_T), fdget(fd), int fd)
*
* CLASS(fdget, f)(fd);
- * if (!fd_file(f))
+ * if (fd_empty(f))
* return -EBADF;
*
* // use 'f' without concern
@@ -137,6 +273,12 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
* an anonymous instance of the (guard) class, not recommended for
* conditional locks.
*
+ * if_not_guard(name, args...) { <error handling> }:
+ * convenience macro for conditional guards that calls the statement that
+ * follows only if the lock was not acquired (typically an error return).
+ *
+ * Only for conditional locks.
+ *
* scoped_guard (name, args...) { }:
* similar to CLASS(name, scope)(args), except the variable (with the
* explicit name 'scope') is declard in a for-loop such that its scope is
@@ -149,14 +291,20 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
* similar to scoped_guard(), except it does fail when the lock
* acquire fails.
*
+ * Only for conditional locks.
*/
+#define __DEFINE_CLASS_IS_CONDITIONAL(_name, _is_cond) \
+static __maybe_unused const bool class_##_name##_is_conditional = _is_cond
+
#define DEFINE_GUARD(_name, _type, _lock, _unlock) \
+ __DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
DEFINE_CLASS(_name, _type, if (_T) { _unlock; }, ({ _lock; _T; }), _type _T); \
static inline void * class_##_name##_lock_ptr(class_##_name##_t *_T) \
- { return *_T; }
+ { return (void *)(__force unsigned long)*_T; }
#define DEFINE_GUARD_COND(_name, _ext, _condlock) \
+ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \
EXTEND_CLASS(_name, _ext, \
({ void *_t = _T; if (_T && !(_condlock)) _t = NULL; _t; }), \
class_##_name##_t _T) \
@@ -167,16 +315,48 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \
CLASS(_name, __UNIQUE_ID(guard))
#define __guard_ptr(_name) class_##_name##_lock_ptr
+#define __is_cond_ptr(_name) class_##_name##_is_conditional
+
+/*
+ * Helper macro for scoped_guard().
+ *
+ * Note that the "!__is_cond_ptr(_name)" part of the condition ensures that
+ * compiler would be sure that for the unconditional locks the body of the
+ * loop (caller-provided code glued to the else clause) could not be skipped.
+ * It is needed because the other part - "__guard_ptr(_name)(&scope)" - is too
+ * hard to deduce (even if could be proven true for unconditional locks).
+ */
+#define __scoped_guard(_name, _label, args...) \
+ for (CLASS(_name, scope)(args); \
+ __guard_ptr(_name)(&scope) || !__is_cond_ptr(_name); \
+ ({ goto _label; })) \
+ if (0) { \
+_label: \
+ break; \
+ } else
+
+#define scoped_guard(_name, args...) \
+ __scoped_guard(_name, __UNIQUE_ID(label), args)
+
+#define __scoped_cond_guard(_name, _fail, _label, args...) \
+ for (CLASS(_name, scope)(args); true; ({ goto _label; })) \
+ if (!__guard_ptr(_name)(&scope)) { \
+ BUILD_BUG_ON(!__is_cond_ptr(_name)); \
+ _fail; \
+_label: \
+ break; \
+ } else
+
+#define scoped_cond_guard(_name, _fail, args...) \
+ __scoped_cond_guard(_name, _fail, __UNIQUE_ID(label), args)
-#define scoped_guard(_name, args...) \
- for (CLASS(_name, scope)(args), \
- *done = NULL; __guard_ptr(_name)(&scope) && !done; done = (void *)1)
+#define __if_not_guard(_name, _id, args...) \
+ BUILD_BUG_ON(!__is_cond_ptr(_name)); \
+ CLASS(_name, _id)(args); \
+ if (!__guard_ptr(_name)(&_id))
-#define scoped_cond_guard(_name, _fail, args...) \
- for (CLASS(_name, scope)(args), \
- *done = NULL; !done; done = (void *)1) \
- if (!__guard_ptr(_name)(&scope)) _fail; \
- else
+#define if_not_guard(_name, args...) \
+ __if_not_guard(_name, __UNIQUE_ID(guard), args)
/*
* Additional helper macros for generating lock guards with types, either for
@@ -211,7 +391,7 @@ static inline void class_##_name##_destructor(class_##_name##_t *_T) \
\
static inline void *class_##_name##_lock_ptr(class_##_name##_t *_T) \
{ \
- return _T->lock; \
+ return (void *)(__force unsigned long)_T->lock; \
}
@@ -233,14 +413,17 @@ static inline class_##_name##_t class_##_name##_constructor(void) \
}
#define DEFINE_LOCK_GUARD_1(_name, _type, _lock, _unlock, ...) \
+__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
__DEFINE_UNLOCK_GUARD(_name, _type, _unlock, __VA_ARGS__) \
__DEFINE_LOCK_GUARD_1(_name, _type, _lock)
#define DEFINE_LOCK_GUARD_0(_name, _lock, _unlock, ...) \
+__DEFINE_CLASS_IS_CONDITIONAL(_name, false); \
__DEFINE_UNLOCK_GUARD(_name, void, _unlock, __VA_ARGS__) \
__DEFINE_LOCK_GUARD_0(_name, _lock)
#define DEFINE_LOCK_GUARD_1_COND(_name, _ext, _condlock) \
+ __DEFINE_CLASS_IS_CONDITIONAL(_name##_ext, true); \
EXTEND_CLASS(_name, _ext, \
({ class_##_name##_t _t = { .lock = l }, *_T = &_t;\
if (_T->lock && !(_condlock)) _T->lock = NULL; \
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index d35b677b08fe..ef1b16da6ad5 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -215,7 +215,6 @@ static inline s64 clocksource_cyc2ns(u64 cycles, u32 mult, u32 shift)
extern int clocksource_unregister(struct clocksource*);
extern void clocksource_touch_watchdog(void);
-extern void clocksource_change_rating(struct clocksource *cs, int rating);
extern void clocksource_suspend(void);
extern void clocksource_resume(void);
extern struct clocksource * __init clocksource_default_clock(void);
diff --git a/include/linux/closure.h b/include/linux/closure.h
index 2af44427107d..880fe85e35e9 100644
--- a/include/linux/closure.h
+++ b/include/linux/closure.h
@@ -454,4 +454,39 @@ do { \
__closure_wait_event(waitlist, _cond); \
} while (0)
+#define __closure_wait_event_timeout(waitlist, _cond, _until) \
+({ \
+ struct closure cl; \
+ long _t; \
+ \
+ closure_init_stack(&cl); \
+ \
+ while (1) { \
+ closure_wait(waitlist, &cl); \
+ if (_cond) { \
+ _t = max_t(long, 1L, _until - jiffies); \
+ break; \
+ } \
+ _t = max_t(long, 0L, _until - jiffies); \
+ if (!_t) \
+ break; \
+ closure_sync_timeout(&cl, _t); \
+ } \
+ closure_wake_up(waitlist); \
+ closure_sync(&cl); \
+ _t; \
+})
+
+/*
+ * Returns 0 if timeout expired, remaining time in jiffies (at least 1) if
+ * condition became true
+ */
+#define closure_wait_event_timeout(waitlist, _cond, _timeout) \
+({ \
+ unsigned long _until = jiffies + _timeout; \
+ (_cond) \
+ ? max_t(long, 1L, _until - jiffies) \
+ : __closure_wait_event_timeout(waitlist, _cond, _until);\
+})
+
#endif /* _LINUX_CLOSURE_H */
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index f805adaa316e..cd6f9aae311f 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -80,7 +80,11 @@
#define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
#endif
+#ifdef __SANITIZE_HWADDRESS__
+#define __no_sanitize_address __attribute__((__no_sanitize__("hwaddress")))
+#else
#define __no_sanitize_address __attribute__((__no_sanitize_address__))
+#endif
#if defined(__SANITIZE_THREAD__)
#define __no_sanitize_thread __attribute__((__no_sanitize_thread__))
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index ec55bcce4146..4d4e23b6e3e7 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -133,7 +133,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
#define annotate_unreachable() __annotate_unreachable(__COUNTER__)
/* Annotate a C jump table to allow objtool to follow the code flow */
-#define __annotate_jump_table __section(".rodata..c_jump_table")
+#define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #")
#else /* !CONFIG_OBJTOOL */
#define annotate_reachable()
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index edeb8532ce0f..45e598fe3476 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -42,7 +42,7 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
extern int dump_align(struct coredump_params *cprm, int align);
int dump_user_range(struct coredump_params *cprm, unsigned long start,
unsigned long len);
-extern int do_coredump(const kernel_siginfo_t *siginfo);
+extern void do_coredump(const kernel_siginfo_t *siginfo);
/*
* Logging for the coredump code, ratelimited.
@@ -62,11 +62,7 @@ extern int do_coredump(const kernel_siginfo_t *siginfo);
#define coredump_report_failure(fmt, ...) __COREDUMP_PRINTK(KERN_WARNING, fmt, ##__VA_ARGS__)
#else
-static inline int do_coredump(const kernel_siginfo_t *siginfo)
-{
- /* Coredump support is not available, can't fail. */
- return 0;
-}
+static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
#define coredump_report(...)
#define coredump_report_failure(...)
diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h
index 51ac441a37c3..89b0ac0014b0 100644
--- a/include/linux/coresight-pmu.h
+++ b/include/linux/coresight-pmu.h
@@ -49,12 +49,21 @@
* Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
* Used to associate a CPU with the CoreSight Trace ID.
* [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
- * [59:08] - Unused (SBZ)
- * [63:60] - Version
+ * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/
+ * Added in minor version 1.
+ * [55:40] - Unused (SBZ)
+ * [59:56] - Minor Version - previously existing fields are compatible with
+ * all minor versions.
+ * [63:60] - Major Version - previously existing fields mean different things
+ * in new major versions.
*/
#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
-#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
+#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8)
-#define CS_AUX_HW_ID_CURR_VERSION 0
+#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56)
+#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60)
+
+#define CS_AUX_HW_ID_MAJOR_VERSION 0
+#define CS_AUX_HW_ID_MINOR_VERSION 1
#endif
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index f09ace92176e..c13342594278 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -218,6 +218,24 @@ struct coresight_sysfs_link {
const char *target_name;
};
+/* architecturally we have 128 IDs some of which are reserved */
+#define CORESIGHT_TRACE_IDS_MAX 128
+
+/**
+ * Trace ID map.
+ *
+ * @used_ids: Bitmap to register available (bit = 0) and in use (bit = 1) IDs.
+ * Initialised so that the reserved IDs are permanently marked as
+ * in use.
+ * @perf_cs_etm_session_active: Number of Perf sessions using this ID map.
+ */
+struct coresight_trace_id_map {
+ DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX);
+ atomic_t __percpu *cpu_map;
+ atomic_t perf_cs_etm_session_active;
+ spinlock_t lock;
+};
+
/**
* struct coresight_device - representation of a device as used by the framework
* @pdata: Platform data with device connections associated to this device.
@@ -271,6 +289,7 @@ struct coresight_device {
bool sysfs_sink_activated;
struct dev_ext_attribute *ea;
struct coresight_device *def_sink;
+ struct coresight_trace_id_map perf_sink_id_map;
/* sysfs links between components */
int nr_links;
bool has_conns_grp;
@@ -365,7 +384,7 @@ struct coresight_ops_link {
struct coresight_ops_source {
int (*cpu_id)(struct coresight_device *csdev);
int (*enable)(struct coresight_device *csdev, struct perf_event *event,
- enum cs_mode mode);
+ enum cs_mode mode, struct coresight_trace_id_map *id_map);
void (*disable)(struct coresight_device *csdev,
struct perf_event *event);
};
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index e0e19d9c1323..7fe0981a7e46 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -1107,10 +1107,9 @@ static inline int parse_perf_domain(int cpu, const char *list_name,
const char *cell_name,
struct of_phandle_args *args)
{
- struct device_node *cpu_np;
int ret;
- cpu_np = of_cpu_device_node_get(cpu);
+ struct device_node *cpu_np __free(device_node) = of_cpu_device_node_get(cpu);
if (!cpu_np)
return -ENODEV;
@@ -1118,9 +1117,6 @@ static inline int parse_perf_domain(int cpu, const char *list_name,
args);
if (ret < 0)
return ret;
-
- of_node_put(cpu_np);
-
return 0;
}
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 2361ed4d2b15..a04b73c40173 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -147,6 +147,7 @@ enum cpuhp_state {
CPUHP_AP_IRQ_EIOINTC_STARTING,
CPUHP_AP_IRQ_AVECINTC_STARTING,
CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING,
+ CPUHP_AP_IRQ_THEAD_ACLINT_SSWI_STARTING,
CPUHP_AP_IRQ_RISCV_IMSIC_STARTING,
CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING,
CPUHP_AP_ARM_MVEBU_COHERENCY,
@@ -208,7 +209,6 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
CPUHP_AP_PERF_X86_AMD_POWER_ONLINE,
- CPUHP_AP_PERF_X86_RAPL_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,
@@ -227,6 +227,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_APM_XGENE_ONLINE,
CPUHP_AP_PERF_ARM_CAVIUM_TX2_UNCORE_ONLINE,
CPUHP_AP_PERF_ARM_MARVELL_CN10K_DDR_ONLINE,
+ CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 53158de44b83..9278a50d514f 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -30,7 +30,7 @@
extern unsigned int nr_cpu_ids;
#endif
-static inline void set_nr_cpu_ids(unsigned int nr)
+static __always_inline void set_nr_cpu_ids(unsigned int nr)
{
#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
WARN_ON(nr != nr_cpu_ids);
@@ -149,7 +149,7 @@ static __always_inline unsigned int cpumask_check(unsigned int cpu)
*
* Return: >= nr_cpu_ids if no cpus set.
*/
-static inline unsigned int cpumask_first(const struct cpumask *srcp)
+static __always_inline unsigned int cpumask_first(const struct cpumask *srcp)
{
return find_first_bit(cpumask_bits(srcp), small_cpumask_bits);
}
@@ -160,7 +160,7 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp)
*
* Return: >= nr_cpu_ids if all cpus are set.
*/
-static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
+static __always_inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
{
return find_first_zero_bit(cpumask_bits(srcp), small_cpumask_bits);
}
@@ -172,7 +172,7 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
*
* Return: >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and().
*/
-static inline
+static __always_inline
unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2)
{
return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
@@ -186,7 +186,7 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask
*
* Return: >= nr_cpu_ids if no cpus set in all.
*/
-static inline
+static __always_inline
unsigned int cpumask_first_and_and(const struct cpumask *srcp1,
const struct cpumask *srcp2,
const struct cpumask *srcp3)
@@ -201,7 +201,7 @@ unsigned int cpumask_first_and_and(const struct cpumask *srcp1,
*
* Return: >= nr_cpumask_bits if no CPUs set.
*/
-static inline unsigned int cpumask_last(const struct cpumask *srcp)
+static __always_inline unsigned int cpumask_last(const struct cpumask *srcp)
{
return find_last_bit(cpumask_bits(srcp), small_cpumask_bits);
}
@@ -213,7 +213,7 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp)
*
* Return: >= nr_cpu_ids if no further cpus set.
*/
-static inline
+static __always_inline
unsigned int cpumask_next(int n, const struct cpumask *srcp)
{
/* -1 is a legal arg here. */
@@ -229,7 +229,8 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp)
*
* Return: >= nr_cpu_ids if no further cpus unset.
*/
-static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
+static __always_inline
+unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
{
/* -1 is a legal arg here. */
if (n != -1)
@@ -239,18 +240,21 @@ static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
#if NR_CPUS == 1
/* Uniprocessor: there is only one valid CPU */
-static inline unsigned int cpumask_local_spread(unsigned int i, int node)
+static __always_inline
+unsigned int cpumask_local_spread(unsigned int i, int node)
{
return 0;
}
-static inline unsigned int cpumask_any_and_distribute(const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+unsigned int cpumask_any_and_distribute(const struct cpumask *src1p,
+ const struct cpumask *src2p)
{
return cpumask_first_and(src1p, src2p);
}
-static inline unsigned int cpumask_any_distribute(const struct cpumask *srcp)
+static __always_inline
+unsigned int cpumask_any_distribute(const struct cpumask *srcp)
{
return cpumask_first(srcp);
}
@@ -269,9 +273,9 @@ unsigned int cpumask_any_distribute(const struct cpumask *srcp);
*
* Return: >= nr_cpu_ids if no further cpus set in both.
*/
-static inline
+static __always_inline
unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
- const struct cpumask *src2p)
+ const struct cpumask *src2p)
{
/* -1 is a legal arg here. */
if (n != -1)
@@ -291,7 +295,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
#if NR_CPUS == 1
-static inline
+static __always_inline
unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
{
cpumask_check(start);
@@ -394,7 +398,7 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
* Often used to find any cpu but smp_processor_id() in a mask.
* Return: >= nr_cpu_ids if no cpus set.
*/
-static inline
+static __always_inline
unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
{
unsigned int i;
@@ -414,7 +418,7 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
*
* Returns >= nr_cpu_ids if no cpus set.
*/
-static inline
+static __always_inline
unsigned int cpumask_any_and_but(const struct cpumask *mask1,
const struct cpumask *mask2,
unsigned int cpu)
@@ -436,7 +440,8 @@ unsigned int cpumask_any_and_but(const struct cpumask *mask1,
*
* Return: >= nr_cpu_ids if such cpu doesn't exist.
*/
-static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp)
+static __always_inline
+unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp)
{
return find_nth_bit(cpumask_bits(srcp), small_cpumask_bits, cpumask_check(cpu));
}
@@ -449,7 +454,7 @@ static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *s
*
* Return: >= nr_cpu_ids if such cpu doesn't exist.
*/
-static inline
+static __always_inline
unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
const struct cpumask *srcp2)
{
@@ -465,7 +470,7 @@ unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
*
* Return: >= nr_cpu_ids if such cpu doesn't exist.
*/
-static inline
+static __always_inline
unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
const struct cpumask *srcp2)
{
@@ -508,12 +513,14 @@ unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp
* @cpu: cpu number (< nr_cpu_ids)
* @dstp: the cpumask pointer
*/
-static __always_inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
+static __always_inline
+void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
{
set_bit(cpumask_check(cpu), cpumask_bits(dstp));
}
-static __always_inline void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
+static __always_inline
+void __cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp)
{
__set_bit(cpumask_check(cpu), cpumask_bits(dstp));
}
@@ -557,7 +564,8 @@ static __always_inline void __cpumask_assign_cpu(int cpu, struct cpumask *dstp,
*
* Return: true if @cpu is set in @cpumask, else returns false
*/
-static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask)
+static __always_inline
+bool cpumask_test_cpu(int cpu, const struct cpumask *cpumask)
{
return test_bit(cpumask_check(cpu), cpumask_bits((cpumask)));
}
@@ -571,7 +579,8 @@ static __always_inline bool cpumask_test_cpu(int cpu, const struct cpumask *cpum
*
* Return: true if @cpu is set in old bitmap of @cpumask, else returns false
*/
-static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask)
+static __always_inline
+bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask)
{
return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask));
}
@@ -585,7 +594,8 @@ static __always_inline bool cpumask_test_and_set_cpu(int cpu, struct cpumask *cp
*
* Return: true if @cpu is set in old bitmap of @cpumask, else returns false
*/
-static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask)
+static __always_inline
+bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *cpumask)
{
return test_and_clear_bit(cpumask_check(cpu), cpumask_bits(cpumask));
}
@@ -594,7 +604,7 @@ static __always_inline bool cpumask_test_and_clear_cpu(int cpu, struct cpumask *
* cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask
* @dstp: the cpumask pointer
*/
-static inline void cpumask_setall(struct cpumask *dstp)
+static __always_inline void cpumask_setall(struct cpumask *dstp)
{
if (small_const_nbits(small_cpumask_bits)) {
cpumask_bits(dstp)[0] = BITMAP_LAST_WORD_MASK(nr_cpumask_bits);
@@ -607,7 +617,7 @@ static inline void cpumask_setall(struct cpumask *dstp)
* cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask
* @dstp: the cpumask pointer
*/
-static inline void cpumask_clear(struct cpumask *dstp)
+static __always_inline void cpumask_clear(struct cpumask *dstp)
{
bitmap_zero(cpumask_bits(dstp), large_cpumask_bits);
}
@@ -620,9 +630,9 @@ static inline void cpumask_clear(struct cpumask *dstp)
*
* Return: false if *@dstp is empty, else returns true
*/
-static inline bool cpumask_and(struct cpumask *dstp,
- const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+bool cpumask_and(struct cpumask *dstp, const struct cpumask *src1p,
+ const struct cpumask *src2p)
{
return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p),
cpumask_bits(src2p), small_cpumask_bits);
@@ -634,8 +644,9 @@ static inline bool cpumask_and(struct cpumask *dstp,
* @src1p: the first input
* @src2p: the second input
*/
-static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p,
+ const struct cpumask *src2p)
{
bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p),
cpumask_bits(src2p), small_cpumask_bits);
@@ -647,9 +658,9 @@ static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p,
* @src1p: the first input
* @src2p: the second input
*/
-static inline void cpumask_xor(struct cpumask *dstp,
- const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+void cpumask_xor(struct cpumask *dstp, const struct cpumask *src1p,
+ const struct cpumask *src2p)
{
bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p),
cpumask_bits(src2p), small_cpumask_bits);
@@ -663,9 +674,9 @@ static inline void cpumask_xor(struct cpumask *dstp,
*
* Return: false if *@dstp is empty, else returns true
*/
-static inline bool cpumask_andnot(struct cpumask *dstp,
- const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+bool cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p,
+ const struct cpumask *src2p)
{
return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p),
cpumask_bits(src2p), small_cpumask_bits);
@@ -678,8 +689,8 @@ static inline bool cpumask_andnot(struct cpumask *dstp,
*
* Return: true if the cpumasks are equal, false if not
*/
-static inline bool cpumask_equal(const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+bool cpumask_equal(const struct cpumask *src1p, const struct cpumask *src2p)
{
return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p),
small_cpumask_bits);
@@ -694,9 +705,9 @@ static inline bool cpumask_equal(const struct cpumask *src1p,
* Return: true if first cpumask ORed with second cpumask == third cpumask,
* otherwise false
*/
-static inline bool cpumask_or_equal(const struct cpumask *src1p,
- const struct cpumask *src2p,
- const struct cpumask *src3p)
+static __always_inline
+bool cpumask_or_equal(const struct cpumask *src1p, const struct cpumask *src2p,
+ const struct cpumask *src3p)
{
return bitmap_or_equal(cpumask_bits(src1p), cpumask_bits(src2p),
cpumask_bits(src3p), small_cpumask_bits);
@@ -710,8 +721,8 @@ static inline bool cpumask_or_equal(const struct cpumask *src1p,
* Return: true if first cpumask ANDed with second cpumask is non-empty,
* otherwise false
*/
-static inline bool cpumask_intersects(const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+bool cpumask_intersects(const struct cpumask *src1p, const struct cpumask *src2p)
{
return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p),
small_cpumask_bits);
@@ -724,8 +735,8 @@ static inline bool cpumask_intersects(const struct cpumask *src1p,
*
* Return: true if *@src1p is a subset of *@src2p, else returns false
*/
-static inline bool cpumask_subset(const struct cpumask *src1p,
- const struct cpumask *src2p)
+static __always_inline
+bool cpumask_subset(const struct cpumask *src1p, const struct cpumask *src2p)
{
return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p),
small_cpumask_bits);
@@ -737,7 +748,7 @@ static inline bool cpumask_subset(const struct cpumask *src1p,
*
* Return: true if srcp is empty (has no bits set), else false
*/
-static inline bool cpumask_empty(const struct cpumask *srcp)
+static __always_inline bool cpumask_empty(const struct cpumask *srcp)
{
return bitmap_empty(cpumask_bits(srcp), small_cpumask_bits);
}
@@ -748,7 +759,7 @@ static inline bool cpumask_empty(const struct cpumask *srcp)
*
* Return: true if srcp is full (has all bits set), else false
*/
-static inline bool cpumask_full(const struct cpumask *srcp)
+static __always_inline bool cpumask_full(const struct cpumask *srcp)
{
return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits);
}
@@ -759,7 +770,7 @@ static inline bool cpumask_full(const struct cpumask *srcp)
*
* Return: count of bits set in *srcp
*/
-static inline unsigned int cpumask_weight(const struct cpumask *srcp)
+static __always_inline unsigned int cpumask_weight(const struct cpumask *srcp)
{
return bitmap_weight(cpumask_bits(srcp), small_cpumask_bits);
}
@@ -771,8 +782,8 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp)
*
* Return: count of bits set in both *srcp1 and *srcp2
*/
-static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
- const struct cpumask *srcp2)
+static __always_inline
+unsigned int cpumask_weight_and(const struct cpumask *srcp1, const struct cpumask *srcp2)
{
return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
}
@@ -784,8 +795,9 @@ static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
*
* Return: count of bits set in both *srcp1 and *srcp2
*/
-static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1,
- const struct cpumask *srcp2)
+static __always_inline
+unsigned int cpumask_weight_andnot(const struct cpumask *srcp1,
+ const struct cpumask *srcp2)
{
return bitmap_weight_andnot(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits);
}
@@ -796,8 +808,8 @@ static inline unsigned int cpumask_weight_andnot(const struct cpumask *srcp1,
* @srcp: the input to shift
* @n: the number of bits to shift by
*/
-static inline void cpumask_shift_right(struct cpumask *dstp,
- const struct cpumask *srcp, int n)
+static __always_inline
+void cpumask_shift_right(struct cpumask *dstp, const struct cpumask *srcp, int n)
{
bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n,
small_cpumask_bits);
@@ -809,8 +821,8 @@ static inline void cpumask_shift_right(struct cpumask *dstp,
* @srcp: the input to shift
* @n: the number of bits to shift by
*/
-static inline void cpumask_shift_left(struct cpumask *dstp,
- const struct cpumask *srcp, int n)
+static __always_inline
+void cpumask_shift_left(struct cpumask *dstp, const struct cpumask *srcp, int n)
{
bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n,
nr_cpumask_bits);
@@ -821,8 +833,8 @@ static inline void cpumask_shift_left(struct cpumask *dstp,
* @dstp: the result
* @srcp: the input cpumask
*/
-static inline void cpumask_copy(struct cpumask *dstp,
- const struct cpumask *srcp)
+static __always_inline
+void cpumask_copy(struct cpumask *dstp, const struct cpumask *srcp)
{
bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), large_cpumask_bits);
}
@@ -858,8 +870,8 @@ static inline void cpumask_copy(struct cpumask *dstp,
*
* Return: -errno, or 0 for success.
*/
-static inline int cpumask_parse_user(const char __user *buf, int len,
- struct cpumask *dstp)
+static __always_inline
+int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp)
{
return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits);
}
@@ -872,8 +884,8 @@ static inline int cpumask_parse_user(const char __user *buf, int len,
*
* Return: -errno, or 0 for success.
*/
-static inline int cpumask_parselist_user(const char __user *buf, int len,
- struct cpumask *dstp)
+static __always_inline
+int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp)
{
return bitmap_parselist_user(buf, len, cpumask_bits(dstp),
nr_cpumask_bits);
@@ -886,7 +898,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len,
*
* Return: -errno, or 0 for success.
*/
-static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
+static __always_inline int cpumask_parse(const char *buf, struct cpumask *dstp)
{
return bitmap_parse(buf, UINT_MAX, cpumask_bits(dstp), nr_cpumask_bits);
}
@@ -898,7 +910,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp)
*
* Return: -errno, or 0 for success.
*/
-static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
+static __always_inline int cpulist_parse(const char *buf, struct cpumask *dstp)
{
return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits);
}
@@ -908,7 +920,7 @@ static inline int cpulist_parse(const char *buf, struct cpumask *dstp)
*
* Return: size to allocate for a &struct cpumask in bytes
*/
-static inline unsigned int cpumask_size(void)
+static __always_inline unsigned int cpumask_size(void)
{
return bitmap_size(large_cpumask_bits);
}
@@ -920,7 +932,7 @@ static inline unsigned int cpumask_size(void)
bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
-static inline
+static __always_inline
bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
{
return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node);
@@ -938,13 +950,13 @@ bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
*
* Return: %true if allocation succeeded, %false if not
*/
-static inline
+static __always_inline
bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
{
return alloc_cpumask_var_node(mask, flags, NUMA_NO_NODE);
}
-static inline
+static __always_inline
bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
{
return alloc_cpumask_var(mask, flags | __GFP_ZERO);
@@ -954,7 +966,7 @@ void alloc_bootmem_cpumask_var(cpumask_var_t *mask);
void free_cpumask_var(cpumask_var_t mask);
void free_bootmem_cpumask_var(cpumask_var_t mask);
-static inline bool cpumask_available(cpumask_var_t mask)
+static __always_inline bool cpumask_available(cpumask_var_t mask)
{
return mask != NULL;
}
@@ -964,43 +976,43 @@ static inline bool cpumask_available(cpumask_var_t mask)
#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x)
#define __cpumask_var_read_mostly
-static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+static __always_inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
{
return true;
}
-static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
+static __always_inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
int node)
{
return true;
}
-static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+static __always_inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
{
cpumask_clear(*mask);
return true;
}
-static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
+static __always_inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
int node)
{
cpumask_clear(*mask);
return true;
}
-static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask)
+static __always_inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask)
{
}
-static inline void free_cpumask_var(cpumask_var_t mask)
+static __always_inline void free_cpumask_var(cpumask_var_t mask)
{
}
-static inline void free_bootmem_cpumask_var(cpumask_var_t mask)
+static __always_inline void free_bootmem_cpumask_var(cpumask_var_t mask)
{
}
-static inline bool cpumask_available(cpumask_var_t mask)
+static __always_inline bool cpumask_available(cpumask_var_t mask)
{
return true;
}
@@ -1058,7 +1070,7 @@ void set_cpu_online(unsigned int cpu, bool online);
((struct cpumask *)(1 ? (bitmap) \
: (void *)sizeof(__check_is_bitmap(bitmap))))
-static inline int __check_is_bitmap(const unsigned long *bitmap)
+static __always_inline int __check_is_bitmap(const unsigned long *bitmap)
{
return 1;
}
@@ -1073,7 +1085,7 @@ static inline int __check_is_bitmap(const unsigned long *bitmap)
extern const unsigned long
cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)];
-static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
+static __always_inline const struct cpumask *get_cpu_mask(unsigned int cpu)
{
const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
p -= cpu / BITS_PER_LONG;
@@ -1100,32 +1112,32 @@ static __always_inline unsigned int num_online_cpus(void)
#define num_present_cpus() cpumask_weight(cpu_present_mask)
#define num_active_cpus() cpumask_weight(cpu_active_mask)
-static inline bool cpu_online(unsigned int cpu)
+static __always_inline bool cpu_online(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_online_mask);
}
-static inline bool cpu_enabled(unsigned int cpu)
+static __always_inline bool cpu_enabled(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_enabled_mask);
}
-static inline bool cpu_possible(unsigned int cpu)
+static __always_inline bool cpu_possible(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_possible_mask);
}
-static inline bool cpu_present(unsigned int cpu)
+static __always_inline bool cpu_present(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_present_mask);
}
-static inline bool cpu_active(unsigned int cpu)
+static __always_inline bool cpu_active(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_active_mask);
}
-static inline bool cpu_dying(unsigned int cpu)
+static __always_inline bool cpu_dying(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_dying_mask);
}
@@ -1138,32 +1150,32 @@ static inline bool cpu_dying(unsigned int cpu)
#define num_present_cpus() 1U
#define num_active_cpus() 1U
-static inline bool cpu_online(unsigned int cpu)
+static __always_inline bool cpu_online(unsigned int cpu)
{
return cpu == 0;
}
-static inline bool cpu_possible(unsigned int cpu)
+static __always_inline bool cpu_possible(unsigned int cpu)
{
return cpu == 0;
}
-static inline bool cpu_enabled(unsigned int cpu)
+static __always_inline bool cpu_enabled(unsigned int cpu)
{
return cpu == 0;
}
-static inline bool cpu_present(unsigned int cpu)
+static __always_inline bool cpu_present(unsigned int cpu)
{
return cpu == 0;
}
-static inline bool cpu_active(unsigned int cpu)
+static __always_inline bool cpu_active(unsigned int cpu)
{
return cpu == 0;
}
-static inline bool cpu_dying(unsigned int cpu)
+static __always_inline bool cpu_dying(unsigned int cpu)
{
return false;
}
@@ -1197,7 +1209,7 @@ static inline bool cpu_dying(unsigned int cpu)
* Return: the length of the (null-terminated) @buf string, zero if
* nothing is copied.
*/
-static inline ssize_t
+static __always_inline ssize_t
cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
{
return bitmap_print_to_pagebuf(list, buf, cpumask_bits(mask),
@@ -1220,9 +1232,9 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
* Return: the length of how many bytes have been copied, excluding
* terminating '\0'.
*/
-static inline ssize_t
-cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
- loff_t off, size_t count)
+static __always_inline
+ssize_t cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
+ loff_t off, size_t count)
{
return bitmap_print_bitmask_to_buf(buf, cpumask_bits(mask),
nr_cpu_ids, off, count) - 1;
@@ -1242,9 +1254,9 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
* Return: the length of how many bytes have been copied, excluding
* terminating '\0'.
*/
-static inline ssize_t
-cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
- loff_t off, size_t count)
+static __always_inline
+ssize_t cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
+ loff_t off, size_t count)
{
return bitmap_print_list_to_buf(buf, cpumask_bits(mask),
nr_cpu_ids, off, count) - 1;
diff --git a/include/linux/cxl-event.h b/include/linux/cxl-event.h
deleted file mode 100644
index 0bea1afbd747..000000000000
--- a/include/linux/cxl-event.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 2023 Intel Corporation. */
-#ifndef _LINUX_CXL_EVENT_H
-#define _LINUX_CXL_EVENT_H
-
-#include <linux/types.h>
-#include <linux/uuid.h>
-#include <linux/workqueue_types.h>
-
-/*
- * Common Event Record Format
- * CXL rev 3.0 section 8.2.9.2.1; Table 8-42
- */
-struct cxl_event_record_hdr {
- u8 length;
- u8 flags[3];
- __le16 handle;
- __le16 related_handle;
- __le64 timestamp;
- u8 maint_op_class;
- u8 reserved[15];
-} __packed;
-
-struct cxl_event_media_hdr {
- struct cxl_event_record_hdr hdr;
- __le64 phys_addr;
- u8 descriptor;
- u8 type;
- u8 transaction_type;
- /*
- * The meaning of Validity Flags from bit 2 is
- * different across DRAM and General Media records
- */
- u8 validity_flags[2];
- u8 channel;
- u8 rank;
-} __packed;
-
-#define CXL_EVENT_RECORD_DATA_LENGTH 0x50
-struct cxl_event_generic {
- struct cxl_event_record_hdr hdr;
- u8 data[CXL_EVENT_RECORD_DATA_LENGTH];
-} __packed;
-
-/*
- * General Media Event Record
- * CXL rev 3.0 Section 8.2.9.2.1.1; Table 8-43
- */
-#define CXL_EVENT_GEN_MED_COMP_ID_SIZE 0x10
-struct cxl_event_gen_media {
- struct cxl_event_media_hdr media_hdr;
- u8 device[3];
- u8 component_id[CXL_EVENT_GEN_MED_COMP_ID_SIZE];
- u8 reserved[46];
-} __packed;
-
-/*
- * DRAM Event Record - DER
- * CXL rev 3.0 section 8.2.9.2.1.2; Table 3-44
- */
-#define CXL_EVENT_DER_CORRECTION_MASK_SIZE 0x20
-struct cxl_event_dram {
- struct cxl_event_media_hdr media_hdr;
- u8 nibble_mask[3];
- u8 bank_group;
- u8 bank;
- u8 row[3];
- u8 column[2];
- u8 correction_mask[CXL_EVENT_DER_CORRECTION_MASK_SIZE];
- u8 reserved[0x17];
-} __packed;
-
-/*
- * Get Health Info Record
- * CXL rev 3.0 section 8.2.9.8.3.1; Table 8-100
- */
-struct cxl_get_health_info {
- u8 health_status;
- u8 media_status;
- u8 add_status;
- u8 life_used;
- u8 device_temp[2];
- u8 dirty_shutdown_cnt[4];
- u8 cor_vol_err_cnt[4];
- u8 cor_per_err_cnt[4];
-} __packed;
-
-/*
- * Memory Module Event Record
- * CXL rev 3.0 section 8.2.9.2.1.3; Table 8-45
- */
-struct cxl_event_mem_module {
- struct cxl_event_record_hdr hdr;
- u8 event_type;
- struct cxl_get_health_info info;
- u8 reserved[0x3d];
-} __packed;
-
-union cxl_event {
- struct cxl_event_generic generic;
- struct cxl_event_gen_media gen_media;
- struct cxl_event_dram dram;
- struct cxl_event_mem_module mem_module;
- /* dram & gen_media event header */
- struct cxl_event_media_hdr media_hdr;
-} __packed;
-
-/*
- * Common Event Record Format; in event logs
- * CXL rev 3.0 section 8.2.9.2.1; Table 8-42
- */
-struct cxl_event_record_raw {
- uuid_t id;
- union cxl_event event;
-} __packed;
-
-enum cxl_event_type {
- CXL_CPER_EVENT_GENERIC,
- CXL_CPER_EVENT_GEN_MEDIA,
- CXL_CPER_EVENT_DRAM,
- CXL_CPER_EVENT_MEM_MODULE,
-};
-
-#define CPER_CXL_DEVICE_ID_VALID BIT(0)
-#define CPER_CXL_DEVICE_SN_VALID BIT(1)
-#define CPER_CXL_COMP_EVENT_LOG_VALID BIT(2)
-struct cxl_cper_event_rec {
- struct {
- u32 length;
- u64 validation_bits;
- struct cper_cxl_event_devid {
- u16 vendor_id;
- u16 device_id;
- u8 func_num;
- u8 device_num;
- u8 bus_num;
- u16 segment_num;
- u16 slot_num; /* bits 2:0 reserved */
- u8 reserved;
- } __packed device_id;
- struct cper_cxl_event_sn {
- u32 lower_dw;
- u32 upper_dw;
- } __packed dev_serial_num;
- } __packed hdr;
-
- union cxl_event event;
-} __packed;
-
-struct cxl_cper_work_data {
- enum cxl_event_type event_type;
- struct cxl_cper_event_rec rec;
-};
-
-#ifdef CONFIG_ACPI_APEI_GHES
-int cxl_cper_register_work(struct work_struct *work);
-int cxl_cper_unregister_work(struct work_struct *work);
-int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd);
-#else
-static inline int cxl_cper_register_work(struct work_struct *work)
-{
- return 0;
-}
-
-static inline int cxl_cper_unregister_work(struct work_struct *work)
-{
- return 0;
-}
-static inline int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd)
-{
- return 0;
-}
-#endif
-
-#endif /* _LINUX_CXL_EVENT_H */
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index c9c65b132c0f..0928a6c8ae1e 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -57,7 +57,6 @@ static const struct file_operations __fops = { \
.release = simple_attr_release, \
.read = debugfs_attr_read, \
.write = (__is_signed) ? debugfs_attr_write_signed : debugfs_attr_write, \
- .llseek = no_llseek, \
}
#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \
diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 32444686b6ff..8b95545e7924 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -23,13 +23,17 @@ struct debug_obj_descr;
* @state: tracked object state
* @astate: current active state
* @object: pointer to the real object
+ * @batch_last: pointer to the last hlist node in a batch
* @descr: pointer to an object type specific debug description structure
*/
struct debug_obj {
- struct hlist_node node;
- enum debug_obj_state state;
- unsigned int astate;
- void *object;
+ struct hlist_node node;
+ enum debug_obj_state state;
+ unsigned int astate;
+ union {
+ void *object;
+ struct hlist_node *batch_last;
+ };
const struct debug_obj_descr *descr;
};
diff --git a/include/linux/delay.h b/include/linux/delay.h
index ff9cda975e30..89866bab100d 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -6,21 +6,12 @@
* Copyright (C) 1993 Linus Torvalds
*
* Delay routines, using a pre-computed "loops_per_jiffy" value.
- *
- * Please note that ndelay(), udelay() and mdelay() may return early for
- * several reasons:
- * 1. computed loops_per_jiffy too low (due to the time taken to
- * execute the timer interrupt.)
- * 2. cache behaviour affecting the time it takes to execute the
- * loop function.
- * 3. CPU clock rate changes.
- *
- * Please see this thread:
- * https://lists.openwall.net/linux-kernel/2011/01/09/56
+ * Sleep routines using timer list timers or hrtimers.
*/
#include <linux/math.h>
#include <linux/sched.h>
+#include <linux/jiffies.h>
extern unsigned long loops_per_jiffy;
@@ -35,12 +26,21 @@ extern unsigned long loops_per_jiffy;
* The 2nd mdelay() definition ensures GCC will optimize away the
* while loop for the common cases where n <= MAX_UDELAY_MS -- Paul G.
*/
-
#ifndef MAX_UDELAY_MS
#define MAX_UDELAY_MS 5
#endif
#ifndef mdelay
+/**
+ * mdelay - Inserting a delay based on milliseconds with busy waiting
+ * @n: requested delay in milliseconds
+ *
+ * See udelay() for basic information about mdelay() and it's variants.
+ *
+ * Please double check, whether mdelay() is the right way to go or whether a
+ * refactoring of the code is the better variant to be able to use msleep()
+ * instead.
+ */
#define mdelay(n) (\
(__builtin_constant_p(n) && (n)<=MAX_UDELAY_MS) ? udelay((n)*1000) : \
({unsigned long __ms=(n); while (__ms--) udelay(1000);}))
@@ -63,30 +63,75 @@ unsigned long msleep_interruptible(unsigned int msecs);
void usleep_range_state(unsigned long min, unsigned long max,
unsigned int state);
+/**
+ * usleep_range - Sleep for an approximate time
+ * @min: Minimum time in microseconds to sleep
+ * @max: Maximum time in microseconds to sleep
+ *
+ * For basic information please refere to usleep_range_state().
+ *
+ * The task will be in the state TASK_UNINTERRUPTIBLE during the sleep.
+ */
static inline void usleep_range(unsigned long min, unsigned long max)
{
usleep_range_state(min, max, TASK_UNINTERRUPTIBLE);
}
-static inline void usleep_idle_range(unsigned long min, unsigned long max)
+/**
+ * usleep_range_idle - Sleep for an approximate time with idle time accounting
+ * @min: Minimum time in microseconds to sleep
+ * @max: Maximum time in microseconds to sleep
+ *
+ * For basic information please refere to usleep_range_state().
+ *
+ * The sleeping task has the state TASK_IDLE during the sleep to prevent
+ * contribution to the load avarage.
+ */
+static inline void usleep_range_idle(unsigned long min, unsigned long max)
{
usleep_range_state(min, max, TASK_IDLE);
}
+/**
+ * ssleep - wrapper for seconds around msleep
+ * @seconds: Requested sleep duration in seconds
+ *
+ * Please refere to msleep() for detailed information.
+ */
static inline void ssleep(unsigned int seconds)
{
msleep(seconds * 1000);
}
-/* see Documentation/timers/timers-howto.rst for the thresholds */
+static const unsigned int max_slack_shift = 2;
+#define USLEEP_RANGE_UPPER_BOUND ((TICK_NSEC << max_slack_shift) / NSEC_PER_USEC)
+
+/**
+ * fsleep - flexible sleep which autoselects the best mechanism
+ * @usecs: requested sleep duration in microseconds
+ *
+ * flseep() selects the best mechanism that will provide maximum 25% slack
+ * to the requested sleep duration. Therefore it uses:
+ *
+ * * udelay() loop for sleep durations <= 10 microseconds to avoid hrtimer
+ * overhead for really short sleep durations.
+ * * usleep_range() for sleep durations which would lead with the usage of
+ * msleep() to a slack larger than 25%. This depends on the granularity of
+ * jiffies.
+ * * msleep() for all other sleep durations.
+ *
+ * Note: When %CONFIG_HIGH_RES_TIMERS is not set, all sleeps are processed with
+ * the granularity of jiffies and the slack might exceed 25% especially for
+ * short sleep durations.
+ */
static inline void fsleep(unsigned long usecs)
{
if (usecs <= 10)
udelay(usecs);
- else if (usecs <= 20000)
- usleep_range(usecs, 2 * usecs);
+ else if (usecs < USLEEP_RANGE_UPPER_BOUND)
+ usleep_range(usecs, usecs + (usecs >> max_slack_shift));
else
- msleep(DIV_ROUND_UP(usecs, 1000));
+ msleep(DIV_ROUND_UP(usecs, USEC_PER_MSEC));
}
#endif /* defined(_LINUX_DELAY_H) */
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 53ca3a913d06..8321f65897f3 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -524,7 +524,6 @@ int dm_post_suspending(struct dm_target *ti);
int dm_noflush_suspending(struct dm_target *ti);
void dm_accept_partial_bio(struct bio *bio, unsigned int n_sectors);
void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone);
-union map_info *dm_get_rq_mapinfo(struct request *rq);
#ifdef CONFIG_BLK_DEV_ZONED
struct dm_report_zones_args {
diff --git a/include/linux/device.h b/include/linux/device.h
index b4bde8d22697..667cb6db9019 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1078,6 +1078,9 @@ int device_for_each_child(struct device *dev, void *data,
int (*fn)(struct device *dev, void *data));
int device_for_each_child_reverse(struct device *dev, void *data,
int (*fn)(struct device *dev, void *data));
+int device_for_each_child_reverse_from(struct device *parent,
+ struct device *from, const void *data,
+ int (*fn)(struct device *, const void *));
struct device *device_find_child(struct device *dev, void *data,
int (*match)(struct device *dev, void *data));
struct device *device_find_child_by_name(struct device *parent,
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index 807831d6bf0f..cdc4757217f9 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -126,6 +126,9 @@ struct bus_attribute {
int __must_check bus_create_file(const struct bus_type *bus, struct bus_attribute *attr);
void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr);
+/* Matching function type for drivers/base APIs to find a specific device */
+typedef int (*device_match_t)(struct device *dev, const void *data);
+
/* Generic device matching functions that all busses can use to match with */
int device_match_name(struct device *dev, const void *name);
int device_match_of_node(struct device *dev, const void *np);
@@ -139,8 +142,7 @@ int device_match_any(struct device *dev, const void *unused);
int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data,
int (*fn)(struct device *dev, void *data));
struct device *bus_find_device(const struct bus_type *bus, struct device *start,
- const void *data,
- int (*match)(struct device *dev, const void *data));
+ const void *data, device_match_t match);
/**
* bus_find_device_by_name - device iterator for locating a particular device
* of a specific name.
diff --git a/include/linux/device/class.h b/include/linux/device/class.h
index c576b49c55c2..518c9c83d64b 100644
--- a/include/linux/device/class.h
+++ b/include/linux/device/class.h
@@ -95,7 +95,7 @@ void class_dev_iter_exit(struct class_dev_iter *iter);
int class_for_each_device(const struct class *class, const struct device *start, void *data,
int (*fn)(struct device *dev, void *data));
struct device *class_find_device(const struct class *class, const struct device *start,
- const void *data, int (*match)(struct device *, const void *));
+ const void *data, device_match_t match);
/**
* class_find_device_by_name - device iterator for locating a particular device
diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
index 1fc8b68786de..5c04b8e3833b 100644
--- a/include/linux/device/driver.h
+++ b/include/linux/device/driver.h
@@ -157,7 +157,7 @@ int __must_check driver_for_each_device(struct device_driver *drv, struct device
void *data, int (*fn)(struct device *dev, void *));
struct device *driver_find_device(const struct device_driver *drv,
struct device *start, const void *data,
- int (*match)(struct device *dev, const void *data));
+ device_match_t match);
/**
* driver_find_device_by_name - device iterator for locating a particular device
diff --git a/include/linux/dw_apb_timer.h b/include/linux/dw_apb_timer.h
index 82ebf9223948..f8811c46b89e 100644
--- a/include/linux/dw_apb_timer.h
+++ b/include/linux/dw_apb_timer.h
@@ -34,9 +34,6 @@ struct dw_apb_clocksource {
};
void dw_apb_clockevent_register(struct dw_apb_clock_event_device *dw_ced);
-void dw_apb_clockevent_pause(struct dw_apb_clock_event_device *dw_ced);
-void dw_apb_clockevent_resume(struct dw_apb_clock_event_device *dw_ced);
-void dw_apb_clockevent_stop(struct dw_apb_clock_event_device *dw_ced);
struct dw_apb_clock_event_device *
dw_apb_clockevent_init(int cpu, const char *name, unsigned rating,
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 6bf3c4fe8511..e28d88066033 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -764,8 +764,6 @@ extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md);
extern int __efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md);
extern void efi_mem_reserve(phys_addr_t addr, u64 size);
extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size);
-extern void efi_initialize_iomem_resources(struct resource *code_resource,
- struct resource *data_resource, struct resource *bss_resource);
extern u64 efi_get_fdt_params(struct efi_memory_map_data *data);
extern struct kobject *efi_kobj;
diff --git a/include/linux/einj-cxl.h b/include/linux/einj-cxl.h
deleted file mode 100644
index 624ff6ff41f9..000000000000
--- a/include/linux/einj-cxl.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * CXL protocol Error INJection support.
- *
- * Copyright (c) 2023 Advanced Micro Devices, Inc.
- * All Rights Reserved.
- *
- * Author: Ben Cheatham <[email protected]>
- */
-#ifndef EINJ_CXL_H
-#define EINJ_CXL_H
-
-#include <linux/errno.h>
-#include <linux/types.h>
-
-struct pci_dev;
-struct seq_file;
-
-#if IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL)
-int einj_cxl_available_error_type_show(struct seq_file *m, void *v);
-int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type);
-int einj_cxl_inject_rch_error(u64 rcrb, u64 type);
-bool einj_cxl_is_initialized(void);
-#else /* !IS_ENABLED(CONFIG_ACPI_APEI_EINJ_CXL) */
-static inline int einj_cxl_available_error_type_show(struct seq_file *m,
- void *v)
-{
- return -ENXIO;
-}
-
-static inline int einj_cxl_inject_error(struct pci_dev *dport_dev, u64 type)
-{
- return -ENXIO;
-}
-
-static inline int einj_cxl_inject_rch_error(u64 rcrb, u64 type)
-{
- return -ENXIO;
-}
-
-static inline bool einj_cxl_is_initialized(void) { return false; }
-#endif /* CONFIG_ACPI_APEI_EINJ_CXL */
-
-#endif /* EINJ_CXL_H */
diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 1ff52020cf75..752e0b297582 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -55,6 +55,8 @@ struct em_perf_table {
* struct em_perf_domain - Performance domain
* @em_table: Pointer to the runtime modifiable em_perf_table
* @nr_perf_states: Number of performance states
+ * @min_perf_state: Minimum allowed Performance State index
+ * @max_perf_state: Maximum allowed Performance State index
* @flags: See "em_perf_domain flags"
* @cpus: Cpumask covering the CPUs of the domain. It's here
* for performance reasons to avoid potential cache
@@ -70,6 +72,8 @@ struct em_perf_table {
struct em_perf_domain {
struct em_perf_table __rcu *em_table;
int nr_perf_states;
+ int min_perf_state;
+ int max_perf_state;
unsigned long flags;
unsigned long cpus[];
};
@@ -173,13 +177,14 @@ void em_table_free(struct em_perf_table __rcu *table);
int em_dev_compute_costs(struct device *dev, struct em_perf_state *table,
int nr_states);
int em_dev_update_chip_binning(struct device *dev);
+int em_update_performance_limits(struct em_perf_domain *pd,
+ unsigned long freq_min_khz, unsigned long freq_max_khz);
/**
* em_pd_get_efficient_state() - Get an efficient performance state from the EM
* @table: List of performance states, in ascending order
- * @nr_perf_states: Number of performance states
+ * @pd: performance domain for which this must be done
* @max_util: Max utilization to map with the EM
- * @pd_flags: Performance Domain flags
*
* It is called from the scheduler code quite frequently and as a consequence
* doesn't implement any check.
@@ -188,13 +193,16 @@ int em_dev_update_chip_binning(struct device *dev);
* requirement.
*/
static inline int
-em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
- unsigned long max_util, unsigned long pd_flags)
+em_pd_get_efficient_state(struct em_perf_state *table,
+ struct em_perf_domain *pd, unsigned long max_util)
{
+ unsigned long pd_flags = pd->flags;
+ int min_ps = pd->min_perf_state;
+ int max_ps = pd->max_perf_state;
struct em_perf_state *ps;
int i;
- for (i = 0; i < nr_perf_states; i++) {
+ for (i = min_ps; i <= max_ps; i++) {
ps = &table[i];
if (ps->performance >= max_util) {
if (pd_flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
@@ -204,7 +212,7 @@ em_pd_get_efficient_state(struct em_perf_state *table, int nr_perf_states,
}
}
- return nr_perf_states - 1;
+ return max_ps;
}
/**
@@ -253,8 +261,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
* requested performance.
*/
em_table = rcu_dereference(pd->em_table);
- i = em_pd_get_efficient_state(em_table->state, pd->nr_perf_states,
- max_util, pd->flags);
+ i = em_pd_get_efficient_state(em_table->state, pd, max_util);
ps = &em_table->state[i];
/*
@@ -391,6 +398,12 @@ static inline int em_dev_update_chip_binning(struct device *dev)
{
return -EINVAL;
}
+static inline
+int em_update_performance_limits(struct em_perf_domain *pd,
+ unsigned long freq_min_khz, unsigned long freq_max_khz)
+{
+ return -EINVAL;
+}
#endif
#endif
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 1e50cdb83ae5..fc61d0205c97 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -64,7 +64,8 @@
#define EXIT_TO_USER_MODE_WORK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
+ _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
+ _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
ARCH_EXIT_TO_USER_MODE_WORK)
/**
diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
index 6813171afccb..16149f6625e4 100644
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -17,8 +17,9 @@
#endif
#define XFER_TO_GUEST_MODE_WORK \
- (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \
- _TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK)
+ (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | _TIF_SIGPENDING | \
+ _TIF_NOTIFY_SIGNAL | _TIF_NOTIFY_RESUME | \
+ ARCH_XFER_TO_GUEST_MODE_WORK)
struct kvm_vcpu;
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 30114c25ad12..ecf203f01034 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -21,7 +21,7 @@
#include <linux/netdevice.h>
#include <linux/random.h>
#include <linux/crc32.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/bitsperlong.h>
#ifdef __KERNEL__
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 3337745d81bd..0c0d00fcd131 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -42,7 +42,7 @@ static inline void eventpoll_release(struct file *file)
* because the file in on the way to be removed and nobody ( but
* eventpoll ) has still a reference to this file.
*/
- if (likely(!file->f_ep))
+ if (likely(!READ_ONCE(file->f_ep)))
return;
/*
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 893a1d21dc1c..1ab165c2939f 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -250,19 +250,6 @@ struct export_operations {
unsigned long flags;
};
-/**
- * exportfs_lock_op_is_async() - export op supports async lock operation
- * @export_ops: the nfs export operations to check
- *
- * Returns true if the nfs export_operations structure has
- * EXPORT_OP_ASYNC_LOCK in their flags set
- */
-static inline bool
-exportfs_lock_op_is_async(const struct export_operations *export_ops)
-{
- return export_ops->flags & EXPORT_OP_ASYNC_LOCK;
-}
-
extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
int *max_len, struct inode *parent,
int flags);
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 01bee2b289c2..b0b821edfd97 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -19,7 +19,6 @@
#define F2FS_BLKSIZE_BITS PAGE_SHIFT /* bits for F2FS_BLKSIZE */
#define F2FS_MAX_EXTENSION 64 /* # of extension entries */
#define F2FS_EXTENSION_LEN 8 /* max size of extension */
-#define F2FS_BLK_ALIGN(x) (((x) + F2FS_BLKSIZE - 1) >> F2FS_BLKSIZE_BITS)
#define NULL_ADDR ((block_t)0) /* used as block_t addresses */
#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */
@@ -28,6 +27,7 @@
#define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS)
#define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS)
#define F2FS_BLK_END_BYTES(blk) (F2FS_BLK_TO_BYTES(blk + 1) - 1)
+#define F2FS_BLK_ALIGN(x) (F2FS_BYTES_TO_BLK((x) + F2FS_BLKSIZE - 1))
/* 0, 1(node nid), 2(meta nid) are reserved node id */
#define F2FS_RESERVED_NODE_NUM 3
@@ -278,7 +278,7 @@ struct node_footer {
#define F2FS_INLINE_DATA 0x02 /* file inline data flag */
#define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */
#define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */
-#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries */
+#define F2FS_INLINE_DOTS 0x10 /* file having implicit dot dentries (obsolete) */
#define F2FS_EXTRA_ATTR 0x20 /* file having extra attribute */
#define F2FS_PIN_FILE 0x40 /* file should not be gced */
#define F2FS_COMPRESS_RELEASED 0x80 /* file released compressed blocks */
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 2944d4aa413b..c45306a9f007 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -22,7 +22,6 @@
* as this is the granularity returned by copy_fdset().
*/
#define NR_OPEN_DEFAULT BITS_PER_LONG
-#define NR_OPEN_MAX ~0U
struct fdtable {
unsigned int max_fds;
@@ -93,10 +92,6 @@ static inline struct file *files_lookup_fd_locked(struct files_struct *files, un
return files_lookup_fd_raw(files, fd);
}
-struct file *lookup_fdget_rcu(unsigned int fd);
-struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd);
-struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd);
-
static inline bool close_on_exec(unsigned int fd, const struct files_struct *files)
{
return test_bit(fd, files_fdtable(files)->close_on_exec);
@@ -106,17 +101,17 @@ struct task_struct;
void put_files_struct(struct files_struct *fs);
int unshare_files(void);
-struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
+struct fd_range {
+ unsigned int from, to;
+};
+struct files_struct *dup_fd(struct files_struct *, struct fd_range *) __latent_entropy;
void do_close_on_exec(struct files_struct *);
int iterate_fd(struct files_struct *, unsigned,
int (*)(const void *, struct file *, unsigned),
const void *);
extern int close_fd(unsigned int fd);
-extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
extern struct file *file_close_fd(unsigned int fd);
-extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
- struct files_struct **new_fdp);
extern struct kmem_cache *files_cachep;
diff --git a/include/linux/file.h b/include/linux/file.h
index f98de143245a..302f11355b10 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -30,12 +30,6 @@ extern struct file *alloc_file_pseudo_noaccount(struct inode *, struct vfsmount
extern struct file *alloc_file_clone(struct file *, int flags,
const struct file_operations *);
-static inline void fput_light(struct file *file, int fput_needed)
-{
- if (fput_needed)
- fput(file);
-}
-
/* either a reference to struct file + flags
* (cloned vs. borrowed, pos locked), with
* flags stored in lower bits of value,
@@ -72,6 +66,7 @@ static inline void fdput(struct fd fd)
extern struct file *fget(unsigned int fd);
extern struct file *fget_raw(unsigned int fd);
extern struct file *fget_task(struct task_struct *task, unsigned int fd);
+extern struct file *fget_task_next(struct task_struct *task, unsigned int *fd);
extern void __f_unlock_pos(struct file *);
struct fd fdget(unsigned int fd);
@@ -87,6 +82,7 @@ static inline void fdput_pos(struct fd f)
DEFINE_CLASS(fd, struct fd, fdput(_T), fdget(fd), int fd)
DEFINE_CLASS(fd_raw, struct fd, fdput(_T), fdget_raw(fd), int fd)
+DEFINE_CLASS(fd_pos, struct fd, fdput_pos(_T), fdget_pos(fd), int fd)
extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
extern int replace_fd(unsigned fd, struct file *file, unsigned flags);
diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h
new file mode 100644
index 000000000000..9b3a8d9b17ab
--- /dev/null
+++ b/include/linux/file_ref.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _LINUX_FILE_REF_H
+#define _LINUX_FILE_REF_H
+
+#include <linux/atomic.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+/*
+ * file_ref is a reference count implementation specifically for use by
+ * files. It takes inspiration from rcuref but differs in key aspects
+ * such as support for SLAB_TYPESAFE_BY_RCU type caches.
+ *
+ * FILE_REF_ONEREF FILE_REF_MAXREF
+ * 0x0000000000000000UL 0x7FFFFFFFFFFFFFFFUL
+ * <-------------------valid ------------------->
+ *
+ * FILE_REF_SATURATED
+ * 0x8000000000000000UL 0xA000000000000000UL 0xBFFFFFFFFFFFFFFFUL
+ * <-----------------------saturation zone---------------------->
+ *
+ * FILE_REF_RELEASED FILE_REF_DEAD
+ * 0xC000000000000000UL 0xE000000000000000UL
+ * <-------------------dead zone------------------->
+ *
+ * FILE_REF_NOREF
+ * 0xFFFFFFFFFFFFFFFFUL
+ */
+
+#ifdef CONFIG_64BIT
+#define FILE_REF_ONEREF 0x0000000000000000UL
+#define FILE_REF_MAXREF 0x7FFFFFFFFFFFFFFFUL
+#define FILE_REF_SATURATED 0xA000000000000000UL
+#define FILE_REF_RELEASED 0xC000000000000000UL
+#define FILE_REF_DEAD 0xE000000000000000UL
+#define FILE_REF_NOREF 0xFFFFFFFFFFFFFFFFUL
+#else
+#define FILE_REF_ONEREF 0x00000000U
+#define FILE_REF_MAXREF 0x7FFFFFFFU
+#define FILE_REF_SATURATED 0xA0000000U
+#define FILE_REF_RELEASED 0xC0000000U
+#define FILE_REF_DEAD 0xE0000000U
+#define FILE_REF_NOREF 0xFFFFFFFFU
+#endif
+
+typedef struct {
+#ifdef CONFIG_64BIT
+ atomic64_t refcnt;
+#else
+ atomic_t refcnt;
+#endif
+} file_ref_t;
+
+/**
+ * file_ref_init - Initialize a file reference count
+ * @ref: Pointer to the reference count
+ * @cnt: The initial reference count typically '1'
+ */
+static inline void file_ref_init(file_ref_t *ref, unsigned long cnt)
+{
+ atomic_long_set(&ref->refcnt, cnt - 1);
+}
+
+bool __file_ref_put(file_ref_t *ref, unsigned long cnt);
+
+/**
+ * file_ref_get - Acquire one reference on a file
+ * @ref: Pointer to the reference count
+ *
+ * Similar to atomic_inc_not_zero() but saturates at FILE_REF_MAXREF.
+ *
+ * Provides full memory ordering.
+ *
+ * Return: False if the attempt to acquire a reference failed. This happens
+ * when the last reference has been put already. True if a reference
+ * was successfully acquired
+ */
+static __always_inline __must_check bool file_ref_get(file_ref_t *ref)
+{
+ /*
+ * Unconditionally increase the reference count with full
+ * ordering. The saturation and dead zones provide enough
+ * tolerance for this.
+ *
+ * If this indicates negative the file in question the fail can
+ * be freed and immediately reused due to SLAB_TYPSAFE_BY_RCU.
+ * Hence, unconditionally altering the file reference count to
+ * e.g., reset the file reference count back to the middle of
+ * the deadzone risk end up marking someone else's file as dead
+ * behind their back.
+ *
+ * It would be possible to do a careful:
+ *
+ * cnt = atomic_long_inc_return();
+ * if (likely(cnt >= 0))
+ * return true;
+ *
+ * and then something like:
+ *
+ * if (cnt >= FILE_REF_RELEASE)
+ * atomic_long_try_cmpxchg(&ref->refcnt, &cnt, FILE_REF_DEAD),
+ *
+ * to set the value back to the middle of the deadzone. But it's
+ * practically impossible to go from FILE_REF_DEAD to
+ * FILE_REF_ONEREF. It would need 2305843009213693952/2^61
+ * file_ref_get()s to resurrect such a dead file.
+ */
+ return !atomic_long_add_negative(1, &ref->refcnt);
+}
+
+/**
+ * file_ref_inc - Acquire one reference on a file
+ * @ref: Pointer to the reference count
+ *
+ * Acquire an additional reference on a file. Warns if the caller didn't
+ * already hold a reference.
+ */
+static __always_inline void file_ref_inc(file_ref_t *ref)
+{
+ long prior = atomic_long_fetch_inc_relaxed(&ref->refcnt);
+ WARN_ONCE(prior < 0, "file_ref_inc() on a released file reference");
+}
+
+/**
+ * file_ref_put -- Release a file reference
+ * @ref: Pointer to the reference count
+ *
+ * Provides release memory ordering, such that prior loads and stores
+ * are done before, and provides an acquire ordering on success such
+ * that free() must come after.
+ *
+ * Return: True if this was the last reference with no future references
+ * possible. This signals the caller that it can safely release
+ * the object which is protected by the reference counter.
+ * False if there are still active references or the put() raced
+ * with a concurrent get()/put() pair. Caller is not allowed to
+ * release the protected object.
+ */
+static __always_inline __must_check bool file_ref_put(file_ref_t *ref)
+{
+ long cnt;
+
+ /*
+ * While files are SLAB_TYPESAFE_BY_RCU and thus file_ref_put()
+ * calls don't risk UAFs when a file is recyclyed, it is still
+ * vulnerable to UAFs caused by freeing the whole slab page once
+ * it becomes unused. Prevent file_ref_put() from being
+ * preempted protects against this.
+ */
+ guard(preempt)();
+ /*
+ * Unconditionally decrease the reference count. The saturation
+ * and dead zones provide enough tolerance for this. If this
+ * fails then we need to handle the last reference drop and
+ * cases inside the saturation and dead zones.
+ */
+ cnt = atomic_long_dec_return(&ref->refcnt);
+ if (cnt >= 0)
+ return false;
+ return __file_ref_put(ref, cnt);
+}
+
+/**
+ * file_ref_read - Read the number of file references
+ * @ref: Pointer to the reference count
+ *
+ * Return: The number of held references (0 ... N)
+ */
+static inline unsigned long file_ref_read(file_ref_t *ref)
+{
+ unsigned long c = atomic_long_read(&ref->refcnt);
+
+ /* Return 0 if within the DEAD zone. */
+ return c >= FILE_REF_RELEASED ? 0 : c + 1;
+}
+
+#endif
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index bb44224c6676..c412ded9171e 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -180,6 +180,11 @@ static inline void locks_wake_up(struct file_lock *fl)
wake_up(&fl->c.flc_wait);
}
+static inline bool locks_can_async_lock(const struct file_operations *fops)
+{
+ return !fops->lock || fops->fop_flags & FOP_ASYNC_LOCK;
+}
+
/* fs/locks.c */
void locks_free_lock_context(struct inode *inode);
void locks_free_lock(struct file_lock *fl);
diff --git a/include/linux/find.h b/include/linux/find.h
index 5dfca4225fef..68685714bc18 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -52,7 +52,7 @@ unsigned long _find_next_bit_le(const unsigned long *addr, unsigned
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
@@ -81,7 +81,7 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_and_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size,
unsigned long offset)
@@ -112,7 +112,7 @@ unsigned long find_next_and_bit(const unsigned long *addr1,
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_andnot_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size,
unsigned long offset)
@@ -142,7 +142,7 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1,
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_or_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size,
unsigned long offset)
@@ -171,7 +171,7 @@ unsigned long find_next_or_bit(const unsigned long *addr1,
* Returns the bit number of the next zero bit
* If no bits are zero, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
unsigned long offset)
{
@@ -198,7 +198,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
* Returns the bit number of the first set bit.
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
{
if (small_const_nbits(size)) {
@@ -224,7 +224,7 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
* Returns the bit number of the N'th set bit.
* If no such, returns >= @size.
*/
-static inline
+static __always_inline
unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n)
{
if (n >= size)
@@ -249,7 +249,7 @@ unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsign
* Returns the bit number of the N'th set bit.
* If no such, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *addr2,
unsigned long size, unsigned long n)
{
@@ -276,7 +276,7 @@ unsigned long find_nth_and_bit(const unsigned long *addr1, const unsigned long *
* Returns the bit number of the N'th set bit.
* If no such, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
unsigned long size, unsigned long n)
{
@@ -332,7 +332,7 @@ unsigned long find_nth_and_andnot_bit(const unsigned long *addr1,
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_first_and_bit(const unsigned long *addr1,
const unsigned long *addr2,
unsigned long size)
@@ -357,7 +357,7 @@ unsigned long find_first_and_bit(const unsigned long *addr1,
* Returns the bit number for the first set bit
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_first_and_and_bit(const unsigned long *addr1,
const unsigned long *addr2,
const unsigned long *addr3,
@@ -381,7 +381,7 @@ unsigned long find_first_and_and_bit(const unsigned long *addr1,
* Returns the bit number of the first cleared bit.
* If no bits are zero, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
if (small_const_nbits(size)) {
@@ -402,7 +402,7 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
*
* Returns the bit number of the last set bit, or size.
*/
-static inline
+static __always_inline
unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
{
if (small_const_nbits(size)) {
@@ -425,7 +425,7 @@ unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
* Returns the bit number for the next set bit, or first set bit up to @offset
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_and_bit_wrap(const unsigned long *addr1,
const unsigned long *addr2,
unsigned long size, unsigned long offset)
@@ -448,7 +448,7 @@ unsigned long find_next_and_bit_wrap(const unsigned long *addr1,
* Returns the bit number for the next set bit, or first set bit up to @offset
* If no bits are set, returns @size.
*/
-static inline
+static __always_inline
unsigned long find_next_bit_wrap(const unsigned long *addr,
unsigned long size, unsigned long offset)
{
@@ -465,7 +465,7 @@ unsigned long find_next_bit_wrap(const unsigned long *addr,
* Helper for for_each_set_bit_wrap(). Make sure you're doing right thing
* before using it alone.
*/
-static inline
+static __always_inline
unsigned long __for_each_wrap(const unsigned long *bitmap, unsigned long size,
unsigned long start, unsigned long n)
{
@@ -506,20 +506,20 @@ extern unsigned long find_next_clump8(unsigned long *clump,
#if defined(__LITTLE_ENDIAN)
-static inline unsigned long find_next_zero_bit_le(const void *addr,
- unsigned long size, unsigned long offset)
+static __always_inline
+unsigned long find_next_zero_bit_le(const void *addr, unsigned long size, unsigned long offset)
{
return find_next_zero_bit(addr, size, offset);
}
-static inline unsigned long find_next_bit_le(const void *addr,
- unsigned long size, unsigned long offset)
+static __always_inline
+unsigned long find_next_bit_le(const void *addr, unsigned long size, unsigned long offset)
{
return find_next_bit(addr, size, offset);
}
-static inline unsigned long find_first_zero_bit_le(const void *addr,
- unsigned long size)
+static __always_inline
+unsigned long find_first_zero_bit_le(const void *addr, unsigned long size)
{
return find_first_zero_bit(addr, size);
}
@@ -527,7 +527,7 @@ static inline unsigned long find_first_zero_bit_le(const void *addr,
#elif defined(__BIG_ENDIAN)
#ifndef find_next_zero_bit_le
-static inline
+static __always_inline
unsigned long find_next_zero_bit_le(const void *addr, unsigned
long size, unsigned long offset)
{
@@ -546,7 +546,7 @@ unsigned long find_next_zero_bit_le(const void *addr, unsigned
#endif
#ifndef find_first_zero_bit_le
-static inline
+static __always_inline
unsigned long find_first_zero_bit_le(const void *addr, unsigned long size)
{
if (small_const_nbits(size)) {
@@ -560,7 +560,7 @@ unsigned long find_first_zero_bit_le(const void *addr, unsigned long size)
#endif
#ifndef find_next_bit_le
-static inline
+static __always_inline
unsigned long find_next_bit_le(const void *addr, unsigned
long size, unsigned long offset)
{
diff --git a/include/linux/folio_queue.h b/include/linux/folio_queue.h
index 955680c3bb5f..3abe614ef5f0 100644
--- a/include/linux/folio_queue.h
+++ b/include/linux/folio_queue.h
@@ -3,6 +3,12 @@
*
* Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
* Written by David Howells ([email protected])
+ *
+ * See:
+ *
+ * Documentation/core-api/folio_queue.rst
+ *
+ * for a description of the API.
*/
#ifndef _LINUX_FOLIO_QUEUE_H
@@ -33,6 +39,13 @@ struct folio_queue {
#endif
};
+/**
+ * folioq_init - Initialise a folio queue segment
+ * @folioq: The segment to initialise
+ *
+ * Initialise a folio queue segment. Note that the folio pointers are
+ * left uninitialised.
+ */
static inline void folioq_init(struct folio_queue *folioq)
{
folio_batch_init(&folioq->vec);
@@ -43,62 +56,155 @@ static inline void folioq_init(struct folio_queue *folioq)
folioq->marks3 = 0;
}
+/**
+ * folioq_nr_slots: Query the capacity of a folio queue segment
+ * @folioq: The segment to query
+ *
+ * Query the number of folios that a particular folio queue segment might hold.
+ * [!] NOTE: This must not be assumed to be the same for every segment!
+ */
static inline unsigned int folioq_nr_slots(const struct folio_queue *folioq)
{
return PAGEVEC_SIZE;
}
+/**
+ * folioq_count: Query the occupancy of a folio queue segment
+ * @folioq: The segment to query
+ *
+ * Query the number of folios that have been added to a folio queue segment.
+ * Note that this is not decreased as folios are removed from a segment.
+ */
static inline unsigned int folioq_count(struct folio_queue *folioq)
{
return folio_batch_count(&folioq->vec);
}
+/**
+ * folioq_full: Query if a folio queue segment is full
+ * @folioq: The segment to query
+ *
+ * Query if a folio queue segment is fully occupied. Note that this does not
+ * change if folios are removed from a segment.
+ */
static inline bool folioq_full(struct folio_queue *folioq)
{
//return !folio_batch_space(&folioq->vec);
return folioq_count(folioq) >= folioq_nr_slots(folioq);
}
+/**
+ * folioq_is_marked: Check first folio mark in a folio queue segment
+ * @folioq: The segment to query
+ * @slot: The slot number of the folio to query
+ *
+ * Determine if the first mark is set for the folio in the specified slot in a
+ * folio queue segment.
+ */
static inline bool folioq_is_marked(const struct folio_queue *folioq, unsigned int slot)
{
return test_bit(slot, &folioq->marks);
}
+/**
+ * folioq_mark: Set the first mark on a folio in a folio queue segment
+ * @folioq: The segment to modify
+ * @slot: The slot number of the folio to modify
+ *
+ * Set the first mark for the folio in the specified slot in a folio queue
+ * segment.
+ */
static inline void folioq_mark(struct folio_queue *folioq, unsigned int slot)
{
set_bit(slot, &folioq->marks);
}
+/**
+ * folioq_unmark: Clear the first mark on a folio in a folio queue segment
+ * @folioq: The segment to modify
+ * @slot: The slot number of the folio to modify
+ *
+ * Clear the first mark for the folio in the specified slot in a folio queue
+ * segment.
+ */
static inline void folioq_unmark(struct folio_queue *folioq, unsigned int slot)
{
clear_bit(slot, &folioq->marks);
}
+/**
+ * folioq_is_marked2: Check second folio mark in a folio queue segment
+ * @folioq: The segment to query
+ * @slot: The slot number of the folio to query
+ *
+ * Determine if the second mark is set for the folio in the specified slot in a
+ * folio queue segment.
+ */
static inline bool folioq_is_marked2(const struct folio_queue *folioq, unsigned int slot)
{
return test_bit(slot, &folioq->marks2);
}
+/**
+ * folioq_mark2: Set the second mark on a folio in a folio queue segment
+ * @folioq: The segment to modify
+ * @slot: The slot number of the folio to modify
+ *
+ * Set the second mark for the folio in the specified slot in a folio queue
+ * segment.
+ */
static inline void folioq_mark2(struct folio_queue *folioq, unsigned int slot)
{
set_bit(slot, &folioq->marks2);
}
+/**
+ * folioq_unmark2: Clear the second mark on a folio in a folio queue segment
+ * @folioq: The segment to modify
+ * @slot: The slot number of the folio to modify
+ *
+ * Clear the second mark for the folio in the specified slot in a folio queue
+ * segment.
+ */
static inline void folioq_unmark2(struct folio_queue *folioq, unsigned int slot)
{
clear_bit(slot, &folioq->marks2);
}
+/**
+ * folioq_is_marked3: Check third folio mark in a folio queue segment
+ * @folioq: The segment to query
+ * @slot: The slot number of the folio to query
+ *
+ * Determine if the third mark is set for the folio in the specified slot in a
+ * folio queue segment.
+ */
static inline bool folioq_is_marked3(const struct folio_queue *folioq, unsigned int slot)
{
return test_bit(slot, &folioq->marks3);
}
+/**
+ * folioq_mark3: Set the third mark on a folio in a folio queue segment
+ * @folioq: The segment to modify
+ * @slot: The slot number of the folio to modify
+ *
+ * Set the third mark for the folio in the specified slot in a folio queue
+ * segment.
+ */
static inline void folioq_mark3(struct folio_queue *folioq, unsigned int slot)
{
set_bit(slot, &folioq->marks3);
}
+/**
+ * folioq_unmark3: Clear the third mark on a folio in a folio queue segment
+ * @folioq: The segment to modify
+ * @slot: The slot number of the folio to modify
+ *
+ * Clear the third mark for the folio in the specified slot in a folio queue
+ * segment.
+ */
static inline void folioq_unmark3(struct folio_queue *folioq, unsigned int slot)
{
clear_bit(slot, &folioq->marks3);
@@ -111,6 +217,19 @@ static inline unsigned int __folio_order(struct folio *folio)
return folio->_flags_1 & 0xff;
}
+/**
+ * folioq_append: Add a folio to a folio queue segment
+ * @folioq: The segment to add to
+ * @folio: The folio to add
+ *
+ * Add a folio to the tail of the sequence in a folio queue segment, increasing
+ * the occupancy count and returning the slot number for the folio just added.
+ * The folio size is extracted and stored in the queue and the marks are left
+ * unmodified.
+ *
+ * Note that it's left up to the caller to check that the segment capacity will
+ * not be exceeded and to extend the queue.
+ */
static inline unsigned int folioq_append(struct folio_queue *folioq, struct folio *folio)
{
unsigned int slot = folioq->vec.nr++;
@@ -120,6 +239,19 @@ static inline unsigned int folioq_append(struct folio_queue *folioq, struct foli
return slot;
}
+/**
+ * folioq_append_mark: Add a folio to a folio queue segment
+ * @folioq: The segment to add to
+ * @folio: The folio to add
+ *
+ * Add a folio to the tail of the sequence in a folio queue segment, increasing
+ * the occupancy count and returning the slot number for the folio just added.
+ * The folio size is extracted and stored in the queue, the first mark is set
+ * and and the second and third marks are left unmodified.
+ *
+ * Note that it's left up to the caller to check that the segment capacity will
+ * not be exceeded and to extend the queue.
+ */
static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct folio *folio)
{
unsigned int slot = folioq->vec.nr++;
@@ -130,21 +262,57 @@ static inline unsigned int folioq_append_mark(struct folio_queue *folioq, struct
return slot;
}
+/**
+ * folioq_folio: Get a folio from a folio queue segment
+ * @folioq: The segment to access
+ * @slot: The folio slot to access
+ *
+ * Retrieve the folio in the specified slot from a folio queue segment. Note
+ * that no bounds check is made and if the slot hasn't been added into yet, the
+ * pointer will be undefined. If the slot has been cleared, NULL will be
+ * returned.
+ */
static inline struct folio *folioq_folio(const struct folio_queue *folioq, unsigned int slot)
{
return folioq->vec.folios[slot];
}
+/**
+ * folioq_folio_order: Get the order of a folio from a folio queue segment
+ * @folioq: The segment to access
+ * @slot: The folio slot to access
+ *
+ * Retrieve the order of the folio in the specified slot from a folio queue
+ * segment. Note that no bounds check is made and if the slot hasn't been
+ * added into yet, the order returned will be 0.
+ */
static inline unsigned int folioq_folio_order(const struct folio_queue *folioq, unsigned int slot)
{
return folioq->orders[slot];
}
+/**
+ * folioq_folio_size: Get the size of a folio from a folio queue segment
+ * @folioq: The segment to access
+ * @slot: The folio slot to access
+ *
+ * Retrieve the size of the folio in the specified slot from a folio queue
+ * segment. Note that no bounds check is made and if the slot hasn't been
+ * added into yet, the size returned will be PAGE_SIZE.
+ */
static inline size_t folioq_folio_size(const struct folio_queue *folioq, unsigned int slot)
{
return PAGE_SIZE << folioq_folio_order(folioq, slot);
}
+/**
+ * folioq_clear: Clear a folio from a folio queue segment
+ * @folioq: The segment to clear
+ * @slot: The folio slot to clear
+ *
+ * Clear a folio from a sequence in a folio queue segment and clear its marks.
+ * The occupancy count is left unchanged.
+ */
static inline void folioq_clear(struct folio_queue *folioq, unsigned int slot)
{
folioq->vec.folios[slot] = NULL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 776298fbfcb4..7e29433c5ecc 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -45,6 +45,8 @@
#include <linux/slab.h>
#include <linux/maple_tree.h>
#include <linux/rw_hint.h>
+#include <linux/file_ref.h>
+#include <linux/unicode.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -623,6 +625,7 @@ is_uncached_acl(struct posix_acl *acl)
#define IOP_NOFOLLOW 0x0004
#define IOP_XATTR 0x0008
#define IOP_DEFAULT_READLINK 0x0010
+#define IOP_MGTIME 0x0020
/*
* Keep mostly read-only and often accessed (especially for
@@ -1005,7 +1008,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
/**
* struct file - Represents a file
- * @f_count: reference count
+ * @f_ref: reference count
* @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context.
* @f_mode: FMODE_* flags often used in hotpaths
* @f_op: file operations
@@ -1030,7 +1033,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
* @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.)
*/
struct file {
- atomic_long_t f_count;
+ file_ref_t f_ref;
spinlock_t f_lock;
fmode_t f_mode;
const struct file_operations *f_op;
@@ -1078,15 +1081,14 @@ struct file_handle {
static inline struct file *get_file(struct file *f)
{
- long prior = atomic_long_fetch_inc_relaxed(&f->f_count);
- WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n");
+ file_ref_inc(&f->f_ref);
return f;
}
struct file *get_file_rcu(struct file __rcu **f);
struct file *get_file_active(struct file **f);
-#define file_count(x) atomic_long_read(&(x)->f_count)
+#define file_count(f) file_ref_read(&(f)->f_ref)
#define MAX_NON_LFS ((1UL<<31) - 1)
@@ -1229,6 +1231,7 @@ extern int send_sigurg(struct file *file);
#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
#define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */
#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */
+#define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */
/* Possible states of 'frozen' field */
enum {
@@ -1583,6 +1586,8 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
struct timespec64 current_time(struct inode *inode);
struct timespec64 inode_set_ctime_current(struct inode *inode);
+struct timespec64 inode_set_ctime_deleg(struct inode *inode,
+ struct timespec64 update);
static inline time64_t inode_get_atime_sec(const struct inode *inode)
{
@@ -1652,6 +1657,17 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode,
return inode_set_mtime_to_ts(inode, ts);
}
+/*
+ * Multigrain timestamps
+ *
+ * Conditionally use fine-grained ctime and mtime timestamps when there
+ * are users actively observing them via getattr. The primary use-case
+ * for this is NFS clients that use the ctime to distinguish between
+ * different states of the file, and that are often fooled by multiple
+ * operations that occur in the same coarse-grained timer tick.
+ */
+#define I_CTIME_QUERIED ((u32)BIT(31))
+
static inline time64_t inode_get_ctime_sec(const struct inode *inode)
{
return inode->i_ctime_sec;
@@ -1659,7 +1675,7 @@ static inline time64_t inode_get_ctime_sec(const struct inode *inode)
static inline long inode_get_ctime_nsec(const struct inode *inode)
{
- return inode->i_ctime_nsec;
+ return inode->i_ctime_nsec & ~I_CTIME_QUERIED;
}
static inline struct timespec64 inode_get_ctime(const struct inode *inode)
@@ -1670,13 +1686,7 @@ static inline struct timespec64 inode_get_ctime(const struct inode *inode)
return ts;
}
-static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
- struct timespec64 ts)
-{
- inode->i_ctime_sec = ts.tv_sec;
- inode->i_ctime_nsec = ts.tv_nsec;
- return ts;
-}
+struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts);
/**
* inode_set_ctime - set the ctime in the inode
@@ -2115,6 +2125,8 @@ struct file_operations {
#define FOP_HUGE_PAGES ((__force fop_flags_t)(1 << 4))
/* Treat loff_t as unsigned (e.g., /dev/mem) */
#define FOP_UNSIGNED_OFFSET ((__force fop_flags_t)(1 << 5))
+/* Supports asynchronous lock callbacks */
+#define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6))
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
@@ -2541,6 +2553,7 @@ struct file_system_type {
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
#define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */
+#define FS_MGTIME 64 /* FS uses multigrain timestamps */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
@@ -2564,6 +2577,17 @@ struct file_system_type {
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
+/**
+ * is_mgtime: is this inode using multigrain timestamps
+ * @inode: inode to test for multigrain timestamps
+ *
+ * Return true if the inode uses multigrain timestamps, false otherwise.
+ */
+static inline bool is_mgtime(const struct inode *inode)
+{
+ return inode->i_opflags & IOP_MGTIME;
+}
+
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int));
@@ -2765,6 +2789,16 @@ extern struct filename *getname_flags(const char __user *, int);
extern struct filename *getname_uflags(const char __user *, int);
extern struct filename *getname(const char __user *);
extern struct filename *getname_kernel(const char *);
+extern struct filename *__getname_maybe_null(const char __user *);
+static inline struct filename *getname_maybe_null(const char __user *name, int flags)
+{
+ if (!(flags & AT_EMPTY_PATH))
+ return getname(name);
+
+ if (!name)
+ return NULL;
+ return __getname_maybe_null(name);
+}
extern void putname(struct filename *name);
extern int finish_open(struct file *file, struct dentry *dentry,
@@ -3081,7 +3115,12 @@ extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
-extern int inode_init_always(struct super_block *, struct inode *);
+extern int inode_init_always_gfp(struct super_block *, struct inode *, gfp_t);
+static inline int inode_init_always(struct super_block *sb, struct inode *inode)
+{
+ return inode_init_always_gfp(sb, inode, GFP_NOFS);
+}
+
extern void inode_init_once(struct inode *);
extern void address_space_init_once(struct address_space *mapping);
extern struct inode * igrab(struct inode *);
@@ -3233,7 +3272,6 @@ extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
-#define no_llseek NULL
extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
@@ -3321,6 +3359,7 @@ extern void page_put_link(void *);
extern int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations;
extern void kfree_link(void *);
+void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode);
void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
void generic_fill_statx_atomic_writes(struct kstat *stat,
@@ -3451,6 +3490,54 @@ extern int generic_ci_match(const struct inode *parent,
const struct qstr *folded_name,
const u8 *de_name, u32 de_name_len);
+#if IS_ENABLED(CONFIG_UNICODE)
+int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str);
+int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name);
+
+/**
+ * generic_ci_validate_strict_name - Check if a given name is suitable
+ * for a directory
+ *
+ * This functions checks if the proposed filename is valid for the
+ * parent directory. That means that only valid UTF-8 filenames will be
+ * accepted for casefold directories from filesystems created with the
+ * strict encoding flag. That also means that any name will be
+ * accepted for directories that doesn't have casefold enabled, or
+ * aren't being strict with the encoding.
+ *
+ * @dir: inode of the directory where the new file will be created
+ * @name: name of the new file
+ *
+ * Return:
+ * * True: if the filename is suitable for this directory. It can be
+ * true if a given name is not suitable for a strict encoding
+ * directory, but the directory being used isn't strict
+ * * False if the filename isn't suitable for this directory. This only
+ * happens when a directory is casefolded and the filesystem is strict
+ * about its encoding.
+ */
+static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name)
+{
+ if (!IS_CASEFOLDED(dir) || !sb_has_strict_encoding(dir->i_sb))
+ return true;
+
+ /*
+ * A casefold dir must have a encoding set, unless the filesystem
+ * is corrupted
+ */
+ if (WARN_ON_ONCE(!dir->i_sb->s_encoding))
+ return true;
+
+ return !utf8_validate(dir->i_sb->s_encoding, name);
+}
+#else
+static inline bool generic_ci_validate_strict_name(struct inode *dir, struct qstr *name)
+{
+ return true;
+}
+#endif
+
static inline bool sb_has_encoding(const struct super_block *sb)
{
#if IS_ENABLED(CONFIG_UNICODE)
@@ -3721,6 +3808,6 @@ static inline bool vfs_empty_path(int dfd, const char __user *path)
return !c;
}
-bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos);
+int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter);
#endif /* _LINUX_FS_H */
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index c13e99cbbf81..4b4bfef6f053 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -160,6 +160,12 @@ extern int get_tree_keyed(struct fs_context *fc,
int setup_bdev_super(struct super_block *sb, int sb_flags,
struct fs_context *fc);
+
+#define GET_TREE_BDEV_QUIET_LOOKUP 0x0001
+int get_tree_bdev_flags(struct fs_context *fc,
+ int (*fill_super)(struct super_block *sb,
+ struct fs_context *fc), unsigned int flags);
+
extern int get_tree_bdev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));
diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h
index 6cf713a7e6c6..3cef566088fc 100644
--- a/include/linux/fs_parser.h
+++ b/include/linux/fs_parser.h
@@ -28,7 +28,8 @@ typedef int fs_param_type(struct p_log *,
*/
fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64,
fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev,
- fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid;
+ fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid,
+ fs_param_is_file_or_string;
/*
* Specification of the type of value a parameter wants.
@@ -133,6 +134,8 @@ static inline bool fs_validate_description(const char *name,
#define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL)
#define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL)
#define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL)
+#define fsparam_file_or_string(NAME, OPT) \
+ __fsparam(fs_param_is_file_or_string, NAME, OPT, 0, NULL)
#define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL)
#define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL)
diff --git a/include/linux/fsl/enetc_mdio.h b/include/linux/fsl/enetc_mdio.h
index df25fffdc0ae..623ccfcbf39c 100644
--- a/include/linux/fsl/enetc_mdio.h
+++ b/include/linux/fsl/enetc_mdio.h
@@ -59,7 +59,8 @@ static inline int enetc_mdio_read_c45(struct mii_bus *bus, int phy_id,
static inline int enetc_mdio_write_c45(struct mii_bus *bus, int phy_id,
int devad, int regnum, u16 value)
{ return -EINVAL; }
-struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs)
+static inline struct enetc_hw *enetc_hw_alloc(struct device *dev,
+ void __iomem *port_regs)
{ return ERR_PTR(-EINVAL); }
#endif
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index 083c860fd28e..c90ec889bfc2 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -436,7 +436,7 @@ void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev);
struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev,
u16 if_id);
-extern struct bus_type fsl_mc_bus_type;
+extern const struct bus_type fsl_mc_bus_type;
extern struct device_type fsl_mc_bus_dprc_type;
extern struct device_type fsl_mc_bus_dpni_type;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 8be029bc50b1..3ecf7768e577 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -217,7 +217,6 @@ struct fsnotify_group {
#define FSNOTIFY_GROUP_USER 0x01 /* user allocated group */
#define FSNOTIFY_GROUP_DUPS 0x02 /* allow multiple marks per object */
-#define FSNOTIFY_GROUP_NOFS 0x04 /* group lock is not direct reclaim safe */
int flags;
unsigned int owner_flags; /* stored flags of mark_mutex owner */
@@ -268,22 +267,19 @@ struct fsnotify_group {
static inline void fsnotify_group_lock(struct fsnotify_group *group)
{
mutex_lock(&group->mark_mutex);
- if (group->flags & FSNOTIFY_GROUP_NOFS)
- group->owner_flags = memalloc_nofs_save();
+ group->owner_flags = memalloc_nofs_save();
}
static inline void fsnotify_group_unlock(struct fsnotify_group *group)
{
- if (group->flags & FSNOTIFY_GROUP_NOFS)
- memalloc_nofs_restore(group->owner_flags);
+ memalloc_nofs_restore(group->owner_flags);
mutex_unlock(&group->mark_mutex);
}
static inline void fsnotify_group_assert_locked(struct fsnotify_group *group)
{
WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
- if (group->flags & FSNOTIFY_GROUP_NOFS)
- WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS));
+ WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS));
}
/* When calling fsnotify tell it if the data is a path or inode */
diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h
index 3bb87bf6bc65..455f855bc084 100644
--- a/include/linux/hdmi.h
+++ b/include/linux/hdmi.h
@@ -59,6 +59,15 @@ enum hdmi_infoframe_type {
#define HDMI_DRM_INFOFRAME_SIZE 26
#define HDMI_VENDOR_INFOFRAME_SIZE 4
+/*
+ * HDMI 1.3a table 5-14 states that the largest InfoFrame_length is 27,
+ * not including the packet header or checksum byte. We include the
+ * checksum byte in HDMI_INFOFRAME_HEADER_SIZE, so this should allow
+ * HDMI_INFOFRAME_SIZE(MAX) to be the largest buffer we could ever need
+ * for any HDMI infoframe.
+ */
+#define HDMI_MAX_INFOFRAME_SIZE 27
+
#define HDMI_INFOFRAME_SIZE(type) \
(HDMI_INFOFRAME_HEADER_SIZE + HDMI_ ## type ## _INFOFRAME_SIZE)
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index 9d7754ad5e9b..6dbd0d49628f 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -229,6 +229,12 @@ struct hisi_qm_status {
struct hisi_qm;
+enum acc_err_result {
+ ACC_ERR_NONE,
+ ACC_ERR_NEED_RESET,
+ ACC_ERR_RECOVERED,
+};
+
struct hisi_qm_err_info {
char *acpi_rst;
u32 msi_wr_port;
@@ -257,9 +263,9 @@ struct hisi_qm_err_ini {
void (*close_axi_master_ooo)(struct hisi_qm *qm);
void (*open_sva_prefetch)(struct hisi_qm *qm);
void (*close_sva_prefetch)(struct hisi_qm *qm);
- void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts);
void (*show_last_dfx_regs)(struct hisi_qm *qm);
void (*err_info_init)(struct hisi_qm *qm);
+ enum acc_err_result (*get_err_result)(struct hisi_qm *qm);
};
struct hisi_qm_cap_info {
@@ -274,13 +280,25 @@ struct hisi_qm_cap_info {
u32 v3_val;
};
+struct hisi_qm_cap_query_info {
+ u32 type;
+ const char *name;
+ u32 offset;
+ u32 v1_val;
+ u32 v2_val;
+ u32 v3_val;
+};
+
struct hisi_qm_cap_record {
u32 type;
+ const char *name;
u32 cap_val;
};
struct hisi_qm_cap_tables {
+ u32 qm_cap_size;
struct hisi_qm_cap_record *qm_cap_table;
+ u32 dev_cap_size;
struct hisi_qm_cap_record *dev_cap_table;
};
@@ -436,37 +454,6 @@ struct hisi_qp {
struct uacce_queue *uacce_q;
};
-static inline int q_num_set(const char *val, const struct kernel_param *kp,
- unsigned int device)
-{
- struct pci_dev *pdev;
- u32 n, q_num;
- int ret;
-
- if (!val)
- return -EINVAL;
-
- pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL);
- if (!pdev) {
- q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2);
- pr_info("No device found currently, suppose queue number is %u\n",
- q_num);
- } else {
- if (pdev->revision == QM_HW_V1)
- q_num = QM_QNUM_V1;
- else
- q_num = QM_QNUM_V2;
-
- pci_dev_put(pdev);
- }
-
- ret = kstrtou32(val, 10, &n);
- if (ret || n < QM_MIN_QNUM || n > q_num)
- return -EINVAL;
-
- return param_set_int(val, kp);
-}
-
static inline int vfs_num_set(const char *val, const struct kernel_param *kp)
{
u32 n;
@@ -526,6 +513,8 @@ static inline void hisi_qm_del_list(struct hisi_qm *qm, struct hisi_qm_list *qm_
mutex_unlock(&qm_list->lock);
}
+int hisi_qm_q_num_set(const char *val, const struct kernel_param *kp,
+ unsigned int device);
int hisi_qm_init(struct hisi_qm *qm);
void hisi_qm_uninit(struct hisi_qm *qm);
int hisi_qm_start(struct hisi_qm *qm);
@@ -583,6 +572,9 @@ void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset);
u32 hisi_qm_get_hw_info(struct hisi_qm *qm,
const struct hisi_qm_cap_info *info_table,
u32 index, bool is_read);
+u32 hisi_qm_get_cap_value(struct hisi_qm *qm,
+ const struct hisi_qm_cap_query_info *info_table,
+ u32 index, bool is_read);
int hisi_qm_set_algs(struct hisi_qm *qm, u64 alg_msk, const struct qm_dev_alg *dev_algs,
u32 dev_algs_size);
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 9c8119ed13a4..c4dde3aafcac 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -466,6 +466,7 @@ struct host1x_memory_context {
refcount_t ref;
struct pid *owner;
+ struct device_dma_parameters dma_parms;
struct device dev;
u64 dma_mask;
u32 stream_id;
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index aa1e65ccb615..7ef5f7ef31a9 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -228,32 +228,17 @@ static inline void hrtimer_cancel_wait_running(struct hrtimer *timer)
/* Initialize timers: */
extern void hrtimer_init(struct hrtimer *timer, clockid_t which_clock,
enum hrtimer_mode mode);
-extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
- enum hrtimer_mode mode);
+extern void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *),
+ clockid_t clock_id, enum hrtimer_mode mode);
+extern void hrtimer_setup_on_stack(struct hrtimer *timer,
+ enum hrtimer_restart (*function)(struct hrtimer *),
+ clockid_t clock_id, enum hrtimer_mode mode);
+extern void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id,
+ enum hrtimer_mode mode);
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS
-extern void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t which_clock,
- enum hrtimer_mode mode);
-extern void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
- clockid_t clock_id,
- enum hrtimer_mode mode);
-
extern void destroy_hrtimer_on_stack(struct hrtimer *timer);
#else
-static inline void hrtimer_init_on_stack(struct hrtimer *timer,
- clockid_t which_clock,
- enum hrtimer_mode mode)
-{
- hrtimer_init(timer, which_clock, mode);
-}
-
-static inline void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
- clockid_t clock_id,
- enum hrtimer_mode mode)
-{
- hrtimer_init_sleeper(sl, clock_id, mode);
-}
-
static inline void destroy_hrtimer_on_stack(struct hrtimer *timer) { }
#endif
@@ -337,6 +322,28 @@ static inline int hrtimer_callback_running(struct hrtimer *timer)
return timer->base->running == timer;
}
+/**
+ * hrtimer_update_function - Update the timer's callback function
+ * @timer: Timer to update
+ * @function: New callback function
+ *
+ * Only safe to call if the timer is not enqueued. Can be called in the callback function if the
+ * timer is not enqueued at the same time (see the comments above HRTIMER_STATE_ENQUEUED).
+ */
+static inline void hrtimer_update_function(struct hrtimer *timer,
+ enum hrtimer_restart (*function)(struct hrtimer *))
+{
+ guard(raw_spinlock_irqsave)(&timer->base->cpu_base->lock);
+
+ if (WARN_ON_ONCE(hrtimer_is_queued(timer)))
+ return;
+
+ if (WARN_ON_ONCE(!function))
+ return;
+
+ timer->function = function;
+}
+
/* Forward a hrtimer so it expires after now: */
extern u64
hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 67d0ab3c3bba..ef5b80e48599 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -322,6 +322,24 @@ struct thpsize {
(transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
+static inline bool vma_thp_disabled(struct vm_area_struct *vma,
+ unsigned long vm_flags)
+{
+ /*
+ * Explicitly disabled through madvise or prctl, or some
+ * architectures may disable THP for some mappings, for
+ * example, s390 kvm.
+ */
+ return (vm_flags & VM_NOHUGEPAGE) ||
+ test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags);
+}
+
+static inline bool thp_disabled_by_hw(void)
+{
+ /* If the hardware/firmware marked hugepage support disabled. */
+ return transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED);
+}
+
unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags);
unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long addr,
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 98c47c394b89..e4697539b665 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -692,6 +692,9 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask,
bool allow_alloc_fallback);
+struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
+ nodemask_t *nmask, gfp_t gfp_mask);
+
int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
@@ -1060,6 +1063,13 @@ static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
}
static inline struct folio *
+alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
+ nodemask_t *nmask, gfp_t gfp_mask)
+{
+ return NULL;
+}
+
+static inline struct folio *
alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask,
bool allow_alloc_fallback)
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 5c6a421ad580..3a63dff62d03 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -368,7 +368,9 @@ enum hwmon_intrusion_attributes {
/**
* struct hwmon_ops - hwmon device operations
- * @is_visible: Callback to return attribute visibility. Mandatory.
+ * @visible: Static visibility. If non-zero, 'is_visible' is ignored.
+ * @is_visible: Callback to return attribute visibility. Mandatory unless
+ * 'visible' is non-zero.
* Parameters are:
* @const void *drvdata:
* Pointer to driver-private data structure passed
@@ -412,6 +414,7 @@ enum hwmon_intrusion_attributes {
* The function returns 0 on success or a negative error number.
*/
struct hwmon_ops {
+ umode_t visible;
umode_t (*is_visible)(const void *drvdata, enum hwmon_sensor_types type,
u32 attr, int channel);
int (*read)(struct device *dev, enum hwmon_sensor_types type,
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 30cef3b940eb..456bca45ff05 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -20,7 +20,7 @@
#include <linux/etherdevice.h>
#include <linux/bitfield.h>
#include <asm/byteorder.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
/*
* DS bit usage
diff --git a/include/linux/iio/backend.h b/include/linux/iio/backend.h
index 8099759d7242..37d56914d485 100644
--- a/include/linux/iio/backend.h
+++ b/include/linux/iio/backend.h
@@ -3,6 +3,7 @@
#define _IIO_BACKEND_H_
#include <linux/types.h>
+#include <linux/iio/iio.h>
struct iio_chan_spec;
struct fwnode_handle;
@@ -17,11 +18,13 @@ enum iio_backend_data_type {
};
enum iio_backend_data_source {
- IIO_BACKEND_INTERNAL_CONTINUOS_WAVE,
+ IIO_BACKEND_INTERNAL_CONTINUOUS_WAVE,
IIO_BACKEND_EXTERNAL,
IIO_BACKEND_DATA_SOURCE_MAX
};
+#define iio_backend_debugfs_ptr(ptr) PTR_IF(IS_ENABLED(CONFIG_DEBUG_FS), ptr)
+
/**
* IIO_BACKEND_EX_INFO - Helper for an IIO extended channel attribute
* @_name: Attribute name
@@ -54,6 +57,8 @@ enum iio_backend_test_pattern {
IIO_BACKEND_NO_TEST_PATTERN,
/* modified prbs9 */
IIO_BACKEND_ADI_PRBS_9A = 32,
+ /* modified prbs23 */
+ IIO_BACKEND_ADI_PRBS_23A,
IIO_BACKEND_TEST_PATTERN_MAX
};
@@ -81,6 +86,9 @@ enum iio_backend_sample_trigger {
* @extend_chan_spec: Extend an IIO channel.
* @ext_info_set: Extended info setter.
* @ext_info_get: Extended info getter.
+ * @read_raw: Read a channel attribute from a backend device
+ * @debugfs_print_chan_status: Print channel status into a buffer.
+ * @debugfs_reg_access: Read or write register value of backend.
**/
struct iio_backend_ops {
int (*enable)(struct iio_backend *back);
@@ -113,11 +121,31 @@ struct iio_backend_ops {
const char *buf, size_t len);
int (*ext_info_get)(struct iio_backend *back, uintptr_t private,
const struct iio_chan_spec *chan, char *buf);
+ int (*read_raw)(struct iio_backend *back,
+ struct iio_chan_spec const *chan, int *val, int *val2,
+ long mask);
+ int (*debugfs_print_chan_status)(struct iio_backend *back,
+ unsigned int chan, char *buf,
+ size_t len);
+ int (*debugfs_reg_access)(struct iio_backend *back, unsigned int reg,
+ unsigned int writeval, unsigned int *readval);
+};
+
+/**
+ * struct iio_backend_info - info structure for an iio_backend
+ * @name: Backend name.
+ * @ops: Backend operations.
+ */
+struct iio_backend_info {
+ const char *name;
+ const struct iio_backend_ops *ops;
};
int iio_backend_chan_enable(struct iio_backend *back, unsigned int chan);
int iio_backend_chan_disable(struct iio_backend *back, unsigned int chan);
int devm_iio_backend_enable(struct device *dev, struct iio_backend *back);
+int iio_backend_enable(struct iio_backend *back);
+void iio_backend_disable(struct iio_backend *back);
int iio_backend_data_format_set(struct iio_backend *back, unsigned int chan,
const struct iio_backend_data_fmt *data);
int iio_backend_data_source_set(struct iio_backend *back, unsigned int chan,
@@ -141,17 +169,41 @@ ssize_t iio_backend_ext_info_set(struct iio_dev *indio_dev, uintptr_t private,
const char *buf, size_t len);
ssize_t iio_backend_ext_info_get(struct iio_dev *indio_dev, uintptr_t private,
const struct iio_chan_spec *chan, char *buf);
-
-int iio_backend_extend_chan_spec(struct iio_dev *indio_dev,
- struct iio_backend *back,
+int iio_backend_read_raw(struct iio_backend *back,
+ struct iio_chan_spec const *chan, int *val, int *val2,
+ long mask);
+int iio_backend_extend_chan_spec(struct iio_backend *back,
struct iio_chan_spec *chan);
void *iio_backend_get_priv(const struct iio_backend *conv);
struct iio_backend *devm_iio_backend_get(struct device *dev, const char *name);
+struct iio_backend *devm_iio_backend_fwnode_get(struct device *dev,
+ const char *name,
+ struct fwnode_handle *fwnode);
struct iio_backend *
__devm_iio_backend_get_from_fwnode_lookup(struct device *dev,
struct fwnode_handle *fwnode);
int devm_iio_backend_register(struct device *dev,
- const struct iio_backend_ops *ops, void *priv);
+ const struct iio_backend_info *info, void *priv);
+
+static inline int iio_backend_read_scale(struct iio_backend *back,
+ struct iio_chan_spec const *chan,
+ int *val, int *val2)
+{
+ return iio_backend_read_raw(back, chan, val, val2, IIO_CHAN_INFO_SCALE);
+}
+
+static inline int iio_backend_read_offset(struct iio_backend *back,
+ struct iio_chan_spec const *chan,
+ int *val, int *val2)
+{
+ return iio_backend_read_raw(back, chan, val, val2,
+ IIO_CHAN_INFO_OFFSET);
+}
+ssize_t iio_backend_debugfs_print_chan_status(struct iio_backend *back,
+ unsigned int chan, char *buf,
+ size_t len);
+void iio_backend_debugfs_add(struct iio_backend *back,
+ struct iio_dev *indio_dev);
#endif
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 894309294182..18779b631e90 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -609,7 +609,7 @@ struct iio_dev {
int scan_bytes;
const unsigned long *available_scan_masks;
- unsigned masklength;
+ unsigned __private masklength;
const unsigned long *active_scan_mask;
bool scan_timestamp;
struct iio_trigger *trig;
@@ -810,6 +810,23 @@ static inline struct dentry *iio_get_debugfs_dentry(struct iio_dev *indio_dev)
}
#endif
+/**
+ * iio_device_suspend_triggering() - suspend trigger attached to an iio_dev
+ * @indio_dev: iio_dev associated with the device that will have triggers suspended
+ *
+ * Return 0 if successful, negative otherwise
+ **/
+int iio_device_suspend_triggering(struct iio_dev *indio_dev);
+
+/**
+ * iio_device_resume_triggering() - resume trigger attached to an iio_dev
+ * that was previously suspended with iio_device_suspend_triggering()
+ * @indio_dev: iio_dev associated with the device that will have triggers resumed
+ *
+ * Return 0 if successful, negative otherwise
+ **/
+int iio_device_resume_triggering(struct iio_dev *indio_dev);
+
#ifdef CONFIG_ACPI
bool iio_read_acpi_mount_matrix(struct device *dev,
struct iio_mount_matrix *orientation,
@@ -855,6 +872,26 @@ static inline const struct iio_scan_type
return &chan->scan_type;
}
+/**
+ * iio_get_masklength - Get length of the channels mask
+ * @indio_dev: the IIO device to get the masklength for
+ */
+static inline unsigned int iio_get_masklength(const struct iio_dev *indio_dev)
+{
+ return ACCESS_PRIVATE(indio_dev, masklength);
+}
+
+int iio_active_scan_mask_index(struct iio_dev *indio_dev);
+
+/**
+ * iio_for_each_active_channel - Iterated over active channels
+ * @indio_dev: the IIO device
+ * @chan: Holds the index of the enabled channel
+ */
+#define iio_for_each_active_channel(indio_dev, chan) \
+ for_each_set_bit((chan), (indio_dev)->active_scan_mask, \
+ iio_get_masklength(indio_dev))
+
ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals);
int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer,
diff --git a/include/linux/input.h b/include/linux/input.h
index 89a0be6ee0e2..cd866b020a01 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -339,12 +339,16 @@ struct input_handler {
* @name: name given to the handle by handler that created it
* @dev: input device the handle is attached to
* @handler: handler that works with the device through this handle
+ * @handle_events: event sequence handler. It is set up by the input core
+ * according to event handling method specified in the @handler. See
+ * input_handle_setup_event_handler().
+ * This method is being called by the input core with interrupts disabled
+ * and dev->event_lock spinlock held and so it may not sleep.
* @d_node: used to put the handle on device's list of attached handles
* @h_node: used to put the handle on handler's list of handles from which
* it gets events
*/
struct input_handle {
-
void *private;
int open;
@@ -353,6 +357,10 @@ struct input_handle {
struct input_dev *dev;
struct input_handler *handler;
+ unsigned int (*handle_events)(struct input_handle *handle,
+ struct input_value *vals,
+ unsigned int count);
+
struct list_head d_node;
struct list_head h_node;
};
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 457151f9f263..8cd9327e4e78 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -616,6 +616,53 @@ extern void __raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);
+/*
+ * With forced-threaded interrupts enabled a raised softirq is deferred to
+ * ksoftirqd unless it can be handled within the threaded interrupt. This
+ * affects timer_list timers and hrtimers which are explicitly marked with
+ * HRTIMER_MODE_SOFT.
+ * With PREEMPT_RT enabled more hrtimers are moved to softirq for processing
+ * which includes all timers which are not explicitly marked HRTIMER_MODE_HARD.
+ * Userspace controlled timers (like the clock_nanosleep() interface) is divided
+ * into two categories: Tasks with elevated scheduling policy including
+ * SCHED_{FIFO|RR|DL} and the remaining scheduling policy. The tasks with the
+ * elevated scheduling policy are woken up directly from the HARDIRQ while all
+ * other wake ups are delayed to softirq and so to ksoftirqd.
+ *
+ * The ksoftirqd runs at SCHED_OTHER policy at which it should remain since it
+ * handles the softirq in an overloaded situation (not handled everything
+ * within its last run).
+ * If the timers are handled at SCHED_OTHER priority then they competes with all
+ * other SCHED_OTHER tasks for CPU resources are possibly delayed.
+ * Moving timers softirqs to a low priority SCHED_FIFO thread instead ensures
+ * that timer are performed before scheduling any SCHED_OTHER thread.
+ */
+DECLARE_PER_CPU(struct task_struct *, ktimerd);
+DECLARE_PER_CPU(unsigned long, pending_timer_softirq);
+void raise_ktimers_thread(unsigned int nr);
+
+static inline unsigned int local_timers_pending_force_th(void)
+{
+ return __this_cpu_read(pending_timer_softirq);
+}
+
+static inline void raise_timer_softirq(unsigned int nr)
+{
+ lockdep_assert_in_irq();
+ if (force_irqthreads())
+ raise_ktimers_thread(nr);
+ else
+ __raise_softirq_irqoff(nr);
+}
+
+static inline unsigned int local_timers_pending(void)
+{
+ if (force_irqthreads())
+ return local_timers_pending_force_th();
+ else
+ return local_softirq_pending();
+}
+
DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
static inline struct task_struct *this_cpu_ksoftirqd(void)
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index c189d36ad55e..578a3fdf5c71 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -110,7 +110,7 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
{
- return cmd_to_io_kiocb(cmd)->task;
+ return cmd_to_io_kiocb(cmd)->tctx->task;
}
#endif /* _LINUX_IO_URING_CMD_H */
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 4b9ba523978d..593c10a02144 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -37,6 +37,7 @@ enum io_uring_cmd_flags {
/* set when uring wants to cancel a previously issued command */
IO_URING_F_CANCEL = (1 << 11),
IO_URING_F_COMPAT = (1 << 12),
+ IO_URING_F_TASK_DEAD = (1 << 13),
};
struct io_wq_work_node {
@@ -55,19 +56,18 @@ struct io_wq_work {
int cancel_seq;
};
-struct io_fixed_file {
- /* file * with additional FFS_* flags */
- unsigned long file_ptr;
+struct io_rsrc_data {
+ unsigned int nr;
+ struct io_rsrc_node **nodes;
};
struct io_file_table {
- struct io_fixed_file *files;
+ struct io_rsrc_data data;
unsigned long *bitmap;
unsigned int alloc_hint;
};
struct io_hash_bucket {
- spinlock_t lock;
struct hlist_head list;
} ____cacheline_aligned_in_smp;
@@ -76,6 +76,12 @@ struct io_hash_table {
unsigned hash_bits;
};
+struct io_mapped_region {
+ struct page **pages;
+ void *vmap_ptr;
+ size_t nr_pages;
+};
+
/*
* Arbitrary limit, can be raised if need be
*/
@@ -85,6 +91,7 @@ struct io_uring_task {
/* submission side */
int cached_refs;
const struct io_ring_ctx *last;
+ struct task_struct *task;
struct io_wq *io_wq;
struct file *registered_rings[IO_RINGFD_REG_MAX];
@@ -270,7 +277,6 @@ struct io_ring_ctx {
* Fixed resources fast path, should be accessed only under
* uring_lock, and updated through io_uring_register(2)
*/
- struct io_rsrc_node *rsrc_node;
atomic_t cancel_seq;
/*
@@ -283,15 +289,13 @@ struct io_ring_ctx {
struct io_wq_work_list iopoll_list;
struct io_file_table file_table;
- struct io_mapped_ubuf **user_bufs;
- unsigned nr_user_files;
- unsigned nr_user_bufs;
+ struct io_rsrc_data buf_table;
struct io_submit_state submit_state;
struct xarray io_bl_xa;
- struct io_hash_table cancel_table_locked;
+ struct io_hash_table cancel_table;
struct io_alloc_cache apoll_cache;
struct io_alloc_cache netmsg_cache;
struct io_alloc_cache rw_cache;
@@ -302,6 +306,11 @@ struct io_ring_ctx {
* ->uring_cmd() by io_uring_cmd_insert_cancelable()
*/
struct hlist_head cancelable_uring_cmd;
+ /*
+ * For Hybrid IOPOLL, runtime in hybrid polling, without
+ * scheduling time
+ */
+ u64 hybrid_poll_time;
} ____cacheline_aligned_in_smp;
struct {
@@ -316,6 +325,9 @@ struct io_ring_ctx {
unsigned cq_entries;
struct io_ev_fd __rcu *io_ev_fd;
unsigned cq_extra;
+
+ void *cq_wait_arg;
+ size_t cq_wait_size;
} ____cacheline_aligned_in_smp;
/*
@@ -342,7 +354,6 @@ struct io_ring_ctx {
struct list_head io_buffers_comp;
struct list_head cq_overflow_list;
- struct io_hash_table cancel_table;
struct hlist_head waitid_list;
@@ -366,16 +377,6 @@ struct io_ring_ctx {
struct wait_queue_head poll_wq;
struct io_restriction restrictions;
- /* slow path rsrc auxilary data, used by update/register */
- struct io_rsrc_data *file_data;
- struct io_rsrc_data *buf_data;
-
- /* protected by ->uring_lock */
- struct list_head rsrc_ref_list;
- struct io_alloc_cache rsrc_node_cache;
- struct wait_queue_head rsrc_quiesce_wq;
- unsigned rsrc_quiesce;
-
u32 pers_next;
struct xarray personalities;
@@ -409,7 +410,7 @@ struct io_ring_ctx {
/* napi busy poll default timeout */
ktime_t napi_busy_poll_dt;
bool napi_prefer_busy_poll;
- bool napi_enabled;
+ u8 napi_track_mode;
DECLARE_HASHTABLE(napi_ht, 4);
#endif
@@ -418,6 +419,13 @@ struct io_ring_ctx {
unsigned evfd_last_cq_tail;
/*
+ * Protection for resize vs mmap races - both the mmap and resize
+ * side will need to grab this lock, to prevent either side from
+ * being run concurrently with the other.
+ */
+ struct mutex resize_lock;
+
+ /*
* If IORING_SETUP_NO_MMAP is used, then the below holds
* the gup'ed pages for the two rings, and the sqes.
*/
@@ -425,6 +433,9 @@ struct io_ring_ctx {
unsigned short n_sqe_pages;
struct page **ring_pages;
struct page **sqe_pages;
+
+ /* used for optimised request parameter and wait argument passing */
+ struct io_mapped_region param_region;
};
struct io_tw_state {
@@ -447,6 +458,7 @@ enum {
REQ_F_LINK_TIMEOUT_BIT,
REQ_F_NEED_CLEANUP_BIT,
REQ_F_POLLED_BIT,
+ REQ_F_HYBRID_IOPOLL_STATE_BIT,
REQ_F_BUFFER_SELECTED_BIT,
REQ_F_BUFFER_RING_BIT,
REQ_F_REISSUE_BIT,
@@ -459,7 +471,6 @@ enum {
REQ_F_DOUBLE_POLL_BIT,
REQ_F_APOLL_MULTISHOT_BIT,
REQ_F_CLEAR_POLLIN_BIT,
- REQ_F_HASH_LOCKED_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_ISREG_BIT,
@@ -468,6 +479,7 @@ enum {
REQ_F_BL_EMPTY_BIT,
REQ_F_BL_NO_RECYCLE_BIT,
REQ_F_BUFFERS_COMMIT_BIT,
+ REQ_F_BUF_NODE_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -506,6 +518,8 @@ enum {
REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT),
/* already went through poll handler */
REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT),
+ /* every req only blocks once in hybrid poll */
+ REQ_F_IOPOLL_STATE = IO_REQ_FLAG(REQ_F_HYBRID_IOPOLL_STATE_BIT),
/* buffer already selected */
REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT),
/* buffer selected from ring, needs commit */
@@ -534,8 +548,6 @@ enum {
REQ_F_APOLL_MULTISHOT = IO_REQ_FLAG(REQ_F_APOLL_MULTISHOT_BIT),
/* recvmsg special flag, clear EPOLLIN */
REQ_F_CLEAR_POLLIN = IO_REQ_FLAG(REQ_F_CLEAR_POLLIN_BIT),
- /* hashed into ->cancel_hash_locked, protected by ->uring_lock */
- REQ_F_HASH_LOCKED = IO_REQ_FLAG(REQ_F_HASH_LOCKED_BIT),
/* don't use lazy poll wake for this request */
REQ_F_POLL_NO_LAZY = IO_REQ_FLAG(REQ_F_POLL_NO_LAZY_BIT),
/* file is pollable */
@@ -546,6 +558,8 @@ enum {
REQ_F_BL_NO_RECYCLE = IO_REQ_FLAG(REQ_F_BL_NO_RECYCLE_BIT),
/* buffer ring head needs incrementing on put */
REQ_F_BUFFERS_COMMIT = IO_REQ_FLAG(REQ_F_BUFFERS_COMMIT_BIT),
+ /* buf node is valid */
+ REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT),
};
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
@@ -615,12 +629,9 @@ struct io_kiocb {
struct io_cqe cqe;
struct io_ring_ctx *ctx;
- struct task_struct *task;
+ struct io_uring_task *tctx;
union {
- /* store used ubuf, so we can prevent reloading */
- struct io_mapped_ubuf *imu;
-
/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
struct io_buffer *kbuf;
@@ -629,6 +640,8 @@ struct io_kiocb {
* REQ_F_BUFFER_RING is set.
*/
struct io_buffer_list *buf_list;
+
+ struct io_rsrc_node *buf_node;
};
union {
@@ -638,13 +651,20 @@ struct io_kiocb {
__poll_t apoll_events;
};
- struct io_rsrc_node *rsrc_node;
+ struct io_rsrc_node *file_node;
atomic_t refs;
bool cancel_seq_set;
struct io_task_work io_task_work;
- /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
- struct hlist_node hash_node;
+ union {
+ /*
+ * for polled requests, i.e. IORING_OP_POLL_ADD and async armed
+ * poll
+ */
+ struct hlist_node hash_node;
+ /* For IOPOLL setup queues, with hybrid polling */
+ u64 iopoll_start;
+ };
/* internal polling, see IORING_FEAT_FAST_POLL */
struct async_poll *apoll;
/* opcode allocated if it needs to store data for async defer */
@@ -667,4 +687,9 @@ struct io_overflow_cqe {
struct io_uring_cqe cqe;
};
+static inline bool io_ctx_cqe32(struct io_ring_ctx *ctx)
+{
+ return ctx->flags & IORING_SETUP_CQE32;
+}
+
#endif
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 4ad12a3c8bae..27048ec10e1c 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -178,6 +178,7 @@ struct iomap_folio_ops {
#else
#define IOMAP_DAX 0
#endif /* CONFIG_FS_DAX */
+#define IOMAP_ATOMIC (1 << 9)
struct iomap_ops {
/*
@@ -256,6 +257,39 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
return &i->iomap;
}
+/*
+ * Return the file offset for the first unchanged block after a short write.
+ *
+ * If nothing was written, round @pos down to point at the first block in
+ * the range, else round up to include the partially written block.
+ */
+static inline loff_t iomap_last_written_block(struct inode *inode, loff_t pos,
+ ssize_t written)
+{
+ if (unlikely(!written))
+ return round_down(pos, i_blocksize(inode));
+ return round_up(pos + written, i_blocksize(inode));
+}
+
+/*
+ * Check if the range needs to be unshared for a FALLOC_FL_UNSHARE_RANGE
+ * operation.
+ *
+ * Don't bother with blocks that are not shared to start with; or mappings that
+ * cannot be shared, such as inline data, delalloc reservations, holes or
+ * unwritten extents.
+ *
+ * Note that we use srcmap directly instead of iomap_iter_srcmap as unsharing
+ * requires providing a separate source map, and the presence of one is a good
+ * indicator that unsharing is needed, unlike IOMAP_F_SHARED which can be set
+ * for any data that goes into the COW fork for XFS.
+ */
+static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter)
+{
+ return (iter->iomap.flags & IOMAP_F_SHARED) &&
+ iter->srcmap.type == IOMAP_MAPPED;
+}
+
ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops, void *private);
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
@@ -276,9 +310,9 @@ vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length,
struct iomap *iomap);
-void iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos,
- loff_t length, ssize_t written, unsigned flag,
- struct iomap *iomap, iomap_punch_t punch);
+void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte,
+ loff_t end_byte, unsigned flags, struct iomap *iomap,
+ iomap_punch_t punch);
int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len, const struct iomap_ops *ops);
diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
index 19a7b00baff4..91324c331a4b 100644
--- a/include/linux/iopoll.h
+++ b/include/linux/iopoll.h
@@ -19,19 +19,19 @@
* @op: accessor function (takes @args as its arguments)
* @val: Variable to read the value into
* @cond: Break condition (usually involving @val)
- * @sleep_us: Maximum time to sleep between reads in us (0
- * tight-loops). Should be less than ~20ms since usleep_range
- * is used (see Documentation/timers/timers-howto.rst).
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please
+ * read usleep_range() function description for details and
+ * limitations.
* @timeout_us: Timeout in us, 0 means never timeout
* @sleep_before_read: if it is true, sleep @sleep_us before read.
* @args: arguments for @op poll
*
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
- * case, the last read value at @args is stored in @val. Must not
- * be called from atomic context if sleep_us or timeout_us are used.
- *
* When available, you'll probably want to use one of the specialized
* macros defined below rather than this macro directly.
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @args is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
*/
#define read_poll_timeout(op, val, cond, sleep_us, timeout_us, \
sleep_before_read, args...) \
@@ -64,22 +64,22 @@
* @op: accessor function (takes @args as its arguments)
* @val: Variable to read the value into
* @cond: Break condition (usually involving @val)
- * @delay_us: Time to udelay between reads in us (0 tight-loops). Should
- * be less than ~10us since udelay is used (see
- * Documentation/timers/timers-howto.rst).
+ * @delay_us: Time to udelay between reads in us (0 tight-loops). Please
+ * read udelay() function description for details and
+ * limitations.
* @timeout_us: Timeout in us, 0 means never timeout
* @delay_before_read: if it is true, delay @delay_us before read.
* @args: arguments for @op poll
*
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
- * case, the last read value at @args is stored in @val.
- *
* This macro does not rely on timekeeping. Hence it is safe to call even when
* timekeeping is suspended, at the expense of an underestimation of wall clock
* time, which is rather minimal with a non-zero delay_us.
*
* When available, you'll probably want to use one of the specialized
* macros defined below rather than this macro directly.
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @args is stored in @val.
*/
#define read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, \
delay_before_read, args...) \
@@ -119,17 +119,17 @@
* @addr: Address to poll
* @val: Variable to read the value into
* @cond: Break condition (usually involving @val)
- * @sleep_us: Maximum time to sleep between reads in us (0
- * tight-loops). Should be less than ~20ms since usleep_range
- * is used (see Documentation/timers/timers-howto.rst).
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please
+ * read usleep_range() function description for details and
+ * limitations.
* @timeout_us: Timeout in us, 0 means never timeout
*
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
- * case, the last read value at @addr is stored in @val. Must not
- * be called from atomic context if sleep_us or timeout_us are used.
- *
* When available, you'll probably want to use one of the specialized
* macros defined below rather than this macro directly.
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val. Must not
+ * be called from atomic context if sleep_us or timeout_us are used.
*/
#define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \
read_poll_timeout(op, val, cond, sleep_us, timeout_us, false, addr)
@@ -140,16 +140,16 @@
* @addr: Address to poll
* @val: Variable to read the value into
* @cond: Break condition (usually involving @val)
- * @delay_us: Time to udelay between reads in us (0 tight-loops). Should
- * be less than ~10us since udelay is used (see
- * Documentation/timers/timers-howto.rst).
+ * @delay_us: Time to udelay between reads in us (0 tight-loops). Please
+ * read udelay() function description for details and
+ * limitations.
* @timeout_us: Timeout in us, 0 means never timeout
*
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
- * case, the last read value at @addr is stored in @val.
- *
* When available, you'll probably want to use one of the specialized
* macros defined below rather than this macro directly.
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @addr is stored in @val.
*/
#define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \
read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, false, addr)
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index ecabed6d3307..7f1f11a5e4e4 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -66,10 +66,12 @@ struct its_vpe {
bool enabled;
bool group;
} sgi_config[16];
- atomic_t vmapp_count;
};
};
+ /* Track the VPE being mapped */
+ atomic_t vmapp_count;
+
/*
* Ensures mutual exclusion between affinity setting of the
* vPE and vLPI operations using vpe->col_idx.
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 3f003d5fde53..57b074e0cfbb 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -18,6 +18,8 @@
#include <asm/irqflags.h>
#include <asm/percpu.h>
+struct task_struct;
+
/* Currently lockdep_softirqs_on/off is used only by lockdep */
#ifdef CONFIG_PROVE_LOCKING
extern void lockdep_softirqs_on(unsigned long ip);
@@ -25,12 +27,16 @@
extern void lockdep_hardirqs_on_prepare(void);
extern void lockdep_hardirqs_on(unsigned long ip);
extern void lockdep_hardirqs_off(unsigned long ip);
+ extern void lockdep_cleanup_dead_cpu(unsigned int cpu,
+ struct task_struct *idle);
#else
static inline void lockdep_softirqs_on(unsigned long ip) { }
static inline void lockdep_softirqs_off(unsigned long ip) { }
static inline void lockdep_hardirqs_on_prepare(void) { }
static inline void lockdep_hardirqs_on(unsigned long ip) { }
static inline void lockdep_hardirqs_off(unsigned long ip) { }
+ static inline void lockdep_cleanup_dead_cpu(unsigned int cpu,
+ struct task_struct *idle) {}
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 3496baa0b07f..e97206c721a0 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -5,30 +5,36 @@
#include <uapi/linux/irqnr.h>
-extern int nr_irqs;
+unsigned int irq_get_nr_irqs(void) __pure;
+unsigned int irq_set_nr_irqs(unsigned int nr);
extern struct irq_desc *irq_to_desc(unsigned int irq);
unsigned int irq_get_next_irq(unsigned int offset);
-# define for_each_irq_desc(irq, desc) \
- for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \
- irq++, desc = irq_to_desc(irq)) \
- if (!desc) \
- ; \
- else
-
+#define for_each_irq_desc(irq, desc) \
+ for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \
+ __nr_irqs__ = 0) \
+ for (irq = 0, desc = irq_to_desc(irq); irq < __nr_irqs__; \
+ irq++, desc = irq_to_desc(irq)) \
+ if (!desc) \
+ ; \
+ else
# define for_each_irq_desc_reverse(irq, desc) \
- for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; \
- irq--, desc = irq_to_desc(irq)) \
+ for (irq = irq_get_nr_irqs() - 1, desc = irq_to_desc(irq); \
+ irq >= 0; irq--, desc = irq_to_desc(irq)) \
if (!desc) \
; \
else
-# define for_each_active_irq(irq) \
- for (irq = irq_get_next_irq(0); irq < nr_irqs; \
- irq = irq_get_next_irq(irq + 1))
+#define for_each_active_irq(irq) \
+ for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \
+ __nr_irqs__ = 0) \
+ for (irq = irq_get_next_irq(0); irq < __nr_irqs__; \
+ irq = irq_get_next_irq(irq + 1))
-#define for_each_irq_nr(irq) \
- for (irq = 0; irq < nr_irqs; irq++)
+#define for_each_irq_nr(irq) \
+ for (unsigned int __nr_irqs__ = irq_get_nr_irqs(); __nr_irqs__; \
+ __nr_irqs__ = 0) \
+ for (irq = 0; irq < __nr_irqs__; irq++)
#endif
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 8aef9bb6ad57..50f7ea8714bf 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1796,22 +1796,21 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal)
static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
const void *address, unsigned int length)
{
- struct {
- struct shash_desc shash;
- char ctx[JBD_MAX_CHECKSUM_SIZE];
- } desc;
+ DEFINE_RAW_FLEX(struct shash_desc, desc, __ctx,
+ DIV_ROUND_UP(JBD_MAX_CHECKSUM_SIZE,
+ sizeof(*((struct shash_desc *)0)->__ctx)));
int err;
BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) >
JBD_MAX_CHECKSUM_SIZE);
- desc.shash.tfm = journal->j_chksum_driver;
- *(u32 *)desc.ctx = crc;
+ desc->tfm = journal->j_chksum_driver;
+ *(u32 *)desc->__ctx = crc;
- err = crypto_shash_update(&desc.shash, address, length);
+ err = crypto_shash_update(desc, address, length);
BUG_ON(err);
- return *(u32 *)desc.ctx;
+ return *(u32 *)desc->__ctx;
}
/* Return most recent uncommitted transaction */
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 1220f0fbe5bf..ed945f42e064 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -502,7 +502,7 @@ static inline unsigned long _msecs_to_jiffies(const unsigned int m)
* - all other values are converted to jiffies by either multiplying
* the input value by a factor or dividing it with a factor and
* handling any 32-bit overflows.
- * for the details see __msecs_to_jiffies()
+ * for the details see _msecs_to_jiffies()
*
* msecs_to_jiffies() checks for the passed in value being a constant
* via __builtin_constant_p() allowing gcc to eliminate most of the
@@ -526,6 +526,19 @@ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m)
}
}
+/**
+ * secs_to_jiffies: - convert seconds to jiffies
+ * @_secs: time in seconds
+ *
+ * Conversion is done by simple multiplication with HZ
+ *
+ * secs_to_jiffies() is defined as a macro rather than a static inline
+ * function so it can be used in static initializers.
+ *
+ * Return: jiffies value
+ */
+#define secs_to_jiffies(_secs) ((_secs) * HZ)
+
extern unsigned long __usecs_to_jiffies(const unsigned int u);
#if !(USEC_PER_SEC % HZ)
static inline unsigned long _usecs_to_jiffies(const unsigned int u)
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 5fcbc254d186..8c4f3bb24429 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -269,15 +269,6 @@ extern unsigned long __stop_kprobe_blacklist[];
extern struct kretprobe_blackpoint kretprobe_blacklist[];
-#ifdef CONFIG_KPROBES_SANITY_TEST
-extern int init_test_probes(void);
-#else /* !CONFIG_KPROBES_SANITY_TEST */
-static inline int init_test_probes(void)
-{
- return 0;
-}
-#endif /* CONFIG_KPROBES_SANITY_TEST */
-
extern int arch_prepare_kprobe(struct kprobe *p);
extern void arch_arm_kprobe(struct kprobe *p);
extern void arch_disarm_kprobe(struct kprobe *p);
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 11690dacd986..ec9c05044d4f 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -54,12 +54,11 @@ static inline long mm_ksm_zero_pages(struct mm_struct *mm)
return atomic_long_read(&mm->ksm_zero_pages);
}
-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
+ /* Adding mm to ksm is best effort on fork. */
if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
- return __ksm_enter(mm);
-
- return 0;
+ __ksm_enter(mm);
}
static inline int ksm_execve(struct mm_struct *mm)
@@ -107,9 +106,8 @@ static inline int ksm_disable(struct mm_struct *mm)
return 0;
}
-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
- return 0;
}
static inline int ksm_execve(struct mm_struct *mm)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0d5125a3e31a..45be36e5285f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1313,8 +1313,6 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu);
struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn);
-kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
-kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
@@ -1529,8 +1527,22 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
#endif
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
-int kvm_arch_hardware_enable(void);
-void kvm_arch_hardware_disable(void);
+/*
+ * kvm_arch_{enable,disable}_virtualization() are called on one CPU, under
+ * kvm_usage_lock, immediately after/before 0=>1 and 1=>0 transitions of
+ * kvm_usage_count, i.e. at the beginning of the generic hardware enabling
+ * sequence, and at the end of the generic hardware disabling sequence.
+ */
+void kvm_arch_enable_virtualization(void);
+void kvm_arch_disable_virtualization(void);
+/*
+ * kvm_arch_{enable,disable}_virtualization_cpu() are called on "every" CPU to
+ * do the actual twiddling of hardware bits. The hooks are called on all
+ * online CPUs when KVM enables/disabled virtualization, and on a single CPU
+ * when that CPU is onlined/offlined (including for Resume/Suspend).
+ */
+int kvm_arch_enable_virtualization_cpu(void);
+void kvm_arch_disable_virtualization_cpu(void);
#endif
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 9b4a6ff03235..c1a85d46eba6 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -270,9 +270,7 @@ enum {
/* bits 24:31 of host->flags are reserved for LLD specific flags */
- /* various lengths of time */
- ATA_TMOUT_BOOT = 30000, /* heuristic */
- ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */
+ /* Various lengths of time */
ATA_TMOUT_INTERNAL_QUICK = 5000,
ATA_TMOUT_MAX_PARK = 30000,
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 217f7abf2cbf..67964dc4db95 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -173,7 +173,7 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name,
(lock)->dep_map.lock_type)
#define lockdep_set_subclass(lock, sub) \
- lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\
+ lockdep_init_map_type(&(lock)->dep_map, (lock)->dep_map.name, (lock)->dep_map.key, sub,\
(lock)->dep_map.wait_type_inner, \
(lock)->dep_map.wait_type_outer, \
(lock)->dep_map.lock_type)
diff --git a/include/linux/lsm/apparmor.h b/include/linux/lsm/apparmor.h
new file mode 100644
index 000000000000..612cbfacb072
--- /dev/null
+++ b/include/linux/lsm/apparmor.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module interface to other subsystems.
+ * AppArmor presents single pointer to an aa_label structure.
+ */
+#ifndef __LINUX_LSM_APPARMOR_H
+#define __LINUX_LSM_APPARMOR_H
+
+struct aa_label;
+
+struct lsm_prop_apparmor {
+#ifdef CONFIG_SECURITY_APPARMOR
+ struct aa_label *label;
+#endif
+};
+
+#endif /* ! __LINUX_LSM_APPARMOR_H */
diff --git a/include/linux/lsm/bpf.h b/include/linux/lsm/bpf.h
new file mode 100644
index 000000000000..8106e206fcef
--- /dev/null
+++ b/include/linux/lsm/bpf.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module interface to other subsystems.
+ * BPF may present a single u32 value.
+ */
+#ifndef __LINUX_LSM_BPF_H
+#define __LINUX_LSM_BPF_H
+#include <linux/types.h>
+
+struct lsm_prop_bpf {
+#ifdef CONFIG_BPF_LSM
+ u32 secid;
+#endif
+};
+
+#endif /* ! __LINUX_LSM_BPF_H */
diff --git a/include/linux/lsm/selinux.h b/include/linux/lsm/selinux.h
new file mode 100644
index 000000000000..9455a6b5b910
--- /dev/null
+++ b/include/linux/lsm/selinux.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module interface to other subsystems.
+ * SELinux presents a single u32 value which is known as a secid.
+ */
+#ifndef __LINUX_LSM_SELINUX_H
+#define __LINUX_LSM_SELINUX_H
+#include <linux/types.h>
+
+struct lsm_prop_selinux {
+#ifdef CONFIG_SECURITY_SELINUX
+ u32 secid;
+#endif
+};
+
+#endif /* ! __LINUX_LSM_SELINUX_H */
diff --git a/include/linux/lsm/smack.h b/include/linux/lsm/smack.h
new file mode 100644
index 000000000000..ff730dd7a734
--- /dev/null
+++ b/include/linux/lsm/smack.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module interface to other subsystems.
+ * Smack presents a pointer into the global Smack label list.
+ */
+#ifndef __LINUX_LSM_SMACK_H
+#define __LINUX_LSM_SMACK_H
+
+struct smack_known;
+
+struct lsm_prop_smack {
+#ifdef CONFIG_SECURITY_SMACK
+ struct smack_known *skp;
+#endif
+};
+
+#endif /* ! __LINUX_LSM_SMACK_H */
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 1d59513bf230..eb2937599cb0 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -176,7 +176,8 @@ LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode,
const char *name, const void *value, size_t size, int flags)
LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer,
size_t buffer_size)
-LSM_HOOK(void, LSM_RET_VOID, inode_getsecid, struct inode *inode, u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, inode_getlsmprop, struct inode *inode,
+ struct lsm_prop *prop)
LSM_HOOK(int, 0, inode_copy_up, struct dentry *src, struct cred **new)
LSM_HOOK(int, -EOPNOTSUPP, inode_copy_up_xattr, struct dentry *src,
const char *name)
@@ -217,6 +218,8 @@ LSM_HOOK(int, 0, cred_prepare, struct cred *new, const struct cred *old,
LSM_HOOK(void, LSM_RET_VOID, cred_transfer, struct cred *new,
const struct cred *old)
LSM_HOOK(void, LSM_RET_VOID, cred_getsecid, const struct cred *c, u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, cred_getlsmprop, const struct cred *c,
+ struct lsm_prop *prop)
LSM_HOOK(int, 0, kernel_act_as, struct cred *new, u32 secid)
LSM_HOOK(int, 0, kernel_create_files_as, struct cred *new, struct inode *inode)
LSM_HOOK(int, 0, kernel_module_request, char *kmod_name)
@@ -235,9 +238,9 @@ LSM_HOOK(int, 0, task_fix_setgroups, struct cred *new, const struct cred * old)
LSM_HOOK(int, 0, task_setpgid, struct task_struct *p, pid_t pgid)
LSM_HOOK(int, 0, task_getpgid, struct task_struct *p)
LSM_HOOK(int, 0, task_getsid, struct task_struct *p)
-LSM_HOOK(void, LSM_RET_VOID, current_getsecid_subj, u32 *secid)
-LSM_HOOK(void, LSM_RET_VOID, task_getsecid_obj,
- struct task_struct *p, u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, current_getlsmprop_subj, struct lsm_prop *prop)
+LSM_HOOK(void, LSM_RET_VOID, task_getlsmprop_obj,
+ struct task_struct *p, struct lsm_prop *prop)
LSM_HOOK(int, 0, task_setnice, struct task_struct *p, int nice)
LSM_HOOK(int, 0, task_setioprio, struct task_struct *p, int ioprio)
LSM_HOOK(int, 0, task_getioprio, struct task_struct *p)
@@ -256,8 +259,8 @@ LSM_HOOK(void, LSM_RET_VOID, task_to_inode, struct task_struct *p,
struct inode *inode)
LSM_HOOK(int, 0, userns_create, const struct cred *cred)
LSM_HOOK(int, 0, ipc_permission, struct kern_ipc_perm *ipcp, short flag)
-LSM_HOOK(void, LSM_RET_VOID, ipc_getsecid, struct kern_ipc_perm *ipcp,
- u32 *secid)
+LSM_HOOK(void, LSM_RET_VOID, ipc_getlsmprop, struct kern_ipc_perm *ipcp,
+ struct lsm_prop *prop)
LSM_HOOK(int, 0, msg_msg_alloc_security, struct msg_msg *msg)
LSM_HOOK(void, LSM_RET_VOID, msg_msg_free_security, struct msg_msg *msg)
LSM_HOOK(int, 0, msg_queue_alloc_security, struct kern_ipc_perm *perm)
@@ -294,6 +297,8 @@ LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size)
LSM_HOOK(int, 0, ismaclabel, const char *name)
LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata,
u32 *seclen)
+LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop,
+ char **secdata, u32 *seclen)
LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid)
LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen)
LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode)
@@ -416,7 +421,8 @@ LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring,
LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr,
void **lsmrule, gfp_t gfp)
LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule)
-LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule)
+LSM_HOOK(int, 0, audit_rule_match, struct lsm_prop *prop, u32 field, u32 op,
+ void *lsmrule)
LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule)
#endif /* CONFIG_AUDIT */
@@ -431,7 +437,7 @@ LSM_HOOK(int, 0, bpf_prog_load, struct bpf_prog *prog, union bpf_attr *attr,
struct bpf_token *token)
LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free, struct bpf_prog *prog)
LSM_HOOK(int, 0, bpf_token_create, struct bpf_token *token, union bpf_attr *attr,
- struct path *path)
+ const struct path *path)
LSM_HOOK(void, LSM_RET_VOID, bpf_token_free, struct bpf_token *token)
LSM_HOOK(int, 0, bpf_token_cmd, const struct bpf_token *token, enum bpf_cmd cmd)
LSM_HOOK(int, 0, bpf_token_capable, const struct bpf_token *token, int cap)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index fc4d75c6cec3..673d5cae7c81 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -467,6 +467,7 @@ static inline __init_memblock bool memblock_bottom_up(void)
phys_addr_t memblock_phys_mem_size(void);
phys_addr_t memblock_reserved_size(void);
+unsigned long memblock_estimated_nr_free_pages(void);
phys_addr_t memblock_start_of_DRAM(void);
phys_addr_t memblock_end_of_DRAM(void);
void memblock_enforce_memory_limit(phys_addr_t memory_limit);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 34d2da05f2f1..e1b41554a5fb 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1760,8 +1760,9 @@ static inline int memcg_kmem_id(struct mem_cgroup *memcg)
struct mem_cgroup *mem_cgroup_from_slab_obj(void *p);
-static inline void count_objcg_event(struct obj_cgroup *objcg,
- enum vm_event_item idx)
+static inline void count_objcg_events(struct obj_cgroup *objcg,
+ enum vm_event_item idx,
+ unsigned long count)
{
struct mem_cgroup *memcg;
@@ -1770,7 +1771,7 @@ static inline void count_objcg_event(struct obj_cgroup *objcg,
rcu_read_lock();
memcg = obj_cgroup_memcg(objcg);
- count_memcg_events(memcg, idx, 1);
+ count_memcg_events(memcg, idx, count);
rcu_read_unlock();
}
@@ -1825,8 +1826,9 @@ static inline struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
return NULL;
}
-static inline void count_objcg_event(struct obj_cgroup *objcg,
- enum vm_event_item idx)
+static inline void count_objcg_events(struct obj_cgroup *objcg,
+ enum vm_event_item idx,
+ unsigned long count)
{
}
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 97f6de69f616..96d369112bfa 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2138,7 +2138,7 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
u8 ts_cqe_metadata_size2wqe_counter[0x5];
u8 reserved_at_250[0x10];
- u8 reserved_at_260[0x120];
+ u8 reserved_at_260[0x20];
u8 format_select_dw_gtpu_dw_0[0x8];
u8 format_select_dw_gtpu_dw_1[0x8];
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ecf63d2b0582..feb5c8021bef 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -329,12 +329,14 @@ extern unsigned int kobjsize(const void *objp);
#define VM_HIGH_ARCH_BIT_3 35 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_4 36 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_5 37 /* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_6 38 /* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_0 BIT(VM_HIGH_ARCH_BIT_0)
#define VM_HIGH_ARCH_1 BIT(VM_HIGH_ARCH_BIT_1)
#define VM_HIGH_ARCH_2 BIT(VM_HIGH_ARCH_BIT_2)
#define VM_HIGH_ARCH_3 BIT(VM_HIGH_ARCH_BIT_3)
#define VM_HIGH_ARCH_4 BIT(VM_HIGH_ARCH_BIT_4)
#define VM_HIGH_ARCH_5 BIT(VM_HIGH_ARCH_BIT_5)
+#define VM_HIGH_ARCH_6 BIT(VM_HIGH_ARCH_BIT_6)
#endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
#ifdef CONFIG_ARCH_HAS_PKEYS
@@ -365,7 +367,17 @@ extern unsigned int kobjsize(const void *objp);
* for more details on the guard size.
*/
# define VM_SHADOW_STACK VM_HIGH_ARCH_5
-#else
+#endif
+
+#if defined(CONFIG_ARM64_GCS)
+/*
+ * arm64's Guarded Control Stack implements similar functionality and
+ * has similar constraints to shadow stacks.
+ */
+# define VM_SHADOW_STACK VM_HIGH_ARCH_6
+#endif
+
+#ifndef VM_SHADOW_STACK
# define VM_SHADOW_STACK VM_NONE
#endif
@@ -3818,8 +3830,9 @@ void *sparse_buffer_alloc(unsigned long size);
struct page * __populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap);
-void pmd_init(void *addr);
void pud_init(void *addr);
+void pmd_init(void *addr);
+void kernel_pte_init(void *addr);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
@@ -4219,4 +4232,8 @@ static inline void pgalloc_tag_copy(struct folio *new, struct folio *old)
}
#endif /* CONFIG_MEM_ALLOC_PROFILING */
+int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status);
+int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status);
+int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
+
#endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6e3bdf8e38bc..381d22eba088 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -782,6 +782,7 @@ struct vm_area_struct {
struct mm_cid {
u64 time;
int cid;
+ int recent_cid;
};
#endif
@@ -852,6 +853,27 @@ struct mm_struct {
* When the next mm_cid scan is due (in jiffies).
*/
unsigned long mm_cid_next_scan;
+ /**
+ * @nr_cpus_allowed: Number of CPUs allowed for mm.
+ *
+ * Number of CPUs allowed in the union of all mm's
+ * threads allowed CPUs.
+ */
+ unsigned int nr_cpus_allowed;
+ /**
+ * @max_nr_cid: Maximum number of concurrency IDs allocated.
+ *
+ * Track the highest number of concurrency IDs allocated for the
+ * mm.
+ */
+ atomic_t max_nr_cid;
+ /**
+ * @cpus_allowed_lock: Lock protecting mm cpus_allowed.
+ *
+ * Provide mutual exclusion for mm cpus_allowed and
+ * mm nr_cpus_allowed updates.
+ */
+ raw_spinlock_t cpus_allowed_lock;
#endif
#ifdef CONFIG_MMU
atomic_long_t pgtables_bytes; /* size of all page tables */
@@ -1170,18 +1192,30 @@ static inline int mm_cid_clear_lazy_put(int cid)
return cid & ~MM_CID_LAZY_PUT;
}
+/*
+ * mm_cpus_allowed: Union of all mm's threads allowed CPUs.
+ */
+static inline cpumask_t *mm_cpus_allowed(struct mm_struct *mm)
+{
+ unsigned long bitmap = (unsigned long)mm;
+
+ bitmap += offsetof(struct mm_struct, cpu_bitmap);
+ /* Skip cpu_bitmap */
+ bitmap += cpumask_size();
+ return (struct cpumask *)bitmap;
+}
+
/* Accessor for struct mm_struct's cidmask. */
static inline cpumask_t *mm_cidmask(struct mm_struct *mm)
{
- unsigned long cid_bitmap = (unsigned long)mm;
+ unsigned long cid_bitmap = (unsigned long)mm_cpus_allowed(mm);
- cid_bitmap += offsetof(struct mm_struct, cpu_bitmap);
- /* Skip cpu_bitmap */
+ /* Skip mm_cpus_allowed */
cid_bitmap += cpumask_size();
return (struct cpumask *)cid_bitmap;
}
-static inline void mm_init_cid(struct mm_struct *mm)
+static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p)
{
int i;
@@ -1189,17 +1223,22 @@ static inline void mm_init_cid(struct mm_struct *mm)
struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i);
pcpu_cid->cid = MM_CID_UNSET;
+ pcpu_cid->recent_cid = MM_CID_UNSET;
pcpu_cid->time = 0;
}
+ mm->nr_cpus_allowed = p->nr_cpus_allowed;
+ atomic_set(&mm->max_nr_cid, 0);
+ raw_spin_lock_init(&mm->cpus_allowed_lock);
+ cpumask_copy(mm_cpus_allowed(mm), &p->cpus_mask);
cpumask_clear(mm_cidmask(mm));
}
-static inline int mm_alloc_cid_noprof(struct mm_struct *mm)
+static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *p)
{
mm->pcpu_cid = alloc_percpu_noprof(struct mm_cid);
if (!mm->pcpu_cid)
return -ENOMEM;
- mm_init_cid(mm);
+ mm_init_cid(mm, p);
return 0;
}
#define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__))
@@ -1212,16 +1251,31 @@ static inline void mm_destroy_cid(struct mm_struct *mm)
static inline unsigned int mm_cid_size(void)
{
- return cpumask_size();
+ return 2 * cpumask_size(); /* mm_cpus_allowed(), mm_cidmask(). */
+}
+
+static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask)
+{
+ struct cpumask *mm_allowed = mm_cpus_allowed(mm);
+
+ if (!mm)
+ return;
+ /* The mm_cpus_allowed is the union of each thread allowed CPUs masks. */
+ raw_spin_lock(&mm->cpus_allowed_lock);
+ cpumask_or(mm_allowed, mm_allowed, cpumask);
+ WRITE_ONCE(mm->nr_cpus_allowed, cpumask_weight(mm_allowed));
+ raw_spin_unlock(&mm->cpus_allowed_lock);
}
#else /* CONFIG_SCHED_MM_CID */
-static inline void mm_init_cid(struct mm_struct *mm) { }
-static inline int mm_alloc_cid(struct mm_struct *mm) { return 0; }
+static inline void mm_init_cid(struct mm_struct *mm, struct task_struct *p) { }
+static inline int mm_alloc_cid(struct mm_struct *mm, struct task_struct *p) { return 0; }
static inline void mm_destroy_cid(struct mm_struct *mm) { }
+
static inline unsigned int mm_cid_size(void)
{
return 0;
}
+static inline void mm_set_cpus_allowed(struct mm_struct *mm, const struct cpumask *cpumask) { }
#endif /* CONFIG_SCHED_MM_CID */
struct mmu_gather;
diff --git a/include/linux/mman.h b/include/linux/mman.h
index bcb201ab7a41..a842783ffa62 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_MMAN_H
#define _LINUX_MMAN_H
+#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/percpu_counter.h>
@@ -94,7 +95,7 @@ static inline void vm_unacct_memory(long pages)
#endif
#ifndef arch_calc_vm_flag_bits
-#define arch_calc_vm_flag_bits(flags) 0
+#define arch_calc_vm_flag_bits(file, flags) 0
#endif
#ifndef arch_validate_prot
@@ -151,13 +152,13 @@ calc_vm_prot_bits(unsigned long prot, unsigned long pkey)
* Combine the mmap "flags" argument into "vm_flags" used internally.
*/
static inline unsigned long
-calc_vm_flag_bits(unsigned long flags)
+calc_vm_flag_bits(struct file *file, unsigned long flags)
{
return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
_calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) |
_calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) |
_calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) |
- arch_calc_vm_flag_bits(flags);
+ arch_calc_vm_flag_bits(file, flags);
}
unsigned long vm_commit_limit(void);
@@ -188,16 +189,31 @@ static inline bool arch_memory_deny_write_exec_supported(void)
*
* d) mmap(PROT_READ | PROT_EXEC)
* mmap(PROT_READ | PROT_EXEC | PROT_BTI)
+ *
+ * This is only applicable if the user has set the Memory-Deny-Write-Execute
+ * (MDWE) protection mask for the current process.
+ *
+ * @old specifies the VMA flags the VMA originally possessed, and @new the ones
+ * we propose to set.
+ *
+ * Return: false if proposed change is OK, true if not ok and should be denied.
*/
-static inline bool map_deny_write_exec(struct vm_area_struct *vma, unsigned long vm_flags)
+static inline bool map_deny_write_exec(unsigned long old, unsigned long new)
{
+ /* If MDWE is disabled, we have nothing to deny. */
if (!test_bit(MMF_HAS_MDWE, &current->mm->flags))
return false;
- if ((vm_flags & VM_EXEC) && (vm_flags & VM_WRITE))
+ /* If the new VMA is not executable, we have nothing to deny. */
+ if (!(new & VM_EXEC))
+ return false;
+
+ /* Under MDWE we do not accept newly writably executable VMAs... */
+ if (new & VM_WRITE)
return true;
- if (!(vma->vm_flags & VM_EXEC) && (vm_flags & VM_EXEC))
+ /* ...nor previously non-executable VMAs becoming executable. */
+ if (!(old & VM_EXEC))
return true;
return false;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 17506e4a2835..80bc5640bb60 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -458,9 +458,7 @@ struct lru_gen_folio {
enum {
MM_LEAF_TOTAL, /* total leaf entries */
- MM_LEAF_OLD, /* old leaf entries */
MM_LEAF_YOUNG, /* young leaf entries */
- MM_NONLEAF_TOTAL, /* total non-leaf entries */
MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */
MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */
NR_MM_STATS
@@ -557,7 +555,7 @@ struct lru_gen_memcg {
void lru_gen_init_pgdat(struct pglist_data *pgdat);
void lru_gen_init_lruvec(struct lruvec *lruvec);
-void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
+bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
void lru_gen_init_memcg(struct mem_cgroup *memcg);
void lru_gen_exit_memcg(struct mem_cgroup *memcg);
@@ -576,8 +574,9 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
{
}
-static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
{
+ return false;
}
static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
@@ -824,6 +823,7 @@ struct zone {
unsigned long watermark_boost;
unsigned long nr_reserved_highatomic;
+ unsigned long nr_free_highatomic;
/*
* We don't know if the memory that we're going to allocate will be
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index cd4d5c8781f5..b1b219bc3422 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -9,6 +9,7 @@ struct mnt_idmap;
struct user_namespace;
extern struct mnt_idmap nop_mnt_idmap;
+extern struct mnt_idmap invalid_mnt_idmap;
extern struct user_namespace init_user_ns;
typedef struct {
diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index b4fa92a6e44b..1b56796f6cb3 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -15,7 +15,7 @@
#include <linux/kernel.h>
#include <linux/io.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <asm/barrier.h>
#ifdef CONFIG_MTD_MAP_BANK_WIDTH_1
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index a561c629d89f..2bf91b57591b 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -49,7 +49,6 @@ static inline void mutex_destroy(struct mutex *lock) {}
#endif
-#ifndef CONFIG_PREEMPT_RT
/**
* mutex_init - initialize the mutex
* @mutex: the mutex to be initialized
@@ -65,6 +64,18 @@ do { \
__mutex_init((mutex), #mutex, &__key); \
} while (0)
+/**
+ * mutex_init_with_key - initialize a mutex with a given lockdep key
+ * @mutex: the mutex to be initialized
+ * @key: the lockdep key to be associated with the mutex
+ *
+ * Initialize the mutex to the unlocked state.
+ *
+ * It is not allowed to initialize an already locked mutex.
+ */
+#define mutex_init_with_key(mutex, key) __mutex_init((mutex), #mutex, (key))
+
+#ifndef CONFIG_PREEMPT_RT
#define __MUTEX_INITIALIZER(lockname) \
{ .owner = ATOMIC_LONG_INIT(0) \
, .wait_lock = __RAW_SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
@@ -111,12 +122,6 @@ do { \
__mutex_rt_init((mutex), name, key); \
} while (0)
-#define mutex_init(mutex) \
-do { \
- static struct lock_class_key __key; \
- \
- __mutex_init((mutex), #mutex, &__key); \
-} while (0)
#endif /* CONFIG_PREEMPT_RT */
#ifdef CONFIG_DEBUG_MUTEXES
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e87b5e488325..8896705ccd63 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3325,6 +3325,12 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev)
static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
{
+ /* Paired with READ_ONCE() from dev_watchdog() */
+ WRITE_ONCE(dev_queue->trans_start, jiffies);
+
+ /* This barrier is paired with smp_mb() from dev_watchdog() */
+ smp_mb__before_atomic();
+
/* Must be an atomic op see netif_txq_try_stop() */
set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
}
@@ -3451,6 +3457,12 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
if (likely(dql_avail(&dev_queue->dql) >= 0))
return;
+ /* Paired with READ_ONCE() from dev_watchdog() */
+ WRITE_ONCE(dev_queue->trans_start, jiffies);
+
+ /* This barrier is paired with smp_mb() from dev_watchdog() */
+ smp_mb__before_atomic();
+
set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state);
/*
@@ -5029,6 +5041,24 @@ void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs);
void netif_inherit_tso_max(struct net_device *to,
const struct net_device *from);
+static inline unsigned int
+netif_get_gro_max_size(const struct net_device *dev, const struct sk_buff *skb)
+{
+ /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */
+ return skb->protocol == htons(ETH_P_IPV6) ?
+ READ_ONCE(dev->gro_max_size) :
+ READ_ONCE(dev->gro_ipv4_max_size);
+}
+
+static inline unsigned int
+netif_get_gso_max_size(const struct net_device *dev, const struct sk_buff *skb)
+{
+ /* pairs with WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
+ return skb->protocol == htons(ETH_P_IPV6) ?
+ READ_ONCE(dev->gso_max_size) :
+ READ_ONCE(dev->gso_ipv4_max_size);
+}
+
static inline bool netif_is_macsec(const struct net_device *dev)
{
return dev->priv_flags & IFF_MACSEC;
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 2683b2b77612..2b8aac2c70ad 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -376,15 +376,11 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
struct nf_conn;
enum nf_nat_manip_type;
struct nlattr;
-enum ip_conntrack_dir;
struct nf_nat_hook {
int (*parse_nat_setup)(struct nf_conn *ct, enum nf_nat_manip_type manip,
const struct nlattr *attr);
void (*decode_session)(struct sk_buff *skb, struct flowi *fl);
- unsigned int (*manip_pkt)(struct sk_buff *skb, struct nf_conn *ct,
- enum nf_nat_manip_type mtype,
- enum ip_conntrack_dir dir);
void (*remove_nat_bysrc)(struct nf_conn *ct);
};
diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index b332c2048c75..a48a30842d84 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -239,7 +239,7 @@ int netlink_register_notifier(struct notifier_block *nb);
int netlink_unregister_notifier(struct notifier_block *nb);
/* finegrained unicast helpers: */
-struct sock *netlink_getsockbyfilp(struct file *filp);
+struct sock *netlink_getsockbyfd(int fd);
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
long *timeo, struct sock *ssk);
void netlink_detachskb(struct sock *sk, struct sk_buff *skb);
diff --git a/include/linux/nfs.h b/include/linux/nfs.h
index ceb70a926b95..9ad727ddfedb 100644
--- a/include/linux/nfs.h
+++ b/include/linux/nfs.h
@@ -8,11 +8,20 @@
#ifndef _LINUX_NFS_H
#define _LINUX_NFS_H
+#include <linux/cred.h>
+#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/string.h>
#include <linux/crc32.h>
#include <uapi/linux/nfs.h>
+/* The LOCALIO program is entirely private to Linux and is
+ * NOT part of the uapi.
+ */
+#define NFS_LOCALIO_PROGRAM 400122
+#define LOCALIOPROC_NULL 0
+#define LOCALIOPROC_UUID_IS_LOCAL 1
+
/*
* This is the kernel NFS client file handle representation
*/
diff --git a/include/linux/nfs_common.h b/include/linux/nfs_common.h
new file mode 100644
index 000000000000..5fc02df88252
--- /dev/null
+++ b/include/linux/nfs_common.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file contains constants and methods used by both NFS client and server.
+ */
+#ifndef _LINUX_NFS_COMMON_H
+#define _LINUX_NFS_COMMON_H
+
+#include <linux/errno.h>
+#include <uapi/linux/nfs.h>
+
+/* Mapping from NFS error code to "errno" error code. */
+#define errno_NFSERR_IO EIO
+
+int nfs_stat_to_errno(enum nfs_stat status);
+int nfs4_stat_to_errno(int stat);
+
+#endif /* _LINUX_NFS_COMMON_H */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 1df86ab98c77..b804346a9741 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -8,6 +8,7 @@
#include <linux/wait.h>
#include <linux/nfs_xdr.h>
#include <linux/sunrpc/xprt.h>
+#include <linux/nfslocalio.h>
#include <linux/atomic.h>
#include <linux/refcount.h>
@@ -49,6 +50,7 @@ struct nfs_client {
#define NFS_CS_DS 7 /* - Server is a DS */
#define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */
#define NFS_CS_PNFS 9 /* - Server used for pnfs */
+#define NFS_CS_LOCAL_IO 10 /* - client is local */
struct sockaddr_storage cl_addr; /* server identifier */
size_t cl_addrlen;
char * cl_hostname; /* hostname of server */
@@ -125,6 +127,13 @@ struct nfs_client {
struct net *cl_net;
struct list_head pending_cb_stateids;
struct rcu_head rcu;
+
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+ struct timespec64 cl_nfssvc_boot;
+ seqlock_t cl_boot_lock;
+ nfs_uuid_t cl_uuid;
+ spinlock_t cl_localio_lock;
+#endif /* CONFIG_NFS_LOCALIO */
};
/*
@@ -158,6 +167,7 @@ struct nfs_server {
#define NFS_MOUNT_WRITE_WAIT 0x02000000
#define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000
#define NFS_MOUNT_SHUTDOWN 0x08000000
+#define NFS_MOUNT_NO_ALIGNWRITE 0x10000000
unsigned int fattr_valid; /* Valid attributes */
unsigned int caps; /* server capabilities */
@@ -234,12 +244,12 @@ struct nfs_server {
/* the following fields are protected by nfs_client->cl_lock */
struct rb_root state_owners;
#endif
- struct ida openowner_id;
- struct ida lockowner_id;
+ atomic64_t owner_ctr;
struct list_head state_owners_lru;
struct list_head layouts;
struct list_head delegations;
struct list_head ss_copies;
+ struct list_head ss_src_copies;
unsigned long delegation_gen;
unsigned long mig_gen;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 45623af3e7b8..12d8e47bc5a3 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -446,7 +446,7 @@ struct nfs42_clone_res {
struct stateowner_id {
__u64 create_time;
- __u32 uniquifier;
+ __u64 uniquifier;
};
struct nfs4_open_delegation {
@@ -1854,6 +1854,24 @@ struct nfs_rpc_ops {
};
/*
+ * Helper functions used by NFS client and/or server
+ */
+static inline void encode_opaque_fixed(struct xdr_stream *xdr,
+ const void *buf, size_t len)
+{
+ WARN_ON_ONCE(xdr_stream_encode_opaque_fixed(xdr, buf, len) < 0);
+}
+
+static inline int decode_opaque_fixed(struct xdr_stream *xdr,
+ void *buf, size_t len)
+{
+ ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len);
+ if (unlikely(ret < 0))
+ return -EIO;
+ return 0;
+}
+
+/*
* Function vectors etc. for the NFS client
*/
extern const struct nfs_rpc_ops nfs_v2_clientops;
@@ -1866,4 +1884,4 @@ extern const struct rpc_version nfs_version4;
extern const struct rpc_version nfsacl_version3;
extern const struct rpc_program nfsacl_program;
-#endif
+#endif /* _LINUX_NFS_XDR_H */
diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h
new file mode 100644
index 000000000000..3982fea79919
--- /dev/null
+++ b/include/linux/nfslocalio.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024 Mike Snitzer <[email protected]>
+ * Copyright (C) 2024 NeilBrown <[email protected]>
+ */
+#ifndef __LINUX_NFSLOCALIO_H
+#define __LINUX_NFSLOCALIO_H
+
+/* nfsd_file structure is purposely kept opaque to NFS client */
+struct nfsd_file;
+
+#if IS_ENABLED(CONFIG_NFS_LOCALIO)
+
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/uuid.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svcauth.h>
+#include <linux/nfs.h>
+#include <net/net_namespace.h>
+
+/*
+ * Useful to allow a client to negotiate if localio
+ * possible with its server.
+ *
+ * See Documentation/filesystems/nfs/localio.rst for more detail.
+ */
+typedef struct {
+ uuid_t uuid;
+ struct list_head list;
+ struct net __rcu *net; /* nfsd's network namespace */
+ struct auth_domain *dom; /* auth_domain for localio */
+} nfs_uuid_t;
+
+void nfs_uuid_init(nfs_uuid_t *);
+bool nfs_uuid_begin(nfs_uuid_t *);
+void nfs_uuid_end(nfs_uuid_t *);
+void nfs_uuid_is_local(const uuid_t *, struct list_head *,
+ struct net *, struct auth_domain *, struct module *);
+void nfs_uuid_invalidate_clients(struct list_head *list);
+void nfs_uuid_invalidate_one_client(nfs_uuid_t *nfs_uuid);
+
+/* localio needs to map filehandle -> struct nfsd_file */
+extern struct nfsd_file *
+nfsd_open_local_fh(struct net *, struct auth_domain *, struct rpc_clnt *,
+ const struct cred *, const struct nfs_fh *,
+ const fmode_t) __must_hold(rcu);
+
+struct nfsd_localio_operations {
+ bool (*nfsd_serv_try_get)(struct net *);
+ void (*nfsd_serv_put)(struct net *);
+ struct nfsd_file *(*nfsd_open_local_fh)(struct net *,
+ struct auth_domain *,
+ struct rpc_clnt *,
+ const struct cred *,
+ const struct nfs_fh *,
+ const fmode_t);
+ void (*nfsd_file_put_local)(struct nfsd_file *);
+ struct file *(*nfsd_file_file)(struct nfsd_file *);
+} ____cacheline_aligned;
+
+extern void nfsd_localio_ops_init(void);
+extern const struct nfsd_localio_operations *nfs_to;
+
+struct nfsd_file *nfs_open_local_fh(nfs_uuid_t *,
+ struct rpc_clnt *, const struct cred *,
+ const struct nfs_fh *, const fmode_t);
+
+static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
+{
+ /*
+ * Once reference to nfsd_serv is dropped, NFSD could be
+ * unloaded, so ensure safe return from nfsd_file_put_local()
+ * by always taking RCU.
+ */
+ rcu_read_lock();
+ nfs_to->nfsd_file_put_local(localio);
+ rcu_read_unlock();
+}
+
+#else /* CONFIG_NFS_LOCALIO */
+static inline void nfsd_localio_ops_init(void)
+{
+}
+static inline void nfs_to_nfsd_file_put_local(struct nfsd_file *localio)
+{
+}
+#endif /* CONFIG_NFS_LOCALIO */
+
+#endif /* __LINUX_NFSLOCALIO_H */
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index b61438313a73..9fd7a0ce9c1a 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -107,11 +107,11 @@ extern nodemask_t _unused_nodemask_arg_;
*/
#define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \
__nodemask_pr_bits(maskp)
-static inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m)
+static __always_inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m)
{
return m ? MAX_NUMNODES : 0;
}
-static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m)
+static __always_inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m)
{
return m ? m->bits : NULL;
}
@@ -132,19 +132,19 @@ static __always_inline void __node_set(int node, volatile nodemask_t *dstp)
}
#define node_clear(node, dst) __node_clear((node), &(dst))
-static inline void __node_clear(int node, volatile nodemask_t *dstp)
+static __always_inline void __node_clear(int node, volatile nodemask_t *dstp)
{
clear_bit(node, dstp->bits);
}
#define nodes_setall(dst) __nodes_setall(&(dst), MAX_NUMNODES)
-static inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits)
+static __always_inline void __nodes_setall(nodemask_t *dstp, unsigned int nbits)
{
bitmap_fill(dstp->bits, nbits);
}
#define nodes_clear(dst) __nodes_clear(&(dst), MAX_NUMNODES)
-static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits)
+static __always_inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits)
{
bitmap_zero(dstp->bits, nbits);
}
@@ -154,14 +154,14 @@ static inline void __nodes_clear(nodemask_t *dstp, unsigned int nbits)
#define node_test_and_set(node, nodemask) \
__node_test_and_set((node), &(nodemask))
-static inline bool __node_test_and_set(int node, nodemask_t *addr)
+static __always_inline bool __node_test_and_set(int node, nodemask_t *addr)
{
return test_and_set_bit(node, addr->bits);
}
#define nodes_and(dst, src1, src2) \
__nodes_and(&(dst), &(src1), &(src2), MAX_NUMNODES)
-static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
+static __always_inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits);
@@ -169,7 +169,7 @@ static inline void __nodes_and(nodemask_t *dstp, const nodemask_t *src1p,
#define nodes_or(dst, src1, src2) \
__nodes_or(&(dst), &(src1), &(src2), MAX_NUMNODES)
-static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
+static __always_inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
bitmap_or(dstp->bits, src1p->bits, src2p->bits, nbits);
@@ -177,7 +177,7 @@ static inline void __nodes_or(nodemask_t *dstp, const nodemask_t *src1p,
#define nodes_xor(dst, src1, src2) \
__nodes_xor(&(dst), &(src1), &(src2), MAX_NUMNODES)
-static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
+static __always_inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
bitmap_xor(dstp->bits, src1p->bits, src2p->bits, nbits);
@@ -185,7 +185,7 @@ static inline void __nodes_xor(nodemask_t *dstp, const nodemask_t *src1p,
#define nodes_andnot(dst, src1, src2) \
__nodes_andnot(&(dst), &(src1), &(src2), MAX_NUMNODES)
-static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
+static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits);
@@ -193,7 +193,7 @@ static inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *src1p,
#define nodes_complement(dst, src) \
__nodes_complement(&(dst), &(src), MAX_NUMNODES)
-static inline void __nodes_complement(nodemask_t *dstp,
+static __always_inline void __nodes_complement(nodemask_t *dstp,
const nodemask_t *srcp, unsigned int nbits)
{
bitmap_complement(dstp->bits, srcp->bits, nbits);
@@ -201,7 +201,7 @@ static inline void __nodes_complement(nodemask_t *dstp,
#define nodes_equal(src1, src2) \
__nodes_equal(&(src1), &(src2), MAX_NUMNODES)
-static inline bool __nodes_equal(const nodemask_t *src1p,
+static __always_inline bool __nodes_equal(const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
return bitmap_equal(src1p->bits, src2p->bits, nbits);
@@ -209,7 +209,7 @@ static inline bool __nodes_equal(const nodemask_t *src1p,
#define nodes_intersects(src1, src2) \
__nodes_intersects(&(src1), &(src2), MAX_NUMNODES)
-static inline bool __nodes_intersects(const nodemask_t *src1p,
+static __always_inline bool __nodes_intersects(const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
return bitmap_intersects(src1p->bits, src2p->bits, nbits);
@@ -217,33 +217,33 @@ static inline bool __nodes_intersects(const nodemask_t *src1p,
#define nodes_subset(src1, src2) \
__nodes_subset(&(src1), &(src2), MAX_NUMNODES)
-static inline bool __nodes_subset(const nodemask_t *src1p,
+static __always_inline bool __nodes_subset(const nodemask_t *src1p,
const nodemask_t *src2p, unsigned int nbits)
{
return bitmap_subset(src1p->bits, src2p->bits, nbits);
}
#define nodes_empty(src) __nodes_empty(&(src), MAX_NUMNODES)
-static inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits)
+static __always_inline bool __nodes_empty(const nodemask_t *srcp, unsigned int nbits)
{
return bitmap_empty(srcp->bits, nbits);
}
#define nodes_full(nodemask) __nodes_full(&(nodemask), MAX_NUMNODES)
-static inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits)
+static __always_inline bool __nodes_full(const nodemask_t *srcp, unsigned int nbits)
{
return bitmap_full(srcp->bits, nbits);
}
#define nodes_weight(nodemask) __nodes_weight(&(nodemask), MAX_NUMNODES)
-static inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits)
+static __always_inline int __nodes_weight(const nodemask_t *srcp, unsigned int nbits)
{
return bitmap_weight(srcp->bits, nbits);
}
#define nodes_shift_right(dst, src, n) \
__nodes_shift_right(&(dst), &(src), (n), MAX_NUMNODES)
-static inline void __nodes_shift_right(nodemask_t *dstp,
+static __always_inline void __nodes_shift_right(nodemask_t *dstp,
const nodemask_t *srcp, int n, int nbits)
{
bitmap_shift_right(dstp->bits, srcp->bits, n, nbits);
@@ -251,7 +251,7 @@ static inline void __nodes_shift_right(nodemask_t *dstp,
#define nodes_shift_left(dst, src, n) \
__nodes_shift_left(&(dst), &(src), (n), MAX_NUMNODES)
-static inline void __nodes_shift_left(nodemask_t *dstp,
+static __always_inline void __nodes_shift_left(nodemask_t *dstp,
const nodemask_t *srcp, int n, int nbits)
{
bitmap_shift_left(dstp->bits, srcp->bits, n, nbits);
@@ -261,13 +261,13 @@ static inline void __nodes_shift_left(nodemask_t *dstp,
> MAX_NUMNODES, then the silly min_ts could be dropped. */
#define first_node(src) __first_node(&(src))
-static inline unsigned int __first_node(const nodemask_t *srcp)
+static __always_inline unsigned int __first_node(const nodemask_t *srcp)
{
return min_t(unsigned int, MAX_NUMNODES, find_first_bit(srcp->bits, MAX_NUMNODES));
}
#define next_node(n, src) __next_node((n), &(src))
-static inline unsigned int __next_node(int n, const nodemask_t *srcp)
+static __always_inline unsigned int __next_node(int n, const nodemask_t *srcp)
{
return min_t(unsigned int, MAX_NUMNODES, find_next_bit(srcp->bits, MAX_NUMNODES, n+1));
}
@@ -277,7 +277,7 @@ static inline unsigned int __next_node(int n, const nodemask_t *srcp)
* the first node in src if needed. Returns MAX_NUMNODES if src is empty.
*/
#define next_node_in(n, src) __next_node_in((n), &(src))
-static inline unsigned int __next_node_in(int node, const nodemask_t *srcp)
+static __always_inline unsigned int __next_node_in(int node, const nodemask_t *srcp)
{
unsigned int ret = __next_node(node, srcp);
@@ -286,7 +286,7 @@ static inline unsigned int __next_node_in(int node, const nodemask_t *srcp)
return ret;
}
-static inline void init_nodemask_of_node(nodemask_t *mask, int node)
+static __always_inline void init_nodemask_of_node(nodemask_t *mask, int node)
{
nodes_clear(*mask);
node_set(node, *mask);
@@ -304,7 +304,7 @@ static inline void init_nodemask_of_node(nodemask_t *mask, int node)
})
#define first_unset_node(mask) __first_unset_node(&(mask))
-static inline unsigned int __first_unset_node(const nodemask_t *maskp)
+static __always_inline unsigned int __first_unset_node(const nodemask_t *maskp)
{
return min_t(unsigned int, MAX_NUMNODES,
find_first_zero_bit(maskp->bits, MAX_NUMNODES));
@@ -338,21 +338,21 @@ static inline unsigned int __first_unset_node(const nodemask_t *maskp)
#define nodemask_parse_user(ubuf, ulen, dst) \
__nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES)
-static inline int __nodemask_parse_user(const char __user *buf, int len,
+static __always_inline int __nodemask_parse_user(const char __user *buf, int len,
nodemask_t *dstp, int nbits)
{
return bitmap_parse_user(buf, len, dstp->bits, nbits);
}
#define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES)
-static inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits)
+static __always_inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits)
{
return bitmap_parselist(buf, dstp->bits, nbits);
}
#define node_remap(oldbit, old, new) \
__node_remap((oldbit), &(old), &(new), MAX_NUMNODES)
-static inline int __node_remap(int oldbit,
+static __always_inline int __node_remap(int oldbit,
const nodemask_t *oldp, const nodemask_t *newp, int nbits)
{
return bitmap_bitremap(oldbit, oldp->bits, newp->bits, nbits);
@@ -360,7 +360,7 @@ static inline int __node_remap(int oldbit,
#define nodes_remap(dst, src, old, new) \
__nodes_remap(&(dst), &(src), &(old), &(new), MAX_NUMNODES)
-static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp,
+static __always_inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp,
const nodemask_t *oldp, const nodemask_t *newp, int nbits)
{
bitmap_remap(dstp->bits, srcp->bits, oldp->bits, newp->bits, nbits);
@@ -368,7 +368,7 @@ static inline void __nodes_remap(nodemask_t *dstp, const nodemask_t *srcp,
#define nodes_onto(dst, orig, relmap) \
__nodes_onto(&(dst), &(orig), &(relmap), MAX_NUMNODES)
-static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp,
+static __always_inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp,
const nodemask_t *relmapp, int nbits)
{
bitmap_onto(dstp->bits, origp->bits, relmapp->bits, nbits);
@@ -376,7 +376,7 @@ static inline void __nodes_onto(nodemask_t *dstp, const nodemask_t *origp,
#define nodes_fold(dst, orig, sz) \
__nodes_fold(&(dst), &(orig), sz, MAX_NUMNODES)
-static inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp,
+static __always_inline void __nodes_fold(nodemask_t *dstp, const nodemask_t *origp,
int sz, int nbits)
{
bitmap_fold(dstp->bits, origp->bits, sz, nbits);
@@ -418,22 +418,22 @@ enum node_states {
extern nodemask_t node_states[NR_NODE_STATES];
#if MAX_NUMNODES > 1
-static inline int node_state(int node, enum node_states state)
+static __always_inline int node_state(int node, enum node_states state)
{
return node_isset(node, node_states[state]);
}
-static inline void node_set_state(int node, enum node_states state)
+static __always_inline void node_set_state(int node, enum node_states state)
{
__node_set(node, &node_states[state]);
}
-static inline void node_clear_state(int node, enum node_states state)
+static __always_inline void node_clear_state(int node, enum node_states state)
{
__node_clear(node, &node_states[state]);
}
-static inline int num_node_state(enum node_states state)
+static __always_inline int num_node_state(enum node_states state)
{
return nodes_weight(node_states[state]);
}
@@ -443,11 +443,11 @@ static inline int num_node_state(enum node_states state)
#define first_online_node first_node(node_states[N_ONLINE])
#define first_memory_node first_node(node_states[N_MEMORY])
-static inline unsigned int next_online_node(int nid)
+static __always_inline unsigned int next_online_node(int nid)
{
return next_node(nid, node_states[N_ONLINE]);
}
-static inline unsigned int next_memory_node(int nid)
+static __always_inline unsigned int next_memory_node(int nid)
{
return next_node(nid, node_states[N_MEMORY]);
}
@@ -455,13 +455,13 @@ static inline unsigned int next_memory_node(int nid)
extern unsigned int nr_node_ids;
extern unsigned int nr_online_nodes;
-static inline void node_set_online(int nid)
+static __always_inline void node_set_online(int nid)
{
node_set_state(nid, N_ONLINE);
nr_online_nodes = num_node_state(N_ONLINE);
}
-static inline void node_set_offline(int nid)
+static __always_inline void node_set_offline(int nid)
{
node_clear_state(nid, N_ONLINE);
nr_online_nodes = num_node_state(N_ONLINE);
@@ -469,20 +469,20 @@ static inline void node_set_offline(int nid)
#else
-static inline int node_state(int node, enum node_states state)
+static __always_inline int node_state(int node, enum node_states state)
{
return node == 0;
}
-static inline void node_set_state(int node, enum node_states state)
+static __always_inline void node_set_state(int node, enum node_states state)
{
}
-static inline void node_clear_state(int node, enum node_states state)
+static __always_inline void node_clear_state(int node, enum node_states state)
{
}
-static inline int num_node_state(enum node_states state)
+static __always_inline int num_node_state(enum node_states state)
{
return 1;
}
@@ -502,7 +502,7 @@ static inline int num_node_state(enum node_states state)
#endif
-static inline int node_random(const nodemask_t *maskp)
+static __always_inline int node_random(const nodemask_t *maskp)
{
#if defined(CONFIG_NUMA) && (MAX_NUMNODES > 1)
int w, bit;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b58d9405d65e..0a6e22038ce3 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -327,7 +327,8 @@ struct nvme_id_ctrl {
__le32 sanicap;
__le32 hmminds;
__le16 hmmaxd;
- __u8 rsvd338[4];
+ __le16 nvmsetidmax;
+ __le16 endgidmax;
__u8 anatt;
__u8 anacap;
__le32 anagrpmax;
@@ -522,6 +523,7 @@ enum {
NVME_ID_CNS_NS_DESC_LIST = 0x03,
NVME_ID_CNS_CS_NS = 0x05,
NVME_ID_CNS_CS_CTRL = 0x06,
+ NVME_ID_CNS_NS_ACTIVE_LIST_CS = 0x07,
NVME_ID_CNS_NS_CS_INDEP = 0x08,
NVME_ID_CNS_NS_PRESENT_LIST = 0x10,
NVME_ID_CNS_NS_PRESENT = 0x11,
@@ -530,6 +532,7 @@ enum {
NVME_ID_CNS_SCNDRY_CTRL_LIST = 0x15,
NVME_ID_CNS_NS_GRANULARITY = 0x16,
NVME_ID_CNS_UUID_LIST = 0x17,
+ NVME_ID_CNS_ENDGRP_LIST = 0x19,
};
enum {
@@ -560,6 +563,8 @@ enum {
NVME_NS_FLBAS_LBA_SHIFT = 1,
NVME_NS_FLBAS_META_EXT = 0x10,
NVME_NS_NMIC_SHARED = 1 << 0,
+ NVME_NS_ROTATIONAL = 1 << 4,
+ NVME_NS_VWC_NOT_PRESENT = 1 << 5,
NVME_LBAF_RP_BEST = 0,
NVME_LBAF_RP_BETTER = 1,
NVME_LBAF_RP_GOOD = 2,
@@ -617,6 +622,40 @@ enum {
NVME_NIDT_CSI = 0x04,
};
+struct nvme_endurance_group_log {
+ __u8 egcw;
+ __u8 egfeat;
+ __u8 rsvd2;
+ __u8 avsp;
+ __u8 avspt;
+ __u8 pused;
+ __le16 did;
+ __u8 rsvd8[24];
+ __u8 ee[16];
+ __u8 dur[16];
+ __u8 duw[16];
+ __u8 muw[16];
+ __u8 hrc[16];
+ __u8 hwc[16];
+ __u8 mdie[16];
+ __u8 neile[16];
+ __u8 tegcap[16];
+ __u8 uegcap[16];
+ __u8 rsvd192[320];
+};
+
+struct nvme_rotational_media_log {
+ __le16 endgid;
+ __le16 numa;
+ __le16 nrs;
+ __u8 rsvd6[2];
+ __le32 spinc;
+ __le32 fspinc;
+ __le32 ldc;
+ __le32 fldc;
+ __u8 rsvd24[488];
+};
+
struct nvme_smart_log {
__u8 critical_warning;
__u8 temperature[2];
@@ -1244,6 +1283,7 @@ enum {
NVME_FEAT_WRITE_PROTECT = 0x84,
NVME_FEAT_VENDOR_START = 0xC0,
NVME_FEAT_VENDOR_END = 0xFF,
+ NVME_LOG_SUPPORTED = 0x00,
NVME_LOG_ERROR = 0x01,
NVME_LOG_SMART = 0x02,
NVME_LOG_FW_SLOT = 0x03,
@@ -1254,6 +1294,8 @@ enum {
NVME_LOG_TELEMETRY_CTRL = 0x08,
NVME_LOG_ENDURANCE_GROUP = 0x09,
NVME_LOG_ANA = 0x0c,
+ NVME_LOG_FEATURES = 0x12,
+ NVME_LOG_RMI = 0x16,
NVME_LOG_DISC = 0x70,
NVME_LOG_RESERVATION = 0x80,
NVME_FWACT_REPL = (0 << 3),
@@ -1261,6 +1303,24 @@ enum {
NVME_FWACT_ACTV = (2 << 3),
};
+struct nvme_supported_log {
+ __le32 lids[256];
+};
+
+enum {
+ NVME_LIDS_LSUPP = 1 << 0,
+};
+
+struct nvme_supported_features_log {
+ __le32 fis[256];
+};
+
+enum {
+ NVME_FIS_FSUPP = 1 << 0,
+ NVME_FIS_NSCPE = 1 << 20,
+ NVME_FIS_CSCPE = 1 << 21,
+};
+
/* NVMe Namespace Write Protect State */
enum {
NVME_NS_NO_WRITE_PROTECT = 0,
@@ -1281,7 +1341,8 @@ struct nvme_identify {
__u8 cns;
__u8 rsvd3;
__le16 ctrlid;
- __u8 rsvd11[3];
+ __le16 cnssid;
+ __u8 rsvd11;
__u8 csi;
__u32 rsvd12[4];
};
@@ -1389,7 +1450,7 @@ struct nvme_get_log_page_command {
__u8 lsp; /* upper 4 bits reserved */
__le16 numdl;
__le16 numdu;
- __u16 rsvd11;
+ __le16 lsi;
union {
struct {
__le32 lpol;
@@ -2037,4 +2098,72 @@ struct nvme_completion {
#define NVME_MINOR(ver) (((ver) >> 8) & 0xff)
#define NVME_TERTIARY(ver) ((ver) & 0xff)
+enum {
+ NVME_AEN_RESV_LOG_PAGE_AVALIABLE = 0x00,
+};
+
+enum {
+ NVME_PR_LOG_EMPTY_LOG_PAGE = 0x00,
+ NVME_PR_LOG_REGISTRATION_PREEMPTED = 0x01,
+ NVME_PR_LOG_RESERVATION_RELEASED = 0x02,
+ NVME_PR_LOG_RESERVATOIN_PREEMPTED = 0x03,
+};
+
+enum {
+ NVME_PR_NOTIFY_BIT_REG_PREEMPTED = 1,
+ NVME_PR_NOTIFY_BIT_RESV_RELEASED = 2,
+ NVME_PR_NOTIFY_BIT_RESV_PREEMPTED = 3,
+};
+
+struct nvme_pr_log {
+ __le64 count;
+ __u8 type;
+ __u8 nr_pages;
+ __u8 rsvd1[2];
+ __le32 nsid;
+ __u8 rsvd2[48];
+};
+
+struct nvmet_pr_register_data {
+ __le64 crkey;
+ __le64 nrkey;
+};
+
+struct nvmet_pr_acquire_data {
+ __le64 crkey;
+ __le64 prkey;
+};
+
+struct nvmet_pr_release_data {
+ __le64 crkey;
+};
+
+enum nvme_pr_capabilities {
+ NVME_PR_SUPPORT_PTPL = 1,
+ NVME_PR_SUPPORT_WRITE_EXCLUSIVE = 1 << 1,
+ NVME_PR_SUPPORT_EXCLUSIVE_ACCESS = 1 << 2,
+ NVME_PR_SUPPORT_WRITE_EXCLUSIVE_REG_ONLY = 1 << 3,
+ NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_REG_ONLY = 1 << 4,
+ NVME_PR_SUPPORT_WRITE_EXCLUSIVE_ALL_REGS = 1 << 5,
+ NVME_PR_SUPPORT_EXCLUSIVE_ACCESS_ALL_REGS = 1 << 6,
+ NVME_PR_SUPPORT_IEKEY_VER_1_3_DEF = 1 << 7,
+};
+
+enum nvme_pr_register_action {
+ NVME_PR_REGISTER_ACT_REG = 0,
+ NVME_PR_REGISTER_ACT_UNREG = 1,
+ NVME_PR_REGISTER_ACT_REPLACE = 1 << 1,
+};
+
+enum nvme_pr_acquire_action {
+ NVME_PR_ACQUIRE_ACT_ACQUIRE = 0,
+ NVME_PR_ACQUIRE_ACT_PREEMPT = 1,
+ NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT = 1 << 1,
+};
+
+enum nvme_pr_release_action {
+ NVME_PR_RELEASE_ACT_RELEASE = 0,
+ NVME_PR_RELEASE_ACT_CLEAR = 1,
+};
+
#endif /* _LINUX_NVME_H */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 1b3a76710487..908ee0aad554 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -543,7 +543,7 @@ FOLIO_FLAG(swapbacked, FOLIO_HEAD_PAGE)
* - PG_private and PG_private_2 cause release_folio() and co to be invoked
*/
PAGEFLAG(Private, private, PF_ANY)
-PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY)
+FOLIO_FLAG(private_2, FOLIO_HEAD_PAGE)
/* owner_2 can be set on tail pages for anon memory */
FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE)
@@ -554,7 +554,7 @@ FOLIO_FLAG(owner_2, FOLIO_HEAD_PAGE)
*/
TESTPAGEFLAG(Writeback, writeback, PF_NO_TAIL)
TESTSCFLAG(Writeback, writeback, PF_NO_TAIL)
-PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL)
+FOLIO_FLAG(mappedtodisk, FOLIO_HEAD_PAGE)
/* PG_readahead is only used for reads; PG_reclaim is only for writes */
PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL)
@@ -975,12 +975,16 @@ static __always_inline bool folio_test_##fname(const struct folio *folio) \
} \
static __always_inline void __folio_set_##fname(struct folio *folio) \
{ \
+ if (folio_test_##fname(folio)) \
+ return; \
VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \
folio); \
folio->page.page_type = (unsigned int)PGTY_##lname << 24; \
} \
static __always_inline void __folio_clear_##fname(struct folio *folio) \
{ \
+ if (folio->page.page_type == UINT_MAX) \
+ return; \
VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \
folio->page.page_type = UINT_MAX; \
}
@@ -993,11 +997,15 @@ static __always_inline int Page##uname(const struct page *page) \
} \
static __always_inline void __SetPage##uname(struct page *page) \
{ \
+ if (Page##uname(page)) \
+ return; \
VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \
page->page_type = (unsigned int)PGTY_##lname << 24; \
} \
static __always_inline void __ClearPage##uname(struct page *page) \
{ \
+ if (page->page_type == UINT_MAX) \
+ return; \
VM_BUG_ON_PAGE(!Page##uname(page), page); \
page->page_type = UINT_MAX; \
}
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index b6321fc49159..52b5ea663b9f 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -41,7 +41,11 @@
PCPU_MIN_ALLOC_SHIFT)
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
-#define PERCPU_DYNAMIC_SIZE_SHIFT 12
+# if defined(CONFIG_LOCKDEP) && !defined(CONFIG_PAGE_SIZE_4KB)
+# define PERCPU_DYNAMIC_SIZE_SHIFT 13
+# else
+# define PERCPU_DYNAMIC_SIZE_SHIFT 12
+#endif /* LOCKDEP and PAGE_SIZE > 4KiB */
#else
#define PERCPU_DYNAMIC_SIZE_SHIFT 10
#endif
diff --git a/include/linux/perf/arm_pmuv3.h b/include/linux/perf/arm_pmuv3.h
index 3372c1b56486..d698efba28a2 100644
--- a/include/linux/perf/arm_pmuv3.h
+++ b/include/linux/perf/arm_pmuv3.h
@@ -257,6 +257,7 @@
#define ARMV8_PMU_USERENR_SW (1 << 1) /* PMSWINC can be written at EL0 */
#define ARMV8_PMU_USERENR_CR (1 << 2) /* Cycle counter can be read at EL0 */
#define ARMV8_PMU_USERENR_ER (1 << 3) /* Event counter can be read at EL0 */
+#define ARMV8_PMU_USERENR_UEN (1 << 4) /* Fine grained per counter access at EL0 */
/* Mask for writable bits */
#define ARMV8_PMU_USERENR_MASK (ARMV8_PMU_USERENR_EN | ARMV8_PMU_USERENR_SW | \
ARMV8_PMU_USERENR_CR | ARMV8_PMU_USERENR_ER)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fb908843f209..cb99ec8c9e96 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -170,6 +170,12 @@ struct hw_perf_event {
};
struct { /* aux / Intel-PT */
u64 aux_config;
+ /*
+ * For AUX area events, aux_paused cannot be a state
+ * flag because it can be updated asynchronously to
+ * state.
+ */
+ unsigned int aux_paused;
};
struct { /* software */
struct hrtimer hrtimer;
@@ -294,6 +300,7 @@ struct perf_event_pmu_context;
#define PERF_PMU_CAP_NO_EXCLUDE 0x0040
#define PERF_PMU_CAP_AUX_OUTPUT 0x0080
#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
+#define PERF_PMU_CAP_AUX_PAUSE 0x0200
/**
* pmu::scope
@@ -384,6 +391,8 @@ struct pmu {
#define PERF_EF_START 0x01 /* start the counter when adding */
#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
+#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */
+#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */
/*
* Adds/Removes a counter to/from the PMU, can be done inside a
@@ -423,6 +432,18 @@ struct pmu {
*
* ->start() with PERF_EF_RELOAD will reprogram the counter
* value, must be preceded by a ->stop() with PERF_EF_UPDATE.
+ *
+ * ->stop() with PERF_EF_PAUSE will stop as simply as possible. Will not
+ * overlap another ->stop() with PERF_EF_PAUSE nor ->start() with
+ * PERF_EF_RESUME.
+ *
+ * ->start() with PERF_EF_RESUME will start as simply as possible but
+ * only if the counter is not otherwise stopped. Will not overlap
+ * another ->start() with PERF_EF_RESUME nor ->stop() with
+ * PERF_EF_PAUSE.
+ *
+ * Notably, PERF_EF_PAUSE/PERF_EF_RESUME *can* be concurrent with other
+ * ->stop()/->start() invocations, just not itself.
*/
void (*start) (struct perf_event *event, int flags);
void (*stop) (struct perf_event *event, int flags);
@@ -1655,15 +1676,35 @@ extern void perf_tp_event(u16 event_type, u64 count, void *record,
struct task_struct *task);
extern void perf_bp_event(struct perf_event *event, void *data);
-#ifndef perf_misc_flags
-# define perf_misc_flags(regs) \
+extern unsigned long perf_misc_flags(struct perf_event *event, struct pt_regs *regs);
+extern unsigned long perf_instruction_pointer(struct perf_event *event,
+ struct pt_regs *regs);
+
+#ifndef perf_arch_misc_flags
+# define perf_arch_misc_flags(regs) \
(user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL)
-# define perf_instruction_pointer(regs) instruction_pointer(regs)
+# define perf_arch_instruction_pointer(regs) instruction_pointer(regs)
#endif
#ifndef perf_arch_bpf_user_pt_regs
# define perf_arch_bpf_user_pt_regs(regs) regs
#endif
+#ifndef perf_arch_guest_misc_flags
+static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs)
+{
+ unsigned long guest_state = perf_guest_state();
+
+ if (!(guest_state & PERF_GUEST_ACTIVE))
+ return 0;
+
+ if (guest_state & PERF_GUEST_USER)
+ return PERF_RECORD_MISC_GUEST_USER;
+ else
+ return PERF_RECORD_MISC_GUEST_KERNEL;
+}
+# define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
+#endif
+
static inline bool has_branch_stack(struct perf_event *event)
{
return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK;
@@ -1679,6 +1720,13 @@ static inline bool has_aux(struct perf_event *event)
return event->pmu->setup_aux;
}
+static inline bool has_aux_action(struct perf_event *event)
+{
+ return event->attr.aux_sample_size ||
+ event->attr.aux_pause ||
+ event->attr.aux_resume;
+}
+
static inline bool is_write_backward(struct perf_event *event)
{
return !!event->attr.write_backward;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index a98bc91a0cde..504766d4b2d5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1378,12 +1378,13 @@ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum);
* @regnum: The register on the MMD to read
* @val: Variable to read the register into
* @cond: Break condition (usually involving @val)
- * @sleep_us: Maximum time to sleep between reads in us (0
- * tight-loops). Should be less than ~20ms since usleep_range
- * is used (see Documentation/timers/timers-howto.rst).
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please
+ * read usleep_range() function description for details and
+ * limitations.
* @timeout_us: Timeout in us, 0 means never timeout
* @sleep_before_read: if it is true, sleep @sleep_us before read.
- * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout. In either
* case, the last read value at @args is stored in @val. Must not
* be called from atomic context if sleep_us or timeout_us are used.
*/
diff --git a/include/linux/platform_data/ad5449.h b/include/linux/platform_data/ad5449.h
deleted file mode 100644
index d687ef5726c2..000000000000
--- a/include/linux/platform_data/ad5449.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * AD5415, AD5426, AD5429, AD5432, AD5439, AD5443, AD5449 Digital to Analog
- * Converter driver.
- *
- * Copyright 2012 Analog Devices Inc.
- * Author: Lars-Peter Clausen <[email protected]>
- */
-
-#ifndef __LINUX_PLATFORM_DATA_AD5449_H__
-#define __LINUX_PLATFORM_DATA_AD5449_H__
-
-/**
- * enum ad5449_sdo_mode - AD5449 SDO pin configuration
- * @AD5449_SDO_DRIVE_FULL: Drive the SDO pin with full strength.
- * @AD5449_SDO_DRIVE_WEAK: Drive the SDO pin with not full strength.
- * @AD5449_SDO_OPEN_DRAIN: Operate the SDO pin in open-drain mode.
- * @AD5449_SDO_DISABLED: Disable the SDO pin, in this mode it is not possible to
- * read back from the device.
- */
-enum ad5449_sdo_mode {
- AD5449_SDO_DRIVE_FULL = 0x0,
- AD5449_SDO_DRIVE_WEAK = 0x1,
- AD5449_SDO_OPEN_DRAIN = 0x2,
- AD5449_SDO_DISABLED = 0x3,
-};
-
-/**
- * struct ad5449_platform_data - Platform data for the ad5449 DAC driver
- * @sdo_mode: SDO pin mode
- * @hardware_clear_to_midscale: Whether asserting the hardware CLR pin sets the
- * outputs to midscale (true) or to zero scale(false).
- */
-struct ad5449_platform_data {
- enum ad5449_sdo_mode sdo_mode;
- bool hardware_clear_to_midscale;
-};
-
-#endif
diff --git a/include/linux/platform_data/dma-ep93xx.h b/include/linux/platform_data/dma-ep93xx.h
deleted file mode 100644
index eb9805bb3fe8..000000000000
--- a/include/linux/platform_data/dma-ep93xx.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_ARCH_DMA_H
-#define __ASM_ARCH_DMA_H
-
-#include <linux/types.h>
-#include <linux/dmaengine.h>
-#include <linux/dma-mapping.h>
-
-/*
- * M2P channels.
- *
- * Note that these values are also directly used for setting the PPALLOC
- * register.
- */
-#define EP93XX_DMA_I2S1 0
-#define EP93XX_DMA_I2S2 1
-#define EP93XX_DMA_AAC1 2
-#define EP93XX_DMA_AAC2 3
-#define EP93XX_DMA_AAC3 4
-#define EP93XX_DMA_I2S3 5
-#define EP93XX_DMA_UART1 6
-#define EP93XX_DMA_UART2 7
-#define EP93XX_DMA_UART3 8
-#define EP93XX_DMA_IRDA 9
-/* M2M channels */
-#define EP93XX_DMA_SSP 10
-#define EP93XX_DMA_IDE 11
-
-/**
- * struct ep93xx_dma_data - configuration data for the EP93xx dmaengine
- * @port: peripheral which is requesting the channel
- * @direction: TX/RX channel
- * @name: optional name for the channel, this is displayed in /proc/interrupts
- *
- * This information is passed as private channel parameter in a filter
- * function. Note that this is only needed for slave/cyclic channels. For
- * memcpy channels %NULL data should be passed.
- */
-struct ep93xx_dma_data {
- int port;
- enum dma_transfer_direction direction;
- const char *name;
-};
-
-/**
- * struct ep93xx_dma_chan_data - platform specific data for a DMA channel
- * @name: name of the channel, used for getting the right clock for the channel
- * @base: mapped registers
- * @irq: interrupt number used by this channel
- */
-struct ep93xx_dma_chan_data {
- const char *name;
- void __iomem *base;
- int irq;
-};
-
-/**
- * struct ep93xx_dma_platform_data - platform data for the dmaengine driver
- * @channels: array of channels which are passed to the driver
- * @num_channels: number of channels in the array
- *
- * This structure is passed to the DMA engine driver via platform data. For
- * M2P channels, contract is that even channels are for TX and odd for RX.
- * There is no requirement for the M2M channels.
- */
-struct ep93xx_dma_platform_data {
- struct ep93xx_dma_chan_data *channels;
- size_t num_channels;
-};
-
-static inline bool ep93xx_dma_chan_is_m2p(struct dma_chan *chan)
-{
- return !strcmp(dev_name(chan->device->dev), "ep93xx-dma-m2p");
-}
-
-/**
- * ep93xx_dma_chan_direction - returns direction the channel can be used
- * @chan: channel
- *
- * This function can be used in filter functions to find out whether the
- * channel supports given DMA direction. Only M2P channels have such
- * limitation, for M2M channels the direction is configurable.
- */
-static inline enum dma_transfer_direction
-ep93xx_dma_chan_direction(struct dma_chan *chan)
-{
- if (!ep93xx_dma_chan_is_m2p(chan))
- return DMA_TRANS_NONE;
-
- /* even channels are for TX, odd for RX */
- return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
-}
-
-#endif /* __ASM_ARCH_DMA_H */
diff --git a/include/linux/platform_data/eth-ep93xx.h b/include/linux/platform_data/eth-ep93xx.h
deleted file mode 100644
index 8eef637a804d..000000000000
--- a/include/linux/platform_data/eth-ep93xx.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_PLATFORM_DATA_ETH_EP93XX
-#define _LINUX_PLATFORM_DATA_ETH_EP93XX
-
-struct ep93xx_eth_data {
- unsigned char dev_addr[6];
- unsigned char phy_id;
-};
-
-#endif
diff --git a/include/linux/platform_data/keypad-ep93xx.h b/include/linux/platform_data/keypad-ep93xx.h
deleted file mode 100644
index 3054fced8509..000000000000
--- a/include/linux/platform_data/keypad-ep93xx.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __KEYPAD_EP93XX_H
-#define __KEYPAD_EP93XX_H
-
-struct matrix_keymap_data;
-
-/* flags for the ep93xx_keypad driver */
-#define EP93XX_KEYPAD_DISABLE_3_KEY (1<<0) /* disable 3-key reset */
-#define EP93XX_KEYPAD_DIAG_MODE (1<<1) /* diagnostic mode */
-#define EP93XX_KEYPAD_BACK_DRIVE (1<<2) /* back driving mode */
-#define EP93XX_KEYPAD_TEST_MODE (1<<3) /* scan only column 0 */
-#define EP93XX_KEYPAD_AUTOREPEAT (1<<4) /* enable key autorepeat */
-
-/**
- * struct ep93xx_keypad_platform_data - platform specific device structure
- * @keymap_data: pointer to &matrix_keymap_data
- * @debounce: debounce start count; terminal count is 0xff
- * @prescale: row/column counter pre-scaler load value
- * @flags: see above
- */
-struct ep93xx_keypad_platform_data {
- struct matrix_keymap_data *keymap_data;
- unsigned int debounce;
- unsigned int prescale;
- unsigned int flags;
- unsigned int clk_rate;
-};
-
-#define EP93XX_MATRIX_ROWS (8)
-#define EP93XX_MATRIX_COLS (8)
-
-#endif /* __KEYPAD_EP93XX_H */
diff --git a/include/linux/platform_data/max6639.h b/include/linux/platform_data/max6639.h
deleted file mode 100644
index 65bfdb4fdc15..000000000000
--- a/include/linux/platform_data/max6639.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_MAX6639_H
-#define _LINUX_MAX6639_H
-
-#include <linux/types.h>
-
-/* platform data for the MAX6639 temperature sensor and fan control */
-
-struct max6639_platform_data {
- bool pwm_polarity; /* Polarity low (0) or high (1, default) */
- int ppr; /* Pulses per rotation 1..4 (default == 2) */
- int rpm_range; /* 2000, 4000 (default), 8000 or 16000 */
-};
-
-#endif /* _LINUX_MAX6639_H */
diff --git a/include/linux/platform_data/spi-ep93xx.h b/include/linux/platform_data/spi-ep93xx.h
deleted file mode 100644
index b439f2a896e0..000000000000
--- a/include/linux/platform_data/spi-ep93xx.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_MACH_EP93XX_SPI_H
-#define __ASM_MACH_EP93XX_SPI_H
-
-struct spi_device;
-
-/**
- * struct ep93xx_spi_info - EP93xx specific SPI descriptor
- * @use_dma: use DMA for the transfers
- */
-struct ep93xx_spi_info {
- bool use_dma;
-};
-
-#endif /* __ASM_MACH_EP93XX_SPI_H */
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index d422db6eec63..7132623e4658 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -52,7 +52,7 @@ struct platform_device {
extern int platform_device_register(struct platform_device *);
extern void platform_device_unregister(struct platform_device *);
-extern struct bus_type platform_bus_type;
+extern const struct bus_type platform_bus_type;
extern struct device platform_bus;
extern struct resource *platform_get_resource(struct platform_device *,
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index b637ec14025f..cf4b11be3709 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -92,6 +92,10 @@ struct dev_pm_domain_list {
* GENPD_FLAG_OPP_TABLE_FW: The genpd provider supports performance states,
* but its corresponding OPP tables are not
* described in DT, but are given directly by FW.
+ *
+ * GENPD_FLAG_DEV_NAME_FW: Instructs genpd to generate an unique device name
+ * using ida. It is used by genpd providers which
+ * get their genpd-names directly from FW.
*/
#define GENPD_FLAG_PM_CLK (1U << 0)
#define GENPD_FLAG_IRQ_SAFE (1U << 1)
@@ -101,6 +105,7 @@ struct dev_pm_domain_list {
#define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5)
#define GENPD_FLAG_MIN_RESIDENCY (1U << 6)
#define GENPD_FLAG_OPP_TABLE_FW (1U << 7)
+#define GENPD_FLAG_DEV_NAME_FW (1U << 8)
enum gpd_status {
GENPD_STATE_ON = 0, /* PM domain is on */
@@ -163,6 +168,7 @@ struct generic_pm_domain {
atomic_t sd_count; /* Number of subdomains with power "on" */
enum gpd_status status; /* Current state of the domain */
unsigned int device_count; /* Number of devices */
+ unsigned int device_id; /* unique device id */
unsigned int suspended_count; /* System suspend device counter */
unsigned int prepared_count; /* Suspend counter of prepared devices */
unsigned int performance_state; /* Aggregated max performance state */
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 453691710839..f11f10c97bd9 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -5,12 +5,16 @@
#include <linux/alarmtimer.h>
#include <linux/list.h>
#include <linux/mutex.h>
+#include <linux/pid.h>
#include <linux/posix-timers_types.h>
+#include <linux/rcuref.h>
#include <linux/spinlock.h>
#include <linux/timerqueue.h>
struct kernel_siginfo;
struct task_struct;
+struct sigqueue;
+struct k_itimer;
static inline clockid_t make_process_cpuclock(const unsigned int pid,
const clockid_t clock)
@@ -35,6 +39,8 @@ static inline int clockid_to_fd(const clockid_t clk)
#ifdef CONFIG_POSIX_TIMERS
+#include <linux/signal_types.h>
+
/**
* cpu_timer - Posix CPU timer representation for k_itimer
* @node: timerqueue node to queue in the task/sig
@@ -42,6 +48,7 @@ static inline int clockid_to_fd(const clockid_t clk)
* @pid: Pointer to target task PID
* @elist: List head for the expiry list
* @firing: Timer is currently firing
+ * @nanosleep: Timer is used for nanosleep and is not a regular posix-timer
* @handling: Pointer to the task which handles expiry
*/
struct cpu_timer {
@@ -49,7 +56,8 @@ struct cpu_timer {
struct timerqueue_head *head;
struct pid *pid;
struct list_head elist;
- int firing;
+ bool firing;
+ bool nanosleep;
struct task_struct __rcu *handling;
};
@@ -101,6 +109,12 @@ static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
pct->bases[CPUCLOCK_SCHED].nextevt = runtime;
}
+void posixtimer_rearm_itimer(struct task_struct *p);
+bool posixtimer_init_sigqueue(struct sigqueue *q);
+void posixtimer_send_sigqueue(struct k_itimer *tmr);
+bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq);
+void posixtimer_free_timer(struct k_itimer *timer);
+
/* Init task static initializer */
#define INIT_CPU_TIMERBASE(b) { \
.nextevt = U64_MAX, \
@@ -122,6 +136,10 @@ struct cpu_timer { };
static inline void posix_cputimers_init(struct posix_cputimers *pct) { }
static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
u64 cpu_limit) { }
+static inline void posixtimer_rearm_itimer(struct task_struct *p) { }
+static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info,
+ struct sigqueue *timer_sigq) { return false; }
+static inline void posixtimer_free_timer(struct k_itimer *timer) { }
#endif
#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
@@ -132,50 +150,56 @@ static inline void clear_posix_cputimers_work(struct task_struct *p) { }
static inline void posix_cputimers_init_work(void) { }
#endif
-#define REQUEUE_PENDING 1
-
/**
* struct k_itimer - POSIX.1b interval timer structure.
- * @list: List head for binding the timer to signals->posix_timers
+ * @list: List node for binding the timer to tsk::signal::posix_timers
+ * @ignored_list: List node for tracking ignored timers in tsk::signal::ignored_posix_timers
* @t_hash: Entry in the posix timer hash table
* @it_lock: Lock protecting the timer
* @kclock: Pointer to the k_clock struct handling this timer
* @it_clock: The posix timer clock id
* @it_id: The posix timer id for identifying the timer
- * @it_active: Marker that timer is active
+ * @it_status: The status of the timer
+ * @it_sig_periodic: The periodic status at signal delivery
* @it_overrun: The overrun counter for pending signals
* @it_overrun_last: The overrun at the time of the last delivered signal
- * @it_requeue_pending: Indicator that timer waits for being requeued on
- * signal delivery
+ * @it_signal_seq: Sequence count to control signal delivery
+ * @it_sigqueue_seq: The sequence count at the point where the signal was queued
* @it_sigev_notify: The notify word of sigevent struct for signal delivery
* @it_interval: The interval for periodic timers
* @it_signal: Pointer to the creators signal struct
* @it_pid: The pid of the process/task targeted by the signal
* @it_process: The task to wakeup on clock_nanosleep (CPU timers)
- * @sigq: Pointer to preallocated sigqueue
+ * @rcuref: Reference count for life time management
+ * @sigq: Embedded sigqueue
* @it: Union representing the various posix timer type
* internals.
* @rcu: RCU head for freeing the timer.
*/
struct k_itimer {
struct hlist_node list;
+ struct hlist_node ignored_list;
struct hlist_node t_hash;
spinlock_t it_lock;
const struct k_clock *kclock;
clockid_t it_clock;
timer_t it_id;
- int it_active;
+ int it_status;
+ bool it_sig_periodic;
s64 it_overrun;
s64 it_overrun_last;
- int it_requeue_pending;
+ unsigned int it_signal_seq;
+ unsigned int it_sigqueue_seq;
int it_sigev_notify;
+ enum pid_type it_pid_type;
ktime_t it_interval;
struct signal_struct *it_signal;
union {
struct pid *it_pid;
struct task_struct *it_process;
};
- struct sigqueue *sigq;
+ struct sigqueue sigq;
+ rcuref_t rcuref;
union {
struct {
struct hrtimer timer;
@@ -196,5 +220,29 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
-void posixtimer_rearm(struct kernel_siginfo *info);
+#ifdef CONFIG_POSIX_TIMERS
+static inline void posixtimer_putref(struct k_itimer *tmr)
+{
+ if (rcuref_put(&tmr->rcuref))
+ posixtimer_free_timer(tmr);
+}
+
+static inline void posixtimer_sigqueue_getref(struct sigqueue *q)
+{
+ struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);
+
+ WARN_ON_ONCE(!rcuref_get(&tmr->rcuref));
+}
+
+static inline void posixtimer_sigqueue_putref(struct sigqueue *q)
+{
+ struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);
+
+ posixtimer_putref(tmr);
+}
+#else /* CONFIG_POSIX_TIMERS */
+static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { }
+static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { }
+#endif /* !CONFIG_POSIX_TIMERS */
+
#endif
diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h
index 0e65b3d634d9..e2d47eb1a7f3 100644
--- a/include/linux/posix_acl.h
+++ b/include/linux/posix_acl.h
@@ -28,9 +28,9 @@ struct posix_acl_entry {
struct posix_acl {
refcount_t a_refcount;
- struct rcu_head a_rcu;
unsigned int a_count;
- struct posix_acl_entry a_entries[];
+ struct rcu_head a_rcu;
+ struct posix_acl_entry a_entries[] __counted_by(a_count);
};
#define FOREACH_ACL_ENTRY(pa, acl, pe) \
@@ -62,7 +62,7 @@ posix_acl_release(struct posix_acl *acl)
/* posix_acl.c */
extern void posix_acl_init(struct posix_acl *, int);
-extern struct posix_acl *posix_acl_alloc(int, gfp_t);
+extern struct posix_acl *posix_acl_alloc(unsigned int count, gfp_t flags);
extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t);
extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *);
extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *);
diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index f7f1e5251c67..f2ed5b72b3d6 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -10,6 +10,7 @@
#include <linux/types.h>
#include <linux/once.h>
+#include <linux/percpu.h>
#include <linux/random.h>
struct rnd_state {
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index ce76f1a45722..ca86235ac15c 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -486,6 +486,7 @@ DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
extern bool preempt_model_none(void);
extern bool preempt_model_voluntary(void);
extern bool preempt_model_full(void);
+extern bool preempt_model_lazy(void);
#else
@@ -502,6 +503,11 @@ static inline bool preempt_model_full(void)
return IS_ENABLED(CONFIG_PREEMPT);
}
+static inline bool preempt_model_lazy(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT_LAZY);
+}
+
#endif
static inline bool preempt_model_rt(void)
@@ -519,7 +525,7 @@ static inline bool preempt_model_rt(void)
*/
static inline bool preempt_model_preemptible(void)
{
- return preempt_model_full() || preempt_model_rt();
+ return preempt_model_full() || preempt_model_lazy() || preempt_model_rt();
}
#endif /* __LINUX_PREEMPT_H */
diff --git a/include/linux/printk.h b/include/linux/printk.h
index eca9bb2ee637..4217a9f412b2 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -166,6 +166,9 @@ __printf(1, 2) __cold int _printk_deferred(const char *fmt, ...);
extern void __printk_deferred_enter(void);
extern void __printk_deferred_exit(void);
+extern void printk_force_console_enter(void);
+extern void printk_force_console_exit(void);
+
/*
* The printk_deferred_enter/exit macros are available only as a hack for
* some code paths that need to defer all printk console printing. Interrupts
@@ -229,6 +232,14 @@ static inline void printk_deferred_exit(void)
{
}
+static inline void printk_force_console_enter(void)
+{
+}
+
+static inline void printk_force_console_exit(void)
+{
+}
+
static inline int printk_ratelimit(void)
{
return 0;
diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index 1b5a953c6bbc..3a74f69e0b59 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -10,7 +10,7 @@
#ifndef _PTP_CLASSIFY_H_
#define _PTP_CLASSIFY_H_
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/ip.h>
#include <linux/ktime.h>
#include <linux/skbuff.h>
diff --git a/include/linux/random.h b/include/linux/random.h
index b0a940af4fff..333cecfca93f 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -145,13 +145,6 @@ declare_get_random_var_wait(u64, u32)
declare_get_random_var_wait(long, unsigned long)
#undef declare_get_random_var
-/*
- * This is designed to be standalone for just prandom
- * users, but for now we include it from <linux/random.h>
- * for legacy reasons.
- */
-#include <linux/prandom.h>
-
#ifdef CONFIG_SMP
int random_prepare_cpu(unsigned int cpu);
int random_online_cpu(unsigned int cpu);
diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
index 6a0999c26c7c..2f630eb8307e 100644
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -14,7 +14,7 @@
*
* If we need to allow unconditional lookups (say as required for NMI context
* usage) we need a more complex setup; this data structure provides this by
- * employing the latch technique -- see @raw_write_seqcount_latch -- to
+ * employing the latch technique -- see @write_seqcount_latch_begin -- to
* implement a latched RB-tree which does allow for unconditional lookups by
* virtue of always having (at least) one stable copy of the tree.
*
@@ -132,7 +132,7 @@ __lt_find(void *key, struct latch_tree_root *ltr, int idx,
* @ops: operators defining the node order
*
* It inserts @node into @root in an ordered fashion such that we can always
- * observe one complete tree. See the comment for raw_write_seqcount_latch().
+ * observe one complete tree. See the comment for write_seqcount_latch_begin().
*
* The inserts use rcu_assign_pointer() to publish the element such that the
* tree structure is stored before we can observe the new @node.
@@ -145,10 +145,11 @@ latch_tree_insert(struct latch_tree_node *node,
struct latch_tree_root *root,
const struct latch_tree_ops *ops)
{
- raw_write_seqcount_latch(&root->seq);
+ write_seqcount_latch_begin(&root->seq);
__lt_insert(node, root, 0, ops->less);
- raw_write_seqcount_latch(&root->seq);
+ write_seqcount_latch(&root->seq);
__lt_insert(node, root, 1, ops->less);
+ write_seqcount_latch_end(&root->seq);
}
/**
@@ -159,7 +160,7 @@ latch_tree_insert(struct latch_tree_node *node,
*
* Removes @node from the trees @root in an ordered fashion such that we can
* always observe one complete tree. See the comment for
- * raw_write_seqcount_latch().
+ * write_seqcount_latch_begin().
*
* It is assumed that @node will observe one RCU quiescent state before being
* reused of freed.
@@ -172,10 +173,11 @@ latch_tree_erase(struct latch_tree_node *node,
struct latch_tree_root *root,
const struct latch_tree_ops *ops)
{
- raw_write_seqcount_latch(&root->seq);
+ write_seqcount_latch_begin(&root->seq);
__lt_erase(node, root, 0);
- raw_write_seqcount_latch(&root->seq);
+ write_seqcount_latch(&root->seq);
__lt_erase(node, root, 1);
+ write_seqcount_latch_end(&root->seq);
}
/**
@@ -204,9 +206,9 @@ latch_tree_find(void *key, struct latch_tree_root *root,
unsigned int seq;
do {
- seq = raw_read_seqcount_latch(&root->seq);
+ seq = read_seqcount_latch(&root->seq);
node = __lt_find(key, root, seq & 1, ops->comp);
- } while (raw_read_seqcount_latch_retry(&root->seq, seq));
+ } while (read_seqcount_latch_retry(&root->seq, seq));
return node;
}
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 0ee270b3f5ed..fe42315f667f 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -165,7 +165,6 @@ static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
static inline bool rcu_is_watching(void) { return true; }
static inline void rcu_momentary_eqs(void) { }
static inline void kfree_rcu_scheduler_running(void) { }
-static inline bool rcu_gp_might_be_stalled(void) { return false; }
/* Avoid RCU read-side critical sections leaking across. */
static inline void rcu_all_qs(void) { barrier(); }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 90a684f94776..27d86d912781 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -40,7 +40,6 @@ void kvfree_rcu_barrier(void);
void rcu_barrier(void);
void rcu_momentary_eqs(void);
void kfree_rcu_scheduler_running(void);
-bool rcu_gp_might_be_stalled(void);
struct rcu_gp_oldstate {
unsigned long rgos_norm;
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index f9ccad32fc5c..75f162b60ba1 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -106,17 +106,17 @@ struct reg_sequence {
* @addr: Address to poll
* @val: Unsigned integer variable to read the value into
* @cond: Break condition (usually involving @val)
- * @sleep_us: Maximum time to sleep between reads in us (0
- * tight-loops). Should be less than ~20ms since usleep_range
- * is used (see Documentation/timers/timers-howto.rst).
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please
+ * read usleep_range() function description for details and
+ * limitations.
* @timeout_us: Timeout in us, 0 means never timeout
*
- * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_read
+ * This is modelled after the readx_poll_timeout macros in linux/iopoll.h.
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_read
* error return value in case of a error read. In the two former cases,
* the last read value at @addr is stored in @val. Must not be called
* from atomic context if sleep_us or timeout_us are used.
- *
- * This is modelled after the readx_poll_timeout macros in linux/iopoll.h.
*/
#define regmap_read_poll_timeout(map, addr, val, cond, sleep_us, timeout_us) \
({ \
@@ -133,20 +133,20 @@ struct reg_sequence {
* @addr: Address to poll
* @val: Unsigned integer variable to read the value into
* @cond: Break condition (usually involving @val)
- * @delay_us: Time to udelay between reads in us (0 tight-loops).
- * Should be less than ~10us since udelay is used
- * (see Documentation/timers/timers-howto.rst).
+ * @delay_us: Time to udelay between reads in us (0 tight-loops). Please
+ * read udelay() function description for details and
+ * limitations.
* @timeout_us: Timeout in us, 0 means never timeout
*
- * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_read
- * error return value in case of a error read. In the two former cases,
- * the last read value at @addr is stored in @val.
- *
* This is modelled after the readx_poll_timeout_atomic macros in linux/iopoll.h.
*
* Note: In general regmap cannot be used in atomic context. If you want to use
* this macro then first setup your regmap for atomic use (flat or no cache
* and MMIO regmap).
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_read
+ * error return value in case of a error read. In the two former cases,
+ * the last read value at @addr is stored in @val.
*/
#define regmap_read_poll_timeout_atomic(map, addr, val, cond, delay_us, timeout_us) \
({ \
@@ -177,17 +177,17 @@ struct reg_sequence {
* @field: Regmap field to read from
* @val: Unsigned integer variable to read the value into
* @cond: Break condition (usually involving @val)
- * @sleep_us: Maximum time to sleep between reads in us (0
- * tight-loops). Should be less than ~20ms since usleep_range
- * is used (see Documentation/timers/timers-howto.rst).
+ * @sleep_us: Maximum time to sleep between reads in us (0 tight-loops). Please
+ * read usleep_range() function description for details and
+ * limitations.
* @timeout_us: Timeout in us, 0 means never timeout
*
- * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_field_read
+ * This is modelled after the readx_poll_timeout macros in linux/iopoll.h.
+ *
+ * Returns: 0 on success and -ETIMEDOUT upon a timeout or the regmap_field_read
* error return value in case of a error read. In the two former cases,
* the last read value at @addr is stored in @val. Must not be called
* from atomic context if sleep_us or timeout_us are used.
- *
- * This is modelled after the readx_poll_timeout macros in linux/iopoll.h.
*/
#define regmap_field_read_poll_timeout(field, val, cond, sleep_us, timeout_us) \
({ \
diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h
index 8544ff05e594..7d81fc6918ee 100644
--- a/include/linux/rwlock_rt.h
+++ b/include/linux/rwlock_rt.h
@@ -24,13 +24,13 @@ do { \
__rt_rwlock_init(rwl, #rwl, &__key); \
} while (0)
-extern void rt_read_lock(rwlock_t *rwlock);
+extern void rt_read_lock(rwlock_t *rwlock) __acquires(rwlock);
extern int rt_read_trylock(rwlock_t *rwlock);
-extern void rt_read_unlock(rwlock_t *rwlock);
-extern void rt_write_lock(rwlock_t *rwlock);
-extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass);
+extern void rt_read_unlock(rwlock_t *rwlock) __releases(rwlock);
+extern void rt_write_lock(rwlock_t *rwlock) __acquires(rwlock);
+extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(rwlock);
extern int rt_write_trylock(rwlock_t *rwlock);
-extern void rt_write_unlock(rwlock_t *rwlock);
+extern void rt_write_unlock(rwlock_t *rwlock) __releases(rwlock);
static __always_inline void read_lock(rwlock_t *rwlock)
{
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index c09cdcc99471..189140bf11fc 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -40,7 +40,7 @@ struct sbitmap_word {
/**
* @swap_lock: serializes simultaneous updates of ->word and ->cleared
*/
- spinlock_t swap_lock;
+ raw_spinlock_t swap_lock;
} ____cacheline_aligned_in_smp;
/**
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e6ee4258169a..5a2cdd9d8b10 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1001,7 +1001,7 @@ struct task_struct {
#ifdef CONFIG_ARCH_HAS_CPU_PASID
unsigned pasid_activated:1;
#endif
-#ifdef CONFIG_CPU_SUP_INTEL
+#ifdef CONFIG_X86_BUS_LOCK_DETECT
unsigned reported_split_lock:1;
#endif
#ifdef CONFIG_TASK_DELAY_ACCT
@@ -1681,8 +1681,8 @@ extern struct pid *cad_pid;
* I am cleaning dirty pages from some other bdi. */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
-#define PF_MEMALLOC_NORECLAIM 0x00800000 /* All allocation requests will clear __GFP_DIRECT_RECLAIM */
-#define PF_MEMALLOC_NOWARN 0x01000000 /* All allocation requests will inherit __GFP_NOWARN */
+#define PF__HOLE__00800000 0x00800000
+#define PF__HOLE__01000000 0x01000000
#define PF__HOLE__02000000 0x02000000
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
@@ -1898,7 +1898,7 @@ extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];
#ifdef CONFIG_THREAD_INFO_IN_TASK
# define task_thread_info(task) (&(task)->thread_info)
-#elif !defined(__HAVE_THREAD_FUNCTIONS)
+#else
# define task_thread_info(task) ((struct thread_info *)(task)->stack)
#endif
@@ -2002,7 +2002,8 @@ static inline void set_tsk_need_resched(struct task_struct *tsk)
static inline void clear_tsk_need_resched(struct task_struct *tsk)
{
- clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+ atomic_long_andnot(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY,
+ (atomic_long_t *)&task_thread_info(tsk)->flags);
}
static inline int test_tsk_need_resched(struct task_struct *tsk)
@@ -2133,6 +2134,11 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
#endif /* CONFIG_SMP */
+static inline bool task_is_runnable(struct task_struct *p)
+{
+ return p->on_rq && !p->se.sched_delayed;
+}
+
extern bool sched_task_on_rq(struct task_struct *p);
extern unsigned long get_wchan(struct task_struct *p);
extern struct task_struct *cpu_curr_snapshot(int cpu);
diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 65bc0a489cd2..1d70a9867fb1 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -199,7 +199,6 @@ struct sched_ext_entity {
#ifdef CONFIG_EXT_GROUP_SCHED
struct cgroup *cgrp_moving_from;
#endif
- /* must be the last field, see init_scx_entity() */
struct list_head tasks_node;
};
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 07bb8d4181d7..928a626725e6 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -251,25 +251,16 @@ static inline gfp_t current_gfp_context(gfp_t flags)
{
unsigned int pflags = READ_ONCE(current->flags);
- if (unlikely(pflags & (PF_MEMALLOC_NOIO |
- PF_MEMALLOC_NOFS |
- PF_MEMALLOC_NORECLAIM |
- PF_MEMALLOC_NOWARN |
- PF_MEMALLOC_PIN))) {
+ if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) {
/*
- * Stronger flags before weaker flags:
- * NORECLAIM implies NOIO, which in turn implies NOFS
+ * NOIO implies both NOIO and NOFS and it is a weaker context
+ * so always make sure it makes precedence
*/
- if (pflags & PF_MEMALLOC_NORECLAIM)
- flags &= ~__GFP_DIRECT_RECLAIM;
- else if (pflags & PF_MEMALLOC_NOIO)
+ if (pflags & PF_MEMALLOC_NOIO)
flags &= ~(__GFP_IO | __GFP_FS);
else if (pflags & PF_MEMALLOC_NOFS)
flags &= ~__GFP_FS;
- if (pflags & PF_MEMALLOC_NOWARN)
- flags |= __GFP_NOWARN;
-
if (pflags & PF_MEMALLOC_PIN)
flags &= ~__GFP_MOVABLE;
}
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index c8ed09ac29ac..d5d03d919df8 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -138,6 +138,7 @@ struct signal_struct {
/* POSIX.1b Interval Timers */
unsigned int next_posix_timer_id;
struct hlist_head posix_timers;
+ struct hlist_head ignored_posix_timers;
/* ITIMER_REAL timer for the process */
struct hrtimer real_timer;
@@ -338,9 +339,6 @@ extern void force_fatal_sig(int);
extern void force_exit_sig(int);
extern int send_sig(int, struct task_struct *, int);
extern int zap_other_threads(struct task_struct *p);
-extern struct sigqueue *sigqueue_alloc(void);
-extern void sigqueue_free(struct sigqueue *);
-extern int send_sigqueue(struct sigqueue *, struct pid *, enum pid_type);
extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
static inline void clear_notify_signal(void)
diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index bf10bdb487dd..cffad65bdc6a 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -9,6 +9,7 @@
#include <linux/sched.h>
#include <linux/magic.h>
#include <linux/refcount.h>
+#include <linux/kasan.h>
#ifdef CONFIG_THREAD_INFO_IN_TASK
@@ -33,7 +34,7 @@ static __always_inline unsigned long *end_of_stack(const struct task_struct *tas
#endif
}
-#elif !defined(__HAVE_THREAD_FUNCTIONS)
+#else
#define task_stack_page(task) ((void *)(task)->stack)
@@ -89,6 +90,7 @@ static inline int object_is_on_stack(const void *obj)
{
void *stack = task_stack_page(current);
+ obj = kasan_reset_tag(obj);
return (obj >= stack) && (obj < (stack + THREAD_SIZE));
}
diff --git a/include/linux/security.h b/include/linux/security.h
index c37c32ebbdcd..cbdba435b798 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -34,6 +34,10 @@
#include <linux/sockptr.h>
#include <linux/bpf.h>
#include <uapi/linux/lsm.h>
+#include <linux/lsm/selinux.h>
+#include <linux/lsm/smack.h>
+#include <linux/lsm/apparmor.h>
+#include <linux/lsm/bpf.h>
struct linux_binprm;
struct cred;
@@ -152,6 +156,16 @@ enum lockdown_reason {
LOCKDOWN_CONFIDENTIALITY_MAX,
};
+/*
+ * Data exported by the security modules
+ */
+struct lsm_prop {
+ struct lsm_prop_selinux selinux;
+ struct lsm_prop_smack smack;
+ struct lsm_prop_apparmor apparmor;
+ struct lsm_prop_bpf bpf;
+};
+
extern const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1];
extern u32 lsm_active_cnt;
extern const struct lsm_id *lsm_idlist[];
@@ -269,8 +283,32 @@ static inline const char *kernel_load_data_id_str(enum kernel_load_data_id id)
return kernel_load_data_str[id];
}
+/**
+ * lsmprop_init - initialize a lsm_prop structure
+ * @prop: Pointer to the data to initialize
+ *
+ * Set all secid for all modules to the specified value.
+ */
+static inline void lsmprop_init(struct lsm_prop *prop)
+{
+ memset(prop, 0, sizeof(*prop));
+}
+
#ifdef CONFIG_SECURITY
+/**
+ * lsmprop_is_set - report if there is a value in the lsm_prop
+ * @prop: Pointer to the exported LSM data
+ *
+ * Returns true if there is a value set, false otherwise
+ */
+static inline bool lsmprop_is_set(struct lsm_prop *prop)
+{
+ const struct lsm_prop empty = {};
+
+ return !!memcmp(prop, &empty, sizeof(*prop));
+}
+
int call_blocking_lsm_notifier(enum lsm_event event, void *data);
int register_blocking_lsm_notifier(struct notifier_block *nb);
int unregister_blocking_lsm_notifier(struct notifier_block *nb);
@@ -348,7 +386,7 @@ int security_dentry_create_files_as(struct dentry *dentry, int mode,
struct cred *new);
int security_path_notify(const struct path *path, u64 mask,
unsigned int obj_type);
-int security_inode_alloc(struct inode *inode);
+int security_inode_alloc(struct inode *inode, gfp_t gfp);
void security_inode_free(struct inode *inode);
int security_inode_init_security(struct inode *inode, struct inode *dir,
const struct qstr *qstr,
@@ -408,7 +446,7 @@ int security_inode_getsecurity(struct mnt_idmap *idmap,
void **buffer, bool alloc);
int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags);
int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size);
-void security_inode_getsecid(struct inode *inode, u32 *secid);
+void security_inode_getlsmprop(struct inode *inode, struct lsm_prop *prop);
int security_inode_copy_up(struct dentry *src, struct cred **new);
int security_inode_copy_up_xattr(struct dentry *src, const char *name);
int security_inode_setintegrity(const struct inode *inode,
@@ -444,6 +482,7 @@ void security_cred_free(struct cred *cred);
int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
void security_transfer_creds(struct cred *new, const struct cred *old);
void security_cred_getsecid(const struct cred *c, u32 *secid);
+void security_cred_getlsmprop(const struct cred *c, struct lsm_prop *prop);
int security_kernel_act_as(struct cred *new, u32 secid);
int security_kernel_create_files_as(struct cred *new, struct inode *inode);
int security_kernel_module_request(char *kmod_name);
@@ -463,8 +502,8 @@ int security_task_fix_setgroups(struct cred *new, const struct cred *old);
int security_task_setpgid(struct task_struct *p, pid_t pgid);
int security_task_getpgid(struct task_struct *p);
int security_task_getsid(struct task_struct *p);
-void security_current_getsecid_subj(u32 *secid);
-void security_task_getsecid_obj(struct task_struct *p, u32 *secid);
+void security_current_getlsmprop_subj(struct lsm_prop *prop);
+void security_task_getlsmprop_obj(struct task_struct *p, struct lsm_prop *prop);
int security_task_setnice(struct task_struct *p, int nice);
int security_task_setioprio(struct task_struct *p, int ioprio);
int security_task_getioprio(struct task_struct *p);
@@ -482,7 +521,7 @@ int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
void security_task_to_inode(struct task_struct *p, struct inode *inode);
int security_create_user_ns(const struct cred *cred);
int security_ipc_permission(struct kern_ipc_perm *ipcp, short flag);
-void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid);
+void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp, struct lsm_prop *prop);
int security_msg_msg_alloc(struct msg_msg *msg);
void security_msg_msg_free(struct msg_msg *msg);
int security_msg_queue_alloc(struct kern_ipc_perm *msq);
@@ -515,6 +554,7 @@ int security_setprocattr(int lsmid, const char *name, void *value, size_t size);
int security_netlink_send(struct sock *sk, struct sk_buff *skb);
int security_ismaclabel(const char *name);
int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen);
+int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen);
int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid);
void security_release_secctx(char *secdata, u32 seclen);
void security_inode_invalidate_secctx(struct inode *inode);
@@ -531,6 +571,17 @@ int security_bdev_setintegrity(struct block_device *bdev,
size_t size);
#else /* CONFIG_SECURITY */
+/**
+ * lsmprop_is_set - report if there is a value in the lsm_prop
+ * @prop: Pointer to the exported LSM data
+ *
+ * Returns true if there is a value set, false otherwise
+ */
+static inline bool lsmprop_is_set(struct lsm_prop *prop)
+{
+ return false;
+}
+
static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data)
{
return 0;
@@ -789,7 +840,7 @@ static inline int security_path_notify(const struct path *path, u64 mask,
return 0;
}
-static inline int security_inode_alloc(struct inode *inode)
+static inline int security_inode_alloc(struct inode *inode, gfp_t gfp)
{
return 0;
}
@@ -1020,9 +1071,10 @@ static inline int security_inode_listsecurity(struct inode *inode, char *buffer,
return 0;
}
-static inline void security_inode_getsecid(struct inode *inode, u32 *secid)
+static inline void security_inode_getlsmprop(struct inode *inode,
+ struct lsm_prop *prop)
{
- *secid = 0;
+ lsmprop_init(prop);
}
static inline int security_inode_copy_up(struct dentry *src, struct cred **new)
@@ -1172,6 +1224,10 @@ static inline void security_cred_getsecid(const struct cred *c, u32 *secid)
*secid = 0;
}
+static inline void security_cred_getlsmprop(const struct cred *c,
+ struct lsm_prop *prop)
+{ }
+
static inline int security_kernel_act_as(struct cred *cred, u32 secid)
{
return 0;
@@ -1249,14 +1305,15 @@ static inline int security_task_getsid(struct task_struct *p)
return 0;
}
-static inline void security_current_getsecid_subj(u32 *secid)
+static inline void security_current_getlsmprop_subj(struct lsm_prop *prop)
{
- *secid = 0;
+ lsmprop_init(prop);
}
-static inline void security_task_getsecid_obj(struct task_struct *p, u32 *secid)
+static inline void security_task_getlsmprop_obj(struct task_struct *p,
+ struct lsm_prop *prop)
{
- *secid = 0;
+ lsmprop_init(prop);
}
static inline int security_task_setnice(struct task_struct *p, int nice)
@@ -1332,9 +1389,10 @@ static inline int security_ipc_permission(struct kern_ipc_perm *ipcp,
return 0;
}
-static inline void security_ipc_getsecid(struct kern_ipc_perm *ipcp, u32 *secid)
+static inline void security_ipc_getlsmprop(struct kern_ipc_perm *ipcp,
+ struct lsm_prop *prop)
{
- *secid = 0;
+ lsmprop_init(prop);
}
static inline int security_msg_msg_alloc(struct msg_msg *msg)
@@ -1468,7 +1526,14 @@ static inline int security_ismaclabel(const char *name)
return 0;
}
-static inline int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
+static inline int security_secid_to_secctx(u32 secid, char **secdata,
+ u32 *seclen)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int security_lsmprop_to_secctx(struct lsm_prop *prop,
+ char **secdata, u32 *seclen)
{
return -EOPNOTSUPP;
}
@@ -2095,7 +2160,8 @@ static inline void security_key_post_create_or_update(struct key *keyring,
int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule,
gfp_t gfp);
int security_audit_rule_known(struct audit_krule *krule);
-int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule);
+int security_audit_rule_match(struct lsm_prop *prop, u32 field, u32 op,
+ void *lsmrule);
void security_audit_rule_free(void *lsmrule);
#else
@@ -2111,8 +2177,8 @@ static inline int security_audit_rule_known(struct audit_krule *krule)
return 0;
}
-static inline int security_audit_rule_match(u32 secid, u32 field, u32 op,
- void *lsmrule)
+static inline int security_audit_rule_match(struct lsm_prop *prop, u32 field,
+ u32 op, void *lsmrule)
{
return 0;
}
@@ -2182,7 +2248,7 @@ extern int security_bpf_prog_load(struct bpf_prog *prog, union bpf_attr *attr,
struct bpf_token *token);
extern void security_bpf_prog_free(struct bpf_prog *prog);
extern int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
- struct path *path);
+ const struct path *path);
extern void security_bpf_token_free(struct bpf_token *token);
extern int security_bpf_token_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
extern int security_bpf_token_capable(const struct bpf_token *token, int cap);
@@ -2222,7 +2288,7 @@ static inline void security_bpf_prog_free(struct bpf_prog *prog)
{ }
static inline int security_bpf_token_create(struct bpf_token *token, union bpf_attr *attr,
- struct path *path)
+ const struct path *path)
{
return 0;
}
diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h
index 2ac50822554e..80f33a93f944 100644
--- a/include/linux/sed-opal.h
+++ b/include/linux/sed-opal.h
@@ -52,6 +52,7 @@ static inline bool is_sed_ioctl(unsigned int cmd)
case IOC_OPAL_GET_GEOMETRY:
case IOC_OPAL_DISCOVERY:
case IOC_OPAL_REVERT_LSP:
+ case IOC_OPAL_SET_SID_PW:
return true;
}
return false;
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index fffeb754880f..5298765d6ca4 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -622,6 +622,23 @@ static __always_inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *
}
/**
+ * read_seqcount_latch() - pick even/odd latch data copy
+ * @s: Pointer to seqcount_latch_t
+ *
+ * See write_seqcount_latch() for details and a full reader/writer usage
+ * example.
+ *
+ * Return: sequence counter raw value. Use the lowest bit as an index for
+ * picking which data copy to read. The full counter must then be checked
+ * with read_seqcount_latch_retry().
+ */
+static __always_inline unsigned read_seqcount_latch(const seqcount_latch_t *s)
+{
+ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
+ return raw_read_seqcount_latch(s);
+}
+
+/**
* raw_read_seqcount_latch_retry() - end a seqcount_latch_t read section
* @s: Pointer to seqcount_latch_t
* @start: count, from raw_read_seqcount_latch()
@@ -636,8 +653,33 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
}
/**
+ * read_seqcount_latch_retry() - end a seqcount_latch_t read section
+ * @s: Pointer to seqcount_latch_t
+ * @start: count, from read_seqcount_latch()
+ *
+ * Return: true if a read section retry is required, else false
+ */
+static __always_inline int
+read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
+{
+ kcsan_atomic_next(0);
+ return raw_read_seqcount_latch_retry(s, start);
+}
+
+/**
* raw_write_seqcount_latch() - redirect latch readers to even/odd copy
* @s: Pointer to seqcount_latch_t
+ */
+static __always_inline void raw_write_seqcount_latch(seqcount_latch_t *s)
+{
+ smp_wmb(); /* prior stores before incrementing "sequence" */
+ s->seqcount.sequence++;
+ smp_wmb(); /* increment "sequence" before following stores */
+}
+
+/**
+ * write_seqcount_latch_begin() - redirect latch readers to odd copy
+ * @s: Pointer to seqcount_latch_t
*
* The latch technique is a multiversion concurrency control method that allows
* queries during non-atomic modifications. If you can guarantee queries never
@@ -665,17 +707,11 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
*
* void latch_modify(struct latch_struct *latch, ...)
* {
- * smp_wmb(); // Ensure that the last data[1] update is visible
- * latch->seq.sequence++;
- * smp_wmb(); // Ensure that the seqcount update is visible
- *
+ * write_seqcount_latch_begin(&latch->seq);
* modify(latch->data[0], ...);
- *
- * smp_wmb(); // Ensure that the data[0] update is visible
- * latch->seq.sequence++;
- * smp_wmb(); // Ensure that the seqcount update is visible
- *
+ * write_seqcount_latch(&latch->seq);
* modify(latch->data[1], ...);
+ * write_seqcount_latch_end(&latch->seq);
* }
*
* The query will have a form like::
@@ -686,13 +722,13 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
* unsigned seq, idx;
*
* do {
- * seq = raw_read_seqcount_latch(&latch->seq);
+ * seq = read_seqcount_latch(&latch->seq);
*
* idx = seq & 0x01;
* entry = data_query(latch->data[idx], ...);
*
* // This includes needed smp_rmb()
- * } while (raw_read_seqcount_latch_retry(&latch->seq, seq));
+ * } while (read_seqcount_latch_retry(&latch->seq, seq));
*
* return entry;
* }
@@ -716,11 +752,31 @@ raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
* When data is a dynamic data structure; one should use regular RCU
* patterns to manage the lifetimes of the objects within.
*/
-static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
+static __always_inline void write_seqcount_latch_begin(seqcount_latch_t *s)
{
- smp_wmb(); /* prior stores before incrementing "sequence" */
- s->seqcount.sequence++;
- smp_wmb(); /* increment "sequence" before following stores */
+ kcsan_nestable_atomic_begin();
+ raw_write_seqcount_latch(s);
+}
+
+/**
+ * write_seqcount_latch() - redirect latch readers to even copy
+ * @s: Pointer to seqcount_latch_t
+ */
+static __always_inline void write_seqcount_latch(seqcount_latch_t *s)
+{
+ raw_write_seqcount_latch(s);
+}
+
+/**
+ * write_seqcount_latch_end() - end a seqcount_latch_t write section
+ * @s: Pointer to seqcount_latch_t
+ *
+ * Marks the end of a seqcount_latch_t writer section, after all copies of the
+ * latch-protected data have been updated.
+ */
+static __always_inline void write_seqcount_latch_end(seqcount_latch_t *s)
+{
+ kcsan_nestable_atomic_end();
}
#define __SEQLOCK_UNLOCKED(lockname) \
@@ -754,11 +810,7 @@ static inline void raw_write_seqcount_latch(seqcount_latch_t *s)
*/
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
- unsigned ret = read_seqcount_begin(&sl->seqcount);
-
- kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry() */
- kcsan_flat_atomic_begin();
- return ret;
+ return read_seqcount_begin(&sl->seqcount);
}
/**
@@ -774,12 +826,6 @@ static inline unsigned read_seqbegin(const seqlock_t *sl)
*/
static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
- /*
- * Assume not nested: read_seqretry() may be called multiple times when
- * completing read critical section.
- */
- kcsan_flat_atomic_end();
-
return read_seqcount_retry(&sl->seqcount, start);
}
diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h
index fd59ed2cca53..e0717c8393d7 100644
--- a/include/linux/serial_8250.h
+++ b/include/linux/serial_8250.h
@@ -193,7 +193,7 @@ void serial8250_do_pm(struct uart_port *port, unsigned int state,
unsigned int oldstate);
void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl);
void serial8250_do_set_divisor(struct uart_port *port, unsigned int baud,
- unsigned int quot, unsigned int quot_frac);
+ unsigned int quot);
int fsl8250_handle_irq(struct uart_port *port);
int serial8250_handle_irq(struct uart_port *port, unsigned int iir);
u16 serial8250_rx_chars(struct uart_8250_port *up, u16 lsr);
diff --git a/include/linux/serial_s3c.h b/include/linux/serial_s3c.h
index 1672cf0810ef..102aa33d956c 100644
--- a/include/linux/serial_s3c.h
+++ b/include/linux/serial_s3c.h
@@ -246,24 +246,28 @@
S5PV210_UFCON_TXTRIG4 | \
S5PV210_UFCON_RXTRIG4)
-#define APPLE_S5L_UCON_RXTO_ENA 9
-#define APPLE_S5L_UCON_RXTHRESH_ENA 12
-#define APPLE_S5L_UCON_TXTHRESH_ENA 13
-#define APPLE_S5L_UCON_RXTO_ENA_MSK (1 << APPLE_S5L_UCON_RXTO_ENA)
-#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK (1 << APPLE_S5L_UCON_RXTHRESH_ENA)
-#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK (1 << APPLE_S5L_UCON_TXTHRESH_ENA)
+#define APPLE_S5L_UCON_RXTO_ENA 9
+#define APPLE_S5L_UCON_RXTO_LEGACY_ENA 11
+#define APPLE_S5L_UCON_RXTHRESH_ENA 12
+#define APPLE_S5L_UCON_TXTHRESH_ENA 13
+#define APPLE_S5L_UCON_RXTO_ENA_MSK BIT(APPLE_S5L_UCON_RXTO_ENA)
+#define APPLE_S5L_UCON_RXTO_LEGACY_ENA_MSK BIT(APPLE_S5L_UCON_RXTO_LEGACY_ENA)
+#define APPLE_S5L_UCON_RXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_RXTHRESH_ENA)
+#define APPLE_S5L_UCON_TXTHRESH_ENA_MSK BIT(APPLE_S5L_UCON_TXTHRESH_ENA)
#define APPLE_S5L_UCON_DEFAULT (S3C2410_UCON_TXIRQMODE | \
S3C2410_UCON_RXIRQMODE | \
S3C2410_UCON_RXFIFO_TOI)
#define APPLE_S5L_UCON_MASK (APPLE_S5L_UCON_RXTO_ENA_MSK | \
+ APPLE_S5L_UCON_RXTO_LEGACY_ENA_MSK | \
APPLE_S5L_UCON_RXTHRESH_ENA_MSK | \
APPLE_S5L_UCON_TXTHRESH_ENA_MSK)
-#define APPLE_S5L_UTRSTAT_RXTHRESH (1<<4)
-#define APPLE_S5L_UTRSTAT_TXTHRESH (1<<5)
-#define APPLE_S5L_UTRSTAT_RXTO (1<<9)
-#define APPLE_S5L_UTRSTAT_ALL_FLAGS (0x3f0)
+#define APPLE_S5L_UTRSTAT_RXTO_LEGACY BIT(3)
+#define APPLE_S5L_UTRSTAT_RXTHRESH BIT(4)
+#define APPLE_S5L_UTRSTAT_TXTHRESH BIT(5)
+#define APPLE_S5L_UTRSTAT_RXTO BIT(9)
+#define APPLE_S5L_UTRSTAT_ALL_FLAGS GENMASK(9, 3)
#ifndef __ASSEMBLY__
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 515a9a6a3c6f..018da28c01e7 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -42,10 +42,10 @@ struct shmem_inode_info {
struct inode vfs_inode;
};
-#define SHMEM_FL_USER_VISIBLE FS_FL_USER_VISIBLE
+#define SHMEM_FL_USER_VISIBLE (FS_FL_USER_VISIBLE | FS_CASEFOLD_FL)
#define SHMEM_FL_USER_MODIFIABLE \
- (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL)
-#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL)
+ (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL | FS_CASEFOLD_FL)
+#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL | FS_CASEFOLD_FL)
struct shmem_quota_limits {
qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index b35e2db7eb0e..0268ea7abf8b 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -448,6 +448,7 @@ void kfree_sensitive(const void *objp);
size_t __ksize(const void *objp);
DEFINE_FREE(kfree, void *, if (!IS_ERR_OR_NULL(_T)) kfree(_T))
+DEFINE_FREE(kfree_sensitive, void *, if (_T) kfree_sensitive(_T))
/**
* ksize - Report actual allocation size of associated object
diff --git a/include/linux/soc/cirrus/ep93xx.h b/include/linux/soc/cirrus/ep93xx.h
index 56fbe2dc59b1..3e6cf2b25a97 100644
--- a/include/linux/soc/cirrus/ep93xx.h
+++ b/include/linux/soc/cirrus/ep93xx.h
@@ -2,7 +2,18 @@
#ifndef _SOC_EP93XX_H
#define _SOC_EP93XX_H
-struct platform_device;
+struct regmap;
+struct spinlock_t;
+
+enum ep93xx_soc_model {
+ EP93XX_9301_SOC,
+ EP93XX_9307_SOC,
+ EP93XX_9312_SOC,
+};
+
+#include <linux/auxiliary_bus.h>
+#include <linux/compiler_types.h>
+#include <linux/container_of.h>
#define EP93XX_CHIP_REV_D0 3
#define EP93XX_CHIP_REV_D1 4
@@ -10,28 +21,18 @@ struct platform_device;
#define EP93XX_CHIP_REV_E1 6
#define EP93XX_CHIP_REV_E2 7
-#ifdef CONFIG_ARCH_EP93XX
-int ep93xx_pwm_acquire_gpio(struct platform_device *pdev);
-void ep93xx_pwm_release_gpio(struct platform_device *pdev);
-int ep93xx_ide_acquire_gpio(struct platform_device *pdev);
-void ep93xx_ide_release_gpio(struct platform_device *pdev);
-int ep93xx_keypad_acquire_gpio(struct platform_device *pdev);
-void ep93xx_keypad_release_gpio(struct platform_device *pdev);
-int ep93xx_i2s_acquire(void);
-void ep93xx_i2s_release(void);
-unsigned int ep93xx_chip_revision(void);
+struct ep93xx_regmap_adev {
+ struct auxiliary_device adev;
+ struct regmap *map;
+ void __iomem *base;
+ spinlock_t *lock;
+ void (*write)(struct regmap *map, spinlock_t *lock, unsigned int reg,
+ unsigned int val);
+ void (*update_bits)(struct regmap *map, spinlock_t *lock,
+ unsigned int reg, unsigned int mask, unsigned int val);
+};
-#else
-static inline int ep93xx_pwm_acquire_gpio(struct platform_device *pdev) { return 0; }
-static inline void ep93xx_pwm_release_gpio(struct platform_device *pdev) {}
-static inline int ep93xx_ide_acquire_gpio(struct platform_device *pdev) { return 0; }
-static inline void ep93xx_ide_release_gpio(struct platform_device *pdev) {}
-static inline int ep93xx_keypad_acquire_gpio(struct platform_device *pdev) { return 0; }
-static inline void ep93xx_keypad_release_gpio(struct platform_device *pdev) {}
-static inline int ep93xx_i2s_acquire(void) { return 0; }
-static inline void ep93xx_i2s_release(void) {}
-static inline unsigned int ep93xx_chip_revision(void) { return 0; }
-
-#endif
+#define to_ep93xx_regmap_adev(_adev) \
+ container_of((_adev), struct ep93xx_regmap_adev, adev)
#endif
diff --git a/include/linux/soc/qcom/geni-se.h b/include/linux/soc/qcom/geni-se.h
index 0f038a1a0330..2996a3c28ef3 100644
--- a/include/linux/soc/qcom/geni-se.h
+++ b/include/linux/soc/qcom/geni-se.h
@@ -88,11 +88,15 @@ struct geni_se {
#define SE_GENI_M_IRQ_STATUS 0x610
#define SE_GENI_M_IRQ_EN 0x614
#define SE_GENI_M_IRQ_CLEAR 0x618
+#define SE_GENI_M_IRQ_EN_SET 0x61c
+#define SE_GENI_M_IRQ_EN_CLEAR 0x620
#define SE_GENI_S_CMD0 0x630
#define SE_GENI_S_CMD_CTRL_REG 0x634
#define SE_GENI_S_IRQ_STATUS 0x640
#define SE_GENI_S_IRQ_EN 0x644
#define SE_GENI_S_IRQ_CLEAR 0x648
+#define SE_GENI_S_IRQ_EN_SET 0x64c
+#define SE_GENI_S_IRQ_EN_CLEAR 0x650
#define SE_GENI_TX_FIFOn 0x700
#define SE_GENI_RX_FIFOn 0x780
#define SE_GENI_TX_FIFO_STATUS 0x800
@@ -101,6 +105,8 @@ struct geni_se {
#define SE_GENI_RX_WATERMARK_REG 0x810
#define SE_GENI_RX_RFR_WATERMARK_REG 0x814
#define SE_GENI_IOS 0x908
+#define SE_GENI_M_GP_LENGTH 0x910
+#define SE_GENI_S_GP_LENGTH 0x914
#define SE_DMA_TX_IRQ_STAT 0xc40
#define SE_DMA_TX_IRQ_CLR 0xc44
#define SE_DMA_TX_FSM_RST 0xc58
@@ -234,6 +240,9 @@ struct geni_se {
#define IO2_DATA_IN BIT(1)
#define RX_DATA_IN BIT(0)
+/* SE_GENI_M_GP_LENGTH and SE_GENI_S_GP_LENGTH fields */
+#define GP_LENGTH GENMASK(31, 0)
+
/* SE_DMA_TX_IRQ_STAT Register fields */
#define TX_DMA_DONE BIT(0)
#define TX_EOT BIT(1)
@@ -249,8 +258,8 @@ struct geni_se {
#define RX_DMA_PARITY_ERR BIT(5)
#define RX_DMA_BREAK GENMASK(8, 7)
#define RX_GENI_GP_IRQ GENMASK(10, 5)
-#define RX_GENI_CANCEL_IRQ BIT(11)
#define RX_GENI_GP_IRQ_EXT GENMASK(13, 12)
+#define RX_GENI_CANCEL_IRQ BIT(14)
/* SE_HW_PARAM_0 fields */
#define TX_FIFO_WIDTH_MSK GENMASK(29, 24)
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 9e9f528b1370..2f20281d4ad4 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -125,6 +125,7 @@ struct llcc_edac_reg_offset {
* @num_banks: Number of llcc banks
* @bitmap: Bit map to track the active slice ids
* @ecc_irq: interrupt for llcc cache error detection and reporting
+ * @ecc_irq_configured: 'True' if firmware has already configured the irq propagation
* @version: Indicates the LLCC version
*/
struct llcc_drv_data {
@@ -139,6 +140,7 @@ struct llcc_drv_data {
u32 num_banks;
unsigned long *bitmap;
int ecc_irq;
+ bool ecc_irq_configured;
u32 version;
};
diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h
index fc5a206c4043..195debe2b1db 100644
--- a/include/linux/sockptr.h
+++ b/include/linux/sockptr.h
@@ -77,7 +77,9 @@ static inline int copy_safe_from_sockptr(void *dst, size_t ksize,
{
if (optlen < ksize)
return -EINVAL;
- return copy_from_sockptr(dst, optval, ksize);
+ if (copy_from_sockptr(dst, optval, ksize))
+ return -EFAULT;
+ return 0;
}
static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 37ae69365fe2..734dc1fa3b5b 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -227,7 +227,7 @@ struct sdw_intel_ops {
/**
* struct sdw_intel_acpi_info - Soundwire Intel information found in ACPI tables
* @handle: ACPI controller handle
- * @count: link count found with "sdw-master-count" property
+ * @count: link count found with "sdw-master-count" or "sdw-manager-list" property
* @link_mask: bit-wise mask listing links enabled by BIOS menu
*
* this structure could be expanded to e.g. provide all the _ADR
diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h
index 61c49b16f69a..f6499c37157d 100644
--- a/include/linux/spinlock_rt.h
+++ b/include/linux/spinlock_rt.h
@@ -16,26 +16,25 @@ static inline void __rt_spin_lock_init(spinlock_t *lock, const char *name,
}
#endif
-#define spin_lock_init(slock) \
+#define __spin_lock_init(slock, name, key, percpu) \
do { \
- static struct lock_class_key __key; \
- \
rt_mutex_base_init(&(slock)->lock); \
- __rt_spin_lock_init(slock, #slock, &__key, false); \
+ __rt_spin_lock_init(slock, name, key, percpu); \
} while (0)
-#define local_spin_lock_init(slock) \
+#define _spin_lock_init(slock, percpu) \
do { \
static struct lock_class_key __key; \
- \
- rt_mutex_base_init(&(slock)->lock); \
- __rt_spin_lock_init(slock, #slock, &__key, true); \
+ __spin_lock_init(slock, #slock, &__key, percpu); \
} while (0)
-extern void rt_spin_lock(spinlock_t *lock);
-extern void rt_spin_lock_nested(spinlock_t *lock, int subclass);
-extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock);
-extern void rt_spin_unlock(spinlock_t *lock);
+#define spin_lock_init(slock) _spin_lock_init(slock, false)
+#define local_spin_lock_init(slock) _spin_lock_init(slock, true)
+
+extern void rt_spin_lock(spinlock_t *lock) __acquires(lock);
+extern void rt_spin_lock_nested(spinlock_t *lock, int subclass) __acquires(lock);
+extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock) __acquires(lock);
+extern void rt_spin_unlock(spinlock_t *lock) __releases(lock);
extern void rt_spin_lock_unlock(spinlock_t *lock);
extern int rt_spin_trylock_bh(spinlock_t *lock);
extern int rt_spin_trylock(spinlock_t *lock);
@@ -132,7 +131,7 @@ static __always_inline void spin_unlock_irqrestore(spinlock_t *lock,
#define spin_trylock_irq(lock) \
__cond_lock(lock, rt_spin_trylock(lock))
-#define __spin_trylock_irqsave(lock, flags) \
+#define spin_trylock_irqsave(lock, flags) \
({ \
int __locked; \
\
@@ -142,9 +141,6 @@ static __always_inline void spin_unlock_irqrestore(spinlock_t *lock,
__locked; \
})
-#define spin_trylock_irqsave(lock, flags) \
- __cond_lock(lock, __spin_trylock_irqsave(lock, flags))
-
#define spin_is_contended(lock) (((void)(lock), 0))
static inline int spin_is_locked(spinlock_t *lock)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 835bbb2d1f88..08339eb8a01c 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -56,6 +56,13 @@ void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
void cleanup_srcu_struct(struct srcu_struct *ssp);
int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
+#ifdef CONFIG_TINY_SRCU
+#define __srcu_read_lock_lite __srcu_read_lock
+#define __srcu_read_unlock_lite __srcu_read_unlock
+#else // #ifdef CONFIG_TINY_SRCU
+int __srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp);
+void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx) __releases(ssp);
+#endif // #else // #ifdef CONFIG_TINY_SRCU
void synchronize_srcu(struct srcu_struct *ssp);
#define SRCU_GET_STATE_COMPLETED 0x1
@@ -176,17 +183,6 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
-#define SRCU_NMI_UNKNOWN 0x0
-#define SRCU_NMI_UNSAFE 0x1
-#define SRCU_NMI_SAFE 0x2
-
-#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_TREE_SRCU)
-void srcu_check_nmi_safety(struct srcu_struct *ssp, bool nmi_safe);
-#else
-static inline void srcu_check_nmi_safety(struct srcu_struct *ssp,
- bool nmi_safe) { }
-#endif
-
/**
* srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing
@@ -236,33 +232,67 @@ static inline void srcu_check_nmi_safety(struct srcu_struct *ssp,
* a mutex that is held elsewhere while calling synchronize_srcu() or
* synchronize_srcu_expedited().
*
- * Note that srcu_read_lock() and the matching srcu_read_unlock() must
- * occur in the same context, for example, it is illegal to invoke
- * srcu_read_unlock() in an irq handler if the matching srcu_read_lock()
- * was invoked in process context.
+ * The return value from srcu_read_lock() must be passed unaltered
+ * to the matching srcu_read_unlock(). Note that srcu_read_lock() and
+ * the matching srcu_read_unlock() must occur in the same context, for
+ * example, it is illegal to invoke srcu_read_unlock() in an irq handler
+ * if the matching srcu_read_lock() was invoked in process context. Or,
+ * for that matter to invoke srcu_read_unlock() from one task and the
+ * matching srcu_read_lock() from another.
*/
static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp)
{
int retval;
- srcu_check_nmi_safety(ssp, false);
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL);
retval = __srcu_read_lock(ssp);
srcu_lock_acquire(&ssp->dep_map);
return retval;
}
/**
+ * srcu_read_lock_lite - register a new reader for an SRCU-protected structure.
+ * @ssp: srcu_struct in which to register the new reader.
+ *
+ * Enter an SRCU read-side critical section, but for a light-weight
+ * smp_mb()-free reader. See srcu_read_lock() for more information.
+ *
+ * If srcu_read_lock_lite() is ever used on an srcu_struct structure,
+ * then none of the other flavors may be used, whether before, during,
+ * or after. Note that grace-period auto-expediting is disabled for _lite
+ * srcu_struct structures because auto-expedited grace periods invoke
+ * synchronize_rcu_expedited(), IPIs and all.
+ *
+ * Note that srcu_read_lock_lite() can be invoked only from those contexts
+ * where RCU is watching, that is, from contexts where it would be legal
+ * to invoke rcu_read_lock(). Otherwise, lockdep will complain.
+ */
+static inline int srcu_read_lock_lite(struct srcu_struct *ssp) __acquires(ssp)
+{
+ int retval;
+
+ srcu_check_read_flavor_lite(ssp);
+ retval = __srcu_read_lock_lite(ssp);
+ rcu_try_lock_acquire(&ssp->dep_map);
+ return retval;
+}
+
+/**
* srcu_read_lock_nmisafe - register a new reader for an SRCU-protected structure.
* @ssp: srcu_struct in which to register the new reader.
*
* Enter an SRCU read-side critical section, but in an NMI-safe manner.
* See srcu_read_lock() for more information.
+ *
+ * If srcu_read_lock_nmisafe() is ever used on an srcu_struct structure,
+ * then none of the other flavors may be used, whether before, during,
+ * or after.
*/
static inline int srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp)
{
int retval;
- srcu_check_nmi_safety(ssp, true);
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NMI);
retval = __srcu_read_lock_nmisafe(ssp);
rcu_try_lock_acquire(&ssp->dep_map);
return retval;
@@ -274,7 +304,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
{
int retval;
- srcu_check_nmi_safety(ssp, false);
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL);
retval = __srcu_read_lock(ssp);
return retval;
}
@@ -303,7 +333,7 @@ srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp)
static inline int srcu_down_read(struct srcu_struct *ssp) __acquires(ssp)
{
WARN_ON_ONCE(in_nmi());
- srcu_check_nmi_safety(ssp, false);
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL);
return __srcu_read_lock(ssp);
}
@@ -318,12 +348,28 @@ static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx)
__releases(ssp)
{
WARN_ON_ONCE(idx & ~0x1);
- srcu_check_nmi_safety(ssp, false);
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL);
srcu_lock_release(&ssp->dep_map);
__srcu_read_unlock(ssp, idx);
}
/**
+ * srcu_read_unlock_lite - unregister a old reader from an SRCU-protected structure.
+ * @ssp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Exit a light-weight SRCU read-side critical section.
+ */
+static inline void srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
+ __releases(ssp)
+{
+ WARN_ON_ONCE(idx & ~0x1);
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+ srcu_lock_release(&ssp->dep_map);
+ __srcu_read_unlock_lite(ssp, idx);
+}
+
+/**
* srcu_read_unlock_nmisafe - unregister a old reader from an SRCU-protected structure.
* @ssp: srcu_struct in which to unregister the old reader.
* @idx: return value from corresponding srcu_read_lock().
@@ -334,7 +380,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
__releases(ssp)
{
WARN_ON_ONCE(idx & ~0x1);
- srcu_check_nmi_safety(ssp, true);
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NMI);
rcu_lock_release(&ssp->dep_map);
__srcu_read_unlock_nmisafe(ssp, idx);
}
@@ -343,7 +389,7 @@ static inline void srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx)
static inline notrace void
srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp)
{
- srcu_check_nmi_safety(ssp, false);
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL);
__srcu_read_unlock(ssp, idx);
}
@@ -360,7 +406,7 @@ static inline void srcu_up_read(struct srcu_struct *ssp, int idx)
{
WARN_ON_ONCE(idx & ~0x1);
WARN_ON_ONCE(in_nmi());
- srcu_check_nmi_safety(ssp, false);
+ srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_NORMAL);
__srcu_read_unlock(ssp, idx);
}
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 4d96bbdb45f0..1321da803274 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -81,6 +81,9 @@ static inline void srcu_barrier(struct srcu_struct *ssp)
synchronize_srcu(ssp);
}
+#define srcu_check_read_flavor(ssp, read_flavor) do { } while (0)
+#define srcu_check_read_flavor_lite(ssp) do { } while (0)
+
/* Defined here to avoid size increase for non-torture kernels. */
static inline void srcu_torture_stats_print(struct srcu_struct *ssp,
char *tt, char *tf)
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index ed57598394de..490aeecc6bb4 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -25,7 +25,7 @@ struct srcu_data {
/* Read-side state. */
atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */
atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */
- int srcu_nmi_safety; /* NMI-safe srcu_struct structure? */
+ int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */
/* Update-side state. */
spinlock_t __private lock ____cacheline_internodealigned_in_smp;
@@ -43,6 +43,11 @@ struct srcu_data {
struct srcu_struct *ssp;
};
+/* Values for ->srcu_reader_flavor. */
+#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock().
+#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe().
+#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite().
+
/*
* Node in SRCU combining tree, similar in function to rcu_data.
*/
@@ -204,4 +209,64 @@ void synchronize_srcu_expedited(struct srcu_struct *ssp);
void srcu_barrier(struct srcu_struct *ssp);
void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf);
+/*
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct. Returns an index that must be passed to the matching
+ * srcu_read_unlock_lite().
+ *
+ * Note that this_cpu_inc() is an RCU read-side critical section either
+ * because it disables interrupts, because it is a single instruction,
+ * or because it is a read-modify-write atomic operation, depending on
+ * the whims of the architecture.
+ */
+static inline int __srcu_read_lock_lite(struct srcu_struct *ssp)
+{
+ int idx;
+
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_lock_lite().");
+ idx = READ_ONCE(ssp->srcu_idx) & 0x1;
+ this_cpu_inc(ssp->sda->srcu_lock_count[idx].counter); /* Y */
+ barrier(); /* Avoid leaking the critical section. */
+ return idx;
+}
+
+/*
+ * Removes the count for the old reader from the appropriate
+ * per-CPU element of the srcu_struct. Note that this may well be a
+ * different CPU than that which was incremented by the corresponding
+ * srcu_read_lock_lite(), but it must be within the same task.
+ *
+ * Note that this_cpu_inc() is an RCU read-side critical section either
+ * because it disables interrupts, because it is a single instruction,
+ * or because it is a read-modify-write atomic operation, depending on
+ * the whims of the architecture.
+ */
+static inline void __srcu_read_unlock_lite(struct srcu_struct *ssp, int idx)
+{
+ barrier(); /* Avoid leaking the critical section. */
+ this_cpu_inc(ssp->sda->srcu_unlock_count[idx].counter); /* Z */
+ RCU_LOCKDEP_WARN(!rcu_is_watching(), "RCU must be watching srcu_read_unlock_lite().");
+}
+
+void __srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor);
+
+// Record _lite() usage even for CONFIG_PROVE_RCU=n kernels.
+static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp)
+{
+ struct srcu_data *sdp = raw_cpu_ptr(ssp->sda);
+
+ if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE))
+ return;
+
+ // Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered.
+ __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE);
+}
+
+// Record non-_lite() usage only for CONFIG_PROVE_RCU=y kernels.
+static inline void srcu_check_read_flavor(struct srcu_struct *ssp, int read_flavor)
+{
+ if (IS_ENABLED(CONFIG_PROVE_RCU))
+ __srcu_check_read_flavor(ssp, read_flavor);
+}
+
#endif
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 0c77ba488bba..fec1e8a1570c 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -151,13 +151,15 @@ struct rpc_task_setup {
#define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT)
#define RPC_IS_MOVEABLE(t) ((t)->tk_flags & RPC_TASK_MOVEABLE)
-#define RPC_TASK_RUNNING 0
-#define RPC_TASK_QUEUED 1
-#define RPC_TASK_ACTIVE 2
-#define RPC_TASK_NEED_XMIT 3
-#define RPC_TASK_NEED_RECV 4
-#define RPC_TASK_MSG_PIN_WAIT 5
-#define RPC_TASK_SIGNALLED 6
+enum {
+ RPC_TASK_RUNNING,
+ RPC_TASK_QUEUED,
+ RPC_TASK_ACTIVE,
+ RPC_TASK_NEED_XMIT,
+ RPC_TASK_NEED_RECV,
+ RPC_TASK_MSG_PIN_WAIT,
+ RPC_TASK_SIGNALLED,
+};
#define rpc_test_and_set_running(t) \
test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index c419a61f60e5..e68fecf6eab5 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -67,9 +67,10 @@ enum {
* We currently do not support more than one RPC program per daemon.
*/
struct svc_serv {
- struct svc_program * sv_program; /* RPC program */
+ struct svc_program * sv_programs; /* RPC programs */
struct svc_stat * sv_stats; /* RPC statistics */
spinlock_t sv_lock;
+ unsigned int sv_nprogs; /* Number of sv_programs */
unsigned int sv_nrthreads; /* # of server threads */
unsigned int sv_maxconn; /* max connections allowed or
* '0' causing max to be based
@@ -360,10 +361,9 @@ struct svc_process_info {
};
/*
- * List of RPC programs on the same transport endpoint
+ * RPC program - an array of these can use the same transport endpoint
*/
struct svc_program {
- struct svc_program * pg_next; /* other programs (same xprt) */
u32 pg_prog; /* program number */
unsigned int pg_lovers; /* lowest version */
unsigned int pg_hivers; /* highest version */
@@ -441,6 +441,7 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp,
void svc_rqst_release_pages(struct svc_rqst *rqstp);
void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *prog,
+ unsigned int nprog,
struct svc_stat *stats,
unsigned int bufsize,
int (*threadfn)(void *data));
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 63cf6fb26dcc..2e111153f7cd 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -14,6 +14,7 @@
#include <linux/sunrpc/msg_prot.h>
#include <linux/sunrpc/cache.h>
#include <linux/sunrpc/gss_api.h>
+#include <linux/sunrpc/clnt.h>
#include <linux/hash.h>
#include <linux/stringhash.h>
#include <linux/cred.h>
@@ -157,6 +158,10 @@ extern enum svc_auth_status svc_set_client(struct svc_rqst *rqstp);
extern int svc_auth_register(rpc_authflavor_t flavor, struct auth_ops *aops);
extern void svc_auth_unregister(rpc_authflavor_t flavor);
+extern void svcauth_map_clnt_to_svc_cred_local(struct rpc_clnt *clnt,
+ const struct cred *,
+ struct svc_cred *);
+
extern struct auth_domain *unix_domain_find(char *name);
extern void auth_domain_put(struct auth_domain *item);
extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *new);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 2f8dc47f1eb0..5f775e104f9a 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -13,7 +13,7 @@
#include <linux/uio.h>
#include <asm/byteorder.h>
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
#include <linux/scatterlist.h>
struct bio_vec;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ca533b478c21..f3e0ac20c2e8 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -335,6 +335,7 @@ struct swap_info_struct {
* list.
*/
struct work_struct discard_work; /* discard worker */
+ struct work_struct reclaim_work; /* reclaim worker */
struct list_head discard_clusters; /* discard clusters list */
struct plist_node avail_lists[]; /*
* entries in swap_avail_heads, one
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5758104921e6..c6333204d451 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -77,6 +77,7 @@ struct cachestat_range;
struct cachestat;
struct statmount;
struct mnt_id_req;
+struct xattr_args;
#include <linux/types.h>
#include <linux/aio_abi.h>
@@ -338,23 +339,35 @@ asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op,
void __user *arg, unsigned int nr_args);
asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
const void __user *value, size_t size, int flags);
+asmlinkage long sys_setxattrat(int dfd, const char __user *path, unsigned int at_flags,
+ const char __user *name,
+ const struct xattr_args __user *args, size_t size);
asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name,
const void __user *value, size_t size, int flags);
asmlinkage long sys_fsetxattr(int fd, const char __user *name,
const void __user *value, size_t size, int flags);
asmlinkage long sys_getxattr(const char __user *path, const char __user *name,
void __user *value, size_t size);
+asmlinkage long sys_getxattrat(int dfd, const char __user *path, unsigned int at_flags,
+ const char __user *name,
+ struct xattr_args __user *args, size_t size);
asmlinkage long sys_lgetxattr(const char __user *path, const char __user *name,
void __user *value, size_t size);
asmlinkage long sys_fgetxattr(int fd, const char __user *name,
void __user *value, size_t size);
asmlinkage long sys_listxattr(const char __user *path, char __user *list,
size_t size);
+asmlinkage long sys_listxattrat(int dfd, const char __user *path,
+ unsigned int at_flags,
+ char __user *list, size_t size);
asmlinkage long sys_llistxattr(const char __user *path, char __user *list,
size_t size);
asmlinkage long sys_flistxattr(int fd, char __user *list, size_t size);
asmlinkage long sys_removexattr(const char __user *path,
const char __user *name);
+asmlinkage long sys_removexattrat(int dfd, const char __user *path,
+ unsigned int at_flags,
+ const char __user *name);
asmlinkage long sys_lremovexattr(const char __user *path,
const char __user *name);
asmlinkage long sys_fremovexattr(int fd, const char __user *name);
diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h
index bef5f06a91de..07cbab516942 100644
--- a/include/linux/sysfb.h
+++ b/include/linux/sysfb.h
@@ -60,12 +60,19 @@ struct efifb_dmi_info {
void sysfb_disable(struct device *dev);
+bool sysfb_handles_screen_info(void);
+
#else /* CONFIG_SYSFB */
static inline void sysfb_disable(struct device *dev)
{
}
+static inline bool sysfb_handles_screen_info(void)
+{
+ return false;
+}
+
#endif /* CONFIG_SYSFB */
#ifdef CONFIG_EFI
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index cf5e7e891a77..2964171856e0 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -14,11 +14,14 @@ init_task_work(struct callback_head *twork, task_work_func_t func)
}
enum task_work_notify_mode {
- TWA_NONE,
+ TWA_NONE = 0,
TWA_RESUME,
TWA_SIGNAL,
TWA_SIGNAL_NO_IPI,
TWA_NMI_CURRENT,
+
+ TWA_FLAGS = 0xff00,
+ TWAF_NO_ALLOC = 0x0100,
};
static inline bool task_work_pending(struct task_struct *task)
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 25ea8fe2313e..754802478b96 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -56,6 +56,9 @@ enum thermal_notify_event {
THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */
THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */
THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */
+ THERMAL_TZ_ADD_THRESHOLD, /* Threshold added */
+ THERMAL_TZ_DEL_THRESHOLD, /* Threshold deleted */
+ THERMAL_TZ_FLUSH_THRESHOLDS, /* All thresholds deleted */
};
/**
@@ -137,6 +140,9 @@ struct thermal_cooling_device {
#endif
};
+DEFINE_GUARD(cooling_dev, struct thermal_cooling_device *, mutex_lock(&_T->lock),
+ mutex_unlock(&_T->lock))
+
/* Structure to define Thermal Zone parameters */
struct thermal_zone_params {
const char *governor_name;
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 9ea0b28068f4..cf2446c9c30d 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -59,6 +59,14 @@ enum syscall_work_bit {
#include <asm/thread_info.h>
+#ifndef TIF_NEED_RESCHED_LAZY
+#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY
+#error Inconsistent PREEMPT_LAZY
+#endif
+#define TIF_NEED_RESCHED_LAZY TIF_NEED_RESCHED
+#define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED
+#endif
+
#ifdef __KERNEL__
#ifndef arch_set_restart_data
@@ -179,22 +187,27 @@ static __always_inline unsigned long read_ti_thread_flags(struct thread_info *ti
#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
-static __always_inline bool tif_need_resched(void)
+static __always_inline bool tif_test_bit(int bit)
{
- return arch_test_bit(TIF_NEED_RESCHED,
+ return arch_test_bit(bit,
(unsigned long *)(&current_thread_info()->flags));
}
#else
-static __always_inline bool tif_need_resched(void)
+static __always_inline bool tif_test_bit(int bit)
{
- return test_bit(TIF_NEED_RESCHED,
+ return test_bit(bit,
(unsigned long *)(&current_thread_info()->flags));
}
#endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
+static __always_inline bool tif_need_resched(void)
+{
+ return tif_test_bit(TIF_NEED_RESCHED);
+}
+
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
static inline int arch_within_stack_frames(const void * const stack,
const void * const stackend,
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 72744638c5b0..b8ddc8e631a3 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -20,12 +20,10 @@ extern void __init tick_init(void);
extern void tick_suspend_local(void);
/* Should be core only, but XEN resume magic and ARM BL switcher require it */
extern void tick_resume_local(void);
-extern void tick_cleanup_dead_cpu(int cpu);
#else /* CONFIG_GENERIC_CLOCKEVENTS */
static inline void tick_init(void) { }
static inline void tick_suspend_local(void) { }
static inline void tick_resume_local(void) { }
-static inline void tick_cleanup_dead_cpu(int cpu) { }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
#if defined(CONFIG_GENERIC_CLOCKEVENTS) && defined(CONFIG_HOTPLUG_CPU)
@@ -251,12 +249,19 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
if (tick_nohz_full_enabled())
tick_nohz_dep_set_task(tsk, bit);
}
+
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit)
{
if (tick_nohz_full_enabled())
tick_nohz_dep_clear_task(tsk, bit);
}
+
+static inline void tick_dep_init_task(struct task_struct *tsk)
+{
+ atomic_set(&tsk->tick_dep_mask, 0);
+}
+
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit)
{
@@ -290,6 +295,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_task(struct task_struct *tsk,
enum tick_dep_bits bit) { }
+static inline void tick_dep_init_task(struct task_struct *tsk) { }
static inline void tick_dep_set_signal(struct task_struct *tsk,
enum tick_dep_bits bit) { }
static inline void tick_dep_clear_signal(struct signal_struct *signal,
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 902c20ef495a..e39d4d563b19 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -26,7 +26,7 @@
* occupies a single 64byte cache line.
*
* The struct is separate from struct timekeeper as it is also used
- * for a fast NMI safe accessors.
+ * for the fast NMI safe accessors.
*
* @base_real is for the fast NMI safe accessor to allow reading clock
* realtime from any context.
@@ -44,33 +44,38 @@ struct tk_read_base {
/**
* struct timekeeper - Structure holding internal timekeeping values.
- * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
- * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
- * @xtime_sec: Current CLOCK_REALTIME time in seconds
- * @ktime_sec: Current CLOCK_MONOTONIC time in seconds
- * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
- * @offs_real: Offset clock monotonic -> clock realtime
- * @offs_boot: Offset clock monotonic -> clock boottime
- * @offs_tai: Offset clock monotonic -> clock tai
- * @tai_offset: The current UTC to TAI offset in seconds
- * @clock_was_set_seq: The sequence number of clock was set events
- * @cs_was_changed_seq: The sequence number of clocksource change events
- * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
- * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
- * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
- * @cycle_interval: Number of clock cycles in one NTP interval
- * @xtime_interval: Number of clock shifted nano seconds in one NTP
- * interval.
- * @xtime_remainder: Shifted nano seconds left over when rounding
- * @cycle_interval
- * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
- * @ntp_error: Difference between accumulated time and NTP time in ntp
- * shifted nano seconds.
- * @ntp_error_shift: Shift conversion between clock shifted nano seconds and
- * ntp shifted nano seconds.
- * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING)
- * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING)
- * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING)
+ * @tkr_mono: The readout base structure for CLOCK_MONOTONIC
+ * @xtime_sec: Current CLOCK_REALTIME time in seconds
+ * @ktime_sec: Current CLOCK_MONOTONIC time in seconds
+ * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
+ * @offs_real: Offset clock monotonic -> clock realtime
+ * @offs_boot: Offset clock monotonic -> clock boottime
+ * @offs_tai: Offset clock monotonic -> clock tai
+ * @tai_offset: The current UTC to TAI offset in seconds
+ * @tkr_raw: The readout base structure for CLOCK_MONOTONIC_RAW
+ * @raw_sec: CLOCK_MONOTONIC_RAW time in seconds
+ * @clock_was_set_seq: The sequence number of clock was set events
+ * @cs_was_changed_seq: The sequence number of clocksource change events
+ * @monotonic_to_boot: CLOCK_MONOTONIC to CLOCK_BOOTTIME offset
+ * @cycle_interval: Number of clock cycles in one NTP interval
+ * @xtime_interval: Number of clock shifted nano seconds in one NTP
+ * interval.
+ * @xtime_remainder: Shifted nano seconds left over when rounding
+ * @cycle_interval
+ * @raw_interval: Shifted raw nano seconds accumulated per NTP interval.
+ * @next_leap_ktime: CLOCK_MONOTONIC time value of a pending leap-second
+ * @ntp_tick: The ntp_tick_length() value currently being
+ * used. This cached copy ensures we consistently
+ * apply the tick length for an entire tick, as
+ * ntp_tick_length may change mid-tick, and we don't
+ * want to apply that new value to the tick in
+ * progress.
+ * @ntp_error: Difference between accumulated time and NTP time in ntp
+ * shifted nano seconds.
+ * @ntp_error_shift: Shift conversion between clock shifted nano seconds and
+ * ntp shifted nano seconds.
+ * @ntp_err_mult: Multiplication factor for scaled math conversion
+ * @skip_second_overflow: Flag used to avoid updating NTP twice with same second
*
* Note: For timespec(64) based interfaces wall_to_monotonic is what
* we need to add to xtime (or xtime corrected for sub jiffy times)
@@ -88,10 +93,28 @@ struct tk_read_base {
*
* @monotonic_to_boottime is a timespec64 representation of @offs_boot to
* accelerate the VDSO update for CLOCK_BOOTTIME.
+ *
+ * The cacheline ordering of the structure is optimized for in kernel usage of
+ * the ktime_get() and ktime_get_ts64() family of time accessors. Struct
+ * timekeeper is prepended in the core timekeeping code with a sequence count,
+ * which results in the following cacheline layout:
+ *
+ * 0: seqcount, tkr_mono
+ * 1: xtime_sec ... tai_offset
+ * 2: tkr_raw, raw_sec
+ * 3,4: Internal variables
+ *
+ * Cacheline 0,1 contain the data which is used for accessing
+ * CLOCK_MONOTONIC/REALTIME/BOOTTIME/TAI, while cacheline 2 contains the
+ * data for accessing CLOCK_MONOTONIC_RAW. Cacheline 3,4 are internal
+ * variables which are only accessed during timekeeper updates once per
+ * tick.
*/
struct timekeeper {
+ /* Cacheline 0 (together with prepended seqcount of timekeeper core): */
struct tk_read_base tkr_mono;
- struct tk_read_base tkr_raw;
+
+ /* Cacheline 1: */
u64 xtime_sec;
unsigned long ktime_sec;
struct timespec64 wall_to_monotonic;
@@ -99,43 +122,28 @@ struct timekeeper {
ktime_t offs_boot;
ktime_t offs_tai;
s32 tai_offset;
+
+ /* Cacheline 2: */
+ struct tk_read_base tkr_raw;
+ u64 raw_sec;
+
+ /* Cachline 3 and 4 (timekeeping internal variables): */
unsigned int clock_was_set_seq;
u8 cs_was_changed_seq;
- ktime_t next_leap_ktime;
- u64 raw_sec;
+
struct timespec64 monotonic_to_boot;
- /* The following members are for timekeeping internal use */
u64 cycle_interval;
u64 xtime_interval;
s64 xtime_remainder;
u64 raw_interval;
- /* The ntp_tick_length() value currently being used.
- * This cached copy ensures we consistently apply the tick
- * length for an entire tick, as ntp_tick_length may change
- * mid-tick, and we don't want to apply that new value to
- * the tick in progress.
- */
+
+ ktime_t next_leap_ktime;
u64 ntp_tick;
- /* Difference between accumulated time and NTP time in ntp
- * shifted nano seconds. */
s64 ntp_error;
u32 ntp_error_shift;
u32 ntp_err_mult;
- /* Flag used to avoid updating NTP twice with same second */
u32 skip_second_overflow;
-#ifdef CONFIG_DEBUG_TIMEKEEPING
- long last_warning;
- /*
- * These simple flag variables are managed
- * without locks, which is racy, but they are
- * ok since we don't really care about being
- * super precise about how many events were
- * seen, just that a problem was observed.
- */
- int underflow_seen;
- int overflow_seen;
-#endif
};
#ifdef CONFIG_GENERIC_TIME_VSYSCALL
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index fc12a9ba2c88..0e035f675efe 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -45,6 +45,11 @@ extern void ktime_get_real_ts64(struct timespec64 *tv);
extern void ktime_get_coarse_ts64(struct timespec64 *ts);
extern void ktime_get_coarse_real_ts64(struct timespec64 *ts);
+/* Multigrain timestamp interfaces */
+extern void ktime_get_coarse_real_ts64_mg(struct timespec64 *ts);
+extern void ktime_get_real_ts64_mg(struct timespec64 *ts);
+extern unsigned long timekeeping_get_mg_floor_swaps(void);
+
void getboottime64(struct timespec64 *ts);
/*
@@ -275,6 +280,7 @@ struct ktime_timestamps {
* counter value
* @cycles: Clocksource counter value to produce the system times
* @real: Realtime system time
+ * @boot: Boot time
* @raw: Monotonic raw system time
* @cs_id: Clocksource ID
* @clock_was_set_seq: The sequence number of clock-was-set events
@@ -283,6 +289,7 @@ struct ktime_timestamps {
struct system_time_snapshot {
u64 cycles;
ktime_t real;
+ ktime_t boot;
ktime_t raw;
enum clocksource_ids cs_id;
unsigned int clock_was_set_seq;
diff --git a/include/linux/timex.h b/include/linux/timex.h
index 3871b06bd302..4ee32eff3f22 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -139,14 +139,6 @@ unsigned long random_get_entropy_fallback(void);
#define MAXSEC 2048 /* max interval between updates (s) */
#define NTP_PHASE_LIMIT ((MAXPHASE / NSEC_PER_USEC) << 5) /* beyond max. dispersion */
-/*
- * kernel variables
- * Note: maximum error = NTP sync distance = dispersion + delay / 2;
- * estimated error = NTP dispersion.
- */
-extern unsigned long tick_usec; /* USER_HZ period (usec) */
-extern unsigned long tick_nsec; /* SHIFTED_HZ period (nsec) */
-
/* Required to safely shift negative values */
#define shift_right(x, s) ({ \
__typeof__(x) __x = (x); \
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index e93ee8d936a9..20a40ade8030 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -421,6 +421,7 @@ void tpm_buf_append_u32(struct tpm_buf *buf, const u32 value);
u8 tpm_buf_read_u8(struct tpm_buf *buf, off_t *offset);
u16 tpm_buf_read_u16(struct tpm_buf *buf, off_t *offset);
u32 tpm_buf_read_u32(struct tpm_buf *buf, off_t *offset);
+void tpm_buf_append_handle(struct tpm_chip *chip, struct tpm_buf *buf, u32 handle);
/*
* Check if TPM device is in the firmware upgrade mode.
@@ -505,6 +506,8 @@ void tpm_buf_append_name(struct tpm_chip *chip, struct tpm_buf *buf,
void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
u8 attributes, u8 *passphrase,
int passphraselen);
+void tpm_buf_append_auth(struct tpm_chip *chip, struct tpm_buf *buf,
+ u8 attributes, u8 *passphrase, int passphraselen);
static inline void tpm_buf_append_hmac_session_opt(struct tpm_chip *chip,
struct tpm_buf *buf,
u8 attributes,
@@ -537,7 +540,7 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
int rc);
void tpm2_end_auth_session(struct tpm_chip *chip);
#else
-#include <asm/unaligned.h>
+#include <linux/unaligned.h>
static inline int tpm2_start_auth_session(struct tpm_chip *chip)
{
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 6be396bb4297..93a9f3070b48 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -64,6 +64,13 @@ struct tp_module {
bool trace_module_has_bad_taint(struct module *mod);
extern int register_tracepoint_module_notifier(struct notifier_block *nb);
extern int unregister_tracepoint_module_notifier(struct notifier_block *nb);
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *,
+ struct module *, void *),
+ void *priv);
+void for_each_tracepoint_in_module(struct module *,
+ void (*fct)(struct tracepoint *,
+ struct module *, void *),
+ void *priv);
#else
static inline bool trace_module_has_bad_taint(struct module *mod)
{
@@ -79,6 +86,19 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb)
{
return 0;
}
+static inline
+void for_each_module_tracepoint(void (*fct)(struct tracepoint *,
+ struct module *, void *),
+ void *priv)
+{
+}
+static inline
+void for_each_tracepoint_in_module(struct module *mod,
+ void (*fct)(struct tracepoint *,
+ struct module *, void *),
+ void *priv)
+{
+}
#endif /* CONFIG_MODULES */
/*
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 39c7cf82b0c2..e9c702c1908d 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -38,6 +38,7 @@
#else
#define can_do_masked_user_access() 0
#define masked_user_access_begin(src) NULL
+ #define mask_user_address(src) (src)
#endif
/*
@@ -159,19 +160,27 @@ _inline_copy_from_user(void *to, const void __user *from, unsigned long n)
{
unsigned long res = n;
might_fault();
- if (!should_fail_usercopy() && likely(access_ok(from, n))) {
+ if (should_fail_usercopy())
+ goto fail;
+ if (can_do_masked_user_access())
+ from = mask_user_address(from);
+ else {
+ if (!access_ok(from, n))
+ goto fail;
/*
* Ensure that bad access_ok() speculation will not
* lead to nasty side effects *after* the copy is
* finished:
*/
barrier_nospec();
- instrument_copy_from_user_before(to, from, n);
- res = raw_copy_from_user(to, from, n);
- instrument_copy_from_user_after(to, from, n, res);
}
- if (unlikely(res))
- memset(to + (n - res), 0, res);
+ instrument_copy_from_user_before(to, from, n);
+ res = raw_copy_from_user(to, from, n);
+ instrument_copy_from_user_after(to, from, n, res);
+ if (likely(!res))
+ return 0;
+fail:
+ memset(to + (n - res), 0, res);
return res;
}
extern __must_check unsigned long
@@ -394,6 +403,103 @@ copy_struct_from_user(void *dst, size_t ksize, const void __user *src,
return 0;
}
+/**
+ * copy_struct_to_user: copy a struct to userspace
+ * @dst: Destination address, in userspace. This buffer must be @ksize
+ * bytes long.
+ * @usize: (Alleged) size of @dst struct.
+ * @src: Source address, in kernel space.
+ * @ksize: Size of @src struct.
+ * @ignored_trailing: Set to %true if there was a non-zero byte in @src that
+ * userspace cannot see because they are using an smaller struct.
+ *
+ * Copies a struct from kernel space to userspace, in a way that guarantees
+ * backwards-compatibility for struct syscall arguments (as long as future
+ * struct extensions are made such that all new fields are *appended* to the
+ * old struct, and zeroed-out new fields have the same meaning as the old
+ * struct).
+ *
+ * Some syscalls may wish to make sure that userspace knows about everything in
+ * the struct, and if there is a non-zero value that userspce doesn't know
+ * about, they want to return an error (such as -EMSGSIZE) or have some other
+ * fallback (such as adding a "you're missing some information" flag). If
+ * @ignored_trailing is non-%NULL, it will be set to %true if there was a
+ * non-zero byte that could not be copied to userspace (ie. was past @usize).
+ *
+ * While unconditionally returning an error in this case is the simplest
+ * solution, for maximum backward compatibility you should try to only return
+ * -EMSGSIZE if the user explicitly requested the data that couldn't be copied.
+ * Note that structure sizes can change due to header changes and simple
+ * recompilations without code changes(!), so if you care about
+ * @ignored_trailing you probably want to make sure that any new field data is
+ * associated with a flag. Otherwise you might assume that a program knows
+ * about data it does not.
+ *
+ * @ksize is just sizeof(*src), and @usize should've been passed by userspace.
+ * The recommended usage is something like the following:
+ *
+ * SYSCALL_DEFINE2(foobar, struct foo __user *, uarg, size_t, usize)
+ * {
+ * int err;
+ * bool ignored_trailing;
+ * struct foo karg = {};
+ *
+ * if (usize > PAGE_SIZE)
+ * return -E2BIG;
+ * if (usize < FOO_SIZE_VER0)
+ * return -EINVAL;
+ *
+ * // ... modify karg somehow ...
+ *
+ * err = copy_struct_to_user(uarg, usize, &karg, sizeof(karg),
+ * &ignored_trailing);
+ * if (err)
+ * return err;
+ * if (ignored_trailing)
+ * return -EMSGSIZE:
+ *
+ * // ...
+ * }
+ *
+ * There are three cases to consider:
+ * * If @usize == @ksize, then it's copied verbatim.
+ * * If @usize < @ksize, then the kernel is trying to pass userspace a newer
+ * struct than it supports. Thus we only copy the interoperable portions
+ * (@usize) and ignore the rest (but @ignored_trailing is set to %true if
+ * any of the trailing (@ksize - @usize) bytes are non-zero).
+ * * If @usize > @ksize, then the kernel is trying to pass userspace an older
+ * struct than userspace supports. In order to make sure the
+ * unknown-to-the-kernel fields don't contain garbage values, we zero the
+ * trailing (@usize - @ksize) bytes.
+ *
+ * Returns (in all cases, some data may have been copied):
+ * * -EFAULT: access to userspace failed.
+ */
+static __always_inline __must_check int
+copy_struct_to_user(void __user *dst, size_t usize, const void *src,
+ size_t ksize, bool *ignored_trailing)
+{
+ size_t size = min(ksize, usize);
+ size_t rest = max(ksize, usize) - size;
+
+ /* Double check if ksize is larger than a known object size. */
+ if (WARN_ON_ONCE(ksize > __builtin_object_size(src, 1)))
+ return -E2BIG;
+
+ /* Deal with trailing bytes. */
+ if (usize > ksize) {
+ if (clear_user(dst + size, rest))
+ return -EFAULT;
+ }
+ if (ignored_trailing)
+ *ignored_trailing = ksize < usize &&
+ memchr_inv(src + size, 0, rest) != NULL;
+ /* Copy the interoperable parts of the struct. */
+ if (copy_to_user(dst, src, size))
+ return -EFAULT;
+ return 0;
+}
+
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size);
long copy_from_kernel_nofault(void *dst, const void *src, size_t size);
diff --git a/include/linux/unaligned.h b/include/linux/unaligned.h
new file mode 100644
index 000000000000..4a9651017e3c
--- /dev/null
+++ b/include/linux/unaligned.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_UNALIGNED_H
+#define __LINUX_UNALIGNED_H
+
+/*
+ * This is the most generic implementation of unaligned accesses
+ * and should work almost anywhere.
+ */
+#include <linux/unaligned/packed_struct.h>
+#include <asm/byteorder.h>
+#include <vdso/unaligned.h>
+
+#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
+#define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr))
+
+static inline u16 get_unaligned_le16(const void *p)
+{
+ return le16_to_cpu(__get_unaligned_t(__le16, p));
+}
+
+static inline u32 get_unaligned_le32(const void *p)
+{
+ return le32_to_cpu(__get_unaligned_t(__le32, p));
+}
+
+static inline u64 get_unaligned_le64(const void *p)
+{
+ return le64_to_cpu(__get_unaligned_t(__le64, p));
+}
+
+static inline void put_unaligned_le16(u16 val, void *p)
+{
+ __put_unaligned_t(__le16, cpu_to_le16(val), p);
+}
+
+static inline void put_unaligned_le32(u32 val, void *p)
+{
+ __put_unaligned_t(__le32, cpu_to_le32(val), p);
+}
+
+static inline void put_unaligned_le64(u64 val, void *p)
+{
+ __put_unaligned_t(__le64, cpu_to_le64(val), p);
+}
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+ return be16_to_cpu(__get_unaligned_t(__be16, p));
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+ return be32_to_cpu(__get_unaligned_t(__be32, p));
+}
+
+static inline u64 get_unaligned_be64(const void *p)
+{
+ return be64_to_cpu(__get_unaligned_t(__be64, p));
+}
+
+static inline void put_unaligned_be16(u16 val, void *p)
+{
+ __put_unaligned_t(__be16, cpu_to_be16(val), p);
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+ __put_unaligned_t(__be32, cpu_to_be32(val), p);
+}
+
+static inline void put_unaligned_be64(u64 val, void *p)
+{
+ __put_unaligned_t(__be64, cpu_to_be64(val), p);
+}
+
+static inline u32 __get_unaligned_be24(const u8 *p)
+{
+ return p[0] << 16 | p[1] << 8 | p[2];
+}
+
+static inline u32 get_unaligned_be24(const void *p)
+{
+ return __get_unaligned_be24(p);
+}
+
+static inline u32 __get_unaligned_le24(const u8 *p)
+{
+ return p[0] | p[1] << 8 | p[2] << 16;
+}
+
+static inline u32 get_unaligned_le24(const void *p)
+{
+ return __get_unaligned_le24(p);
+}
+
+static inline void __put_unaligned_be24(const u32 val, u8 *p)
+{
+ *p++ = (val >> 16) & 0xff;
+ *p++ = (val >> 8) & 0xff;
+ *p++ = val & 0xff;
+}
+
+static inline void put_unaligned_be24(const u32 val, void *p)
+{
+ __put_unaligned_be24(val, p);
+}
+
+static inline void __put_unaligned_le24(const u32 val, u8 *p)
+{
+ *p++ = val & 0xff;
+ *p++ = (val >> 8) & 0xff;
+ *p++ = (val >> 16) & 0xff;
+}
+
+static inline void put_unaligned_le24(const u32 val, void *p)
+{
+ __put_unaligned_le24(val, p);
+}
+
+static inline void __put_unaligned_be48(const u64 val, u8 *p)
+{
+ *p++ = (val >> 40) & 0xff;
+ *p++ = (val >> 32) & 0xff;
+ *p++ = (val >> 24) & 0xff;
+ *p++ = (val >> 16) & 0xff;
+ *p++ = (val >> 8) & 0xff;
+ *p++ = val & 0xff;
+}
+
+static inline void put_unaligned_be48(const u64 val, void *p)
+{
+ __put_unaligned_be48(val, p);
+}
+
+static inline u64 __get_unaligned_be48(const u8 *p)
+{
+ return (u64)p[0] << 40 | (u64)p[1] << 32 | (u64)p[2] << 24 |
+ p[3] << 16 | p[4] << 8 | p[5];
+}
+
+static inline u64 get_unaligned_be48(const void *p)
+{
+ return __get_unaligned_be48(p);
+}
+
+#endif /* __LINUX_UNALIGNED_H */
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 4d39e6e11a95..5e6b212a2aed 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -16,6 +16,8 @@ struct utf8data_table;
((unsigned int)(MIN) << UNICODE_MIN_SHIFT) | \
((unsigned int)(REV)))
+#define UTF8_LATEST UNICODE_AGE(12, 1, 0)
+
static inline u8 unicode_major(unsigned int age)
{
return (age >> UNICODE_MAJ_SHIFT) & 0xff;
@@ -76,4 +78,6 @@ int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
struct unicode_map *utf8_load(unsigned int version);
void utf8_unload(struct unicode_map *um);
+int utf8_parse_version(char *version);
+
#endif /* _LINUX_UNICODE_H */
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 2b294bf1881f..e0a4c2082245 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -15,6 +15,7 @@
#include <linux/rbtree.h>
#include <linux/types.h>
#include <linux/wait.h>
+#include <linux/timer.h>
struct uprobe;
struct vm_area_struct;
@@ -23,8 +24,17 @@ struct inode;
struct notifier_block;
struct page;
+/*
+ * Allowed return values from uprobe consumer's handler callback
+ * with following meaning:
+ *
+ * UPROBE_HANDLER_REMOVE
+ * - Remove the uprobe breakpoint from current->mm.
+ * UPROBE_HANDLER_IGNORE
+ * - Ignore ret_handler callback for this consumer.
+ */
#define UPROBE_HANDLER_REMOVE 1
-#define UPROBE_HANDLER_MASK 1
+#define UPROBE_HANDLER_IGNORE 2
#define MAX_URETPROBE_DEPTH 64
@@ -37,13 +47,15 @@ struct uprobe_consumer {
* for the current process. If filter() is omitted or returns true,
* UPROBE_HANDLER_REMOVE is effectively ignored.
*/
- int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
+ int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data);
int (*ret_handler)(struct uprobe_consumer *self,
unsigned long func,
- struct pt_regs *regs);
+ struct pt_regs *regs, __u64 *data);
bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
struct list_head cons_node;
+
+ __u64 id; /* set when uprobe_consumer is registered */
};
#ifdef CONFIG_UPROBES
@@ -56,12 +68,62 @@ enum uprobe_task_state {
UTASK_SSTEP_TRAPPED,
};
+/* The state of hybrid-lifetime uprobe inside struct return_instance */
+enum hprobe_state {
+ HPROBE_LEASED, /* uretprobes_srcu-protected uprobe */
+ HPROBE_STABLE, /* refcounted uprobe */
+ HPROBE_GONE, /* NULL uprobe, SRCU expired, refcount failed */
+ HPROBE_CONSUMED, /* uprobe "consumed" by uretprobe handler */
+};
+
+/*
+ * Hybrid lifetime uprobe. Represents a uprobe instance that could be either
+ * SRCU protected (with SRCU protection eventually potentially timing out),
+ * refcounted using uprobe->ref, or there could be no valid uprobe (NULL).
+ *
+ * hprobe's internal state is setup such that background timer thread can
+ * atomically "downgrade" temporarily RCU-protected uprobe into refcounted one
+ * (or no uprobe, if refcounting failed).
+ *
+ * *stable* pointer always point to the uprobe (or could be NULL if there is
+ * was no valid underlying uprobe to begin with).
+ *
+ * *leased* pointer is the key to achieving race-free atomic lifetime state
+ * transition and can have three possible states:
+ * - either the same non-NULL value as *stable*, in which case uprobe is
+ * SRCU-protected;
+ * - NULL, in which case uprobe (if there is any) is refcounted;
+ * - special __UPROBE_DEAD value, which represents an uprobe that was SRCU
+ * protected initially, but SRCU period timed out and we attempted to
+ * convert it to refcounted, but refcount_inc_not_zero() failed, because
+ * uprobe effectively went away (the last consumer unsubscribed). In this
+ * case it's important to know that *stable* pointer (which still has
+ * non-NULL uprobe pointer) shouldn't be used, because lifetime of
+ * underlying uprobe is not guaranteed anymore. __UPROBE_DEAD is just an
+ * internal marker and is handled transparently by hprobe_fetch() helper.
+ *
+ * When uprobe is SRCU-protected, we also record srcu_idx value, necessary for
+ * SRCU unlocking.
+ *
+ * See hprobe_expire() and hprobe_fetch() for details of race-free uprobe
+ * state transitioning details. It all hinges on atomic xchg() over *leaded*
+ * pointer. *stable* pointer, once initially set, is not modified concurrently.
+ */
+struct hprobe {
+ enum hprobe_state state;
+ int srcu_idx;
+ struct uprobe *uprobe;
+};
+
/*
* uprobe_task: Metadata of a task while it singlesteps.
*/
struct uprobe_task {
enum uprobe_task_state state;
+ unsigned int depth;
+ struct return_instance *return_instances;
+
union {
struct {
struct arch_uprobe_task autask;
@@ -75,23 +137,30 @@ struct uprobe_task {
};
struct uprobe *active_uprobe;
+ struct timer_list ri_timer;
unsigned long xol_vaddr;
struct arch_uprobe *auprobe;
+};
- struct return_instance *return_instances;
- unsigned int depth;
+struct return_consumer {
+ __u64 cookie;
+ __u64 id;
};
struct return_instance {
- struct uprobe *uprobe;
+ struct hprobe hprobe;
unsigned long func;
unsigned long stack; /* stack pointer */
unsigned long orig_ret_vaddr; /* original return address */
bool chained; /* true, if instance is nested */
+ int consumers_cnt;
struct return_instance *next; /* keep as stack */
-};
+ struct rcu_head rcu;
+
+ struct return_consumer consumers[] __counted_by(consumers_cnt);
+} ____cacheline_aligned;
enum rp_check {
RP_CHECK_CALL,
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 832997a9da0a..672d8fc2abdb 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -495,6 +495,12 @@ struct usb_dev_state;
struct usb_tt;
+enum usb_link_tunnel_mode {
+ USB_LINK_UNKNOWN = 0,
+ USB_LINK_NATIVE,
+ USB_LINK_TUNNELED,
+};
+
enum usb_port_connect_type {
USB_PORT_CONNECT_TYPE_UNKNOWN = 0,
USB_PORT_CONNECT_TYPE_HOT_PLUG,
@@ -605,6 +611,7 @@ struct usb3_lpm_parameters {
* WUSB devices are not, until we authorize them from user space.
* FIXME -- complete doc
* @authenticated: Crypto authentication passed
+ * @tunnel_mode: Connection native or tunneled over USB4
* @lpm_capable: device supports LPM
* @lpm_devinit_allow: Allow USB3 device initiated LPM, exit latency is in range
* @usb2_hw_lpm_capable: device can perform USB2 hardware LPM
@@ -714,6 +721,7 @@ struct usb_device {
unsigned do_remote_wakeup:1;
unsigned reset_resume:1;
unsigned port_is_suspended:1;
+ enum usb_link_tunnel_mode tunnel_mode;
int slot_id;
struct usb2_lpm_parameters l1_params;
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index af3cd2aae4bc..6e38fb9d2117 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -256,7 +256,7 @@ int config_ep_by_speed(struct usb_gadget *g, struct usb_function *f,
struct usb_ep *_ep);
int usb_func_wakeup(struct usb_function *func);
-#define MAX_CONFIG_INTERFACES 16 /* arbitrary; max 255 */
+#define MAX_CONFIG_INTERFACES 32
/**
* struct usb_configuration - represents one gadget configuration
diff --git a/include/linux/usb/func_utils.h b/include/linux/usb/func_utils.h
new file mode 100644
index 000000000000..c8795c965109
--- /dev/null
+++ b/include/linux/usb/func_utils.h
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * func_utils.h
+ *
+ * Utility definitions for USB functions
+ *
+ * Copyright (c) 2013 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com
+ *
+ * Author: Andrzej Pietrasiewicz <[email protected]>
+ */
+
+#ifndef _FUNC_UTILS_H_
+#define _FUNC_UTILS_H_
+
+#include <linux/usb/gadget.h>
+#include <linux/overflow.h>
+
+/* Variable Length Array Macros **********************************************/
+#define vla_group(groupname) size_t groupname##__next = 0
+#define vla_group_size(groupname) groupname##__next
+
+#define vla_item(groupname, type, name, n) \
+ size_t groupname##_##name##__offset = ({ \
+ size_t offset = 0; \
+ if (groupname##__next != SIZE_MAX) { \
+ size_t align_mask = __alignof__(type) - 1; \
+ size_t size = array_size(n, sizeof(type)); \
+ offset = (groupname##__next + align_mask) & \
+ ~align_mask; \
+ if (check_add_overflow(offset, size, \
+ &groupname##__next)) { \
+ groupname##__next = SIZE_MAX; \
+ offset = 0; \
+ } \
+ } \
+ offset; \
+ })
+
+#define vla_item_with_sz(groupname, type, name, n) \
+ size_t groupname##_##name##__sz = array_size(n, sizeof(type)); \
+ size_t groupname##_##name##__offset = ({ \
+ size_t offset = 0; \
+ if (groupname##__next != SIZE_MAX) { \
+ size_t align_mask = __alignof__(type) - 1; \
+ offset = (groupname##__next + align_mask) & \
+ ~align_mask; \
+ if (check_add_overflow(offset, groupname##_##name##__sz,\
+ &groupname##__next)) { \
+ groupname##__next = SIZE_MAX; \
+ offset = 0; \
+ } \
+ } \
+ offset; \
+ })
+
+#define vla_ptr(ptr, groupname, name) \
+ ((void *) ((char *)ptr + groupname##_##name##__offset))
+
+struct usb_ep;
+struct usb_request;
+
+/**
+ * alloc_ep_req - returns a usb_request allocated by the gadget driver and
+ * allocates the request's buffer.
+ *
+ * @ep: the endpoint to allocate a usb_request
+ * @len: usb_requests's buffer suggested size
+ *
+ * In case @ep direction is OUT, the @len will be aligned to ep's
+ * wMaxPacketSize. In order to avoid memory leaks or drops, *always* use
+ * usb_requests's length (req->length) to refer to the allocated buffer size.
+ * Requests allocated via alloc_ep_req() *must* be freed by free_ep_req().
+ */
+struct usb_request *alloc_ep_req(struct usb_ep *ep, size_t len);
+
+/* Frees a usb_request previously allocated by alloc_ep_req() */
+static inline void free_ep_req(struct usb_ep *ep, struct usb_request *req)
+{
+ WARN_ON(req->buf == NULL);
+ kfree(req->buf);
+ req->buf = NULL;
+ usb_ep_free_request(ep, req);
+}
+
+#endif /* _FUNC_UTILS_H_ */
diff --git a/include/linux/usb/gadget_configfs.h b/include/linux/usb/gadget_configfs.h
index d61aebd68128..6b5d6838f865 100644
--- a/include/linux/usb/gadget_configfs.h
+++ b/include/linux/usb/gadget_configfs.h
@@ -4,9 +4,6 @@
#include <linux/configfs.h>
-int check_user_usb_string(const char *name,
- struct usb_gadget_strings *stringtab_dev);
-
#define GS_STRINGS_W(__struct, __name) \
static ssize_t __struct##_##__name##_store(struct config_item *item, \
const char *page, size_t len) \
@@ -37,7 +34,7 @@ static struct configfs_item_operations struct_in##_langid_item_ops = { \
.release = struct_in##_attr_release, \
}; \
\
-static struct config_item_type struct_in##_langid_type = { \
+static const struct config_item_type struct_in##_langid_type = { \
.ct_item_ops = &struct_in##_langid_item_ops, \
.ct_attrs = struct_in##_langid_attrs, \
.ct_owner = THIS_MODULE, \
@@ -94,7 +91,7 @@ static struct configfs_group_operations struct_in##_strings_ops = { \
.drop_item = &struct_in##_strings_drop, \
}; \
\
-static struct config_item_type struct_in##_strings_type = { \
+static const struct config_item_type struct_in##_strings_type = { \
.ct_group_ops = &struct_in##_strings_ops, \
.ct_owner = THIS_MODULE, \
}
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 1a0a4dc87980..75b2b763f1ba 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -311,8 +311,11 @@ struct usb_serial_driver {
#define to_usb_serial_driver(d) \
container_of(d, struct usb_serial_driver, driver)
-int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[],
- const char *name, const struct usb_device_id *id_table);
+#define usb_serial_register_drivers(serial_drivers, name, id_table) \
+ __usb_serial_register_drivers(serial_drivers, THIS_MODULE, name, id_table)
+int __usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[],
+ struct module *owner, const char *name,
+ const struct usb_device_id *id_table);
void usb_serial_deregister_drivers(struct usb_serial_driver *const serial_drivers[]);
void usb_serial_port_softint(struct usb_serial_port *port);
diff --git a/include/linux/usb/tcpci.h b/include/linux/usb/tcpci.h
index 0ab39b6ea205..f7f5cfbdef12 100644
--- a/include/linux/usb/tcpci.h
+++ b/include/linux/usb/tcpci.h
@@ -63,15 +63,12 @@
#define TCPC_ROLE_CTRL 0x1a
#define TCPC_ROLE_CTRL_DRP BIT(6)
-#define TCPC_ROLE_CTRL_RP_VAL_SHIFT 4
-#define TCPC_ROLE_CTRL_RP_VAL_MASK 0x3
+#define TCPC_ROLE_CTRL_RP_VAL GENMASK(5, 4)
#define TCPC_ROLE_CTRL_RP_VAL_DEF 0x0
#define TCPC_ROLE_CTRL_RP_VAL_1_5 0x1
#define TCPC_ROLE_CTRL_RP_VAL_3_0 0x2
-#define TCPC_ROLE_CTRL_CC2_SHIFT 2
-#define TCPC_ROLE_CTRL_CC2_MASK 0x3
-#define TCPC_ROLE_CTRL_CC1_SHIFT 0
-#define TCPC_ROLE_CTRL_CC1_MASK 0x3
+#define TCPC_ROLE_CTRL_CC2 GENMASK(3, 2)
+#define TCPC_ROLE_CTRL_CC1 GENMASK(1, 0)
#define TCPC_ROLE_CTRL_CC_RA 0x0
#define TCPC_ROLE_CTRL_CC_RP 0x1
#define TCPC_ROLE_CTRL_CC_RD 0x2
@@ -92,11 +89,9 @@
#define TCPC_CC_STATUS_TERM BIT(4)
#define TCPC_CC_STATUS_TERM_RP 0
#define TCPC_CC_STATUS_TERM_RD 1
+#define TCPC_CC_STATUS_CC2 GENMASK(3, 2)
+#define TCPC_CC_STATUS_CC1 GENMASK(1, 0)
#define TCPC_CC_STATE_SRC_OPEN 0
-#define TCPC_CC_STATUS_CC2_SHIFT 2
-#define TCPC_CC_STATUS_CC2_MASK 0x3
-#define TCPC_CC_STATUS_CC1_SHIFT 0
-#define TCPC_CC_STATUS_CC1_MASK 0x3
#define TCPC_POWER_STATUS 0x1e
#define TCPC_POWER_STATUS_DBG_ACC_CON BIT(7)
@@ -134,9 +129,8 @@
#define TCPC_MSG_HDR_INFO 0x2e
#define TCPC_MSG_HDR_INFO_DATA_ROLE BIT(3)
+#define TCPC_MSG_HDR_INFO_REV GENMASK(2, 1)
#define TCPC_MSG_HDR_INFO_PWR_ROLE BIT(0)
-#define TCPC_MSG_HDR_INFO_REV_SHIFT 1
-#define TCPC_MSG_HDR_INFO_REV_MASK 0x3
#define TCPC_RX_DETECT 0x2f
#define TCPC_RX_DETECT_HARD_RESET BIT(5)
@@ -154,10 +148,8 @@
#define TCPC_RX_DATA 0x34 /* through 0x4f */
#define TCPC_TRANSMIT 0x50
-#define TCPC_TRANSMIT_RETRY_SHIFT 4
-#define TCPC_TRANSMIT_RETRY_MASK 0x3
-#define TCPC_TRANSMIT_TYPE_SHIFT 0
-#define TCPC_TRANSMIT_TYPE_MASK 0x7
+#define TCPC_TRANSMIT_RETRY GENMASK(5, 4)
+#define TCPC_TRANSMIT_TYPE GENMASK(2, 0)
#define TCPC_TX_BYTE_CNT 0x51
#define TCPC_TX_HDR 0x52
@@ -178,8 +170,7 @@
#define tcpc_presenting_rd(reg, cc) \
(!(TCPC_ROLE_CTRL_DRP & (reg)) && \
- (((reg) & (TCPC_ROLE_CTRL_## cc ##_MASK << TCPC_ROLE_CTRL_## cc ##_SHIFT)) == \
- (TCPC_ROLE_CTRL_CC_RD << TCPC_ROLE_CTRL_## cc ##_SHIFT)))
+ FIELD_GET(TCPC_ROLE_CTRL_## cc, reg) == TCPC_ROLE_CTRL_CC_RD)
struct tcpci;
@@ -190,7 +181,7 @@ struct tcpci;
* Optional; Callback to perform chip specific operations when FRS
* is sourcing vbus.
* @auto_discharge_disconnect:
- * Optional; Enables TCPC to autonously discharge vbus on disconnect.
+ * Optional; Enables TCPC to autonomously discharge vbus on disconnect.
* @vbus_vsafe0v:
* optional; Set when TCPC can detect whether vbus is at VSAFE0V.
* @set_partner_usb_comm_capable:
@@ -256,7 +247,7 @@ static inline enum typec_cc_status tcpci_to_typec_cc(unsigned int cc, bool sink)
if (sink)
return TYPEC_CC_RP_3_0;
fallthrough;
- case 0x0:
+ case TCPC_CC_STATE_SRC_OPEN:
default:
return TYPEC_CC_OPEN;
}
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 9f08a584d707..0b9f1e598e3a 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -76,8 +76,23 @@ struct usbnet {
# define EVENT_LINK_CHANGE 11
# define EVENT_SET_RX_MODE 12
# define EVENT_NO_IP_ALIGN 13
+/* This one is special, as it indicates that the device is going away
+ * there are cyclic dependencies between tasklet, timer and bh
+ * that must be broken
+ */
+# define EVENT_UNPLUG 31
};
+static inline bool usbnet_going_away(struct usbnet *ubn)
+{
+ return test_bit(EVENT_UNPLUG, &ubn->flags);
+}
+
+static inline void usbnet_mark_going_away(struct usbnet *ubn)
+{
+ set_bit(EVENT_UNPLUG, &ubn->flags);
+}
+
static inline struct usb_driver *driver_of(struct usb_interface *intf)
{
return to_usb_driver(intf->dev.driver);
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 3625096d5f85..7183e5aca282 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -141,7 +141,8 @@ static inline long get_rlimit_value(struct ucounts *ucounts, enum rlimit_type ty
long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v);
-long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type);
+long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type,
+ bool override_rlimit);
void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type);
bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long max);
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 9fc6ce15c499..cb40f1a1d081 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -249,6 +249,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
extern void dup_userfaultfd_complete(struct list_head *);
+void dup_userfaultfd_fail(struct list_head *);
extern void mremap_userfaultfd_prep(struct vm_area_struct *,
struct vm_userfaultfd_ctx *);
@@ -351,6 +352,10 @@ static inline void dup_userfaultfd_complete(struct list_head *l)
{
}
+static inline void dup_userfaultfd_fail(struct list_head *l)
+{
+}
+
static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
struct vm_userfaultfd_ctx *ctx)
{
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 7977ca03ac7a..2e7a30fe6b92 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -582,11 +582,20 @@ void vdpa_set_status(struct vdpa_device *vdev, u8 status);
* @dev: vdpa device to remove
* Driver need to remove the specified device by calling
* _vdpa_unregister_device().
+ * @dev_set_attr: change a vdpa device's attr after it was create
+ * @mdev: parent device to use for device
+ * @dev: vdpa device structure
+ * @config:Attributes to be set for the device.
+ * The driver needs to check the mask of the structure and then set
+ * the related information to the vdpa device. The driver must return 0
+ * if set successfully.
*/
struct vdpa_mgmtdev_ops {
int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name,
const struct vdpa_dev_set_config *config);
void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev);
+ int (*dev_set_attr)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev,
+ const struct vdpa_dev_set_config *config);
};
/**
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 276ca543ef44..02a9f4dc594d 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -103,8 +103,10 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
if (!skb_partial_csum_set(skb, start, off))
return -EINVAL;
+ if (skb_transport_offset(skb) < nh_min_len)
+ return -EINVAL;
- nh_min_len = max_t(u32, nh_min_len, skb_transport_offset(skb));
+ nh_min_len = skb_transport_offset(skb);
p_off = nh_min_len + thlen;
if (!pskb_may_pull(skb, p_off))
return -EINVAL;
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index aed952d04132..f70d0958095c 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -134,6 +134,8 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
#ifdef CONFIG_SWAP
SWAP_RA,
SWAP_RA_HIT,
+ SWPIN_ZERO,
+ SWPOUT_ZERO,
#ifdef CONFIG_KSM
KSM_SWPIN_COPY,
#endif
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 8aa3372f21a0..6d90ad974408 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -221,6 +221,7 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL)
#define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1)
#define wake_up_all_locked(x) __wake_up_locked((x), TASK_NORMAL, 0)
+#define wake_up_sync(x) __wake_up_sync(x, TASK_NORMAL)
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
@@ -541,8 +542,8 @@ do { \
int __ret = 0; \
struct hrtimer_sleeper __t; \
\
- hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \
- HRTIMER_MODE_REL); \
+ hrtimer_setup_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \
+ HRTIMER_MODE_REL); \
if ((timeout) != KTIME_MAX) { \
hrtimer_set_expires_range_ns(&__t.timer, timeout, \
current->timer_slack_ns); \
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 7725b7579b78..9e29d79fc790 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -8,7 +8,7 @@
#include <linux/wait.h>
struct wait_bit_key {
- void *flags;
+ unsigned long *flags;
int bit_nr;
unsigned long timeout;
};
@@ -23,14 +23,14 @@ struct wait_bit_queue_entry {
typedef int wait_bit_action_f(struct wait_bit_key *key, int mode);
-void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit);
+void __wake_up_bit(struct wait_queue_head *wq_head, unsigned long *word, int bit);
int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode);
-void wake_up_bit(void *word, int bit);
-int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode);
-int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);
-int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode);
-struct wait_queue_head *bit_waitqueue(void *word, int bit);
+void wake_up_bit(unsigned long *word, int bit);
+int out_of_line_wait_on_bit(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode);
+int out_of_line_wait_on_bit_timeout(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout);
+int out_of_line_wait_on_bit_lock(unsigned long *word, int, wait_bit_action_f *action, unsigned int mode);
+struct wait_queue_head *bit_waitqueue(unsigned long *word, int bit);
extern void __init wait_bit_init(void);
int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync, void *key);
@@ -49,23 +49,24 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
extern int bit_wait(struct wait_bit_key *key, int mode);
extern int bit_wait_io(struct wait_bit_key *key, int mode);
extern int bit_wait_timeout(struct wait_bit_key *key, int mode);
-extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode);
/**
* wait_on_bit - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * @word: the address containing the bit being waited on
+ * @bit: the bit at that address being waited on
* @mode: the task state to sleep in
*
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that waits on a bit.
- * For instance, if one were to have waiters on a bitflag, one would
- * call wait_on_bit() in threads waiting for the bit to clear.
- * One uses wait_on_bit() where one is waiting for the bit to clear,
- * but has no intention of setting it.
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
+ * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP())
+ * to be cleared. The clearing of the bit must be signalled with
+ * wake_up_bit(), often as clear_and_wake_up_bit().
+ *
+ * The process will wait on a waitqueue selected by hash from a shared
+ * pool. It will only be woken on a wake_up for the target bit, even
+ * if other processes on the same queue are waiting for other bits.
+ *
+ * Returned value will be zero if the bit was cleared in which case the
+ * call has ACQUIRE semantics, or %-EINTR if the process received a
+ * signal and the mode permitted wake up on that signal.
*/
static inline int
wait_on_bit(unsigned long *word, int bit, unsigned mode)
@@ -80,17 +81,20 @@ wait_on_bit(unsigned long *word, int bit, unsigned mode)
/**
* wait_on_bit_io - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * @word: the address containing the bit being waited on
+ * @bit: the bit at that address being waited on
* @mode: the task state to sleep in
*
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared. This is similar to wait_on_bit(), but calls
- * io_schedule() instead of schedule() for the actual waiting.
+ * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP())
+ * to be cleared. The clearing of the bit must be signalled with
+ * wake_up_bit(), often as clear_and_wake_up_bit().
*
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
+ * This is similar to wait_on_bit(), but calls io_schedule() instead of
+ * schedule() for the actual waiting.
+ *
+ * Returned value will be zero if the bit was cleared in which case the
+ * call has ACQUIRE semantics, or %-EINTR if the process received a
+ * signal and the mode permitted wake up on that signal.
*/
static inline int
wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
@@ -104,19 +108,24 @@ wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
}
/**
- * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * wait_on_bit_timeout - wait for a bit to be cleared or a timeout to elapse
+ * @word: the address containing the bit being waited on
+ * @bit: the bit at that address being waited on
* @mode: the task state to sleep in
* @timeout: timeout, in jiffies
*
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared. This is similar to wait_on_bit(), except also takes a
- * timeout parameter.
+ * Wait for the given bit in an unsigned long or bitmap (see
+ * DECLARE_BITMAP()) to be cleared, or for a timeout to expire. The
+ * clearing of the bit must be signalled with wake_up_bit(), often as
+ * clear_and_wake_up_bit().
+ *
+ * This is similar to wait_on_bit(), except it also takes a timeout
+ * parameter.
*
- * Returned value will be zero if the bit was cleared before the
- * @timeout elapsed, or non-zero if the @timeout elapsed or process
- * received a signal and the mode permitted wakeup on that signal.
+ * Returned value will be zero if the bit was cleared in which case the
+ * call has ACQUIRE semantics, or %-EINTR if the process received a
+ * signal and the mode permitted wake up on that signal, or %-EAGAIN if the
+ * timeout elapsed.
*/
static inline int
wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
@@ -132,19 +141,21 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
/**
* wait_on_bit_action - wait for a bit to be cleared
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * @word: the address containing the bit waited on
+ * @bit: the bit at that address being waited on
* @action: the function used to sleep, which may take special actions
* @mode: the task state to sleep in
*
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared, and allow the waiting action to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
- * is done.
+ * Wait for the given bit in an unsigned long or bitmap (see DECLARE_BITMAP())
+ * to be cleared. The clearing of the bit must be signalled with
+ * wake_up_bit(), often as clear_and_wake_up_bit().
+ *
+ * This is similar to wait_on_bit(), but calls @action() instead of
+ * schedule() for the actual waiting.
*
- * Returned value will be zero if the bit was cleared, or non-zero
- * if the process received a signal and the mode permitted wakeup
- * on that signal.
+ * Returned value will be zero if the bit was cleared in which case the
+ * call has ACQUIRE semantics, or the error code returned by @action if
+ * that call returned non-zero.
*/
static inline int
wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
@@ -157,23 +168,22 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
}
/**
- * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * wait_on_bit_lock - wait for a bit to be cleared, then set it
+ * @word: the address containing the bit being waited on
+ * @bit: the bit of the word being waited on and set
* @mode: the task state to sleep in
*
- * There is a standard hashed waitqueue table for generic use. This
- * is the part of the hashtable's accessor API that waits on a bit
- * when one intends to set it, for instance, trying to lock bitflags.
- * For instance, if one were to have waiters trying to set bitflag
- * and waiting for it to clear before setting it, one would call
- * wait_on_bit() in threads waiting to be able to set the bit.
- * One uses wait_on_bit_lock() where one is waiting for the bit to
- * clear with the intention of setting it, and when done, clearing it.
+ * Wait for the given bit in an unsigned long or bitmap (see
+ * DECLARE_BITMAP()) to be cleared. The clearing of the bit must be
+ * signalled with wake_up_bit(), often as clear_and_wake_up_bit(). As
+ * soon as it is clear, atomically set it and return.
*
- * Returns zero if the bit was (eventually) found to be clear and was
- * set. Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
+ * This is similar to wait_on_bit(), but sets the bit before returning.
+ *
+ * Returned value will be zero if the bit was successfully set in which
+ * case the call has the same memory sequencing semantics as
+ * test_and_clear_bit(), or %-EINTR if the process received a signal and
+ * the mode permitted wake up on that signal.
*/
static inline int
wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
@@ -185,15 +195,18 @@ wait_on_bit_lock(unsigned long *word, int bit, unsigned mode)
}
/**
- * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * wait_on_bit_lock_io - wait for a bit to be cleared, then set it
+ * @word: the address containing the bit being waited on
+ * @bit: the bit of the word being waited on and set
* @mode: the task state to sleep in
*
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared and then to atomically set it. This is similar
- * to wait_on_bit(), but calls io_schedule() instead of schedule()
- * for the actual waiting.
+ * Wait for the given bit in an unsigned long or bitmap (see
+ * DECLARE_BITMAP()) to be cleared. The clearing of the bit must be
+ * signalled with wake_up_bit(), often as clear_and_wake_up_bit(). As
+ * soon as it is clear, atomically set it and return.
+ *
+ * This is similar to wait_on_bit_lock(), but calls io_schedule() instead
+ * of schedule().
*
* Returns zero if the bit was (eventually) found to be clear and was
* set. Returns non-zero if a signal was delivered to the process and
@@ -209,21 +222,19 @@ wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode)
}
/**
- * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it
- * @word: the word being waited on, a kernel virtual address
- * @bit: the bit of the word being waited on
+ * wait_on_bit_lock_action - wait for a bit to be cleared, then set it
+ * @word: the address containing the bit being waited on
+ * @bit: the bit of the word being waited on and set
* @action: the function used to sleep, which may take special actions
* @mode: the task state to sleep in
*
- * Use the standard hashed waitqueue table to wait for a bit
- * to be cleared and then to set it, and allow the waiting action
- * to be specified.
- * This is like wait_on_bit() but allows fine control of how the waiting
- * is done.
+ * This is similar to wait_on_bit_lock(), but calls @action() instead of
+ * schedule() for the actual waiting.
*
- * Returns zero if the bit was (eventually) found to be clear and was
- * set. Returns non-zero if a signal was delivered to the process and
- * the @mode allows that signal to wake the process.
+ * Returned value will be zero if the bit was successfully set in which
+ * case the call has the same memory sequencing semantics as
+ * test_and_clear_bit(), or the error code returned by @action if that
+ * call returned non-zero.
*/
static inline int
wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action,
@@ -269,7 +280,26 @@ __out: __ret; \
#define __wait_var_event(var, condition) \
___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
schedule())
+#define __wait_var_event_io(var, condition) \
+ ___wait_var_event(var, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
+ io_schedule())
+/**
+ * wait_var_event - wait for a variable to be updated and notified
+ * @var: the address of variable being waited on
+ * @condition: the condition to wait for
+ *
+ * Wait for a @condition to be true, only re-checking when a wake up is
+ * received for the given @var (an arbitrary kernel address which need
+ * not be directly related to the given condition, but usually is).
+ *
+ * The process will wait on a waitqueue selected by hash from a shared
+ * pool. It will only be woken on a wake_up for the given address.
+ *
+ * The condition should normally use smp_load_acquire() or a similarly
+ * ordered access to ensure that any changes to memory made before the
+ * condition became true will be visible after the wait completes.
+ */
#define wait_var_event(var, condition) \
do { \
might_sleep(); \
@@ -278,10 +308,56 @@ do { \
__wait_var_event(var, condition); \
} while (0)
+/**
+ * wait_var_event_io - wait for a variable to be updated and notified
+ * @var: the address of variable being waited on
+ * @condition: the condition to wait for
+ *
+ * Wait for an IO related @condition to be true, only re-checking when a
+ * wake up is received for the given @var (an arbitrary kernel address
+ * which need not be directly related to the given condition, but
+ * usually is).
+ *
+ * The process will wait on a waitqueue selected by hash from a shared
+ * pool. It will only be woken on a wake_up for the given address.
+ *
+ * This is similar to wait_var_event(), but calls io_schedule() instead
+ * of schedule().
+ *
+ * The condition should normally use smp_load_acquire() or a similarly
+ * ordered access to ensure that any changes to memory made before the
+ * condition became true will be visible after the wait completes.
+ */
+#define wait_var_event_io(var, condition) \
+do { \
+ might_sleep(); \
+ if (condition) \
+ break; \
+ __wait_var_event_io(var, condition); \
+} while (0)
+
#define __wait_var_event_killable(var, condition) \
___wait_var_event(var, condition, TASK_KILLABLE, 0, 0, \
schedule())
+/**
+ * wait_var_event_killable - wait for a variable to be updated and notified
+ * @var: the address of variable being waited on
+ * @condition: the condition to wait for
+ *
+ * Wait for a @condition to be true or a fatal signal to be received,
+ * only re-checking the condition when a wake up is received for the given
+ * @var (an arbitrary kernel address which need not be directly related
+ * to the given condition, but usually is).
+ *
+ * This is similar to wait_var_event() but returns a value which is
+ * 0 if the condition became true, or %-ERESTARTSYS if a fatal signal
+ * was received.
+ *
+ * The condition should normally use smp_load_acquire() or a similarly
+ * ordered access to ensure that any changes to memory made before the
+ * condition became true will be visible after the wait completes.
+ */
#define wait_var_event_killable(var, condition) \
({ \
int __ret = 0; \
@@ -296,6 +372,26 @@ do { \
TASK_UNINTERRUPTIBLE, 0, timeout, \
__ret = schedule_timeout(__ret))
+/**
+ * wait_var_event_timeout - wait for a variable to be updated or a timeout to expire
+ * @var: the address of variable being waited on
+ * @condition: the condition to wait for
+ * @timeout: maximum time to wait in jiffies
+ *
+ * Wait for a @condition to be true or a timeout to expire, only
+ * re-checking the condition when a wake up is received for the given
+ * @var (an arbitrary kernel address which need not be directly related
+ * to the given condition, but usually is).
+ *
+ * This is similar to wait_var_event() but returns a value which is 0 if
+ * the timeout expired and the condition was still false, or the
+ * remaining time left in the timeout (but at least 1) if the condition
+ * was found to be true.
+ *
+ * The condition should normally use smp_load_acquire() or a similarly
+ * ordered access to ensure that any changes to memory made before the
+ * condition became true will be visible after the wait completes.
+ */
#define wait_var_event_timeout(var, condition, timeout) \
({ \
long __ret = timeout; \
@@ -309,6 +405,23 @@ do { \
___wait_var_event(var, condition, TASK_INTERRUPTIBLE, 0, 0, \
schedule())
+/**
+ * wait_var_event_killable - wait for a variable to be updated and notified
+ * @var: the address of variable being waited on
+ * @condition: the condition to wait for
+ *
+ * Wait for a @condition to be true or a signal to be received, only
+ * re-checking the condition when a wake up is received for the given
+ * @var (an arbitrary kernel address which need not be directly related
+ * to the given condition, but usually is).
+ *
+ * This is similar to wait_var_event() but returns a value which is 0 if
+ * the condition became true, or %-ERESTARTSYS if a signal was received.
+ *
+ * The condition should normally use smp_load_acquire() or a similarly
+ * ordered access to ensure that any changes to memory made before the
+ * condition became true will be visible after the wait completes.
+ */
#define wait_var_event_interruptible(var, condition) \
({ \
int __ret = 0; \
@@ -319,15 +432,122 @@ do { \
})
/**
- * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit
+ * wait_var_event_any_lock - wait for a variable to be updated under a lock
+ * @var: the address of the variable being waited on
+ * @condition: condition to wait for
+ * @lock: the object that is locked to protect updates to the variable
+ * @type: prefix on lock and unlock operations
+ * @state: waiting state, %TASK_UNINTERRUPTIBLE etc.
+ *
+ * Wait for a condition which can only be reliably tested while holding
+ * a lock. The variables assessed in the condition will normal be updated
+ * under the same lock, and the wake up should be signalled with
+ * wake_up_var_locked() under the same lock.
+ *
+ * This is similar to wait_var_event(), but assumes a lock is held
+ * while calling this function and while updating the variable.
*
+ * This must be called while the given lock is held and the lock will be
+ * dropped when schedule() is called to wait for a wake up, and will be
+ * reclaimed before testing the condition again. The functions used to
+ * unlock and lock the object are constructed by appending _unlock and _lock
+ * to @type.
+ *
+ * Return %-ERESTARTSYS if a signal arrives which is allowed to interrupt
+ * the wait according to @state.
+ */
+#define wait_var_event_any_lock(var, condition, lock, type, state) \
+({ \
+ int __ret = 0; \
+ if (!(condition)) \
+ __ret = ___wait_var_event(var, condition, state, 0, 0, \
+ type ## _unlock(lock); \
+ schedule(); \
+ type ## _lock(lock)); \
+ __ret; \
+})
+
+/**
+ * wait_var_event_spinlock - wait for a variable to be updated under a spinlock
+ * @var: the address of the variable being waited on
+ * @condition: condition to wait for
+ * @lock: the spinlock which protects updates to the variable
+ *
+ * Wait for a condition which can only be reliably tested while holding
+ * a spinlock. The variables assessed in the condition will normal be updated
+ * under the same spinlock, and the wake up should be signalled with
+ * wake_up_var_locked() under the same spinlock.
+ *
+ * This is similar to wait_var_event(), but assumes a spinlock is held
+ * while calling this function and while updating the variable.
+ *
+ * This must be called while the given lock is held and the lock will be
+ * dropped when schedule() is called to wait for a wake up, and will be
+ * reclaimed before testing the condition again.
+ */
+#define wait_var_event_spinlock(var, condition, lock) \
+ wait_var_event_any_lock(var, condition, lock, spin, TASK_UNINTERRUPTIBLE)
+
+/**
+ * wait_var_event_mutex - wait for a variable to be updated under a mutex
+ * @var: the address of the variable being waited on
+ * @condition: condition to wait for
+ * @mutex: the mutex which protects updates to the variable
+ *
+ * Wait for a condition which can only be reliably tested while holding
+ * a mutex. The variables assessed in the condition will normal be
+ * updated under the same mutex, and the wake up should be signalled
+ * with wake_up_var_locked() under the same mutex.
+ *
+ * This is similar to wait_var_event(), but assumes a mutex is held
+ * while calling this function and while updating the variable.
+ *
+ * This must be called while the given mutex is held and the mutex will be
+ * dropped when schedule() is called to wait for a wake up, and will be
+ * reclaimed before testing the condition again.
+ */
+#define wait_var_event_mutex(var, condition, lock) \
+ wait_var_event_any_lock(var, condition, lock, mutex, TASK_UNINTERRUPTIBLE)
+
+/**
+ * wake_up_var_protected - wake up waiters for a variable asserting that it is safe
+ * @var: the address of the variable being waited on
+ * @cond: the condition which afirms this is safe
+ *
+ * When waking waiters which use wait_var_event_any_lock() the waker must be
+ * holding the reelvant lock to avoid races. This version of wake_up_var()
+ * asserts that the relevant lock is held and so no barrier is needed.
+ * The @cond is only tested when CONFIG_LOCKDEP is enabled.
+ */
+#define wake_up_var_protected(var, cond) \
+do { \
+ lockdep_assert(cond); \
+ wake_up_var(var); \
+} while (0)
+
+/**
+ * wake_up_var_locked - wake up waiters for a variable while holding a spinlock or mutex
+ * @var: the address of the variable being waited on
+ * @lock: The spinlock or mutex what protects the variable
+ *
+ * Send a wake up for the given variable which should be waited for with
+ * wait_var_event_spinlock() or wait_var_event_mutex(). Unlike wake_up_var(),
+ * no extra barriers are needed as the locking provides sufficient sequencing.
+ */
+#define wake_up_var_locked(var, lock) \
+ wake_up_var_protected(var, lockdep_is_held(lock))
+
+/**
+ * clear_and_wake_up_bit - clear a bit and wake up anyone waiting on that bit
* @bit: the bit of the word being waited on
- * @word: the word being waited on, a kernel virtual address
+ * @word: the address containing the bit being waited on
*
- * You can use this helper if bitflags are manipulated atomically rather than
- * non-atomically under a lock.
+ * The designated bit is cleared and any tasks waiting in wait_on_bit()
+ * or similar will be woken. This call has RELEASE semantics so that
+ * any changes to memory made before this call are guaranteed to be visible
+ * after the corresponding wait_on_bit() completes.
*/
-static inline void clear_and_wake_up_bit(int bit, void *word)
+static inline void clear_and_wake_up_bit(int bit, unsigned long *word)
{
clear_bit_unlock(bit, word);
/* See wake_up_bit() for which memory barrier you need to use. */
@@ -335,4 +555,64 @@ static inline void clear_and_wake_up_bit(int bit, void *word)
wake_up_bit(word, bit);
}
+/**
+ * test_and_clear_wake_up_bit - clear a bit if it was set: wake up anyone waiting on that bit
+ * @bit: the bit of the word being waited on
+ * @word: the address of memory containing that bit
+ *
+ * If the bit is set and can be atomically cleared, any tasks waiting in
+ * wait_on_bit() or similar will be woken. This call has the same
+ * complete ordering semantics as test_and_clear_bit(). Any changes to
+ * memory made before this call are guaranteed to be visible after the
+ * corresponding wait_on_bit() completes.
+ *
+ * Returns %true if the bit was successfully set and the wake up was sent.
+ */
+static inline bool test_and_clear_wake_up_bit(int bit, unsigned long *word)
+{
+ if (!test_and_clear_bit(bit, word))
+ return false;
+ /* no extra barrier required */
+ wake_up_bit(word, bit);
+ return true;
+}
+
+/**
+ * atomic_dec_and_wake_up - decrement an atomic_t and if zero, wake up waiters
+ * @var: the variable to dec and test
+ *
+ * Decrements the atomic variable and if it reaches zero, send a wake_up to any
+ * processes waiting on the variable.
+ *
+ * This function has the same complete ordering semantics as atomic_dec_and_test.
+ *
+ * Returns %true is the variable reaches zero and the wake up was sent.
+ */
+
+static inline bool atomic_dec_and_wake_up(atomic_t *var)
+{
+ if (!atomic_dec_and_test(var))
+ return false;
+ /* No extra barrier required */
+ wake_up_var(var);
+ return true;
+}
+
+/**
+ * store_release_wake_up - update a variable and send a wake_up
+ * @var: the address of the variable to be updated and woken
+ * @val: the value to store in the variable.
+ *
+ * Store the given value in the variable send a wake up to any tasks
+ * waiting on the variable. All necessary barriers are included to ensure
+ * the task calling wait_var_event() sees the new value and all values
+ * written to memory before this call.
+ */
+#define store_release_wake_up(var, val) \
+do { \
+ smp_store_release(var, val); \
+ smp_mb(); \
+ wake_up_var(var); \
+} while (0)
+
#endif /* _LINUX_WAIT_BIT_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 59c2695e12e7..b0dc957c3e56 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -412,7 +412,7 @@ enum wq_flags {
};
enum wq_consts {
- WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */
+ WQ_MAX_ACTIVE = 2048, /* I like 2048, better ideas? */
WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE,
WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d6db822e4bb3..d11b903c2edb 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -213,11 +213,8 @@ static inline void wait_on_inode(struct inode *inode)
#include <linux/bio.h>
void __inode_attach_wb(struct inode *inode, struct folio *folio);
-void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
- struct inode *inode)
- __releases(&inode->i_lock);
void wbc_detach_inode(struct writeback_control *wbc);
-void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
+void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio,
size_t bytes);
int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
enum wb_reason reason, struct wb_completion *done);
@@ -254,22 +251,8 @@ static inline void inode_detach_wb(struct inode *inode)
}
}
-/**
- * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite
- * @wbc: writeback_control of interest
- * @inode: target inode
- *
- * This function is to be used by __filemap_fdatawrite_range(), which is an
- * alternative entry point into writeback code, and first ensures @inode is
- * associated with a bdi_writeback and attaches it to @wbc.
- */
-static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
- struct inode *inode)
-{
- spin_lock(&inode->i_lock);
- inode_attach_wb(inode, NULL);
- wbc_attach_and_unlock_inode(wbc, inode);
-}
+void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
+ struct inode *inode);
/**
* wbc_init_bio - writeback specific initializtion of bio
@@ -303,13 +286,6 @@ static inline void inode_detach_wb(struct inode *inode)
{
}
-static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
- struct inode *inode)
- __releases(&inode->i_lock)
-{
- spin_unlock(&inode->i_lock);
-}
-
static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
struct inode *inode)
{
@@ -324,7 +300,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
}
static inline void wbc_account_cgroup_owner(struct writeback_control *wbc,
- struct page *page, size_t bytes)
+ struct folio *folio, size_t bytes)
{
}
diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index bb763085479a..a401a2f31a77 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -65,6 +65,16 @@ struct ww_acquire_ctx {
#endif
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
+ /**
+ * @first_lock_dep_map: fake lockdep_map for first locked ww_mutex.
+ *
+ * lockdep requires the lockdep_map for the first locked ww_mutex
+ * in a ww transaction to remain in memory until all ww_mutexes of
+ * the transaction have been unlocked. Ensure this by keeping a
+ * fake locked ww_mutex lockdep map between ww_acquire_init() and
+ * ww_acquire_fini().
+ */
+ struct lockdep_map first_lock_dep_map;
#endif
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
unsigned int deadlock_inject_interval;
@@ -146,7 +156,10 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx,
debug_check_no_locks_freed((void *)ctx, sizeof(*ctx));
lockdep_init_map(&ctx->dep_map, ww_class->acquire_name,
&ww_class->acquire_key, 0);
+ lockdep_init_map(&ctx->first_lock_dep_map, ww_class->mutex_name,
+ &ww_class->mutex_key, 0);
mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_);
+ mutex_acquire_nest(&ctx->first_lock_dep_map, 0, 0, &ctx->dep_map, _RET_IP_);
#endif
#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
ctx->deadlock_inject_interval = 1;
@@ -185,6 +198,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx)
static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ mutex_release(&ctx->first_lock_dep_map, _THIS_IP_);
mutex_release(&ctx->dep_map, _THIS_IP_);
#endif
#ifdef DEBUG_WW_MUTEXES
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index d20051865800..86b0d47984a1 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -19,6 +19,10 @@
#include <linux/user_namespace.h>
#include <uapi/linux/xattr.h>
+/* List of all open_how "versions". */
+#define XATTR_ARGS_SIZE_VER0 16 /* sizeof first published struct */
+#define XATTR_ARGS_SIZE_LATEST XATTR_ARGS_SIZE_VER0
+
struct inode;
struct dentry;